radv: enable lowering of GS intrinsics for the LLVM backend
[mesa.git] / src / amd / vulkan / radv_nir_to_llvm.c
index 961d6db0ba0e0c866c3899ed1e9a6222fccd096f..900246d275ef57a13cf3240a1ae7360cfa45b573 100644 (file)
@@ -29,6 +29,7 @@
 #include "radv_shader.h"
 #include "radv_shader_helper.h"
 #include "radv_shader_args.h"
+#include "radv_debug.h"
 #include "nir/nir.h"
 
 #include "sid.h"
@@ -875,39 +876,21 @@ static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi)
 
 static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
                                     unsigned stream,
+                                    LLVMValueRef vertexidx,
                                     LLVMValueRef *addrs);
 
 static void
-visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
+visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream,
+                              LLVMValueRef vertexidx, LLVMValueRef *addrs)
 {
-       LLVMValueRef gs_next_vertex;
-       LLVMValueRef can_emit;
        unsigned offset = 0;
        struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
 
        if (ctx->args->options->key.vs_common_out.as_ngg) {
-               gfx10_ngg_gs_emit_vertex(ctx, stream, addrs);
+               gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs);
                return;
        }
 
-       /* Write vertex attribute values to GSVS ring */
-       gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
-                                      ctx->gs_next_vertex[stream],
-                                      "");
-
-       /* If this thread has already emitted the declared maximum number of
-        * vertices, don't emit any more: excessive vertex emissions are not
-        * supposed to have any effect.
-        */
-       can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
-                                LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
-
-       bool use_kill = !ctx->args->shader_info->gs.writes_memory;
-       if (use_kill)
-               ac_build_kill_if_false(&ctx->ac, can_emit);
-       else
-               ac_build_ifcc(&ctx->ac, can_emit, 6505);
-
        for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
                unsigned output_usage_mask =
                        ctx->args->shader_info->gs.output_usage_mask[i];
@@ -932,7 +915,7 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
 
                        offset++;
 
-                       voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, "");
+                       voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, "");
                        voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
 
                        out_val = ac_to_integer(&ctx->ac, out_val);
@@ -948,16 +931,9 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
                }
        }
 
-       gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex,
-                                     ctx->ac.i32_1, "");
-       LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
-
        ac_build_sendmsg(&ctx->ac,
                         AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
                         ctx->gs_wave_id);
-
-       if (!use_kill)
-               ac_build_endif(&ctx->ac, 6505);
 }
 
 static void
@@ -3308,25 +3284,11 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
 
 static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
                                     unsigned stream,
+                                    LLVMValueRef vertexidx,
                                     LLVMValueRef *addrs)
 {
        LLVMBuilderRef builder = ctx->ac.builder;
        LLVMValueRef tmp;
-       const LLVMValueRef vertexidx =
-               LLVMBuildLoad(builder, ctx->gs_next_vertex[stream], "");
-
-       /* If this thread has already emitted the declared maximum number of
-        * vertices, skip the write: excessive vertex emissions are not
-        * supposed to have any effect.
-        */
-       const LLVMValueRef can_emit =
-               LLVMBuildICmp(builder, LLVMIntULT, vertexidx,
-                             LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
-       ac_build_ifcc(&ctx->ac, can_emit, 9001);
-
-       tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
-       tmp = LLVMBuildSelect(builder, can_emit, tmp, vertexidx, "");
-       LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
 
        const LLVMValueRef vertexptr =
                ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx);
@@ -3358,6 +3320,13 @@ static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
        }
        assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size);
 
+       /* Store the current number of emitted vertices to zero out remaining
+        * primitive flags in case the geometry shader doesn't emit the maximum
+        * number of vertices.
+        */
+       tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
+       LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
+
        /* Determine and store whether this vertex completed a primitive. */
        const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
 
@@ -3394,8 +3363,6 @@ static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
        tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
        tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), "");
        LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]);
-
-       ac_build_endif(&ctx->ac, 9001);
 }
 
 static void
@@ -3924,7 +3891,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 
        ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
                             args->options->family, float_mode,
-                            args->shader_info->wave_size, 64);
+                            args->shader_info->wave_size,
+                            args->shader_info->ballot_bit_size);
        ctx.context = ctx.ac.context;
 
        ctx.max_workgroup_size = 0;
@@ -3946,7 +3914,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 
        ctx.abi.inputs = &ctx.inputs[0];
        ctx.abi.emit_outputs = handle_shader_outputs_post;
-       ctx.abi.emit_vertex = visit_emit_vertex;
+       ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
        ctx.abi.load_ubo = radv_load_ubo;
        ctx.abi.load_ssbo = radv_load_ssbo;
        ctx.abi.load_sampler_desc = radv_get_sampler_desc;
@@ -4266,7 +4234,7 @@ static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm,
        free(elf_buffer);
 }
 
-void
+static void
 radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
                        struct radv_shader_binary **rbinary,
                        const struct radv_shader_args *args,
@@ -4392,7 +4360,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
        LLVMPositionBuilderAtEnd(ctx->ac.builder, end_bb);
 }
 
-void
+static void
 radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
                            struct nir_shader *geom_shader,
                            struct radv_shader_binary **rbinary,
@@ -4431,3 +4399,36 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
        (*rbinary)->is_gs_copy_shader = true;
        
 }
+/* Creates an LLVM compiler from args and debug flags, compiles the shader(s) with it, then destroys it. */
+void
+llvm_compile_shader(struct radv_device *device,
+                   unsigned shader_count,
+                   struct nir_shader *const *shaders,
+                   struct radv_shader_binary **binary,
+                   struct radv_shader_args *args)
+{
+       enum ac_target_machine_options tm_options = 0;
+       struct ac_llvm_compiler ac_llvm;
+       bool thread_compiler;
+
+       tm_options |= AC_TM_SUPPORTS_SPILL; /* requested unconditionally */
+       if (args->options->check_ir) /* only when the shader options ask for it */
+               tm_options |= AC_TM_CHECK_IR;
+       if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT) /* driven by an instance debug flag */
+               tm_options |= AC_TM_NO_LOAD_STORE_OPT;
+
+       thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM); /* true unless that debug flag is set */
+
+       radv_init_llvm_compiler(&ac_llvm, thread_compiler,
+                               args->options->family, tm_options,
+                               args->shader_info->wave_size); /* NOTE(review): no result of this call is checked here; confirm it cannot fail */
+
+       if (args->is_gs_copy_shader) {
+               radv_compile_gs_copy_shader(&ac_llvm, *shaders, binary, args); /* *shaders (i.e. shaders[0]) is passed as the geometry shader */
+       } else {
+               radv_compile_nir_shader(&ac_llvm, binary, args,
+                                       shaders, shader_count); /* all shader_count entries of shaders are handed over */
+       }
+
+       radv_destroy_llvm_compiler(&ac_llvm, thread_compiler); /* same flag value that was passed at init */
+}