radeonsi: rename si_compiler -> ac_llvm_compiler
[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
index 9bc679f3296f24e848e64ca2f3a08f24bb50b1d0..5dc12d87243631c4f8e667c85aa817c60924919a 100644 (file)
@@ -69,7 +69,7 @@ enum si_arg_regfile {
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
                               struct si_screen *sscreen,
-                              struct si_compiler *compiler);
+                              struct ac_llvm_compiler *compiler);
 
 static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
                                 struct lp_build_tgsi_context *bld_base,
@@ -2141,9 +2141,8 @@ void si_load_system_value(struct si_shader_context *ctx,
                        LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT),
                        LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT),
                        LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT),
-                       lp_build_emit_llvm_unary(&ctx->bld_base, TGSI_OPCODE_RCP,
-                                                LLVMGetParam(ctx->main_fn,
-                                                             SI_PARAM_POS_W_FLOAT)),
+                       ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
+                                     LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT)),
                };
                value = ac_build_gather_values(&ctx->ac, pos, 4);
                break;
@@ -2164,10 +2163,8 @@ void si_load_system_value(struct si_shader_context *ctx,
                        LLVMConstReal(ctx->f32, 0),
                        LLVMConstReal(ctx->f32, 0)
                };
-               pos[0] = lp_build_emit_llvm_unary(&ctx->bld_base,
-                                                 TGSI_OPCODE_FRC, pos[0]);
-               pos[1] = lp_build_emit_llvm_unary(&ctx->bld_base,
-                                                 TGSI_OPCODE_FRC, pos[1]);
+               pos[0] = ac_build_fract(&ctx->ac, pos[0], 32);
+               pos[1] = ac_build_fract(&ctx->ac, pos[1], 32);
                value = ac_build_gather_values(&ctx->ac, pos, 4);
                break;
        }
@@ -2300,6 +2297,7 @@ void si_load_system_value(struct si_shader_context *ctx,
 void si_declare_compute_memory(struct si_shader_context *ctx)
 {
        struct si_shader_selector *sel = ctx->shader->selector;
+       unsigned lds_size = sel->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE];
 
        LLVMTypeRef i8p = LLVMPointerType(ctx->i8, AC_LOCAL_ADDR_SPACE);
        LLVMValueRef var;
@@ -2307,7 +2305,7 @@ void si_declare_compute_memory(struct si_shader_context *ctx)
        assert(!ctx->ac.lds);
 
        var = LLVMAddGlobalInAddressSpace(ctx->ac.module,
-                                         LLVMArrayType(ctx->i8, sel->local_size),
+                                         LLVMArrayType(ctx->i8, lds_size),
                                          "compute_lds",
                                          AC_LOCAL_ADDR_SPACE);
        LLVMSetAlignment(var, 4);
@@ -3451,7 +3449,7 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
                                  8 + SI_SGPR_VS_STATE_BITS);
 
 #if !HAVE_32BIT_POINTERS
-       ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 1,
+       ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4,
                                  8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
 #endif
 
@@ -3491,7 +3489,7 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
                                  8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
 
 #if !HAVE_32BIT_POINTERS
-       ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 1,
+       ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4,
                                  8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
 #endif
 
@@ -3678,38 +3676,36 @@ static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
         * an IF statement is added that clamps all colors if the constant
         * is true.
         */
-       if (ctx->type == PIPE_SHADER_VERTEX) {
-               struct lp_build_if_state if_ctx;
-               LLVMValueRef cond = NULL;
-               LLVMValueRef addr, val;
-
-               for (i = 0; i < info->num_outputs; i++) {
-                       if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR &&
-                           info->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR)
-                               continue;
+       struct lp_build_if_state if_ctx;
+       LLVMValueRef cond = NULL;
+       LLVMValueRef addr, val;
 
-                       /* We've found a color. */
-                       if (!cond) {
-                               /* The state is in the first bit of the user SGPR. */
-                               cond = LLVMGetParam(ctx->main_fn,
-                                                   ctx->param_vs_state_bits);
-                               cond = LLVMBuildTrunc(ctx->ac.builder, cond,
-                                                     ctx->i1, "");
-                               lp_build_if(&if_ctx, &ctx->gallivm, cond);
-                       }
+       for (i = 0; i < info->num_outputs; i++) {
+               if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR &&
+                   info->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR)
+                       continue;
 
-                       for (j = 0; j < 4; j++) {
-                               addr = addrs[4 * i + j];
-                               val = LLVMBuildLoad(ctx->ac.builder, addr, "");
-                               val = ac_build_clamp(&ctx->ac, val);
-                               LLVMBuildStore(ctx->ac.builder, val, addr);
-                       }
+               /* We've found a color. */
+               if (!cond) {
+                       /* The state is in the first bit of the user SGPR. */
+                       cond = LLVMGetParam(ctx->main_fn,
+                                           ctx->param_vs_state_bits);
+                       cond = LLVMBuildTrunc(ctx->ac.builder, cond,
+                                             ctx->i1, "");
+                       lp_build_if(&if_ctx, &ctx->gallivm, cond);
                }
 
-               if (cond)
-                       lp_build_endif(&if_ctx);
+               for (j = 0; j < 4; j++) {
+                       addr = addrs[4 * i + j];
+                       val = LLVMBuildLoad(ctx->ac.builder, addr, "");
+                       val = ac_build_clamp(&ctx->ac, val);
+                       LLVMBuildStore(ctx->ac.builder, val, addr);
+               }
        }
 
+       if (cond)
+               lp_build_endif(&if_ctx);
+
        for (i = 0; i < info->num_outputs; i++) {
                outputs[i].semantic_name = info->output_semantic_name[i];
                outputs[i].semantic_index = info->output_semantic_index[i];
@@ -4021,8 +4017,10 @@ static LLVMValueRef si_llvm_emit_ddxy_interp(
        for (i = 0; i < 2; i++) {
                a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
                                            LLVMConstInt(ctx->i32, i, 0), "");
-               result[i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDX, a);
-               result[2+i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDY, a);
+               result[i] = ac_build_ddxy(&ctx->ac, AC_TID_MASK_TOP_LEFT, 1,
+                                         ac_to_integer(&ctx->ac, a)); /* DDX */
+               result[2+i] = ac_build_ddxy(&ctx->ac, AC_TID_MASK_TOP_LEFT, 2,
+                                           ac_to_integer(&ctx->ac, a)); /* DDY */
        }
 
        return ac_build_gather_values(&ctx->ac, result, 4);
@@ -4635,10 +4633,10 @@ static void declare_global_desc_pointers(struct si_shader_context *ctx,
 static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
                                            struct si_function_info *fninfo)
 {
+       ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32);
        add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
        add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance);
        add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
-       ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32);
 }
 
 static void declare_vs_input_vgprs(struct si_shader_context *ctx,
@@ -4745,7 +4743,7 @@ static void create_function(struct si_shader_context *ctx)
                        /* no extra parameters */
                } else {
                        if (shader->is_gs_copy_shader) {
-                               fninfo.num_params = ctx->param_rw_buffers + 1;
+                               fninfo.num_params = ctx->param_vs_state_bits + 1;
                                fninfo.num_sgpr_params = fninfo.num_params;
                        }
 
@@ -4866,13 +4864,12 @@ static void create_function(struct si_shader_context *ctx)
                if (ctx->type == PIPE_SHADER_VERTEX) {
                        declare_vs_specific_input_sgprs(ctx, &fninfo);
                } else {
+                       ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32);
                        ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
                        ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
-                       if (!HAVE_32BIT_POINTERS) {
-                               /* Declare as many input SGPRs as the VS has. */
+                       /* Declare as many input SGPRs as the VS has. */
+                       if (!HAVE_32BIT_POINTERS)
                                add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-                               ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
-                       }
                }
 
                if (!HAVE_32BIT_POINTERS) {
@@ -4918,6 +4915,7 @@ static void create_function(struct si_shader_context *ctx)
        case PIPE_SHADER_TESS_EVAL:
                declare_global_desc_pointers(ctx, &fninfo);
                declare_per_stage_desc_pointers(ctx, &fninfo, true);
+               ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32);
                ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
                ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
 
@@ -5643,7 +5641,7 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
 static int si_compile_llvm(struct si_screen *sscreen,
                           struct ac_shader_binary *binary,
                           struct si_shader_config *conf,
-                          struct si_compiler *compiler,
+                          struct ac_llvm_compiler *compiler,
                           LLVMModuleRef mod,
                           struct pipe_debug_callback *debug,
                           unsigned processor,
@@ -5721,27 +5719,21 @@ static void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
 /* Generate code for the hardware VS shader stage to go with a geometry shader */
 struct si_shader *
 si_generate_gs_copy_shader(struct si_screen *sscreen,
-                          struct si_compiler *compiler,
+                          struct ac_llvm_compiler *compiler,
                           struct si_shader_selector *gs_selector,
                           struct pipe_debug_callback *debug)
 {
        struct si_shader_context ctx;
        struct si_shader *shader;
        LLVMBuilderRef builder;
-       struct si_shader_output_values *outputs;
+       struct si_shader_output_values outputs[SI_MAX_VS_OUTPUTS];
        struct tgsi_shader_info *gsinfo = &gs_selector->info;
        int i, r;
 
-       outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
-
-       if (!outputs)
-               return NULL;
 
        shader = CALLOC_STRUCT(si_shader);
-       if (!shader) {
-               FREE(outputs);
+       if (!shader)
                return NULL;
-       }
 
        /* We can leave the fence as permanently signaled because the GS copy
         * shader only becomes visible globally after it has been compiled. */
@@ -5832,8 +5824,51 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                                               stream);
                }
 
-               if (stream == 0)
+               if (stream == 0) {
+                       /* Vertex color clamping.
+                        *
+                        * This reads a state bit from a user data SGPR and wraps the
+                        * clamping of all colors in an IF statement that is taken only
+                        * when that bit is set.
+                        */
+                       struct lp_build_if_state if_ctx;
+                       LLVMValueRef v[2], cond = NULL;
+                       LLVMBasicBlockRef blocks[2];
+
+                       for (unsigned i = 0; i < gsinfo->num_outputs; i++) {
+                               if (gsinfo->output_semantic_name[i] != TGSI_SEMANTIC_COLOR &&
+                                   gsinfo->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR)
+                                       continue;
+
+                               /* We've found a color. */
+                               if (!cond) {
+                                       /* The state is in the first bit of the user SGPR. */
+                                       cond = LLVMGetParam(ctx.main_fn,
+                                                           ctx.param_vs_state_bits);
+                                       cond = LLVMBuildTrunc(ctx.ac.builder, cond,
+                                                             ctx.i1, "");
+                                       lp_build_if(&if_ctx, &ctx.gallivm, cond);
+                                       /* Remember blocks for Phi. */
+                                       blocks[0] = if_ctx.true_block;
+                                       blocks[1] = if_ctx.entry_block;
+                               }
+
+                               for (unsigned j = 0; j < 4; j++) {
+                                       /* Insert clamp into the true block. */
+                                       v[0] = ac_build_clamp(&ctx.ac, outputs[i].values[j]);
+                                       v[1] = outputs[i].values[j];
+
+                                       /* Insert Phi into the endif block. */
+                                       LLVMPositionBuilderAtEnd(ctx.ac.builder, if_ctx.merge_block);
+                                       outputs[i].values[j] = ac_build_phi(&ctx.ac, ctx.f32, 2, v, blocks);
+                                       LLVMPositionBuilderAtEnd(ctx.ac.builder, if_ctx.true_block);
+                               }
+                       }
+                       if (cond)
+                               lp_build_endif(&if_ctx);
+
                        si_llvm_export_vs(&ctx, outputs, gsinfo->num_outputs);
+               }
 
                LLVMBuildBr(builder, end_bb);
        }
@@ -5860,8 +5895,6 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 
        si_llvm_dispose(&ctx);
 
-       FREE(outputs);
-
        if (r != 0) {
                FREE(shader);
                shader = NULL;
@@ -5968,7 +6001,7 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
                               struct si_screen *sscreen,
-                              struct si_compiler *compiler)
+                              struct ac_llvm_compiler *compiler)
 {
        struct lp_build_tgsi_context *bld_base;
 
@@ -6141,16 +6174,24 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx)
                        if (!shader->is_monolithic)
                                ac_init_exec_full_mask(&ctx->ac);
 
-                       /* The barrier must execute for all shaders in a
-                        * threadgroup.
-                        */
-                       si_llvm_emit_barrier(NULL, bld_base, NULL);
-
                        LLVMValueRef num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
                        LLVMValueRef ena =
                                LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
                                            ac_get_thread_id(&ctx->ac), num_threads, "");
                        lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena);
+
+                       /* The barrier must execute for all shaders in a
+                        * threadgroup.
+                        *
+                        * Execute the barrier inside the conditional block,
+                        * so that empty waves can jump directly to s_endpgm,
+                        * which will also signal the barrier.
+                        *
+                        * If the shader is TCS and the TCS epilog is present
+                        * and contains a barrier, it will wait there and then
+                        * reach s_endpgm.
+                        */
+                       si_llvm_emit_barrier(NULL, bld_base, NULL);
                }
        }
 
@@ -6750,7 +6791,7 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
 }
 
 int si_compile_tgsi_shader(struct si_screen *sscreen,
-                          struct si_compiler *compiler,
+                          struct ac_llvm_compiler *compiler,
                           struct si_shader *shader,
                           struct pipe_debug_callback *debug)
 {
@@ -7091,7 +7132,7 @@ si_get_shader_part(struct si_screen *sscreen,
                   enum pipe_shader_type type,
                   bool prolog,
                   union si_shader_part_key *key,
-                  struct si_compiler *compiler,
+                  struct ac_llvm_compiler *compiler,
                   struct pipe_debug_callback *debug,
                   void (*build)(struct si_shader_context *,
                                 union si_shader_part_key *),
@@ -7337,7 +7378,7 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 }
 
 static bool si_get_vs_prolog(struct si_screen *sscreen,
-                            struct si_compiler *compiler,
+                            struct ac_llvm_compiler *compiler,
                             struct si_shader *shader,
                             struct pipe_debug_callback *debug,
                             struct si_shader *main_part,
@@ -7365,7 +7406,7 @@ static bool si_get_vs_prolog(struct si_screen *sscreen,
  * Select and compile (or reuse) vertex shader parts (prolog & epilog).
  */
 static bool si_shader_select_vs_parts(struct si_screen *sscreen,
-                                     struct si_compiler *compiler,
+                                     struct ac_llvm_compiler *compiler,
                                      struct si_shader *shader,
                                      struct pipe_debug_callback *debug)
 {
@@ -7454,7 +7495,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
  * Select and compile (or reuse) TCS parts (epilog).
  */
 static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
-                                      struct si_compiler *compiler,
+                                      struct ac_llvm_compiler *compiler,
                                       struct si_shader *shader,
                                       struct pipe_debug_callback *debug)
 {
@@ -7486,7 +7527,7 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
  * Select and compile (or reuse) GS parts (prolog).
  */
 static bool si_shader_select_gs_parts(struct si_screen *sscreen,
-                                     struct si_compiler *compiler,
+                                     struct ac_llvm_compiler *compiler,
                                      struct si_shader *shader,
                                      struct pipe_debug_callback *debug)
 {
@@ -7899,7 +7940,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
  * Select and compile (or reuse) pixel shader parts (prolog & epilog).
  */
 static bool si_shader_select_ps_parts(struct si_screen *sscreen,
-                                     struct si_compiler *compiler,
+                                     struct ac_llvm_compiler *compiler,
                                      struct si_shader *shader,
                                      struct pipe_debug_callback *debug)
 {
@@ -8030,7 +8071,7 @@ static void si_fix_resource_usage(struct si_screen *sscreen,
        }
 }
 
-int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
+int si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug)
 {