X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_shader.c;h=5dc12d87243631c4f8e667c85aa817c60924919a;hb=0eb65b49442888ec45895b1aa5c0f8087361364f;hp=9bc679f3296f24e848e64ca2f3a08f24bb50b1d0;hpb=0810f15046f3bcbcb8c9c9895e2b676b97720df1;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 9bc679f3296..5dc12d87243 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -69,7 +69,7 @@ enum si_arg_regfile { static void si_init_shader_ctx(struct si_shader_context *ctx, struct si_screen *sscreen, - struct si_compiler *compiler); + struct ac_llvm_compiler *compiler); static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, @@ -2141,9 +2141,8 @@ void si_load_system_value(struct si_shader_context *ctx, LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT), LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT), LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT), - lp_build_emit_llvm_unary(&ctx->bld_base, TGSI_OPCODE_RCP, - LLVMGetParam(ctx->main_fn, - SI_PARAM_POS_W_FLOAT)), + ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, + LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT)), }; value = ac_build_gather_values(&ctx->ac, pos, 4); break; @@ -2164,10 +2163,8 @@ void si_load_system_value(struct si_shader_context *ctx, LLVMConstReal(ctx->f32, 0), LLVMConstReal(ctx->f32, 0) }; - pos[0] = lp_build_emit_llvm_unary(&ctx->bld_base, - TGSI_OPCODE_FRC, pos[0]); - pos[1] = lp_build_emit_llvm_unary(&ctx->bld_base, - TGSI_OPCODE_FRC, pos[1]); + pos[0] = ac_build_fract(&ctx->ac, pos[0], 32); + pos[1] = ac_build_fract(&ctx->ac, pos[1], 32); value = ac_build_gather_values(&ctx->ac, pos, 4); break; } @@ -2300,6 +2297,7 @@ void si_load_system_value(struct si_shader_context *ctx, void si_declare_compute_memory(struct si_shader_context *ctx) { struct si_shader_selector *sel = ctx->shader->selector; + unsigned lds_size = sel->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE]; LLVMTypeRef i8p = LLVMPointerType(ctx->i8, AC_LOCAL_ADDR_SPACE); LLVMValueRef var; @@ -2307,7 +2305,7 @@ void si_declare_compute_memory(struct si_shader_context *ctx) assert(!ctx->ac.lds); var = LLVMAddGlobalInAddressSpace(ctx->ac.module, - LLVMArrayType(ctx->i8, sel->local_size), + LLVMArrayType(ctx->i8, lds_size), "compute_lds", AC_LOCAL_ADDR_SPACE); LLVMSetAlignment(var, 4); @@ -3451,7 +3449,7 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) 8 + SI_SGPR_VS_STATE_BITS); #if !HAVE_32BIT_POINTERS - ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 1, + ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4, 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES); #endif @@ -3491,7 +3489,7 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); #if !HAVE_32BIT_POINTERS - ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 1, + ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4, 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES); #endif @@ -3678,38 +3676,36 @@ static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi, * an IF statement is added that clamps all colors if the constant * is true. */ - if (ctx->type == PIPE_SHADER_VERTEX) { - struct lp_build_if_state if_ctx; - LLVMValueRef cond = NULL; - LLVMValueRef addr, val; - - for (i = 0; i < info->num_outputs; i++) { - if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR && - info->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR) - continue; + struct lp_build_if_state if_ctx; + LLVMValueRef cond = NULL; + LLVMValueRef addr, val; - /* We've found a color. */ - if (!cond) { - /* The state is in the first bit of the user SGPR. */ - cond = LLVMGetParam(ctx->main_fn, - ctx->param_vs_state_bits); - cond = LLVMBuildTrunc(ctx->ac.builder, cond, - ctx->i1, ""); - lp_build_if(&if_ctx, &ctx->gallivm, cond); - } + for (i = 0; i < info->num_outputs; i++) { + if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR && + info->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR) + continue; - for (j = 0; j < 4; j++) { - addr = addrs[4 * i + j]; - val = LLVMBuildLoad(ctx->ac.builder, addr, ""); - val = ac_build_clamp(&ctx->ac, val); - LLVMBuildStore(ctx->ac.builder, val, addr); - } + /* We've found a color. */ + if (!cond) { + /* The state is in the first bit of the user SGPR. */ + cond = LLVMGetParam(ctx->main_fn, + ctx->param_vs_state_bits); + cond = LLVMBuildTrunc(ctx->ac.builder, cond, + ctx->i1, ""); + lp_build_if(&if_ctx, &ctx->gallivm, cond); } - if (cond) - lp_build_endif(&if_ctx); + for (j = 0; j < 4; j++) { + addr = addrs[4 * i + j]; + val = LLVMBuildLoad(ctx->ac.builder, addr, ""); + val = ac_build_clamp(&ctx->ac, val); + LLVMBuildStore(ctx->ac.builder, val, addr); + } } + if (cond) + lp_build_endif(&if_ctx); + for (i = 0; i < info->num_outputs; i++) { outputs[i].semantic_name = info->output_semantic_name[i]; outputs[i].semantic_index = info->output_semantic_index[i]; @@ -4021,8 +4017,10 @@ static LLVMValueRef si_llvm_emit_ddxy_interp( for (i = 0; i < 2; i++) { a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij, LLVMConstInt(ctx->i32, i, 0), ""); - result[i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDX, a); - result[2+i] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_DDY, a); + result[i] = ac_build_ddxy(&ctx->ac, AC_TID_MASK_TOP_LEFT, 1, + ac_to_integer(&ctx->ac, a)); /* DDX */ + result[2+i] = ac_build_ddxy(&ctx->ac, AC_TID_MASK_TOP_LEFT, 2, + ac_to_integer(&ctx->ac, a)); /* DDY */ } return ac_build_gather_values(&ctx->ac, result, 4); @@ -4635,10 +4633,10 @@ static void declare_global_desc_pointers(struct si_shader_context *ctx, static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx, struct si_function_info *fninfo) { + ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id); - ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32); } static void declare_vs_input_vgprs(struct si_shader_context *ctx, @@ -4745,7 +4743,7 @@ static void create_function(struct si_shader_context *ctx) /* no extra parameters */ } else { if (shader->is_gs_copy_shader) { - fninfo.num_params = ctx->param_rw_buffers + 1; + fninfo.num_params = ctx->param_vs_state_bits + 1; fninfo.num_sgpr_params = fninfo.num_params; } @@ -4866,13 +4864,12 @@ static void create_function(struct si_shader_context *ctx) if (ctx->type == PIPE_SHADER_VERTEX) { declare_vs_specific_input_sgprs(ctx, &fninfo); } else { + ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32); - if (!HAVE_32BIT_POINTERS) { - /* Declare as many input SGPRs as the VS has. */ + /* Declare as many input SGPRs as the VS has. */ + if (!HAVE_32BIT_POINTERS) add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ - } } if (!HAVE_32BIT_POINTERS) { @@ -4918,6 +4915,7 @@ static void create_function(struct si_shader_context *ctx) case PIPE_SHADER_TESS_EVAL: declare_global_desc_pointers(ctx, &fninfo); declare_per_stage_desc_pointers(ctx, &fninfo, true); + ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -5643,7 +5641,7 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader, static int si_compile_llvm(struct si_screen *sscreen, struct ac_shader_binary *binary, struct si_shader_config *conf, - struct si_compiler *compiler, + struct ac_llvm_compiler *compiler, LLVMModuleRef mod, struct pipe_debug_callback *debug, unsigned processor, @@ -5721,27 +5719,21 @@ static void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret) /* Generate code for the hardware VS shader stage to go with a geometry shader */ struct si_shader * si_generate_gs_copy_shader(struct si_screen *sscreen, - struct si_compiler *compiler, + struct ac_llvm_compiler *compiler, struct si_shader_selector *gs_selector, struct pipe_debug_callback *debug) { struct si_shader_context ctx; struct si_shader *shader; LLVMBuilderRef builder; - struct si_shader_output_values *outputs; + struct si_shader_output_values outputs[SI_MAX_VS_OUTPUTS]; struct tgsi_shader_info *gsinfo = &gs_selector->info; int i, r; - outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0])); - - if (!outputs) - return NULL; shader = CALLOC_STRUCT(si_shader); - if (!shader) { - FREE(outputs); + if (!shader) return NULL; - } /* We can leave the fence as permanently signaled because the GS copy * shader only becomes visible globally after it has been compiled. */ @@ -5832,8 +5824,51 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, stream); } - if (stream == 0) + if (stream == 0) { + /* Vertex color clamping. + * + * This uses a state constant loaded in a user data SGPR and + * an IF statement is added that clamps all colors if the constant + * is true. + */ + struct lp_build_if_state if_ctx; + LLVMValueRef v[2], cond = NULL; + LLVMBasicBlockRef blocks[2]; + + for (unsigned i = 0; i < gsinfo->num_outputs; i++) { + if (gsinfo->output_semantic_name[i] != TGSI_SEMANTIC_COLOR && + gsinfo->output_semantic_name[i] != TGSI_SEMANTIC_BCOLOR) + continue; + + /* We've found a color. */ + if (!cond) { + /* The state is in the first bit of the user SGPR. */ + cond = LLVMGetParam(ctx.main_fn, + ctx.param_vs_state_bits); + cond = LLVMBuildTrunc(ctx.ac.builder, cond, + ctx.i1, ""); + lp_build_if(&if_ctx, &ctx.gallivm, cond); + /* Remember blocks for Phi. */ + blocks[0] = if_ctx.true_block; + blocks[1] = if_ctx.entry_block; + } + + for (unsigned j = 0; j < 4; j++) { + /* Insert clamp into the true block. */ + v[0] = ac_build_clamp(&ctx.ac, outputs[i].values[j]); + v[1] = outputs[i].values[j]; + + /* Insert Phi into the endif block. */ + LLVMPositionBuilderAtEnd(ctx.ac.builder, if_ctx.merge_block); + outputs[i].values[j] = ac_build_phi(&ctx.ac, ctx.f32, 2, v, blocks); + LLVMPositionBuilderAtEnd(ctx.ac.builder, if_ctx.true_block); + } + } + if (cond) + lp_build_endif(&if_ctx); + si_llvm_export_vs(&ctx, outputs, gsinfo->num_outputs); + } LLVMBuildBr(builder, end_bb); } @@ -5860,8 +5895,6 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, si_llvm_dispose(&ctx); - FREE(outputs); - if (r != 0) { FREE(shader); shader = NULL; @@ -5968,7 +6001,7 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade static void si_init_shader_ctx(struct si_shader_context *ctx, struct si_screen *sscreen, - struct si_compiler *compiler) + struct ac_llvm_compiler *compiler) { struct lp_build_tgsi_context *bld_base; @@ -6141,16 +6174,24 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx) if (!shader->is_monolithic) ac_init_exec_full_mask(&ctx->ac); - /* The barrier must execute for all shaders in a - * threadgroup. - */ - si_llvm_emit_barrier(NULL, bld_base, NULL); - LLVMValueRef num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8); LLVMValueRef ena = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), num_threads, ""); lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena); + + /* The barrier must execute for all shaders in a + * threadgroup. + * + * Execute the barrier inside the conditional block, + * so that empty waves can jump directly to s_endpgm, + * which will also signal the barrier. + * + * If the shader is TCS and the TCS epilog is present + * and contains a barrier, it will wait there and then + * reach s_endpgm. + */ + si_llvm_emit_barrier(NULL, bld_base, NULL); } } @@ -6750,7 +6791,7 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, } int si_compile_tgsi_shader(struct si_screen *sscreen, - struct si_compiler *compiler, + struct ac_llvm_compiler *compiler, struct si_shader *shader, struct pipe_debug_callback *debug) { @@ -7091,7 +7132,7 @@ si_get_shader_part(struct si_screen *sscreen, enum pipe_shader_type type, bool prolog, union si_shader_part_key *key, - struct si_compiler *compiler, + struct ac_llvm_compiler *compiler, struct pipe_debug_callback *debug, void (*build)(struct si_shader_context *, union si_shader_part_key *), @@ -7337,7 +7378,7 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, } static bool si_get_vs_prolog(struct si_screen *sscreen, - struct si_compiler *compiler, + struct ac_llvm_compiler *compiler, struct si_shader *shader, struct pipe_debug_callback *debug, struct si_shader *main_part, @@ -7365,7 +7406,7 @@ static bool si_get_vs_prolog(struct si_screen *sscreen, * Select and compile (or reuse) vertex shader parts (prolog & epilog). */ static bool si_shader_select_vs_parts(struct si_screen *sscreen, - struct si_compiler *compiler, + struct ac_llvm_compiler *compiler, struct si_shader *shader, struct pipe_debug_callback *debug) { @@ -7454,7 +7495,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, * Select and compile (or reuse) TCS parts (epilog). */ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, - struct si_compiler *compiler, + struct ac_llvm_compiler *compiler, struct si_shader *shader, struct pipe_debug_callback *debug) { @@ -7486,7 +7527,7 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen, * Select and compile (or reuse) GS parts (prolog). */ static bool si_shader_select_gs_parts(struct si_screen *sscreen, - struct si_compiler *compiler, + struct ac_llvm_compiler *compiler, struct si_shader *shader, struct pipe_debug_callback *debug) { @@ -7899,7 +7940,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx, * Select and compile (or reuse) pixel shader parts (prolog & epilog). */ static bool si_shader_select_ps_parts(struct si_screen *sscreen, - struct si_compiler *compiler, + struct ac_llvm_compiler *compiler, struct si_shader *shader, struct pipe_debug_callback *debug) { @@ -8030,7 +8071,7 @@ static void si_fix_resource_usage(struct si_screen *sscreen, } } -int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler, +int si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, struct si_shader *shader, struct pipe_debug_callback *debug) {