X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_shader.c;h=e942d345dbcce20e8fce75871ccc16cc648c5bcd;hb=2208b760f338fbd9176f4375e23f0ba20a05ce96;hp=c1a310275c180dd8ce316a849c3070bab77f7b19;hpb=b73ce64fb8d71dd4cecaa9548a9bc4ba110c24e8;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c1a310275c1..e942d345dbc 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1090,17 +1090,19 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base, TGSI_NUM_CHANNELS); } + /* Split 64-bit loads. */ + if (tgsi_type_is_64bit(type)) { + LLVMValueRef lo, hi; + + lo = lds_load(bld_base, TGSI_TYPE_UNSIGNED, swizzle, dw_addr); + hi = lds_load(bld_base, TGSI_TYPE_UNSIGNED, swizzle + 1, dw_addr); + return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi); + } + dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr, LLVMConstInt(ctx->i32, swizzle, 0)); value = ac_lds_load(&ctx->ac, dw_addr); - if (tgsi_type_is_64bit(type)) { - LLVMValueRef value2; - dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr, - ctx->i32_1); - value2 = ac_lds_load(&ctx->ac, dw_addr); - return si_llvm_emit_fetch_64bit(bld_base, type, value, value2); - } return bitcast(bld_base, type, value); } @@ -1903,7 +1905,7 @@ void si_declare_compute_memory(struct si_shader_context *ctx, assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED); assert(decl->Range.First == decl->Range.Last); - assert(!ctx->shared_memory); + assert(!ctx->ac.lds); var = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->i8, sel->local_size), @@ -1911,7 +1913,7 @@ void si_declare_compute_memory(struct si_shader_context *ctx, LOCAL_ADDR_SPACE); LLVMSetAlignment(var, 4); - ctx->shared_memory = LLVMBuildBitCast(ctx->ac.builder, var, i8p, ""); + ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, ""); } static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i) @@ -2081,13 +2083,12 @@ static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct si_shader_context *ct } /* Initialize arguments for the shader export intrinsic */ -static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, +static void si_llvm_init_export_args(struct si_shader_context *ctx, LLVMValueRef *values, unsigned target, struct ac_export_args *args) { - struct si_shader_context *ctx = si_shader_context(bld_base); - struct lp_build_context *base = &bld_base->base; + LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32); LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef val[4]; unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR; @@ -2118,10 +2119,10 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, } args->compr = false; - args->out[0] = base->undef; - args->out[1] = base->undef; - args->out[2] = base->undef; - args->out[3] = base->undef; + args->out[0] = f32undef; + args->out[1] = f32undef; + args->out[2] = f32undef; + args->out[3] = f32undef; switch (spi_shader_col_format) { case V_028714_SPI_SHADER_ZERO: @@ -2180,10 +2181,10 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, case V_028714_SPI_SHADER_SNORM16_ABGR: for (chan = 0; chan < 4; chan++) { /* Clamp between [-1, 1]. */ - val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN, + val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MIN, values[chan], LLVMConstReal(ctx->f32, 1)); - val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX, + val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MAX, val[chan], LLVMConstReal(ctx->f32, -1)); /* Convert to a signed integer in [-32767, 32767]. */ @@ -2213,7 +2214,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, /* Clamp. */ for (chan = 0; chan < 4; chan++) { val[chan] = ac_to_integer(&ctx->ac, values[chan]); - val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMIN, + val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_UMIN, val[chan], chan == 3 ? max_alpha : max_rgb); } @@ -2237,10 +2238,10 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, /* Clamp. */ for (chan = 0; chan < 4; chan++) { val[chan] = ac_to_integer(&ctx->ac, values[chan]); - val[chan] = lp_build_emit_llvm_binary(bld_base, + val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_IMIN, val[chan], chan == 3 ? max_alpha : max_rgb); - val[chan] = lp_build_emit_llvm_binary(bld_base, + val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_IMAX, val[chan], chan == 3 ? min_alpha : min_rgb); } @@ -2310,11 +2311,9 @@ static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context * return LLVMBuildFMul(ctx->ac.builder, alpha, coverage, ""); } -static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context *bld_base, +static void si_llvm_emit_clipvertex(struct si_shader_context *ctx, struct ac_export_args *pos, LLVMValueRef *out_elts) { - struct si_shader_context *ctx = si_shader_context(bld_base); - struct lp_build_context *base = &bld_base->base; unsigned reg_index; unsigned chan; unsigned const_chan; @@ -2341,8 +2340,8 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context *bld_base, base_elt = buffer_load_const(ctx, const_resource, addr); args->out[chan] = - lp_build_add(base, args->out[chan], - lp_build_mul(base, base_elt, + lp_build_add(&ctx->bld_base.base, args->out[chan], + lp_build_mul(&ctx->bld_base.base, base_elt, out_elts[const_chan])); } } @@ -2512,7 +2511,7 @@ static void si_export_param(struct si_shader_context *ctx, unsigned index, { struct ac_export_args args; - si_llvm_init_export_args(&ctx->bld_base, values, + si_llvm_init_export_args(ctx, values, V_008DFC_SQ_EXP_PARAM + index, &args); ac_build_export(&ctx->ac, &args); } @@ -2565,11 +2564,10 @@ static void si_build_param_exports(struct si_shader_context *ctx, } /* Generate export instructions for hardware VS shader stage */ -static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base, +static void si_llvm_export_vs(struct si_shader_context *ctx, struct si_shader_output_values *outputs, unsigned noutput) { - struct si_shader_context *ctx = si_shader_context(bld_base); struct si_shader *shader = ctx->shader; struct ac_export_args pos_args[4] = {}; LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL; @@ -2580,7 +2578,7 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base, for (i = 0; i < noutput; i++) { switch (outputs[i].semantic_name) { case TGSI_SEMANTIC_POSITION: - si_llvm_init_export_args(bld_base, outputs[i].values, + si_llvm_init_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS, &pos_args[0]); break; case TGSI_SEMANTIC_PSIZE: @@ -2598,14 +2596,14 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base, case TGSI_SEMANTIC_CLIPDIST: if (!shader->key.opt.clip_disable) { unsigned index = 2 + outputs[i].semantic_index; - si_llvm_init_export_args(bld_base, outputs[i].values, + si_llvm_init_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS + index, &pos_args[index]); } break; case TGSI_SEMANTIC_CLIPVERTEX: if (!shader->key.opt.clip_disable) { - si_llvm_emit_clipvertex(bld_base, pos_args, + si_llvm_emit_clipvertex(ctx, pos_args, outputs[i].values); } break; @@ -3174,6 +3172,9 @@ static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base) LLVMConstInt(ctx->i32, param * 4, 0), ""); for (chan = 0; chan < 4; chan++) { + if (!(info->output_usagemask[i] & (1 << chan))) + continue; + lds_store(bld_base, chan, dw_addr, LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], "")); } @@ -3339,7 +3340,7 @@ static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi, i++; } - si_llvm_export_vs(&ctx->bld_base, outputs, i); + si_llvm_export_vs(ctx, outputs, i); FREE(outputs); } @@ -3480,7 +3481,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, /* Get the export arguments, also find out what the last one is. */ for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) { - si_llvm_init_export_args(bld_base, color, + si_llvm_init_export_args(ctx, color, V_008DFC_SQ_EXP_MRT + c, &args[c]); if (args[c].enabled_channels) last = c; @@ -3500,7 +3501,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, struct ac_export_args args; /* Export */ - si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index, + si_llvm_init_export_args(ctx, color, V_008DFC_SQ_EXP_MRT + index, &args); if (is_last) { args.valid_mask = 1; /* whether the EXEC mask is valid */ @@ -4738,9 +4739,7 @@ static void create_function(struct si_shader_context *ctx) if (shader->key.as_ls || ctx->type == PIPE_SHADER_TESS_CTRL || /* GFX9 has the ESGS ring buffer in LDS. */ - (ctx->screen->b.chip_class >= GFX9 && - (shader->key.as_es || - ctx->type == PIPE_SHADER_GEOMETRY))) + type == SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY) ac_declare_lds_as_pointer(&ctx->ac); } @@ -5172,7 +5171,7 @@ static void si_shader_dump_stats(struct si_screen *sscreen, max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave); if (!check_debug_option || - si_can_dump_shader(&sscreen->b, processor)) { + si_can_dump_shader(sscreen, processor)) { if (processor == PIPE_SHADER_FRAGMENT) { fprintf(file, "*** SHADER CONFIG ***\n" "SPI_PS_INPUT_ADDR = 0x%04x\n" @@ -5244,7 +5243,7 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader, FILE *file, bool check_debug_option) { if (!check_debug_option || - si_can_dump_shader(&sscreen->b, processor)) + si_can_dump_shader(sscreen, processor)) si_dump_shader_key(processor, shader, file); if (!check_debug_option && shader->binary.llvm_ir_string) { @@ -5261,7 +5260,7 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader, } if (!check_debug_option || - (si_can_dump_shader(&sscreen->b, processor) && + (si_can_dump_shader(sscreen, processor) && !(sscreen->b.debug_flags & DBG(NO_ASM)))) { fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor)); @@ -5299,7 +5298,7 @@ static int si_compile_llvm(struct si_screen *sscreen, int r = 0; unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations); - if (si_can_dump_shader(&sscreen->b, processor)) { + if (si_can_dump_shader(sscreen, processor)) { fprintf(stderr, "radeonsi: Compiling shader %d\n", count); if (!(sscreen->b.debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) { @@ -5481,7 +5480,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, } if (stream == 0) - si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs); + si_llvm_export_vs(&ctx, outputs, gsinfo->num_outputs); LLVMBuildBr(builder, end_bb); } @@ -5499,7 +5498,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, debug, PIPE_SHADER_GEOMETRY, "GS Copy Shader"); if (!r) { - if (si_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY)) + if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY)) fprintf(stderr, "GS Copy Shader:\n"); si_shader_dump(sscreen, ctx.shader, debug, PIPE_SHADER_GEOMETRY, stderr, true); @@ -5883,11 +5882,13 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info, key->vs_prolog.num_input_sgprs = num_input_sgprs; key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1; key->vs_prolog.as_ls = shader_out->key.as_ls; + key->vs_prolog.as_es = shader_out->key.as_es; if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) { key->vs_prolog.as_ls = 1; key->vs_prolog.num_merged_next_stage_vgprs = 2; } else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) { + key->vs_prolog.as_es = 1; key->vs_prolog.num_merged_next_stage_vgprs = 5; } @@ -6415,7 +6416,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, /* Dump TGSI code before doing TGSI->LLVM conversion in case the * conversion fails. */ - if (si_can_dump_shader(&sscreen->b, sel->info.processor) && + if (si_can_dump_shader(sscreen, sel->info.processor) && !(sscreen->b.debug_flags & DBG(NO_TGSI))) { if (sel->tokens) tgsi_dump(sel->tokens, 0); @@ -6542,7 +6543,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, union si_shader_part_key vs_prolog_key; si_get_vs_prolog_key(&es->info, shader->info.num_input_sgprs, - &shader->key.part.tcs.ls_prolog, + &shader->key.part.gs.vs_prolog, shader, &vs_prolog_key); vs_prolog_key.vs_prolog.is_monolithic = true; si_build_vs_prolog_function(&ctx, &vs_prolog_key); @@ -6624,7 +6625,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, si_optimize_vs_outputs(&ctx); if ((debug && debug->debug_message) || - si_can_dump_shader(&sscreen->b, ctx.type)) + si_can_dump_shader(sscreen, ctx.type)) si_count_scratch_private_memory(&ctx); /* Compile to bytecode. */ @@ -6768,6 +6769,8 @@ si_get_shader_part(struct si_screen *sscreen, switch (type) { case PIPE_SHADER_VERTEX: + shader.key.as_ls = key->vs_prolog.as_ls; + shader.key.as_es = key->vs_prolog.as_es; break; case PIPE_SHADER_TESS_CTRL: assert(!prolog); @@ -6810,10 +6813,15 @@ out: static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx) { LLVMValueRef ptr[2], list; + bool is_merged_shader = + ctx->screen->b.chip_class >= GFX9 && + (ctx->type == PIPE_SHADER_TESS_CTRL || + ctx->type == PIPE_SHADER_GEOMETRY || + ctx->shader->key.as_ls || ctx->shader->key.as_es); /* Get the pointer to rw buffers. */ - ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS); - ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI); + ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); + ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS_HI); list = lp_build_gather_values(&ctx->gallivm, ptr, 2); list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, ""); list = LLVMBuildIntToPtr(ctx->ac.builder, list, @@ -7805,7 +7813,7 @@ void si_shader_destroy(struct si_shader *shader) r600_resource_reference(&shader->bo, NULL); if (!shader->is_binary_shared) - si_radeon_shader_binary_clean(&shader->binary); + ac_shader_binary_clean(&shader->binary); free(shader->shader_log); }