X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_shader.c;h=7bc977b6ed7795da6f1cf2ab76159a01e5a75522;hb=2b3ebe307c2250f3cc8ffea203e3a2196a70d496;hp=9ee1190b9e96d59366c2a24adafad961fd144e58;hpb=88509518b01d7c1d7436a790bf9be5cf3c41a528;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 9ee1190b9e9..7bc977b6ed7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -33,13 +33,13 @@ #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_misc.h" -#include "radeon/radeon_elf_util.h" #include "util/u_memory.h" #include "util/u_string.h" #include "tgsi/tgsi_build.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_dump.h" +#include "ac_binary.h" #include "ac_llvm_util.h" #include "si_shader_internal.h" #include "si_pipe.h" @@ -55,8 +55,9 @@ static const char *scratch_rsrc_dword1_symbol = struct si_shader_output_values { LLVMValueRef values[4]; - unsigned name; - unsigned sid; + unsigned semantic_name; + unsigned semantic_index; + ubyte vertex_stream[4]; }; static void si_init_shader_ctx(struct si_shader_context *ctx, @@ -97,14 +98,6 @@ enum { LOCAL_ADDR_SPACE = 3, }; -#define SENDMSG_GS 2 -#define SENDMSG_GS_DONE 3 - -#define SENDMSG_GS_OP_NOP (0 << 4) -#define SENDMSG_GS_OP_CUT (1 << 4) -#define SENDMSG_GS_OP_EMIT (2 << 4) -#define SENDMSG_GS_OP_EMIT_CUT (3 << 4) - /** * Returns a unique index for a semantic name and index. The index must be * less than 64, so that a 64-bit bitmask of used inputs or outputs can be @@ -175,7 +168,7 @@ static LLVMValueRef unpack_param(struct si_shader_context *ctx, param); if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind) - value = bitcast(&ctx->soa.bld_base, + value = bitcast(&ctx->bld_base, TGSI_TYPE_UNSIGNED, value); if (rshift) @@ -250,7 +243,7 @@ get_tcs_out_patch_stride(struct si_shader_context *ctx) static LLVMValueRef get_tcs_out_patch0_offset(struct si_shader_context *ctx) { - return lp_build_mul_imm(&ctx->soa.bld_base.uint_bld, + return lp_build_mul_imm(&ctx->bld_base.uint_bld, unpack_param(ctx, SI_PARAM_TCS_OUT_OFFSETS, 0, 16), @@ -260,7 +253,7 @@ get_tcs_out_patch0_offset(struct si_shader_context *ctx) static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx) { - return lp_build_mul_imm(&ctx->soa.bld_base.uint_bld, + return lp_build_mul_imm(&ctx->bld_base.uint_bld, unpack_param(ctx, SI_PARAM_TCS_OUT_OFFSETS, 16, 16), @@ -306,71 +299,13 @@ get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx) ""); } -static LLVMValueRef build_gep0(struct si_shader_context *ctx, - LLVMValueRef base_ptr, LLVMValueRef index) -{ - LLVMValueRef indices[2] = { - LLVMConstInt(ctx->i32, 0, 0), - index, - }; - return LLVMBuildGEP(ctx->gallivm.builder, base_ptr, - indices, 2, ""); -} - -static void build_indexed_store(struct si_shader_context *ctx, - LLVMValueRef base_ptr, LLVMValueRef index, - LLVMValueRef value) -{ - struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; - struct gallivm_state *gallivm = bld_base->base.gallivm; - - LLVMBuildStore(gallivm->builder, value, - build_gep0(ctx, base_ptr, index)); -} - -/** - * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad. - * It's equivalent to doing a load from &base_ptr[index]. - * - * \param base_ptr Where the array starts. - * \param index The element index into the array. - * \param uniform Whether the base_ptr and index can be assumed to be - * dynamically uniform - */ -static LLVMValueRef build_indexed_load(struct si_shader_context *ctx, - LLVMValueRef base_ptr, LLVMValueRef index, - bool uniform) -{ - struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; - struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMValueRef pointer; - - pointer = build_gep0(ctx, base_ptr, index); - if (uniform) - LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md); - return LLVMBuildLoad(gallivm->builder, pointer, ""); -} - -/** - * Do a load from &base_ptr[index], but also add a flag that it's loading - * a constant from a dynamically uniform index. - */ -static LLVMValueRef build_indexed_load_const( - struct si_shader_context *ctx, - LLVMValueRef base_ptr, LLVMValueRef index) -{ - LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true); - LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md); - return result; -} - static LLVMValueRef get_instance_index_for_fetch( struct si_shader_context *radeon_bld, unsigned param_start_instance, unsigned divisor) { struct si_shader_context *ctx = - si_shader_context(&radeon_bld->soa.bld_base); - struct gallivm_state *gallivm = radeon_bld->soa.bld_base.base.gallivm; + si_shader_context(&radeon_bld->bld_base); + struct gallivm_state *gallivm = radeon_bld->bld_base.base.gallivm; LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn, ctx->param_instance_id); @@ -384,56 +319,103 @@ static LLVMValueRef get_instance_index_for_fetch( LLVMGetParam(radeon_bld->main_fn, param_start_instance), ""); } +/* Bitcast <4 x float> to <2 x double>, extract the component, and convert + * to float. */ +static LLVMValueRef extract_double_to_float(struct si_shader_context *ctx, + LLVMValueRef vec4, + unsigned double_index) +{ + LLVMBuilderRef builder = ctx->gallivm.builder; + LLVMTypeRef f64 = LLVMDoubleTypeInContext(ctx->gallivm.context); + LLVMValueRef dvec2 = LLVMBuildBitCast(builder, vec4, + LLVMVectorType(f64, 2), ""); + LLVMValueRef index = LLVMConstInt(ctx->i32, double_index, 0); + LLVMValueRef value = LLVMBuildExtractElement(builder, dvec2, index, ""); + return LLVMBuildFPTrunc(builder, value, ctx->f32, ""); +} + static void declare_input_vs( struct si_shader_context *ctx, unsigned input_index, const struct tgsi_full_declaration *decl, LLVMValueRef out[4]) { - struct lp_build_context *base = &ctx->soa.bld_base.base; + struct lp_build_context *base = &ctx->bld_base.base; struct gallivm_state *gallivm = base->gallivm; unsigned chan; unsigned fix_fetch; + unsigned num_fetches; + unsigned fetch_stride; LLVMValueRef t_list_ptr; LLVMValueRef t_offset; LLVMValueRef t_list; - LLVMValueRef attribute_offset; - LLVMValueRef buffer_index; + LLVMValueRef vertex_index; LLVMValueRef args[3]; - LLVMValueRef input; + LLVMValueRef input[3]; /* Load the T list */ t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS); t_offset = lp_build_const_int32(gallivm, input_index); - t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset); + t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset); - /* Build the attribute offset */ - attribute_offset = lp_build_const_int32(gallivm, 0); - - buffer_index = LLVMGetParam(ctx->main_fn, + vertex_index = LLVMGetParam(ctx->main_fn, ctx->param_vertex_index0 + input_index); + fix_fetch = ctx->shader->key.mono.vs.fix_fetch[input_index]; + + /* Do multiple loads for special formats. */ + switch (fix_fetch) { + case SI_FIX_FETCH_RGB_64_FLOAT: + num_fetches = 3; /* 3 2-dword loads */ + fetch_stride = 8; + break; + case SI_FIX_FETCH_RGBA_64_FLOAT: + num_fetches = 2; /* 2 4-dword loads */ + fetch_stride = 16; + break; + case SI_FIX_FETCH_RGB_8: + case SI_FIX_FETCH_RGB_8_INT: + num_fetches = 3; + fetch_stride = 1; + break; + case SI_FIX_FETCH_RGB_16: + case SI_FIX_FETCH_RGB_16_INT: + num_fetches = 3; + fetch_stride = 2; + break; + default: + num_fetches = 1; + fetch_stride = 0; + } + args[0] = t_list; - args[1] = attribute_offset; - args[2] = buffer_index; - input = lp_build_intrinsic(gallivm->builder, - "llvm.SI.vs.load.input", ctx->v4f32, args, 3, - LP_FUNC_ATTR_READNONE); + args[2] = vertex_index; + + for (unsigned i = 0; i < num_fetches; i++) { + args[1] = LLVMConstInt(ctx->i32, fetch_stride * i, 0); + + input[i] = lp_build_intrinsic(gallivm->builder, + "llvm.SI.vs.load.input", ctx->v4f32, args, 3, + LP_FUNC_ATTR_READNONE | + LP_FUNC_ATTR_LEGACY); + } /* Break up the vec4 into individual components */ for (chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); out[chan] = LLVMBuildExtractElement(gallivm->builder, - input, llvm_chan, ""); + input[0], llvm_chan, ""); } - fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (2 * input_index)) & 3; - if (fix_fetch) { + switch (fix_fetch) { + case SI_FIX_FETCH_A2_SNORM: + case SI_FIX_FETCH_A2_SSCALED: + case SI_FIX_FETCH_A2_SINT: { /* The hardware returns an unsigned value; convert it to a * signed one. */ @@ -469,6 +451,98 @@ static void declare_input_vs( } out[3] = tmp; + break; + } + case SI_FIX_FETCH_RGBA_32_UNORM: + case SI_FIX_FETCH_RGBX_32_UNORM: + for (chan = 0; chan < 4; chan++) { + out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan], + ctx->i32, ""); + out[chan] = LLVMBuildUIToFP(gallivm->builder, + out[chan], ctx->f32, ""); + out[chan] = LLVMBuildFMul(gallivm->builder, out[chan], + LLVMConstReal(ctx->f32, 1.0 / UINT_MAX), ""); + } + /* RGBX UINT returns 1 in alpha, which would be rounded to 0 by normalizing. */ + if (fix_fetch == SI_FIX_FETCH_RGBX_32_UNORM) + out[3] = LLVMConstReal(ctx->f32, 1); + break; + case SI_FIX_FETCH_RGBA_32_SNORM: + case SI_FIX_FETCH_RGBX_32_SNORM: + case SI_FIX_FETCH_RGBA_32_FIXED: + case SI_FIX_FETCH_RGBX_32_FIXED: { + double scale; + if (fix_fetch >= SI_FIX_FETCH_RGBA_32_FIXED) + scale = 1.0 / 0x10000; + else + scale = 1.0 / INT_MAX; + + for (chan = 0; chan < 4; chan++) { + out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan], + ctx->i32, ""); + out[chan] = LLVMBuildSIToFP(gallivm->builder, + out[chan], ctx->f32, ""); + out[chan] = LLVMBuildFMul(gallivm->builder, out[chan], + LLVMConstReal(ctx->f32, scale), ""); + } + /* RGBX SINT returns 1 in alpha, which would be rounded to 0 by normalizing. */ + if (fix_fetch == SI_FIX_FETCH_RGBX_32_SNORM || + fix_fetch == SI_FIX_FETCH_RGBX_32_FIXED) + out[3] = LLVMConstReal(ctx->f32, 1); + break; + } + case SI_FIX_FETCH_RGBA_32_USCALED: + for (chan = 0; chan < 4; chan++) { + out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan], + ctx->i32, ""); + out[chan] = LLVMBuildUIToFP(gallivm->builder, + out[chan], ctx->f32, ""); + } + break; + case SI_FIX_FETCH_RGBA_32_SSCALED: + for (chan = 0; chan < 4; chan++) { + out[chan] = LLVMBuildBitCast(gallivm->builder, out[chan], + ctx->i32, ""); + out[chan] = LLVMBuildSIToFP(gallivm->builder, + out[chan], ctx->f32, ""); + } + break; + case SI_FIX_FETCH_RG_64_FLOAT: + for (chan = 0; chan < 2; chan++) + out[chan] = extract_double_to_float(ctx, input[0], chan); + + out[2] = LLVMConstReal(ctx->f32, 0); + out[3] = LLVMConstReal(ctx->f32, 1); + break; + case SI_FIX_FETCH_RGB_64_FLOAT: + for (chan = 0; chan < 3; chan++) + out[chan] = extract_double_to_float(ctx, input[chan], 0); + + out[3] = LLVMConstReal(ctx->f32, 1); + break; + case SI_FIX_FETCH_RGBA_64_FLOAT: + for (chan = 0; chan < 4; chan++) { + out[chan] = extract_double_to_float(ctx, input[chan / 2], + chan % 2); + } + break; + case SI_FIX_FETCH_RGB_8: + case SI_FIX_FETCH_RGB_8_INT: + case SI_FIX_FETCH_RGB_16: + case SI_FIX_FETCH_RGB_16_INT: + for (chan = 0; chan < 3; chan++) { + out[chan] = LLVMBuildExtractElement(gallivm->builder, + input[chan], + ctx->i32_0, ""); + } + if (fix_fetch == SI_FIX_FETCH_RGB_8 || + fix_fetch == SI_FIX_FETCH_RGB_16) { + out[3] = LLVMConstReal(ctx->f32, 1); + } else { + out[3] = LLVMBuildBitCast(gallivm->builder, ctx->i32_1, + ctx->f32, ""); + } + break; } } @@ -507,10 +581,10 @@ static LLVMValueRef get_indirect_index(struct si_shader_context *ctx, const struct tgsi_ind_register *ind, int rel_index) { - struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm; + struct gallivm_state *gallivm = ctx->bld_base.base.gallivm; LLVMValueRef result; - result = ctx->soa.addr[ind->Index][ind->Swizzle]; + result = ctx->addrs[ind->Index][ind->Swizzle]; result = LLVMBuildLoad(gallivm->builder, result, ""); result = LLVMBuildAdd(gallivm->builder, result, lp_build_const_int32(gallivm, rel_index), ""); @@ -547,7 +621,7 @@ static LLVMValueRef get_dw_address(struct si_shader_context *ctx, LLVMValueRef vertex_dw_stride, LLVMValueRef base_addr) { - struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm; + struct gallivm_state *gallivm = ctx->bld_base.base.gallivm; struct tgsi_shader_info *info = &ctx->shader->selector->info; ubyte *name, *index, *array_first; int first, param; @@ -642,10 +716,11 @@ static LLVMValueRef get_dw_address(struct si_shader_context *ctx, * Note that every attribute has 4 components. */ static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx, + LLVMValueRef rel_patch_id, LLVMValueRef vertex_index, LLVMValueRef param_index) { - struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm; + struct gallivm_state *gallivm = ctx->bld_base.base.gallivm; LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices; LLVMValueRef param_stride, constant16; @@ -656,7 +731,7 @@ static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx, constant16 = lp_build_const_int32(gallivm, 16); if (vertex_index) { - base_addr = LLVMBuildMul(gallivm->builder, get_rel_patch_id(ctx), + base_addr = LLVMBuildMul(gallivm->builder, rel_patch_id, vertices_per_patch, ""); base_addr = LLVMBuildAdd(gallivm->builder, base_addr, @@ -664,7 +739,7 @@ static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx, param_stride = total_vertices; } else { - base_addr = get_rel_patch_id(ctx); + base_addr = rel_patch_id; param_stride = num_patches; } @@ -689,7 +764,7 @@ static LLVMValueRef get_tcs_tes_buffer_address_from_reg( const struct tgsi_full_dst_register *dst, const struct tgsi_full_src_register *src) { - struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm; + struct gallivm_state *gallivm = ctx->bld_base.base.gallivm; struct tgsi_shader_info *info = &ctx->shader->selector->info; ubyte *name, *index, *array_first; struct tgsi_full_src_register reg; @@ -744,152 +819,8 @@ static LLVMValueRef get_tcs_tes_buffer_address_from_reg( lp_build_const_int32(gallivm, param_index_base), ""); - return get_tcs_tes_buffer_address(ctx, vertex_index, param_index); -} - -/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. - * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2), - * or v4i32 (num_channels=3,4). */ -static void build_tbuffer_store(struct si_shader_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - unsigned num_channels, - LLVMValueRef vaddr, - LLVMValueRef soffset, - unsigned inst_offset, - unsigned dfmt, - unsigned nfmt, - unsigned offen, - unsigned idxen, - unsigned glc, - unsigned slc, - unsigned tfe) -{ - struct gallivm_state *gallivm = &ctx->gallivm; - LLVMValueRef args[] = { - rsrc, - vdata, - LLVMConstInt(ctx->i32, num_channels, 0), - vaddr, - soffset, - LLVMConstInt(ctx->i32, inst_offset, 0), - LLVMConstInt(ctx->i32, dfmt, 0), - LLVMConstInt(ctx->i32, nfmt, 0), - LLVMConstInt(ctx->i32, offen, 0), - LLVMConstInt(ctx->i32, idxen, 0), - LLVMConstInt(ctx->i32, glc, 0), - LLVMConstInt(ctx->i32, slc, 0), - LLVMConstInt(ctx->i32, tfe, 0) - }; - - /* The instruction offset field has 12 bits */ - assert(offen || inst_offset < (1 << 12)); - - /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */ - unsigned func = CLAMP(num_channels, 1, 3) - 1; - const char *types[] = {"i32", "v2i32", "v4i32"}; - char name[256]; - snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]); - - lp_build_intrinsic(gallivm->builder, name, ctx->voidt, - args, ARRAY_SIZE(args), 0); -} - -static void build_tbuffer_store_dwords(struct si_shader_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - unsigned num_channels, - LLVMValueRef vaddr, - LLVMValueRef soffset, - unsigned inst_offset) -{ - static unsigned dfmt[] = { - V_008F0C_BUF_DATA_FORMAT_32, - V_008F0C_BUF_DATA_FORMAT_32_32, - V_008F0C_BUF_DATA_FORMAT_32_32_32, - V_008F0C_BUF_DATA_FORMAT_32_32_32_32 - }; - assert(num_channels >= 1 && num_channels <= 4); - - build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset, - inst_offset, dfmt[num_channels-1], - V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0); -} - -static LLVMValueRef build_buffer_load(struct si_shader_context *ctx, - LLVMValueRef rsrc, - int num_channels, - LLVMValueRef vindex, - LLVMValueRef voffset, - LLVMValueRef soffset, - unsigned inst_offset, - unsigned glc, - unsigned slc) -{ - struct gallivm_state *gallivm = &ctx->gallivm; - unsigned func = CLAMP(num_channels, 1, 3) - 1; - - if (HAVE_LLVM >= 0x309) { - LLVMValueRef args[] = { - LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, ""), - vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0), - LLVMConstInt(ctx->i32, inst_offset, 0), - LLVMConstInt(ctx->i1, glc, 0), - LLVMConstInt(ctx->i1, slc, 0) - }; - - LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2), - ctx->v4f32}; - const char *type_names[] = {"f32", "v2f32", "v4f32"}; - char name[256]; - - if (voffset) { - args[2] = LLVMBuildAdd(gallivm->builder, args[2], voffset, - ""); - } - - if (soffset) { - args[2] = LLVMBuildAdd(gallivm->builder, args[2], soffset, - ""); - } - - snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s", - type_names[func]); - - return lp_build_intrinsic(gallivm->builder, name, types[func], args, - ARRAY_SIZE(args), LP_FUNC_ATTR_READONLY); - } else { - LLVMValueRef args[] = { - LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v16i8, ""), - voffset ? voffset : vindex, - soffset, - LLVMConstInt(ctx->i32, inst_offset, 0), - LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen - LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), //idxen - LLVMConstInt(ctx->i32, glc, 0), - LLVMConstInt(ctx->i32, slc, 0), - LLVMConstInt(ctx->i32, 0, 0), // TFE - }; - - LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2), - ctx->v4i32}; - const char *type_names[] = {"i32", "v2i32", "v4i32"}; - const char *arg_type = "i32"; - char name[256]; - - if (voffset && vindex) { - LLVMValueRef vaddr[] = {vindex, voffset}; - - arg_type = "v2i32"; - args[1] = lp_build_gather_values(gallivm, vaddr, 2); - } - - snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s", - type_names[func], arg_type); - - return lp_build_intrinsic(gallivm->builder, name, types[func], args, - ARRAY_SIZE(args), LP_FUNC_ATTR_READONLY); - } + return get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), + vertex_index, param_index); } static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base, @@ -904,25 +835,25 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base, LLVMTypeRef vec_type = LLVMVectorType(llvm_type, 4); if (swizzle == ~0) { - value = build_buffer_load(ctx, buffer, 4, NULL, base, offset, - 0, 1, 0); + value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, + 0, 1, 0); return LLVMBuildBitCast(gallivm->builder, value, vec_type, ""); } if (!tgsi_type_is_64bit(type)) { - value = build_buffer_load(ctx, buffer, 4, NULL, base, offset, - 0, 1, 0); + value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, + 0, 1, 0); value = LLVMBuildBitCast(gallivm->builder, value, vec_type, ""); return LLVMBuildExtractElement(gallivm->builder, value, lp_build_const_int32(gallivm, swizzle), ""); } - value = build_buffer_load(ctx, buffer, 1, NULL, base, offset, + value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, swizzle * 4, 1, 0); - value2 = build_buffer_load(ctx, buffer, 1, NULL, base, offset, + value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, swizzle * 4 + 4, 1, 0); return si_llvm_emit_fetch_64bit(bld_base, type, value, value2); @@ -956,12 +887,12 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base, dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr, lp_build_const_int32(gallivm, swizzle)); - value = build_indexed_load(ctx, ctx->lds, dw_addr, false); + value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false); if (tgsi_type_is_64bit(type)) { LLVMValueRef value2; dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr, lp_build_const_int32(gallivm, 1)); - value2 = build_indexed_load(ctx, ctx->lds, dw_addr, false); + value2 = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false); return si_llvm_emit_fetch_64bit(bld_base, type, value, value2); } @@ -987,8 +918,8 @@ static void lds_store(struct lp_build_tgsi_context *bld_base, lp_build_const_int32(gallivm, swizzle)); value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, ""); - build_indexed_store(ctx, ctx->lds, - dw_addr, value); + ac_build_indexed_store(&ctx->ac, ctx->lds, + dw_addr, value); } static LLVMValueRef fetch_input_tcs( @@ -1037,7 +968,7 @@ static LLVMValueRef fetch_input_tes( rw_buffers = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); - buffer = build_indexed_load_const(ctx, rw_buffers, + buffer = ac_build_indexed_load_const(&ctx->ac, rw_buffers, lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP)); base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds); @@ -1054,10 +985,13 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; const struct tgsi_full_dst_register *reg = &inst->Dst[0]; + const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info; unsigned chan_index; LLVMValueRef dw_addr, stride; LLVMValueRef rw_buffers, buffer, base, buf_addr; LLVMValueRef values[4]; + bool skip_lds_store; + bool is_tess_factor = false; /* Only handle per-patch and per-vertex outputs here. * Vectors will be lowered to scalars and this function will be called again. @@ -1072,14 +1006,27 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, stride = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8); dw_addr = get_tcs_out_current_patch_offset(ctx); dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr); + skip_lds_store = !sh_info->reads_pervertex_outputs; } else { dw_addr = get_tcs_out_current_patch_data_offset(ctx); dw_addr = get_dw_address(ctx, reg, NULL, NULL, dw_addr); + skip_lds_store = !sh_info->reads_perpatch_outputs; + + if (!reg->Register.Indirect) { + int name = sh_info->output_semantic_name[reg->Register.Index]; + + /* Always write tess factors into LDS for the TCS epilog. */ + if (name == TGSI_SEMANTIC_TESSINNER || + name == TGSI_SEMANTIC_TESSOUTER) { + skip_lds_store = false; + is_tess_factor = true; + } + } } rw_buffers = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); - buffer = build_indexed_load_const(ctx, rw_buffers, + buffer = ac_build_indexed_load_const(&ctx->ac, rw_buffers, lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP)); base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds); @@ -1090,25 +1037,27 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, LLVMValueRef value = dst[chan_index]; if (inst->Instruction.Saturate) - value = si_llvm_saturate(bld_base, value); + value = ac_emit_clamp(&ctx->ac, value); - lds_store(bld_base, chan_index, dw_addr, value); + /* Skip LDS stores if there is no LDS read of this output. */ + if (!skip_lds_store) + lds_store(bld_base, chan_index, dw_addr, value); value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, ""); values[chan_index] = value; - if (inst->Dst[0].Register.WriteMask != 0xF) { - build_tbuffer_store_dwords(ctx, buffer, value, 1, - buf_addr, base, - 4 * chan_index); + if (inst->Dst[0].Register.WriteMask != 0xF && !is_tess_factor) { + ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 1, + buf_addr, base, + 4 * chan_index); } } - if (inst->Dst[0].Register.WriteMask == 0xF) { + if (inst->Dst[0].Register.WriteMask == 0xF && !is_tess_factor) { LLVMValueRef value = lp_build_gather_values(bld_base->base.gallivm, values, 4); - build_tbuffer_store_dwords(ctx, buffer, value, 4, buf_addr, - base, 0); + ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 4, buf_addr, + base, 0); } } @@ -1121,7 +1070,7 @@ static LLVMValueRef fetch_input_gs( struct lp_build_context *base = &bld_base->base; struct si_shader_context *ctx = si_shader_context(bld_base); struct si_shader *shader = ctx->shader; - struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld; + struct lp_build_context *uint = &ctx->bld_base.uint_bld; struct gallivm_state *gallivm = base->gallivm; LLVMValueRef vtx_offset; LLVMValueRef args[9]; @@ -1175,14 +1124,16 @@ static LLVMValueRef fetch_input_gs( value = lp_build_intrinsic(gallivm->builder, "llvm.SI.buffer.load.dword.i32.i32", ctx->i32, args, 9, - LP_FUNC_ATTR_READONLY); + LP_FUNC_ATTR_READONLY | + LP_FUNC_ATTR_LEGACY); if (tgsi_type_is_64bit(type)) { LLVMValueRef value2; args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256); value2 = lp_build_intrinsic(gallivm->builder, "llvm.SI.buffer.load.dword.i32.i32", ctx->i32, args, 9, - LP_FUNC_ATTR_READONLY); + LP_FUNC_ATTR_READONLY | + LP_FUNC_ATTR_LEGACY); return si_llvm_emit_fetch_64bit(bld_base, type, value, value2); } @@ -1220,80 +1171,6 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location) } } -static LLVMValueRef build_fs_interp( - struct lp_build_tgsi_context *bld_base, - LLVMValueRef llvm_chan, - LLVMValueRef attr_number, - LLVMValueRef params, - LLVMValueRef i, - LLVMValueRef j) { - - struct si_shader_context *ctx = si_shader_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMValueRef args[5]; - LLVMValueRef p1; - if (HAVE_LLVM < 0x0400) { - LLVMValueRef ij[2]; - ij[0] = LLVMBuildBitCast(gallivm->builder, i, ctx->i32, ""); - ij[1] = LLVMBuildBitCast(gallivm->builder, j, ctx->i32, ""); - - args[0] = llvm_chan; - args[1] = attr_number; - args[2] = params; - args[3] = lp_build_gather_values(gallivm, ij, 2); - return lp_build_intrinsic(gallivm->builder, "llvm.SI.fs.interp", - ctx->f32, args, 4, - LP_FUNC_ATTR_READNONE); - } - - args[0] = i; - args[1] = llvm_chan; - args[2] = attr_number; - args[3] = params; - - p1 = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.interp.p1", - ctx->f32, args, 4, LP_FUNC_ATTR_READNONE); - - args[0] = p1; - args[1] = j; - args[2] = llvm_chan; - args[3] = attr_number; - args[4] = params; - - return lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.interp.p2", - ctx->f32, args, 5, LP_FUNC_ATTR_READNONE); -} - -static LLVMValueRef build_fs_interp_mov( - struct lp_build_tgsi_context *bld_base, - LLVMValueRef parameter, - LLVMValueRef llvm_chan, - LLVMValueRef attr_number, - LLVMValueRef params) { - - struct si_shader_context *ctx = si_shader_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMValueRef args[4]; - if (HAVE_LLVM < 0x0400) { - args[0] = llvm_chan; - args[1] = attr_number; - args[2] = params; - - return lp_build_intrinsic(gallivm->builder, - "llvm.SI.fs.constant", - ctx->f32, args, 3, - LP_FUNC_ATTR_READNONE); - } - - args[0] = parameter; - args[1] = llvm_chan; - args[2] = attr_number; - args[3] = params; - - return lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.interp.mov", - ctx->f32, args, 4, LP_FUNC_ATTR_READNONE); -} - /** * Interpolate a fragment shader input. * @@ -1319,7 +1196,7 @@ static void interp_fs_input(struct si_shader_context *ctx, LLVMValueRef face, LLVMValueRef result[4]) { - struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; + struct lp_build_tgsi_context *bld_base = &ctx->bld_base; struct lp_build_context *base = &bld_base->base; struct lp_build_context *uint = &bld_base->uint_bld; struct gallivm_state *gallivm = base->gallivm; @@ -1377,17 +1254,17 @@ static void interp_fs_input(struct si_shader_context *ctx, LLVMValueRef front, back; if (interp) { - front = build_fs_interp(bld_base, llvm_chan, + front = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number, prim_mask, i, j); - back = build_fs_interp(bld_base, llvm_chan, + back = ac_build_fs_interp(&ctx->ac, llvm_chan, back_attr_number, prim_mask, i, j); } else { - front = build_fs_interp_mov(bld_base, + front = ac_build_fs_interp_mov(&ctx->ac, lp_build_const_int32(gallivm, 2), /* P0 */ llvm_chan, attr_number, prim_mask); - back = build_fs_interp_mov(bld_base, + back = ac_build_fs_interp_mov(&ctx->ac, lp_build_const_int32(gallivm, 2), /* P0 */ llvm_chan, back_attr_number, prim_mask); } @@ -1400,12 +1277,12 @@ static void interp_fs_input(struct si_shader_context *ctx, } } else if (semantic_name == TGSI_SEMANTIC_FOG) { if (interp) { - result[0] = build_fs_interp(bld_base, uint->zero, - attr_number, prim_mask, i, j); + result[0] = ac_build_fs_interp(&ctx->ac, uint->zero, + attr_number, prim_mask, i, j); } else { - result[0] = build_fs_interp_mov(bld_base, uint->zero, - lp_build_const_int32(gallivm, 2), /* P0 */ - attr_number, prim_mask); + result[0] = ac_build_fs_interp_mov(&ctx->ac, uint->zero, + lp_build_const_int32(gallivm, 2), /* P0 */ + attr_number, prim_mask); } result[1] = result[2] = lp_build_const_float(gallivm, 0.0f); @@ -1415,10 +1292,10 @@ static void interp_fs_input(struct si_shader_context *ctx, LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); if (interp) { - result[chan] = build_fs_interp(bld_base, + result[chan] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number, prim_mask, i, j); } else { - result[chan] = build_fs_interp_mov(bld_base, + result[chan] = ac_build_fs_interp_mov(&ctx->ac, lp_build_const_int32(gallivm, 2), /* P0 */ llvm_chan, attr_number, prim_mask); } @@ -1432,9 +1309,9 @@ static void declare_input_fs( const struct tgsi_full_declaration *decl, LLVMValueRef out[4]) { - struct lp_build_context *base = &radeon_bld->soa.bld_base.base; + struct lp_build_context *base = &radeon_bld->bld_base.base; struct si_shader_context *ctx = - si_shader_context(&radeon_bld->soa.bld_base); + si_shader_context(&radeon_bld->bld_base); struct si_shader *shader = ctx->shader; LLVMValueRef main_fn = radeon_bld->main_fn; LLVMValueRef interp_param = NULL; @@ -1478,53 +1355,10 @@ static void declare_input_fs( static LLVMValueRef get_sample_id(struct si_shader_context *radeon_bld) { - return unpack_param(si_shader_context(&radeon_bld->soa.bld_base), + return unpack_param(si_shader_context(&radeon_bld->bld_base), SI_PARAM_ANCILLARY, 8, 4); } -/** - * Set range metadata on an instruction. This can only be used on load and - * call instructions. If you know an instruction can only produce the values - * 0, 1, 2, you would do set_range_metadata(value, 0, 3); - * \p lo is the minimum value inclusive. - * \p hi is the maximum value exclusive. - */ -static void set_range_metadata(struct si_shader_context *ctx, - LLVMValueRef value, unsigned lo, unsigned hi) -{ - LLVMValueRef range_md, md_args[2]; - LLVMTypeRef type = LLVMTypeOf(value); - LLVMContextRef context = LLVMGetTypeContext(type); - - md_args[0] = LLVMConstInt(type, lo, false); - md_args[1] = LLVMConstInt(type, hi, false); - range_md = LLVMMDNodeInContext(context, md_args, 2); - LLVMSetMetadata(value, ctx->range_md_kind, range_md); -} - -static LLVMValueRef get_thread_id(struct si_shader_context *ctx) -{ - struct gallivm_state *gallivm = &ctx->gallivm; - LLVMValueRef tid; - - if (HAVE_LLVM < 0x0308) { - tid = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid", - ctx->i32, NULL, 0, LP_FUNC_ATTR_READNONE); - } else { - LLVMValueRef tid_args[2]; - tid_args[0] = lp_build_const_int32(gallivm, 0xffffffff); - tid_args[1] = lp_build_const_int32(gallivm, 0); - tid_args[1] = lp_build_intrinsic(gallivm->builder, - "llvm.amdgcn.mbcnt.lo", ctx->i32, - tid_args, 2, LP_FUNC_ATTR_READNONE); - - tid = lp_build_intrinsic(gallivm->builder, - "llvm.amdgcn.mbcnt.hi", ctx->i32, - tid_args, 2, LP_FUNC_ATTR_READNONE); - } - set_range_metadata(ctx, tid, 0, 64); - return tid; -} /** * Load a dword from a constant buffer. @@ -1537,19 +1371,20 @@ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx, LLVMValueRef args[2] = {resource, offset}; return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | + LP_FUNC_ATTR_LEGACY); } static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, LLVMValueRef sample_id) { struct si_shader_context *ctx = - si_shader_context(&radeon_bld->soa.bld_base); - struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld; + si_shader_context(&radeon_bld->bld_base); + struct lp_build_context *uint_bld = &radeon_bld->bld_base.uint_bld; struct gallivm_state *gallivm = &radeon_bld->gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef desc = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); LLVMValueRef buf_index = lp_build_const_int32(gallivm, SI_PS_CONST_SAMPLE_POSITIONS); - LLVMValueRef resource = build_indexed_load_const(ctx, desc, buf_index); + LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, buf_index); /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */ LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, sample_id, 8); @@ -1571,8 +1406,8 @@ static void declare_system_value( const struct tgsi_full_declaration *decl) { struct si_shader_context *ctx = - si_shader_context(&radeon_bld->soa.bld_base); - struct lp_build_context *bld = &radeon_bld->soa.bld_base.base; + si_shader_context(&radeon_bld->bld_base); + struct lp_build_context *bld = &radeon_bld->bld_base.base; struct gallivm_state *gallivm = &radeon_bld->gallivm; LLVMValueRef value = 0; @@ -1626,7 +1461,7 @@ static void declare_system_value( LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_X_FLOAT), LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_Y_FLOAT), LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_Z_FLOAT), - lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base, TGSI_OPCODE_RCP, + lp_build_emit_llvm_unary(&radeon_bld->bld_base, TGSI_OPCODE_RCP, LLVMGetParam(radeon_bld->main_fn, SI_PARAM_POS_W_FLOAT)), }; @@ -1649,9 +1484,9 @@ static void declare_system_value( lp_build_const_float(gallivm, 0), lp_build_const_float(gallivm, 0) }; - pos[0] = lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base, + pos[0] = lp_build_emit_llvm_unary(&radeon_bld->bld_base, TGSI_OPCODE_FRC, pos[0]); - pos[1] = lp_build_emit_llvm_unary(&radeon_bld->soa.bld_base, + pos[1] = lp_build_emit_llvm_unary(&radeon_bld->bld_base, TGSI_OPCODE_FRC, pos[1]); value = lp_build_gather_values(gallivm, pos, 4); break; @@ -1700,14 +1535,14 @@ static void declare_system_value( rw_buffers = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); - buffer = build_indexed_load_const(ctx, rw_buffers, + buffer = ac_build_indexed_load_const(&ctx->ac, rw_buffers, lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP)); base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds); - addr = get_tcs_tes_buffer_address(ctx, NULL, + addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL, lp_build_const_int32(gallivm, param)); - value = buffer_load(&radeon_bld->soa.bld_base, TGSI_TYPE_FLOAT, + value = buffer_load(&radeon_bld->bld_base, TGSI_TYPE_FLOAT, ~0, buffer, base, addr); break; @@ -1721,7 +1556,7 @@ static void declare_system_value( slot = lp_build_const_int32(gallivm, SI_HS_CONST_DEFAULT_TESS_LEVELS); buf = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); - buf = build_indexed_load_const(ctx, buf, slot); + buf = ac_build_indexed_load_const(&ctx->ac, buf, slot); offset = decl->Semantic.Name == TGSI_SEMANTIC_DEFAULT_TESSINNER_SI ? 4 : 0; for (i = 0; i < 4; i++) @@ -1732,7 +1567,7 @@ static void declare_system_value( } case TGSI_SEMANTIC_PRIMID: - value = get_primitive_id(&radeon_bld->soa.bld_base, 0); + value = get_primitive_id(&radeon_bld->bld_base, 0); break; case TGSI_SEMANTIC_GRID_SIZE: @@ -1770,16 +1605,19 @@ static void declare_system_value( value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_THREAD_ID); break; -#if HAVE_LLVM >= 0x0309 case TGSI_SEMANTIC_HELPER_INVOCATION: - value = lp_build_intrinsic(gallivm->builder, - "llvm.amdgcn.ps.live", - ctx->i1, NULL, 0, - LP_FUNC_ATTR_READNONE); - value = LLVMBuildNot(gallivm->builder, value, ""); - value = LLVMBuildSExt(gallivm->builder, value, ctx->i32, ""); + if (HAVE_LLVM >= 0x0309) { + value = lp_build_intrinsic(gallivm->builder, + "llvm.amdgcn.ps.live", + ctx->i1, NULL, 0, + LP_FUNC_ATTR_READNONE); + value = LLVMBuildNot(gallivm->builder, value, ""); + value = LLVMBuildSExt(gallivm->builder, value, ctx->i32, ""); + } else { + assert(!"TGSI_SEMANTIC_HELPER_INVOCATION unsupported"); + return; + } break; -#endif default: assert(!"unknown system value"); @@ -1793,7 +1631,7 @@ static void declare_compute_memory(struct si_shader_context *radeon_bld, const struct tgsi_full_declaration *decl) { struct si_shader_context *ctx = - si_shader_context(&radeon_bld->soa.bld_base); + si_shader_context(&radeon_bld->bld_base); struct si_shader_selector *sel = ctx->shader->selector; struct gallivm_state *gallivm = &radeon_bld->gallivm; @@ -1818,7 +1656,7 @@ static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i) LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_CONST_BUFFERS); - return build_indexed_load_const(ctx, list_ptr, + return ac_build_indexed_load_const(&ctx->ac, list_ptr, LLVMConstInt(ctx->i32, i, 0)); } @@ -1854,12 +1692,12 @@ static LLVMValueRef fetch_constant( index = get_bounded_indirect_index(ctx, ®->DimIndirect, reg->Dimension.Index, SI_NUM_CONST_BUFFERS); - bufp = build_indexed_load_const(ctx, ptr, index); + bufp = ac_build_indexed_load_const(&ctx->ac, ptr, index); } else bufp = load_const_buffer_desc(ctx, buf); if (reg->Register.Indirect) { - addr = ctx->soa.addr[ireg->Index][ireg->Swizzle]; + addr = ctx->addrs[ireg->Index][ireg->Swizzle]; addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg"); addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16); addr = lp_build_add(&bld_base->uint_bld, addr, @@ -1911,30 +1749,28 @@ static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct gallivm_state *galliv static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, LLVMValueRef *values, unsigned target, - LLVMValueRef *args) + struct ac_export_args *args) { struct si_shader_context *ctx = si_shader_context(bld_base); - struct lp_build_context *uint = - &ctx->soa.bld_base.uint_bld; struct lp_build_context *base = &bld_base->base; struct gallivm_state *gallivm = base->gallivm; LLVMBuilderRef builder = base->gallivm->builder; LLVMValueRef val[4]; unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR; unsigned chan; - bool is_int8; + bool is_int8, is_int10; /* Default is 0xf. Adjusted below depending on the format. */ - args[0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */ + args->enabled_channels = 0xf; /* writemask */ /* Specify whether the EXEC mask represents the valid mask */ - args[1] = uint->zero; + args->valid_mask = 0; /* Specify whether this is the last export */ - args[2] = uint->zero; + args->done = 0; /* Specify the target we are exporting */ - args[3] = lp_build_const_int32(base->gallivm, target); + args->target = target; if (ctx->type == PIPE_SHADER_FRAGMENT) { const struct si_shader_key *key = &ctx->shader->key; @@ -1944,39 +1780,40 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, assert(cbuf >= 0 && cbuf < 8); spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf; is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) & 0x1; + is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) & 0x1; } - args[4] = uint->zero; /* COMPR flag */ - args[5] = base->undef; - args[6] = base->undef; - args[7] = base->undef; - args[8] = base->undef; + args->compr = false; + args->out[0] = base->undef; + args->out[1] = base->undef; + args->out[2] = base->undef; + args->out[3] = base->undef; switch (spi_shader_col_format) { case V_028714_SPI_SHADER_ZERO: - args[0] = uint->zero; /* writemask */ - args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL); + args->enabled_channels = 0; /* writemask */ + args->target = V_008DFC_SQ_EXP_NULL; break; case V_028714_SPI_SHADER_32_R: - args[0] = uint->one; /* writemask */ - args[5] = values[0]; + args->enabled_channels = 1; /* writemask */ + args->out[0] = values[0]; break; case V_028714_SPI_SHADER_32_GR: - args[0] = lp_build_const_int32(base->gallivm, 0x3); /* writemask */ - args[5] = values[0]; - args[6] = values[1]; + args->enabled_channels = 0x3; /* writemask */ + args->out[0] = values[0]; + args->out[1] = values[1]; break; case V_028714_SPI_SHADER_32_AR: - args[0] = lp_build_const_int32(base->gallivm, 0x9); /* writemask */ - args[5] = values[0]; - args[8] = values[3]; + args->enabled_channels = 0x9; /* writemask */ + args->out[0] = values[0]; + args->out[3] = values[3]; break; case V_028714_SPI_SHADER_FP16_ABGR: - args[4] = uint->one; /* COMPR flag */ + args->compr = 1; /* COMPR flag */ for (chan = 0; chan < 2; chan++) { LLVMValueRef pack_args[2] = { @@ -1988,8 +1825,9 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, packed = lp_build_intrinsic(base->gallivm->builder, "llvm.SI.packf16", ctx->i32, pack_args, 2, - LP_FUNC_ATTR_READNONE); - args[chan + 5] = + LP_FUNC_ATTR_READNONE | + LP_FUNC_ATTR_LEGACY); + args->out[chan] = LLVMBuildBitCast(base->gallivm->builder, packed, ctx->f32, ""); } @@ -1997,7 +1835,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, case V_028714_SPI_SHADER_UNORM16_ABGR: for (chan = 0; chan < 4; chan++) { - val[chan] = si_llvm_saturate(bld_base, values[chan]); + val[chan] = ac_emit_clamp(&ctx->ac, values[chan]); val[chan] = LLVMBuildFMul(builder, val[chan], lp_build_const_float(gallivm, 65535), ""); val[chan] = LLVMBuildFAdd(builder, val[chan], @@ -2006,10 +1844,10 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, ctx->i32, ""); } - args[4] = uint->one; /* COMPR flag */ - args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, + args->compr = 1; /* COMPR flag */ + args->out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, si_llvm_pack_two_int16(gallivm, val)); - args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT, + args->out[1] = bitcast(bld_base, TGSI_TYPE_FLOAT, si_llvm_pack_two_int16(gallivm, val+2)); break; @@ -2035,57 +1873,66 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, val[chan] = LLVMBuildFPToSI(builder, val[chan], ctx->i32, ""); } - args[4] = uint->one; /* COMPR flag */ - args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, + args->compr = 1; /* COMPR flag */ + args->out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, si_llvm_pack_two_int32_as_int16(gallivm, val)); - args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT, + args->out[1] = bitcast(bld_base, TGSI_TYPE_FLOAT, si_llvm_pack_two_int32_as_int16(gallivm, val+2)); break; case V_028714_SPI_SHADER_UINT16_ABGR: { - LLVMValueRef max = lp_build_const_int32(gallivm, is_int8 ? - 255 : 65535); + LLVMValueRef max_rgb = lp_build_const_int32(gallivm, + is_int8 ? 255 : is_int10 ? 1023 : 65535); + LLVMValueRef max_alpha = + !is_int10 ? max_rgb : lp_build_const_int32(gallivm, 3); + /* Clamp. */ for (chan = 0; chan < 4; chan++) { val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]); val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMIN, - val[chan], max); + val[chan], + chan == 3 ? max_alpha : max_rgb); } - args[4] = uint->one; /* COMPR flag */ - args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, + args->compr = 1; /* COMPR flag */ + args->out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, si_llvm_pack_two_int16(gallivm, val)); - args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT, + args->out[1] = bitcast(bld_base, TGSI_TYPE_FLOAT, si_llvm_pack_two_int16(gallivm, val+2)); break; } case V_028714_SPI_SHADER_SINT16_ABGR: { - LLVMValueRef max = lp_build_const_int32(gallivm, is_int8 ? - 127 : 32767); - LLVMValueRef min = lp_build_const_int32(gallivm, is_int8 ? - -128 : -32768); + LLVMValueRef max_rgb = lp_build_const_int32(gallivm, + is_int8 ? 127 : is_int10 ? 511 : 32767); + LLVMValueRef min_rgb = lp_build_const_int32(gallivm, + is_int8 ? -128 : is_int10 ? -512 : -32768); + LLVMValueRef max_alpha = + !is_int10 ? max_rgb : lp_build_const_int32(gallivm, 1); + LLVMValueRef min_alpha = + !is_int10 ? min_rgb : lp_build_const_int32(gallivm, -2); + /* Clamp. */ for (chan = 0; chan < 4; chan++) { val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, values[chan]); val[chan] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_IMIN, - val[chan], max); + TGSI_OPCODE_IMIN, + val[chan], chan == 3 ? max_alpha : max_rgb); val[chan] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_IMAX, - val[chan], min); + TGSI_OPCODE_IMAX, + val[chan], chan == 3 ? min_alpha : min_rgb); } - args[4] = uint->one; /* COMPR flag */ - args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, + args->compr = 1; /* COMPR flag */ + args->out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, si_llvm_pack_two_int32_as_int16(gallivm, val)); - args[6] = bitcast(bld_base, TGSI_TYPE_FLOAT, + args->out[1] = bitcast(bld_base, TGSI_TYPE_FLOAT, si_llvm_pack_two_int32_as_int16(gallivm, val+2)); break; } case V_028714_SPI_SHADER_32_ABGR: - memcpy(&args[5], values, sizeof(values[0]) * 4); + memcpy(&args->out[0], values, sizeof(values[0]) * 4); break; } } @@ -2111,10 +1958,10 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, lp_build_const_float(gallivm, -1.0f)); lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill", - ctx->voidt, &arg, 1, 0); + ctx->voidt, &arg, 1, LP_FUNC_ATTR_LEGACY); } else { lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kilp", - ctx->voidt, NULL, 0, 0); + ctx->voidt, NULL, 0, LP_FUNC_ATTR_LEGACY); } } @@ -2146,11 +1993,10 @@ static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context * } static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context *bld_base, - LLVMValueRef (*pos)[9], LLVMValueRef *out_elts) + struct ac_export_args *pos, LLVMValueRef *out_elts) { struct si_shader_context *ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; - struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld; unsigned reg_index; unsigned chan; unsigned const_chan; @@ -2158,37 +2004,36 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context *bld_base, LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); LLVMValueRef constbuf_index = lp_build_const_int32(base->gallivm, SI_VS_CONST_CLIP_PLANES); - LLVMValueRef const_resource = build_indexed_load_const(ctx, ptr, constbuf_index); + LLVMValueRef const_resource = ac_build_indexed_load_const(&ctx->ac, ptr, constbuf_index); for (reg_index = 0; reg_index < 2; reg_index ++) { - LLVMValueRef *args = pos[2 + reg_index]; + struct ac_export_args *args = &pos[2 + reg_index]; - args[5] = - args[6] = - args[7] = - args[8] = lp_build_const_float(base->gallivm, 0.0f); + args->out[0] = + args->out[1] = + args->out[2] = + args->out[3] = lp_build_const_float(base->gallivm, 0.0f); /* Compute dot products of position and user clip plane vectors */ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) { - args[1] = lp_build_const_int32(base->gallivm, - ((reg_index * 4 + chan) * 4 + - const_chan) * 4); + LLVMValueRef addr = + LLVMConstInt(ctx->i32, ((reg_index * 4 + chan) * 4 + + const_chan) * 4, 0); base_elt = buffer_load_const(ctx, const_resource, - args[1]); - args[5 + chan] = - lp_build_add(base, args[5 + chan], + addr); + args->out[chan] = + lp_build_add(base, args->out[chan], lp_build_mul(base, base_elt, out_elts[const_chan])); } } - args[0] = lp_build_const_int32(base->gallivm, 0xf); - args[1] = uint->zero; - args[2] = uint->zero; - args[3] = lp_build_const_int32(base->gallivm, - V_008DFC_SQ_EXP_POS + 2 + reg_index); - args[4] = uint->zero; + args->enabled_channels = 0xf; + args->valid_mask = 0; + args->done = 0; + args->target = V_008DFC_SQ_EXP_POS + 2 + reg_index; + args->compr = 0; } } @@ -2213,44 +2058,82 @@ static void si_dump_streamout(struct pipe_stream_output_info *so) } } -/* On SI, the vertex shader is responsible for writing streamout data - * to buffers. */ -static void si_llvm_emit_streamout(struct si_shader_context *ctx, - struct si_shader_output_values *outputs, - unsigned noutput) +static void emit_streamout_output(struct si_shader_context *ctx, + LLVMValueRef const *so_buffers, + LLVMValueRef const *so_write_offsets, + struct pipe_stream_output *stream_out, + struct si_shader_output_values *shader_out) { - struct pipe_stream_output_info *so = &ctx->shader->selector->so; struct gallivm_state *gallivm = &ctx->gallivm; LLVMBuilderRef builder = gallivm->builder; - int i, j; - struct lp_build_if_state if_ctx; - LLVMValueRef so_buffers[4]; - LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, - SI_PARAM_RW_BUFFERS); + unsigned buf_idx = stream_out->output_buffer; + unsigned start = stream_out->start_component; + unsigned num_comps = stream_out->num_components; + LLVMValueRef out[4]; - /* Load the descriptors. */ - for (i = 0; i < 4; ++i) { - if (ctx->shader->selector->so.stride[i]) { - LLVMValueRef offset = lp_build_const_int32(gallivm, - SI_VS_STREAMOUT_BUF0 + i); + assert(num_comps && num_comps <= 4); + if (!num_comps || num_comps > 4) + return; + + /* Load the output as int. */ + for (int j = 0; j < num_comps; j++) { + assert(stream_out->stream == shader_out->vertex_stream[start + j]); + + out[j] = LLVMBuildBitCast(builder, + shader_out->values[start + j], + ctx->i32, ""); + } + + /* Pack the output. */ + LLVMValueRef vdata = NULL; - so_buffers[i] = build_indexed_load_const(ctx, buf_ptr, offset); + switch (num_comps) { + case 1: /* as i32 */ + vdata = out[0]; + break; + case 2: /* as v2i32 */ + case 3: /* as v4i32 (aligned to 4) */ + case 4: /* as v4i32 */ + vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps))); + for (int j = 0; j < num_comps; j++) { + vdata = LLVMBuildInsertElement(builder, vdata, out[j], + LLVMConstInt(ctx->i32, j, 0), ""); } + break; } + ac_build_tbuffer_store_dwords(&ctx->ac, so_buffers[buf_idx], + vdata, num_comps, + so_write_offsets[buf_idx], + LLVMConstInt(ctx->i32, 0, 0), + stream_out->dst_offset * 4); +} + +/** + * Write streamout data to buffers for vertex stream @p stream (different + * vertex streams can occur for GS copy shaders). + */ +static void si_llvm_emit_streamout(struct si_shader_context *ctx, + struct si_shader_output_values *outputs, + unsigned noutput, unsigned stream) +{ + struct si_shader_selector *sel = ctx->shader->selector; + struct pipe_stream_output_info *so = &sel->so; + struct gallivm_state *gallivm = &ctx->gallivm; + LLVMBuilderRef builder = gallivm->builder; + int i; + struct lp_build_if_state if_ctx; + /* Get bits [22:16], i.e. (so_param >> 16) & 127; */ LLVMValueRef so_vtx_count = unpack_param(ctx, ctx->param_streamout_config, 16, 7); - LLVMValueRef tid = get_thread_id(ctx); + LLVMValueRef tid = ac_get_thread_id(&ctx->ac); /* can_emit = tid < so_vtx_count; */ LLVMValueRef can_emit = LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, ""); - LLVMValueRef stream_id = - unpack_param(ctx, ctx->param_streamout_config, 24, 2); - /* Emit the streamout code conditionally. This actually avoids * out-of-bounds buffer access. The hw tells us via the SGPR * (so_vtx_count) which threads are allowed to emit streamout data. */ @@ -2269,12 +2152,22 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx, /* Compute (streamout_write_index + thread_id). */ so_write_index = LLVMBuildAdd(builder, so_write_index, tid, ""); - /* Compute the write offset for each enabled buffer. */ + /* Load the descriptor and compute the write offset for each + * enabled buffer. */ LLVMValueRef so_write_offset[4] = {}; + LLVMValueRef so_buffers[4]; + LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, + SI_PARAM_RW_BUFFERS); + for (i = 0; i < 4; i++) { if (!so->stride[i]) continue; + LLVMValueRef offset = lp_build_const_int32(gallivm, + SI_VS_STREAMOUT_BUF0 + i); + + so_buffers[i] = ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset); + LLVMValueRef so_offset = LLVMGetParam(ctx->main_fn, ctx->param_streamout_offset[i]); so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->i32, 4, 0), ""); @@ -2286,58 +2179,16 @@ static void si_llvm_emit_streamout(struct si_shader_context *ctx, /* Write streamout data. */ for (i = 0; i < so->num_outputs; i++) { - unsigned buf_idx = so->output[i].output_buffer; unsigned reg = so->output[i].register_index; - unsigned start = so->output[i].start_component; - unsigned num_comps = so->output[i].num_components; - unsigned stream = so->output[i].stream; - LLVMValueRef out[4]; - struct lp_build_if_state if_ctx_stream; - - assert(num_comps && num_comps <= 4); - if (!num_comps || num_comps > 4) - continue; if (reg >= noutput) continue; - /* Load the output as int. */ - for (j = 0; j < num_comps; j++) { - out[j] = LLVMBuildBitCast(builder, - outputs[reg].values[start+j], - ctx->i32, ""); - } - - /* Pack the output. */ - LLVMValueRef vdata = NULL; - - switch (num_comps) { - case 1: /* as i32 */ - vdata = out[0]; - break; - case 2: /* as v2i32 */ - case 3: /* as v4i32 (aligned to 4) */ - case 4: /* as v4i32 */ - vdata = LLVMGetUndef(LLVMVectorType(ctx->i32, util_next_power_of_two(num_comps))); - for (j = 0; j < num_comps; j++) { - vdata = LLVMBuildInsertElement(builder, vdata, out[j], - LLVMConstInt(ctx->i32, j, 0), ""); - } - break; - } + if (stream != so->output[i].stream) + continue; - LLVMValueRef can_emit_stream = - LLVMBuildICmp(builder, LLVMIntEQ, - stream_id, - lp_build_const_int32(gallivm, stream), ""); - - lp_build_if(&if_ctx_stream, gallivm, can_emit_stream); - build_tbuffer_store_dwords(ctx, so_buffers[buf_idx], - vdata, num_comps, - so_write_offset[buf_idx], - LLVMConstInt(ctx->i32, 0, 0), - so->output[i].dst_offset*4); - lp_build_endif(&if_ctx_stream); + emit_streamout_output(ctx, so_buffers, so_write_offset, + &so->output[i], &outputs[reg]); } } lp_build_endif(&if_ctx); @@ -2352,10 +2203,7 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base, struct si_shader_context *ctx = si_shader_context(bld_base); struct si_shader *shader = ctx->shader; struct lp_build_context *base = &bld_base->base; - struct lp_build_context *uint = - &ctx->soa.bld_base.uint_bld; - LLVMValueRef args[9]; - LLVMValueRef pos_args[4][9] = { { 0 } }; + struct ac_export_args args, pos_args[4] = {}; LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL; unsigned semantic_name, semantic_index; unsigned target; @@ -2363,13 +2211,9 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base, unsigned pos_idx; int i; - if (outputs && ctx->shader->selector->so.num_outputs) { - si_llvm_emit_streamout(ctx, outputs, noutput); - } - for (i = 0; i < noutput; i++) { - semantic_name = outputs[i].name; - semantic_index = outputs[i].sid; + semantic_name = outputs[i].semantic_name; + semantic_index = outputs[i].semantic_index; bool export_param = true; switch (semantic_name) { @@ -2391,6 +2235,12 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base, break; } + if (outputs[i].vertex_stream[0] != 0 && + outputs[i].vertex_stream[1] != 0 && + outputs[i].vertex_stream[2] != 0 && + outputs[i].vertex_stream[3] != 0) + export_param = false; + handle_semantic: /* Select the correct target */ switch(semantic_name) { @@ -2411,15 +2261,6 @@ handle_semantic: case TGSI_SEMANTIC_POSITION: target = V_008DFC_SQ_EXP_POS; break; - case TGSI_SEMANTIC_COLOR: - case TGSI_SEMANTIC_BCOLOR: - if (!export_param) - continue; - target = V_008DFC_SQ_EXP_PARAM + param_count; - assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset)); - shader->info.vs_output_param_offset[i] = param_count; - param_count++; - break; case TGSI_SEMANTIC_CLIPDIST: if (shader->key.opt.hw_vs.clip_disable) { semantic_name = TGSI_SEMANTIC_GENERIC; @@ -2432,6 +2273,8 @@ handle_semantic: continue; si_llvm_emit_clipvertex(bld_base, pos_args, outputs[i].values); continue; + case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_BCOLOR: case TGSI_SEMANTIC_PRIMID: case TGSI_SEMANTIC_FOG: case TGSI_SEMANTIC_TEXCOORD: @@ -2450,16 +2293,14 @@ handle_semantic: semantic_name); } - si_llvm_init_export_args(bld_base, outputs[i].values, target, args); + si_llvm_init_export_args(bld_base, outputs[i].values, target, &args); if (target >= V_008DFC_SQ_EXP_POS && target <= (V_008DFC_SQ_EXP_POS + 3)) { - memcpy(pos_args[target - V_008DFC_SQ_EXP_POS], - args, sizeof(args)); + memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], + &args, sizeof(args)); } else { - lp_build_intrinsic(base->gallivm->builder, - "llvm.SI.export", ctx->voidt, - args, 9, 0); + ac_emit_export(&ctx->ac, &args); } if (semantic_name == TGSI_SEMANTIC_CLIPDIST) { @@ -2471,16 +2312,16 @@ handle_semantic: shader->info.nr_param_exports = param_count; /* We need to add the position output manually if it's missing. */ - if (!pos_args[0][0]) { - pos_args[0][0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */ - pos_args[0][1] = uint->zero; /* EXEC mask */ - pos_args[0][2] = uint->zero; /* last export? */ - pos_args[0][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS); - pos_args[0][4] = uint->zero; /* COMPR flag */ - pos_args[0][5] = base->zero; /* X */ - pos_args[0][6] = base->zero; /* Y */ - pos_args[0][7] = base->zero; /* Z */ - pos_args[0][8] = base->one; /* W */ + if (!pos_args[0].out[0]) { + pos_args[0].enabled_channels = 0xf; /* writemask */ + pos_args[0].valid_mask = 0; /* EXEC mask */ + pos_args[0].done = 0; /* last export? */ + pos_args[0].target = V_008DFC_SQ_EXP_POS; + pos_args[0].compr = 0; /* COMPR flag */ + pos_args[0].out[0] = base->zero; /* X */ + pos_args[0].out[1] = base->zero; /* Y */ + pos_args[0].out[2] = base->zero; /* Z */ + pos_args[0].out[3] = base->one; /* W */ } /* Write the misc vector (point size, edgeflag, layer, viewport). */ @@ -2488,22 +2329,21 @@ handle_semantic: shader->selector->info.writes_edgeflag || shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) { - pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */ - shader->selector->info.writes_psize | - (shader->selector->info.writes_edgeflag << 1) | - (shader->selector->info.writes_layer << 2) | - (shader->selector->info.writes_viewport_index << 3)); - pos_args[1][1] = uint->zero; /* EXEC mask */ - pos_args[1][2] = uint->zero; /* last export? */ - pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1); - pos_args[1][4] = uint->zero; /* COMPR flag */ - pos_args[1][5] = base->zero; /* X */ - pos_args[1][6] = base->zero; /* Y */ - pos_args[1][7] = base->zero; /* Z */ - pos_args[1][8] = base->zero; /* W */ + pos_args[1].enabled_channels = shader->selector->info.writes_psize | + (shader->selector->info.writes_edgeflag << 1) | + (shader->selector->info.writes_layer << 2) | + (shader->selector->info.writes_viewport_index << 3); + pos_args[1].valid_mask = 0; /* EXEC mask */ + pos_args[1].done = 0; /* last export? */ + pos_args[1].target = V_008DFC_SQ_EXP_POS + 1; + pos_args[1].compr = 0; /* COMPR flag */ + pos_args[1].out[0] = base->zero; /* X */ + pos_args[1].out[1] = base->zero; /* Y */ + pos_args[1].out[2] = base->zero; /* Z */ + pos_args[1].out[3] = base->zero; /* W */ if (shader->selector->info.writes_psize) - pos_args[1][5] = psize_value; + pos_args[1].out[0] = psize_value; if (shader->selector->info.writes_edgeflag) { /* The output is a float, but the hw expects an integer @@ -2516,36 +2356,35 @@ handle_semantic: bld_base->int_bld.one); /* The LLVM intrinsic expects a float. */ - pos_args[1][6] = LLVMBuildBitCast(base->gallivm->builder, + pos_args[1].out[1] = LLVMBuildBitCast(base->gallivm->builder, edgeflag_value, ctx->f32, ""); } if (shader->selector->info.writes_layer) - pos_args[1][7] = layer_value; + pos_args[1].out[2] = layer_value; if (shader->selector->info.writes_viewport_index) - pos_args[1][8] = viewport_index_value; + pos_args[1].out[3] = viewport_index_value; } for (i = 0; i < 4; i++) - if (pos_args[i][0]) + if (pos_args[i].out[0]) shader->info.nr_pos_exports++; pos_idx = 0; for (i = 0; i < 4; i++) { - if (!pos_args[i][0]) + if (!pos_args[i].out[0]) continue; /* Specify the target we are exporting */ - pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++); + pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++; if (pos_idx == shader->info.nr_pos_exports) /* Specify that this is the last export */ - pos_args[i][2] = uint->one; + pos_args[i].done = 1; - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - ctx->voidt, pos_args[i], 9, 0); + ac_emit_export(&ctx->ac, &pos_args[i]); } } @@ -2564,7 +2403,7 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5); rw_buffers = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); - buffer = build_indexed_load_const(ctx, rw_buffers, + buffer = ac_build_indexed_load_const(&ctx->ac, rw_buffers, lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP)); buffer_offset = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds); @@ -2584,14 +2423,15 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) ""); LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(ctx, + get_rel_patch_id(ctx), invocation_id, lp_build_const_int32(gallivm, i)); LLVMValueRef value = lds_load(bld_base, TGSI_TYPE_SIGNED, ~0, lds_ptr); - build_tbuffer_store_dwords(ctx, buffer, value, 4, buffer_addr, - buffer_offset, 0); + ac_build_tbuffer_store_dwords(&ctx->ac, buffer, value, 4, buffer_addr, + buffer_offset, 0); } } @@ -2605,7 +2445,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, struct si_shader *shader = ctx->shader; unsigned tess_inner_index, tess_outer_index; LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer; - LLVMValueRef out[6], vec0, vec1, rw_buffers, tf_base; + LLVMValueRef out[6], vec0, vec1, rw_buffers, tf_base, inner[4], outer[4]; unsigned stride, outer_comps, inner_comps, i; struct lp_build_if_state if_ctx, inner_if_ctx; @@ -2657,17 +2497,26 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, lp_build_const_int32(gallivm, tess_outer_index * 4), ""); + for (i = 0; i < 4; i++) { + inner[i] = LLVMGetUndef(ctx->i32); + outer[i] = LLVMGetUndef(ctx->i32); + } + if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) { /* For isolines, the hardware expects tess factors in the * reverse order from what GLSL / TGSI specify. */ - out[0] = lds_load(bld_base, TGSI_TYPE_SIGNED, 1, lds_outer); - out[1] = lds_load(bld_base, TGSI_TYPE_SIGNED, 0, lds_outer); + outer[0] = out[1] = lds_load(bld_base, TGSI_TYPE_SIGNED, 0, lds_outer); + outer[1] = out[0] = lds_load(bld_base, TGSI_TYPE_SIGNED, 1, lds_outer); } else { - for (i = 0; i < outer_comps; i++) - out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer); - for (i = 0; i < inner_comps; i++) - out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner); + for (i = 0; i < outer_comps; i++) { + outer[i] = out[i] = + lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer); + } + for (i = 0; i < inner_comps; i++) { + inner[i] = out[outer_comps+i] = + lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner); + } } /* Convert the outputs to vectors for stores. */ @@ -2680,7 +2529,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, /* Get the buffer. */ rw_buffers = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); - buffer = build_indexed_load_const(ctx, rw_buffers, + buffer = ac_build_indexed_load_const(&ctx->ac, rw_buffers, lp_build_const_int32(gallivm, SI_HS_RING_TESS_FACTOR)); /* Get the offset. */ @@ -2694,18 +2543,54 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, rel_patch_id, bld_base->uint_bld.zero, "")); /* Store the dynamic HS control word. */ - build_tbuffer_store_dwords(ctx, buffer, - lp_build_const_int32(gallivm, 0x80000000), - 1, lp_build_const_int32(gallivm, 0), tf_base, 0); + ac_build_tbuffer_store_dwords(&ctx->ac, buffer, + lp_build_const_int32(gallivm, 0x80000000), + 1, lp_build_const_int32(gallivm, 0), tf_base, 0); lp_build_endif(&inner_if_ctx); /* Store the tessellation factors. */ - build_tbuffer_store_dwords(ctx, buffer, vec0, - MIN2(stride, 4), byteoffset, tf_base, 4); + ac_build_tbuffer_store_dwords(&ctx->ac, buffer, vec0, + MIN2(stride, 4), byteoffset, tf_base, 4); if (vec1) - build_tbuffer_store_dwords(ctx, buffer, vec1, - stride - 4, byteoffset, tf_base, 20); + ac_build_tbuffer_store_dwords(&ctx->ac, buffer, vec1, + stride - 4, byteoffset, tf_base, 20); + + /* Store the tess factors into the offchip buffer if TES reads them. */ + if (shader->key.part.tcs.epilog.tes_reads_tess_factors) { + LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset; + LLVMValueRef tf_inner_offset; + unsigned param_outer, param_inner; + + buf = ac_build_indexed_load_const(&ctx->ac, rw_buffers, + LLVMConstInt(ctx->i32, SI_HS_RING_TESS_OFFCHIP, 0)); + base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds); + + param_outer = si_shader_io_get_unique_index( + TGSI_SEMANTIC_TESSOUTER, 0); + tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL, + LLVMConstInt(ctx->i32, param_outer, 0)); + + outer_vec = lp_build_gather_values(gallivm, outer, + util_next_power_of_two(outer_comps)); + + ac_build_tbuffer_store_dwords(&ctx->ac, buf, outer_vec, + outer_comps, tf_outer_offset, + base, 0); + if (inner_comps) { + param_inner = si_shader_io_get_unique_index( + TGSI_SEMANTIC_TESSINNER, 0); + tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL, + LLVMConstInt(ctx->i32, param_inner, 0)); + + inner_vec = inner_comps == 1 ? inner[0] : + lp_build_gather_values(gallivm, inner, inner_comps); + ac_build_tbuffer_store_dwords(&ctx->ac, buf, inner_vec, + inner_comps, tf_inner_offset, + base, 0); + } + } + lp_build_endif(&if_ctx); } @@ -2714,6 +2599,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset; + LLVMValueRef offchip_soffset, offchip_layout; si_copy_tcs_inputs(bld_base); @@ -2739,9 +2625,16 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) ret = LLVMBuildInsertValue(builder, ret, rw0, 0, ""); ret = LLVMBuildInsertValue(builder, ret, rw1, 1, ""); - /* Tess factor buffer soffset is after user SGPRs. */ + /* Tess offchip and factor buffer soffset are after user SGPRs. */ + offchip_layout = LLVMGetParam(ctx->main_fn, + SI_PARAM_TCS_OFFCHIP_LAYOUT); + offchip_soffset = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds); tf_soffset = LLVMGetParam(ctx->main_fn, SI_PARAM_TESS_FACTOR_OFFSET); + ret = LLVMBuildInsertValue(builder, ret, offchip_layout, + SI_SGPR_TCS_OFFCHIP_LAYOUT, ""); + ret = LLVMBuildInsertValue(builder, ret, offchip_soffset, + SI_TCS_NUM_USER_SGPR, ""); ret = LLVMBuildInsertValue(builder, ret, tf_soffset, SI_TCS_NUM_USER_SGPR + 1, ""); @@ -2774,7 +2667,7 @@ static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base) /* Write outputs to LDS. The next shader (TCS aka HS) will read * its inputs from it. */ for (i = 0; i < info->num_outputs; i++) { - LLVMValueRef *out_ptr = ctx->soa.outputs[i]; + LLVMValueRef *out_ptr = ctx->outputs[i]; unsigned name = info->output_semantic_name[i]; unsigned index = info->output_semantic_index[i]; int param = si_shader_io_get_unique_index(name, index); @@ -2800,8 +2693,7 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base) int i; for (i = 0; i < info->num_outputs; i++) { - LLVMValueRef *out_ptr = - ctx->soa.outputs[i]; + LLVMValueRef *out_ptr = ctx->outputs[i]; int param_index; if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX || @@ -2815,14 +2707,14 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base) LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], ""); out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, ""); - build_tbuffer_store(ctx, - ctx->esgs_ring, - out_val, 1, - LLVMGetUndef(ctx->i32), soffset, - (4 * param_index + chan) * 4, - V_008F0C_BUF_DATA_FORMAT_32, - V_008F0C_BUF_NUM_FORMAT_UINT, - 0, 0, 1, 1, 0); + ac_build_tbuffer_store(&ctx->ac, + ctx->esgs_ring, + out_val, 1, + LLVMGetUndef(ctx->i32), soffset, + (4 * param_index + chan) * 4, + V_008F0C_BUF_DATA_FORMAT_32, + V_008F0C_BUF_NUM_FORMAT_UINT, + 0, 0, 1, 1, 0); } } } @@ -2830,13 +2722,9 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base) static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base) { struct si_shader_context *ctx = si_shader_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMValueRef args[2]; - args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_NOP | SENDMSG_GS_DONE); - args[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID); - lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg", - ctx->voidt, args, 2, 0); + ac_emit_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, + LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID)); } static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base) @@ -2878,9 +2766,9 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base) } for (j = 0; j < 4; j++) { - addr = ctx->soa.outputs[i][j]; + addr = ctx->outputs[i][j]; val = LLVMBuildLoad(gallivm->builder, addr, ""); - val = si_llvm_saturate(bld_base, val); + val = ac_emit_clamp(&ctx->ac, val); LLVMBuildStore(gallivm->builder, val, addr); } } @@ -2890,14 +2778,18 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base) } for (i = 0; i < info->num_outputs; i++) { - outputs[i].name = info->output_semantic_name[i]; - outputs[i].sid = info->output_semantic_index[i]; + outputs[i].semantic_name = info->output_semantic_name[i]; + outputs[i].semantic_index = info->output_semantic_index[i]; - for (j = 0; j < 4; j++) + for (j = 0; j < 4; j++) { outputs[i].values[j] = LLVMBuildLoad(gallivm->builder, - ctx->soa.outputs[i][j], + ctx->outputs[i][j], ""); + outputs[i].vertex_stream[j] = + (info->output_streams[i] >> (2 * j)) & 3; + } + } /* Return the primitive ID from the LLVM function. */ @@ -2908,13 +2800,15 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base) get_primitive_id(bld_base, 0)), VS_EPILOG_PRIMID_LOC, ""); + if (ctx->shader->selector->so.num_outputs) + si_llvm_emit_streamout(ctx, outputs, i, 0); si_llvm_export_vs(bld_base, outputs, i); FREE(outputs); } struct si_ps_exports { unsigned num; - LLVMValueRef args[10][9]; + struct ac_export_args args[10]; }; unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil, @@ -2942,8 +2836,7 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, { struct si_shader_context *ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; - struct lp_build_context *uint = &bld_base->uint_bld; - LLVMValueRef args[9]; + struct ac_export_args args; unsigned mask = 0; unsigned format = si_get_spi_shader_z_format(depth != NULL, stencil != NULL, @@ -2951,46 +2844,46 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, assert(depth || stencil || samplemask); - args[1] = uint->one; /* whether the EXEC mask is valid */ - args[2] = uint->one; /* DONE bit */ + args.valid_mask = 1; /* whether the EXEC mask is valid */ + args.done = 1; /* DONE bit */ /* Specify the target we are exporting */ - args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ); + args.target = V_008DFC_SQ_EXP_MRTZ; - args[4] = uint->zero; /* COMP flag */ - args[5] = base->undef; /* R, depth */ - args[6] = base->undef; /* G, stencil test value[0:7], stencil op value[8:15] */ - args[7] = base->undef; /* B, sample mask */ - args[8] = base->undef; /* A, alpha to mask */ + args.compr = 0; /* COMP flag */ + args.out[0] = base->undef; /* R, depth */ + args.out[1] = base->undef; /* G, stencil test value[0:7], stencil op value[8:15] */ + args.out[2] = base->undef; /* B, sample mask */ + args.out[3] = base->undef; /* A, alpha to mask */ if (format == V_028710_SPI_SHADER_UINT16_ABGR) { assert(!depth); - args[4] = uint->one; /* COMPR flag */ + args.compr = 1; /* COMPR flag */ if (stencil) { /* Stencil should be in X[23:16]. */ stencil = bitcast(bld_base, TGSI_TYPE_UNSIGNED, stencil); stencil = LLVMBuildShl(base->gallivm->builder, stencil, LLVMConstInt(ctx->i32, 16, 0), ""); - args[5] = bitcast(bld_base, TGSI_TYPE_FLOAT, stencil); + args.out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, stencil); mask |= 0x3; } if (samplemask) { /* SampleMask should be in Y[15:0]. */ - args[6] = samplemask; + args.out[1] = samplemask; mask |= 0xc; } } else { if (depth) { - args[5] = depth; + args.out[0] = depth; mask |= 0x1; } if (stencil) { - args[6] = stencil; + args.out[1] = stencil; mask |= 0x2; } if (samplemask) { - args[7] = samplemask; + args.out[2] = samplemask; mask |= 0x4; } } @@ -3003,9 +2896,9 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, mask |= 0x1; /* Specify which components to enable */ - args[0] = lp_build_const_int32(base->gallivm, mask); + args.enabled_channels = mask; - memcpy(exp->args[exp->num++], args, sizeof(args)); + memcpy(&exp->args[exp->num++], &args, sizeof(args)); } static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, @@ -3020,7 +2913,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, /* Clamp color */ if (ctx->shader->key.part.ps.epilog.clamp_color) for (i = 0; i < 4; i++) - color[i] = si_llvm_saturate(bld_base, color[i]); + color[i] = ac_emit_clamp(&ctx->ac, color[i]); /* Alpha to one */ if (ctx->shader->key.part.ps.epilog.alpha_to_one) @@ -3038,40 +2931,40 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ if (ctx->shader->key.part.ps.epilog.last_cbuf > 0) { - LLVMValueRef args[8][9]; + struct ac_export_args args[8]; int c, last = -1; /* Get the export arguments, also find out what the last one is. */ for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) { si_llvm_init_export_args(bld_base, color, - V_008DFC_SQ_EXP_MRT + c, args[c]); - if (args[c][0] != bld_base->uint_bld.zero) + V_008DFC_SQ_EXP_MRT + c, &args[c]); + if (args[c].enabled_channels) last = c; } /* Emit all exports. */ for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) { if (is_last && last == c) { - args[c][1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ - args[c][2] = bld_base->uint_bld.one; /* DONE bit */ - } else if (args[c][0] == bld_base->uint_bld.zero) + args[c].valid_mask = 1; /* whether the EXEC mask is valid */ + args[c].done = 1; /* DONE bit */ + } else if (!args[c].enabled_channels) continue; /* unnecessary NULL export */ - memcpy(exp->args[exp->num++], args[c], sizeof(args[c])); + memcpy(&exp->args[exp->num++], &args[c], sizeof(args[c])); } } else { - LLVMValueRef args[9]; + struct ac_export_args args; /* Export */ si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index, - args); + &args); if (is_last) { - args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ - args[2] = bld_base->uint_bld.one; /* DONE bit */ - } else if (args[0] == bld_base->uint_bld.zero) + args.valid_mask = 1; /* whether the EXEC mask is valid */ + args.done = 1; /* DONE bit */ + } else if (!args.enabled_channels) return; /* unnecessary NULL export */ - memcpy(exp->args[exp->num++], args, sizeof(args)); + memcpy(&exp->args[exp->num++], &args, sizeof(args)); } } @@ -3079,30 +2972,26 @@ static void si_emit_ps_exports(struct si_shader_context *ctx, struct si_ps_exports *exp) { for (unsigned i = 0; i < exp->num; i++) - lp_build_intrinsic(ctx->gallivm.builder, - "llvm.SI.export", ctx->voidt, - exp->args[i], 9, 0); + ac_emit_export(&ctx->ac, &exp->args[i]); } static void si_export_null(struct lp_build_tgsi_context *bld_base) { struct si_shader_context *ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; - struct lp_build_context *uint = &bld_base->uint_bld; - LLVMValueRef args[9]; + struct ac_export_args args; - args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */ - args[1] = uint->one; /* whether the EXEC mask is valid */ - args[2] = uint->one; /* DONE bit */ - args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL); - args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */ - args[5] = base->undef; /* R */ - args[6] = base->undef; /* G */ - args[7] = base->undef; /* B */ - args[8] = base->undef; /* A */ + args.enabled_channels = 0x0; /* enabled channels */ + args.valid_mask = 1; /* whether the EXEC mask is valid */ + args.done = 1; /* DONE bit */ + args.target = V_008DFC_SQ_EXP_NULL; + args.compr = 0; /* COMPR flag (0 = 32-bit export) */ + args.out[0] = base->undef; /* R */ + args.out[1] = base->undef; /* G */ + args.out[2] = base->undef; /* B */ + args.out[3] = base->undef; /* A */ - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - ctx->voidt, args, 9, 0); + ac_emit_export(&ctx->ac, &args); } /** @@ -3140,22 +3029,22 @@ static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base) case TGSI_SEMANTIC_COLOR: assert(semantic_index < 8); for (j = 0; j < 4; j++) { - LLVMValueRef ptr = ctx->soa.outputs[i][j]; + LLVMValueRef ptr = ctx->outputs[i][j]; LLVMValueRef result = LLVMBuildLoad(builder, ptr, ""); color[semantic_index][j] = result; } break; case TGSI_SEMANTIC_POSITION: depth = LLVMBuildLoad(builder, - ctx->soa.outputs[i][2], ""); + ctx->outputs[i][2], ""); break; case TGSI_SEMANTIC_STENCIL: stencil = LLVMBuildLoad(builder, - ctx->soa.outputs[i][1], ""); + ctx->outputs[i][1], ""); break; case TGSI_SEMANTIC_SAMPLEMASK: samplemask = LLVMBuildLoad(builder, - ctx->soa.outputs[i][0], ""); + ctx->outputs[i][0], ""); break; default: fprintf(stderr, "Warning: SI unhandled fs output type:%d\n", @@ -3233,45 +3122,6 @@ static LLVMValueRef get_buffer_size( return size; } -/** - * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with - * intrinsic names). - */ -static void build_type_name_for_intr( - LLVMTypeRef type, - char *buf, unsigned bufsize) -{ - LLVMTypeRef elem_type = type; - - assert(bufsize >= 8); - - if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { - int ret = snprintf(buf, bufsize, "v%u", - LLVMGetVectorSize(type)); - if (ret < 0) { - char *type_name = LLVMPrintTypeToString(type); - fprintf(stderr, "Error building type name for: %s\n", - type_name); - return; - } - elem_type = LLVMGetElementType(type); - buf += ret; - bufsize -= ret; - } - switch (LLVMGetTypeKind(elem_type)) { - default: break; - case LLVMIntegerTypeKind: - snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type)); - break; - case LLVMFloatTypeKind: - snprintf(buf, bufsize, "f32"); - break; - case LLVMDoubleTypeKind: - snprintf(buf, bufsize, "f64"); - break; - } -} - static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data); @@ -3346,7 +3196,7 @@ shader_buffer_fetch_rsrc(struct si_shader_context *ctx, reg->Register.Index, SI_NUM_SHADER_BUFFERS); - return build_indexed_load_const(ctx, rsrc_ptr, index); + return ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index); } static bool tgsi_is_array_sampler(unsigned target) @@ -3424,10 +3274,12 @@ image_fetch_rsrc( if (!image->Register.Indirect) { const struct tgsi_shader_info *info = bld_base->info; + unsigned images_writemask = info->images_store | + info->images_atomic; index = LLVMConstInt(ctx->i32, image->Register.Index, 0); - if (info->images_writemask & (1 << image->Register.Index) && + if (images_writemask & (1 << image->Register.Index) && target != TGSI_TEXTURE_BUFFER) dcc_off = true; } else { @@ -3454,11 +3306,11 @@ image_fetch_rsrc( LLVMConstInt(ctx->i32, 2, 0), ""); index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 1, 0), ""); - *rsrc = build_indexed_load_const(ctx, rsrc_ptr, index); + *rsrc = ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index); return; } - tmp = build_indexed_load_const(ctx, rsrc_ptr, index); + tmp = ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index); if (dcc_off) tmp = force_dcc_off(ctx, tmp); *rsrc = tmp; @@ -3651,7 +3503,7 @@ static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx, LLVMValueRef offset, ptr; int addr_space; - offset = lp_build_emit_fetch(&ctx->soa.bld_base, inst, arg, 0); + offset = lp_build_emit_fetch(&ctx->bld_base, inst, arg, 0); offset = LLVMBuildBitCast(builder, offset, ctx->i32, ""); ptr = ctx->shared_memory; @@ -3667,7 +3519,7 @@ static void load_emit_memory( struct lp_build_emit_data *emit_data) { const struct tgsi_full_instruction *inst = emit_data->inst; - struct lp_build_context *base = &ctx->soa.bld_base.base; + struct lp_build_context *base = &ctx->bld_base.base; struct gallivm_state *gallivm = &ctx->gallivm; LLVMBuilderRef builder = gallivm->builder; unsigned writemask = inst->Dst[0].Register.WriteMask; @@ -3697,7 +3549,7 @@ static void get_image_intr_name(const char *base_name, { char coords_type_name[8]; - build_type_name_for_intr(coords_type, coords_type_name, + ac_build_type_name_for_intr(coords_type, coords_type_name, sizeof(coords_type_name)); if (HAVE_LLVM <= 0x0309) { @@ -3706,9 +3558,9 @@ static void get_image_intr_name(const char *base_name, char data_type_name[8]; char rsrc_type_name[8]; - build_type_name_for_intr(data_type, data_type_name, + ac_build_type_name_for_intr(data_type, data_type_name, sizeof(data_type_name)); - build_type_name_for_intr(rsrc_type, rsrc_type_name, + ac_build_type_name_for_intr(rsrc_type, rsrc_type_name, sizeof(rsrc_type_name)); snprintf(out_name, out_len, "%s.%s.%s.%s", base_name, data_type_name, coords_type_name, rsrc_type_name); @@ -3833,7 +3685,7 @@ static void store_emit_buffer( const struct tgsi_full_instruction *inst = emit_data->inst; struct gallivm_state *gallivm = &ctx->gallivm; LLVMBuilderRef builder = gallivm->builder; - struct lp_build_context *uint_bld = &ctx->soa.bld_base.uint_bld; + struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld; LLVMValueRef base_data = emit_data->args[0]; LLVMValueRef base_offset = emit_data->args[3]; unsigned writemask = inst->Dst[0].Register.WriteMask; @@ -3904,7 +3756,7 @@ static void store_emit_memory( { const struct tgsi_full_instruction *inst = emit_data->inst; struct gallivm_state *gallivm = &ctx->gallivm; - struct lp_build_context *base = &ctx->soa.bld_base.base; + struct lp_build_context *base = &ctx->bld_base.base; LLVMBuilderRef builder = gallivm->builder; unsigned writemask = inst->Dst[0].Register.WriteMask; LLVMValueRef ptr, derived_ptr, data, index; @@ -3916,7 +3768,7 @@ static void store_emit_memory( if (!(writemask & (1 << chan))) { continue; } - data = lp_build_emit_fetch(&ctx->soa.bld_base, inst, 1, chan); + data = lp_build_emit_fetch(&ctx->bld_base, inst, 1, chan); index = lp_build_const_int32(gallivm, chan); derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, ""); LLVMBuildStore(builder, data, derived_ptr); @@ -4034,12 +3886,12 @@ static void atomic_emit_memory(struct si_shader_context *ctx, ptr = get_memory_ptr(ctx, inst, ctx->i32, 1); - arg = lp_build_emit_fetch(&ctx->soa.bld_base, inst, 2, 0); + arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0); arg = LLVMBuildBitCast(builder, arg, ctx->i32, ""); if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { LLVMValueRef new_data; - new_data = lp_build_emit_fetch(&ctx->soa.bld_base, + new_data = lp_build_emit_fetch(&ctx->bld_base, inst, 3, 0); new_data = LLVMBuildBitCast(builder, new_data, ctx->i32, ""); @@ -4124,7 +3976,7 @@ static void atomic_emit( else coords = emit_data->args[1]; - build_type_name_for_intr(LLVMTypeOf(coords), coords_type, sizeof(coords_type)); + ac_build_type_name_for_intr(LLVMTypeOf(coords), coords_type, sizeof(coords_type)); snprintf(intrinsic_name, sizeof(intrinsic_name), "llvm.amdgcn.image.atomic.%s.%s", action->intr_name, coords_type); @@ -4191,7 +4043,7 @@ static void resq_emit( out = lp_build_intrinsic( builder, "llvm.SI.getresinfo.i32", emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); /* Divide the number of layers by 6 to get the number of cubes. */ if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY) { @@ -4216,7 +4068,8 @@ static void set_tex_fetch_args(struct si_shader_context *ctx, { struct gallivm_state *gallivm = &ctx->gallivm; unsigned num_args; - unsigned is_rect = target == TGSI_TEXTURE_RECT; + unsigned is_rect = target == TGSI_TEXTURE_RECT || + target == TGSI_TEXTURE_SHADOWRECT; /* Pad to power of two vector */ while (count < util_next_power_of_two(count)) @@ -4298,7 +4151,7 @@ static LLVMValueRef load_sampler_desc_custom(struct si_shader_context *ctx, break; } - return build_indexed_load_const(ctx, list, index); + return ac_build_indexed_load_const(&ctx->ac, list, index); } static LLVMValueRef load_sampler_desc(struct si_shader_context *ctx, @@ -4428,7 +4281,7 @@ static void txq_emit(const struct lp_build_tgsi_action *action, emit_data->output[emit_data->chan] = lp_build_intrinsic( base->gallivm->builder, "llvm.SI.getresinfo.i32", emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); /* Divide the number of layers by 6 to get the number of cubes. */ if (target == TGSI_TEXTURE_CUBE_ARRAY || @@ -4543,7 +4396,7 @@ static void tex_fetch_args( * Z32_FLOAT, but we don't know that here. */ if (ctx->screen->b.chip_class == VI) - z = si_llvm_saturate(bld_base, z); + z = ac_emit_clamp(&ctx->ac, z); address[count++] = z; } @@ -4595,7 +4448,11 @@ static void tex_fetch_args( target == TGSI_TEXTURE_CUBE_ARRAY || target == TGSI_TEXTURE_SHADOWCUBE || target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) - si_prepare_cube_coords(bld_base, emit_data, coords, derivs); + ac_prepare_cube_coords(&ctx->ac, + opcode == TGSI_OPCODE_TXD, + target == TGSI_TEXTURE_CUBE_ARRAY || + target == TGSI_TEXTURE_SHADOWCUBE_ARRAY, + coords, derivs); if (opcode == TGSI_OPCODE_TXD) for (int i = 0; i < num_deriv_channels * 2; i++) @@ -4643,16 +4500,12 @@ static void tex_fetch_args( struct lp_build_context *uint_bld = &bld_base->uint_bld; struct lp_build_emit_data txf_emit_data = *emit_data; LLVMValueRef txf_address[4]; - unsigned txf_count = count; + /* We only need .xy for non-arrays, and .xyz for arrays. */ + unsigned txf_count = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3; struct tgsi_full_instruction inst = {}; memcpy(txf_address, address, sizeof(txf_address)); - if (target == TGSI_TEXTURE_2D_MSAA) { - txf_address[2] = bld_base->uint_bld.zero; - } - txf_address[3] = bld_base->uint_bld.zero; - /* Read FMASK using TXF. */ inst.Instruction.Opcode = TGSI_OPCODE_TXF; inst.Texture.Texture = target; @@ -4673,7 +4526,7 @@ static void tex_fetch_args( txf_emit_data.output[0], uint_bld->zero, ""); - unsigned sample_chan = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3; + unsigned sample_chan = txf_count; /* the sample index is last */ LLVMValueRef sample_index4 = LLVMBuildMul(gallivm->builder, address[sample_chan], four, ""); @@ -4709,7 +4562,6 @@ static void tex_fetch_args( /* add tex offsets */ if (inst->Texture.NumOffsets) { struct lp_build_context *uint_bld = &bld_base->uint_bld; - struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); const struct tgsi_texture_offset *off = inst->TexOffsets; assert(inst->Texture.NumOffsets == 1); @@ -4717,7 +4569,7 @@ static void tex_fetch_args( switch (target) { case TGSI_TEXTURE_3D: address[2] = lp_build_add(uint_bld, address[2], - bld->immediates[off->Index][off->SwizzleZ]); + ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ]); /* fall through */ case TGSI_TEXTURE_2D: case TGSI_TEXTURE_SHADOW2D: @@ -4727,7 +4579,7 @@ static void tex_fetch_args( case TGSI_TEXTURE_SHADOW2D_ARRAY: address[1] = lp_build_add(uint_bld, address[1], - bld->immediates[off->Index][off->SwizzleY]); + ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY]); /* fall through */ case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -4735,7 +4587,7 @@ static void tex_fetch_args( case TGSI_TEXTURE_SHADOW1D_ARRAY: address[0] = lp_build_add(uint_bld, address[0], - bld->immediates[off->Index][off->SwizzleX]); + ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX]); break; /* texture offsets do not apply to other texture targets */ } @@ -4755,13 +4607,12 @@ static void tex_fetch_args( /* Get the component index from src1.x for Gather4. */ if (!tgsi_is_shadow_target(target)) { - LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates; LLVMValueRef comp_imm; struct tgsi_src_register src1 = inst->Src[1].Register; assert(src1.File == TGSI_FILE_IMMEDIATE); - comp_imm = imms[src1.Index][src1.SwizzleX]; + comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX]; gather_comp = LLVMConstIntGetZExtValue(comp_imm); gather_comp = CLAMP(gather_comp, 0, 3); } @@ -4806,9 +4657,9 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx, set_tex_fetch_args(ctx, &txq_emit_data, TGSI_OPCODE_TXQ, txq_inst.Texture.Texture, emit_data->args[1], NULL, - &ctx->soa.bld_base.uint_bld.zero, + &ctx->bld_base.uint_bld.zero, 1, 0xf); - txq_emit(NULL, &ctx->soa.bld_base, &txq_emit_data); + txq_emit(NULL, &ctx->bld_base, &txq_emit_data); /* Compute -0.5 / size. */ for (c = 0; c < 2; c++) { @@ -4817,7 +4668,7 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx, LLVMConstInt(ctx->i32, c, 0), ""); half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, ""); half_texel[c] = - lp_build_emit_llvm_unary(&ctx->soa.bld_base, + lp_build_emit_llvm_unary(&ctx->bld_base, TGSI_OPCODE_RCP, half_texel[c]); half_texel[c] = LLVMBuildFMul(builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), ""); @@ -4839,7 +4690,7 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx, emit_data->output[emit_data->chan] = lp_build_intrinsic(builder, intr_name, emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); } static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, @@ -4863,7 +4714,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, base->gallivm->builder, "llvm.SI.vs.load.input", emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); return; } @@ -4909,7 +4760,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, } /* Add the type and suffixes .c, .o if needed. */ - build_type_name_for_intr(LLVMTypeOf(emit_data->args[0]), type, sizeof(type)); + ac_build_type_name_for_intr(LLVMTypeOf(emit_data->args[0]), type, sizeof(type)); sprintf(intr_name, "%s%s%s%s.%s", name, is_shadow ? ".c" : "", infix, has_offset ? ".o" : "", type); @@ -4940,7 +4791,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, emit_data->output[emit_data->chan] = lp_build_intrinsic( base->gallivm->builder, intr_name, emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); } static void si_llvm_emit_txqs( @@ -4972,35 +4823,6 @@ static void si_llvm_emit_txqs( emit_data->output[emit_data->chan] = samples; } -/* - * SI implements derivatives using the local data store (LDS) - * All writes to the LDS happen in all executing threads at - * the same time. TID is the Thread ID for the current - * thread and is a value between 0 and 63, representing - * the thread's position in the wavefront. - * - * For the pixel shader threads are grouped into quads of four pixels. - * The TIDs of the pixels of a quad are: - * - * +------+------+ - * |4n + 0|4n + 1| - * +------+------+ - * |4n + 2|4n + 3| - * +------+------+ - * - * So, masking the TID with 0xfffffffc yields the TID of the top left pixel - * of the quad, masking with 0xfffffffd yields the TID of the top pixel of - * the current pixel's column, and masking with 0xfffffffe yields the TID - * of the left pixel of the current pixel's row. - * - * Adding 1 yields the TID of the pixel to the right of the left pixel, and - * adding 2 yields the TID of the pixel below the top pixel. - */ -/* masks for thread ID. */ -#define TID_MASK_TOP_LEFT 0xfffffffc -#define TID_MASK_TOP 0xfffffffd -#define TID_MASK_LEFT 0xfffffffe - static void si_llvm_emit_ddxy( const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, @@ -5009,59 +4831,24 @@ static void si_llvm_emit_ddxy( struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; unsigned opcode = emit_data->info->opcode; - LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, val, args[2]; + LLVMValueRef val; int idx; unsigned mask; - thread_id = get_thread_id(ctx); - if (opcode == TGSI_OPCODE_DDX_FINE) - mask = TID_MASK_LEFT; + mask = AC_TID_MASK_LEFT; else if (opcode == TGSI_OPCODE_DDY_FINE) - mask = TID_MASK_TOP; + mask = AC_TID_MASK_TOP; else - mask = TID_MASK_TOP_LEFT; - - tl_tid = LLVMBuildAnd(gallivm->builder, thread_id, - lp_build_const_int32(gallivm, mask), ""); + mask = AC_TID_MASK_TOP_LEFT; /* for DDX we want to next X pixel, DDY next Y pixel. */ idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2; - trbl_tid = LLVMBuildAdd(gallivm->builder, tl_tid, - lp_build_const_int32(gallivm, idx), ""); val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, ""); - - if (ctx->screen->has_ds_bpermute) { - args[0] = LLVMBuildMul(gallivm->builder, tl_tid, - lp_build_const_int32(gallivm, 4), ""); - args[1] = val; - tl = lp_build_intrinsic(gallivm->builder, - "llvm.amdgcn.ds.bpermute", ctx->i32, - args, 2, LP_FUNC_ATTR_READNONE); - - args[0] = LLVMBuildMul(gallivm->builder, trbl_tid, - lp_build_const_int32(gallivm, 4), ""); - trbl = lp_build_intrinsic(gallivm->builder, - "llvm.amdgcn.ds.bpermute", ctx->i32, - args, 2, LP_FUNC_ATTR_READNONE); - } else { - LLVMValueRef store_ptr, load_ptr0, load_ptr1; - - store_ptr = build_gep0(ctx, ctx->lds, thread_id); - load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid); - load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid); - - LLVMBuildStore(gallivm->builder, val, store_ptr); - tl = LLVMBuildLoad(gallivm->builder, load_ptr0, ""); - trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, ""); - } - - tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, ""); - trbl = LLVMBuildBitCast(gallivm->builder, trbl, ctx->f32, ""); - - emit_data->output[emit_data->chan] = - LLVMBuildFSub(gallivm->builder, trbl, tl, ""); + val = ac_emit_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute, + mask, idx, ctx->lds, val); + emit_data->output[emit_data->chan] = val; } /* @@ -5221,11 +5008,11 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, gallivm->builder, interp_param, uint->zero, ""); LLVMValueRef j = LLVMBuildExtractElement( gallivm->builder, interp_param, uint->one, ""); - emit_data->output[chan] = build_fs_interp(bld_base, + emit_data->output[chan] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number, params, i, j); } else { - emit_data->output[chan] = build_fs_interp_mov(bld_base, + emit_data->output[chan] = ac_build_fs_interp_mov(&ctx->ac, lp_build_const_int32(gallivm, 2), /* P0 */ llvm_chan, attr_number, params); } @@ -5235,13 +5022,15 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { - LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates; + struct si_shader_context *ctx = si_shader_context(bld_base); struct tgsi_src_register src0 = emit_data->inst->Src[0].Register; + LLVMValueRef imm; unsigned stream; assert(src0.File == TGSI_FILE_IMMEDIATE); - stream = LLVMConstIntGetZExtValue(imms[src0.Index][src0.SwizzleX]) & 0x3; + imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX]; + stream = LLVMConstIntGetZExtValue(imm) & 0x3; return stream; } @@ -5261,8 +5050,7 @@ static void si_llvm_emit_vertex( SI_PARAM_GS2VS_OFFSET); LLVMValueRef gs_next_vertex; LLVMValueRef can_emit, kill; - LLVMValueRef args[2]; - unsigned chan; + unsigned chan, offset; int i; unsigned stream; @@ -5292,33 +5080,38 @@ static void si_llvm_emit_vertex( lp_build_const_float(gallivm, -1.0f)); lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill", - ctx->voidt, &kill, 1, 0); + ctx->voidt, &kill, 1, LP_FUNC_ATTR_LEGACY); } else { lp_build_if(&if_state, gallivm, can_emit); } + offset = 0; for (i = 0; i < info->num_outputs; i++) { - LLVMValueRef *out_ptr = - ctx->soa.outputs[i]; + LLVMValueRef *out_ptr = ctx->outputs[i]; for (chan = 0; chan < 4; chan++) { + if (!(info->output_usagemask[i] & (1 << chan)) || + ((info->output_streams[i] >> (2 * chan)) & 3) != stream) + continue; + LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], ""); LLVMValueRef voffset = - lp_build_const_int32(gallivm, (i * 4 + chan) * + lp_build_const_int32(gallivm, offset * shader->selector->gs_max_out_vertices); + offset++; voffset = lp_build_add(uint, voffset, gs_next_vertex); voffset = lp_build_mul_imm(uint, voffset, 4); out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, ""); - build_tbuffer_store(ctx, - ctx->gsvs_ring[stream], - out_val, 1, - voffset, soffset, 0, - V_008F0C_BUF_DATA_FORMAT_32, - V_008F0C_BUF_NUM_FORMAT_UINT, - 1, 0, 1, 1, 0); + ac_build_tbuffer_store(&ctx->ac, + ctx->gsvs_ring[stream], + out_val, 1, + voffset, soffset, 0, + V_008F0C_BUF_DATA_FORMAT_32, + V_008F0C_BUF_NUM_FORMAT_UINT, + 1, 0, 1, 1, 0); } } @@ -5328,11 +5121,8 @@ static void si_llvm_emit_vertex( LLVMBuildStore(gallivm->builder, gs_next_vertex, ctx->gs_next_vertex[stream]); /* Signal vertex emission */ - args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | SENDMSG_GS | (stream << 8)); - args[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID); - lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg", - ctx->voidt, args, 2, 0); - + ac_emit_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), + LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID)); if (!use_kill) lp_build_endif(&if_state); } @@ -5344,16 +5134,12 @@ static void si_llvm_emit_primitive( struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMValueRef args[2]; unsigned stream; /* Signal primitive cut */ stream = si_llvm_get_stream(bld_base, emit_data); - args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_CUT | SENDMSG_GS | (stream << 8)); - args[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID); - lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg", - ctx->voidt, args, 2, 0); + ac_emit_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), + LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID)); } static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, @@ -5363,10 +5149,13 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; - /* The real barrier instruction isn’t needed, because an entire patch + /* SI only (thanks to a hw bug workaround): + * The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. */ - if (ctx->type == PIPE_SHADER_TESS_CTRL) { + if (HAVE_LLVM >= 0x0309 && + ctx->screen->b.chip_class == SI && + ctx->type == PIPE_SHADER_TESS_CTRL) { emit_waitcnt(ctx, LGKM_CNT & VM_CNT); return; } @@ -5417,6 +5206,10 @@ static void si_create_function(struct si_shader_context *ctx, lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG); } + LLVMAddTargetDependentFunctionAttr(ctx->main_fn, + "no-signed-zeros-fp-math", + "true"); + if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) { /* These were copied from some LLVM test. */ LLVMAddTargetDependentFunctionAttr(ctx->main_fn, @@ -5434,20 +5227,6 @@ static void si_create_function(struct si_shader_context *ctx, } } -static void create_meta_data(struct si_shader_context *ctx) -{ - struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm; - - ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(gallivm->context, - "invariant.load", 14); - ctx->range_md_kind = LLVMGetMDKindIDInContext(gallivm->context, - "range", 5); - ctx->uniform_md_kind = LLVMGetMDKindIDInContext(gallivm->context, - "amdgpu.uniform", 14); - - ctx->empty_md = LLVMMDNodeInContext(gallivm->context, NULL, 0); -} - static void declare_streamout_params(struct si_shader_context *ctx, struct pipe_stream_output_info *so, LLVMTypeRef *params, LLVMTypeRef i32, @@ -5460,7 +5239,7 @@ static void declare_streamout_params(struct si_shader_context *ctx, if (ctx->type != PIPE_SHADER_TESS_EVAL) params[ctx->param_streamout_config = (*num_params)++] = i32; else - ctx->param_streamout_config = ctx->param_tess_offchip; + ctx->param_streamout_config = *num_params - 1; params[ctx->param_streamout_write_index = (*num_params)++] = i32; } @@ -5499,7 +5278,7 @@ static unsigned llvm_get_type_size(LLVMTypeRef type) static void declare_tess_lds(struct si_shader_context *ctx) { struct gallivm_state *gallivm = &ctx->gallivm; - struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; + struct lp_build_tgsi_context *bld_base = &ctx->bld_base; struct lp_build_context *uint = &bld_base->uint_bld; unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768; @@ -5527,10 +5306,10 @@ static unsigned si_get_max_workgroup_size(struct si_shader *shader) static void create_function(struct si_shader_context *ctx) { - struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; + struct lp_build_tgsi_context *bld_base = &ctx->bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; struct si_shader *shader = ctx->shader; - LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32; + LLVMTypeRef params[SI_NUM_PARAMS + SI_MAX_ATTRIBS], v3i32; LLVMTypeRef returns[16+32*4]; unsigned i, last_sgpr, num_params, num_return_sgprs; unsigned num_returns = 0; @@ -5546,7 +5325,7 @@ static void create_function(struct si_shader_context *ctx) switch (ctx->type) { case PIPE_SHADER_VERTEX: - params[SI_PARAM_VERTEX_BUFFERS] = const_array(ctx->v16i8, SI_NUM_VERTEX_BUFFERS); + params[SI_PARAM_VERTEX_BUFFERS] = const_array(ctx->v16i8, SI_MAX_ATTRIBS); params[SI_PARAM_BASE_VERTEX] = ctx->i32; params[SI_PARAM_START_INSTANCE] = ctx->i32; params[SI_PARAM_DRAWID] = ctx->i32; @@ -5624,10 +5403,10 @@ static void create_function(struct si_shader_context *ctx) if (shader->key.as_es) { params[ctx->param_oc_lds = num_params++] = ctx->i32; - params[ctx->param_tess_offchip = num_params++] = ctx->i32; + params[num_params++] = ctx->i32; params[ctx->param_es2gs_offset = num_params++] = ctx->i32; } else { - params[ctx->param_tess_offchip = num_params++] = ctx->i32; + params[num_params++] = ctx->i32; declare_streamout_params(ctx, &shader->selector->so, params, ctx->i32, &num_params); params[ctx->param_oc_lds = num_params++] = ctx->i32; @@ -5783,8 +5562,7 @@ static void create_function(struct si_shader_context *ctx) LOCAL_ADDR_SPACE); if ((ctx->type == PIPE_SHADER_VERTEX && shader->key.as_ls) || - ctx->type == PIPE_SHADER_TESS_CTRL || - ctx->type == PIPE_SHADER_TESS_EVAL) + ctx->type == PIPE_SHADER_TESS_CTRL) declare_tess_lds(ctx); } @@ -5794,8 +5572,8 @@ static void create_function(struct si_shader_context *ctx) */ static void preload_ring_buffers(struct si_shader_context *ctx) { - struct gallivm_state *gallivm = - ctx->soa.bld_base.base.gallivm; + struct gallivm_state *gallivm = ctx->bld_base.base.gallivm; + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS); @@ -5811,22 +5589,84 @@ static void preload_ring_buffers(struct si_shader_context *ctx) LLVMValueRef offset = lp_build_const_int32(gallivm, ring); ctx->esgs_ring = - build_indexed_load_const(ctx, buf_ptr, offset); + ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset); } if (ctx->shader->is_gs_copy_shader) { - LLVMValueRef offset = lp_build_const_int32(gallivm, SI_VS_RING_GSVS); + LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS); ctx->gsvs_ring[0] = - build_indexed_load_const(ctx, buf_ptr, offset); - } - if (ctx->type == PIPE_SHADER_GEOMETRY) { - int i; - for (i = 0; i < 4; i++) { - LLVMValueRef offset = lp_build_const_int32(gallivm, SI_GS_RING_GSVS0 + i); + ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset); + } else if (ctx->type == PIPE_SHADER_GEOMETRY) { + const struct si_shader_selector *sel = ctx->shader->selector; + struct lp_build_context *uint = &ctx->bld_base.uint_bld; + LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS); + LLVMValueRef base_ring; + + base_ring = ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset); + + /* The conceptual layout of the GSVS ring is + * v0c0 .. vLv0 v0c1 .. vLc1 .. + * but the real memory layout is swizzled across + * threads: + * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL + * t16v0c0 .. + * Override the buffer descriptor accordingly. + */ + LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2); + uint64_t stream_offset = 0; - ctx->gsvs_ring[i] = - build_indexed_load_const(ctx, buf_ptr, offset); + for (unsigned stream = 0; stream < 4; ++stream) { + unsigned num_components; + unsigned stride; + unsigned num_records; + LLVMValueRef ring, tmp; + + num_components = sel->info.num_stream_output_components[stream]; + if (!num_components) + continue; + + stride = 4 * num_components * sel->gs_max_out_vertices; + + /* Limit on the stride field for <= CIK. */ + assert(stride < (1 << 14)); + + num_records = 64; + + ring = LLVMBuildBitCast(builder, base_ring, v2i64, ""); + tmp = LLVMBuildExtractElement(builder, ring, uint->zero, ""); + tmp = LLVMBuildAdd(builder, tmp, + LLVMConstInt(ctx->i64, + stream_offset, 0), ""); + stream_offset += stride * 64; + + ring = LLVMBuildInsertElement(builder, ring, tmp, uint->zero, ""); + ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, ""); + tmp = LLVMBuildExtractElement(builder, ring, uint->one, ""); + tmp = LLVMBuildOr(builder, tmp, + LLVMConstInt(ctx->i32, + S_008F04_STRIDE(stride) | + S_008F04_SWIZZLE_ENABLE(1), 0), ""); + ring = LLVMBuildInsertElement(builder, ring, tmp, uint->one, ""); + ring = LLVMBuildInsertElement(builder, ring, + LLVMConstInt(ctx->i32, num_records, 0), + LLVMConstInt(ctx->i32, 2, 0), ""); + ring = LLVMBuildInsertElement(builder, ring, + LLVMConstInt(ctx->i32, + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1) | /* element_size = 4 (bytes) */ + S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */ + S_008F0C_ADD_TID_ENABLE(1), + 0), + LLVMConstInt(ctx->i32, 3, 0), ""); + ring = LLVMBuildBitCast(builder, ring, ctx->v16i8, ""); + + ctx->gsvs_ring[stream] = ring; } } } @@ -5835,8 +5675,7 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx, LLVMValueRef param_rw_buffers, unsigned param_pos_fixed_pt) { - struct lp_build_tgsi_context *bld_base = - &ctx->soa.bld_base; + struct lp_build_tgsi_context *bld_base = &ctx->bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef slot, desc, offset, row, bit, address[2]; @@ -5850,7 +5689,7 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx, /* Load the buffer descriptor. */ slot = lp_build_const_int32(gallivm, SI_PS_CONST_POLY_STIPPLE); - desc = build_indexed_load_const(ctx, param_rw_buffers, slot); + desc = ac_build_indexed_load_const(&ctx->ac, param_rw_buffers, slot); /* The stipple pattern is 32x32, each row has 32 bits. */ offset = LLVMBuildMul(builder, address[1], @@ -5863,23 +5702,24 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx, /* The intrinsic kills the thread if arg < 0. */ bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0), LLVMConstReal(ctx->f32, -1), ""); - lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, 0); + lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, + LP_FUNC_ATTR_LEGACY); } -void si_shader_binary_read_config(struct radeon_shader_binary *binary, +void si_shader_binary_read_config(struct ac_shader_binary *binary, struct si_shader_config *conf, unsigned symbol_offset) { unsigned i; const unsigned char *config = - radeon_shader_binary_config_start(binary, symbol_offset); + ac_shader_binary_config_start(binary, symbol_offset); bool really_needs_scratch = false; /* LLVM adds SGPR spills to the scratch size. * Find out if we really need the scratch buffer. */ for (i = 0; i < binary->reloc_count; i++) { - const struct radeon_shader_reloc *reloc = &binary->relocs[i]; + const struct ac_shader_reloc *reloc = &binary->relocs[i]; if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) || !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) { @@ -5969,7 +5809,7 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx, S_008F04_STRIDE(config->scratch_bytes_per_wave / 64); for (i = 0 ; i < shader->binary.reloc_count; i++) { - const struct radeon_shader_reloc *reloc = + const struct ac_shader_reloc *reloc = &shader->binary.relocs[i]; if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) { util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset, @@ -5994,11 +5834,11 @@ static unsigned si_get_shader_binary_size(struct si_shader *shader) int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) { - const struct radeon_shader_binary *prolog = + const struct ac_shader_binary *prolog = shader->prolog ? &shader->prolog->binary : NULL; - const struct radeon_shader_binary *epilog = + const struct ac_shader_binary *epilog = shader->epilog ? &shader->epilog->binary : NULL; - const struct radeon_shader_binary *mainb = &shader->binary; + const struct ac_shader_binary *mainb = &shader->binary; unsigned bo_size = si_get_shader_binary_size(shader) + (!epilog ? mainb->rodata_size : 0); unsigned char *ptr; @@ -6010,7 +5850,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) r600_resource_reference(&shader->bo, NULL); shader->bo = (struct r600_resource*) pipe_buffer_create(&sscreen->b.b, 0, - PIPE_USAGE_IMMUTABLE, bo_size); + PIPE_USAGE_IMMUTABLE, + align(bo_size, SI_CPDMA_ALIGNMENT)); if (!shader->bo) return -ENOMEM; @@ -6035,7 +5876,7 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) return 0; } -static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary, +static void si_shader_dump_disassembly(const struct ac_shader_binary *binary, struct pipe_debug_callback *debug, const char *name, FILE *file) { @@ -6087,7 +5928,8 @@ static void si_shader_dump_stats(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug, unsigned processor, - FILE *file) + FILE *file, + bool check_debug_option) { struct si_shader_config *conf = &shader->config; unsigned num_inputs = shader->selector ? shader->selector->info.num_inputs : 0; @@ -6138,7 +5980,7 @@ static void si_shader_dump_stats(struct si_screen *sscreen, if (lds_per_wave) max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave); - if (file != stderr || + if (!check_debug_option || r600_can_dump_shader(&sscreen->b, processor)) { if (processor == PIPE_SHADER_FRAGMENT) { fprintf(file, "*** SHADER CONFIG ***\n" @@ -6175,8 +6017,7 @@ static void si_shader_dump_stats(struct si_screen *sscreen, conf->spilled_vgprs, conf->private_mem_vgprs); } -static const char *si_get_shader_name(struct si_shader *shader, - unsigned processor) +const char *si_get_shader_name(struct si_shader *shader, unsigned processor) { switch (processor) { case PIPE_SHADER_VERTEX: @@ -6209,19 +6050,19 @@ static const char *si_get_shader_name(struct si_shader *shader, void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug, unsigned processor, - FILE *file) + FILE *file, bool check_debug_option) { - if (file != stderr || + if (!check_debug_option || r600_can_dump_shader(&sscreen->b, processor)) si_dump_shader_key(processor, &shader->key, file); - if (file != stderr && shader->binary.llvm_ir_string) { + if (!check_debug_option && shader->binary.llvm_ir_string) { fprintf(file, "\n%s - main shader part - LLVM IR:\n\n", si_get_shader_name(shader, processor)); fprintf(file, "%s\n", shader->binary.llvm_ir_string); } - if (file != stderr || + if (!check_debug_option || (r600_can_dump_shader(&sscreen->b, processor) && !(sscreen->b.debug_flags & DBG_NO_ASM))) { fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor)); @@ -6238,11 +6079,12 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, fprintf(file, "\n"); } - si_shader_dump_stats(sscreen, shader, debug, processor, file); + si_shader_dump_stats(sscreen, shader, debug, processor, file, + check_debug_option); } int si_compile_llvm(struct si_screen *sscreen, - struct radeon_shader_binary *binary, + struct ac_shader_binary *binary, struct si_shader_config *conf, LLVMTargetMachineRef tm, LLVMModuleRef mod, @@ -6258,7 +6100,7 @@ int si_compile_llvm(struct si_screen *sscreen, if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) { fprintf(stderr, "%s LLVM IR:\n\n", name); - LLVMDumpModule(mod); + ac_dump_module(mod); fprintf(stderr, "\n"); } } @@ -6329,7 +6171,8 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, struct si_shader_context ctx; struct si_shader *shader; struct gallivm_state *gallivm = &ctx.gallivm; - struct lp_build_tgsi_context *bld_base = &ctx.soa.bld_base; + LLVMBuilderRef builder; + struct lp_build_tgsi_context *bld_base = &ctx.bld_base; struct lp_build_context *uint = &bld_base->uint_bld; struct si_shader_output_values *outputs; struct tgsi_shader_info *gsinfo = &gs_selector->info; @@ -6354,7 +6197,8 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, si_init_shader_ctx(&ctx, sscreen, shader, tm); ctx.type = PIPE_SHADER_VERTEX; - create_meta_data(&ctx); + builder = gallivm->builder; + create_function(&ctx); preload_ring_buffers(&ctx); @@ -6370,36 +6214,92 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, args[7] = uint->one; /* SLC */ args[8] = uint->zero; /* TFE */ - /* Fetch vertex data from GSVS ring */ + /* Fetch the vertex stream ID.*/ + LLVMValueRef stream_id; + + if (gs_selector->so.num_outputs) + stream_id = unpack_param(&ctx, ctx.param_streamout_config, 24, 2); + else + stream_id = uint->zero; + + /* Fill in output information. */ for (i = 0; i < gsinfo->num_outputs; ++i) { - unsigned chan; + outputs[i].semantic_name = gsinfo->output_semantic_name[i]; + outputs[i].semantic_index = gsinfo->output_semantic_index[i]; + + for (int chan = 0; chan < 4; chan++) { + outputs[i].vertex_stream[chan] = + (gsinfo->output_streams[i] >> (2 * chan)) & 3; + } + } - outputs[i].name = gsinfo->output_semantic_name[i]; - outputs[i].sid = gsinfo->output_semantic_index[i]; + LLVMBasicBlockRef end_bb; + LLVMValueRef switch_inst; - for (chan = 0; chan < 4; chan++) { - args[2] = lp_build_const_int32(gallivm, - (i * 4 + chan) * - gs_selector->gs_max_out_vertices * 16 * 4); + end_bb = LLVMAppendBasicBlockInContext(gallivm->context, ctx.main_fn, "end"); + switch_inst = LLVMBuildSwitch(builder, stream_id, end_bb, 4); + + for (int stream = 0; stream < 4; stream++) { + LLVMBasicBlockRef bb; + unsigned offset; + + if (!gsinfo->num_stream_output_components[stream]) + continue; + + if (stream > 0 && !gs_selector->so.num_outputs) + continue; - outputs[i].values[chan] = - LLVMBuildBitCast(gallivm->builder, + bb = LLVMInsertBasicBlockInContext(gallivm->context, end_bb, "out"); + LLVMAddCase(switch_inst, lp_build_const_int32(gallivm, stream), bb); + LLVMPositionBuilderAtEnd(builder, bb); + + /* Fetch vertex data from GSVS ring */ + offset = 0; + for (i = 0; i < gsinfo->num_outputs; ++i) { + for (unsigned chan = 0; chan < 4; chan++) { + if (!(gsinfo->output_usagemask[i] & (1 << chan)) || + outputs[i].vertex_stream[chan] != stream) { + outputs[i].values[chan] = ctx.bld_base.base.undef; + continue; + } + + args[2] = lp_build_const_int32( + gallivm, + offset * gs_selector->gs_max_out_vertices * 16 * 4); + offset++; + + outputs[i].values[chan] = + LLVMBuildBitCast(gallivm->builder, lp_build_intrinsic(gallivm->builder, - "llvm.SI.buffer.load.dword.i32.i32", - ctx.i32, args, 9, - LP_FUNC_ATTR_READONLY), + "llvm.SI.buffer.load.dword.i32.i32", + ctx.i32, args, 9, + LP_FUNC_ATTR_READONLY | + LP_FUNC_ATTR_LEGACY), ctx.f32, ""); + } + } + + /* Streamout and exports. */ + if (gs_selector->so.num_outputs) { + si_llvm_emit_streamout(&ctx, outputs, + gsinfo->num_outputs, + stream); } + + if (stream == 0) + si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs); + + LLVMBuildBr(builder, end_bb); } - si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs); + LLVMPositionBuilderAtEnd(builder, end_bb); LLVMBuildRetVoid(gallivm->builder); /* Dump LLVM IR before any optimization passes */ if (sscreen->b.debug_flags & DBG_PREOPT_IR && r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY)) - LLVMDumpModule(bld_base->base.gallivm->module); + ac_dump_module(bld_base->base.gallivm->module); si_llvm_finalize_module(&ctx, r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_GEOMETRY)); @@ -6413,7 +6313,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, if (r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY)) fprintf(stderr, "GS Copy Shader:\n"); si_shader_dump(sscreen, ctx.shader, debug, - PIPE_SHADER_GEOMETRY, stderr); + PIPE_SHADER_GEOMETRY, stderr, true); r = si_shader_binary_upload(sscreen, ctx.shader); } @@ -6445,7 +6345,11 @@ static void si_dump_shader_key(unsigned shader, struct si_shader_key *key, fprintf(f, " part.vs.epilog.export_prim_id = %u\n", key->part.vs.epilog.export_prim_id); fprintf(f, " as_es = %u\n", key->as_es); fprintf(f, " as_ls = %u\n", key->as_ls); - fprintf(f, " mono.vs.fix_fetch = 0x%x\n", key->mono.vs.fix_fetch); + + fprintf(f, " mono.vs.fix_fetch = {"); + for (i = 0; i < SI_MAX_ATTRIBS; i++) + fprintf(f, !i ? "%u" : ", %u", key->mono.vs.fix_fetch[i]); + fprintf(f, "}\n"); break; case PIPE_SHADER_TESS_CTRL: @@ -6477,6 +6381,7 @@ static void si_dump_shader_key(unsigned shader, struct si_shader_key *key, fprintf(f, " part.ps.prolog.bc_optimize_for_linear = %u\n", key->part.ps.prolog.bc_optimize_for_linear); fprintf(f, " part.ps.epilog.spi_shader_col_format = 0x%x\n", key->part.ps.epilog.spi_shader_col_format); fprintf(f, " part.ps.epilog.color_is_int8 = 0x%X\n", key->part.ps.epilog.color_is_int8); + fprintf(f, " part.ps.epilog.color_is_int10 = 0x%X\n", key->part.ps.epilog.color_is_int10); fprintf(f, " part.ps.epilog.last_cbuf = %u\n", key->part.ps.epilog.last_cbuf); fprintf(f, " part.ps.epilog.alpha_func = %u\n", key->part.ps.epilog.alpha_func); fprintf(f, " part.ps.epilog.alpha_to_one = %u\n", key->part.ps.epilog.alpha_to_one); @@ -6510,7 +6415,7 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, (shader && shader->selector) ? &shader->selector->info : NULL, (shader && shader->selector) ? shader->selector->tokens : NULL); - bld_base = &ctx->soa.bld_base; + bld_base = &ctx->bld_base; bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action; @@ -6585,7 +6490,7 @@ static bool si_eliminate_const_output(struct si_shader_context *ctx, for (i = 0; i < 4; i++) { LLVMBool loses_info; - LLVMValueRef p = LLVMGetOperand(inst, 5 + i); + LLVMValueRef p = LLVMGetOperand(inst, (HAVE_LLVM >= 0x0500 ? 2 : 5) + i); /* It's a constant expression. Undef outputs are eliminated too. */ if (LLVMIsUndef(p)) { @@ -6669,10 +6574,12 @@ static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx) unsigned num_args = LLVMCountParams(callee); /* Check if this is an export instruction. */ - if (num_args != 9 || strcmp(name, "llvm.SI.export")) + if ((num_args != 9 && num_args != 8) || + (strcmp(name, "llvm.SI.export") && + strcmp(name, "llvm.amdgcn.exp."))) continue; - LLVMValueRef arg = LLVMGetOperand(cur, 3); + LLVMValueRef arg = LLVMGetOperand(cur, HAVE_LLVM >= 0x0500 ? 0 : 3); unsigned target = LLVMConstIntGetZExtValue(arg); if (target < V_008DFC_SQ_EXP_PARAM) @@ -6756,7 +6663,7 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, struct si_shader *shader) { struct si_shader_selector *sel = shader->selector; - struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; + struct lp_build_tgsi_context *bld_base = &ctx->bld_base; switch (ctx->type) { case PIPE_SHADER_VERTEX: @@ -6797,7 +6704,6 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, return false; } - create_meta_data(ctx); create_function(ctx); preload_ring_buffers(ctx); @@ -7305,7 +7211,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, shader->info.uses_instanceid = sel->info.uses_instanceid; - bld_base = &ctx.soa.bld_base; + bld_base = &ctx.bld_base; ctx.load_system_value = declare_system_value; if (!si_compile_tgsi_main(&ctx, shader)) { @@ -7403,7 +7309,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, /* Dump LLVM IR before any optimization passes */ if (sscreen->b.debug_flags & DBG_PREOPT_IR && r600_can_dump_shader(&sscreen->b, ctx.type)) - LLVMDumpModule(mod); + ac_dump_module(mod); si_llvm_finalize_module(&ctx, r600_extra_shader_checks(&sscreen->b, ctx.type)); @@ -7704,7 +7610,7 @@ static void si_build_vs_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { struct gallivm_state *gallivm = &ctx->gallivm; - struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; + struct lp_build_tgsi_context *bld_base = &ctx->bld_base; LLVMTypeRef params[5]; int num_params, i; @@ -7722,24 +7628,21 @@ static void si_build_vs_epilog_function(struct si_shader_context *ctx, /* Emit exports. */ if (key->vs_epilog.states.export_prim_id) { struct lp_build_context *base = &bld_base->base; - struct lp_build_context *uint = &bld_base->uint_bld; - LLVMValueRef args[9]; - - args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */ - args[1] = uint->zero; /* whether the EXEC mask is valid */ - args[2] = uint->zero; /* DONE bit */ - args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_PARAM + - key->vs_epilog.prim_id_param_offset); - args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */ - args[5] = LLVMGetParam(ctx->main_fn, + struct ac_export_args args; + + args.enabled_channels = 0x1; /* enabled channels */ + args.valid_mask = 0; /* whether the EXEC mask is valid */ + args.done = 0; /* DONE bit */ + args.target = V_008DFC_SQ_EXP_PARAM + + key->vs_epilog.prim_id_param_offset; + args.compr = 0; /* COMPR flag (0 = 32-bit export) */ + args.out[0] = LLVMGetParam(ctx->main_fn, VS_EPILOG_PRIMID_LOC); /* X */ - args[6] = base->undef; /* Y */ - args[7] = base->undef; /* Z */ - args[8] = base->undef; /* W */ + args.out[1] = base->undef; /* Y */ + args.out[2] = base->undef; /* Z */ + args.out[3] = base->undef; /* W */ - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - LLVMVoidTypeInContext(base->gallivm->context), - args, 9, 0); + ac_emit_export(&ctx->ac, &args); } LLVMBuildRetVoid(gallivm->builder); @@ -7825,7 +7728,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { struct gallivm_state *gallivm = &ctx->gallivm; - struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; + struct lp_build_tgsi_context *bld_base = &ctx->bld_base; LLVMTypeRef params[16]; LLVMValueRef func; int last_sgpr, num_params; @@ -8160,7 +8063,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { struct gallivm_state *gallivm = &ctx->gallivm; - struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; + struct lp_build_tgsi_context *bld_base = &ctx->bld_base; LLVMTypeRef params[16+8*4+3]; LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; int last_sgpr, num_params, i; @@ -8382,7 +8285,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct pipe_debug_callback *debug) { struct si_shader_selector *sel = shader->selector; - struct si_shader *mainp = sel->main_shader_part; + struct si_shader *mainp = *si_get_main_shader_part(sel, &shader->key); int r; /* LS, ES, VS are compiled on demand if the main part hasn't been @@ -8471,7 +8374,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, si_fix_resource_usage(sscreen, shader); si_shader_dump(sscreen, shader, debug, sel->info.processor, - stderr); + stderr, true); /* Upload. */ r = si_shader_binary_upload(sscreen, shader);