X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_shader.c;h=49538e823f4ff84701e42e65c730d183ac358c06;hb=68586bdd215f5f1af956f9e2e71039a5011a7ce1;hp=e3676dddd6714fe27262fe2161bd7f23696f96d3;hpb=194449a405bfc391cf730f88ef45dee768960c4a;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index e3676dddd67..49538e823f4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -22,16 +22,12 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include - #include "util/u_memory.h" #include "tgsi/tgsi_strings.h" #include "tgsi/tgsi_from_mesa.h" #include "ac_exp_param.h" -#include "ac_shader_util.h" #include "ac_rtld.h" -#include "ac_llvm_util.h" #include "si_shader_internal.h" #include "si_pipe.h" #include "sid.h" @@ -49,8 +45,6 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f); static void si_build_vs_prolog_function(struct si_shader_context *ctx, union si_shader_part_key *key); -static void si_fix_resource_usage(struct si_screen *sscreen, - struct si_shader *shader); /** Whether the shader runs as a combination of multiple API shaders */ static bool is_multi_part_shader(struct si_shader_context *ctx) @@ -428,122 +422,6 @@ LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, } } -static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, - unsigned input_index, - unsigned vtx_offset_param, - LLVMTypeRef type, - unsigned swizzle) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - struct si_shader *shader = ctx->shader; - LLVMValueRef vtx_offset, soffset; - struct si_shader_info *info = &shader->selector->info; - unsigned semantic_name = info->input_semantic_name[input_index]; - unsigned semantic_index = info->input_semantic_index[input_index]; - unsigned param; - LLVMValueRef value; - - param = si_shader_io_get_unique_index(semantic_name, semantic_index, false); - - /* GFX9 has the ESGS ring in LDS. */ - if (ctx->screen->info.chip_class >= GFX9) { - unsigned index = vtx_offset_param; - - switch (index / 2) { - case 0: - vtx_offset = si_unpack_param(ctx, ctx->gs_vtx01_offset, - index % 2 ? 16 : 0, 16); - break; - case 1: - vtx_offset = si_unpack_param(ctx, ctx->gs_vtx23_offset, - index % 2 ? 16 : 0, 16); - break; - case 2: - vtx_offset = si_unpack_param(ctx, ctx->gs_vtx45_offset, - index % 2 ? 16 : 0, 16); - break; - default: - assert(0); - return NULL; - } - - unsigned offset = param * 4 + swizzle; - vtx_offset = LLVMBuildAdd(ctx->ac.builder, vtx_offset, - LLVMConstInt(ctx->i32, offset, false), ""); - - LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->esgs_ring, vtx_offset); - LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, ptr, ""); - if (llvm_type_is_64bit(ctx, type)) { - ptr = LLVMBuildGEP(ctx->ac.builder, ptr, - &ctx->ac.i32_1, 1, ""); - LLVMValueRef values[2] = { - value, - LLVMBuildLoad(ctx->ac.builder, ptr, "") - }; - value = ac_build_gather_values(&ctx->ac, values, 2); - } - return LLVMBuildBitCast(ctx->ac.builder, value, type, ""); - } - - /* GFX6: input load from the ESGS ring in memory. */ - if (swizzle == ~0) { - LLVMValueRef values[4]; - unsigned chan; - for (chan = 0; chan < 4; chan++) { - values[chan] = si_llvm_load_input_gs(abi, input_index, vtx_offset_param, - type, chan); - } - return ac_build_gather_values(&ctx->ac, values, 4); - } - - /* Get the vertex offset parameter on GFX6. */ - LLVMValueRef gs_vtx_offset = ac_get_arg(&ctx->ac, - ctx->gs_vtx_offset[vtx_offset_param]); - - vtx_offset = LLVMBuildMul(ctx->ac.builder, gs_vtx_offset, - LLVMConstInt(ctx->i32, 4, 0), ""); - - soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle) * 256, 0); - - value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0, - vtx_offset, soffset, 0, ac_glc, true, false); - if (llvm_type_is_64bit(ctx, type)) { - LLVMValueRef value2; - soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle + 1) * 256, 0); - - value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, - ctx->i32_0, vtx_offset, soffset, - 0, ac_glc, true, false); - return si_build_gather_64bit(ctx, type, value, value2); - } - return LLVMBuildBitCast(ctx->ac.builder, value, type, ""); -} - -static LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi, - unsigned location, - unsigned driver_location, - unsigned component, - unsigned num_components, - unsigned vertex_index, - unsigned const_index, - LLVMTypeRef type) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - - LLVMValueRef value[4]; - for (unsigned i = 0; i < num_components; i++) { - unsigned offset = i; - if (llvm_type_is_64bit(ctx, type)) - offset *= 2; - - offset += component; - value[i + component] = si_llvm_load_input_gs(&ctx->abi, driver_location / 4 + const_index, - vertex_index, type, offset); - } - - return ac_build_varying_gather_values(&ctx->ac, value, num_components, component); -} - static LLVMValueRef get_base_vertex(struct ac_shader_abi *abi) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); @@ -610,81 +488,6 @@ void si_declare_compute_memory(struct si_shader_context *ctx) ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, ""); } -static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx) -{ - LLVMValueRef ptr = - ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers); - struct si_shader_selector *sel = ctx->shader->selector; - - /* Do the bounds checking with a descriptor, because - * doing computation and manual bounds checking of 64-bit - * addresses generates horrible VALU code with very high - * VGPR usage and very low SIMD occupancy. - */ - ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, ""); - - LLVMValueRef desc0, desc1; - desc0 = ptr; - desc1 = LLVMConstInt(ctx->i32, - S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); - - uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - - if (ctx->screen->info.chip_class >= GFX10) - rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | - S_008F0C_RESOURCE_LEVEL(1); - else - rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); - - LLVMValueRef desc_elems[] = { - desc0, - desc1, - LLVMConstInt(ctx->i32, sel->info.constbuf0_num_slots * 16, 0), - LLVMConstInt(ctx->i32, rsrc3, false) - }; - - return ac_build_gather_values(&ctx->ac, desc_elems, 4); -} - -static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - struct si_shader_selector *sel = ctx->shader->selector; - - LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers); - - if (sel->info.const_buffers_declared == 1 && - sel->info.shader_buffers_declared == 0) { - return load_const_buffer_desc_fast_path(ctx); - } - - index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers); - index = LLVMBuildAdd(ctx->ac.builder, index, - LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), ""); - - return ac_build_load_to_sgpr(&ctx->ac, ptr, index); -} - -static LLVMValueRef -load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - LLVMValueRef rsrc_ptr = ac_get_arg(&ctx->ac, - ctx->const_and_shader_buffers); - - index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers); - index = LLVMBuildSub(ctx->ac.builder, - LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0), - index, ""); - - return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index); -} - /* Initialize arguments for the shader export intrinsic */ static void si_llvm_init_vs_export_args(struct si_shader_context *ctx, LLVMValueRef *values, @@ -816,9 +619,9 @@ void si_emit_streamout_output(struct si_shader_context *ctx, * Write streamout data to buffers for vertex stream @p stream (different * vertex streams can occur for GS copy shaders). */ -static void si_llvm_emit_streamout(struct si_shader_context *ctx, - struct si_shader_output_values *outputs, - unsigned noutput, unsigned stream) +void si_llvm_emit_streamout(struct si_shader_context *ctx, + struct si_shader_output_values *outputs, + unsigned noutput, unsigned stream) { struct si_shader_selector *sel = ctx->shader->selector; struct pipe_stream_output_info *so = &sel->so; @@ -1178,141 +981,6 @@ void si_llvm_export_vs(struct si_shader_context *ctx, si_build_param_exports(ctx, outputs, noutput); } -/* Pass GS inputs from ES to GS on GFX9. */ -static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) -{ - LLVMValueRef ret = ctx->return_value; - - ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0); - ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1); - if (ctx->shader->key.as_ngg) - ret = si_insert_input_ptr(ctx, ret, ctx->gs_tg_info, 2); - else - ret = si_insert_input_ret(ctx, ret, ctx->gs2vs_offset, 2); - ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3); - ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5); - - ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, - 8 + SI_SGPR_RW_BUFFERS); - ret = si_insert_input_ptr(ctx, ret, - ctx->bindless_samplers_and_images, - 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); - if (ctx->screen->use_ngg) { - ret = si_insert_input_ptr(ctx, ret, ctx->vs_state_bits, - 8 + SI_SGPR_VS_STATE_BITS); - } - - unsigned vgpr; - if (ctx->type == PIPE_SHADER_VERTEX) - vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR; - else - vgpr = 8 + GFX9_TESGS_NUM_USER_SGPR; - - ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx01_offset, vgpr++); - ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx23_offset, vgpr++); - ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_prim_id, vgpr++); - ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_invocation_id, vgpr++); - ret = si_insert_input_ret_float(ctx, ret, ctx->gs_vtx45_offset, vgpr++); - ctx->return_value = ret; -} - -static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, - unsigned max_outputs, - LLVMValueRef *addrs) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - struct si_shader *es = ctx->shader; - struct si_shader_info *info = &es->selector->info; - LLVMValueRef lds_base = NULL; - unsigned chan; - int i; - - if (ctx->screen->info.chip_class >= GFX9 && info->num_outputs) { - unsigned itemsize_dw = es->selector->esgs_itemsize / 4; - LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac); - LLVMValueRef wave_idx = si_unpack_param(ctx, ctx->merged_wave_info, 24, 4); - vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx, - LLVMBuildMul(ctx->ac.builder, wave_idx, - LLVMConstInt(ctx->i32, ctx->ac.wave_size, false), ""), ""); - lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx, - LLVMConstInt(ctx->i32, itemsize_dw, 0), ""); - } - - for (i = 0; i < info->num_outputs; i++) { - int param; - - if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX || - info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER) - continue; - - param = si_shader_io_get_unique_index(info->output_semantic_name[i], - info->output_semantic_index[i], false); - - for (chan = 0; chan < 4; chan++) { - if (!(info->output_usagemask[i] & (1 << chan))) - continue; - - LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""); - out_val = ac_to_integer(&ctx->ac, out_val); - - /* GFX9 has the ESGS ring in LDS. */ - if (ctx->screen->info.chip_class >= GFX9) { - LLVMValueRef idx = LLVMConstInt(ctx->i32, param * 4 + chan, false); - idx = LLVMBuildAdd(ctx->ac.builder, lds_base, idx, ""); - ac_build_indexed_store(&ctx->ac, ctx->esgs_ring, idx, out_val); - continue; - } - - ac_build_buffer_store_dword(&ctx->ac, - ctx->esgs_ring, - out_val, 1, NULL, - ac_get_arg(&ctx->ac, ctx->es2gs_offset), - (4 * param + chan) * 4, - ac_glc | ac_slc | ac_swizzled); - } - } - - if (ctx->screen->info.chip_class >= GFX9) - si_set_es_return_value_for_gs(ctx); -} - -static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx) -{ - if (ctx->screen->info.chip_class >= GFX9) - return si_unpack_param(ctx, ctx->merged_wave_info, 16, 8); - else - return ac_get_arg(&ctx->ac, ctx->gs_wave_id); -} - -static void emit_gs_epilogue(struct si_shader_context *ctx) -{ - if (ctx->shader->key.as_ngg) { - gfx10_ngg_gs_emit_epilogue(ctx); - return; - } - - if (ctx->screen->info.chip_class >= GFX10) - LLVMBuildFence(ctx->ac.builder, LLVMAtomicOrderingRelease, false, ""); - - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, - si_get_gs_wave_id(ctx)); - - if (ctx->screen->info.chip_class >= GFX9) - ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label); -} - -static void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi, - unsigned max_outputs, - LLVMValueRef *addrs) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - struct si_shader_info UNUSED *info = &ctx->shader->selector->info; - - assert(info->num_outputs <= max_outputs); - - emit_gs_epilogue(ctx); -} - static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs) @@ -1389,106 +1057,6 @@ static void si_llvm_emit_prim_discard_cs_epilogue(struct ac_shader_abi *abi, ctx->return_value = ret; } -/* Emit one vertex from the geometry shader */ -static void si_llvm_emit_vertex(struct ac_shader_abi *abi, - unsigned stream, - LLVMValueRef *addrs) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - - if (ctx->shader->key.as_ngg) { - gfx10_ngg_gs_emit_vertex(ctx, stream, addrs); - return; - } - - struct si_shader_info *info = &ctx->shader->selector->info; - struct si_shader *shader = ctx->shader; - LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->gs2vs_offset); - LLVMValueRef gs_next_vertex; - LLVMValueRef can_emit; - unsigned chan, offset; - int i; - - /* Write vertex attribute values to GSVS ring */ - gs_next_vertex = LLVMBuildLoad(ctx->ac.builder, - ctx->gs_next_vertex[stream], - ""); - - /* If this thread has already emitted the declared maximum number of - * vertices, skip the write: excessive vertex emissions are not - * supposed to have any effect. - * - * If the shader has no writes to memory, kill it instead. This skips - * further memory loads and may allow LLVM to skip to the end - * altogether. - */ - can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex, - LLVMConstInt(ctx->i32, - shader->selector->gs_max_out_vertices, 0), ""); - - bool use_kill = !info->writes_memory; - if (use_kill) { - ac_build_kill_if_false(&ctx->ac, can_emit); - } else { - ac_build_ifcc(&ctx->ac, can_emit, 6505); - } - - offset = 0; - for (i = 0; i < info->num_outputs; i++) { - for (chan = 0; chan < 4; chan++) { - if (!(info->output_usagemask[i] & (1 << chan)) || - ((info->output_streams[i] >> (2 * chan)) & 3) != stream) - continue; - - LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""); - LLVMValueRef voffset = - LLVMConstInt(ctx->i32, offset * - shader->selector->gs_max_out_vertices, 0); - offset++; - - voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, ""); - voffset = LLVMBuildMul(ctx->ac.builder, voffset, - LLVMConstInt(ctx->i32, 4, 0), ""); - - out_val = ac_to_integer(&ctx->ac, out_val); - - ac_build_buffer_store_dword(&ctx->ac, - ctx->gsvs_ring[stream], - out_val, 1, - voffset, soffset, 0, - ac_glc | ac_slc | ac_swizzled); - } - } - - gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, ctx->i32_1, ""); - LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]); - - /* Signal vertex emission if vertex data was written. */ - if (offset) { - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), - si_get_gs_wave_id(ctx)); - } - - if (!use_kill) - ac_build_endif(&ctx->ac, 6505); -} - -/* Cut one primitive from the geometry shader */ -static void si_llvm_emit_primitive(struct ac_shader_abi *abi, - unsigned stream) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - - if (ctx->shader->key.as_ngg) { - LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]); - return; - } - - /* Signal primitive cut */ - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), - si_get_gs_wave_id(ctx)); -} - static void declare_streamout_params(struct si_shader_context *ctx, struct pipe_stream_output_info *so) { @@ -1708,7 +1276,7 @@ void si_add_arg_checked(struct ac_shader_args *args, ac_add_arg(args, file, registers, type, arg); } -static void create_function(struct si_shader_context *ctx) +void si_create_function(struct si_shader_context *ctx) { struct si_shader *shader = ctx->shader; LLVMTypeRef returns[AC_MAX_ARGS]; @@ -2106,144 +1674,6 @@ static void create_function(struct si_shader_context *ctx) } } -/* Ensure that the esgs ring is declared. - * - * We declare it with 64KB alignment as a hint that the - * pointer value will always be 0. - */ -static void declare_esgs_ring(struct si_shader_context *ctx) -{ - if (ctx->esgs_ring) - return; - - assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring")); - - ctx->esgs_ring = LLVMAddGlobalInAddressSpace( - ctx->ac.module, LLVMArrayType(ctx->i32, 0), - "esgs_ring", - AC_ADDR_SPACE_LDS); - LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage); - LLVMSetAlignment(ctx->esgs_ring, 64 * 1024); -} - -/** - * Load ESGS and GSVS ring buffer resource descriptors and save the variables - * for later use. - */ -static void preload_ring_buffers(struct si_shader_context *ctx) -{ - LLVMBuilderRef builder = ctx->ac.builder; - - LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers); - - if (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY) { - if (ctx->screen->info.chip_class <= GFX8) { - unsigned ring = - ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS - : SI_ES_RING_ESGS; - LLVMValueRef offset = LLVMConstInt(ctx->i32, ring, 0); - - ctx->esgs_ring = - ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset); - } else { - if (USE_LDS_SYMBOLS && LLVM_VERSION_MAJOR >= 9) { - /* Declare the ESGS ring as an explicit LDS symbol. */ - declare_esgs_ring(ctx); - } else { - ac_declare_lds_as_pointer(&ctx->ac); - ctx->esgs_ring = ctx->ac.lds; - } - } - } - - if (ctx->shader->is_gs_copy_shader) { - LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0); - - ctx->gsvs_ring[0] = - ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset); - } else if (ctx->type == PIPE_SHADER_GEOMETRY) { - const struct si_shader_selector *sel = ctx->shader->selector; - LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0); - LLVMValueRef base_ring; - - base_ring = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset); - - /* The conceptual layout of the GSVS ring is - * v0c0 .. vLv0 v0c1 .. vLc1 .. - * but the real memory layout is swizzled across - * threads: - * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL - * t16v0c0 .. - * Override the buffer descriptor accordingly. - */ - LLVMTypeRef v2i64 = LLVMVectorType(ctx->i64, 2); - uint64_t stream_offset = 0; - - for (unsigned stream = 0; stream < 4; ++stream) { - unsigned num_components; - unsigned stride; - unsigned num_records; - LLVMValueRef ring, tmp; - - num_components = sel->info.num_stream_output_components[stream]; - if (!num_components) - continue; - - stride = 4 * num_components * sel->gs_max_out_vertices; - - /* Limit on the stride field for <= GFX7. */ - assert(stride < (1 << 14)); - - num_records = ctx->ac.wave_size; - - ring = LLVMBuildBitCast(builder, base_ring, v2i64, ""); - tmp = LLVMBuildExtractElement(builder, ring, ctx->i32_0, ""); - tmp = LLVMBuildAdd(builder, tmp, - LLVMConstInt(ctx->i64, - stream_offset, 0), ""); - stream_offset += stride * ctx->ac.wave_size; - - ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->i32_0, ""); - ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, ""); - tmp = LLVMBuildExtractElement(builder, ring, ctx->i32_1, ""); - tmp = LLVMBuildOr(builder, tmp, - LLVMConstInt(ctx->i32, - S_008F04_STRIDE(stride) | - S_008F04_SWIZZLE_ENABLE(1), 0), ""); - ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->i32_1, ""); - ring = LLVMBuildInsertElement(builder, ring, - LLVMConstInt(ctx->i32, num_records, 0), - LLVMConstInt(ctx->i32, 2, 0), ""); - - uint32_t rsrc3 = - S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */ - S_008F0C_ADD_TID_ENABLE(1); - - if (ctx->ac.chip_class >= GFX10) { - rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | - S_008F0C_RESOURCE_LEVEL(1); - } else { - rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */ - } - - ring = LLVMBuildInsertElement(builder, ring, - LLVMConstInt(ctx->i32, rsrc3, false), - LLVMConstInt(ctx->i32, 3, 0), ""); - - ctx->gsvs_ring[stream] = ring; - } - } else if (ctx->type == PIPE_SHADER_TESS_EVAL) { - si_llvm_preload_tes_rings(ctx); - } -} - /* For the UMR disassembler. */ #define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */ #define DEBUGGER_NUM_MARKERS 5 @@ -2656,16 +2086,16 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, si_shader_dump_stats(sscreen, shader, file, check_debug_option); } -static int si_compile_llvm(struct si_screen *sscreen, - struct si_shader_binary *binary, - struct ac_shader_config *conf, - struct ac_llvm_compiler *compiler, - LLVMModuleRef mod, - struct pipe_debug_callback *debug, - enum pipe_shader_type shader_type, - unsigned wave_size, - const char *name, - bool less_optimized) +int si_compile_llvm(struct si_screen *sscreen, + struct si_shader_binary *binary, + struct ac_shader_config *conf, + struct ac_llvm_compiler *compiler, + LLVMModuleRef mod, + struct pipe_debug_callback *debug, + enum pipe_shader_type shader_type, + unsigned wave_size, + const char *name, + bool less_optimized) { unsigned count = p_atomic_inc_return(&sscreen->num_compilations); @@ -2724,155 +2154,6 @@ static int si_compile_llvm(struct si_screen *sscreen, return 0; } -/* Generate code for the hardware VS shader stage to go with a geometry shader */ -struct si_shader * -si_generate_gs_copy_shader(struct si_screen *sscreen, - struct ac_llvm_compiler *compiler, - struct si_shader_selector *gs_selector, - struct pipe_debug_callback *debug) -{ - struct si_shader_context ctx; - struct si_shader *shader; - LLVMBuilderRef builder; - struct si_shader_output_values outputs[SI_MAX_VS_OUTPUTS]; - struct si_shader_info *gsinfo = &gs_selector->info; - int i; - - - shader = CALLOC_STRUCT(si_shader); - if (!shader) - return NULL; - - /* We can leave the fence as permanently signaled because the GS copy - * shader only becomes visible globally after it has been compiled. */ - util_queue_fence_init(&shader->ready); - - shader->selector = gs_selector; - shader->is_gs_copy_shader = true; - - si_llvm_context_init(&ctx, sscreen, compiler, - si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false)); - ctx.shader = shader; - ctx.type = PIPE_SHADER_VERTEX; - - builder = ctx.ac.builder; - - create_function(&ctx); - preload_ring_buffers(&ctx); - - LLVMValueRef voffset = - LLVMBuildMul(ctx.ac.builder, ctx.abi.vertex_id, - LLVMConstInt(ctx.i32, 4, 0), ""); - - /* Fetch the vertex stream ID.*/ - LLVMValueRef stream_id; - - if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs) - stream_id = si_unpack_param(&ctx, ctx.streamout_config, 24, 2); - else - stream_id = ctx.i32_0; - - /* Fill in output information. */ - for (i = 0; i < gsinfo->num_outputs; ++i) { - outputs[i].semantic_name = gsinfo->output_semantic_name[i]; - outputs[i].semantic_index = gsinfo->output_semantic_index[i]; - - for (int chan = 0; chan < 4; chan++) { - outputs[i].vertex_stream[chan] = - (gsinfo->output_streams[i] >> (2 * chan)) & 3; - } - } - - LLVMBasicBlockRef end_bb; - LLVMValueRef switch_inst; - - end_bb = LLVMAppendBasicBlockInContext(ctx.ac.context, ctx.main_fn, "end"); - switch_inst = LLVMBuildSwitch(builder, stream_id, end_bb, 4); - - for (int stream = 0; stream < 4; stream++) { - LLVMBasicBlockRef bb; - unsigned offset; - - if (!gsinfo->num_stream_output_components[stream]) - continue; - - if (stream > 0 && !gs_selector->so.num_outputs) - continue; - - bb = LLVMInsertBasicBlockInContext(ctx.ac.context, end_bb, "out"); - LLVMAddCase(switch_inst, LLVMConstInt(ctx.i32, stream, 0), bb); - LLVMPositionBuilderAtEnd(builder, bb); - - /* Fetch vertex data from GSVS ring */ - offset = 0; - for (i = 0; i < gsinfo->num_outputs; ++i) { - for (unsigned chan = 0; chan < 4; chan++) { - if (!(gsinfo->output_usagemask[i] & (1 << chan)) || - outputs[i].vertex_stream[chan] != stream) { - outputs[i].values[chan] = LLVMGetUndef(ctx.f32); - continue; - } - - LLVMValueRef soffset = LLVMConstInt(ctx.i32, - offset * gs_selector->gs_max_out_vertices * 16 * 4, 0); - offset++; - - outputs[i].values[chan] = - ac_build_buffer_load(&ctx.ac, - ctx.gsvs_ring[0], 1, - ctx.i32_0, voffset, - soffset, 0, ac_glc | ac_slc, - true, false); - } - } - - /* Streamout and exports. */ - if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs) { - si_llvm_emit_streamout(&ctx, outputs, - gsinfo->num_outputs, - stream); - } - - if (stream == 0) - si_llvm_export_vs(&ctx, outputs, gsinfo->num_outputs); - - LLVMBuildBr(builder, end_bb); - } - - LLVMPositionBuilderAtEnd(builder, end_bb); - - LLVMBuildRetVoid(ctx.ac.builder); - - ctx.type = PIPE_SHADER_GEOMETRY; /* override for shader dumping */ - si_llvm_optimize_module(&ctx); - - bool ok = false; - if (si_compile_llvm(sscreen, &ctx.shader->binary, - &ctx.shader->config, ctx.compiler, - ctx.ac.module, - debug, PIPE_SHADER_GEOMETRY, ctx.ac.wave_size, - "GS Copy Shader", false) == 0) { - if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY)) - fprintf(stderr, "GS Copy Shader:\n"); - si_shader_dump(sscreen, ctx.shader, debug, stderr, true); - - if (!ctx.shader->config.scratch_bytes_per_wave) - ok = si_shader_binary_upload(sscreen, ctx.shader, 0); - else - ok = true; - } - - si_llvm_dispose(&ctx); - - if (!ok) { - FREE(shader); - shader = NULL; - } else { - si_fix_resource_usage(sscreen, shader); - } - return shader; -} - static void si_dump_shader_key_vs(const struct si_shader_key *key, const struct si_vs_prolog_bits *prolog, const char *prefix, FILE *f) @@ -3052,28 +2333,14 @@ static bool si_vs_needs_prolog(const struct si_shader_selector *sel, key->unpack_instance_id_from_vertex_id; } -LLVMValueRef si_is_es_thread(struct si_shader_context *ctx) -{ - /* Return true if the current thread should execute an ES thread. */ - return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, - ac_get_thread_id(&ctx->ac), - si_unpack_param(ctx, ctx->merged_wave_info, 0, 8), ""); -} - -LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx) -{ - /* Return true if the current thread should execute a GS thread. */ - return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, - ac_get_thread_id(&ctx->ac), - si_unpack_param(ctx, ctx->merged_wave_info, 8, 8), ""); -} - static bool si_build_main_function(struct si_shader_context *ctx, struct nir_shader *nir, bool free_nir) { struct si_shader *shader = ctx->shader; struct si_shader_selector *sel = shader->selector; + si_llvm_init_resource_callbacks(ctx); + switch (ctx->type) { case PIPE_SHADER_VERTEX: if (shader->key.as_ls) @@ -3102,10 +2369,7 @@ static bool si_build_main_function(struct si_shader_context *ctx, ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue; break; case PIPE_SHADER_GEOMETRY: - ctx->abi.load_inputs = si_nir_load_input_gs; - ctx->abi.emit_vertex = si_llvm_emit_vertex; - ctx->abi.emit_primitive = si_llvm_emit_primitive; - ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue; + si_llvm_init_gs_callbacks(ctx); break; case PIPE_SHADER_FRAGMENT: si_llvm_init_ps_callbacks(ctx); @@ -3118,11 +2382,15 @@ static bool si_build_main_function(struct si_shader_context *ctx, return false; } - ctx->abi.load_ubo = load_ubo; - ctx->abi.load_ssbo = load_ssbo; + si_create_function(ctx); - create_function(ctx); - preload_ring_buffers(ctx); + if (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY) + si_preload_esgs_ring(ctx); + + if (ctx->type == PIPE_SHADER_GEOMETRY) + si_preload_gs_rings(ctx); + else if (ctx->type == PIPE_SHADER_TESS_EVAL) + si_llvm_preload_tes_rings(ctx); if (ctx->type == PIPE_SHADER_TESS_CTRL && sel->info.tessfactors_are_def_in_all_invocs) { @@ -3172,7 +2440,7 @@ static bool si_build_main_function(struct si_shader_context *ctx, * avoids bank conflicts for SoA accesses. */ if (!gfx10_is_ngg_passthrough(shader)) - declare_esgs_ring(ctx); + si_llvm_declare_esgs_ring(ctx); /* This is really only needed when streamout and / or vertex * compaction is enabled. @@ -3324,129 +2592,6 @@ static void si_get_vs_prolog_key(const struct si_shader_info *info, shader_out->info.uses_instanceid = true; } -/** - * Build the GS prolog function. Rotate the input vertices for triangle strips - * with adjacency. - */ -static void si_build_gs_prolog_function(struct si_shader_context *ctx, - union si_shader_part_key *key) -{ - unsigned num_sgprs, num_vgprs; - LLVMBuilderRef builder = ctx->ac.builder; - LLVMTypeRef returns[AC_MAX_ARGS]; - LLVMValueRef func, ret; - - memset(&ctx->args, 0, sizeof(ctx->args)); - - if (ctx->screen->info.chip_class >= GFX9) { - if (key->gs_prolog.states.gfx9_prev_is_vs) - num_sgprs = 8 + GFX9_VSGS_NUM_USER_SGPR; - else - num_sgprs = 8 + GFX9_TESGS_NUM_USER_SGPR; - num_vgprs = 5; /* ES inputs are not needed by GS */ - } else { - num_sgprs = GFX6_GS_NUM_USER_SGPR + 2; - num_vgprs = 8; - } - - for (unsigned i = 0; i < num_sgprs; ++i) { - ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); - returns[i] = ctx->i32; - } - - for (unsigned i = 0; i < num_vgprs; ++i) { - ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); - returns[num_sgprs + i] = ctx->f32; - } - - /* Create the function. */ - si_llvm_create_func(ctx, "gs_prolog", returns, num_sgprs + num_vgprs, 0); - func = ctx->main_fn; - - /* Set the full EXEC mask for the prolog, because we are only fiddling - * with registers here. The main shader part will set the correct EXEC - * mask. - */ - if (ctx->screen->info.chip_class >= GFX9 && !key->gs_prolog.is_monolithic) - ac_init_exec_full_mask(&ctx->ac); - - /* Copy inputs to outputs. This should be no-op, as the registers match, - * but it will prevent the compiler from overwriting them unintentionally. - */ - ret = ctx->return_value; - for (unsigned i = 0; i < num_sgprs; i++) { - LLVMValueRef p = LLVMGetParam(func, i); - ret = LLVMBuildInsertValue(builder, ret, p, i, ""); - } - for (unsigned i = 0; i < num_vgprs; i++) { - LLVMValueRef p = LLVMGetParam(func, num_sgprs + i); - p = ac_to_float(&ctx->ac, p); - ret = LLVMBuildInsertValue(builder, ret, p, num_sgprs + i, ""); - } - - if (key->gs_prolog.states.tri_strip_adj_fix) { - /* Remap the input vertices for every other primitive. */ - const struct ac_arg gfx6_vtx_params[6] = { - { .used = true, .arg_index = num_sgprs }, - { .used = true, .arg_index = num_sgprs + 1 }, - { .used = true, .arg_index = num_sgprs + 3 }, - { .used = true, .arg_index = num_sgprs + 4 }, - { .used = true, .arg_index = num_sgprs + 5 }, - { .used = true, .arg_index = num_sgprs + 6 }, - }; - const struct ac_arg gfx9_vtx_params[3] = { - { .used = true, .arg_index = num_sgprs }, - { .used = true, .arg_index = num_sgprs + 1 }, - { .used = true, .arg_index = num_sgprs + 4 }, - }; - LLVMValueRef vtx_in[6], vtx_out[6]; - LLVMValueRef prim_id, rotate; - - if (ctx->screen->info.chip_class >= GFX9) { - for (unsigned i = 0; i < 3; i++) { - vtx_in[i*2] = si_unpack_param(ctx, gfx9_vtx_params[i], 0, 16); - vtx_in[i*2+1] = si_unpack_param(ctx, gfx9_vtx_params[i], 16, 16); - } - } else { - for (unsigned i = 0; i < 6; i++) - vtx_in[i] = ac_get_arg(&ctx->ac, gfx6_vtx_params[i]); - } - - prim_id = LLVMGetParam(func, num_sgprs + 2); - rotate = LLVMBuildTrunc(builder, prim_id, ctx->i1, ""); - - for (unsigned i = 0; i < 6; ++i) { - LLVMValueRef base, rotated; - base = vtx_in[i]; - rotated = vtx_in[(i + 4) % 6]; - vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, ""); - } - - if (ctx->screen->info.chip_class >= GFX9) { - for (unsigned i = 0; i < 3; i++) { - LLVMValueRef hi, out; - - hi = LLVMBuildShl(builder, vtx_out[i*2+1], - LLVMConstInt(ctx->i32, 16, 0), ""); - out = LLVMBuildOr(builder, vtx_out[i*2], hi, ""); - out = ac_to_float(&ctx->ac, out); - ret = LLVMBuildInsertValue(builder, ret, out, - gfx9_vtx_params[i].arg_index, ""); - } - } else { - for (unsigned i = 0; i < 6; i++) { - LLVMValueRef out; - - out = ac_to_float(&ctx->ac, vtx_out[i]); - ret = LLVMBuildInsertValue(builder, ret, out, - gfx6_vtx_params[i].arg_index, ""); - } - } - } - - LLVMBuildRet(builder, ret); -} - /** * Given a list of shader part functions, build a wrapper function that * runs them in sequence to form a monolithic shader. @@ -3900,7 +3045,7 @@ int si_compile_shader(struct si_screen *sscreen, gs_prolog_key.gs_prolog.states = shader->key.part.gs.prolog; gs_prolog_key.gs_prolog.is_monolithic = true; gs_prolog_key.gs_prolog.as_ngg = shader->key.as_ngg; - si_build_gs_prolog_function(&ctx, &gs_prolog_key); + si_llvm_build_gs_prolog(&ctx, &gs_prolog_key); gs_prolog = ctx.main_fn; /* ES main part */ @@ -3959,7 +3104,7 @@ int si_compile_shader(struct si_screen *sscreen, memset(&prolog_key, 0, sizeof(prolog_key)); prolog_key.gs_prolog.states = shader->key.part.gs.prolog; - si_build_gs_prolog_function(&ctx, &prolog_key); + si_llvm_build_gs_prolog(&ctx, &prolog_key); parts[0] = ctx.main_fn; si_build_wrapper_function(&ctx, parts, 2, 1, 0); @@ -4431,7 +3576,7 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen, shader->prolog2 = si_get_shader_part(sscreen, &sscreen->gs_prologs, PIPE_SHADER_GEOMETRY, true, &prolog_key, compiler, debug, - si_build_gs_prolog_function, + si_llvm_build_gs_prolog, "Geometry Shader Prolog"); return shader->prolog2 != NULL; } @@ -4722,8 +3867,7 @@ void si_multiwave_lds_size_workaround(struct si_screen *sscreen, *lds_size = MAX2(*lds_size, 8); } -static void si_fix_resource_usage(struct si_screen *sscreen, - struct si_shader *shader) +void si_fix_resource_usage(struct si_screen *sscreen, struct si_shader *shader) { unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */