X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_shader.c;h=68506b7a92cb105e061a578224071af006ec6b4b;hb=792a638b032d16fbe6404f9d90c34b3e0f1fb0b5;hp=2ab1833579ec782e129e7fb7bcaacb86d23badbd;hpb=612489bd5df5dc46a95e05c8882e28a24eb9bae9;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 2ab1833579e..68506b7a92c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1074,18 +1074,25 @@ static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, LLVMConstInt(ctx->i32, tf_offset, 0), ""); } + uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); + + if (ctx->screen->info.chip_class >= GFX10) + rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | + S_008F0C_OOB_SELECT(3) | + S_008F0C_RESOURCE_LEVEL(1); + else + rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + LLVMValueRef desc[4]; desc[0] = addr; desc[1] = LLVMConstInt(ctx->i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); desc[2] = LLVMConstInt(ctx->i32, 0xffffffff, 0); - desc[3] = LLVMConstInt(ctx->i32, - S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0); + desc[3] = LLVMConstInt(ctx->i32, rsrc3, false); return ac_build_gather_values(&ctx->ac, desc, 4); } @@ -2249,17 +2256,24 @@ static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *c desc1 = LLVMConstInt(ctx->i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); + uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); + + if (ctx->screen->info.chip_class >= GFX10) + rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | + S_008F0C_OOB_SELECT(3) | + S_008F0C_RESOURCE_LEVEL(1); + else + rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + LLVMValueRef desc_elems[] = { desc0, desc1, LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) * 16, 0), - LLVMConstInt(ctx->i32, - S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0) + LLVMConstInt(ctx->i32, rsrc3, false) }; return ac_build_gather_values(&ctx->ac, desc_elems, 4); @@ -3401,11 +3415,15 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) /* Pass GS inputs from ES to GS on GFX9. */ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) { + LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef ret = ctx->return_value; ret = si_insert_input_ptr(ctx, ret, 0, 0); ret = si_insert_input_ptr(ctx, ret, 1, 1); - ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2); + if (ctx->shader->key.as_ngg) + ret = LLVMBuildInsertValue(builder, ret, ctx->gs_tg_info, 2, ""); + else + ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); @@ -3555,6 +3573,11 @@ static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx) static void emit_gs_epilogue(struct si_shader_context *ctx) { + if (ctx->shader->key.as_ngg) { + gfx10_ngg_gs_emit_epilogue(ctx); + return; + } + ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, si_get_gs_wave_id(ctx)); @@ -4192,6 +4215,12 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, LLVMValueRef *addrs) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); + + if (ctx->shader->key.as_ngg) { + gfx10_ngg_gs_emit_vertex(ctx, stream, addrs); + return; + } + struct tgsi_shader_info *info = &ctx->shader->selector->info; struct si_shader *shader = ctx->shader; struct lp_build_if_state if_state; @@ -4284,6 +4313,11 @@ static void si_llvm_emit_primitive(struct ac_shader_abi *abi, { struct si_shader_context *ctx = si_shader_context_from_abi(abi); + if (ctx->shader->key.as_ngg) { + LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]); + return; + } + /* Signal primitive cut */ ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), si_get_gs_wave_id(ctx)); @@ -4513,12 +4547,23 @@ static void declare_vs_input_vgprs(struct si_shader_context *ctx, add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.vertex_id); if (shader->key.as_ls) { ctx->param_rel_auto_id = add_arg(fninfo, ARG_VGPR, ctx->i32); + if (ctx->screen->info.chip_class >= GFX10) { + add_arg(fninfo, ARG_VGPR, ctx->i32); /* user VGPR */ + add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id); + } else { + add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id); + add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */ + } + } else if (ctx->screen->info.chip_class == GFX10 && + !shader->is_gs_copy_shader) { + add_arg(fninfo, ARG_VGPR, ctx->i32); /* user vgpr */ + add_arg(fninfo, ARG_VGPR, ctx->i32); /* user vgpr */ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id); } else { add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id); ctx->param_vs_prim_id = add_arg(fninfo, ARG_VGPR, ctx->i32); + add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */ } - add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */ if (!shader->is_gs_copy_shader) { /* Vertex load indices. */ @@ -5058,18 +5103,27 @@ static void preload_ring_buffers(struct si_shader_context *ctx) ring = LLVMBuildInsertElement(builder, ring, LLVMConstInt(ctx->i32, num_records, 0), LLVMConstInt(ctx->i32, 2, 0), ""); + + uint32_t rsrc3 = + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */ + S_008F0C_ADD_TID_ENABLE(1); + + if (ctx->ac.chip_class >= GFX10) { + rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | + S_008F0C_OOB_SELECT(2) | + S_008F0C_RESOURCE_LEVEL(1); + } else { + rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */ + } + ring = LLVMBuildInsertElement(builder, ring, - LLVMConstInt(ctx->i32, - S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | - S_008F0C_ELEMENT_SIZE(1) | /* element_size = 4 (bytes) */ - S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */ - S_008F0C_ADD_TID_ENABLE(1), - 0), + LLVMConstInt(ctx->i32, rsrc3, false), LLVMConstInt(ctx->i32, 3, 0), ""); ctx->gsvs_ring[stream] = ring; @@ -5136,7 +5190,7 @@ static bool si_shader_binary_open(struct si_screen *screen, #undef add_part - struct ac_rtld_symbol lds_symbols[1]; + struct ac_rtld_symbol lds_symbols[2]; unsigned num_lds_symbols = 0; if (sel && screen->info.chip_class >= GFX9 && @@ -5150,6 +5204,13 @@ static bool si_shader_binary_open(struct si_screen *screen, sym->align = 64 * 1024; } + if (shader->key.as_ngg && sel->type == PIPE_SHADER_GEOMETRY) { + struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++]; + sym->name = "ngg_emit"; + sym->size = shader->ngg.ngg_emit_size * 4; + sym->align = 4; + } + bool ok = ac_rtld_open(rtld, (struct ac_rtld_open_info){ .info = &screen->info, .options = { @@ -5178,7 +5239,6 @@ static unsigned si_get_shader_binary_size(struct si_screen *screen, struct si_sh return rtld.rx_size; } - static bool si_get_external_symbol(void *data, const char *name, uint64_t *value) { uint64_t *scratch_va = data; @@ -6005,6 +6065,40 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx) create_function(ctx); preload_ring_buffers(ctx); + if (ctx->type == PIPE_SHADER_TESS_CTRL && + sel->tcs_info.tessfactors_are_def_in_all_invocs) { + for (unsigned i = 0; i < 6; i++) { + ctx->invoc0_tess_factors[i] = + ac_build_alloca_undef(&ctx->ac, ctx->i32, ""); + } + } + + if (ctx->type == PIPE_SHADER_GEOMETRY) { + for (unsigned i = 0; i < 4; i++) { + ctx->gs_next_vertex[i] = + ac_build_alloca(&ctx->ac, ctx->i32, ""); + } + if (shader->key.as_ngg) { + for (unsigned i = 0; i < 4; ++i) { + ctx->gs_curprim_verts[i] = + lp_build_alloca(&ctx->gallivm, ctx->ac.i32, ""); + ctx->gs_generated_prims[i] = + lp_build_alloca(&ctx->gallivm, ctx->ac.i32, ""); + } + + LLVMTypeRef a8i32 = LLVMArrayType(ctx->i32, 8); + ctx->gs_ngg_scratch = LLVMAddGlobalInAddressSpace(ctx->ac.module, + a8i32, "ngg_scratch", AC_ADDR_SPACE_LDS); + LLVMSetInitializer(ctx->gs_ngg_scratch, LLVMGetUndef(a8i32)); + LLVMSetAlignment(ctx->gs_ngg_scratch, 4); + + ctx->gs_ngg_emit = LLVMAddGlobalInAddressSpace(ctx->ac.module, + LLVMArrayType(ctx->i32, 0), "ngg_emit", AC_ADDR_SPACE_LDS); + LLVMSetLinkage(ctx->gs_ngg_emit, LLVMExternalLinkage); + LLVMSetAlignment(ctx->gs_ngg_emit, 4); + } + } + /* For GFX9 merged shaders: * - Set EXEC for the first shader. If the prolog is present, set * EXEC there instead. @@ -6036,14 +6130,22 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx) LLVMValueRef num_threads; bool nested_barrier; - if (!shader->is_monolithic) + if (!shader->is_monolithic || + (ctx->type == PIPE_SHADER_TESS_EVAL && + shader->key.as_ngg)) ac_init_exec_full_mask(&ctx->ac); if (ctx->type == PIPE_SHADER_TESS_CTRL || ctx->type == PIPE_SHADER_GEOMETRY) { + if (ctx->type == PIPE_SHADER_GEOMETRY && shader->key.as_ngg) { + gfx10_ngg_gs_emit_prologue(ctx); + nested_barrier = false; + } else { + nested_barrier = true; + } + /* Number of patches / primitives */ num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8); - nested_barrier = true; } else { /* Number of vertices */ num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 0, 8); @@ -6078,22 +6180,6 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx) } } - if (ctx->type == PIPE_SHADER_TESS_CTRL && - sel->tcs_info.tessfactors_are_def_in_all_invocs) { - for (unsigned i = 0; i < 6; i++) { - ctx->invoc0_tess_factors[i] = - ac_build_alloca_undef(&ctx->ac, ctx->i32, ""); - } - } - - if (ctx->type == PIPE_SHADER_GEOMETRY) { - int i; - for (i = 0; i < 4; i++) { - ctx->gs_next_vertex[i] = - ac_build_alloca(&ctx->ac, ctx->i32, ""); - } - } - if (sel->force_correct_derivs_after_kill) { ctx->postponed_kill = ac_build_alloca_undef(&ctx->ac, ctx->i1, ""); /* true = don't kill. */ @@ -7241,7 +7327,10 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx, } unsigned vertex_id_vgpr = first_vs_vgpr; - unsigned instance_id_vgpr = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1); + unsigned instance_id_vgpr = + ctx->screen->info.chip_class >= GFX10 ? + first_vs_vgpr + 3 : + first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1); ctx->abi.vertex_id = input_vgprs[vertex_id_vgpr]; ctx->abi.instance_id = input_vgprs[instance_id_vgpr]; @@ -8183,8 +8272,12 @@ bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compil si_calculate_max_simd_waves(shader); } - if (sscreen->info.chip_class >= GFX9 && sel->type == PIPE_SHADER_GEOMETRY) + if (shader->key.as_ngg) { + assert(!shader->key.as_es && !shader->key.as_ls); + gfx10_ngg_calculate_subgroup_info(shader); + } else if (sscreen->info.chip_class >= GFX9 && sel->type == PIPE_SHADER_GEOMETRY) { gfx9_get_gs_info(shader->previous_stage_sel, sel, &shader->gs_info); + } si_fix_resource_usage(sscreen, shader); si_shader_dump(sscreen, shader, debug, sel->info.processor,