LLVMConstInt(ctx->i32, tf_offset, 0), "");
}
+ uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (ctx->screen->info.chip_class >= GFX10)
+ rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(3) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ else
+ rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
LLVMValueRef desc[4];
desc[0] = addr;
desc[1] = LLVMConstInt(ctx->i32,
S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
desc[2] = LLVMConstInt(ctx->i32, 0xffffffff, 0);
- desc[3] = LLVMConstInt(ctx->i32,
- S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0);
+ desc[3] = LLVMConstInt(ctx->i32, rsrc3, false);
return ac_build_gather_values(&ctx->ac, desc, 4);
}
desc1 = LLVMConstInt(ctx->i32,
S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
+ uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (ctx->screen->info.chip_class >= GFX10)
+ rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(3) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ else
+ rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
LLVMValueRef desc_elems[] = {
desc0,
desc1,
LLVMConstInt(ctx->i32, (sel->info.const_file_max[0] + 1) * 16, 0),
- LLVMConstInt(ctx->i32,
- S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32), 0)
+ LLVMConstInt(ctx->i32, rsrc3, false)
};
return ac_build_gather_values(&ctx->ac, desc_elems, 4);
/* Pass GS inputs from ES to GS on GFX9. */
static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
{
+ LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef ret = ctx->return_value;
ret = si_insert_input_ptr(ctx, ret, 0, 0);
ret = si_insert_input_ptr(ctx, ret, 1, 1);
- ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
+ if (ctx->shader->key.as_ngg)
+ ret = LLVMBuildInsertValue(builder, ret, ctx->gs_tg_info, 2, "");
+ else
+ ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
static void emit_gs_epilogue(struct si_shader_context *ctx)
{
+ if (ctx->shader->key.as_ngg) {
+ gfx10_ngg_gs_emit_epilogue(ctx);
+ return;
+ }
+
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
si_get_gs_wave_id(ctx));
LLVMValueRef *addrs)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+
+ if (ctx->shader->key.as_ngg) {
+ gfx10_ngg_gs_emit_vertex(ctx, stream, addrs);
+ return;
+ }
+
struct tgsi_shader_info *info = &ctx->shader->selector->info;
struct si_shader *shader = ctx->shader;
struct lp_build_if_state if_state;
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ if (ctx->shader->key.as_ngg) {
+ LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]);
+ return;
+ }
+
/* Signal primitive cut */
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),
si_get_gs_wave_id(ctx));
add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.vertex_id);
if (shader->key.as_ls) {
ctx->param_rel_auto_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+ if (ctx->screen->info.chip_class >= GFX10) {
+ add_arg(fninfo, ARG_VGPR, ctx->i32); /* user VGPR */
+ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id);
+ } else {
+ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id);
+ add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */
+ }
+ } else if (ctx->screen->info.chip_class == GFX10 &&
+ !shader->is_gs_copy_shader) {
+ add_arg(fninfo, ARG_VGPR, ctx->i32); /* user vgpr */
+ add_arg(fninfo, ARG_VGPR, ctx->i32); /* user vgpr */
add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id);
} else {
add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.instance_id);
ctx->param_vs_prim_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+ add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */
}
- add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */
if (!shader->is_gs_copy_shader) {
/* Vertex load indices. */
ring = LLVMBuildInsertElement(builder, ring,
LLVMConstInt(ctx->i32, num_records, 0),
LLVMConstInt(ctx->i32, 2, 0), "");
+
+ uint32_t rsrc3 =
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
+ S_008F0C_ADD_TID_ENABLE(1);
+
+ if (ctx->ac.chip_class >= GFX10) {
+ rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(2) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+ S_008F0C_ELEMENT_SIZE(1); /* element_size = 4 (bytes) */
+ }
+
ring = LLVMBuildInsertElement(builder, ring,
- LLVMConstInt(ctx->i32,
- S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(1) | /* element_size = 4 (bytes) */
- S_008F0C_INDEX_STRIDE(1) | /* index_stride = 16 (elements) */
- S_008F0C_ADD_TID_ENABLE(1),
- 0),
+ LLVMConstInt(ctx->i32, rsrc3, false),
LLVMConstInt(ctx->i32, 3, 0), "");
ctx->gsvs_ring[stream] = ring;
#undef add_part
- struct ac_rtld_symbol lds_symbols[1];
+ struct ac_rtld_symbol lds_symbols[2];
unsigned num_lds_symbols = 0;
if (sel && screen->info.chip_class >= GFX9 &&
sym->align = 64 * 1024;
}
+ if (shader->key.as_ngg && sel->type == PIPE_SHADER_GEOMETRY) {
+ struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
+ sym->name = "ngg_emit";
+ sym->size = shader->ngg.ngg_emit_size * 4;
+ sym->align = 4;
+ }
+
bool ok = ac_rtld_open(rtld, (struct ac_rtld_open_info){
.info = &screen->info,
.options = {
return rtld.rx_size;
}
-
static bool si_get_external_symbol(void *data, const char *name, uint64_t *value)
{
uint64_t *scratch_va = data;
create_function(ctx);
preload_ring_buffers(ctx);
+ if (ctx->type == PIPE_SHADER_TESS_CTRL &&
+ sel->tcs_info.tessfactors_are_def_in_all_invocs) {
+ for (unsigned i = 0; i < 6; i++) {
+ ctx->invoc0_tess_factors[i] =
+ ac_build_alloca_undef(&ctx->ac, ctx->i32, "");
+ }
+ }
+
+ if (ctx->type == PIPE_SHADER_GEOMETRY) {
+ for (unsigned i = 0; i < 4; i++) {
+ ctx->gs_next_vertex[i] =
+ ac_build_alloca(&ctx->ac, ctx->i32, "");
+ }
+ if (shader->key.as_ngg) {
+ for (unsigned i = 0; i < 4; ++i) {
+ ctx->gs_curprim_verts[i] =
+ lp_build_alloca(&ctx->gallivm, ctx->ac.i32, "");
+ ctx->gs_generated_prims[i] =
+ lp_build_alloca(&ctx->gallivm, ctx->ac.i32, "");
+ }
+
+ LLVMTypeRef a8i32 = LLVMArrayType(ctx->i32, 8);
+ ctx->gs_ngg_scratch = LLVMAddGlobalInAddressSpace(ctx->ac.module,
+ a8i32, "ngg_scratch", AC_ADDR_SPACE_LDS);
+ LLVMSetInitializer(ctx->gs_ngg_scratch, LLVMGetUndef(a8i32));
+ LLVMSetAlignment(ctx->gs_ngg_scratch, 4);
+
+ ctx->gs_ngg_emit = LLVMAddGlobalInAddressSpace(ctx->ac.module,
+ LLVMArrayType(ctx->i32, 0), "ngg_emit", AC_ADDR_SPACE_LDS);
+ LLVMSetLinkage(ctx->gs_ngg_emit, LLVMExternalLinkage);
+ LLVMSetAlignment(ctx->gs_ngg_emit, 4);
+ }
+ }
+
/* For GFX9 merged shaders:
* - Set EXEC for the first shader. If the prolog is present, set
* EXEC there instead.
LLVMValueRef num_threads;
bool nested_barrier;
- if (!shader->is_monolithic)
+ if (!shader->is_monolithic ||
+ (ctx->type == PIPE_SHADER_TESS_EVAL &&
+ shader->key.as_ngg))
ac_init_exec_full_mask(&ctx->ac);
if (ctx->type == PIPE_SHADER_TESS_CTRL ||
ctx->type == PIPE_SHADER_GEOMETRY) {
+ if (ctx->type == PIPE_SHADER_GEOMETRY && shader->key.as_ngg) {
+ gfx10_ngg_gs_emit_prologue(ctx);
+ nested_barrier = false;
+ } else {
+ nested_barrier = true;
+ }
+
/* Number of patches / primitives */
num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
- nested_barrier = true;
} else {
/* Number of vertices */
num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 0, 8);
}
}
- if (ctx->type == PIPE_SHADER_TESS_CTRL &&
- sel->tcs_info.tessfactors_are_def_in_all_invocs) {
- for (unsigned i = 0; i < 6; i++) {
- ctx->invoc0_tess_factors[i] =
- ac_build_alloca_undef(&ctx->ac, ctx->i32, "");
- }
- }
-
- if (ctx->type == PIPE_SHADER_GEOMETRY) {
- int i;
- for (i = 0; i < 4; i++) {
- ctx->gs_next_vertex[i] =
- ac_build_alloca(&ctx->ac, ctx->i32, "");
- }
- }
-
if (sel->force_correct_derivs_after_kill) {
ctx->postponed_kill = ac_build_alloca_undef(&ctx->ac, ctx->i1, "");
/* true = don't kill. */
}
unsigned vertex_id_vgpr = first_vs_vgpr;
- unsigned instance_id_vgpr = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
+ unsigned instance_id_vgpr =
+ ctx->screen->info.chip_class >= GFX10 ?
+ first_vs_vgpr + 3 :
+ first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
ctx->abi.vertex_id = input_vgprs[vertex_id_vgpr];
ctx->abi.instance_id = input_vgprs[instance_id_vgpr];
si_calculate_max_simd_waves(shader);
}
- if (sscreen->info.chip_class >= GFX9 && sel->type == PIPE_SHADER_GEOMETRY)
+ if (shader->key.as_ngg) {
+ assert(!shader->key.as_es && !shader->key.as_ls);
+ gfx10_ngg_calculate_subgroup_info(shader);
+ } else if (sscreen->info.chip_class >= GFX9 && sel->type == PIPE_SHADER_GEOMETRY) {
gfx9_get_gs_info(shader->previous_stage_sel, sel, &shader->gs_info);
+ }
si_fix_resource_usage(sscreen, shader);
si_shader_dump(sscreen, shader, debug, sel->info.processor,