LLVMContextRef context;
LLVMValueRef main_function;
- LLVMValueRef descriptor_sets[RADV_UD_MAX_SETS];
+ LLVMValueRef descriptor_sets[MAX_SETS];
LLVMValueRef ring_offsets;
LLVMValueRef vertex_buffers;
LLVMValueRef vertexptr; /* GFX10 only */
};
+struct radv_shader_output_values {
+ LLVMValueRef values[4];
+ unsigned slot_name;
+ unsigned slot_index;
+ unsigned usage_mask;
+};
+
enum radeon_llvm_calling_convention {
RADEON_LLVM_AMDGPU_VS = 87,
RADEON_LLVM_AMDGPU_GS = 88,
/* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
if (ctx->options->chip_class == GFX6) {
- unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
+ unsigned one_wave = ctx->options->wave_size / MAX2(num_tcs_input_cp, num_tcs_output_cp);
num_patches = MIN2(num_patches, one_wave);
}
return num_patches;
}
} else {
if (ctx->ac.chip_class >= GFX10) {
- add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* user vgpr */
- add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* user vgpr */
- add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id);
+ if (ctx->options->key.vs_common_out.as_ngg) {
+ add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* user vgpr */
+ add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* user vgpr */
+ add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id);
+ } else {
+ add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* unused */
+ add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id);
+ add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id);
+ }
} else {
add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id);
add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id);
{
int i;
- if (ctx->ac.chip_class >= GFX10)
- return;
-
/* Streamout SGPRs. */
if (ctx->shader_info->info.so.num_outputs) {
assert(stage == MESA_SHADER_VERTEX ||
uint32_t desc_type = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (ctx->ac.chip_class >= GFX10) {
+ desc_type |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(3) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
LLVMValueRef desc_components[4] = {
LLVMBuildPtrToInt(ctx->ac.builder, desc_ptr, ctx->ac.intptr, ""),
radv_emit_stream_output(struct radv_shader_context *ctx,
LLVMValueRef const *so_buffers,
LLVMValueRef const *so_write_offsets,
- const struct radv_stream_output *output)
+ const struct radv_stream_output *output,
+ struct radv_shader_output_values *shader_out)
{
unsigned num_comps = util_bitcount(output->component_mask);
- unsigned loc = output->location;
unsigned buf = output->buffer;
unsigned offset = output->offset;
unsigned start;
/* Load the output as int. */
for (int i = 0; i < num_comps; i++) {
- out[i] = ac_to_integer(&ctx->ac,
- radv_load_output(ctx, loc, start + i));
+ out[i] = ac_to_integer(&ctx->ac, shader_out->values[start + i]);
}
/* Pack the output. */
/* Write streamout data. */
for (i = 0; i < ctx->shader_info->info.so.num_outputs; i++) {
+ struct radv_shader_output_values shader_out = {};
struct radv_stream_output *output =
&ctx->shader_info->info.so.outputs[i];
if (stream != output->stream)
continue;
- radv_emit_stream_output(ctx, so_buffers,
- so_write_offset, output);
+ for (int j = 0; j < 4; j++) {
+ shader_out.values[j] =
+ radv_load_output(ctx, output->location, j);
+ }
+
+ radv_emit_stream_output(ctx, so_buffers,so_write_offset,
+ output, &shader_out);
}
}
ac_nir_build_endif(&if_ctx);
}
-struct radv_shader_output_values {
- LLVMValueRef values[4];
- unsigned slot_name;
- unsigned slot_index;
- unsigned usage_mask;
-};
-
static void
radv_build_param_exports(struct radv_shader_context *ctx,
struct radv_shader_output_values *outputs,
LLVMValueRef wave_idx = ac_unpack_param(&ctx->ac, ctx->merged_wave_info, 24, 4);
vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
LLVMBuildMul(ctx->ac.builder, wave_idx,
- LLVMConstInt(ctx->ac.i32, 64, false), ""), "");
+ LLVMConstInt(ctx->ac.i32,
+ ctx->ac.wave_size, false), ""), "");
lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), "");
}
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef tmp;
tmp = LLVMBuildMul(builder, get_wave_id_in_tg(ctx),
- LLVMConstInt(ctx->ac.i32, 64, false), "");
+ LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false), "");
return LLVMBuildAdd(builder, tmp, ac_get_thread_id(&ctx->ac), "");
}
*/
LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);
uint64_t stream_offset = 0;
- unsigned num_records = 64;
+ unsigned num_records = ctx->ac.wave_size;
LLVMValueRef base_ring;
base_ring =
ring = LLVMBuildInsertElement(ctx->ac.builder,
ring, tmp, ctx->ac.i32_0, "");
- stream_offset += stride * 64;
+ stream_offset += stride * ctx->ac.wave_size;
ring = LLVMBuildBitCast(ctx->ac.builder, ring,
ctx->ac.v4i32, "");
unsigned
radv_nir_get_max_workgroup_size(enum chip_class chip_class,
+ gl_shader_stage stage,
const struct nir_shader *nir)
{
- switch (nir->info.stage) {
- case MESA_SHADER_TESS_CTRL:
- return chip_class >= GFX7 ? 128 : 64;
- case MESA_SHADER_GEOMETRY:
- return chip_class >= GFX9 ? 128 : 64;
- case MESA_SHADER_COMPUTE:
- break;
- default:
- return 0;
- }
-
- unsigned max_workgroup_size = nir->info.cs.local_size[0] *
- nir->info.cs.local_size[1] *
- nir->info.cs.local_size[2];
- return max_workgroup_size;
+ const unsigned backup_sizes[] = {chip_class >= GFX9 ? 128 : 64, 1, 1};
+ return radv_get_max_workgroup_size(chip_class, stage, nir ? nir->info.cs.local_size : backup_sizes);
}
/* Fixup the HW not emitting the TCS regs if there are no HS threads. */
ctx.options = options;
ctx.shader_info = shader_info;
- ac_llvm_context_init(&ctx.ac, options->chip_class, options->family);
- ctx.context = ctx.ac.context;
- ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context);
-
enum ac_float_mode float_mode =
options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
AC_FLOAT_MODE_DEFAULT;
- ctx.ac.builder = ac_create_builder(ctx.context, float_mode);
+ ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class,
+ options->family, float_mode, options->wave_size,
+ options->wave_size);
+ ctx.context = ctx.ac.context;
radv_nir_shader_info_init(&shader_info->info);
for(int i = 0; i < shader_count; ++i)
radv_nir_shader_info_pass(shaders[i], options, &shader_info->info);
- for (i = 0; i < RADV_UD_MAX_SETS; i++)
+ for (i = 0; i < MAX_SETS; i++)
shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
for (i = 0; i < AC_UD_MAX_UD; i++)
shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
for (int i = 0; i < shader_count; ++i) {
ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
radv_nir_get_max_workgroup_size(ctx.options->chip_class,
- shaders[i]));
+ shaders[i]->info.stage,
+ shaders[i]));
}
if (ctx.ac.chip_class >= GFX10) {
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
ctx.abi.load_resource = radv_load_resource;
ctx.abi.clamp_shadow_reference = false;
- ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800;
-
- /* Because the new raw/struct atomic intrinsics are buggy with LLVM 8,
- * we fallback to the old intrinsics for atomic buffer image operations
- * and thus we need to apply the indexing workaround...
- */
- ctx.abi.gfx9_stride_size_workaround_for_atomic = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x900;
+ ctx.abi.robust_buffer_access = options->robust_buffer_access;
bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && ctx.options->key.vs_common_out.as_ngg;
if (shader_count >= 2 || is_ngg)
ac_init_exec_full_mask(&ctx.ac);
- if ((ctx.ac.family == CHIP_VEGA10 ||
- ctx.ac.family == CHIP_RAVEN) &&
+ if (options->has_ls_vgpr_init_bug &&
shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
ac_nir_fixup_ls_hs_input_vgprs(&ctx);
declare_esgs_ring(&ctx);
}
- if (i)
+ bool nested_barrier = false;
+
+ if (i) {
+ if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY &&
+ ctx.options->key.vs_common_out.as_ngg) {
+ gfx10_ngg_gs_emit_prologue(&ctx);
+ nested_barrier = false;
+ } else {
+ nested_barrier = true;
+ }
+ }
+
+ if (nested_barrier) {
+ /* Execute a barrier before the second shader in
+ * a merged shader.
+ *
+ * Execute the barrier inside the conditional block,
+ * so that empty waves can jump directly to s_endpgm,
+ * which will also signal the barrier.
+ *
+ * This is possible in gfx9, because an empty wave
+ * for the second shader does not participate in
+ * the epilogue. With NGG, empty waves may still
+ * be required to export data (e.g. GS output vertices),
+ * so we cannot let them exit early.
+ *
+ * If the shader is TCS and the TCS epilog is present
+ * and contains a barrier, it will wait there and then
+ * reach s_endpgm.
+ */
ac_emit_barrier(&ctx.ac, ctx.stage);
+ }
nir_foreach_variable(variable, &shaders[i]->outputs)
scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
LLVMBasicBlockRef merge_block;
if (shader_count >= 2 || is_ngg) {
-
- if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY &&
- ctx.options->key.vs_common_out.as_ngg) {
- gfx10_ngg_gs_emit_prologue(&ctx);
- }
-
LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
break;
case MESA_SHADER_FRAGMENT:
shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests;
+ shader_info->fs.post_depth_coverage = nir->info.fs.post_depth_coverage;
break;
case MESA_SHADER_GEOMETRY:
shader_info->gs.vertices_in = nir->info.gs.vertices_in;
shader_info->gs.es_type = nir[0]->info.stage;
}
}
+ shader_info->info.wave_size = options->wave_size;
}
static void
ctx.options = options;
ctx.shader_info = shader_info;
- ac_llvm_context_init(&ctx.ac, options->chip_class, options->family);
- ctx.context = ctx.ac.context;
- ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context);
-
- ctx.is_gs_copy_shader = true;
-
enum ac_float_mode float_mode =
options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
AC_FLOAT_MODE_DEFAULT;
- ctx.ac.builder = ac_create_builder(ctx.context, float_mode);
+ ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class,
+ options->family, float_mode, 64, 64);
+ ctx.context = ctx.ac.context;
+
+ ctx.is_gs_copy_shader = true;
ctx.stage = MESA_SHADER_VERTEX;
radv_nir_shader_info_pass(geom_shader, options, &shader_info->info);