/* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
if (ctx->options->chip_class == GFX6) {
- unsigned one_wave = ctx->options->wave_size / MAX2(num_tcs_input_cp, num_tcs_output_cp);
+ unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
num_patches = MIN2(num_patches, one_wave);
}
return num_patches;
return result;
}
+/*
+ * Merge two separately fetched values into one value of the requested type.
+ *
+ * Each input is passed through ac_to_integer(); the pair is then handed to
+ * ac_build_gather_values() as a two-element array (a first, b second) and
+ * the gathered result is bit-cast to `type`.
+ */
+static LLVMValueRef
+radv_emit_fetch_64bit(struct radv_shader_context *ctx,
+		      LLVMTypeRef type, LLVMValueRef a, LLVMValueRef b)
+{
+	LLVMValueRef parts[2];
+
+	parts[0] = ac_to_integer(&ctx->ac, a);
+	parts[1] = ac_to_integer(&ctx->ac, b);
+
+	LLVMValueRef gathered = ac_build_gather_values(&ctx->ac, parts, 2);
+
+	return LLVMBuildBitCast(ctx->ac.builder, gathered, type, "");
+}
+
static LLVMValueRef
load_gs_input(struct ac_shader_abi *abi,
unsigned location,
dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
value[i] = ac_lds_load(&ctx->ac, dw_addr);
+
+ if (ac_get_type_size(type) == 8) {
+ dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index + 1, 0), "");
+ LLVMValueRef tmp = ac_lds_load(&ctx->ac, dw_addr);
+
+ value[i] = radv_emit_fetch_64bit(ctx, type, value[i], tmp);
+ }
} else {
LLVMValueRef soffset =
LLVMConstInt(ctx->ac.i32,
ctx->ac.i32_0,
vtx_offset, soffset,
0, ac_glc, true, false);
+
+ if (ac_get_type_size(type) == 8) {
+ soffset = LLVMConstInt(ctx->ac.i32,
+ (param * 4 + i + const_index + 1) * 256,
+ false);
+
+ LLVMValueRef tmp =
+ ac_build_buffer_load(&ctx->ac,
+ ctx->esgs_ring, 1,
+ ctx->ac.i32_0,
+ vtx_offset, soffset,
+ 0, ac_glc, true, false);
+
+ value[i] = radv_emit_fetch_64bit(ctx, type, value[i], tmp);
+ }
}
if (ac_get_type_size(type) == 2) {
"");
/* If this thread has already emitted the declared maximum number of
- * vertices, kill it: excessive vertex emissions are not supposed to
- * have any effect, and GS threads have no externally observable
- * effects other than emitting vertices.
+ * vertices, don't emit any more: excessive vertex emissions are not
+ * supposed to have any effect.
*/
can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
- ac_build_kill_if_false(&ctx->ac, can_emit);
+
+ bool use_kill = !ctx->shader_info->gs.writes_memory;
+ if (use_kill)
+ ac_build_kill_if_false(&ctx->ac, can_emit);
+ else
+ ac_build_ifcc(&ctx->ac, can_emit, 6505);
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
unsigned output_usage_mask =
ac_build_sendmsg(&ctx->ac,
AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
ctx->gs_wave_id);
+
+ if (!use_kill)
+ ac_build_endif(&ctx->ac, 6505);
}
static void
ac_build_export(&ctx->ac, &args);
}
+/*
+ * Find the streamout output entry recorded for a given location.
+ *
+ * Walks so->outputs[0 .. so->num_outputs) in index order and returns a
+ * pointer to the first entry whose `location` field equals `location`,
+ * or NULL when none matches.
+ */
+static struct radv_stream_output *
+radv_get_stream_output_by_loc(struct radv_streamout_info *so, unsigned location)
+{
+	struct radv_stream_output *match = NULL;
+	unsigned idx = 0;
+
+	while (!match && idx < so->num_outputs) {
+		if (so->outputs[idx].location == location)
+			match = &so->outputs[idx];
+		idx++;
+	}
+
+	return match;
+}
+
static void build_streamout_vertex(struct radv_shader_context *ctx,
LLVMValueRef *so_buffer, LLVMValueRef *wg_offset_dw,
unsigned stream, LLVMValueRef offset_vtx,
offset[buffer] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 2, false), "");
}
- for (unsigned i = 0; i < so->num_outputs; ++i) {
- struct radv_stream_output *output =
- &ctx->shader_info->so.outputs[i];
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ struct radv_shader_output_values outputs[AC_LLVM_MAX_OUTPUTS];
+ unsigned noutput = 0;
+ unsigned out_idx = 0;
- if (stream != output->stream)
- continue;
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask =
+ ctx->shader_info->gs.output_usage_mask[i];
+ uint8_t output_stream =
+ output_stream = ctx->shader_info->gs.output_streams[i];
- unsigned loc = output->location;
- struct radv_shader_output_values out = {};
+ if (!(ctx->output_mask & (1ull << i)) ||
+ output_stream != stream)
+ continue;
- for (unsigned comp = 0; comp < 4; comp++) {
- tmp = ac_build_gep0(&ctx->ac, vertexptr,
- LLVMConstInt(ctx->ac.i32, 4 * loc + comp, false));
- out.values[comp] = LLVMBuildLoad(builder, tmp, "");
+ outputs[noutput].slot_name = i;
+ outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
+ outputs[noutput].usage_mask = output_usage_mask;
+
+ int length = util_last_bit(output_usage_mask);
+
+ for (unsigned j = 0; j < length; j++, out_idx++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ tmp = ac_build_gep0(&ctx->ac, vertexptr,
+ LLVMConstInt(ctx->ac.i32, out_idx, false));
+ outputs[noutput].values[j] = LLVMBuildLoad(builder, tmp, "");
+ }
+
+ for (unsigned j = length; j < 4; j++)
+ outputs[noutput].values[j] = LLVMGetUndef(ctx->ac.f32);
+
+ noutput++;
+ }
+
+ for (unsigned i = 0; i < noutput; i++) {
+ struct radv_stream_output *output =
+ radv_get_stream_output_by_loc(so, outputs[i].slot_name);
+
+ if (!output ||
+ output->stream != stream)
+ continue;
+
+ struct radv_shader_output_values out = {};
+
+ for (unsigned j = 0; j < 4; j++) {
+ out.values[j] = outputs[i].values[j];
+ }
+
+ radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
}
+ } else {
+ for (unsigned i = 0; i < so->num_outputs; ++i) {
+ struct radv_stream_output *output =
+ &ctx->shader_info->so.outputs[i];
+
+ if (stream != output->stream)
+ continue;
+
+ struct radv_shader_output_values out = {};
+
+ for (unsigned comp = 0; comp < 4; comp++) {
+ if (!(output->component_mask & (1 << comp)))
+ continue;
+
+ tmp = ac_build_gep0(&ctx->ac, vertexptr,
+ LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
+ out.values[comp] = LLVMBuildLoad(builder, tmp, "");
+ }
- radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
+ radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
+ }
}
}
static void
handle_ngg_outputs_post_1(struct radv_shader_context *ctx)
{
+ struct radv_streamout_info *so = &ctx->shader_info->so;
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef vertex_ptr = NULL;
LLVMValueRef tmp, tmp2;
vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- if (!(ctx->output_mask & (1ull << i)))
- continue;
+ for (unsigned i = 0; i < so->num_outputs; ++i) {
+ struct radv_stream_output *output =
+ &ctx->shader_info->so.outputs[i];
+
+ unsigned loc = output->location;
+
+ for (unsigned comp = 0; comp < 4; comp++) {
+ if (!(output->component_mask & (1 << comp)))
+ continue;
- for (unsigned j = 0; j < 4; j++) {
tmp = ac_build_gep0(&ctx->ac, vertex_ptr,
- LLVMConstInt(ctx->ac.i32, 4 * i + j, false));
+ LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
tmp2 = LLVMBuildLoad(builder,
- ctx->abi.outputs[4 * i + j], "");
+ ctx->abi.outputs[4 * loc + comp], "");
tmp2 = ac_to_integer(&ctx->ac, tmp2);
LLVMBuildStore(builder, tmp2, tmp);
}
LLVMValueRef num_vertices_val;
if (ctx->stage == MESA_SHADER_VERTEX) {
- num_vertices_val = LLVMConstInt(ctx->ac.i32, 1, false);
+ LLVMValueRef outprim_val =
+ LLVMConstInt(ctx->ac.i32,
+ ctx->options->key.vs.outprim, false);
+ num_vertices_val = LLVMBuildAdd(builder, outprim_val,
+ ctx->ac.i32_1, "");
num_vertices = 3; /* TODO: optimize for points & lines */
} else {
assert(ctx->stage == MESA_SHADER_TESS_EVAL);
const LLVMValueRef can_emit =
LLVMBuildICmp(builder, LLVMIntULT, vertexidx,
LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
- ac_build_kill_if_false(&ctx->ac, can_emit);
+ ac_build_ifcc(&ctx->ac, can_emit, 9001);
tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
tmp = LLVMBuildSelect(builder, can_emit, tmp, vertexidx, "");
tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), "");
LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]);
+
+ ac_build_endif(&ctx->ac, 9001);
}
static void
ctx.options = options;
ctx.shader_info = shader_info;
- enum ac_float_mode float_mode =
- options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
- AC_FLOAT_MODE_DEFAULT;
+ enum ac_float_mode float_mode = AC_FLOAT_MODE_DEFAULT;
+
+ if (shader_info->float_controls_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) {
+ float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
+ } else if (options->unsafe_math) {
+ float_mode = AC_FLOAT_MODE_UNSAFE_FP_MATH;
+ }
ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class,
options->family, float_mode, options->wave_size, 64);
shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
ac_nir_fixup_ls_hs_input_vgprs(&ctx);
- if (shaders[shader_count - 1]->info.stage != MESA_SHADER_GEOMETRY &&
- (ctx.options->key.vs_common_out.as_ngg &&
- !ctx.options->key.vs_common_out.as_es)) {
- /* Unconditionally declare scratch space base for streamout and
- * vertex compaction. Whether space is actually allocated is
+ if (is_ngg) {
+ /* Declare scratch space base for streamout and vertex
+ * compaction. Whether space is actually allocated is
* determined during linking / PM4 creation.
*
* Add an extra dword per vertex to ensure an odd stride, which
fprintf(stderr, "\n");
}
- if (options->record_llvm_ir) {
+ if (options->record_ir) {
char *llvm_ir = LLVMPrintModuleToString(llvm_module);
llvm_ir_string = strdup(llvm_ir);
LLVMDisposeMessage(llvm_ir);
LLVMBasicBlockRef bb;
unsigned offset;
- if (!num_components)
+ if (stream > 0 && !num_components)
continue;
if (stream > 0 && !ctx->shader_info->so.num_outputs)