if (ctx->ac.chip_class >= GFX10) {
desc_type |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(3) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
S_008F0C_RESOURCE_LEVEL(1);
} else {
desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
}
-static unsigned
-get_num_channels_from_data_format(unsigned data_format)
-{
- switch (data_format) {
- case V_008F0C_BUF_DATA_FORMAT_8:
- case V_008F0C_BUF_DATA_FORMAT_16:
- case V_008F0C_BUF_DATA_FORMAT_32:
- return 1;
- case V_008F0C_BUF_DATA_FORMAT_8_8:
- case V_008F0C_BUF_DATA_FORMAT_16_16:
- case V_008F0C_BUF_DATA_FORMAT_32_32:
- return 2;
- case V_008F0C_BUF_DATA_FORMAT_10_11_11:
- case V_008F0C_BUF_DATA_FORMAT_11_11_10:
- case V_008F0C_BUF_DATA_FORMAT_32_32_32:
- return 3;
- case V_008F0C_BUF_DATA_FORMAT_8_8_8_8:
- case V_008F0C_BUF_DATA_FORMAT_10_10_10_2:
- case V_008F0C_BUF_DATA_FORMAT_2_10_10_10:
- case V_008F0C_BUF_DATA_FORMAT_16_16_16_16:
- case V_008F0C_BUF_DATA_FORMAT_32_32_32_32:
- return 4;
- default:
- break;
- }
-
- return 4;
-}
-
static LLVMValueRef
radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
LLVMValueRef value,
ctx->args->ac.base_vertex), "");
}
+ const struct ac_data_format_info *vtx_info = ac_get_data_format_info(data_format);
+
/* Adjust the number of channels to load based on the vertex
* attribute format.
*/
- unsigned num_format_channels = get_num_channels_from_data_format(data_format);
- unsigned num_channels = MIN2(num_input_channels, num_format_channels);
+ unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels);
unsigned attrib_binding = ctx->args->options->key.vs.vertex_attribute_bindings[attrib_index];
unsigned attrib_offset = ctx->args->options->key.vs.vertex_attribute_offsets[attrib_index];
unsigned attrib_stride = ctx->args->options->key.vs.vertex_attribute_strides[attrib_index];
num_channels = MAX2(num_channels, 3);
}
- if (attrib_stride != 0 && attrib_offset > attrib_stride) {
- LLVMValueRef buffer_offset =
- LLVMConstInt(ctx->ac.i32,
- attrib_offset / attrib_stride, false);
+ t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
+ t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
- buffer_index = LLVMBuildAdd(ctx->ac.builder,
- buffer_index,
- buffer_offset, "");
+ /* Perform per-channel vertex fetch operations if unaligned
+ * access are detected. Only GFX6 and GFX10 are affected.
+ */
+ bool unaligned_vertex_fetches = false;
+ if ((ctx->ac.chip_class == GFX6 || ctx->ac.chip_class == GFX10) &&
+ vtx_info->chan_format != data_format &&
+ ((attrib_offset % vtx_info->element_size) ||
+ (attrib_stride % vtx_info->element_size)))
+ unaligned_vertex_fetches = true;
+
+ if (unaligned_vertex_fetches) {
+ unsigned chan_format = vtx_info->chan_format;
+ LLVMValueRef values[4];
- attrib_offset = attrib_offset % attrib_stride;
- }
+ assert(ctx->ac.chip_class == GFX6 ||
+ ctx->ac.chip_class == GFX10);
- t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
- t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
+ for (unsigned chan = 0; chan < num_channels; chan++) {
+ unsigned chan_offset = attrib_offset + chan * vtx_info->chan_byte_size;
+ LLVMValueRef chan_index = buffer_index;
+
+ if (attrib_stride != 0 && chan_offset > attrib_stride) {
+ LLVMValueRef buffer_offset =
+ LLVMConstInt(ctx->ac.i32,
+ chan_offset / attrib_stride, false);
+
+ chan_index = LLVMBuildAdd(ctx->ac.builder,
+ buffer_index,
+ buffer_offset, "");
- input = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
- buffer_index,
- LLVMConstInt(ctx->ac.i32, attrib_offset, false),
- ctx->ac.i32_0, ctx->ac.i32_0,
- num_channels,
- data_format, num_format, 0, true);
+ chan_offset = chan_offset % attrib_stride;
+ }
+
+ values[chan] = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
+ chan_index,
+ LLVMConstInt(ctx->ac.i32, chan_offset, false),
+ ctx->ac.i32_0, ctx->ac.i32_0, 1,
+ chan_format, num_format, 0, true);
+ }
+
+ input = ac_build_gather_values(&ctx->ac, values, num_channels);
+ } else {
+ if (attrib_stride != 0 && attrib_offset > attrib_stride) {
+ LLVMValueRef buffer_offset =
+ LLVMConstInt(ctx->ac.i32,
+ attrib_offset / attrib_stride, false);
+
+ buffer_index = LLVMBuildAdd(ctx->ac.builder,
+ buffer_index,
+ buffer_offset, "");
+
+ attrib_offset = attrib_offset % attrib_stride;
+ }
+
+ input = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
+ buffer_index,
+ LLVMConstInt(ctx->ac.i32, attrib_offset, false),
+ ctx->ac.i32_0, ctx->ac.i32_0,
+ num_channels,
+ data_format, num_format, 0, true);
+ }
if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
LLVMValueRef c[4];
{
return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info),
ctx->ac.i32_0,
- LLVMConstInt(ctx->ac.i32, 11, false),
+ LLVMConstInt(ctx->ac.i32, 12, false),
false);
}
return ngg_gs_vertex_ptr(ctx, vertexidx);
}
-/* Send GS Alloc Req message from the first wave of the group to SPI.
- * Message payload is:
- * - bits 0..10: vertices in group
- * - bits 12..22: primitives in group
- */
-static void build_sendmsg_gs_alloc_req(struct radv_shader_context *ctx,
- LLVMValueRef vtx_cnt,
- LLVMValueRef prim_cnt)
+static LLVMValueRef
+ngg_gs_get_emit_output_ptr(struct radv_shader_context *ctx, LLVMValueRef vertexptr,
+ unsigned out_idx)
{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef tmp;
-
- tmp = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, "");
- ac_build_ifcc(&ctx->ac, tmp, 5020);
-
- tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->ac.i32, 12, false),"");
- tmp = LLVMBuildOr(builder, tmp, vtx_cnt, "");
- ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_ALLOC_REQ, tmp);
-
- ac_build_endif(&ctx->ac, 5020);
+ LLVMValueRef gep_idx[3] = {
+ ctx->ac.i32_0, /* implied C-style array */
+ ctx->ac.i32_0, /* first struct entry */
+ LLVMConstInt(ctx->ac.i32, out_idx, false),
+ };
+ return LLVMBuildGEP(ctx->ac.builder, vertexptr, gep_idx, 3, "");
}
-struct ngg_prim {
- unsigned num_vertices;
- LLVMValueRef isnull;
- LLVMValueRef index[3];
- LLVMValueRef edgeflag[3];
-};
-
-static void build_export_prim(struct radv_shader_context *ctx,
- const struct ngg_prim *prim)
+static LLVMValueRef
+ngg_gs_get_emit_primflag_ptr(struct radv_shader_context *ctx, LLVMValueRef vertexptr,
+ unsigned stream)
{
- LLVMBuilderRef builder = ctx->ac.builder;
- struct ac_export_args args;
- LLVMValueRef tmp;
-
- tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, "");
- args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), "");
-
- for (unsigned i = 0; i < prim->num_vertices; ++i) {
- tmp = LLVMBuildShl(builder, prim->index[i],
- LLVMConstInt(ctx->ac.i32, 10 * i, false), "");
- args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
- tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, "");
- tmp = LLVMBuildShl(builder, tmp,
- LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), "");
- args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
- }
-
- args.out[0] = LLVMBuildBitCast(builder, args.out[0], ctx->ac.f32, "");
- args.out[1] = LLVMGetUndef(ctx->ac.f32);
- args.out[2] = LLVMGetUndef(ctx->ac.f32);
- args.out[3] = LLVMGetUndef(ctx->ac.f32);
-
- args.target = V_008DFC_SQ_EXP_PRIM;
- args.enabled_channels = 1;
- args.done = true;
- args.valid_mask = false;
- args.compr = false;
-
- ac_build_export(&ctx->ac, &args);
+ LLVMValueRef gep_idx[3] = {
+ ctx->ac.i32_0, /* implied C-style array */
+ ctx->ac.i32_1, /* second struct entry */
+ LLVMConstInt(ctx->ac.i32, stream, false),
+ };
+ return LLVMBuildGEP(ctx->ac.builder, vertexptr, gep_idx, 3, "");
}
static struct radv_stream_output *
/* TODO: primitive culling */
- build_sendmsg_gs_alloc_req(ctx, ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx));
+ ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx),
+ ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx));
/* TODO: streamout queries */
- /* Export primitive data to the index buffer. Format is:
- * - bits 0..8: index 0
- * - bit 9: edge flag 0
- * - bits 10..18: index 1
- * - bit 19: edge flag 1
- * - bits 20..28: index 2
- * - bit 29: edge flag 2
- * - bit 31: null primitive (skip)
+ /* Export primitive data to the index buffer.
*
* For the first version, we will always build up all three indices
* independent of the primitive type. The additional garbage data
*/
ac_build_ifcc(&ctx->ac, is_gs_thread, 6001);
{
- struct ngg_prim prim = {};
-
- prim.num_vertices = num_vertices;
- prim.isnull = ctx->ac.i1false;
- memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
+ struct ac_ngg_prim prim = {};
- for (unsigned i = 0; i < num_vertices; ++i) {
- tmp = LLVMBuildLShr(builder,
- ac_get_arg(&ctx->ac, ctx->args->ac.gs_invocation_id),
- LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
- prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+ if (ctx->args->options->key.vs_common_out.as_ngg_passthrough) {
+ prim.passthrough = ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[0]);
+ } else {
+ prim.num_vertices = num_vertices;
+ prim.isnull = ctx->ac.i1false;
+ memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
+
+ for (unsigned i = 0; i < num_vertices; ++i) {
+ tmp = LLVMBuildLShr(builder,
+ ac_get_arg(&ctx->ac, ctx->args->ac.gs_invocation_id),
+ LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
+ prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+ }
}
- build_export_prim(ctx, &prim);
+ ac_build_export_prim(&ctx->ac, &prim);
}
ac_build_endif(&ctx->ac, 6001);
LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
tmp = ngg_gs_emit_vertex_ptr(ctx, gsthread, vertexidx);
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implied C-style array */
- ctx->ac.i32_1, /* second entry of struct */
- LLVMConstInt(ctx->ac.i32, stream, false),
- };
- tmp = LLVMBuildGEP(builder, tmp, gep_idx, 3, "");
- LLVMBuildStore(builder, i8_0, tmp);
+ LLVMBuildStore(builder, i8_0,
+ ngg_gs_get_emit_primflag_ptr(ctx, tmp, stream));
ac_build_endloop(&ctx->ac, 5100);
}
if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
continue;
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implicit C-style array */
- ctx->ac.i32_1, /* second value of struct */
- LLVMConstInt(ctx->ac.i32, stream, false),
- };
- tmp = LLVMBuildGEP(builder, vertexptr, gep_idx, 3, "");
- tmp = LLVMBuildLoad(builder, tmp, "");
+ tmp = LLVMBuildLoad(builder,
+ ngg_gs_get_emit_primflag_ptr(ctx, vertexptr, stream), "");
tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
tmp2 = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
nggso.prim_enable[stream] = LLVMBuildAnd(builder, tmp, tmp2, "");
/* Load primitive liveness */
tmp = ngg_gs_vertex_ptr(ctx, primidx);
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implicit C-style array */
- ctx->ac.i32_1, /* second value of struct */
- ctx->ac.i32_0, /* stream 0 */
- };
- tmp = LLVMBuildGEP(builder, tmp, gep_idx, 3, "");
- tmp = LLVMBuildLoad(builder, tmp, "");
+ tmp = LLVMBuildLoad(builder,
+ ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
const LLVMValueRef primlive =
LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
* there are 4 or more contiguous null primitives in the export
* (in the common case of single-dword prim exports).
*/
- build_sendmsg_gs_alloc_req(ctx, vertlive_scan.result_reduce, num_emit_threads);
+ ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx),
+ vertlive_scan.result_reduce, num_emit_threads);
/* Setup the reverse vertex compaction permutation. We re-use stream 1
* of the primitive liveness flags, relying on the fact that each
ac_build_ifcc(&ctx->ac, vertlive, 5130);
{
tmp = ngg_gs_vertex_ptr(ctx, vertlive_scan.result_exclusive);
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implicit C-style array */
- ctx->ac.i32_1, /* second value of struct */
- ctx->ac.i32_1, /* stream 1 */
- };
- tmp = LLVMBuildGEP(builder, tmp, gep_idx, 3, "");
tmp2 = LLVMBuildTrunc(builder, tid, ctx->ac.i8, "");
- LLVMBuildStore(builder, tmp2, tmp);
+ LLVMBuildStore(builder, tmp2,
+ ngg_gs_get_emit_primflag_ptr(ctx, tmp, 1));
}
ac_build_endif(&ctx->ac, 5130);
ac_build_ifcc(&ctx->ac, tmp, 5140);
{
LLVMValueRef flags;
- struct ngg_prim prim = {};
+ struct ac_ngg_prim prim = {};
prim.num_vertices = verts_per_prim;
tmp = ngg_gs_vertex_ptr(ctx, tid);
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implicit C-style array */
- ctx->ac.i32_1, /* second value of struct */
- ctx->ac.i32_0, /* primflag */
- };
- tmp = LLVMBuildGEP(builder, tmp, gep_idx, 3, "");
- flags = LLVMBuildLoad(builder, tmp, "");
+ flags = LLVMBuildLoad(builder,
+ ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
prim.isnull = LLVMBuildNot(builder, LLVMBuildTrunc(builder, flags, ctx->ac.i1, ""), "");
for (unsigned i = 0; i < verts_per_prim; ++i) {
LLVMValueRef is_odd = LLVMBuildLShr(builder, flags, ctx->ac.i8_1, "");
is_odd = LLVMBuildTrunc(builder, is_odd, ctx->ac.i1, "");
- struct ngg_prim in = prim;
+ struct ac_ngg_prim in = prim;
prim.index[0] = in.index[0];
prim.index[1] = LLVMBuildSelect(builder, is_odd,
in.index[2], in.index[1], "");
in.index[1], in.index[2], "");
}
- build_export_prim(ctx, &prim);
+ ac_build_export_prim(&ctx->ac, &prim);
}
ac_build_endif(&ctx->ac, 5140);
outinfo->pos_exports = 0;
tmp = ngg_gs_vertex_ptr(ctx, tid);
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implicit C-style array */
- ctx->ac.i32_1, /* second value of struct */
- ctx->ac.i32_1, /* stream 1: source data index */
- };
- tmp = LLVMBuildGEP(builder, tmp, gep_idx, 3, "");
- tmp = LLVMBuildLoad(builder, tmp, "");
+ tmp = LLVMBuildLoad(builder,
+ ngg_gs_get_emit_primflag_ptr(ctx, tmp, 1), "");
tmp = LLVMBuildZExt(builder, tmp, ctx->ac.i32, "");
const LLVMValueRef vertexptr = ngg_gs_vertex_ptr(ctx, tmp);
unsigned out_idx = 0;
- gep_idx[1] = ctx->ac.i32_0;
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
unsigned output_usage_mask =
ctx->args->shader_info->gs.output_usage_mask[i];
if (!(output_usage_mask & (1 << j)))
continue;
- gep_idx[2] = LLVMConstInt(ctx->ac.i32, out_idx, false);
- tmp = LLVMBuildGEP(builder, vertexptr, gep_idx, 3, "");
+ tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx);
tmp = LLVMBuildLoad(builder, tmp, "");
LLVMTypeRef type = LLVMGetAllocatedType(ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder,
out_ptr[j], "");
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implied C-style array */
- ctx->ac.i32_0, /* first entry of struct */
- LLVMConstInt(ctx->ac.i32, out_idx, false),
- };
- LLVMValueRef ptr = LLVMBuildGEP(builder, vertexptr, gep_idx, 3, "");
-
out_val = ac_to_integer(&ctx->ac, out_val);
out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
- LLVMBuildStore(builder, out_val, ptr);
+ LLVMBuildStore(builder, out_val,
+ ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx));
}
}
assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size);
tmp = LLVMBuildAdd(builder, curverts, ctx->ac.i32_1, "");
LLVMBuildStore(builder, tmp, ctx->gs_curprim_verts[stream]);
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implied C-style array */
- ctx->ac.i32_1, /* second struct entry */
- LLVMConstInt(ctx->ac.i32, stream, false),
- };
- const LLVMValueRef primflagptr =
- LLVMBuildGEP(builder, vertexptr, gep_idx, 3, "");
-
/* The per-vertex primitive flag encoding:
* bit 0: whether this vertex finishes a primitive
* bit 1: whether the primitive is odd (if we are emitting triangle strips)
LLVMBuildShl(builder,
LLVMBuildZExt(builder, is_odd, ctx->ac.i8, ""),
ctx->ac.i8_1, ""), "");
- LLVMBuildStore(builder, tmp, primflagptr);
+ LLVMBuildStore(builder, tmp,
+ ngg_gs_get_emit_primflag_ptr(ctx, vertexptr, stream));
tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), "");
* Add an extra dword per vertex to ensure an odd stride, which
* avoids bank conflicts for SoA accesses.
*/
- declare_esgs_ring(&ctx);
+ if (!args->options->key.vs_common_out.as_ngg_passthrough)
+ declare_esgs_ring(&ctx);
/* This is really only needed when streamout and / or vertex
* compaction is enabled.
*/
- LLVMTypeRef asi32 = LLVMArrayType(ctx.ac.i32, 8);
- ctx.gs_ngg_scratch = LLVMAddGlobalInAddressSpace(ctx.ac.module,
- asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
- LLVMSetInitializer(ctx.gs_ngg_scratch, LLVMGetUndef(asi32));
- LLVMSetAlignment(ctx.gs_ngg_scratch, 4);
+ if (args->shader_info->so.num_outputs) {
+ LLVMTypeRef asi32 = LLVMArrayType(ctx.ac.i32, 8);
+ ctx.gs_ngg_scratch = LLVMAddGlobalInAddressSpace(ctx.ac.module,
+ asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
+ LLVMSetInitializer(ctx.gs_ngg_scratch, LLVMGetUndef(asi32));
+ LLVMSetAlignment(ctx.gs_ngg_scratch, 4);
+ }
}
for(int i = 0; i < shader_count; ++i) {