* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include "si_shader_internal.h"
#include "si_pipe.h"
+#include "si_shader_internal.h"
#include "sid.h"
/* Return the relative patch ID for the current tessellation stage.
 *
 * - PIPE_SHADER_TESS_CTRL: unpacked from the tcs_rel_ids argument with
 *   si_unpack_param(ctx, ..., 0, 8) (presumably shift 0 / width 8 -- confirm
 *   against si_unpack_param).
 * - PIPE_SHADER_TESS_EVAL: read from the tes_rel_patch_id argument.
 * - Any other stage: assert(0); NULL is returned if asserts are compiled out.
 *
 * The '-' and '+' lines below carry the same tokens.
 */
static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
{
- switch (ctx->type) {
- case PIPE_SHADER_TESS_CTRL:
- return si_unpack_param(ctx, ctx->args.tcs_rel_ids, 0, 8);
+ switch (ctx->type) {
+ case PIPE_SHADER_TESS_CTRL:
+ return si_unpack_param(ctx, ctx->args.tcs_rel_ids, 0, 8);
- case PIPE_SHADER_TESS_EVAL:
- return ac_get_arg(&ctx->ac, ctx->tes_rel_patch_id);
+ case PIPE_SHADER_TESS_EVAL:
+ return ac_get_arg(&ctx->ac, ctx->tes_rel_patch_id);
- default:
- assert(0);
- return NULL;
- }
+ default:
+ assert(0);
+ return NULL;
+ }
}
/* Tessellation shaders pass outputs to the next shader using LDS.
* All three shaders VS(LS), TCS, TES share the same LDS space.
*/
/* Return the stride between consecutive TCS input patches, unpacked from
 * vs_state_bits with si_unpack_param(ctx, ..., 11, 13).
 *
 * get_tcs_in_current_patch_offset() multiplies this by the relative patch ID
 * and the product is used as a dword LDS address, so the value is in dwords.
 */
-static LLVMValueRef
-get_tcs_in_patch_stride(struct si_shader_context *ctx)
+static LLVMValueRef get_tcs_in_patch_stride(struct si_shader_context *ctx)
{
- return si_unpack_param(ctx, ctx->vs_state_bits, 11, 13);
+ return si_unpack_param(ctx, ctx->vs_state_bits, 11, 13);
}
/* Compile-time dword stride between TCS output vertices.
 *
 * Asserts that the current stage is PIPE_SHADER_TESS_CTRL. The stride is
 * util_last_bit64(mask) * 4, i.e. 4 dwords per slot, where mask is
 * key.mono.u.ff_tcs_inputs_to_copy when that is non-zero and
 * selector->outputs_written otherwise.
 * (util_last_bit64 presumably returns the 1-based position of the highest
 * set bit -- confirm.)
 */
static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx)
{
- assert(ctx->type == PIPE_SHADER_TESS_CTRL);
+ assert(ctx->type == PIPE_SHADER_TESS_CTRL);
- if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
- return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;
+ if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
+ return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;
- return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
+ return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
}
/* Wrap get_tcs_out_vertex_dw_stride_constant() as an i32 LLVM constant.
 *
 * The '+' side takes the i32 type from ctx->ac.i32 where the '-' side used
 * ctx->i32.
 */
static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
{
- unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx);
+ unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx);
- return LLVMConstInt(ctx->i32, stride, 0);
+ return LLVMConstInt(ctx->ac.i32, stride, 0);
}
/* Return the dword stride between consecutive TCS output patches.
 *
 * When key.mono.u.ff_tcs_inputs_to_copy is non-zero the stride is unpacked at
 * run time from tcs_out_lds_layout (si_unpack_param args 0, 13). Otherwise it
 * is a constant:
 *   TCS_VERTICES_OUT * per-vertex dword stride
 *   + util_last_bit64(patch_outputs_written) * 4
 *
 * The '+' side takes the i32 type from ctx->ac.i32 where the '-' side used
 * ctx->i32.
 */
static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
{
- if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
- return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);
-
- const struct si_shader_info *info = &ctx->shader->selector->info;
- unsigned tcs_out_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
- unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
- unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written);
- unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride +
- num_patch_outputs * 4;
- return LLVMConstInt(ctx->i32, patch_dw_stride, 0);
+ if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
+ return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);
+
+ const struct si_shader_info *info = &ctx->shader->selector->info;
+ unsigned tcs_out_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+ unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
+ unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written);
+ unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride + num_patch_outputs * 4;
+ return LLVMConstInt(ctx->ac.i32, patch_dw_stride, 0);
}
/* Return the offset of TCS output patch 0: the field unpacked from
 * tcs_out_lds_offsets with si_unpack_param(ctx, ..., 0, 16), multiplied by 4.
 *
 * The result feeds get_tcs_out_current_patch_offset(), whose value is used as
 * a dword LDS address.
 */
-static LLVMValueRef
-get_tcs_out_patch0_offset(struct si_shader_context *ctx)
+static LLVMValueRef get_tcs_out_patch0_offset(struct si_shader_context *ctx)
{
- return LLVMBuildMul(ctx->ac.builder,
- si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 0, 16),
- LLVMConstInt(ctx->i32, 4, 0), "");
+ return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 0, 16),
+ LLVMConstInt(ctx->ac.i32, 4, 0), "");
}
/* Return the offset of the per-patch data of TCS output patch 0: the field
 * unpacked from tcs_out_lds_offsets with si_unpack_param(ctx, ..., 16, 16),
 * multiplied by 4.
 *
 * This reads the same argument as get_tcs_out_patch0_offset() but with
 * different unpack arguments (16, 16 instead of 0, 16).
 */
-static LLVMValueRef
-get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
+static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
{
- return LLVMBuildMul(ctx->ac.builder,
- si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16),
- LLVMConstInt(ctx->i32, 4, 0), "");
+ return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16),
+ LLVMConstInt(ctx->ac.i32, 4, 0), "");
}
/* Return the offset of the current patch within the TCS inputs:
 * get_tcs_in_patch_stride() * get_rel_patch_id().
 *
 * si_nir_load_tcs_varyings() uses the result as the base dword address for
 * LDS input loads.
 */
-static LLVMValueRef
-get_tcs_in_current_patch_offset(struct si_shader_context *ctx)
+static LLVMValueRef get_tcs_in_current_patch_offset(struct si_shader_context *ctx)
{
- LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
- LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
+ LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
+ LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
- return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, "");
+ return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, "");
}
/* Return the offset of the current patch's per-vertex TCS outputs.
 *
 * Built as ac_build_imad(patch_stride, rel_patch_id, patch0_offset), which
 * presumably evaluates patch_stride * rel_patch_id + patch0_offset (confirm
 * against ac_build_imad).
 */
-static LLVMValueRef
-get_tcs_out_current_patch_offset(struct si_shader_context *ctx)
+static LLVMValueRef get_tcs_out_current_patch_offset(struct si_shader_context *ctx)
{
- LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
- LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
- LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
+ LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
+ LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
+ LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
- return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_offset);
+ return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_offset);
}
/* Return the offset of the current patch's per-patch TCS output data.
 *
 * Built as ac_build_imad(patch_stride, rel_patch_id, patch0_patch_data_offset),
 * which presumably evaluates
 * patch_stride * rel_patch_id + patch0_patch_data_offset (confirm against
 * ac_build_imad).
 */
-static LLVMValueRef
-get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
+static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
{
- LLVMValueRef patch0_patch_data_offset =
- get_tcs_out_patch0_patch_data_offset(ctx);
- LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
- LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
+ LLVMValueRef patch0_patch_data_offset = get_tcs_out_patch0_patch_data_offset(ctx);
+ LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
+ LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
- return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset);
+ return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset);
}
/* Return the number of TCS output vertices per patch.
 *
 * If the shader has a selector, the TCS_VERTICES_OUT property is read from
 * it; without a selector the count is treated as 0. A non-zero count in a
 * TCS is returned as an i32 constant. In every other case the value is
 * unpacked at run time from tcs_offchip_layout (si_unpack_param args 6, 6).
 */
static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
{
- unsigned tcs_out_vertices =
- ctx->shader->selector ?
- ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] : 0;
+ unsigned tcs_out_vertices =
+ ctx->shader->selector ? ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT]
+ : 0;
- /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
- if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices)
- return LLVMConstInt(ctx->i32, tcs_out_vertices, 0);
+ /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
+ if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices)
+ return LLVMConstInt(ctx->ac.i32, tcs_out_vertices, 0);
- return si_unpack_param(ctx, ctx->tcs_offchip_layout, 6, 6);
+ return si_unpack_param(ctx, ctx->tcs_offchip_layout, 6, 6);
}
/* Return the dword stride between TCS input vertices.
 *
 * - PIPE_SHADER_VERTEX: constant selector->lshs_vertex_stride / 4.
 * - PIPE_SHADER_TESS_CTRL: on GFX9+ with a monolithic shader, the constant
 *   key.part.tcs.ls->lshs_vertex_stride / 4; otherwise unpacked at run time
 *   from vs_state_bits (si_unpack_param args 24, 8).
 * - Any other stage: assert(0); NULL is returned if asserts are compiled out.
 *
 * The division by 4 suggests lshs_vertex_stride is stored in bytes
 * (NOTE(review): confirm where lshs_vertex_stride is set).
 */
static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
{
- unsigned stride;
-
- switch (ctx->type) {
- case PIPE_SHADER_VERTEX:
- stride = ctx->shader->selector->lshs_vertex_stride / 4;
- return LLVMConstInt(ctx->i32, stride, 0);
-
- case PIPE_SHADER_TESS_CTRL:
- if (ctx->screen->info.chip_class >= GFX9 &&
- ctx->shader->is_monolithic) {
- stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4;
- return LLVMConstInt(ctx->i32, stride, 0);
- }
- return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);
-
- default:
- assert(0);
- return NULL;
- }
+ unsigned stride;
+
+ switch (ctx->type) {
+ case PIPE_SHADER_VERTEX:
+ stride = ctx->shader->selector->lshs_vertex_stride / 4;
+ return LLVMConstInt(ctx->ac.i32, stride, 0);
+
+ case PIPE_SHADER_TESS_CTRL:
+ if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) {
+ stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4;
+ return LLVMConstInt(ctx->ac.i32, stride, 0);
+ }
+ return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);
+
+ default:
+ assert(0);
+ return NULL;
+ }
}
/* Compute the dword address of one input/output slot.
 *
 * \param vertex_dw_stride  dword stride between vertices, or NULL to skip the
 *                          per-vertex term (vertex_index is then unused)
 * \param base_addr         starting dword address
 * \param vertex_index      vertex index, only read when vertex_dw_stride is set
 * \param param_index       optional dynamic slot index; each slot is 4 dwords
 * \param name, index       semantic name/index selecting the slot
 *
 * Assuming ac_build_imad(a, b, c) evaluates a * b + c, the result is
 *   base_addr [+ vertex_index * vertex_dw_stride] [+ param_index * 4]
 *   + param * 4
 * where param comes from si_shader_io_get_unique_index_patch() for PATCH,
 * TESSINNER and TESSOUTER semantics and from si_shader_io_get_unique_index()
 * for everything else.
 */
-static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context *ctx,
- LLVMValueRef vertex_dw_stride,
- LLVMValueRef base_addr,
- LLVMValueRef vertex_index,
- LLVMValueRef param_index,
- ubyte name, ubyte index)
+static LLVMValueRef
+get_dw_address_from_generic_indices(struct si_shader_context *ctx, LLVMValueRef vertex_dw_stride,
+ LLVMValueRef base_addr, LLVMValueRef vertex_index,
+ LLVMValueRef param_index, ubyte name, ubyte index)
{
- if (vertex_dw_stride) {
- base_addr = ac_build_imad(&ctx->ac, vertex_index,
- vertex_dw_stride, base_addr);
- }
-
- if (param_index) {
- base_addr = ac_build_imad(&ctx->ac, param_index,
- LLVMConstInt(ctx->i32, 4, 0), base_addr);
- }
-
- int param = name == TGSI_SEMANTIC_PATCH ||
- name == TGSI_SEMANTIC_TESSINNER ||
- name == TGSI_SEMANTIC_TESSOUTER ?
- si_shader_io_get_unique_index_patch(name, index) :
- si_shader_io_get_unique_index(name, index, false);
-
- /* Add the base address of the element. */
- return LLVMBuildAdd(ctx->ac.builder, base_addr,
- LLVMConstInt(ctx->i32, param * 4, 0), "");
+ if (vertex_dw_stride) {
+ base_addr = ac_build_imad(&ctx->ac, vertex_index, vertex_dw_stride, base_addr);
+ }
+
+ if (param_index) {
+ base_addr = ac_build_imad(&ctx->ac, param_index, LLVMConstInt(ctx->ac.i32, 4, 0), base_addr);
+ }
+
+ int param = name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
+ name == TGSI_SEMANTIC_TESSOUTER
+ ? si_shader_io_get_unique_index_patch(name, index)
+ : si_shader_io_get_unique_index(name, index, false);
+
+ /* Add the base address of the element. */
+ return LLVMBuildAdd(ctx->ac.builder, base_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");
}
/* The offchip buffer layout for TCS->TES is
 * (NOTE(review): the layout listing that should follow "is" is not present
 * in this view; the formulas below are read off the code, assuming
 * ac_build_imad(a, b, c) evaluates a * b + c.)
 *
 * Per-vertex data (vertex_index != NULL):
 *   (param_index * total_vertices
 *    + rel_patch_id * vertices_per_patch + vertex_index) * 16
 *
 * Per-patch data (vertex_index == NULL):
 *   (param_index * num_patches + rel_patch_id) * 16 + patch_data_offset
 *
 * where num_patches and patch_data_offset are unpacked from
 * tcs_offchip_layout (si_unpack_param args 0, 6 and 12, 20),
 * vertices_per_patch comes from get_num_tcs_out_vertices() and
 * total_vertices = vertices_per_patch * num_patches.
 *
 * Note that every attribute has 4 components.
 * The constant 16 is consistent with 4 components of 4 bytes each.
 */
static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
- LLVMValueRef rel_patch_id,
- LLVMValueRef vertex_index,
+ LLVMValueRef rel_patch_id, LLVMValueRef vertex_index,
LLVMValueRef param_index)
{
- LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
- LLVMValueRef param_stride, constant16;
-
- vertices_per_patch = get_num_tcs_out_vertices(ctx);
- num_patches = si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6);
- total_vertices = LLVMBuildMul(ctx->ac.builder, vertices_per_patch,
- num_patches, "");
-
- constant16 = LLVMConstInt(ctx->i32, 16, 0);
- if (vertex_index) {
- base_addr = ac_build_imad(&ctx->ac, rel_patch_id,
- vertices_per_patch, vertex_index);
- param_stride = total_vertices;
- } else {
- base_addr = rel_patch_id;
- param_stride = num_patches;
- }
-
- base_addr = ac_build_imad(&ctx->ac, param_index, param_stride, base_addr);
- base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");
-
- if (!vertex_index) {
- LLVMValueRef patch_data_offset =
- si_unpack_param(ctx, ctx->tcs_offchip_layout, 12, 20);
-
- base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
- patch_data_offset, "");
- }
- return base_addr;
+ LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
+ LLVMValueRef param_stride, constant16;
+
+ vertices_per_patch = get_num_tcs_out_vertices(ctx);
+ num_patches = si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6);
+ total_vertices = LLVMBuildMul(ctx->ac.builder, vertices_per_patch, num_patches, "");
+
+ constant16 = LLVMConstInt(ctx->ac.i32, 16, 0);
+ if (vertex_index) {
+ base_addr = ac_build_imad(&ctx->ac, rel_patch_id, vertices_per_patch, vertex_index);
+ param_stride = total_vertices;
+ } else {
+ base_addr = rel_patch_id;
+ param_stride = num_patches;
+ }
+
+ base_addr = ac_build_imad(&ctx->ac, param_index, param_stride, base_addr);
+ base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");
+
+ if (!vertex_index) {
+ LLVMValueRef patch_data_offset = si_unpack_param(ctx, ctx->tcs_offchip_layout, 12, 20);
+
+ base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, patch_data_offset, "");
+ }
+ return base_addr;
}
/* Compute the offchip buffer address of a slot identified by semantic.
 *
 * \param vertex_index  vertex index, or NULL for per-patch data
 * \param param_index   optional dynamic slot index added to the slot's
 *                      unique index; NULL means "no dynamic part"
 * \param name, index   semantic name/index selecting the slot
 *
 * The unique index comes from si_shader_io_get_unique_index_patch() for
 * PATCH, TESSINNER and TESSOUTER semantics and from
 * si_shader_io_get_unique_index() otherwise. The address is then built by
 * get_tcs_tes_buffer_address() for the current relative patch ID.
 */
-static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(
- struct si_shader_context *ctx,
- LLVMValueRef vertex_index,
- LLVMValueRef param_index,
- ubyte name, ubyte index)
+static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(struct si_shader_context *ctx,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ ubyte name, ubyte index)
{
- unsigned param_index_base;
-
- param_index_base = name == TGSI_SEMANTIC_PATCH ||
- name == TGSI_SEMANTIC_TESSINNER ||
- name == TGSI_SEMANTIC_TESSOUTER ?
- si_shader_io_get_unique_index_patch(name, index) :
- si_shader_io_get_unique_index(name, index, false);
-
- if (param_index) {
- param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
- LLVMConstInt(ctx->i32, param_index_base, 0),
- "");
- } else {
- param_index = LLVMConstInt(ctx->i32, param_index_base, 0);
- }
-
- return get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx),
- vertex_index, param_index);
+ unsigned param_index_base;
+
+ param_index_base = name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
+ name == TGSI_SEMANTIC_TESSOUTER
+ ? si_shader_io_get_unique_index_patch(name, index)
+ : si_shader_io_get_unique_index(name, index, false);
+
+ if (param_index) {
+ param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
+ LLVMConstInt(ctx->ac.i32, param_index_base, 0), "");
+ } else {
+ param_index = LLVMConstInt(ctx->ac.i32, param_index_base, 0);
+ }
+
+ return get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), vertex_index, param_index);
}
/* Load one value, or a whole vec4, of the given type from a buffer.
 *
 * \param type     element type of the result
 * \param swizzle  component to load, or ~0 to load all four components
 * \param buffer   buffer descriptor passed to ac_build_buffer_load()
 * \param offset, base  forwarded to ac_build_buffer_load() as (base, offset)
 * \param can_speculate  forwarded to ac_build_buffer_load()
 *
 * Three paths:
 * - swizzle == ~0: one 4-channel load, bitcast to a 4-element vector of type.
 * - type size != 8: one 4-channel load, bitcast, then element `swizzle` is
 *   extracted.
 * - type size == 8: two 1-channel loads at immediate offsets swizzle * 4 and
 *   swizzle * 4 + 4, combined with si_build_gather_64bit().
 *
 * NOTE(review): the visible caller, si_nir_load_input_tes(), passes its
 * `base` in the `offset` position and its `addr` in the `base` position, so
 * the parameter names here are crossed relative to that caller.
 */
-static LLVMValueRef buffer_load(struct si_shader_context *ctx,
- LLVMTypeRef type, unsigned swizzle,
- LLVMValueRef buffer, LLVMValueRef offset,
- LLVMValueRef base, bool can_speculate)
+static LLVMValueRef buffer_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle,
+ LLVMValueRef buffer, LLVMValueRef offset, LLVMValueRef base,
+ bool can_speculate)
{
- LLVMValueRef value, value2;
- LLVMTypeRef vec_type = LLVMVectorType(type, 4);
+ LLVMValueRef value, value2;
+ LLVMTypeRef vec_type = LLVMVectorType(type, 4);
- if (swizzle == ~0) {
- value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
- 0, ac_glc, can_speculate, false);
+ if (swizzle == ~0) {
+ value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, 0, ac_glc,
+ can_speculate, false);
- return LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
- }
+ return LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
+ }
- if (ac_get_type_size(type) != 8) {
- value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
- 0, ac_glc, can_speculate, false);
+ if (ac_get_type_size(type) != 8) {
+ value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, 0, ac_glc,
+ can_speculate, false);
- value = LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
- return LLVMBuildExtractElement(ctx->ac.builder, value,
- LLVMConstInt(ctx->i32, swizzle, 0), "");
- }
+ value = LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
+ return LLVMBuildExtractElement(ctx->ac.builder, value, LLVMConstInt(ctx->ac.i32, swizzle, 0),
+ "");
+ }
- value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
- swizzle * 4, ac_glc, can_speculate, false);
+ value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, swizzle * 4, ac_glc,
+ can_speculate, false);
- value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
- swizzle * 4 + 4, ac_glc, can_speculate, false);
+ value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, swizzle * 4 + 4, ac_glc,
+ can_speculate, false);
- return si_build_gather_64bit(ctx, type, value, value2);
+ return si_build_gather_64bit(ctx, type, value, value2);
}
/**
 * Load a value of the given type from LDS.
 *
 * \param type     type of the returned value
 * \param swizzle  offset (typically 0..3); it can be ~0, which loads a vec4
 * \param dw_addr  address in dwords
 *
 * With swizzle == ~0 the function calls itself for channels 0..3 and gathers
 * the four results. For 8-byte types it loads two i32 values at swizzle and
 * swizzle + 1 and combines them with si_build_gather_64bit(). Otherwise it
 * loads the dword at dw_addr + swizzle via ac_lds_load() and bitcasts it to
 * type.
 */
-static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx,
- LLVMTypeRef type, unsigned swizzle,
- LLVMValueRef dw_addr)
+static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle,
+ LLVMValueRef dw_addr)
{
- LLVMValueRef value;
+ LLVMValueRef value;
- if (swizzle == ~0) {
- LLVMValueRef values[4];
+ if (swizzle == ~0) {
+ LLVMValueRef values[4];
- for (unsigned chan = 0; chan < 4; chan++)
- values[chan] = lshs_lds_load(ctx, type, chan, dw_addr);
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = lshs_lds_load(ctx, type, chan, dw_addr);
- return ac_build_gather_values(&ctx->ac, values, 4);
- }
+ return ac_build_gather_values(&ctx->ac, values, 4);
+ }
- /* Split 64-bit loads. */
- if (ac_get_type_size(type) == 8) {
- LLVMValueRef lo, hi;
+ /* Split 64-bit loads. */
+ if (ac_get_type_size(type) == 8) {
+ LLVMValueRef lo, hi;
- lo = lshs_lds_load(ctx, ctx->i32, swizzle, dw_addr);
- hi = lshs_lds_load(ctx, ctx->i32, swizzle + 1, dw_addr);
- return si_build_gather_64bit(ctx, type, lo, hi);
- }
+ lo = lshs_lds_load(ctx, ctx->ac.i32, swizzle, dw_addr);
+ hi = lshs_lds_load(ctx, ctx->ac.i32, swizzle + 1, dw_addr);
+ return si_build_gather_64bit(ctx, type, lo, hi);
+ }
- dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
- LLVMConstInt(ctx->i32, swizzle, 0), "");
+ dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, swizzle, 0), "");
- value = ac_lds_load(&ctx->ac, dw_addr);
+ value = ac_lds_load(&ctx->ac, dw_addr);
- return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
+ return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
}
/**
 * Store one value to LDS at dw_addr + dw_offset_imm.
 *
 * \param dw_offset_imm  constant dword offset added to dw_addr
 * \param dw_addr        address in dwords
 * \param value          value to store
 *
 * The store itself is done by ac_lds_store().
 */
-static void lshs_lds_store(struct si_shader_context *ctx,
- unsigned dw_offset_imm, LLVMValueRef dw_addr,
- LLVMValueRef value)
+static void lshs_lds_store(struct si_shader_context *ctx, unsigned dw_offset_imm,
+ LLVMValueRef dw_addr, LLVMValueRef value)
{
- dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
- LLVMConstInt(ctx->i32, dw_offset_imm, 0), "");
+ dw_addr =
+ LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, dw_offset_imm, 0), "");
- ac_lds_store(&ctx->ac, dw_addr, value);
+ ac_lds_store(&ctx->ac, dw_addr, value);
}
/* Selects which ring get_tess_ring_descriptor() builds a descriptor for.
 *
 * - TCS_FACTOR_RING: address taken from tcs_out_lds_layout, masked, plus
 *   screen->tess_offchip_ring_size.
 * - TESS_OFFCHIP_RING_TCS: address taken from tcs_out_lds_layout, masked.
 * - TESS_OFFCHIP_RING_TES: address taken from tes_offchip_addr.
 */
-enum si_tess_ring {
- TCS_FACTOR_RING,
- TESS_OFFCHIP_RING_TCS,
- TESS_OFFCHIP_RING_TES,
+enum si_tess_ring
+{
+ TCS_FACTOR_RING,
+ TESS_OFFCHIP_RING_TCS,
+ TESS_OFFCHIP_RING_TES,
};
/* Build a 4-dword buffer descriptor for one of the tessellation rings.
 *
 * desc[0]: low address dword. It is read from tes_offchip_addr for
 *          TESS_OFFCHIP_RING_TES and from tcs_out_lds_layout for the two TCS
 *          rings. The TCS rings keep only the bits selected by 0xfff80000
 *          (the top 13 bits), and TCS_FACTOR_RING additionally adds
 *          screen->tess_offchip_ring_size.
 * desc[1]: S_008F04_BASE_ADDRESS_HI(screen->info.address32_hi).
 * desc[2]: 0xffffffff.
 * desc[3]: rsrc3 -- X/Y/Z/W destination selects plus a format field set that
 *          depends on the chip: FORMAT/OOB_SELECT/RESOURCE_LEVEL on GFX10+,
 *          NUM_FORMAT/DATA_FORMAT on older chips.
 *
 * The four dwords are gathered into one vector value and returned.
 */
-static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx,
- enum si_tess_ring ring)
+static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum si_tess_ring ring)
{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef addr = ac_get_arg(&ctx->ac,
- ring == TESS_OFFCHIP_RING_TES ?
- ctx->tes_offchip_addr :
- ctx->tcs_out_lds_layout);
-
- /* TCS only receives high 13 bits of the address. */
- if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) {
- addr = LLVMBuildAnd(builder, addr,
- LLVMConstInt(ctx->i32, 0xfff80000, 0), "");
- }
-
- if (ring == TCS_FACTOR_RING) {
- unsigned tf_offset = ctx->screen->tess_offchip_ring_size;
- addr = LLVMBuildAdd(builder, addr,
- LLVMConstInt(ctx->i32, tf_offset, 0), "");
- }
-
- uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (ctx->screen->info.chip_class >= GFX10)
- rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- else
- rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
-
- LLVMValueRef desc[4];
- desc[0] = addr;
- desc[1] = LLVMConstInt(ctx->i32,
- S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
- desc[2] = LLVMConstInt(ctx->i32, 0xffffffff, 0);
- desc[3] = LLVMConstInt(ctx->i32, rsrc3, false);
-
- return ac_build_gather_values(&ctx->ac, desc, 4);
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef addr = ac_get_arg(
+ &ctx->ac, ring == TESS_OFFCHIP_RING_TES ? ctx->tes_offchip_addr : ctx->tcs_out_lds_layout);
+
+ /* TCS only receives high 13 bits of the address. */
+ if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) {
+ addr = LLVMBuildAnd(builder, addr, LLVMConstInt(ctx->ac.i32, 0xfff80000, 0), "");
+ }
+
+ if (ring == TCS_FACTOR_RING) {
+ unsigned tf_offset = ctx->screen->tess_offchip_ring_size;
+ addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), "");
+ }
+
+ uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (ctx->screen->info.chip_class >= GFX10)
+ rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ else
+ rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+ LLVMValueRef desc[4];
+ desc[0] = addr;
+ desc[1] = LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
+ desc[2] = LLVMConstInt(ctx->ac.i32, 0xffffffff, 0);
+ desc[3] = LLVMConstInt(ctx->ac.i32, rsrc3, false);
+
+ return ac_build_gather_values(&ctx->ac, desc, 4);
}
/* Build the TES offchip ring descriptor and cache it in
 * ctx->tess_offchip_ring, where si_nir_load_input_tes() reads it.
 */
void si_llvm_preload_tes_rings(struct si_shader_context *ctx)
{
- ctx->tess_offchip_ring = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TES);
+ ctx->tess_offchip_ring = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TES);
}
/* Load TCS inputs, or TCS outputs being read back, from LDS.
 *
 * \param type             element type of the loaded components
 * \param vertex_index     vertex index; on the '+' side NULL marks a
 *                         per-patch slot
 * \param param_index      dynamic slot index, or NULL to use const_index
 * \param const_index      constant slot index used when param_index is NULL
 * \param driver_location  divided by 4 and then used to index the
 *                         semantic name/index tables
 * \param component        first component to load
 * \param num_components   number of components to load
 * \param load_input       true: read inputs; false: read outputs
 *
 * location and is_compact are not read by this function.
 *
 * The '-' side takes is_patch as a parameter. The '+' side renames that
 * parameter to `unused` and derives is_patch from vertex_index == NULL.
 * Both sides assert that is_patch matches the slot's semantic
 * (PATCH/TESSINNER/TESSOUTER).
 *
 * Base address and stride:
 * - inputs: current input patch offset, input vertex stride;
 * - per-patch outputs: current patch data offset, no vertex stride;
 * - per-vertex outputs: current output patch offset, output vertex stride.
 *
 * Results are written to value[i + component]; value[] has 4 entries, so
 * this assumes component + num_components <= 4 (TODO confirm at callers).
 * For 8-byte types each component uses two dword offsets (offset = i * 2).
 */
-static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
- LLVMTypeRef type,
- LLVMValueRef vertex_index,
- LLVMValueRef param_index,
- unsigned const_index,
- unsigned location,
- unsigned driver_location,
- unsigned component,
- unsigned num_components,
- bool is_patch,
- bool is_compact,
- bool load_input)
+static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
+ LLVMValueRef vertex_index, LLVMValueRef param_index,
+ unsigned const_index, unsigned location,
+ unsigned driver_location, unsigned component,
+ unsigned num_components, bool unused,
+ bool is_compact, bool load_input)
{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- struct si_shader_info *info = &ctx->shader->selector->info;
- LLVMValueRef dw_addr, stride;
- ubyte name, index;
-
- driver_location = driver_location / 4;
-
- if (load_input) {
- name = info->input_semantic_name[driver_location];
- index = info->input_semantic_index[driver_location];
- } else {
- name = info->output_semantic_name[driver_location];
- index = info->output_semantic_index[driver_location];
- }
-
- assert((name == TGSI_SEMANTIC_PATCH ||
- name == TGSI_SEMANTIC_TESSINNER ||
- name == TGSI_SEMANTIC_TESSOUTER) == is_patch);
-
- if (load_input) {
- stride = get_tcs_in_vertex_dw_stride(ctx);
- dw_addr = get_tcs_in_current_patch_offset(ctx);
- } else {
- if (is_patch) {
- stride = NULL;
- dw_addr = get_tcs_out_current_patch_data_offset(ctx);
- } else {
- stride = get_tcs_out_vertex_dw_stride(ctx);
- dw_addr = get_tcs_out_current_patch_offset(ctx);
- }
- }
-
- if (!param_index) {
- param_index = LLVMConstInt(ctx->i32, const_index, 0);
- }
-
- dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr,
- vertex_index, param_index,
- name, index);
-
- LLVMValueRef value[4];
- for (unsigned i = 0; i < num_components; i++) {
- unsigned offset = i;
- if (ac_get_type_size(type) == 8)
- offset *= 2;
-
- offset += component;
- value[i + component] = lshs_lds_load(ctx, type, offset, dw_addr);
- }
-
- return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct si_shader_info *info = &ctx->shader->selector->info;
+ LLVMValueRef dw_addr, stride;
+ ubyte name, index;
+
+ driver_location = driver_location / 4;
+
+ if (load_input) {
+ name = info->input_semantic_name[driver_location];
+ index = info->input_semantic_index[driver_location];
+ } else {
+ name = info->output_semantic_name[driver_location];
+ index = info->output_semantic_index[driver_location];
+ }
+
+ bool is_patch = vertex_index == NULL;
+ assert((name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
+ name == TGSI_SEMANTIC_TESSOUTER) == is_patch);
+
+ if (load_input) {
+ stride = get_tcs_in_vertex_dw_stride(ctx);
+ dw_addr = get_tcs_in_current_patch_offset(ctx);
+ } else {
+ if (is_patch) {
+ stride = NULL;
+ dw_addr = get_tcs_out_current_patch_data_offset(ctx);
+ } else {
+ stride = get_tcs_out_vertex_dw_stride(ctx);
+ dw_addr = get_tcs_out_current_patch_offset(ctx);
+ }
+ }
+
+ if (!param_index) {
+ param_index = LLVMConstInt(ctx->ac.i32, const_index, 0);
+ }
+
+ dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,
+ name, index);
+
+ LLVMValueRef value[4];
+ for (unsigned i = 0; i < num_components; i++) {
+ unsigned offset = i;
+ if (ac_get_type_size(type) == 8)
+ offset *= 2;
+
+ offset += component;
+ value[i + component] = lshs_lds_load(ctx, type, offset, dw_addr);
+ }
+
+ return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
/* Load TES inputs from the offchip ring cached in ctx->tess_offchip_ring.
 *
 * \param type             element type of the loaded components
 * \param vertex_index     vertex index, or NULL for per-patch inputs
 * \param param_index      dynamic slot index, or NULL to use const_index
 * \param const_index      constant slot index used when param_index is NULL
 * \param driver_location  divided by 4 and then used to index the input
 *                         semantic name/index tables
 * \param component        first component to load
 * \param num_components   number of components to load
 *
 * location, is_compact and load_input are not read by this function.
 *
 * The '-' side has external linkage and takes is_patch as a parameter. The
 * '+' side is static, renames that parameter to `unused`, and asserts the
 * semantic against vertex_index == NULL instead.
 *
 * For 8-byte types each component takes two dword offsets. When the doubled
 * offset reaches 4, the address is recomputed from the semantic of the next
 * slot (driver_location + 1) and the offset wraps with % 4.
 *
 * Results are written to value[i + component]; value[] has 4 entries, so
 * this assumes component + num_components <= 4 (TODO confirm at callers).
 */
-LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
- LLVMTypeRef type,
- LLVMValueRef vertex_index,
- LLVMValueRef param_index,
- unsigned const_index,
- unsigned location,
- unsigned driver_location,
- unsigned component,
- unsigned num_components,
- bool is_patch,
- bool is_compact,
- bool load_input)
+static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type,
+ LLVMValueRef vertex_index, LLVMValueRef param_index,
+ unsigned const_index, unsigned location,
+ unsigned driver_location, unsigned component,
+ unsigned num_components, bool unused, bool is_compact,
+ bool load_input)
{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- struct si_shader_info *info = &ctx->shader->selector->info;
- LLVMValueRef base, addr;
-
- driver_location = driver_location / 4;
- ubyte name = info->input_semantic_name[driver_location];
- ubyte index = info->input_semantic_index[driver_location];
-
- assert((name == TGSI_SEMANTIC_PATCH ||
- name == TGSI_SEMANTIC_TESSINNER ||
- name == TGSI_SEMANTIC_TESSOUTER) == is_patch);
-
- base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
-
- if (!param_index) {
- param_index = LLVMConstInt(ctx->i32, const_index, 0);
- }
-
- addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
- param_index,
- name, index);
-
- /* TODO: This will generate rather ordinary llvm code, although it
- * should be easy for the optimiser to fix up. In future we might want
- * to refactor buffer_load().
- */
- LLVMValueRef value[4];
- for (unsigned i = 0; i < num_components; i++) {
- unsigned offset = i;
- if (ac_get_type_size(type) == 8) {
- offset *= 2;
- if (offset == 4) {
- ubyte name = info->input_semantic_name[driver_location + 1];
- ubyte index = info->input_semantic_index[driver_location + 1];
- addr = get_tcs_tes_buffer_address_from_generic_indices(ctx,
- vertex_index,
- param_index,
- name, index);
- }
-
- offset = offset % 4;
- }
-
- offset += component;
- value[i + component] = buffer_load(ctx, type, offset,
- ctx->tess_offchip_ring, base, addr, true);
- }
-
- return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct si_shader_info *info = &ctx->shader->selector->info;
+ LLVMValueRef base, addr;
+
+ driver_location = driver_location / 4;
+ ubyte name = info->input_semantic_name[driver_location];
+ ubyte index = info->input_semantic_index[driver_location];
+
+ assert((name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
+ name == TGSI_SEMANTIC_TESSOUTER) == (vertex_index == NULL));
+
+ base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
+
+ if (!param_index) {
+ param_index = LLVMConstInt(ctx->ac.i32, const_index, 0);
+ }
+
+ addr =
+ get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, name, index);
+
+ /* TODO: This will generate rather ordinary llvm code, although it
+ * should be easy for the optimiser to fix up. In future we might want
+ * to refactor buffer_load().
+ */
+ LLVMValueRef value[4];
+ for (unsigned i = 0; i < num_components; i++) {
+ unsigned offset = i;
+ if (ac_get_type_size(type) == 8) {
+ offset *= 2;
+ if (offset == 4) {
+ ubyte name = info->input_semantic_name[driver_location + 1];
+ ubyte index = info->input_semantic_index[driver_location + 1];
+ addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index,
+ name, index);
+ }
+
+ offset = offset % 4;
+ }
+
+ offset += component;
+ value[i + component] =
+ buffer_load(ctx, type, offset, ctx->tess_offchip_ring, base, addr, true);
+ }
+
+ return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
-static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
- const struct nir_variable *var,
- LLVMValueRef vertex_index,
- LLVMValueRef param_index,
- unsigned const_index,
- LLVMValueRef src,
- unsigned writemask)
+static void si_nir_store_output_tcs(struct ac_shader_abi *abi, const struct nir_variable *var,
+ LLVMValueRef vertex_index, LLVMValueRef param_index,
+ unsigned const_index, LLVMValueRef src, unsigned writemask,
+ unsigned component, unsigned driver_location)
{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- struct si_shader_info *info = &ctx->shader->selector->info;
- const unsigned component = var->data.location_frac;
- unsigned driver_location = var->data.driver_location;
- LLVMValueRef dw_addr, stride;
- LLVMValueRef buffer, base, addr;
- LLVMValueRef values[8];
- bool skip_lds_store;
- bool is_tess_factor = false, is_tess_inner = false;
-
- driver_location = driver_location / 4;
- ubyte name = info->output_semantic_name[driver_location];
- ubyte index = info->output_semantic_index[driver_location];
-
- bool is_const = !param_index;
- if (!param_index)
- param_index = LLVMConstInt(ctx->i32, const_index, 0);
-
- const bool is_patch = var->data.patch ||
- var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
- var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
-
- /* Invalid SPIR-V can cause this. */
- if ((name == TGSI_SEMANTIC_PATCH ||
- name == TGSI_SEMANTIC_TESSINNER ||
- name == TGSI_SEMANTIC_TESSOUTER) != is_patch)
- return;
-
- if (!is_patch) {
- stride = get_tcs_out_vertex_dw_stride(ctx);
- dw_addr = get_tcs_out_current_patch_offset(ctx);
- dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr,
- vertex_index, param_index,
- name, index);
-
- skip_lds_store = !info->reads_pervertex_outputs;
- } else {
- dw_addr = get_tcs_out_current_patch_data_offset(ctx);
- dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr,
- vertex_index, param_index,
- name, index);
-
- skip_lds_store = !info->reads_perpatch_outputs;
-
- if (is_const && const_index == 0) {
- int name = info->output_semantic_name[driver_location];
-
- /* Always write tess factors into LDS for the TCS epilog. */
- if (name == TGSI_SEMANTIC_TESSINNER ||
- name == TGSI_SEMANTIC_TESSOUTER) {
- /* The epilog doesn't read LDS if invocation 0 defines tess factors. */
- skip_lds_store = !info->reads_tessfactor_outputs &&
- ctx->shader->selector->info.tessfactors_are_def_in_all_invocs;
- is_tess_factor = true;
- is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
- }
- }
- }
-
- buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
-
- base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
-
- addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
- param_index, name, index);
-
- for (unsigned chan = component; chan < 8; chan++) {
- if (!(writemask & (1 << chan)))
- continue;
- LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
-
- unsigned buffer_store_offset = chan % 4;
- if (chan == 4) {
- ubyte name = info->output_semantic_name[driver_location + 1];
- ubyte index = info->output_semantic_index[driver_location + 1];
- addr = get_tcs_tes_buffer_address_from_generic_indices(ctx,
- vertex_index,
- param_index,
- name, index);
- }
-
- /* Skip LDS stores if there is no LDS read of this output. */
- if (!skip_lds_store)
- lshs_lds_store(ctx, chan, dw_addr, value);
-
- value = ac_to_integer(&ctx->ac, value);
- values[chan] = value;
-
- if (writemask != 0xF && !is_tess_factor) {
- ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
- addr, base,
- 4 * buffer_store_offset,
- ac_glc);
- }
-
- /* Write tess factors into VGPRs for the epilog. */
- if (is_tess_factor &&
- ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
- if (!is_tess_inner) {
- LLVMBuildStore(ctx->ac.builder, value, /* outer */
- ctx->invoc0_tess_factors[chan]);
- } else if (chan < 2) {
- LLVMBuildStore(ctx->ac.builder, value, /* inner */
- ctx->invoc0_tess_factors[4 + chan]);
- }
- }
- }
-
- if (writemask == 0xF && !is_tess_factor) {
- LLVMValueRef value = ac_build_gather_values(&ctx->ac,
- values, 4);
- ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr,
- base, 0, ac_glc);
- }
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct si_shader_info *info = &ctx->shader->selector->info;
+ LLVMValueRef dw_addr, stride;
+ LLVMValueRef buffer, base, addr;
+ LLVMValueRef values[8];
+ bool skip_lds_store;
+ bool is_tess_factor = false, is_tess_inner = false;
+
+ driver_location = driver_location / 4;
+ ubyte name = info->output_semantic_name[driver_location];
+ ubyte index = info->output_semantic_index[driver_location];
+
+ bool is_const = !param_index;
+ if (!param_index)
+ param_index = LLVMConstInt(ctx->ac.i32, const_index, 0);
+
+ const bool is_patch = vertex_index == NULL;
+
+ /* Invalid SPIR-V can cause this. */
+ if ((name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
+ name == TGSI_SEMANTIC_TESSOUTER) != is_patch)
+ return;
+
+ if (!is_patch) {
+ stride = get_tcs_out_vertex_dw_stride(ctx);
+ dw_addr = get_tcs_out_current_patch_offset(ctx);
+ dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,
+ name, index);
+
+ skip_lds_store = !info->reads_pervertex_outputs;
+ } else {
+ dw_addr = get_tcs_out_current_patch_data_offset(ctx);
+ dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr, vertex_index, param_index,
+ name, index);
+
+ skip_lds_store = !info->reads_perpatch_outputs;
+
+ if (is_const && const_index == 0) {
+ int name = info->output_semantic_name[driver_location];
+
+ /* Always write tess factors into LDS for the TCS epilog. */
+ if (name == TGSI_SEMANTIC_TESSINNER || name == TGSI_SEMANTIC_TESSOUTER) {
+ /* The epilog doesn't read LDS if invocation 0 defines tess factors. */
+ skip_lds_store = !info->reads_tessfactor_outputs &&
+ ctx->shader->selector->info.tessfactors_are_def_in_all_invocs;
+ is_tess_factor = true;
+ is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
+ }
+ }
+ }
+
+ buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
+
+ base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
+
+ addr =
+ get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, name, index);
+
+ for (unsigned chan = component; chan < 8; chan++) {
+ if (!(writemask & (1 << chan)))
+ continue;
+ LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
+
+ unsigned buffer_store_offset = chan % 4;
+ if (chan == 4) {
+ ubyte name = info->output_semantic_name[driver_location + 1];
+ ubyte index = info->output_semantic_index[driver_location + 1];
+ addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index,
+ name, index);
+ }
+
+ /* Skip LDS stores if there is no LDS read of this output. */
+ if (!skip_lds_store)
+ lshs_lds_store(ctx, chan, dw_addr, value);
+
+ value = ac_to_integer(&ctx->ac, value);
+ values[chan] = value;
+
+ if (writemask != 0xF && !is_tess_factor) {
+ ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, addr, base,
+ 4 * buffer_store_offset, ac_glc);
+ }
+
+ /* Write tess factors into VGPRs for the epilog. */
+ if (is_tess_factor && ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
+ if (!is_tess_inner) {
+ LLVMBuildStore(ctx->ac.builder, value, /* outer */
+ ctx->invoc0_tess_factors[chan]);
+ } else if (chan < 2) {
+ LLVMBuildStore(ctx->ac.builder, value, /* inner */
+ ctx->invoc0_tess_factors[4 + chan]);
+ }
+ }
+ }
+
+ if (writemask == 0xF && !is_tess_factor) {
+ LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4);
+ ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr, base, 0, ac_glc);
+ }
}
static LLVMValueRef si_load_tess_coord(struct ac_shader_abi *abi)
{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- LLVMValueRef coord[4] = {
- ac_get_arg(&ctx->ac, ctx->tes_u),
- ac_get_arg(&ctx->ac, ctx->tes_v),
- ctx->ac.f32_0,
- ctx->ac.f32_0
- };
-
- /* For triangles, the vector should be (u, v, 1-u-v). */
- if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
- PIPE_PRIM_TRIANGLES) {
- coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
- LLVMBuildFAdd(ctx->ac.builder,
- coord[0], coord[1], ""), "");
- }
- return ac_build_gather_values(&ctx->ac, coord, 4);
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi); /* Builds the 4-element tess coord from the tes_u / tes_v shader arguments. */
+ LLVMValueRef coord[4] = {ac_get_arg(&ctx->ac, ctx->tes_u), ac_get_arg(&ctx->ac, ctx->tes_v),
+ ctx->ac.f32_0, ctx->ac.f32_0}; /* starts as (u, v, 0, 0) */
+
+ /* For triangles, the vector should be (u, v, 1-u-v). */
+ if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] == PIPE_PRIM_TRIANGLES) {
+ coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
+ LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), ""); /* 1.0 - (u + v) */
+ }
+ return ac_build_gather_values(&ctx->ac, coord, 4); /* all four elements are returned, including the trailing zero(s) */
}
-static LLVMValueRef load_tess_level(struct si_shader_context *ctx,
- unsigned semantic_name)
+static LLVMValueRef load_tess_level(struct si_shader_context *ctx, unsigned semantic_name) /* Loads the patch output named by semantic_name from ctx->tess_offchip_ring. */
{
- LLVMValueRef base, addr;
-
- int param = si_shader_io_get_unique_index_patch(semantic_name, 0);
+ LLVMValueRef base, addr;
- base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
- addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
- LLVMConstInt(ctx->i32, param, 0));
+ int param = si_shader_io_get_unique_index_patch(semantic_name, 0); /* slot index of the patch output, semantic index 0 */
- return buffer_load(ctx, ctx->f32,
- ~0, ctx->tess_offchip_ring, base, addr, true);
+ base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
+ addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL, /* NULL vertex index: a per-patch address for the current relative patch */
+ LLVMConstInt(ctx->ac.i32, param, 0));
+ return buffer_load(ctx, ctx->ac.f32, ~0, ctx->tess_offchip_ring, base, addr, true); /* NOTE(review): ~0 presumably selects all channels -- confirm against buffer_load */
}
-static LLVMValueRef load_tess_level_default(struct si_shader_context *ctx,
- unsigned semantic_name)
+static LLVMValueRef load_tess_level_default(struct si_shader_context *ctx, unsigned semantic_name) /* Loads four dwords of default tess levels from a constant buffer. */
{
- LLVMValueRef buf, slot, val[4];
- int i, offset;
-
- slot = LLVMConstInt(ctx->i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
- buf = ac_get_arg(&ctx->ac, ctx->rw_buffers);
- buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot);
- offset = semantic_name == TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL ? 4 : 0;
-
- for (i = 0; i < 4; i++)
- val[i] = si_buffer_load_const(ctx, buf,
- LLVMConstInt(ctx->i32, (offset + i) * 4, 0));
- return ac_build_gather_values(&ctx->ac, val, 4);
+ LLVMValueRef buf, slot, val[4];
+ int i, offset;
+
+ slot = LLVMConstInt(ctx->ac.i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
+ buf = ac_get_arg(&ctx->ac, ctx->rw_buffers);
+ buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot); /* fetch the SI_HS_CONST_DEFAULT_TESS_LEVELS entry of rw_buffers */
+ offset = semantic_name == TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL ? 4 : 0; /* dword offset: inner levels start at dword 4, anything else at dword 0 */
+
+ for (i = 0; i < 4; i++)
+ val[i] = si_buffer_load_const(ctx, buf, LLVMConstInt(ctx->ac.i32, (offset + i) * 4, 0)); /* * 4 turns the dword index into the constant passed as the offset */
+ return ac_build_gather_values(&ctx->ac, val, 4);
}
-static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi,
- unsigned varying_id,
- bool load_default_state)
+static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi, unsigned varying_id, /* Maps a VARYING_SLOT_TESS_LEVEL_* id to a TGSI semantic and loads it. */
+ bool load_default_state)
{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- unsigned semantic_name;
-
- if (load_default_state) {
- switch (varying_id) {
- case VARYING_SLOT_TESS_LEVEL_INNER:
- semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL;
- break;
- case VARYING_SLOT_TESS_LEVEL_OUTER:
- semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL;
- break;
- default:
- unreachable("unknown tess level");
- }
- return load_tess_level_default(ctx, semantic_name);
- }
-
- switch (varying_id) {
- case VARYING_SLOT_TESS_LEVEL_INNER:
- semantic_name = TGSI_SEMANTIC_TESSINNER;
- break;
- case VARYING_SLOT_TESS_LEVEL_OUTER:
- semantic_name = TGSI_SEMANTIC_TESSOUTER;
- break;
- default:
- unreachable("unknown tess level");
- }
-
- return load_tess_level(ctx, semantic_name);
-
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ unsigned semantic_name;
+
+ if (load_default_state) { /* default-state path: read the levels via load_tess_level_default() */
+ switch (varying_id) {
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL;
+ break;
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL;
+ break;
+ default:
+ unreachable("unknown tess level");
+ }
+ return load_tess_level_default(ctx, semantic_name);
+ }
+
+ switch (varying_id) { /* otherwise: read the levels via load_tess_level() */
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ semantic_name = TGSI_SEMANTIC_TESSINNER;
+ break;
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ semantic_name = TGSI_SEMANTIC_TESSOUTER;
+ break;
+ default:
+ unreachable("unknown tess level");
+ }
+
+ return load_tess_level(ctx, semantic_name);
}
static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)
{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- if (ctx->type == PIPE_SHADER_TESS_CTRL)
- return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6);
- else if (ctx->type == PIPE_SHADER_TESS_EVAL)
- return get_num_tcs_out_vertices(ctx);
- else
- unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi); /* Returns the patch vertex count; the source depends on the shader stage. */
+ if (ctx->type == PIPE_SHADER_TESS_CTRL)
+ return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6); /* NOTE(review): presumably a 6-bit field at bit 13 of tcs_out_lds_layout -- confirm si_unpack_param argument order */
+ else if (ctx->type == PIPE_SHADER_TESS_EVAL)
+ return get_num_tcs_out_vertices(ctx); /* for TES the value is the TCS output vertex count */
+ else
+ unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
}
/**
*/
static void si_copy_tcs_inputs(struct si_shader_context *ctx)
{
- LLVMValueRef invocation_id, buffer, buffer_offset;
- LLVMValueRef lds_vertex_stride, lds_base;
- uint64_t inputs;
+ LLVMValueRef invocation_id, buffer, buffer_offset; /* Copies every input set in key.mono.u.ff_tcs_inputs_to_copy from LDS to the TCS offchip ring. */
+ LLVMValueRef lds_vertex_stride, lds_base;
+ uint64_t inputs;
- invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
- buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
- buffer_offset = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
+ invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
+ buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
+ buffer_offset = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
- lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
- lds_base = get_tcs_in_current_patch_offset(ctx);
- lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride,
- lds_base);
+ lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
+ lds_base = get_tcs_in_current_patch_offset(ctx);
+ lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base); /* NOTE(review): presumably invocation_id * stride + patch offset, i.e. this invocation's vertex in LDS -- confirm ac_build_imad */
- inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;
- while (inputs) {
- unsigned i = u_bit_scan64(&inputs);
+ inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;
+ while (inputs) { /* loop ends once the local copy of the mask reaches zero */
+ unsigned i = u_bit_scan64(&inputs); /* NOTE(review): presumably returns a set bit index and clears it from inputs -- confirm u_bit_scan64 */
- LLVMValueRef lds_ptr = LLVMBuildAdd(ctx->ac.builder, lds_base,
- LLVMConstInt(ctx->i32, 4 * i, 0),
- "");
+ LLVMValueRef lds_ptr =
+ LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, 4 * i, 0), ""); /* 4 dwords per input slot */
- LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(ctx,
- get_rel_patch_id(ctx),
- invocation_id,
- LLVMConstInt(ctx->i32, i, 0));
+ LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(
+ ctx, get_rel_patch_id(ctx), invocation_id, LLVMConstInt(ctx->ac.i32, i, 0)); /* invocation_id is passed as the vertex index */
- LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
+ LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
- ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
- buffer_offset, 0, ac_glc);
- }
+ ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr, buffer_offset, 0, /* 4-channel store of the loaded value */
+ ac_glc);
+ }
}
-static void si_write_tess_factors(struct si_shader_context *ctx,
- LLVMValueRef rel_patch_id,
- LLVMValueRef invocation_id,
- LLVMValueRef tcs_out_current_patch_data_offset,
- LLVMValueRef invoc0_tf_outer[4],
- LLVMValueRef invoc0_tf_inner[2])
+static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef rel_patch_id,
+ LLVMValueRef invocation_id,
+ LLVMValueRef tcs_out_current_patch_data_offset,
+ LLVMValueRef invoc0_tf_outer[4], LLVMValueRef invoc0_tf_inner[2])
{
- struct si_shader *shader = ctx->shader;
- unsigned tess_inner_index, tess_outer_index;
- LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
- LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
- unsigned stride, outer_comps, inner_comps, i, offset;
-
- /* Add a barrier before loading tess factors from LDS. */
- if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
- si_llvm_emit_barrier(ctx);
-
- /* Do this only for invocation 0, because the tess levels are per-patch,
- * not per-vertex.
- *
- * This can't jump, because invocation 0 executes this. It should
- * at least mask out the loads and stores for other invocations.
- */
- ac_build_ifcc(&ctx->ac,
- LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
- invocation_id, ctx->i32_0, ""), 6503);
-
- /* Determine the layout of one tess factor element in the buffer. */
- switch (shader->key.part.tcs.epilog.prim_mode) {
- case PIPE_PRIM_LINES:
- stride = 2; /* 2 dwords, 1 vec2 store */
- outer_comps = 2;
- inner_comps = 0;
- break;
- case PIPE_PRIM_TRIANGLES:
- stride = 4; /* 4 dwords, 1 vec4 store */
- outer_comps = 3;
- inner_comps = 1;
- break;
- case PIPE_PRIM_QUADS:
- stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
- outer_comps = 4;
- inner_comps = 2;
- break;
- default:
- assert(0);
- return;
- }
-
- for (i = 0; i < 4; i++) {
- inner[i] = LLVMGetUndef(ctx->i32);
- outer[i] = LLVMGetUndef(ctx->i32);
- }
-
- if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {
- /* Tess factors are in VGPRs. */
- for (i = 0; i < outer_comps; i++)
- outer[i] = out[i] = invoc0_tf_outer[i];
- for (i = 0; i < inner_comps; i++)
- inner[i] = out[outer_comps+i] = invoc0_tf_inner[i];
- } else {
- /* Load tess_inner and tess_outer from LDS.
- * Any invocation can write them, so we can't get them from a temporary.
- */
- tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
- tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
-
- lds_base = tcs_out_current_patch_data_offset;
- lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
- LLVMConstInt(ctx->i32,
- tess_inner_index * 4, 0), "");
- lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
- LLVMConstInt(ctx->i32,
- tess_outer_index * 4, 0), "");
-
- for (i = 0; i < outer_comps; i++) {
- outer[i] = out[i] =
- lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer);
- }
- for (i = 0; i < inner_comps; i++) {
- inner[i] = out[outer_comps+i] =
- lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);
- }
- }
-
- if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
- /* For isolines, the hardware expects tess factors in the
- * reverse order from what NIR specifies.
- */
- LLVMValueRef tmp = out[0];
- out[0] = out[1];
- out[1] = tmp;
- }
-
- /* Convert the outputs to vectors for stores. */
- vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
- vec1 = NULL;
-
- if (stride > 4)
- vec1 = ac_build_gather_values(&ctx->ac, out+4, stride - 4);
-
- /* Get the buffer. */
- buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING);
-
- /* Get the offset. */
- tf_base = ac_get_arg(&ctx->ac,
- ctx->tcs_factor_offset);
- byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
- LLVMConstInt(ctx->i32, 4 * stride, 0), "");
-
- ac_build_ifcc(&ctx->ac,
- LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
- rel_patch_id, ctx->i32_0, ""), 6504);
-
- /* Store the dynamic HS control word. */
- offset = 0;
- if (ctx->screen->info.chip_class <= GFX8) {
- ac_build_buffer_store_dword(&ctx->ac, buffer,
- LLVMConstInt(ctx->i32, 0x80000000, 0),
- 1, ctx->i32_0, tf_base,
- offset, ac_glc);
- offset += 4;
- }
-
- ac_build_endif(&ctx->ac, 6504);
-
- /* Store the tessellation factors. */
- ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
- MIN2(stride, 4), byteoffset, tf_base,
- offset, ac_glc);
- offset += 16;
- if (vec1)
- ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
- stride - 4, byteoffset, tf_base,
- offset, ac_glc);
-
- /* Store the tess factors into the offchip buffer if TES reads them. */
- if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
- LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;
- LLVMValueRef tf_inner_offset;
- unsigned param_outer, param_inner;
-
- buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
- base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
-
- param_outer = si_shader_io_get_unique_index_patch(
- TGSI_SEMANTIC_TESSOUTER, 0);
- tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
- LLVMConstInt(ctx->i32, param_outer, 0));
-
- unsigned outer_vec_size =
- ac_has_vec3_support(ctx->screen->info.chip_class, false) ?
- outer_comps : util_next_power_of_two(outer_comps);
- outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_vec_size);
-
- ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
- outer_comps, tf_outer_offset,
- base, 0, ac_glc);
- if (inner_comps) {
- param_inner = si_shader_io_get_unique_index_patch(
- TGSI_SEMANTIC_TESSINNER, 0);
- tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
- LLVMConstInt(ctx->i32, param_inner, 0));
-
- inner_vec = inner_comps == 1 ? inner[0] :
- ac_build_gather_values(&ctx->ac, inner, inner_comps);
- ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
- inner_comps, tf_inner_offset,
- base, 0, ac_glc);
- }
- }
-
- ac_build_endif(&ctx->ac, 6503);
+ struct si_shader *shader = ctx->shader; /* Emits the tess factor stores for one patch: to the TCS_FACTOR_RING buffer and, if requested, the offchip ring. */
+ unsigned tess_inner_index, tess_outer_index;
+ LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
+ LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4]; /* out[]: outer factors first, then inner factors, as written to the factor ring */
+ unsigned stride, outer_comps, inner_comps, i, offset; /* stride is counted in dwords, offset in bytes */
+
+ /* Add a barrier before loading tess factors from LDS. */
+ if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
+ si_llvm_emit_barrier(ctx);
+
+ /* Do this only for invocation 0, because the tess levels are per-patch,
+ * not per-vertex.
+ *
+ * This can't jump, because invocation 0 executes this. It should
+ * at least mask out the loads and stores for other invocations.
+ */
+ ac_build_ifcc(&ctx->ac,
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, invocation_id, ctx->ac.i32_0, ""), 6503); /* closed by ac_build_endif(..., 6503) at the end */
+
+ /* Determine the layout of one tess factor element in the buffer. */
+ switch (shader->key.part.tcs.epilog.prim_mode) {
+ case PIPE_PRIM_LINES:
+ stride = 2; /* 2 dwords, 1 vec2 store */
+ outer_comps = 2;
+ inner_comps = 0;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ stride = 4; /* 4 dwords, 1 vec4 store */
+ outer_comps = 3;
+ inner_comps = 1;
+ break;
+ case PIPE_PRIM_QUADS:
+ stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
+ outer_comps = 4;
+ inner_comps = 2;
+ break;
+ default:
+ assert(0);
+ return; /* NOTE(review): returns with the 6503 if-block still open; only reachable when the assert is compiled out */
+ }
+
+ for (i = 0; i < 4; i++) {
+ inner[i] = LLVMGetUndef(ctx->ac.i32); /* unused components stay undef */
+ outer[i] = LLVMGetUndef(ctx->ac.i32);
+ }
+
+ if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {
+ /* Tess factors are in VGPRs. */
+ for (i = 0; i < outer_comps; i++)
+ outer[i] = out[i] = invoc0_tf_outer[i];
+ for (i = 0; i < inner_comps; i++)
+ inner[i] = out[outer_comps + i] = invoc0_tf_inner[i];
+ } else {
+ /* Load tess_inner and tess_outer from LDS.
+ * Any invocation can write them, so we can't get them from a temporary.
+ */
+ tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
+ tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
+
+ lds_base = tcs_out_current_patch_data_offset;
+ lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
+ LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, 0), ""); /* 4 dwords per patch output slot */
+ lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
+ LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, 0), "");
+
+ for (i = 0; i < outer_comps; i++) {
+ outer[i] = out[i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer); /* one load per component i */
+ }
+ for (i = 0; i < inner_comps; i++) {
+ inner[i] = out[outer_comps + i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);
+ }
+ }
+
+ if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
+ /* For isolines, the hardware expects tess factors in the
+ * reverse order from what NIR specifies.
+ */
+ LLVMValueRef tmp = out[0]; /* only out[] is swapped; outer[] keeps its original order */
+ out[0] = out[1];
+ out[1] = tmp;
+ }
+
+ /* Convert the outputs to vectors for stores. */
+ vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
+ vec1 = NULL;
+
+ if (stride > 4)
+ vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4); /* remaining dwords beyond the first four (quads only) */
+
+ /* Get the buffer. */
+ buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING);
+
+ /* Get the offset. */
+ tf_base = ac_get_arg(&ctx->ac, ctx->tcs_factor_offset);
+ byteoffset =
+ LLVMBuildMul(ctx->ac.builder, rel_patch_id, LLVMConstInt(ctx->ac.i32, 4 * stride, 0), ""); /* rel_patch_id * bytes per patch */
+
+ ac_build_ifcc(&ctx->ac,
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, ""), 6504); /* control word is stored only when rel_patch_id == 0 */
+
+ /* Store the dynamic HS control word. */
+ offset = 0;
+ if (ctx->screen->info.chip_class <= GFX8) {
+ ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0), 1,
+ ctx->ac.i32_0, tf_base, offset, ac_glc);
+ offset += 4; /* factor stores below are shifted by 4 bytes on these chips */
+ }
+
+ ac_build_endif(&ctx->ac, 6504);
+
+ /* Store the tessellation factors. */
+ ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, MIN2(stride, 4), byteoffset, tf_base, offset,
+ ac_glc);
+ offset += 16; /* advance the constant offset by 16 bytes for the second store */
+ if (vec1)
+ ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, stride - 4, byteoffset, tf_base, offset,
+ ac_glc);
+
+ /* Store the tess factors into the offchip buffer if TES reads them. */
+ if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
+ LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;
+ LLVMValueRef tf_inner_offset;
+ unsigned param_outer, param_inner;
+
+ buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
+ base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
+
+ param_outer = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
+ tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
+ LLVMConstInt(ctx->ac.i32, param_outer, 0));
+
+ unsigned outer_vec_size = ac_has_vec3_support(ctx->screen->info.chip_class, false) /* vector width is rounded up to a power of two when vec3 is not supported */
+ ? outer_comps
+ : util_next_power_of_two(outer_comps);
+ outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_vec_size); /* uses outer[], i.e. the order before the isoline swap */
+
+ ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, outer_comps, tf_outer_offset, base, 0,
+ ac_glc);
+ if (inner_comps) {
+ param_inner = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
+ tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
+ LLVMConstInt(ctx->ac.i32, param_inner, 0));
+
+ inner_vec =
+ inner_comps == 1 ? inner[0] : ac_build_gather_values(&ctx->ac, inner, inner_comps); /* a single component is stored as a scalar */
+ ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, inner_comps, tf_inner_offset, base,
+ 0, ac_glc);
+ }
+ }
+
+ ac_build_endif(&ctx->ac, 6503);
}
/* This only writes the tessellation factor levels. */
-static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi,
- unsigned max_outputs,
- LLVMValueRef *addrs)
+static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, /* max_outputs and addrs are not used in this body */
+ LLVMValueRef *addrs)
{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
-
- si_copy_tcs_inputs(ctx);
-
- rel_patch_id = get_rel_patch_id(ctx);
- invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
- tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
-
- if (ctx->screen->info.chip_class >= GFX9) {
- LLVMBasicBlockRef blocks[2] = {
- LLVMGetInsertBlock(builder),
- ctx->merged_wrap_if_entry_block
- };
- LLVMValueRef values[2];
-
- ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
-
- values[0] = rel_patch_id;
- values[1] = LLVMGetUndef(ctx->i32);
- rel_patch_id = ac_build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
-
- values[0] = tf_lds_offset;
- values[1] = LLVMGetUndef(ctx->i32);
- tf_lds_offset = ac_build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
-
- values[0] = invocation_id;
- values[1] = ctx->i32_1; /* cause the epilog to skip threads */
- invocation_id = ac_build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
- }
-
- /* Return epilog parameters from this function. */
- LLVMValueRef ret = ctx->return_value;
- unsigned vgpr;
-
- if (ctx->screen->info.chip_class >= GFX9) {
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout,
- 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout,
- 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
- /* Tess offchip and tess factor offsets are at the beginning. */
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2);
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4);
- vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1;
- } else {
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout,
- GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout,
- GFX6_SGPR_TCS_OUT_LAYOUT);
- /* Tess offchip and tess factor offsets are after user SGPRs. */
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset,
- GFX6_TCS_NUM_USER_SGPR);
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset,
- GFX6_TCS_NUM_USER_SGPR + 1);
- vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
- }
-
- /* VGPRs */
- rel_patch_id = ac_to_float(&ctx->ac, rel_patch_id);
- invocation_id = ac_to_float(&ctx->ac, invocation_id);
- tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset);
-
- /* Leave a hole corresponding to the two input VGPRs. This ensures that
- * the invocation_id output does not alias the tcs_rel_ids input,
- * which saves a V_MOV on gfx9.
- */
- vgpr += 2;
-
- ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
- ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
-
- if (ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
- vgpr++; /* skip the tess factor LDS offset */
- for (unsigned i = 0; i < 6; i++) {
- LLVMValueRef value =
- LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], "");
- value = ac_to_float(&ctx->ac, value);
- ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");
- }
- } else {
- ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
- }
- ctx->return_value = ret;
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi); /* Calls si_copy_tcs_inputs(), then packs the epilog parameters into ctx->return_value. */
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
+
+ si_copy_tcs_inputs(ctx);
+
+ rel_patch_id = get_rel_patch_id(ctx);
+ invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
+ tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
+
+ if (ctx->screen->info.chip_class >= GFX9) {
+ LLVMBasicBlockRef blocks[2] = {LLVMGetInsertBlock(builder), ctx->merged_wrap_if_entry_block}; /* incoming blocks for the phis below: the current block and the wrap-if entry block */
+ LLVMValueRef values[2];
+
+ ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
+
+ values[0] = rel_patch_id;
+ values[1] = LLVMGetUndef(ctx->ac.i32); /* undef when coming from the entry block */
+ rel_patch_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
+
+ values[0] = tf_lds_offset;
+ values[1] = LLVMGetUndef(ctx->ac.i32);
+ tf_lds_offset = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
+
+ values[0] = invocation_id;
+ values[1] = ctx->ac.i32_1; /* cause the epilog to skip threads */
+ invocation_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
+ }
+
+ /* Return epilog parameters from this function. */
+ LLVMValueRef ret = ctx->return_value;
+ unsigned vgpr; /* index of the next return-value slot to fill */
+
+ if (ctx->screen->info.chip_class >= GFX9) {
+ ret =
+ si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
+ ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
+ /* Tess offchip and tess factor offsets are at the beginning. */
+ ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2);
+ ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4);
+ vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1; /* first slot after the last SGPR slot written above */
+ } else {
+ ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
+ ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, GFX6_SGPR_TCS_OUT_LAYOUT);
+ /* Tess offchip and tess factor offsets are after user SGPRs. */
+ ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, GFX6_TCS_NUM_USER_SGPR);
+ ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1);
+ vgpr = GFX6_TCS_NUM_USER_SGPR + 2; /* first slot after the two offsets inserted above */
+ }
+
+ /* VGPRs */
+ rel_patch_id = ac_to_float(&ctx->ac, rel_patch_id);
+ invocation_id = ac_to_float(&ctx->ac, invocation_id);
+ tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset);
+
+ /* Leave a hole corresponding to the two input VGPRs. This ensures that
+ * the invocation_id output does not alias the tcs_rel_ids input,
+ * which saves a V_MOV on gfx9.
+ */
+ vgpr += 2;
+
+ ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
+ ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
+
+ if (ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
+ vgpr++; /* skip the tess factor LDS offset */
+ for (unsigned i = 0; i < 6; i++) {
+ LLVMValueRef value = LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], ""); /* six values are loaded from the invoc0_tess_factors pointers */
+ value = ac_to_float(&ctx->ac, value);
+ ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");
+ }
+ } else {
+ ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, ""); /* otherwise only the LDS offset is passed in that slot */
+ }
+ ctx->return_value = ret;
}
/* Pass TCS inputs from LS to TCS on GFX9. */
static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
{
- LLVMValueRef ret = ctx->return_value;
-
- ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
- ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2);
- ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3);
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4);
- ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5);
-
- ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers,
- 8 + SI_SGPR_RW_BUFFERS);
- ret = si_insert_input_ptr(ctx, ret,
- ctx->bindless_samplers_and_images,
- 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
-
- ret = si_insert_input_ret(ctx, ret, ctx->vs_state_bits,
- 8 + SI_SGPR_VS_STATE_BITS);
-
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout,
- 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_offsets,
- 8 + GFX9_SGPR_TCS_OUT_OFFSETS);
- ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout,
- 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
-
- unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
- ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
- ac_to_float(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id)),
- vgpr++, "");
- ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
- ac_to_float(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args.tcs_rel_ids)),
- vgpr++, "");
- ctx->return_value = ret;
+ LLVMValueRef ret = ctx->return_value;
+ /* Slots 0-5 of the return value: the two pointers inserted with
+  * si_insert_input_ptr, then tcs_offchip_offset, merged_wave_info,
+  * tcs_factor_offset and merged_scratch_offset. Slots 6 and 7 are not written here.
+  */
+ ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
+ ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
+ ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2);
+ ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3);
+ ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4);
+ ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5);
+ /* The remaining inputs are placed at 8 + their SGPR index.
+  * NOTE(review): the constant 8 presumably skips the slots reserved ahead of
+  * the user SGPRs on GFX9 - confirm against the SGPR layout definitions.
+  */
+ ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS);
+ ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images,
+ 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
+
+ ret = si_insert_input_ret(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
+
+ ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
+ ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_offsets, 8 + GFX9_SGPR_TCS_OUT_OFFSETS);
+ ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
+ /* The two VGPR values start right after the GFX9_TCS_NUM_USER_SGPR slots:
+  * tcs_patch_id first, then tcs_rel_ids, each passed through ac_to_float
+  * before being inserted.
+  */
+ unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
+ ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id)),
+ vgpr++, "");
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
+ ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_rel_ids)),
+ vgpr++, "");
+ ctx->return_value = ret;
}
-void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
- LLVMValueRef *addrs)
+void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs)
{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- struct si_shader *shader = ctx->shader;
- struct si_shader_info *info = &shader->selector->info;
- unsigned i, chan;
- LLVMValueRef vertex_id = ac_get_arg(&ctx->ac, ctx->rel_auto_id);
- LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx);
- LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id,
- vertex_dw_stride, "");
-
- /* Write outputs to LDS. The next shader (TCS aka HS) will read
- * its inputs from it. */
- for (i = 0; i < info->num_outputs; i++) {
- unsigned name = info->output_semantic_name[i];
- unsigned index = info->output_semantic_index[i];
-
- /* The ARB_shader_viewport_layer_array spec contains the
- * following issue:
- *
- * 2) What happens if gl_ViewportIndex or gl_Layer is
- * written in the vertex shader and a geometry shader is
- * present?
- *
- * RESOLVED: The value written by the last vertex processing
- * stage is used. If the last vertex processing stage
- * (vertex, tessellation evaluation or geometry) does not
- * statically assign to gl_ViewportIndex or gl_Layer, index
- * or layer zero is assumed.
- *
- * So writes to those outputs in VS-as-LS are simply ignored.
- */
- if (name == TGSI_SEMANTIC_LAYER ||
- name == TGSI_SEMANTIC_VIEWPORT_INDEX)
- continue;
-
- int param = si_shader_io_get_unique_index(name, index, false);
- LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
- LLVMConstInt(ctx->i32, param * 4, 0), "");
-
- for (chan = 0; chan < 4; chan++) {
- if (!(info->output_usagemask[i] & (1 << chan)))
- continue;
-
- lshs_lds_store(ctx, chan, dw_addr,
- LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""));
- }
- }
-
- if (ctx->screen->info.chip_class >= GFX9)
- si_set_ls_return_value_for_tcs(ctx);
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct si_shader *shader = ctx->shader;
+ struct si_shader_info *info = &shader->selector->info;
+ unsigned i, chan;
+ LLVMValueRef vertex_id = ac_get_arg(&ctx->ac, ctx->rel_auto_id);
+ LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx);
+ LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, "");
+ /* Epilogue of a VS running as LS. base_dw_addr = vertex_id * vertex_dw_stride
+  * is where this vertex's outputs start; max_outputs is not used in this body.
+  */
+ /* Write outputs to LDS. The next shader (TCS aka HS) will read
+ * its inputs from it. */
+ for (i = 0; i < info->num_outputs; i++) {
+ unsigned name = info->output_semantic_name[i];
+ unsigned index = info->output_semantic_index[i];
+
+ /* The ARB_shader_viewport_layer_array spec contains the
+ * following issue:
+ *
+ * 2) What happens if gl_ViewportIndex or gl_Layer is
+ * written in the vertex shader and a geometry shader is
+ * present?
+ *
+ * RESOLVED: The value written by the last vertex processing
+ * stage is used. If the last vertex processing stage
+ * (vertex, tessellation evaluation or geometry) does not
+ * statically assign to gl_ViewportIndex or gl_Layer, index
+ * or layer zero is assumed.
+ *
+ * So writes to those outputs in VS-as-LS are simply ignored.
+ */
+ if (name == TGSI_SEMANTIC_LAYER || name == TGSI_SEMANTIC_VIEWPORT_INDEX)
+ continue;
+ /* Each output starts at base_dw_addr + param * 4.
+  * NOTE(review): presumably 4 dwords (one vec4) per output slot - confirm
+  * against si_shader_io_get_unique_index and lshs_lds_store.
+  */
+ int param = si_shader_io_get_unique_index(name, index, false);
+ LLVMValueRef dw_addr =
+ LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");
+ /* Only channels whose bit is set in output_usagemask[i] are stored. */
+ for (chan = 0; chan < 4; chan++) {
+ if (!(info->output_usagemask[i] & (1 << chan)))
+ continue;
+ /* addrs[4 * i + chan] is loaded to obtain the value for this channel. */
+ lshs_lds_store(ctx, chan, dw_addr,
+ LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""));
+ }
+ }
+ /* On GFX9 and later, also fill in the LS return value consumed by the TCS. */
+ if (ctx->screen->info.chip_class >= GFX9)
+ si_set_ls_return_value_for_tcs(ctx);
}
/**
* Compile the TCS epilog function. This writes tesselation factors to memory
* based on the output primitive type of the tesselator (determined by TES).
*/
-void si_llvm_build_tcs_epilog(struct si_shader_context *ctx,
- union si_shader_part_key *key)
+void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key)
{
- memset(&ctx->args, 0, sizeof(ctx->args));
-
- if (ctx->screen->info.chip_class >= GFX9) {
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
- &ctx->tcs_offchip_offset);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* wave info */
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
- &ctx->tcs_factor_offset);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
- &ctx->tcs_offchip_layout);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
- &ctx->tcs_out_lds_layout);
- } else {
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
- &ctx->tcs_offchip_layout);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
- &ctx->tcs_out_lds_layout);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
- &ctx->tcs_offchip_offset);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
- &ctx->tcs_factor_offset);
- }
-
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
- struct ac_arg rel_patch_id; /* patch index within the wave (REL_PATCH_ID) */
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &rel_patch_id);
- struct ac_arg invocation_id; /* invocation ID within the patch */
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &invocation_id);
- struct ac_arg tcs_out_current_patch_data_offset; /* LDS offset where tess factors should be loaded from */
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT,
- &tcs_out_current_patch_data_offset);
-
- struct ac_arg tess_factors[6];
- for (unsigned i = 0; i < 6; i++)
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tess_factors[i]);
-
- /* Create the function. */
- si_llvm_create_func(ctx, "tcs_epilog", NULL, 0,
- ctx->screen->info.chip_class >= GFX7 ? 128 : 0);
- ac_declare_lds_as_pointer(&ctx->ac);
-
- LLVMValueRef invoc0_tess_factors[6];
- for (unsigned i = 0; i < 6; i++)
- invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]);
-
- si_write_tess_factors(ctx,
- ac_get_arg(&ctx->ac, rel_patch_id),
- ac_get_arg(&ctx->ac, invocation_id),
- ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset),
- invoc0_tess_factors, invoc0_tess_factors + 4);
-
- LLVMBuildRetVoid(ctx->ac.builder);
+ memset(&ctx->args, 0, sizeof(ctx->args));
+ /* Declare the SGPR arguments. Only tcs_offchip_offset, tcs_factor_offset,
+  * tcs_offchip_layout and tcs_out_lds_layout receive a destination; every
+  * other slot is an unnamed placeholder (NULL). The `key` parameter is not
+  * read in this body.
+  * NOTE(review): the placeholder counts presumably mirror the slot indices
+  * used when the main shader part fills its return value - confirm there.
+  */
+ if (ctx->screen->info.chip_class >= GFX9) {
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* wave info */
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
+ } else {
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
+ }
+ /* VGPR arguments, in order: two unused gap slots, rel_patch_id,
+  * invocation_id, the tess factor LDS offset, then six tess factor values.
+  */
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
+ struct ac_arg rel_patch_id; /* patch index within the wave (REL_PATCH_ID) */
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &rel_patch_id);
+ struct ac_arg invocation_id; /* invocation ID within the patch */
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &invocation_id);
+ struct ac_arg
+ tcs_out_current_patch_data_offset; /* LDS offset where tess factors should be loaded from */
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tcs_out_current_patch_data_offset);
+
+ struct ac_arg tess_factors[6];
+ for (unsigned i = 0; i < 6; i++)
+ ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tess_factors[i]);
+
+ /* Create the function.
+  * NOTE(review): the last argument (128 on GFX7 and later, otherwise 0) is
+  * presumably a workgroup-size limit - confirm against si_llvm_create_func.
+  */
+ si_llvm_create_func(ctx, "tcs_epilog", NULL, 0, ctx->screen->info.chip_class >= GFX7 ? 128 : 0);
+ ac_declare_lds_as_pointer(&ctx->ac);
+ /* Fetch the six tess factor arguments as LLVM values. */
+ LLVMValueRef invoc0_tess_factors[6];
+ for (unsigned i = 0; i < 6; i++)
+ invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]);
+ /* The array is handed over as two parts: elements 0-3 and elements 4-5.
+  * NOTE(review): presumably outer and inner tess levels respectively -
+  * confirm against si_write_tess_factors.
+  */
+ si_write_tess_factors(ctx, ac_get_arg(&ctx->ac, rel_patch_id),
+ ac_get_arg(&ctx->ac, invocation_id),
+ ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset),
+ invoc0_tess_factors, invoc0_tess_factors + 4);
+
+ LLVMBuildRetVoid(ctx->ac.builder);
}
void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx)
{
- ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
- ctx->abi.load_tess_level = si_load_tess_level;
- ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
- ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;
- ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
+ ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings; /* first of five ctx->abi hooks set for TCS */
+ ctx->abi.load_tess_level = si_load_tess_level;
+ ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
+ ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;
+ ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
}
-void si_llvm_init_tes_callbacks(struct si_shader_context *ctx)
+void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader)
{
- ctx->abi.load_tess_varyings = si_nir_load_input_tes;
- ctx->abi.load_tess_coord = si_load_tess_coord;
- ctx->abi.load_tess_level = si_load_tess_level;
- ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
+ ctx->abi.load_tess_varyings = si_nir_load_input_tes;
+ ctx->abi.load_tess_coord = si_load_tess_coord;
+ ctx->abi.load_tess_level = si_load_tess_level;
+ ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
+ /* Pick the output epilogue, first match wins: key.as_es selects the ES
+  * epilogue; otherwise ngg_cull_shader selects the NGG culling epilogue;
+  * otherwise key.as_ngg selects the NGG epilogue; else the VS epilogue.
+  */
+ if (ctx->shader->key.as_es)
+ ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
+ else if (ngg_cull_shader)
+ ctx->abi.emit_outputs = gfx10_emit_ngg_culling_epilogue;
+ else if (ctx->shader->key.as_ngg)
+ ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
+ else
+ ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
}