#include "ac_binary.h"
#include "ac_llvm_util.h"
+#include "ac_exp_param.h"
#include "si_shader_internal.h"
#include "si_pipe.h"
#include "sid.h"
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
- struct si_shader *shader,
LLVMTargetMachineRef tm);
static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data);
-static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
+static void si_dump_shader_key(unsigned processor, struct si_shader *shader,
FILE *f);
static unsigned llvm_get_type_size(LLVMTypeRef type);
static void si_build_vs_prolog_function(struct si_shader_context *ctx,
union si_shader_part_key *key);
-static void si_build_vs_epilog_function(struct si_shader_context *ctx,
- union si_shader_part_key *key);
static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
union si_shader_part_key *key);
static void si_build_ps_prolog_function(struct si_shader_context *ctx,
*/
#define PS_EPILOG_SAMPLEMASK_MIN_LOC 13
-/* The VS location of the PrimitiveID input is the same in the epilog,
- * so that the main shader part doesn't have to move it.
- */
-#define VS_EPILOG_PRIMID_LOC 2
-
enum {
CONST_ADDR_SPACE = 2,
LOCAL_ADDR_SPACE = 3,
};
+/* Return true if this shader runs as part of a merged (two-in-one)
+ * hardware stage. On chips newer than VI, LS+HS and ES+GS are merged
+ * into single HW shaders, so a shader compiled as LS or ES, or a
+ * TCS/GS selector, counts as merged; on VI and older it never does.
+ */
+static bool is_merged_shader(struct si_shader *shader)
+{
+ if (shader->selector->screen->b.chip_class <= VI)
+ return false;
+
+ return shader->key.as_ls ||
+ shader->key.as_es ||
+ shader->selector->type == PIPE_SHADER_TESS_CTRL ||
+ shader->selector->type == PIPE_SHADER_GEOMETRY;
+}
+
+/**
+ * Returns a unique index for a per-patch semantic name and index. The index
+ * must be less than 32, so that a 32-bit bitmask of used inputs or outputs
+ * can be calculated.
+ */
+unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index)
+{
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_TESSOUTER:
+ return 0;
+ case TGSI_SEMANTIC_TESSINNER:
+ return 1;
+ case TGSI_SEMANTIC_PATCH:
+ /* Generic per-patch varyings follow the two tess factor slots;
+ * the assert keeps the result below the 32-slot bitmask limit.
+ */
+ assert(index < 30);
+ return 2 + index;
+
+ default:
+ assert(!"invalid semantic name");
+ return 0;
+ }
+}
+
/**
* Returns a unique index for a semantic name and index. The index must be
* less than 64, so that a 64-bit bitmask of used inputs or outputs can be
assert(!"invalid generic index");
return 0;
- /* patch indices are completely separate and thus start from 0 */
- case TGSI_SEMANTIC_TESSOUTER:
- return 0;
- case TGSI_SEMANTIC_TESSINNER:
- return 1;
- case TGSI_SEMANTIC_PATCH:
- return 2 + index;
-
default:
assert(!"invalid semantic name");
return 0;
{
switch (ctx->type) {
case PIPE_SHADER_TESS_CTRL:
- return unpack_param(ctx, SI_PARAM_REL_IDS, 0, 8);
+ return unpack_param(ctx, ctx->param_tcs_rel_ids, 0, 8);
case PIPE_SHADER_TESS_EVAL:
return LLVMGetParam(ctx->main_fn,
static LLVMValueRef
get_tcs_in_patch_stride(struct si_shader_context *ctx)
{
- if (ctx->type == PIPE_SHADER_VERTEX)
- return unpack_param(ctx, SI_PARAM_VS_STATE_BITS, 8, 13);
- else if (ctx->type == PIPE_SHADER_TESS_CTRL)
- return unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 8, 13);
- else {
- assert(0);
- return NULL;
- }
+ /* The input patch stride (13 bits starting at bit 8) now lives in
+ * vs_state_bits for both VS-as-LS and TCS, so the old per-stage
+ * switch is no longer needed.
+ */
+ return unpack_param(ctx, ctx->param_vs_state_bits, 8, 13);
}
static LLVMValueRef
get_tcs_out_patch_stride(struct si_shader_context *ctx)
{
- return unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 0, 13);
+ /* Output patch stride: 13 bits starting at bit 0 of
+ * tcs_out_lds_layout.
+ */
+ return unpack_param(ctx, ctx->param_tcs_out_lds_layout, 0, 13);
}
static LLVMValueRef
{
return lp_build_mul_imm(&ctx->bld_base.uint_bld,
unpack_param(ctx,
- SI_PARAM_TCS_OUT_OFFSETS,
+ ctx->param_tcs_out_lds_offsets,
0, 16),
4);
}
{
return lp_build_mul_imm(&ctx->bld_base.uint_bld,
unpack_param(ctx,
- SI_PARAM_TCS_OUT_OFFSETS,
+ ctx->param_tcs_out_lds_offsets,
16, 16),
4);
}
LLVMValueRef input[3];
/* Load the T list */
- t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS);
+ t_list_ptr = LLVMGetParam(ctx->main_fn, ctx->param_vertex_buffers);
t_offset = LLVMConstInt(ctx->i32, input_index, 0);
ctx->param_vertex_index0 +
input_index);
- fix_fetch = ctx->shader->key.mono.vs.fix_fetch[input_index];
+ fix_fetch = ctx->shader->key.mono.vs_fix_fetch[input_index];
/* Do multiple loads for special formats. */
switch (fix_fetch) {
ctx->param_vs_prim_id);
case PIPE_SHADER_TESS_CTRL:
return LLVMGetParam(ctx->main_fn,
- SI_PARAM_PATCH_ID);
+ ctx->param_tcs_patch_id);
case PIPE_SHADER_TESS_EVAL:
return LLVMGetParam(ctx->main_fn,
ctx->param_tes_patch_id);
case PIPE_SHADER_GEOMETRY:
return LLVMGetParam(ctx->main_fn,
- SI_PARAM_PRIMITIVE_ID);
+ ctx->param_gs_prim_id);
default:
assert(0);
return ctx->i32_0;
{
LLVMValueRef result = get_indirect_index(ctx, ind, rel_index);
- /* LLVM 3.8: If indirect resource indexing is used:
- * - SI & CIK hang
- * - VI crashes
- */
- if (HAVE_LLVM == 0x0308)
- return LLVMGetUndef(ctx->i32);
-
return si_llvm_bound_index(ctx, result, num);
}
LLVMBuildMul(gallivm->builder, ind_index,
LLVMConstInt(ctx->i32, 4, 0), ""), "");
- param = si_shader_io_get_unique_index(name[first], index[first]);
+ param = reg.Register.Dimension ?
+ si_shader_io_get_unique_index(name[first], index[first]) :
+ si_shader_io_get_unique_index_patch(name[first], index[first]);
} else {
- param = si_shader_io_get_unique_index(name[reg.Register.Index],
- index[reg.Register.Index]);
+ param = reg.Register.Dimension ?
+ si_shader_io_get_unique_index(name[reg.Register.Index],
+ index[reg.Register.Index]) :
+ si_shader_io_get_unique_index_patch(name[reg.Register.Index],
+ index[reg.Register.Index]);
}
/* Add the base address of the element. */
LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
LLVMValueRef param_stride, constant16;
- vertices_per_patch = unpack_param(ctx, SI_PARAM_TCS_OFFCHIP_LAYOUT, 9, 6);
- num_patches = unpack_param(ctx, SI_PARAM_TCS_OFFCHIP_LAYOUT, 0, 9);
+ vertices_per_patch = unpack_param(ctx, ctx->param_tcs_offchip_layout, 6, 6);
+ num_patches = unpack_param(ctx, ctx->param_tcs_offchip_layout, 0, 6);
total_vertices = LLVMBuildMul(gallivm->builder, vertices_per_patch,
num_patches, "");
if (!vertex_index) {
LLVMValueRef patch_data_offset =
- unpack_param(ctx, SI_PARAM_TCS_OFFCHIP_LAYOUT, 16, 16);
+ unpack_param(ctx, ctx->param_tcs_offchip_layout, 12, 20);
base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
patch_data_offset, "");
param_index = ctx->i32_0;
}
- param_index_base = si_shader_io_get_unique_index(name[param_base],
- index[param_base]);
+ param_index_base = reg.Register.Dimension ?
+ si_shader_io_get_unique_index(name[param_base], index[param_base]) :
+ si_shader_io_get_unique_index_patch(name[param_base], index[param_base]);
param_index = LLVMBuildAdd(gallivm->builder, param_index,
LLVMConstInt(ctx->i32, param_index_base, 0),
* \param value value to store
*/
static void lds_store(struct lp_build_tgsi_context *bld_base,
- unsigned swizzle, LLVMValueRef dw_addr,
+ unsigned dw_offset_imm, LLVMValueRef dw_addr,
LLVMValueRef value)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = &ctx->gallivm;
+ /* dw_offset_imm is a constant dword offset added to dw_addr before
+ * the value (bitcast to i32) is stored to LDS.
+ */
dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
- LLVMConstInt(ctx->i32, swizzle, 0));
+ LLVMConstInt(ctx->i32, dw_offset_imm, 0));
value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
ac_build_indexed_store(&ctx->ac, ctx->lds,
dw_addr, value);
}
+/* Build a 4-dword buffer resource descriptor from a 32-bit SGPR input
+ * holding an address divided by 64K: the 64-bit base address is
+ * param << 16 (dwords 0-1), dword 2 is the maximum range (0xffffffff),
+ * and dword 3 selects XYZW with a 32-bit float num/data format.
+ */
+static LLVMValueRef desc_from_addr_base64k(struct si_shader_context *ctx,
+ unsigned param)
+{
+ LLVMBuilderRef builder = ctx->gallivm.builder;
+
+ LLVMValueRef addr = LLVMGetParam(ctx->main_fn, param);
+ addr = LLVMBuildZExt(builder, addr, ctx->i64, "");
+ addr = LLVMBuildShl(builder, addr, LLVMConstInt(ctx->i64, 16, 0), "");
+
+ uint64_t desc2 = 0xffffffff;
+ uint64_t desc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ LLVMValueRef hi = LLVMConstInt(ctx->i64, desc2 | (desc3 << 32), 0);
+
+ /* Assemble as <2 x i64>, then bitcast to the usual <4 x i32> form. */
+ LLVMValueRef desc = LLVMGetUndef(LLVMVectorType(ctx->i64, 2));
+ desc = LLVMBuildInsertElement(builder, desc, addr, ctx->i32_0, "");
+ desc = LLVMBuildInsertElement(builder, desc, hi, ctx->i32_1, "");
+ return LLVMBuildBitCast(builder, desc, ctx->v4i32, "");
+}
+
static LLVMValueRef fetch_input_tcs(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef dw_addr, stride;
- stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 24, 8);
+ stride = unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
dw_addr = get_tcs_in_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
LLVMValueRef dw_addr, stride;
if (reg->Register.Dimension) {
- stride = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+ stride = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 13, 8);
dw_addr = get_tcs_out_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
} else {
enum tgsi_opcode_type type, unsigned swizzle)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef rw_buffers, buffer, base, addr;
+ LLVMValueRef buffer, base, addr;
- rw_buffers = LLVMGetParam(ctx->main_fn,
- SI_PARAM_RW_BUFFERS);
- buffer = ac_build_indexed_load_const(&ctx->ac, rw_buffers,
- LLVMConstInt(ctx->i32, SI_HS_RING_TESS_OFFCHIP, 0));
+ buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
- base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
+ base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);
return buffer_load(bld_base, type, swizzle, buffer, base, addr, true);
const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
unsigned chan_index;
LLVMValueRef dw_addr, stride;
- LLVMValueRef rw_buffers, buffer, base, buf_addr;
+ LLVMValueRef buffer, base, buf_addr;
LLVMValueRef values[4];
bool skip_lds_store;
bool is_tess_factor = false;
}
if (reg->Register.Dimension) {
- stride = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+ stride = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 13, 8);
dw_addr = get_tcs_out_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
skip_lds_store = !sh_info->reads_pervertex_outputs;
}
}
- rw_buffers = LLVMGetParam(ctx->main_fn,
- SI_PARAM_RW_BUFFERS);
- buffer = ac_build_indexed_load_const(&ctx->ac, rw_buffers,
- LLVMConstInt(ctx->i32, SI_HS_RING_TESS_OFFCHIP, 0));
+ buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
- base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
+ base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL);
struct lp_build_context *uint = &ctx->bld_base.uint_bld;
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef vtx_offset, soffset;
- unsigned vtx_offset_param;
struct tgsi_shader_info *info = &shader->selector->info;
unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
if (!reg->Register.Dimension)
return NULL;
+ param = si_shader_io_get_unique_index(semantic_name, semantic_index);
+
+ /* GFX9 has the ESGS ring in LDS. */
+ if (ctx->screen->b.chip_class >= GFX9) {
+ unsigned index = reg->Dimension.Index;
+
+ switch (index / 2) {
+ case 0:
+ vtx_offset = unpack_param(ctx, ctx->param_gs_vtx01_offset,
+ index % 2 ? 16 : 0, 16);
+ break;
+ case 1:
+ vtx_offset = unpack_param(ctx, ctx->param_gs_vtx23_offset,
+ index % 2 ? 16 : 0, 16);
+ break;
+ case 2:
+ vtx_offset = unpack_param(ctx, ctx->param_gs_vtx45_offset,
+ index % 2 ? 16 : 0, 16);
+ break;
+ default:
+ assert(0);
+ return NULL;
+ }
+
+ vtx_offset = LLVMBuildAdd(gallivm->builder, vtx_offset,
+ LLVMConstInt(ctx->i32, param * 4, 0), "");
+ return lds_load(bld_base, type, swizzle, vtx_offset);
+ }
+
+ /* GFX6: input load from the ESGS ring in memory. */
if (swizzle == ~0) {
LLVMValueRef values[TGSI_NUM_CHANNELS];
unsigned chan;
TGSI_NUM_CHANNELS);
}
- /* Get the vertex offset parameter */
- vtx_offset_param = reg->Dimension.Index;
+ /* Get the vertex offset parameter on GFX6. */
+ unsigned vtx_offset_param = reg->Dimension.Index;
if (vtx_offset_param < 2) {
- vtx_offset_param += SI_PARAM_VTX0_OFFSET;
+ vtx_offset_param += ctx->param_gs_vtx0_offset;
} else {
assert(vtx_offset_param < 6);
- vtx_offset_param += SI_PARAM_VTX2_OFFSET - 2;
+ vtx_offset_param += ctx->param_gs_vtx2_offset - 2;
}
vtx_offset = lp_build_mul_imm(uint,
LLVMGetParam(ctx->main_fn,
vtx_offset_param),
4);
- param = si_shader_io_get_unique_index(semantic_name, semantic_index);
soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle) * 256, 0);
value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0,
LLVMBuilderRef builder = ctx->gallivm.builder;
LLVMValueRef args[2] = {resource, offset};
- return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2,
+ return lp_build_intrinsic(builder, "llvm.SI.load.const.v4i32", ctx->f32, args, 2,
LP_FUNC_ATTR_READNONE |
LP_FUNC_ATTR_LEGACY);
}
struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMBuilderRef builder = gallivm->builder;
- LLVMValueRef desc = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS);
+ LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
LLVMValueRef buf_index = LLVMConstInt(ctx->i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, buf_index);
LLVMGetParam(ctx->main_fn,
ctx->param_vertex_id),
LLVMGetParam(ctx->main_fn,
- SI_PARAM_BASE_VERTEX), "");
+ ctx->param_base_vertex), "");
break;
case TGSI_SEMANTIC_VERTEXID_NOBASE:
* (for direct draws) or the CP (for indirect draws) is the
* first vertex ID, but GLSL expects 0 to be returned.
*/
- LLVMValueRef vs_state = LLVMGetParam(ctx->main_fn, SI_PARAM_VS_STATE_BITS);
+ LLVMValueRef vs_state = LLVMGetParam(ctx->main_fn, ctx->param_vs_state_bits);
LLVMValueRef indexed;
indexed = LLVMBuildLShr(gallivm->builder, vs_state, ctx->i32_1, "");
indexed = LLVMBuildTrunc(gallivm->builder, indexed, ctx->i1, "");
value = LLVMBuildSelect(gallivm->builder, indexed,
- LLVMGetParam(ctx->main_fn, SI_PARAM_BASE_VERTEX),
+ LLVMGetParam(ctx->main_fn, ctx->param_base_vertex),
ctx->i32_0, "");
break;
}
case TGSI_SEMANTIC_BASEINSTANCE:
- value = LLVMGetParam(ctx->main_fn,
- SI_PARAM_START_INSTANCE);
+ value = LLVMGetParam(ctx->main_fn, ctx->param_start_instance);
break;
case TGSI_SEMANTIC_DRAWID:
- value = LLVMGetParam(ctx->main_fn,
- SI_PARAM_DRAWID);
+ value = LLVMGetParam(ctx->main_fn, ctx->param_draw_id);
break;
case TGSI_SEMANTIC_INVOCATIONID:
if (ctx->type == PIPE_SHADER_TESS_CTRL)
- value = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
+ value = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
else if (ctx->type == PIPE_SHADER_GEOMETRY)
value = LLVMGetParam(ctx->main_fn,
- SI_PARAM_GS_INSTANCE_ID);
+ ctx->param_gs_instance_id);
else
assert(!"INVOCATIONID not implemented");
break;
case TGSI_SEMANTIC_VERTICESIN:
if (ctx->type == PIPE_SHADER_TESS_CTRL)
- value = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 26, 6);
+ value = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 26, 6);
else if (ctx->type == PIPE_SHADER_TESS_EVAL)
- value = unpack_param(ctx, SI_PARAM_TCS_OFFCHIP_LAYOUT, 9, 7);
+ value = unpack_param(ctx, ctx->param_tcs_offchip_layout, 6, 6);
else
assert(!"invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
break;
case TGSI_SEMANTIC_TESSINNER:
case TGSI_SEMANTIC_TESSOUTER:
{
- LLVMValueRef rw_buffers, buffer, base, addr;
- int param = si_shader_io_get_unique_index(decl->Semantic.Name, 0);
+ LLVMValueRef buffer, base, addr;
+ int param = si_shader_io_get_unique_index_patch(decl->Semantic.Name, 0);
- rw_buffers = LLVMGetParam(ctx->main_fn,
- SI_PARAM_RW_BUFFERS);
- buffer = ac_build_indexed_load_const(&ctx->ac, rw_buffers,
- LLVMConstInt(ctx->i32, SI_HS_RING_TESS_OFFCHIP, 0));
+ buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
- base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
+ base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
LLVMConstInt(ctx->i32, param, 0));
int i, offset;
slot = LLVMConstInt(ctx->i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
- buf = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS);
+ buf = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
buf = ac_build_indexed_load_const(&ctx->ac, buf, slot);
offset = decl->Semantic.Name == TGSI_SEMANTIC_DEFAULT_TESSINNER_SI ? 4 : 0;
break;
case TGSI_SEMANTIC_GRID_SIZE:
- value = LLVMGetParam(ctx->main_fn, SI_PARAM_GRID_SIZE);
+ value = LLVMGetParam(ctx->main_fn, ctx->param_grid_size);
break;
case TGSI_SEMANTIC_BLOCK_SIZE:
value = lp_build_gather_values(gallivm, values, 3);
} else {
- value = LLVMGetParam(ctx->main_fn, SI_PARAM_BLOCK_SIZE);
+ value = LLVMGetParam(ctx->main_fn, ctx->param_block_size);
}
break;
}
case TGSI_SEMANTIC_BLOCK_ID:
- value = LLVMGetParam(ctx->main_fn, SI_PARAM_BLOCK_ID);
+ {
+ LLVMValueRef values[3];
+
+ for (int i = 0; i < 3; i++) {
+ values[i] = ctx->i32_0;
+ if (ctx->param_block_id[i] >= 0) {
+ values[i] = LLVMGetParam(ctx->main_fn,
+ ctx->param_block_id[i]);
+ }
+ }
+ value = lp_build_gather_values(gallivm, values, 3);
break;
+ }
case TGSI_SEMANTIC_THREAD_ID:
- value = LLVMGetParam(ctx->main_fn, SI_PARAM_THREAD_ID);
+ value = LLVMGetParam(ctx->main_fn, ctx->param_thread_id);
break;
case TGSI_SEMANTIC_HELPER_INVOCATION:
- if (HAVE_LLVM >= 0x0309) {
- value = lp_build_intrinsic(gallivm->builder,
- "llvm.amdgcn.ps.live",
- ctx->i1, NULL, 0,
- LP_FUNC_ATTR_READNONE);
- value = LLVMBuildNot(gallivm->builder, value, "");
- value = LLVMBuildSExt(gallivm->builder, value, ctx->i32, "");
- } else {
- assert(!"TGSI_SEMANTIC_HELPER_INVOCATION unsupported");
- return;
- }
+ value = lp_build_intrinsic(gallivm->builder,
+ "llvm.amdgcn.ps.live",
+ ctx->i1, NULL, 0,
+ LP_FUNC_ATTR_READNONE);
+ value = LLVMBuildNot(gallivm->builder, value, "");
+ value = LLVMBuildSExt(gallivm->builder, value, ctx->i32, "");
break;
case TGSI_SEMANTIC_SUBGROUP_SIZE:
static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
{
LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn,
- SI_PARAM_CONST_BUFFERS);
+ ctx->param_const_buffers);
return ac_build_indexed_load_const(&ctx->ac, list_ptr,
LLVMConstInt(ctx->i32, i, 0));
idx = reg->Register.Index * 4 + swizzle;
if (reg->Register.Dimension && reg->Dimension.Indirect) {
- LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_CONST_BUFFERS);
+ LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_buffers);
LLVMValueRef index;
index = get_bounded_indirect_index(ctx, ®->DimIndirect,
reg->Dimension.Index,
unsigned chan;
unsigned const_chan;
LLVMValueRef base_elt;
- LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS);
+ LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
LLVMValueRef constbuf_index = LLVMConstInt(ctx->i32,
SI_VS_CONST_CLIP_PLANES, 0);
LLVMValueRef const_resource = ac_build_indexed_load_const(&ctx->ac, ptr, constbuf_index);
LLVMValueRef so_write_offset[4] = {};
LLVMValueRef so_buffers[4];
LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
- SI_PARAM_RW_BUFFERS);
+ ctx->param_rw_buffers);
for (i = 0; i < 4; i++) {
if (!so->stride[i])
shader->selector->info.writes_layer) {
pos_args[1].enabled_channels = shader->selector->info.writes_psize |
(shader->selector->info.writes_edgeflag << 1) |
- (shader->selector->info.writes_layer << 2) |
- (shader->selector->info.writes_viewport_index << 3);
+ (shader->selector->info.writes_layer << 2);
+
pos_args[1].valid_mask = 0; /* EXEC mask */
pos_args[1].done = 0; /* last export? */
pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
ctx->f32, "");
}
- if (shader->selector->info.writes_layer)
- pos_args[1].out[2] = layer_value;
+ if (ctx->screen->b.chip_class >= GFX9) {
+ /* GFX9 has the layer in out.z[10:0] and the viewport
+ * index in out.z[19:16].
+ */
+ if (shader->selector->info.writes_layer)
+ pos_args[1].out[2] = layer_value;
+
+ if (shader->selector->info.writes_viewport_index) {
+ LLVMValueRef v = viewport_index_value;
+
+ v = bitcast(bld_base, TGSI_TYPE_UNSIGNED, v);
+ v = LLVMBuildShl(ctx->gallivm.builder, v,
+ LLVMConstInt(ctx->i32, 16, 0), "");
+ v = LLVMBuildOr(ctx->gallivm.builder, v,
+ bitcast(bld_base, TGSI_TYPE_UNSIGNED,
+ pos_args[1].out[2]), "");
+ pos_args[1].out[2] = bitcast(bld_base, TGSI_TYPE_FLOAT, v);
+ pos_args[1].enabled_channels |= 1 << 2;
+ }
+ } else {
+ if (shader->selector->info.writes_layer)
+ pos_args[1].out[2] = layer_value;
- if (shader->selector->info.writes_viewport_index)
- pos_args[1].out[3] = viewport_index_value;
+ if (shader->selector->info.writes_viewport_index) {
+ pos_args[1].out[3] = viewport_index_value;
+ pos_args[1].enabled_channels |= 1 << 3;
+ }
+ }
}
for (i = 0; i < 4; i++)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = &ctx->gallivm;
- LLVMValueRef invocation_id, rw_buffers, buffer, buffer_offset;
+ LLVMValueRef invocation_id, buffer, buffer_offset;
LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base;
uint64_t inputs;
- invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
-
- rw_buffers = LLVMGetParam(ctx->main_fn, SI_PARAM_RW_BUFFERS);
- buffer = ac_build_indexed_load_const(&ctx->ac, rw_buffers,
- LLVMConstInt(ctx->i32, SI_HS_RING_TESS_OFFCHIP, 0));
+ invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
+ buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
+ buffer_offset = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
- buffer_offset = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
-
- lds_vertex_stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 24, 8);
+ lds_vertex_stride = unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
lds_vertex_offset = LLVMBuildMul(gallivm->builder, invocation_id,
lds_vertex_stride, "");
lds_base = get_tcs_in_current_patch_offset(ctx);
lds_base = LLVMBuildAdd(gallivm->builder, lds_base, lds_vertex_offset, "");
- inputs = ctx->shader->key.mono.tcs.inputs_to_copy;
+ inputs = ctx->shader->key.mono.ff_tcs_inputs_to_copy;
while (inputs) {
unsigned i = u_bit_scan64(&inputs);
struct si_shader *shader = ctx->shader;
unsigned tess_inner_index, tess_outer_index;
LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
- LLVMValueRef out[6], vec0, vec1, rw_buffers, tf_base, inner[4], outer[4];
- unsigned stride, outer_comps, inner_comps, i;
+ LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
+ unsigned stride, outer_comps, inner_comps, i, offset;
struct lp_build_if_state if_ctx, inner_if_ctx;
si_llvm_emit_barrier(NULL, bld_base, NULL);
/* Load tess_inner and tess_outer from LDS.
* Any invocation can write them, so we can't get them from a temporary.
*/
- tess_inner_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0);
- tess_outer_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0);
+ tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
+ tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
lds_base = tcs_out_current_patch_data_offset;
lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
vec1 = lp_build_gather_values(gallivm, out+4, stride - 4);
/* Get the buffer. */
- rw_buffers = LLVMGetParam(ctx->main_fn,
- SI_PARAM_RW_BUFFERS);
- buffer = ac_build_indexed_load_const(&ctx->ac, rw_buffers,
- LLVMConstInt(ctx->i32, SI_HS_RING_TESS_FACTOR, 0));
+ buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_factor_addr_base64k);
/* Get the offset. */
tf_base = LLVMGetParam(ctx->main_fn,
- SI_PARAM_TESS_FACTOR_OFFSET);
+ ctx->param_tcs_factor_offset);
byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
LLVMConstInt(ctx->i32, 4 * stride, 0), "");
rel_patch_id, ctx->i32_0, ""));
/* Store the dynamic HS control word. */
- ac_build_buffer_store_dword(&ctx->ac, buffer,
- LLVMConstInt(ctx->i32, 0x80000000, 0),
- 1, ctx->i32_0, tf_base,
- 0, 1, 0, true, false);
+ offset = 0;
+ if (ctx->screen->b.chip_class <= VI) {
+ ac_build_buffer_store_dword(&ctx->ac, buffer,
+ LLVMConstInt(ctx->i32, 0x80000000, 0),
+ 1, ctx->i32_0, tf_base,
+ offset, 1, 0, true, false);
+ offset += 4;
+ }
lp_build_endif(&inner_if_ctx);
/* Store the tessellation factors. */
ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
MIN2(stride, 4), byteoffset, tf_base,
- 4, 1, 0, true, false);
+ offset, 1, 0, true, false);
+ offset += 16;
if (vec1)
ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
stride - 4, byteoffset, tf_base,
- 20, 1, 0, true, false);
+ offset, 1, 0, true, false);
/* Store the tess factors into the offchip buffer if TES reads them. */
if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
LLVMValueRef tf_inner_offset;
unsigned param_outer, param_inner;
- buf = ac_build_indexed_load_const(&ctx->ac, rw_buffers,
- LLVMConstInt(ctx->i32, SI_HS_RING_TESS_OFFCHIP, 0));
- base = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
+ buf = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
+ base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
- param_outer = si_shader_io_get_unique_index(
+ param_outer = si_shader_io_get_unique_index_patch(
TGSI_SEMANTIC_TESSOUTER, 0);
tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
LLVMConstInt(ctx->i32, param_outer, 0));
outer_comps, tf_outer_offset,
base, 0, 1, 0, true, false);
if (inner_comps) {
- param_inner = si_shader_io_get_unique_index(
+ param_inner = si_shader_io_get_unique_index_patch(
TGSI_SEMANTIC_TESSINNER, 0);
tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
LLVMConstInt(ctx->i32, param_inner, 0));
lp_build_endif(&if_ctx);
}
+/* Copy the shader input at "param" into the return value aggregate at
+ * position "return_index", so it can be passed through to the next
+ * part of a merged shader.
+ */
+static LLVMValueRef
+si_insert_input_ret(struct si_shader_context *ctx, LLVMValueRef ret,
+ unsigned param, unsigned return_index)
+{
+ return LLVMBuildInsertValue(ctx->gallivm.builder, ret,
+ LLVMGetParam(ctx->main_fn, param),
+ return_index, "");
+}
+
+/* Like si_insert_input_ret, but bitcasts the input to f32 first;
+ * callers use this for VGPR return slots.
+ */
+static LLVMValueRef
+si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueRef ret,
+ unsigned param, unsigned return_index)
+{
+ LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMValueRef p = LLVMGetParam(ctx->main_fn, param);
+
+ return LLVMBuildInsertValue(builder, ret,
+ LLVMBuildBitCast(builder, p, ctx->f32, ""),
+ return_index, "");
+}
+
+/* Insert a pointer input into the return value as two i32s: the low
+ * dword at "return_index" and the high dword at "return_index" + 1.
+ */
+static LLVMValueRef
+si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef ret,
+ unsigned param, unsigned return_index)
+{
+ LLVMBuilderRef builder = ctx->gallivm.builder;
+ LLVMValueRef ptr, lo, hi;
+
+ ptr = LLVMGetParam(ctx->main_fn, param);
+ ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, "");
+ ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, "");
+ lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, "");
+ hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, "");
+ ret = LLVMBuildInsertValue(builder, ret, lo, return_index, "");
+ return LLVMBuildInsertValue(builder, ret, hi, return_index + 1, "");
+}
+
/* This only writes the tessellation factor levels. */
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
- LLVMValueRef offchip_soffset, offchip_layout;
si_copy_tcs_inputs(bld_base);
rel_patch_id = get_rel_patch_id(ctx);
- invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5);
+ invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
/* Return epilog parameters from this function. */
LLVMBuilderRef builder = ctx->gallivm.builder;
LLVMValueRef ret = ctx->return_value;
- LLVMValueRef rw_buffers, rw0, rw1, tf_soffset;
unsigned vgpr;
- /* RW_BUFFERS pointer */
- rw_buffers = LLVMGetParam(ctx->main_fn,
- SI_PARAM_RW_BUFFERS);
- rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers, ctx->i64, "");
- rw_buffers = LLVMBuildBitCast(builder, rw_buffers, ctx->v2i32, "");
- rw0 = LLVMBuildExtractElement(builder, rw_buffers,
- ctx->i32_0, "");
- rw1 = LLVMBuildExtractElement(builder, rw_buffers,
- ctx->i32_1, "");
- ret = LLVMBuildInsertValue(builder, ret, rw0, 0, "");
- ret = LLVMBuildInsertValue(builder, ret, rw1, 1, "");
-
- /* Tess offchip and factor buffer soffset are after user SGPRs. */
- offchip_layout = LLVMGetParam(ctx->main_fn,
- SI_PARAM_TCS_OFFCHIP_LAYOUT);
- offchip_soffset = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
- tf_soffset = LLVMGetParam(ctx->main_fn,
- SI_PARAM_TESS_FACTOR_OFFSET);
- ret = LLVMBuildInsertValue(builder, ret, offchip_layout,
- SI_SGPR_TCS_OFFCHIP_LAYOUT, "");
- ret = LLVMBuildInsertValue(builder, ret, offchip_soffset,
- SI_TCS_NUM_USER_SGPR, "");
- ret = LLVMBuildInsertValue(builder, ret, tf_soffset,
- SI_TCS_NUM_USER_SGPR + 1, "");
+ if (ctx->screen->b.chip_class >= GFX9) {
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
+ 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k,
+ 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k,
+ 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K);
+ /* Tess offchip and tess factor offsets are at the beginning. */
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
+ vgpr = 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K + 1;
+ } else {
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
+ GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k,
+ GFX6_SGPR_TCS_OFFCHIP_ADDR_BASE64K);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k,
+ GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K);
+ /* Tess offchip and tess factor offsets are after user SGPRs. */
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset,
+ GFX6_TCS_NUM_USER_SGPR);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset,
+ GFX6_TCS_NUM_USER_SGPR + 1);
+ vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
+ }
/* VGPRs */
rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
- vgpr = SI_TCS_NUM_USER_SGPR + 2;
ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
ctx->return_value = ret;
}
+/* Pass TCS inputs from LS to TCS on GFX9. */
+static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
+{
+ LLVMValueRef ret = ctx->return_value;
+
+ /* Return slots 0-5: state shared by the merged LS+HS wave
+ * (rw_buffers takes slots 0-1 as two i32s).
+ */
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
+
+ /* User SGPRs start at return slot 8. */
+ ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
+ 8 + SI_SGPR_VS_STATE_BITS);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
+ 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets,
+ 8 + GFX9_SGPR_TCS_OUT_OFFSETS);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout,
+ 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k,
+ 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_addr_base64k,
+ 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K);
+
+ /* Descriptor pointers (const buffers, samplers, images, shader
+ * buffers) follow two params after tcs_factor_addr_base64k.
+ */
+ unsigned desc_param = ctx->param_tcs_factor_addr_base64k + 2;
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
+ 8 + GFX9_SGPR_TCS_CONST_BUFFERS);
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
+ 8 + GFX9_SGPR_TCS_SAMPLERS);
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
+ 8 + GFX9_SGPR_TCS_IMAGES);
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 3,
+ 8 + GFX9_SGPR_TCS_SHADER_BUFFERS);
+
+ /* VGPRs (patch id, rel ids) go after all user SGPRs. */
+ unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
+ ret = si_insert_input_ret_float(ctx, ret,
+ ctx->param_tcs_patch_id, vgpr++);
+ ret = si_insert_input_ret_float(ctx, ret,
+ ctx->param_tcs_rel_ids, vgpr++);
+ ctx->return_value = ret;
+}
+
+/* Pass GS inputs from ES to GS on GFX9. */
+static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
+{
+ LLVMValueRef ret = ctx->return_value;
+
+ /* Return slots 0-5: state shared by the merged ES+GS wave
+ * (rw_buffers takes slots 0-1; slot 4 is left untouched here).
+ */
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret, ctx->param_rw_buffers, 0);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
+ ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
+
+ ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
+
+ /* Descriptor pointers follow one param after vs_state_bits;
+ * user SGPRs start at return slot 8.
+ */
+ unsigned desc_param = ctx->param_vs_state_bits + 1;
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
+ 8 + GFX9_SGPR_GS_CONST_BUFFERS);
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
+ 8 + GFX9_SGPR_GS_SAMPLERS);
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
+ 8 + GFX9_SGPR_GS_IMAGES);
+ ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 3,
+ 8 + GFX9_SGPR_GS_SHADER_BUFFERS);
+
+ /* VGPRs: the five GS vertex-offset/prim inputs starting at
+ * param_gs_vtx01_offset, after all user SGPRs.
+ */
+ unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
+ for (unsigned i = 0; i < 5; i++) {
+ unsigned param = ctx->param_gs_vtx01_offset + i;
+ ret = si_insert_input_ret_float(ctx, ret, param, vgpr++);
+ }
+ ctx->return_value = ret;
+}
+
static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef vertex_id = LLVMGetParam(ctx->main_fn,
ctx->param_rel_auto_id);
LLVMValueRef vertex_dw_stride =
- unpack_param(ctx, SI_PARAM_VS_STATE_BITS, 24, 8);
+ unpack_param(ctx, ctx->param_vs_state_bits, 24, 8);
LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id,
vertex_dw_stride, "");
LLVMValueRef *out_ptr = ctx->outputs[i];
unsigned name = info->output_semantic_name[i];
unsigned index = info->output_semantic_index[i];
+
+ /* The ARB_shader_viewport_layer_array spec contains the
+ * following issue:
+ *
+ * 2) What happens if gl_ViewportIndex or gl_Layer is
+ * written in the vertex shader and a geometry shader is
+ * present?
+ *
+ * RESOLVED: The value written by the last vertex processing
+ * stage is used. If the last vertex processing stage
+ * (vertex, tessellation evaluation or geometry) does not
+ * statically assign to gl_ViewportIndex or gl_Layer, index
+ * or layer zero is assumed.
+ *
+ * So writes to those outputs in VS-as-LS are simply ignored.
+ */
+ if (name == TGSI_SEMANTIC_LAYER ||
+ name == TGSI_SEMANTIC_VIEWPORT_INDEX)
+ continue;
+
int param = si_shader_io_get_unique_index(name, index);
LLVMValueRef dw_addr = LLVMBuildAdd(gallivm->builder, base_dw_addr,
LLVMConstInt(ctx->i32, param * 4, 0), "");
LLVMBuildLoad(gallivm->builder, out_ptr[chan], ""));
}
}
+
+ if (ctx->screen->b.chip_class >= GFX9)
+ si_set_ls_return_value_for_tcs(ctx);
}
static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
struct tgsi_shader_info *info = &es->selector->info;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
ctx->param_es2gs_offset);
+ LLVMValueRef lds_base = NULL;
unsigned chan;
int i;
+ if (ctx->screen->b.chip_class >= GFX9 && info->num_outputs) {
+ unsigned itemsize_dw = es->selector->esgs_itemsize / 4;
+ lds_base = LLVMBuildMul(gallivm->builder, ac_get_thread_id(&ctx->ac),
+ LLVMConstInt(ctx->i32, itemsize_dw, 0), "");
+ }
+
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr = ctx->outputs[i];
- int param_index;
+ int param;
if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
continue;
- param_index = si_shader_io_get_unique_index(info->output_semantic_name[i],
- info->output_semantic_index[i]);
+ param = si_shader_io_get_unique_index(info->output_semantic_name[i],
+ info->output_semantic_index[i]);
for (chan = 0; chan < 4; chan++) {
LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, "");
+ /* GFX9 has the ESGS ring in LDS. */
+ if (ctx->screen->b.chip_class >= GFX9) {
+ lds_store(bld_base, param * 4 + chan, lds_base, out_val);
+ continue;
+ }
+
ac_build_buffer_store_dword(&ctx->ac,
ctx->esgs_ring,
out_val, 1, NULL, soffset,
- (4 * param_index + chan) * 4,
+ (4 * param + chan) * 4,
1, 1, true, true);
}
}
+
+ if (ctx->screen->b.chip_class >= GFX9)
+ si_set_es_return_value_for_gs(ctx);
+}
+
+/* Return the GS wave ID. Pre-GFX9 it arrives in a dedicated SGPR;
+ * on GFX9 the merged ES-GS wave packs it into bits [23:16] of the
+ * merged wave info SGPR. */
+static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
+{
+ if (ctx->screen->b.chip_class <= VI)
+ return LLVMGetParam(ctx->main_fn, ctx->param_gs_wave_id);
+
+ return unpack_param(ctx, ctx->param_merged_wave_info, 16, 8);
+}
static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
struct si_shader_context *ctx = si_shader_context(bld_base);
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
- LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID));
+ si_get_gs_wave_id(ctx));
}
static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
if (!cond) {
/* The state is in the first bit of the user SGPR. */
cond = LLVMGetParam(ctx->main_fn,
- SI_PARAM_VS_STATE_BITS);
+ ctx->param_vs_state_bits);
cond = LLVMBuildTrunc(gallivm->builder, cond,
ctx->i1, "");
lp_build_if(&if_ctx, gallivm, cond);
outputs[i].vertex_stream[j] =
(info->output_streams[i] >> (2 * j)) & 3;
}
-
}
- /* Return the primitive ID from the LLVM function. */
- ctx->return_value =
- LLVMBuildInsertValue(gallivm->builder,
- ctx->return_value,
- bitcast(bld_base, TGSI_TYPE_FLOAT,
- get_primitive_id(bld_base, 0)),
- VS_EPILOG_PRIMID_LOC, "");
-
if (ctx->shader->selector->so.num_outputs)
si_llvm_emit_streamout(ctx, outputs, i, 0);
+
+ /* Export PrimitiveID. */
+ if (ctx->shader->key.mono.vs_export_prim_id) {
+ outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
+ outputs[i].semantic_index = 0;
+ outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ get_primitive_id(bld_base, 0));
+ for (j = 1; j < 4; j++)
+ outputs[i].values[j] = LLVMConstReal(ctx->f32, 0);
+
+ memset(outputs[i].vertex_stream, 0,
+ sizeof(outputs[i].vertex_stream));
+ i++;
+ }
+
si_llvm_export_vs(bld_base, outputs, i);
FREE(outputs);
}
{
LLVMValueRef index;
LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
- SI_PARAM_SHADER_BUFFERS);
+ ctx->param_shader_buffers);
if (!reg->Register.Indirect)
index = LLVMConstInt(ctx->i32, reg->Register.Index, 0);
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
- SI_PARAM_IMAGES);
+ ctx->param_images);
LLVMValueRef index;
bool dcc_off = is_store;
static LLVMValueRef image_fetch_coords(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,
- unsigned src)
+ unsigned src, LLVMValueRef desc)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = &ctx->gallivm;
coords[chan] = tmp;
}
- /* 1D textures are allocated and used as 2D on GFX9. */
if (ctx->screen->b.chip_class >= GFX9) {
+ /* 1D textures are allocated and used as 2D on GFX9. */
if (target == TGSI_TEXTURE_1D) {
coords[1] = ctx->i32_0;
num_coords++;
} else if (target == TGSI_TEXTURE_1D_ARRAY) {
coords[2] = coords[1];
coords[1] = ctx->i32_0;
+ num_coords++;
+ } else if (target == TGSI_TEXTURE_2D) {
+ /* The hw can't bind a slice of a 3D image as a 2D
+ * image, because it ignores BASE_ARRAY if the target
+ * is 3D. The workaround is to read BASE_ARRAY and set
+ * it as the 3rd address operand for all 2D images.
+ */
+ LLVMValueRef first_layer, const5, mask;
+
+ const5 = LLVMConstInt(ctx->i32, 5, 0);
+ mask = LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0);
+ first_layer = LLVMBuildExtractElement(builder, desc, const5, "");
+ first_layer = LLVMBuildAnd(builder, first_layer, mask, "");
+
+ coords[2] = first_layer;
+ num_coords++;
}
}
LLVMValueRef coords;
image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);
- coords = image_fetch_coords(bld_base, inst, 1);
+ coords = image_fetch_coords(bld_base, inst, 1, rsrc);
if (target == TGSI_TEXTURE_BUFFER) {
buffer_append_args(ctx, emit_data, rsrc, coords,
*/
bool force_glc = ctx->screen->b.chip_class == SI;
- coords = image_fetch_coords(bld_base, inst, 0);
+ image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
+ coords = image_fetch_coords(bld_base, inst, 0, rsrc);
if (target == TGSI_TEXTURE_BUFFER) {
- image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
buffer_append_args(ctx, emit_data, rsrc, coords,
ctx->i32_0, false, force_glc);
} else {
emit_data->args[1] = coords;
- image_fetch_rsrc(bld_base, &memory, true, target,
- &emit_data->args[2]);
+ emit_data->args[2] = rsrc;
emit_data->args[3] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
emit_data->arg_count = 4;
LLVMValueRef coords;
image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
- coords = image_fetch_coords(bld_base, inst, 1);
+ coords = image_fetch_coords(bld_base, inst, 1, rsrc);
if (target == TGSI_TEXTURE_BUFFER) {
buffer_append_args(ctx, emit_data, rsrc, coords,
new_data = LLVMBuildBitCast(builder, new_data, ctx->i32, "");
-#if HAVE_LLVM >= 0x309
result = LLVMBuildAtomicCmpXchg(builder, ptr, arg, new_data,
LLVMAtomicOrderingSequentiallyConsistent,
LLVMAtomicOrderingSequentiallyConsistent,
false);
-#endif
result = LLVMBuildExtractValue(builder, result, 0, "");
} else {
LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- LLVMValueRef list = LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLERS);
+ LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers);
const struct tgsi_full_instruction *inst = emit_data->inst;
const struct tgsi_full_src_register *reg;
unsigned target = inst->Texture.Texture;
if (target == TGSI_TEXTURE_BUFFER) {
emit_data->dst_type = ctx->v4f32;
- emit_data->args[0] = LLVMBuildBitCast(gallivm->builder, res_ptr,
- ctx->v16i8, "");
+ emit_data->args[0] = res_ptr;
emit_data->args[1] = ctx->i32_0;
emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
emit_data->arg_count = 3;
struct gallivm_state *gallivm = &ctx->gallivm;
struct lp_build_if_state if_state;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
- SI_PARAM_GS2VS_OFFSET);
+ ctx->param_gs2vs_offset);
LLVMValueRef gs_next_vertex;
LLVMValueRef can_emit, kill;
unsigned chan, offset;
/* Signal vertex emission */
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
- LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID));
+ si_get_gs_wave_id(ctx));
if (!use_kill)
lp_build_endif(&if_state);
}
/* Signal primitive cut */
stream = si_llvm_get_stream(bld_base, emit_data);
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),
- LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID));
+ si_get_gs_wave_id(ctx));
}
static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
 * The real barrier instruction isn't needed, because an entire patch
* always fits into a single wave.
*/
- if (HAVE_LLVM >= 0x0309 &&
- ctx->screen->b.chip_class == SI &&
+ if (ctx->screen->b.chip_class == SI &&
ctx->type == PIPE_SHADER_TESS_CTRL) {
emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
return;
}
lp_build_intrinsic(gallivm->builder,
- HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier"
- : "llvm.AMDGPU.barrier.local",
+ "llvm.amdgcn.s.barrier",
ctx->voidt, NULL, 0, LP_FUNC_ATTR_CONVERGENT);
}
const char *name,
LLVMTypeRef *returns, unsigned num_returns,
LLVMTypeRef *params, unsigned num_params,
- int last_sgpr)
+ int last_sgpr, unsigned max_workgroup_size)
{
int i;
si_llvm_create_func(ctx, name, returns, num_returns,
params, num_params);
- si_llvm_shader_type(ctx->main_fn, ctx->type);
ctx->return_value = LLVMGetUndef(ctx->return_type);
for (i = 0; i <= last_sgpr; ++i) {
lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
}
+ if (max_workgroup_size) {
+ si_llvm_add_attribute(ctx->main_fn, "amdgpu-max-work-group-size",
+ max_workgroup_size);
+ }
LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
"no-signed-zeros-fp-math",
"true");
}
}
-static void declare_tess_lds(struct si_shader_context *ctx)
+static void declare_lds_as_pointer(struct si_shader_context *ctx)
{
struct gallivm_state *gallivm = &ctx->gallivm;
unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768;
ctx->lds = LLVMBuildIntToPtr(gallivm->builder, ctx->i32_0,
LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE),
- "tess_lds");
+ "lds");
}
static unsigned si_get_max_workgroup_size(struct si_shader *shader)
{
+ switch (shader->selector->type) {
+ case PIPE_SHADER_TESS_CTRL:
+ /* Return this so that LLVM doesn't remove s_barrier
+ * instructions on chips where we use s_barrier. */
+ return shader->selector->screen->b.chip_class >= CIK ? 128 : 64;
+
+ case PIPE_SHADER_GEOMETRY:
+ return shader->selector->screen->b.chip_class >= GFX9 ? 128 : 64;
+
+ case PIPE_SHADER_COMPUTE:
+ break; /* see below */
+
+ default:
+ return 0;
+ }
+
const unsigned *properties = shader->selector->info.properties;
unsigned max_work_group_size =
properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
return max_work_group_size;
}
+/* Declare the four per-stage descriptor-array pointers in fixed order:
+ * const buffers, samplers, images, shader buffers. The param indices
+ * are recorded in ctx only when this stage is the one the current
+ * main shader part belongs to (assign_params). */
+static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
+ LLVMTypeRef *params,
+ unsigned *num_params,
+ bool assign_params)
+{
+ unsigned base = *num_params;
+
+ params[base + 0] = const_array(ctx->v4i32, SI_NUM_CONST_BUFFERS);
+ params[base + 1] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
+ params[base + 2] = const_array(ctx->v8i32, SI_NUM_IMAGES);
+ params[base + 3] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
+ *num_params = base + 4;
+
+ if (assign_params) {
+ ctx->param_const_buffers = base;
+ ctx->param_samplers = base + 1;
+ ctx->param_images = base + 2;
+ ctx->param_shader_buffers = base + 3;
+ }
+}
+
+/* Declare the descriptor pointers every non-merged shader stage gets:
+ * the RW buffer list first, then the per-stage descriptor pointers
+ * (recorded in ctx, since this stage owns them). */
+static void declare_default_desc_pointers(struct si_shader_context *ctx,
+ LLVMTypeRef *params,
+ unsigned *num_params)
+{
+ params[ctx->param_rw_buffers = (*num_params)++] =
+ const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
+ declare_per_stage_desc_pointers(ctx, params, num_params, true);
+}
+
+/* Declare the user SGPRs specific to the vertex stage: the vertex
+ * buffer descriptor list, the draw-parameter SGPRs (base vertex,
+ * start instance, draw id) and the VS state bits. The declaration
+ * order here defines the SGPR layout; param indices are recorded in
+ * ctx for later LLVMGetParam lookups. */
+static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
+ LLVMTypeRef *params,
+ unsigned *num_params)
+{
+ params[ctx->param_vertex_buffers = (*num_params)++] =
+ const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS);
+ params[ctx->param_base_vertex = (*num_params)++] = ctx->i32;
+ params[ctx->param_start_instance = (*num_params)++] = ctx->i32;
+ params[ctx->param_draw_id = (*num_params)++] = ctx->i32;
+ params[ctx->param_vs_state_bits = (*num_params)++] = ctx->i32;
+}
+
+/* Declare the VS input VGPRs. The order mirrors the hardware input
+ * layout and differs between VS-as-LS and hardware VS; it must not be
+ * changed. Vertex load indices are extra VGPRs produced by the VS
+ * prolog, so they are counted into *num_prolog_vgprs. */
+static void declare_vs_input_vgprs(struct si_shader_context *ctx,
+ LLVMTypeRef *params, unsigned *num_params,
+ unsigned *num_prolog_vgprs)
+{
+ struct si_shader *shader = ctx->shader;
+
+ params[ctx->param_vertex_id = (*num_params)++] = ctx->i32;
+ /* The 2nd and 3rd VGPRs differ between VS-as-LS and hardware VS. */
+ if (shader->key.as_ls) {
+ params[ctx->param_rel_auto_id = (*num_params)++] = ctx->i32;
+ params[ctx->param_instance_id = (*num_params)++] = ctx->i32;
+ } else {
+ params[ctx->param_instance_id = (*num_params)++] = ctx->i32;
+ params[ctx->param_vs_prim_id = (*num_params)++] = ctx->i32;
+ }
+ params[(*num_params)++] = ctx->i32; /* unused */
+
+ if (!shader->is_gs_copy_shader) {
+ /* Vertex load indices. */
+ ctx->param_vertex_index0 = (*num_params);
+ for (unsigned i = 0; i < shader->selector->info.num_inputs; i++)
+ params[(*num_params)++] = ctx->i32;
+ *num_prolog_vgprs += shader->selector->info.num_inputs;
+ }
+}
+
+/* Declare the TES input VGPRs in hardware order: the tess coordinates
+ * (u, v), the relative patch ID, then the patch ID. */
+static void declare_tes_input_vgprs(struct si_shader_context *ctx,
+ LLVMTypeRef *params, unsigned *num_params)
+{
+ ctx->param_tes_u = *num_params;
+ params[(*num_params)++] = ctx->f32;
+ ctx->param_tes_v = *num_params;
+ params[(*num_params)++] = ctx->f32;
+ ctx->param_tes_rel_patch_id = *num_params;
+ params[(*num_params)++] = ctx->i32;
+ ctx->param_tes_patch_id = *num_params;
+ params[(*num_params)++] = ctx->i32;
+}
+
+enum {
+ /* Convenient merged shader definitions (GFX9+), numbered after the
+  * regular PIPE_SHADER_* types so create_function can switch over
+  * both in one place. */
+ SI_SHADER_MERGED_VERTEX_TESSCTRL = PIPE_SHADER_TYPES, /* LS + HS */
+ SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY, /* ES + GS */
+};
+
static void create_function(struct si_shader_context *ctx)
{
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
struct gallivm_state *gallivm = &ctx->gallivm;
struct si_shader *shader = ctx->shader;
- LLVMTypeRef params[SI_NUM_PARAMS + SI_MAX_ATTRIBS], v3i32;
+ LLVMTypeRef params[100]; /* just make it large enough */
LLVMTypeRef returns[16+32*4];
- unsigned i, last_sgpr, num_params, num_return_sgprs;
+ unsigned i, last_sgpr, num_params = 0, num_return_sgprs;
unsigned num_returns = 0;
unsigned num_prolog_vgprs = 0;
+ unsigned type = ctx->type;
- v3i32 = LLVMVectorType(ctx->i32, 3);
+ /* Set MERGED shaders. */
+ if (ctx->screen->b.chip_class >= GFX9) {
+ if (shader->key.as_ls || type == PIPE_SHADER_TESS_CTRL)
+ type = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */
+ else if (shader->key.as_es || type == PIPE_SHADER_GEOMETRY)
+ type = SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY;
+ }
- params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
- params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
- params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
- params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
- params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
+ LLVMTypeRef v3i32 = LLVMVectorType(ctx->i32, 3);
- switch (ctx->type) {
+ switch (type) {
case PIPE_SHADER_VERTEX:
- params[SI_PARAM_VERTEX_BUFFERS] = const_array(ctx->v16i8, SI_MAX_ATTRIBS);
- params[SI_PARAM_BASE_VERTEX] = ctx->i32;
- params[SI_PARAM_START_INSTANCE] = ctx->i32;
- params[SI_PARAM_DRAWID] = ctx->i32;
- params[SI_PARAM_VS_STATE_BITS] = ctx->i32;
- num_params = SI_PARAM_VS_STATE_BITS+1;
+ declare_default_desc_pointers(ctx, params, &num_params);
+ declare_vs_specific_input_sgprs(ctx, params, &num_params);
if (shader->key.as_es) {
params[ctx->param_es2gs_offset = num_params++] = ctx->i32;
} else if (shader->key.as_ls) {
/* no extra parameters */
} else {
- if (shader->is_gs_copy_shader) {
- num_params = SI_PARAM_RW_BUFFERS+1;
- }
+ if (shader->is_gs_copy_shader)
+ num_params = ctx->param_rw_buffers + 1;
/* The locations of the other parameters are assigned dynamically. */
declare_streamout_params(ctx, &shader->selector->so,
last_sgpr = num_params-1;
/* VGPRs */
- params[ctx->param_vertex_id = num_params++] = ctx->i32;
- params[ctx->param_rel_auto_id = num_params++] = ctx->i32;
- params[ctx->param_vs_prim_id = num_params++] = ctx->i32;
- params[ctx->param_instance_id = num_params++] = ctx->i32;
-
- if (!shader->is_gs_copy_shader) {
- /* Vertex load indices. */
- ctx->param_vertex_index0 = num_params;
-
- for (i = 0; i < shader->selector->info.num_inputs; i++)
- params[num_params++] = ctx->i32;
-
- num_prolog_vgprs += shader->selector->info.num_inputs;
-
- /* PrimitiveID output. */
- if (!shader->key.as_es && !shader->key.as_ls)
- for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
- returns[num_returns++] = ctx->f32;
- }
+ declare_vs_input_vgprs(ctx, params, &num_params,
+ &num_prolog_vgprs);
break;
- case PIPE_SHADER_TESS_CTRL:
- params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32;
- params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32;
- params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32;
- params[SI_PARAM_TCS_IN_LAYOUT] = ctx->i32;
- params[ctx->param_oc_lds = SI_PARAM_TCS_OC_LDS] = ctx->i32;
- params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx->i32;
- last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
+ case PIPE_SHADER_TESS_CTRL: /* SI-CI-VI */
+ declare_default_desc_pointers(ctx, params, &num_params);
+ params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
+ params[ctx->param_tcs_out_lds_offsets = num_params++] = ctx->i32;
+ params[ctx->param_tcs_out_lds_layout = num_params++] = ctx->i32;
+ params[ctx->param_vs_state_bits = num_params++] = ctx->i32;
+ params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
+ params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
+ params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
+ params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
+ last_sgpr = num_params - 1;
/* VGPRs */
- params[SI_PARAM_PATCH_ID] = ctx->i32;
- params[SI_PARAM_REL_IDS] = ctx->i32;
- num_params = SI_PARAM_REL_IDS+1;
+ params[ctx->param_tcs_patch_id = num_params++] = ctx->i32;
+ params[ctx->param_tcs_rel_ids = num_params++] = ctx->i32;
- /* SI_PARAM_TCS_OC_LDS and PARAM_TESS_FACTOR_OFFSET are
+ /* param_tcs_offchip_offset and param_tcs_factor_offset are
* placed after the user SGPRs.
*/
- for (i = 0; i < SI_TCS_NUM_USER_SGPR + 2; i++)
+ for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
returns[num_returns++] = ctx->i32; /* SGPRs */
-
for (i = 0; i < 3; i++)
returns[num_returns++] = ctx->f32; /* VGPRs */
break;
+ case SI_SHADER_MERGED_VERTEX_TESSCTRL:
+ /* Merged stages have 8 system SGPRs at the beginning. */
+ params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_HS */
+ const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
+ params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
+ params[ctx->param_merged_wave_info = num_params++] = ctx->i32;
+ params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
+ params[ctx->param_merged_scratch_offset = num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32; /* unused */
+ params[num_params++] = ctx->i32; /* unused */
+
+ params[num_params++] = ctx->i32; /* unused */
+ params[num_params++] = ctx->i32; /* unused */
+ declare_per_stage_desc_pointers(ctx, params, &num_params,
+ ctx->type == PIPE_SHADER_VERTEX);
+ declare_vs_specific_input_sgprs(ctx, params, &num_params);
+
+ params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
+ params[ctx->param_tcs_out_lds_offsets = num_params++] = ctx->i32;
+ params[ctx->param_tcs_out_lds_layout = num_params++] = ctx->i32;
+ params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
+ params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32; /* unused */
+
+ declare_per_stage_desc_pointers(ctx, params, &num_params,
+ ctx->type == PIPE_SHADER_TESS_CTRL);
+ last_sgpr = num_params - 1;
+
+ /* VGPRs (first TCS, then VS) */
+ params[ctx->param_tcs_patch_id = num_params++] = ctx->i32;
+ params[ctx->param_tcs_rel_ids = num_params++] = ctx->i32;
+
+ if (ctx->type == PIPE_SHADER_VERTEX) {
+ declare_vs_input_vgprs(ctx, params, &num_params,
+ &num_prolog_vgprs);
+
+ /* LS return values are inputs to the TCS main shader part. */
+ for (i = 0; i < 8 + GFX9_TCS_NUM_USER_SGPR; i++)
+ returns[num_returns++] = ctx->i32; /* SGPRs */
+ for (i = 0; i < 2; i++)
+ returns[num_returns++] = ctx->f32; /* VGPRs */
+ } else {
+ /* TCS return values are inputs to the TCS epilog.
+ *
+ * param_tcs_offchip_offset, param_tcs_factor_offset,
+ * param_tcs_offchip_layout, and param_rw_buffers
+ * should be passed to the epilog.
+ */
+ for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; i++)
+ returns[num_returns++] = ctx->i32; /* SGPRs */
+ for (i = 0; i < 3; i++)
+ returns[num_returns++] = ctx->f32; /* VGPRs */
+ }
+ break;
+
+ case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
+ /* Merged stages have 8 system SGPRs at the beginning. */
+ params[ctx->param_rw_buffers = num_params++] = /* SPI_SHADER_USER_DATA_ADDR_LO_GS */
+ const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
+ params[ctx->param_gs2vs_offset = num_params++] = ctx->i32;
+ params[ctx->param_merged_wave_info = num_params++] = ctx->i32;
+ params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
+ params[ctx->param_merged_scratch_offset = num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32; /* unused (SPI_SHADER_PGM_LO/HI_GS << 8) */
+ params[num_params++] = ctx->i32; /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */
+
+ params[num_params++] = ctx->i32; /* unused */
+ params[num_params++] = ctx->i32; /* unused */
+ declare_per_stage_desc_pointers(ctx, params, &num_params,
+ (ctx->type == PIPE_SHADER_VERTEX ||
+ ctx->type == PIPE_SHADER_TESS_EVAL));
+ if (ctx->type == PIPE_SHADER_VERTEX) {
+ declare_vs_specific_input_sgprs(ctx, params, &num_params);
+ } else {
+ /* TESS_EVAL (and also GEOMETRY):
+ * Declare as many input SGPRs as the VS has. */
+ params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
+ params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32; /* unused */
+ params[num_params++] = ctx->i32; /* unused */
+ params[num_params++] = ctx->i32; /* unused */
+ params[ctx->param_vs_state_bits = num_params++] = ctx->i32; /* unused */
+ }
+
+ declare_per_stage_desc_pointers(ctx, params, &num_params,
+ ctx->type == PIPE_SHADER_GEOMETRY);
+ last_sgpr = num_params - 1;
+
+ /* VGPRs (first GS, then VS/TES) */
+ params[ctx->param_gs_vtx01_offset = num_params++] = ctx->i32;
+ params[ctx->param_gs_vtx23_offset = num_params++] = ctx->i32;
+ params[ctx->param_gs_prim_id = num_params++] = ctx->i32;
+ params[ctx->param_gs_instance_id = num_params++] = ctx->i32;
+ params[ctx->param_gs_vtx45_offset = num_params++] = ctx->i32;
+
+ if (ctx->type == PIPE_SHADER_VERTEX) {
+ declare_vs_input_vgprs(ctx, params, &num_params,
+ &num_prolog_vgprs);
+ } else if (ctx->type == PIPE_SHADER_TESS_EVAL) {
+ declare_tes_input_vgprs(ctx, params, &num_params);
+ }
+
+ if (ctx->type == PIPE_SHADER_VERTEX ||
+ ctx->type == PIPE_SHADER_TESS_EVAL) {
+ /* ES return values are inputs to GS. */
+ for (i = 0; i < 8 + GFX9_GS_NUM_USER_SGPR; i++)
+ returns[num_returns++] = ctx->i32; /* SGPRs */
+ for (i = 0; i < 5; i++)
+ returns[num_returns++] = ctx->f32; /* VGPRs */
+ }
+ break;
+
case PIPE_SHADER_TESS_EVAL:
- params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32;
- num_params = SI_PARAM_TCS_OFFCHIP_LAYOUT+1;
+ declare_default_desc_pointers(ctx, params, &num_params);
+ params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
+ params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
if (shader->key.as_es) {
- params[ctx->param_oc_lds = num_params++] = ctx->i32;
+ params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
params[num_params++] = ctx->i32;
params[ctx->param_es2gs_offset = num_params++] = ctx->i32;
} else {
params[num_params++] = ctx->i32;
declare_streamout_params(ctx, &shader->selector->so,
params, ctx->i32, &num_params);
- params[ctx->param_oc_lds = num_params++] = ctx->i32;
+ params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
}
last_sgpr = num_params - 1;
/* VGPRs */
- params[ctx->param_tes_u = num_params++] = ctx->f32;
- params[ctx->param_tes_v = num_params++] = ctx->f32;
- params[ctx->param_tes_rel_patch_id = num_params++] = ctx->i32;
- params[ctx->param_tes_patch_id = num_params++] = ctx->i32;
-
- /* PrimitiveID output. */
- if (!shader->key.as_es)
- for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++)
- returns[num_returns++] = ctx->f32;
+ declare_tes_input_vgprs(ctx, params, &num_params);
break;
case PIPE_SHADER_GEOMETRY:
- params[SI_PARAM_GS2VS_OFFSET] = ctx->i32;
- params[SI_PARAM_GS_WAVE_ID] = ctx->i32;
- last_sgpr = SI_PARAM_GS_WAVE_ID;
+ declare_default_desc_pointers(ctx, params, &num_params);
+ params[ctx->param_gs2vs_offset = num_params++] = ctx->i32;
+ params[ctx->param_gs_wave_id = num_params++] = ctx->i32;
+ last_sgpr = num_params - 1;
/* VGPRs */
- params[SI_PARAM_VTX0_OFFSET] = ctx->i32;
- params[SI_PARAM_VTX1_OFFSET] = ctx->i32;
- params[SI_PARAM_PRIMITIVE_ID] = ctx->i32;
- params[SI_PARAM_VTX2_OFFSET] = ctx->i32;
- params[SI_PARAM_VTX3_OFFSET] = ctx->i32;
- params[SI_PARAM_VTX4_OFFSET] = ctx->i32;
- params[SI_PARAM_VTX5_OFFSET] = ctx->i32;
- params[SI_PARAM_GS_INSTANCE_ID] = ctx->i32;
- num_params = SI_PARAM_GS_INSTANCE_ID+1;
+ params[ctx->param_gs_vtx0_offset = num_params++] = ctx->i32;
+ params[ctx->param_gs_vtx1_offset = num_params++] = ctx->i32;
+ params[ctx->param_gs_prim_id = num_params++] = ctx->i32;
+ params[ctx->param_gs_vtx2_offset = num_params++] = ctx->i32;
+ params[ctx->param_gs_vtx3_offset = num_params++] = ctx->i32;
+ params[ctx->param_gs_vtx4_offset = num_params++] = ctx->i32;
+ params[ctx->param_gs_vtx5_offset = num_params++] = ctx->i32;
+ params[ctx->param_gs_instance_id = num_params++] = ctx->i32;
break;
case PIPE_SHADER_FRAGMENT:
+ declare_default_desc_pointers(ctx, params, &num_params);
params[SI_PARAM_ALPHA_REF] = ctx->f32;
params[SI_PARAM_PRIM_MASK] = ctx->i32;
last_sgpr = SI_PARAM_PRIM_MASK;
break;
case PIPE_SHADER_COMPUTE:
- params[SI_PARAM_GRID_SIZE] = v3i32;
- params[SI_PARAM_BLOCK_SIZE] = v3i32;
- params[SI_PARAM_BLOCK_ID] = v3i32;
- last_sgpr = SI_PARAM_BLOCK_ID;
+ declare_default_desc_pointers(ctx, params, &num_params);
+ if (shader->selector->info.uses_grid_size)
+ params[ctx->param_grid_size = num_params++] = v3i32;
+ if (shader->selector->info.uses_block_size)
+ params[ctx->param_block_size = num_params++] = v3i32;
+
+ for (i = 0; i < 3; i++) {
+ ctx->param_block_id[i] = -1;
+ if (shader->selector->info.uses_block_id[i])
+ params[ctx->param_block_id[i] = num_params++] = ctx->i32;
+ }
+ last_sgpr = num_params - 1;
- params[SI_PARAM_THREAD_ID] = v3i32;
- num_params = SI_PARAM_THREAD_ID + 1;
+ params[ctx->param_thread_id = num_params++] = v3i32;
break;
default:
assert(0 && "unimplemented shader");
assert(num_params <= ARRAY_SIZE(params));
si_create_function(ctx, "main", returns, num_returns, params,
- num_params, last_sgpr);
+ num_params, last_sgpr,
+ si_get_max_workgroup_size(shader));
/* Reserve register locations for VGPR inputs the PS prolog may need. */
if (ctx->type == PIPE_SHADER_FRAGMENT &&
S_0286D0_LINEAR_CENTROID_ENA(1) |
S_0286D0_FRONT_FACE_ENA(1) |
S_0286D0_POS_FIXED_PT_ENA(1));
- } else if (ctx->type == PIPE_SHADER_COMPUTE) {
- si_llvm_add_attribute(ctx->main_fn,
- "amdgpu-max-work-group-size",
- si_get_max_workgroup_size(shader));
}
shader->info.num_input_sgprs = 0;
"ddxy_lds",
LOCAL_ADDR_SPACE);
- if ((ctx->type == PIPE_SHADER_VERTEX && shader->key.as_ls) ||
- ctx->type == PIPE_SHADER_TESS_CTRL)
- declare_tess_lds(ctx);
+ if (shader->key.as_ls ||
+ ctx->type == PIPE_SHADER_TESS_CTRL ||
+ /* GFX9 has the ESGS ring buffer in LDS. */
+ (ctx->screen->b.chip_class >= GFX9 &&
+ (shader->key.as_es ||
+ ctx->type == PIPE_SHADER_GEOMETRY)))
+ declare_lds_as_pointer(ctx);
}
/**
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
- SI_PARAM_RW_BUFFERS);
+ ctx->param_rw_buffers);
- if ((ctx->type == PIPE_SHADER_VERTEX &&
- ctx->shader->key.as_es) ||
- (ctx->type == PIPE_SHADER_TESS_EVAL &&
- ctx->shader->key.as_es) ||
- ctx->type == PIPE_SHADER_GEOMETRY) {
+ if (ctx->screen->b.chip_class <= VI &&
+ (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) {
unsigned ring =
ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
: SI_ES_RING_ESGS;
S_008F0C_ADD_TID_ENABLE(1),
0),
LLVMConstInt(ctx->i32, 3, 0), "");
- ring = LLVMBuildBitCast(builder, ring, ctx->v16i8, "");
ctx->gsvs_ring[stream] = ring;
}
case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
+ case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
case R_00B848_COMPUTE_PGM_RSRC1:
conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
conf->spi_ps_input_addr = conf->spi_ps_input_ena;
}
-void si_shader_apply_scratch_relocs(struct si_context *sctx,
- struct si_shader *shader,
- struct si_shader_config *config,
- uint64_t scratch_va)
+void si_shader_apply_scratch_relocs(struct si_shader *shader,
+ uint64_t scratch_va)
{
unsigned i;
uint32_t scratch_rsrc_dword0 = scratch_va;
uint32_t scratch_rsrc_dword1 =
S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
- /* Enable scratch coalescing if LLVM sets ELEMENT_SIZE & INDEX_STRIDE
- * correctly.
- */
- if (HAVE_LLVM >= 0x0309)
- scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
- else
- scratch_rsrc_dword1 |=
- S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
+ /* Enable scratch coalescing. */
+ scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
for (i = 0 ; i < shader->binary.reloc_count; i++) {
const struct ac_shader_reloc *reloc =
if (shader->prolog)
size += shader->prolog->binary.code_size;
+ if (shader->previous_stage)
+ size += shader->previous_stage->binary.code_size;
+ if (shader->prolog2)
+ size += shader->prolog2->binary.code_size;
if (shader->epilog)
size += shader->epilog->binary.code_size;
return size;
{
const struct ac_shader_binary *prolog =
shader->prolog ? &shader->prolog->binary : NULL;
+ const struct ac_shader_binary *previous_stage =
+ shader->previous_stage ? &shader->previous_stage->binary : NULL;
+ const struct ac_shader_binary *prolog2 =
+ shader->prolog2 ? &shader->prolog2->binary : NULL;
const struct ac_shader_binary *epilog =
shader->epilog ? &shader->epilog->binary : NULL;
const struct ac_shader_binary *mainb = &shader->binary;
unsigned char *ptr;
assert(!prolog || !prolog->rodata_size);
- assert((!prolog && !epilog) || !mainb->rodata_size);
+ assert(!previous_stage || !previous_stage->rodata_size);
+ assert(!prolog2 || !prolog2->rodata_size);
+ assert((!prolog && !previous_stage && !prolog2 && !epilog) ||
+ !mainb->rodata_size);
assert(!epilog || !epilog->rodata_size);
/* GFX9 can fetch at most 128 bytes past the end of the shader.
/* Upload. */
ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
- PIPE_TRANSFER_READ_WRITE);
+ PIPE_TRANSFER_READ_WRITE |
+ PIPE_TRANSFER_UNSYNCHRONIZED);
+ /* Don't use util_memcpy_cpu_to_le32. LLVM binaries are
+ * endian-independent. */
if (prolog) {
- util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size);
+ memcpy(ptr, prolog->code, prolog->code_size);
ptr += prolog->code_size;
}
+ if (previous_stage) {
+ memcpy(ptr, previous_stage->code, previous_stage->code_size);
+ ptr += previous_stage->code_size;
+ }
+ if (prolog2) {
+ memcpy(ptr, prolog2->code, prolog2->code_size);
+ ptr += prolog2->code_size;
+ }
- util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size);
+ memcpy(ptr, mainb->code, mainb->code_size);
ptr += mainb->code_size;
if (epilog)
- util_memcpy_cpu_to_le32(ptr, epilog->code, epilog->code_size);
+ memcpy(ptr, epilog->code, epilog->code_size);
else if (mainb->rodata_size > 0)
- util_memcpy_cpu_to_le32(ptr, mainb->rodata, mainb->rodata_size);
+ memcpy(ptr, mainb->rodata, mainb->rodata_size);
sscreen->b.ws->buffer_unmap(shader->bo->buf);
return 0;
{
if (!check_debug_option ||
r600_can_dump_shader(&sscreen->b, processor))
- si_dump_shader_key(processor, &shader->key, file);
+ si_dump_shader_key(processor, shader, file);
if (!check_debug_option && shader->binary.llvm_ir_string) {
fprintf(file, "\n%s - main shader part - LLVM IR:\n\n",
if (shader->prolog)
si_shader_dump_disassembly(&shader->prolog->binary,
debug, "prolog", file);
+ if (shader->previous_stage)
+ si_shader_dump_disassembly(&shader->previous_stage->binary,
+ debug, "previous stage", file);
+ if (shader->prolog2)
+ si_shader_dump_disassembly(&shader->prolog2->binary,
+ debug, "prolog2", file);
si_shader_dump_disassembly(&shader->binary, debug, "main", file);
check_debug_option);
}
-int si_compile_llvm(struct si_screen *sscreen,
- struct ac_shader_binary *binary,
- struct si_shader_config *conf,
- LLVMTargetMachineRef tm,
- LLVMModuleRef mod,
- struct pipe_debug_callback *debug,
- unsigned processor,
- const char *name)
+static int si_compile_llvm(struct si_screen *sscreen,
+ struct ac_shader_binary *binary,
+ struct si_shader_config *conf,
+ LLVMTargetMachineRef tm,
+ LLVMModuleRef mod,
+ struct pipe_debug_callback *debug,
+ unsigned processor,
+ const char *name)
{
int r = 0;
unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
shader->selector = gs_selector;
shader->is_gs_copy_shader = true;
- si_init_shader_ctx(&ctx, sscreen, shader, tm);
+ si_init_shader_ctx(&ctx, sscreen, tm);
+ ctx.shader = shader;
ctx.type = PIPE_SHADER_VERTEX;
builder = gallivm->builder;
LLVMBuildRetVoid(gallivm->builder);
- /* Dump LLVM IR before any optimization passes */
- if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
- r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
- ac_dump_module(ctx.gallivm.module);
-
- si_llvm_finalize_module(&ctx,
- r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_GEOMETRY));
+ ctx.type = PIPE_SHADER_GEOMETRY; /* override for shader dumping */
+ si_llvm_optimize_module(&ctx);
r = si_compile_llvm(sscreen, &ctx.shader->binary,
&ctx.shader->config, ctx.tm,
return shader;
}
-static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
+/* Print the VS-prolog-related parts of a shader key to \p f for debugging:
+ * the per-attribute instance divisors from \p prolog and the per-attribute
+ * mono.vs_fix_fetch codes from \p key.  \p prefix names the key sub-struct
+ * being dumped (e.g. "part.vs.prolog") so the same helper can be reused for
+ * merged-shader keys.  NOTE(review): comment added on a diff hunk; it
+ * describes only the added lines visible here.
+ */
+static void si_dump_shader_key_vs(struct si_shader_key *key,
+				  struct si_vs_prolog_bits *prolog,
+				  const char *prefix, FILE *f)
+{
+	fprintf(f, "  %s.instance_divisors = {", prefix);
+	for (int i = 0; i < ARRAY_SIZE(prolog->instance_divisors); i++) {
+		fprintf(f, !i ? "%u" : ", %u",
+			prolog->instance_divisors[i]);
+	}
+	fprintf(f, "}\n");
+
+	fprintf(f, "  mono.vs.fix_fetch = {");
+	for (int i = 0; i < SI_MAX_ATTRIBS; i++)
+		fprintf(f, !i ? "%u" : ", %u", key->mono.vs_fix_fetch[i]);
+	fprintf(f, "}\n");
+}
+
+static void si_dump_shader_key(unsigned processor, struct si_shader *shader,
FILE *f)
{
- int i;
+ struct si_shader_key *key = &shader->key;
fprintf(f, "SHADER KEY\n");
- switch (shader) {
+ switch (processor) {
case PIPE_SHADER_VERTEX:
- fprintf(f, " part.vs.prolog.instance_divisors = {");
- for (i = 0; i < ARRAY_SIZE(key->part.vs.prolog.instance_divisors); i++)
- fprintf(f, !i ? "%u" : ", %u",
- key->part.vs.prolog.instance_divisors[i]);
- fprintf(f, "}\n");
- fprintf(f, " part.vs.epilog.export_prim_id = %u\n", key->part.vs.epilog.export_prim_id);
+ si_dump_shader_key_vs(key, &key->part.vs.prolog,
+ "part.vs.prolog", f);
fprintf(f, " as_es = %u\n", key->as_es);
fprintf(f, " as_ls = %u\n", key->as_ls);
-
- fprintf(f, " mono.vs.fix_fetch = {");
- for (i = 0; i < SI_MAX_ATTRIBS; i++)
- fprintf(f, !i ? "%u" : ", %u", key->mono.vs.fix_fetch[i]);
- fprintf(f, "}\n");
+ fprintf(f, " mono.vs_export_prim_id = %u\n",
+ key->mono.vs_export_prim_id);
break;
case PIPE_SHADER_TESS_CTRL:
+ if (shader->selector->screen->b.chip_class >= GFX9) {
+ si_dump_shader_key_vs(key, &key->part.tcs.ls_prolog,
+ "part.tcs.ls_prolog", f);
+ }
fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->part.tcs.epilog.prim_mode);
- fprintf(f, " mono.tcs.inputs_to_copy = 0x%"PRIx64"\n", key->mono.tcs.inputs_to_copy);
+ fprintf(f, " mono.ff_tcs_inputs_to_copy = 0x%"PRIx64"\n", key->mono.ff_tcs_inputs_to_copy);
break;
case PIPE_SHADER_TESS_EVAL:
- fprintf(f, " part.tes.epilog.export_prim_id = %u\n", key->part.tes.epilog.export_prim_id);
fprintf(f, " as_es = %u\n", key->as_es);
+ fprintf(f, " mono.vs_export_prim_id = %u\n",
+ key->mono.vs_export_prim_id);
break;
case PIPE_SHADER_GEOMETRY:
+ if (shader->is_gs_copy_shader)
+ break;
+
+ if (shader->selector->screen->b.chip_class >= GFX9 &&
+ key->part.gs.es->type == PIPE_SHADER_VERTEX) {
+ si_dump_shader_key_vs(key, &key->part.gs.vs_prolog,
+ "part.gs.vs_prolog", f);
+ }
fprintf(f, " part.gs.prolog.tri_strip_adj_fix = %u\n", key->part.gs.prolog.tri_strip_adj_fix);
break;
assert(0);
}
- if ((shader == PIPE_SHADER_GEOMETRY ||
- shader == PIPE_SHADER_TESS_EVAL ||
- shader == PIPE_SHADER_VERTEX) &&
+ if ((processor == PIPE_SHADER_GEOMETRY ||
+ processor == PIPE_SHADER_TESS_EVAL ||
+ processor == PIPE_SHADER_VERTEX) &&
!key->as_es && !key->as_ls) {
fprintf(f, " opt.hw_vs.kill_outputs = 0x%"PRIx64"\n", key->opt.hw_vs.kill_outputs);
fprintf(f, " opt.hw_vs.kill_outputs2 = 0x%x\n", key->opt.hw_vs.kill_outputs2);
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
- struct si_shader *shader,
LLVMTargetMachineRef tm)
{
struct lp_build_tgsi_context *bld_base;
struct lp_build_tgsi_action tmpl = {};
- si_llvm_context_init(ctx, sscreen, shader, tm,
- (shader && shader->selector) ? &shader->selector->info : NULL,
- (shader && shader->selector) ? shader->selector->tokens : NULL);
+ si_llvm_context_init(ctx, sscreen, tm);
bld_base = &ctx->bld_base;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
}
-#define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
-#define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
-
-/* Return true if the PARAM export has been eliminated. */
-static bool si_eliminate_const_output(struct si_shader_context *ctx,
- LLVMValueRef inst, unsigned offset)
-{
- struct si_shader *shader = ctx->shader;
- unsigned num_outputs = shader->selector->info.num_outputs;
- unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
- bool is_zero[4] = {}, is_one[4] = {};
-
- for (i = 0; i < 4; i++) {
- LLVMBool loses_info;
- LLVMValueRef p = LLVMGetOperand(inst, EXP_OUT0 + i);
-
- /* It's a constant expression. Undef outputs are eliminated too. */
- if (LLVMIsUndef(p)) {
- is_zero[i] = true;
- is_one[i] = true;
- } else if (LLVMIsAConstantFP(p)) {
- double a = LLVMConstRealGetDouble(p, &loses_info);
-
- if (a == 0)
- is_zero[i] = true;
- else if (a == 1)
- is_one[i] = true;
- else
- return false; /* other constant */
- } else
- return false;
- }
-
- /* Only certain combinations of 0 and 1 can be eliminated. */
- if (is_zero[0] && is_zero[1] && is_zero[2])
- default_val = is_zero[3] ? 0 : 1;
- else if (is_one[0] && is_one[1] && is_one[2])
- default_val = is_zero[3] ? 2 : 3;
- else
- return false;
-
- /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
- LLVMInstructionEraseFromParent(inst);
-
- /* Change OFFSET to DEFAULT_VAL. */
- for (i = 0; i < num_outputs; i++) {
- if (shader->info.vs_output_param_offset[i] == offset) {
- shader->info.vs_output_param_offset[i] =
- EXP_PARAM_DEFAULT_VAL_0000 + default_val;
- break;
- }
- }
- return true;
-}
-
-struct si_vs_exports {
- unsigned num;
- unsigned offset[SI_MAX_VS_OUTPUTS];
- LLVMValueRef inst[SI_MAX_VS_OUTPUTS];
-};
-
static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
{
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = &shader->selector->info;
- LLVMBasicBlockRef bb;
- struct si_vs_exports exports;
- bool removed_any = false;
-
- exports.num = 0;
- if (ctx->type == PIPE_SHADER_FRAGMENT ||
- ctx->type == PIPE_SHADER_COMPUTE ||
- shader->key.as_es ||
- shader->key.as_ls)
+ if ((ctx->type != PIPE_SHADER_VERTEX &&
+ ctx->type != PIPE_SHADER_TESS_EVAL) ||
+ shader->key.as_ls ||
+ shader->key.as_es)
return;
- /* Process all LLVM instructions. */
- bb = LLVMGetFirstBasicBlock(ctx->main_fn);
- while (bb) {
- LLVMValueRef inst = LLVMGetFirstInstruction(bb);
-
- while (inst) {
- LLVMValueRef cur = inst;
- inst = LLVMGetNextInstruction(inst);
-
- if (LLVMGetInstructionOpcode(cur) != LLVMCall)
- continue;
-
- LLVMValueRef callee = lp_get_called_value(cur);
-
- if (!lp_is_function(callee))
- continue;
-
- const char *name = LLVMGetValueName(callee);
- unsigned num_args = LLVMCountParams(callee);
-
- /* Check if this is an export instruction. */
- if ((num_args != 9 && num_args != 8) ||
- (strcmp(name, "llvm.SI.export") &&
- strcmp(name, "llvm.amdgcn.exp.f32")))
- continue;
-
- LLVMValueRef arg = LLVMGetOperand(cur, EXP_TARGET);
- unsigned target = LLVMConstIntGetZExtValue(arg);
-
- if (target < V_008DFC_SQ_EXP_PARAM)
- continue;
-
- target -= V_008DFC_SQ_EXP_PARAM;
-
- /* Eliminate constant value PARAM exports. */
- if (si_eliminate_const_output(ctx, cur, target)) {
- removed_any = true;
- } else {
- exports.offset[exports.num] = target;
- exports.inst[exports.num] = cur;
- exports.num++;
- }
- }
- bb = LLVMGetNextBasicBlock(bb);
- }
-
- /* Remove holes in export memory due to removed PARAM exports.
- * This is done by renumbering all PARAM exports.
- */
- if (removed_any) {
- ubyte current_offset[SI_MAX_VS_OUTPUTS];
- unsigned new_count = 0;
- unsigned out, i;
-
- /* Make a copy of the offsets. We need the old version while
- * we are modifying some of them. */
- assert(sizeof(current_offset) ==
- sizeof(shader->info.vs_output_param_offset));
- memcpy(current_offset, shader->info.vs_output_param_offset,
- sizeof(current_offset));
-
- for (i = 0; i < exports.num; i++) {
- unsigned offset = exports.offset[i];
-
- for (out = 0; out < info->num_outputs; out++) {
- if (current_offset[out] != offset)
- continue;
-
- LLVMSetOperand(exports.inst[i], EXP_TARGET,
- LLVMConstInt(ctx->i32,
- V_008DFC_SQ_EXP_PARAM + new_count, 0));
- shader->info.vs_output_param_offset[out] = new_count;
- new_count++;
- break;
- }
- }
- shader->info.nr_param_exports = new_count;
- }
+ ac_optimize_vs_outputs(&ctx->ac,
+ ctx->main_fn,
+ shader->info.vs_output_param_offset,
+ info->num_outputs,
+ &shader->info.nr_param_exports);
}
static void si_count_scratch_private_memory(struct si_shader_context *ctx)
}
}
+/* Emit llvm.amdgcn.init.exec with an all-ones 64-bit constant, i.e. enable
+ * all lanes in EXEC.  Used by shader parts (e.g. prologs of merged GFX9
+ * shaders) that only move registers and must run with a full mask; the main
+ * part sets the real mask afterwards.
+ */
+static void si_init_exec_full_mask(struct si_shader_context *ctx)
+{
+	LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
+	lp_build_intrinsic(ctx->gallivm.builder,
+			   "llvm.amdgcn.init.exec", ctx->voidt,
+			   &full_mask, 1, LP_FUNC_ATTR_CONVERGENT);
+}
+
+/* Emit llvm.amdgcn.init.exec.from.input: derive EXEC from the thread count
+ * stored in input SGPR \p param at bit offset \p bitoffset.  On GFX9 the
+ * merged-wave-info SGPR packs the thread counts of both halves of a merged
+ * shader (callers pass bitoffset 0 for the first half, 8 for the second).
+ */
+static void si_init_exec_from_input(struct si_shader_context *ctx,
+				    unsigned param, unsigned bitoffset)
+{
+	LLVMValueRef args[] = {
+		LLVMGetParam(ctx->main_fn, param),
+		LLVMConstInt(ctx->i32, bitoffset, 0),
+	};
+	lp_build_intrinsic(ctx->gallivm.builder,
+			   "llvm.amdgcn.init.exec.from.input",
+			   ctx->voidt, args, 2, LP_FUNC_ATTR_CONVERGENT);
+}
+
static bool si_compile_tgsi_main(struct si_shader_context *ctx,
- struct si_shader *shader)
+ bool is_monolithic)
{
+ struct si_shader *shader = ctx->shader;
struct si_shader_selector *sel = shader->selector;
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
create_function(ctx);
preload_ring_buffers(ctx);
+ /* For GFX9 merged shaders:
+ * - Set EXEC. If the prolog is present, set EXEC there instead.
+ * - Add a barrier before the second shader.
+ *
+ * The same thing for monolithic shaders is done in
+ * si_build_wrapper_function.
+ */
+ if (ctx->screen->b.chip_class >= GFX9 && !is_monolithic) {
+ if (sel->info.num_instructions > 1 && /* not empty shader */
+ (shader->key.as_es || shader->key.as_ls) &&
+ (ctx->type == PIPE_SHADER_TESS_EVAL ||
+ (ctx->type == PIPE_SHADER_VERTEX &&
+ !sel->vs_needs_prolog))) {
+ si_init_exec_from_input(ctx,
+ ctx->param_merged_wave_info, 0);
+ } else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
+ ctx->type == PIPE_SHADER_GEOMETRY) {
+ si_init_exec_from_input(ctx,
+ ctx->param_merged_wave_info, 8);
+ si_llvm_emit_barrier(NULL, bld_base, NULL);
+ }
+ }
+
if (ctx->type == PIPE_SHADER_GEOMETRY) {
int i;
for (i = 0; i < 4; i++) {
/**
* Compute the VS prolog key, which contains all the information needed to
* build the VS prolog function, and set shader->info bits where needed.
+ *
+ * \param info Shader info of the vertex shader.
+ * \param num_input_sgprs Number of input SGPRs for the vertex shader.
+ * \param prolog_key Key of the VS prolog
+ * \param shader_out The vertex shader, or the next shader if merging LS+HS or ES+GS.
+ * \param key Output shader part key.
*/
-static void si_get_vs_prolog_key(struct si_shader *shader,
+static void si_get_vs_prolog_key(const struct tgsi_shader_info *info,
+ unsigned num_input_sgprs,
+ const struct si_vs_prolog_bits *prolog_key,
+ struct si_shader *shader_out,
union si_shader_part_key *key)
{
- struct tgsi_shader_info *info = &shader->selector->info;
-
memset(key, 0, sizeof(*key));
- key->vs_prolog.states = shader->key.part.vs.prolog;
- key->vs_prolog.num_input_sgprs = shader->info.num_input_sgprs;
+ key->vs_prolog.states = *prolog_key;
+ key->vs_prolog.num_input_sgprs = num_input_sgprs;
key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
+ key->vs_prolog.as_ls = shader_out->key.as_ls;
+
+ if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) {
+ key->vs_prolog.as_ls = 1;
+ key->vs_prolog.num_merged_next_stage_vgprs = 2;
+ } else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) {
+ key->vs_prolog.num_merged_next_stage_vgprs = 5;
+ }
/* Set the instanceID flag. */
for (unsigned i = 0; i < info->num_inputs; i++)
if (key->vs_prolog.states.instance_divisors[i])
- shader->info.uses_instanceid = true;
-}
-
-/**
- * Compute the VS epilog key, which contains all the information needed to
- * build the VS epilog function, and set the PrimitiveID output offset.
- */
-static void si_get_vs_epilog_key(struct si_shader *shader,
- struct si_vs_epilog_bits *states,
- union si_shader_part_key *key)
-{
- memset(key, 0, sizeof(*key));
- key->vs_epilog.states = *states;
-
- /* Set up the PrimitiveID output. */
- if (shader->key.part.vs.epilog.export_prim_id) {
- unsigned index = shader->selector->info.num_outputs;
- unsigned offset = shader->info.nr_param_exports++;
-
- key->vs_epilog.prim_id_param_offset = offset;
- assert(index < ARRAY_SIZE(shader->info.vs_output_param_offset));
- shader->info.vs_output_param_offset[index] = offset;
- }
+ shader_out->info.uses_instanceid = true;
}
/**
static void si_build_gs_prolog_function(struct si_shader_context *ctx,
union si_shader_part_key *key)
{
- const unsigned num_sgprs = SI_GS_NUM_USER_SGPR + 2;
- const unsigned num_vgprs = 8;
+ unsigned num_sgprs, num_vgprs;
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMBuilderRef builder = gallivm->builder;
- LLVMTypeRef params[32];
- LLVMTypeRef returns[32];
+ LLVMTypeRef params[48]; /* 40 SGPRs (maximum) + some VGPRs */
+ LLVMTypeRef returns[48];
LLVMValueRef func, ret;
+ if (ctx->screen->b.chip_class >= GFX9) {
+ num_sgprs = 8 + GFX9_GS_NUM_USER_SGPR;
+ num_vgprs = 5; /* ES inputs are not needed by GS */
+ } else {
+ num_sgprs = GFX6_GS_NUM_USER_SGPR + 2;
+ num_vgprs = 8;
+ }
+
for (unsigned i = 0; i < num_sgprs; ++i) {
params[i] = ctx->i32;
returns[i] = ctx->i32;
/* Create the function. */
si_create_function(ctx, "gs_prolog", returns, num_sgprs + num_vgprs,
- params, num_sgprs + num_vgprs, num_sgprs - 1);
+ params, num_sgprs + num_vgprs, num_sgprs - 1, 0);
func = ctx->main_fn;
+ /* Set the full EXEC mask for the prolog, because we are only fiddling
+ * with registers here. The main shader part will set the correct EXEC
+ * mask.
+ */
+ if (ctx->screen->b.chip_class >= GFX9 && !key->gs_prolog.is_monolithic)
+ si_init_exec_full_mask(ctx);
+
/* Copy inputs to outputs. This should be no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
*/
if (key->gs_prolog.states.tri_strip_adj_fix) {
/* Remap the input vertices for every other primitive. */
- const unsigned vtx_params[6] = {
+ const unsigned gfx6_vtx_params[6] = {
num_sgprs,
num_sgprs + 1,
num_sgprs + 3,
num_sgprs + 5,
num_sgprs + 6
};
+ const unsigned gfx9_vtx_params[3] = {
+ num_sgprs,
+ num_sgprs + 1,
+ num_sgprs + 4,
+ };
+ LLVMValueRef vtx_in[6], vtx_out[6];
LLVMValueRef prim_id, rotate;
+ if (ctx->screen->b.chip_class >= GFX9) {
+ for (unsigned i = 0; i < 3; i++) {
+ vtx_in[i*2] = unpack_param(ctx, gfx9_vtx_params[i], 0, 16);
+ vtx_in[i*2+1] = unpack_param(ctx, gfx9_vtx_params[i], 16, 16);
+ }
+ } else {
+ for (unsigned i = 0; i < 6; i++)
+ vtx_in[i] = LLVMGetParam(func, gfx6_vtx_params[i]);
+ }
+
prim_id = LLVMGetParam(func, num_sgprs + 2);
rotate = LLVMBuildTrunc(builder, prim_id, ctx->i1, "");
for (unsigned i = 0; i < 6; ++i) {
- LLVMValueRef base, rotated, actual;
- base = LLVMGetParam(func, vtx_params[i]);
- rotated = LLVMGetParam(func, vtx_params[(i + 4) % 6]);
- actual = LLVMBuildSelect(builder, rotate, rotated, base, "");
- actual = LLVMBuildBitCast(builder, actual, ctx->f32, "");
- ret = LLVMBuildInsertValue(builder, ret, actual, vtx_params[i], "");
+ LLVMValueRef base, rotated;
+ base = vtx_in[i];
+ rotated = vtx_in[(i + 4) % 6];
+ vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, "");
+ }
+
+ if (ctx->screen->b.chip_class >= GFX9) {
+ for (unsigned i = 0; i < 3; i++) {
+ LLVMValueRef hi, out;
+
+ hi = LLVMBuildShl(builder, vtx_out[i*2+1],
+ LLVMConstInt(ctx->i32, 16, 0), "");
+ out = LLVMBuildOr(builder, vtx_out[i*2], hi, "");
+ out = LLVMBuildBitCast(builder, out, ctx->f32, "");
+ ret = LLVMBuildInsertValue(builder, ret, out,
+ gfx9_vtx_params[i], "");
+ }
+ } else {
+ for (unsigned i = 0; i < 6; i++) {
+ LLVMValueRef out;
+
+ out = LLVMBuildBitCast(builder, vtx_out[i], ctx->f32, "");
+ ret = LLVMBuildInsertValue(builder, ret, out,
+ gfx6_vtx_params[i], "");
+ }
}
}
static void si_build_wrapper_function(struct si_shader_context *ctx,
LLVMValueRef *parts,
unsigned num_parts,
- unsigned main_part)
+ unsigned main_part,
+ unsigned next_shader_first_part)
{
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMBuilderRef builder = ctx->gallivm.builder;
/* PS epilog has one arg per color component */
LLVMTypeRef param_types[48];
- LLVMValueRef out[48];
+ LLVMValueRef initial[48], out[48];
LLVMTypeRef function_type;
unsigned num_params;
- unsigned num_out;
+ unsigned num_out, initial_num_out;
MAYBE_UNUSED unsigned num_out_sgpr; /* used in debug checks */
+ MAYBE_UNUSED unsigned initial_num_out_sgpr; /* used in debug checks */
unsigned num_sgprs, num_vgprs;
unsigned last_sgpr_param;
unsigned gprs;
+ struct lp_build_if_state if_state;
for (unsigned i = 0; i < num_parts; ++i) {
lp_add_function_attr(parts[i], -1, LP_FUNC_ATTR_ALWAYSINLINE);
gprs += size;
}
- si_create_function(ctx, "wrapper", NULL, 0, param_types, num_params, last_sgpr_param);
+ si_create_function(ctx, "wrapper", NULL, 0, param_types, num_params,
+ last_sgpr_param,
+ si_get_max_workgroup_size(ctx->shader));
+
+ if (is_merged_shader(ctx->shader))
+ si_init_exec_full_mask(ctx);
/* Record the arguments of the function as if they were an output of
* a previous part.
num_out_sgpr = num_out;
}
+ memcpy(initial, out, sizeof(out));
+ initial_num_out = num_out;
+ initial_num_out_sgpr = num_out_sgpr;
+
/* Now chain the parts. */
for (unsigned part = 0; part < num_parts; ++part) {
LLVMValueRef in[48];
num_params = LLVMCountParams(parts[part]);
assert(num_params <= ARRAY_SIZE(param_types));
+ /* Merged shaders are executed conditionally depending
+ * on the number of enabled threads passed in the input SGPRs. */
+ if (is_merged_shader(ctx->shader) &&
+ (part == 0 || part == next_shader_first_part)) {
+ LLVMValueRef ena, count = initial[3];
+
+ /* The thread count for the 2nd shader is at bit-offset 8. */
+ if (part == next_shader_first_part) {
+ count = LLVMBuildLShr(builder, count,
+ LLVMConstInt(ctx->i32, 8, 0), "");
+ }
+ count = LLVMBuildAnd(builder, count,
+ LLVMConstInt(ctx->i32, 0x7f, 0), "");
+ ena = LLVMBuildICmp(builder, LLVMIntULT,
+ ac_get_thread_id(&ctx->ac), count, "");
+ lp_build_if(&if_state, &ctx->gallivm, ena);
+ }
+
/* Derive arguments for the next part from outputs of the
* previous one.
*/
}
ret = LLVMBuildCall(builder, parts[part], in, num_params, "");
- ret_type = LLVMTypeOf(ret);
+
+ if (is_merged_shader(ctx->shader) &&
+ (part + 1 == next_shader_first_part ||
+ part + 1 == num_parts)) {
+ lp_build_endif(&if_state);
+
+ if (part + 1 == next_shader_first_part) {
+ /* A barrier is required between 2 merged shaders. */
+ si_llvm_emit_barrier(NULL, &ctx->bld_base, NULL);
+
+ /* The second half of the merged shader should use
+ * the inputs from the toplevel (wrapper) function,
+ * not the return value from the last call.
+ *
+ * That's because the last call was executed condi-
+ * tionally, so we can't consume it in the main
+ * block.
+ */
+ memcpy(out, initial, sizeof(initial));
+ num_out = initial_num_out;
+ num_out_sgpr = initial_num_out_sgpr;
+ }
+ continue;
+ }
/* Extract the returned GPRs. */
+ ret_type = LLVMTypeOf(ret);
num_out = 0;
num_out_sgpr = 0;
{
struct si_shader_selector *sel = shader->selector;
struct si_shader_context ctx;
- LLVMModuleRef mod;
int r = -1;
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
si_dump_streamout(&sel->so);
}
- si_init_shader_ctx(&ctx, sscreen, shader, tm);
+ si_init_shader_ctx(&ctx, sscreen, tm);
+ si_llvm_context_set_tgsi(&ctx, shader);
ctx.separate_prolog = !is_monolithic;
- memset(shader->info.vs_output_param_offset, EXP_PARAM_UNDEFINED,
+ memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
sizeof(shader->info.vs_output_param_offset));
shader->info.uses_instanceid = sel->info.uses_instanceid;
ctx.load_system_value = declare_system_value;
- if (!si_compile_tgsi_main(&ctx, shader)) {
+ if (!si_compile_tgsi_main(&ctx, is_monolithic)) {
si_llvm_dispose(&ctx);
return -1;
}
if (is_monolithic && ctx.type == PIPE_SHADER_VERTEX) {
- LLVMValueRef parts[3];
- bool need_prolog;
- bool need_epilog;
-
- need_prolog = sel->info.num_inputs;
- need_epilog = !shader->key.as_es && !shader->key.as_ls;
+ LLVMValueRef parts[2];
+ bool need_prolog = sel->vs_needs_prolog;
- parts[need_prolog ? 1 : 0] = ctx.main_fn;
+ parts[1] = ctx.main_fn;
if (need_prolog) {
union si_shader_part_key prolog_key;
- si_get_vs_prolog_key(shader, &prolog_key);
+ si_get_vs_prolog_key(&sel->info,
+ shader->info.num_input_sgprs,
+ &shader->key.part.vs.prolog,
+ shader, &prolog_key);
si_build_vs_prolog_function(&ctx, &prolog_key);
parts[0] = ctx.main_fn;
}
- if (need_epilog) {
+ si_build_wrapper_function(&ctx, parts + !need_prolog,
+ 1 + need_prolog, need_prolog, 0);
+ } else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) {
+ if (sscreen->b.chip_class >= GFX9) {
+ struct si_shader_selector *ls = shader->key.part.tcs.ls;
+ LLVMValueRef parts[4];
+
+ /* TCS main part */
+ parts[2] = ctx.main_fn;
+
+ /* TCS epilog */
+ union si_shader_part_key tcs_epilog_key;
+ memset(&tcs_epilog_key, 0, sizeof(tcs_epilog_key));
+ tcs_epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
+ si_build_tcs_epilog_function(&ctx, &tcs_epilog_key);
+ parts[3] = ctx.main_fn;
+
+ /* VS prolog */
+ if (ls->vs_needs_prolog) {
+ union si_shader_part_key vs_prolog_key;
+ si_get_vs_prolog_key(&ls->info,
+ shader->info.num_input_sgprs,
+ &shader->key.part.tcs.ls_prolog,
+ shader, &vs_prolog_key);
+ vs_prolog_key.vs_prolog.is_monolithic = true;
+ si_build_vs_prolog_function(&ctx, &vs_prolog_key);
+ parts[0] = ctx.main_fn;
+ }
+
+ /* VS as LS main part */
+ struct si_shader shader_ls = {};
+ shader_ls.selector = ls;
+ shader_ls.key.as_ls = 1;
+ shader_ls.key.mono = shader->key.mono;
+ shader_ls.key.opt = shader->key.opt;
+ si_llvm_context_set_tgsi(&ctx, &shader_ls);
+
+ if (!si_compile_tgsi_main(&ctx, true)) {
+ si_llvm_dispose(&ctx);
+ return -1;
+ }
+ shader->info.uses_instanceid |= ls->info.uses_instanceid;
+ parts[1] = ctx.main_fn;
+
+ /* Reset the shader context. */
+ ctx.shader = shader;
+ ctx.type = PIPE_SHADER_TESS_CTRL;
+
+ si_build_wrapper_function(&ctx,
+ parts + !ls->vs_needs_prolog,
+ 4 - !ls->vs_needs_prolog, 0,
+ ls->vs_needs_prolog ? 2 : 1);
+ } else {
+ LLVMValueRef parts[2];
union si_shader_part_key epilog_key;
- si_get_vs_epilog_key(shader, &shader->key.part.vs.epilog, &epilog_key);
- si_build_vs_epilog_function(&ctx, &epilog_key);
- parts[need_prolog ? 2 : 1] = ctx.main_fn;
+
+ parts[0] = ctx.main_fn;
+
+ memset(&epilog_key, 0, sizeof(epilog_key));
+ epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
+ si_build_tcs_epilog_function(&ctx, &epilog_key);
+ parts[1] = ctx.main_fn;
+
+ si_build_wrapper_function(&ctx, parts, 2, 0, 0);
}
+ } else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) {
+ if (ctx.screen->b.chip_class >= GFX9) {
+ struct si_shader_selector *es = shader->key.part.gs.es;
+ LLVMValueRef es_prolog = NULL;
+ LLVMValueRef es_main = NULL;
+ LLVMValueRef gs_prolog = NULL;
+ LLVMValueRef gs_main = ctx.main_fn;
+
+ /* GS prolog */
+ union si_shader_part_key gs_prolog_key;
+ memset(&gs_prolog_key, 0, sizeof(gs_prolog_key));
+ gs_prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
+ gs_prolog_key.gs_prolog.is_monolithic = true;
+ si_build_gs_prolog_function(&ctx, &gs_prolog_key);
+ gs_prolog = ctx.main_fn;
+
+ /* ES prolog */
+ if (es->vs_needs_prolog) {
+ union si_shader_part_key vs_prolog_key;
+ si_get_vs_prolog_key(&es->info,
+ shader->info.num_input_sgprs,
+ &shader->key.part.tcs.ls_prolog,
+ shader, &vs_prolog_key);
+ vs_prolog_key.vs_prolog.is_monolithic = true;
+ si_build_vs_prolog_function(&ctx, &vs_prolog_key);
+ es_prolog = ctx.main_fn;
+ }
- si_build_wrapper_function(&ctx, parts, 1 + need_prolog + need_epilog,
- need_prolog ? 1 : 0);
- } else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) {
- LLVMValueRef parts[2];
- union si_shader_part_key epilog_key;
+ /* ES main part */
+ struct si_shader shader_es = {};
+ shader_es.selector = es;
+ shader_es.key.as_es = 1;
+ shader_es.key.mono = shader->key.mono;
+ shader_es.key.opt = shader->key.opt;
+ si_llvm_context_set_tgsi(&ctx, &shader_es);
- parts[0] = ctx.main_fn;
+ if (!si_compile_tgsi_main(&ctx, true)) {
+ si_llvm_dispose(&ctx);
+ return -1;
+ }
+ shader->info.uses_instanceid |= es->info.uses_instanceid;
+ es_main = ctx.main_fn;
- memset(&epilog_key, 0, sizeof(epilog_key));
- epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
- si_build_tcs_epilog_function(&ctx, &epilog_key);
- parts[1] = ctx.main_fn;
+ /* Reset the shader context. */
+ ctx.shader = shader;
+ ctx.type = PIPE_SHADER_GEOMETRY;
- si_build_wrapper_function(&ctx, parts, 2, 0);
- } else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_EVAL &&
- !shader->key.as_es) {
- LLVMValueRef parts[2];
- union si_shader_part_key epilog_key;
+ /* Prepare the array of shader parts. */
+ LLVMValueRef parts[4];
+ unsigned num_parts = 0, main_part, next_first_part;
- parts[0] = ctx.main_fn;
+ if (es_prolog)
+ parts[num_parts++] = es_prolog;
- si_get_vs_epilog_key(shader, &shader->key.part.tes.epilog, &epilog_key);
- si_build_vs_epilog_function(&ctx, &epilog_key);
- parts[1] = ctx.main_fn;
+ parts[main_part = num_parts++] = es_main;
+ parts[next_first_part = num_parts++] = gs_prolog;
+ parts[num_parts++] = gs_main;
- si_build_wrapper_function(&ctx, parts, 2, 0);
- } else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) {
- LLVMValueRef parts[2];
- union si_shader_part_key prolog_key;
+ si_build_wrapper_function(&ctx, parts, num_parts,
+ main_part, next_first_part);
+ } else {
+ LLVMValueRef parts[2];
+ union si_shader_part_key prolog_key;
- parts[1] = ctx.main_fn;
+ parts[1] = ctx.main_fn;
- memset(&prolog_key, 0, sizeof(prolog_key));
- prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
- si_build_gs_prolog_function(&ctx, &prolog_key);
- parts[0] = ctx.main_fn;
+ memset(&prolog_key, 0, sizeof(prolog_key));
+ prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
+ si_build_gs_prolog_function(&ctx, &prolog_key);
+ parts[0] = ctx.main_fn;
- si_build_wrapper_function(&ctx, parts, 2, 1);
+ si_build_wrapper_function(&ctx, parts, 2, 1, 0);
+ }
} else if (is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) {
LLVMValueRef parts[3];
union si_shader_part_key prolog_key;
si_build_ps_epilog_function(&ctx, &epilog_key);
parts[need_prolog ? 2 : 1] = ctx.main_fn;
- si_build_wrapper_function(&ctx, parts, need_prolog ? 3 : 2, need_prolog ? 1 : 0);
+ si_build_wrapper_function(&ctx, parts, need_prolog ? 3 : 2,
+ need_prolog ? 1 : 0, 0);
}
- mod = ctx.gallivm.module;
-
- /* Dump LLVM IR before any optimization passes */
- if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
- r600_can_dump_shader(&sscreen->b, ctx.type))
- ac_dump_module(mod);
-
- si_llvm_finalize_module(&ctx,
- r600_extra_shader_checks(&sscreen->b, ctx.type));
+ si_llvm_optimize_module(&ctx);
/* Post-optimization transformations and analysis. */
si_eliminate_const_vs_outputs(&ctx);
/* Compile to bytecode. */
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
- mod, debug, ctx.type, "TGSI shader");
+ ctx.gallivm.module, debug, ctx.type, "TGSI shader");
si_llvm_dispose(&ctx);
if (r) {
fprintf(stderr, "LLVM failed to compile shader\n");
}
/* Add the scratch offset to input SGPRs. */
- if (shader->config.scratch_bytes_per_wave)
+ if (shader->config.scratch_bytes_per_wave && !is_merged_shader(shader))
shader->info.num_input_sgprs += 1; /* scratch byte offset */
/* Calculate the number of fragment input VGPRs. */
struct si_shader_context ctx;
struct gallivm_state *gallivm = &ctx.gallivm;
- si_init_shader_ctx(&ctx, sscreen, &shader, tm);
+ si_init_shader_ctx(&ctx, sscreen, tm);
+ ctx.shader = &shader;
ctx.type = type;
switch (type) {
build(&ctx, key);
/* Compile. */
- si_llvm_finalize_module(&ctx,
- r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_FRAGMENT));
+ si_llvm_optimize_module(&ctx);
if (si_compile_llvm(sscreen, &result->binary, &result->config, tm,
gallivm->module, debug, ctx.type, name)) {
LLVMTypeRef *params, *returns;
LLVMValueRef ret, func;
int last_sgpr, num_params, num_returns, i;
+ unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs +
+ key->vs_prolog.num_merged_next_stage_vgprs;
+ unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 4;
+ unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs +
+ num_input_vgprs;
+ unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 8 : 0;
- ctx->param_vertex_id = key->vs_prolog.num_input_sgprs;
- ctx->param_instance_id = key->vs_prolog.num_input_sgprs + 3;
+ ctx->param_vertex_id = first_vs_vgpr;
+ ctx->param_instance_id = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
/* 4 preloaded VGPRs + vertex load indices as prolog outputs */
- params = alloca((key->vs_prolog.num_input_sgprs + 4) *
- sizeof(LLVMTypeRef));
- returns = alloca((key->vs_prolog.num_input_sgprs + 4 +
- key->vs_prolog.last_input + 1) *
+ params = alloca(num_all_input_regs * sizeof(LLVMTypeRef));
+ returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) *
sizeof(LLVMTypeRef));
num_params = 0;
num_returns = 0;
}
last_sgpr = num_params - 1;
- /* 4 preloaded VGPRs (outputs must be floats) */
- for (i = 0; i < 4; i++) {
+ /* Preloaded VGPRs (outputs must be floats) */
+ for (i = 0; i < num_input_vgprs; i++) {
params[num_params++] = ctx->i32;
returns[num_returns++] = ctx->f32;
}
/* Create the function. */
si_create_function(ctx, "vs_prolog", returns, num_returns, params,
- num_params, last_sgpr);
+ num_params, last_sgpr, 0);
func = ctx->main_fn;
+ if (key->vs_prolog.num_merged_next_stage_vgprs &&
+ !key->vs_prolog.is_monolithic)
+ si_init_exec_from_input(ctx, 3, 0);
+
/* Copy inputs to outputs. This should be no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
*/
LLVMValueRef p = LLVMGetParam(func, i);
ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
}
- for (i = num_params - 4; i < num_params; i++) {
+ for (; i < num_params; i++) {
LLVMValueRef p = LLVMGetParam(func, i);
p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, "");
ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
if (divisor) {
/* InstanceID / Divisor + StartInstance */
index = get_instance_index_for_fetch(ctx,
+ user_sgpr_base +
SI_SGPR_START_INSTANCE,
divisor);
} else {
/* VertexID + BaseVertex */
index = LLVMBuildAdd(gallivm->builder,
LLVMGetParam(func, ctx->param_vertex_id),
- LLVMGetParam(func, SI_SGPR_BASE_VERTEX), "");
+ LLVMGetParam(func, user_sgpr_base +
+ SI_SGPR_BASE_VERTEX), "");
}
index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, "");
si_llvm_build_ret(ctx, ret);
}
-/**
- * Build the vertex shader epilog function. This is also used by the tessellation
- * evaluation shader compiled as VS.
- *
- * The input is PrimitiveID.
- *
- * If PrimitiveID is required by the pixel shader, export it.
- * Otherwise, do nothing.
- */
-static void si_build_vs_epilog_function(struct si_shader_context *ctx,
- union si_shader_part_key *key)
-{
- struct gallivm_state *gallivm = &ctx->gallivm;
- struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
- LLVMTypeRef params[5];
- int num_params, i;
-
- /* Declare input VGPRs. */
- num_params = key->vs_epilog.states.export_prim_id ?
- (VS_EPILOG_PRIMID_LOC + 1) : 0;
- assert(num_params <= ARRAY_SIZE(params));
-
- for (i = 0; i < num_params; i++)
- params[i] = ctx->f32;
-
- /* Create the function. */
- si_create_function(ctx, "vs_epilog", NULL, 0, params, num_params, -1);
-
- /* Emit exports. */
- if (key->vs_epilog.states.export_prim_id) {
- struct lp_build_context *base = &bld_base->base;
- struct ac_export_args args;
-
- args.enabled_channels = 0x1; /* enabled channels */
- args.valid_mask = 0; /* whether the EXEC mask is valid */
- args.done = 0; /* DONE bit */
- args.target = V_008DFC_SQ_EXP_PARAM +
- key->vs_epilog.prim_id_param_offset;
- args.compr = 0; /* COMPR flag (0 = 32-bit export) */
- args.out[0] = LLVMGetParam(ctx->main_fn,
- VS_EPILOG_PRIMID_LOC); /* X */
- args.out[1] = base->undef; /* Y */
- args.out[2] = base->undef; /* Z */
- args.out[3] = base->undef; /* W */
-
- ac_build_export(&ctx->ac, &args);
- }
-
- LLVMBuildRetVoid(gallivm->builder);
-}
-
-/**
- * Create & compile a vertex shader epilog. This a helper used by VS and TES.
- */
-static bool si_get_vs_epilog(struct si_screen *sscreen,
+static bool si_get_vs_prolog(struct si_screen *sscreen,
LLVMTargetMachineRef tm,
- struct si_shader *shader,
- struct pipe_debug_callback *debug,
- struct si_vs_epilog_bits *states)
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug,
+ struct si_shader *main_part,
+ const struct si_vs_prolog_bits *key)
{
- union si_shader_part_key epilog_key;
+ struct si_shader_selector *vs = main_part->selector;
- si_get_vs_epilog_key(shader, states, &epilog_key);
+ /* The prolog is a no-op if there are no inputs. */
+ if (!vs->vs_needs_prolog)
+ return true;
- shader->epilog = si_get_shader_part(sscreen, &sscreen->vs_epilogs,
- PIPE_SHADER_VERTEX, true,
- &epilog_key, tm, debug,
- si_build_vs_epilog_function,
- "Vertex Shader Epilog");
- return shader->epilog != NULL;
+ /* Get the prolog. */
+ union si_shader_part_key prolog_key;
+ si_get_vs_prolog_key(&vs->info, main_part->info.num_input_sgprs,
+ key, shader, &prolog_key);
+
+ shader->prolog =
+ si_get_shader_part(sscreen, &sscreen->vs_prologs,
+ PIPE_SHADER_VERTEX, true, &prolog_key, tm,
+ debug, si_build_vs_prolog_function,
+ "Vertex Shader Prolog");
+ return shader->prolog != NULL;
}
/**
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
- struct tgsi_shader_info *info = &shader->selector->info;
- union si_shader_part_key prolog_key;
-
- /* Get the prolog. */
- si_get_vs_prolog_key(shader, &prolog_key);
-
- /* The prolog is a no-op if there are no inputs. */
- if (info->num_inputs) {
- shader->prolog =
- si_get_shader_part(sscreen, &sscreen->vs_prologs,
- PIPE_SHADER_VERTEX, true,
- &prolog_key, tm, debug,
- si_build_vs_prolog_function,
- "Vertex Shader Prolog");
- if (!shader->prolog)
- return false;
- }
-
- /* Get the epilog. */
- if (!shader->key.as_es && !shader->key.as_ls &&
- !si_get_vs_epilog(sscreen, tm, shader, debug,
- &shader->key.part.vs.epilog))
- return false;
-
- return true;
-}
-
-/**
- * Select and compile (or reuse) TES parts (epilog).
- */
-static bool si_shader_select_tes_parts(struct si_screen *sscreen,
- LLVMTargetMachineRef tm,
- struct si_shader *shader,
- struct pipe_debug_callback *debug)
-{
- if (shader->key.as_es)
- return true;
-
- /* TES compiled as VS. */
- return si_get_vs_epilog(sscreen, tm, shader, debug,
- &shader->key.part.tes.epilog);
+ return si_get_vs_prolog(sscreen, tm, shader, debug, shader,
+ &shader->key.part.vs.prolog);
}
/**
{
struct gallivm_state *gallivm = &ctx->gallivm;
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
- LLVMTypeRef params[16];
+ LLVMTypeRef params[32];
LLVMValueRef func;
- int last_sgpr, num_params;
-
- /* Declare inputs. Only RW_BUFFERS and TESS_FACTOR_OFFSET are used. */
- params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
- params[SI_PARAM_CONST_BUFFERS] = ctx->i64;
- params[SI_PARAM_SAMPLERS] = ctx->i64;
- params[SI_PARAM_IMAGES] = ctx->i64;
- params[SI_PARAM_SHADER_BUFFERS] = ctx->i64;
- params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32;
- params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32;
- params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32;
- params[SI_PARAM_TCS_IN_LAYOUT] = ctx->i32;
- params[ctx->param_oc_lds = SI_PARAM_TCS_OC_LDS] = ctx->i32;
- params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx->i32;
- last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
- num_params = last_sgpr + 1;
+ int last_sgpr, num_params = 0;
+
+ if (ctx->screen->b.chip_class >= GFX9) {
+ params[num_params++] = ctx->i64;
+ params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32; /* wave info */
+ params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32;
+ params[num_params++] = ctx->i64;
+ params[num_params++] = ctx->i64;
+ params[num_params++] = ctx->i64;
+ params[num_params++] = ctx->i64;
+ params[num_params++] = ctx->i64;
+ params[num_params++] = ctx->i64;
+ params[num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32;
+ params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32;
+ params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
+ params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
+ } else {
+ params[num_params++] = ctx->i64;
+ params[num_params++] = ctx->i64;
+ params[num_params++] = ctx->i64;
+ params[num_params++] = ctx->i64;
+ params[num_params++] = ctx->i64;
+ params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32;
+ params[num_params++] = ctx->i32;
+ params[ctx->param_tcs_offchip_addr_base64k = num_params++] = ctx->i32;
+ params[ctx->param_tcs_factor_addr_base64k = num_params++] = ctx->i32;
+ params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
+ params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
+ }
+ last_sgpr = num_params - 1;
params[num_params++] = ctx->i32; /* patch index within the wave (REL_PATCH_ID) */
params[num_params++] = ctx->i32; /* invocation ID within the patch */
params[num_params++] = ctx->i32; /* LDS offset where tess factors should be loaded from */
/* Create the function. */
- si_create_function(ctx, "tcs_epilog", NULL, 0, params, num_params, last_sgpr);
- declare_tess_lds(ctx);
+ si_create_function(ctx, "tcs_epilog", NULL, 0, params, num_params, last_sgpr,
+ ctx->screen->b.chip_class >= CIK ? 128 : 64);
+ declare_lds_as_pointer(ctx);
func = ctx->main_fn;
si_write_tess_factors(bld_base,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
- union si_shader_part_key epilog_key;
+ if (sscreen->b.chip_class >= GFX9) {
+ struct si_shader *ls_main_part =
+ shader->key.part.tcs.ls->main_shader_part_ls;
+
+ if (!si_get_vs_prolog(sscreen, tm, shader, debug, ls_main_part,
+ &shader->key.part.tcs.ls_prolog))
+ return false;
+
+ shader->previous_stage = ls_main_part;
+ }
/* Get the epilog. */
+ union si_shader_part_key epilog_key;
memset(&epilog_key, 0, sizeof(epilog_key));
epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
- union si_shader_part_key prolog_key;
+ if (sscreen->b.chip_class >= GFX9) {
+ struct si_shader *es_main_part =
+ shader->key.part.gs.es->main_shader_part_es;
+
+ if (shader->key.part.gs.es->type == PIPE_SHADER_VERTEX &&
+ !si_get_vs_prolog(sscreen, tm, shader, debug, es_main_part,
+ &shader->key.part.gs.vs_prolog))
+ return false;
+
+ shader->previous_stage = es_main_part;
+ }
if (!shader->key.part.gs.prolog.tri_strip_adj_fix)
return true;
+ union si_shader_part_key prolog_key;
memset(&prolog_key, 0, sizeof(prolog_key));
prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
- shader->prolog = si_get_shader_part(sscreen, &sscreen->gs_prologs,
+ shader->prolog2 = si_get_shader_part(sscreen, &sscreen->gs_prologs,
PIPE_SHADER_GEOMETRY, true,
&prolog_key, tm, debug,
si_build_gs_prolog_function,
"Geometry Shader Prolog");
- return shader->prolog != NULL;
+ return shader->prolog2 != NULL;
}
/**
/* Create the function. */
si_create_function(ctx, "ps_prolog", params, num_returns, params,
- num_params, last_sgpr);
+ num_params, last_sgpr, 0);
func = ctx->main_fn;
/* Copy inputs to outputs. This should be no-op, as the registers match,
list = lp_build_gather_values(gallivm, ptr, 2);
list = LLVMBuildBitCast(gallivm->builder, list, ctx->i64, "");
list = LLVMBuildIntToPtr(gallivm->builder, list,
- const_array(ctx->v16i8, SI_NUM_RW_BUFFERS), "");
+ const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), "");
si_llvm_emit_polygon_stipple(ctx, list, pos);
}
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
LLVMTypeRef params[16+8*4+3];
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
- int last_sgpr, num_params, i;
+ int last_sgpr, num_params = 0, i;
struct si_ps_exports exp = {};
/* Declare input SGPRs. */
- params[SI_PARAM_RW_BUFFERS] = ctx->i64;
- params[SI_PARAM_CONST_BUFFERS] = ctx->i64;
- params[SI_PARAM_SAMPLERS] = ctx->i64;
- params[SI_PARAM_IMAGES] = ctx->i64;
- params[SI_PARAM_SHADER_BUFFERS] = ctx->i64;
+ params[ctx->param_rw_buffers = num_params++] = ctx->i64;
+ params[ctx->param_const_buffers = num_params++] = ctx->i64;
+ params[ctx->param_samplers = num_params++] = ctx->i64;
+ params[ctx->param_images = num_params++] = ctx->i64;
+ params[ctx->param_shader_buffers = num_params++] = ctx->i64;
+ assert(num_params == SI_PARAM_ALPHA_REF);
params[SI_PARAM_ALPHA_REF] = ctx->f32;
last_sgpr = SI_PARAM_ALPHA_REF;
params[i] = ctx->f32;
/* Create the function. */
- si_create_function(ctx, "ps_epilog", NULL, 0, params, num_params, last_sgpr);
+ si_create_function(ctx, "ps_epilog", NULL, 0, params, num_params,
+ last_sgpr, 0);
/* Disable elimination of unused inputs. */
si_llvm_add_attribute(ctx->main_fn,
"InitialPSInputAddr", 0xffffff);
return -1;
break;
case PIPE_SHADER_TESS_EVAL:
- if (!si_shader_select_tes_parts(sscreen, tm, shader, debug))
- return -1;
break;
case PIPE_SHADER_GEOMETRY:
if (!si_shader_select_gs_parts(sscreen, tm, shader, debug))
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
shader->prolog->config.num_vgprs);
}
+ if (shader->previous_stage) {
+ shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
+ shader->previous_stage->config.num_sgprs);
+ shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+ shader->previous_stage->config.num_vgprs);
+ shader->config.spilled_sgprs =
+ MAX2(shader->config.spilled_sgprs,
+ shader->previous_stage->config.spilled_sgprs);
+ shader->config.spilled_vgprs =
+ MAX2(shader->config.spilled_vgprs,
+ shader->previous_stage->config.spilled_vgprs);
+ shader->config.private_mem_vgprs =
+ MAX2(shader->config.private_mem_vgprs,
+ shader->previous_stage->config.private_mem_vgprs);
+ shader->config.scratch_bytes_per_wave =
+ MAX2(shader->config.scratch_bytes_per_wave,
+ shader->previous_stage->config.scratch_bytes_per_wave);
+ shader->info.uses_instanceid |=
+ shader->previous_stage->info.uses_instanceid;
+ }
+ if (shader->prolog2) {
+ shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
+ shader->prolog2->config.num_sgprs);
+ shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+ shader->prolog2->config.num_vgprs);
+ }
if (shader->epilog) {
shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
shader->epilog->config.num_sgprs);