LLVMValueRef values[4];
unsigned name;
unsigned index;
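+ /* Semantic index; 'index' above holds the TGSI output register. */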
+ unsigned sid;
unsigned usage;
};
/* Load the ESGS ring resource descriptor */
- t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
- lp_build_const_int32(gallivm,
- NUM_PIPE_CONST_BUFFERS + 1));
+ lp_build_const_int32(gallivm, SI_RING_ESGS));
args[0] = t_list;
args[1] = vtx_offset;
LLVMConstInt(i32, tfe, 0)
};
+ /* The instruction offset field has 12 bits */
+ assert(offen || inst_offset < (1 << 12));
+
/* The intrinsic is overloaded; we need to add a type suffix for overloading to work. */
unsigned func = CLAMP(num_channels, 1, 3) - 1;
const char *types[] = {"i32", "v2i32", "v4i32"};
/* On SI, the vertex shader is responsible for writing streamout data
* to buffers. */
-static void si_llvm_emit_streamout(struct si_shader_context *shader)
+static void si_llvm_emit_streamout(struct si_shader_context *shader,
+ struct si_shader_output_values *outputs,
+ unsigned noutput)
{
struct pipe_stream_output_info *so = &shader->shader->selector->so;
struct gallivm_state *gallivm = &shader->radeon_bld.gallivm;
so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, "");
}
- LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS] = shader->radeon_bld.soa.outputs;
-
/* Write streamout data. */
for (i = 0; i < so->num_outputs; i++) {
unsigned buf_idx = so->output[i].output_buffer;
/* Load the output as int. */
for (j = 0; j < num_comps; j++) {
- out[j] = LLVMBuildLoad(builder, outputs[reg][start+j], "");
- out[j] = LLVMBuildBitCast(builder, out[j], i32, "");
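+ /* Find the slot in 'outputs' whose register matches this streamout source. */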
+ unsigned outidx = 0;
+
+ while (outidx < noutput && outputs[outidx].index != reg)
+ outidx++;
+
+ if (outidx < noutput)
+ out[j] = LLVMBuildBitCast(builder,
+ outputs[outidx].values[start+j],
+ i32, "");
+ else
+ out[j] = NULL;
}
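+ /* out[0] == NULL means the shader never writes this register; skip the entry. */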
+ if (!out[0])
+ continue;
+
/* Pack the output. */
LLVMValueRef vdata = NULL;
unsigned pos_idx;
int i;
- if (si_shader_ctx->shader->selector->so.num_outputs) {
- si_llvm_emit_streamout(si_shader_ctx);
+ if (outputs && si_shader_ctx->shader->selector->so.num_outputs) {
+ si_llvm_emit_streamout(si_shader_ctx, outputs, noutput);
}
for (i = 0; i < noutput; i++) {
semantic_name = outputs[i].name;
- semantic_index = outputs[i].index;
+ semantic_index = outputs[i].sid;
semantic_usage = outputs[i].usage;
handle_semantic:
{
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- struct si_pipe_shader *shader = si_shader_ctx->shader;
+ struct si_shader *es = &si_shader_ctx->shader->shader;
+ struct si_shader *gs = si_shader_ctx->gs_for_vs;
struct tgsi_parse_context *parse = &si_shader_ctx->parse;
LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
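+ /* The ES2GS offset points at this invocation's area in the ESGS ring;
+  * it is used as soffset for every ring store below. */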
+ LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_ES2GS_OFFSET);
LLVMValueRef t_list_ptr;
LLVMValueRef t_list;
unsigned chan;
if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
continue;
- si_store_shader_io_attribs(&shader->shader, d);
+ si_store_shader_io_attribs(es, d);
}
/* Load the ESGS ring resource descriptor */
- t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
- lp_build_const_int32(gallivm,
- NUM_PIPE_CONST_BUFFERS + 1));
+ lp_build_const_int32(gallivm, SI_RING_ESGS));
- for (i = 0; i < shader->shader.noutput; i++) {
+ for (i = 0; i < es->noutput; i++) {
LLVMValueRef *out_ptr =
- si_shader_ctx->radeon_bld.soa.outputs[shader->shader.output[i].index];
+ si_shader_ctx->radeon_bld.soa.outputs[es->output[i].index];
+ int j;
+
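+ /* Match this ES output against the GS inputs; outputs the GS
+  * never reads don't need an ESGS ring slot. */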
+ for (j = 0; j < gs->ninput; j++) {
+ if (gs->input[j].name == es->output[i].name &&
+ gs->input[j].sid == es->output[i].sid)
+ break;
+ }
+ if (j == gs->ninput)
+ continue;
for (chan = 0; chan < 4; chan++) {
LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
- LLVMValueRef voffset =
- lp_build_const_int32(gallivm, (4 * i + chan) * 4);
- LLVMValueRef soffset =
- LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- SI_PARAM_ES2GS_OFFSET);
-
out_val = LLVMBuildBitCast(gallivm->builder, out_val, i32, "");
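+ /* Store at a compile-time offset: inst_offset selects the GS
+  * input's 4-dword slot, soffset supplies the per-vertex base. */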
build_tbuffer_store(si_shader_ctx, t_list, out_val, 1,
- voffset, soffset, 0,
+ LLVMGetUndef(i32), soffset,
+ (4 * gs->input[j].param_offset + chan) * 4,
V_008F0C_BUF_DATA_FORMAT_32,
V_008F0C_BUF_NUM_FORMAT_UINT,
- 1, 0, 1, 1, 0);
+ 0, 0, 1, 1, 0);
}
}
}
outputs = REALLOC(outputs, noutput * sizeof(outputs[0]),
(noutput + 1) * sizeof(outputs[0]));
for (index = d->Range.First; index <= d->Range.Last; index++) {
+ outputs[noutput].index = index;
outputs[noutput].name = d->Semantic.Name;
- outputs[noutput].index = d->Semantic.Index;
+ outputs[noutput].sid = d->Semantic.Index;
outputs[noutput].usage = d->Declaration.UsageMask;
for (i = 0; i < 4; i++)
struct si_shader *shader = &si_shader_ctx->shader->shader;
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
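+ /* The GS2VS offset points at this invocation's area in the GSVS ring. */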
+ LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_GS2VS_OFFSET);
LLVMValueRef gs_next_vertex;
+ LLVMValueRef can_emit, kill;
LLVMValueRef t_list_ptr;
LLVMValueRef t_list;
LLVMValueRef args[2];
int i;
/* Load the GSVS ring resource descriptor */
- t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
- lp_build_const_int32(gallivm,
- NUM_PIPE_CONST_BUFFERS + 2));
+ lp_build_const_int32(gallivm, SI_RING_GSVS));
if (shader->noutput == 0) {
struct tgsi_parse_context *parse = &si_shader_ctx->parse;
/* Write vertex attribute values to GSVS ring */
gs_next_vertex = LLVMBuildLoad(gallivm->builder, si_shader_ctx->gs_next_vertex, "");
+
+ /* If this thread has already emitted the declared maximum number of
+ * vertices, kill it: excessive vertex emissions are not supposed to
+ * have any effect, and GS threads have no externally observable
+ * effects other than emitting vertices.
+ */
+ can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULT, gs_next_vertex,
+ lp_build_const_int32(gallivm,
+ shader->gs_max_out_vertices), "");
+ kill = lp_build_select(&bld_base->base, can_emit,
+ lp_build_const_float(gallivm, 1.0f),
+ lp_build_const_float(gallivm, -1.0f));
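+ /* llvm.AMDGPU.kill discards the thread when its argument is negative. */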
+ build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
+ LLVMVoidTypeInContext(gallivm->context), &kill, 1, 0);
+
for (i = 0; i < shader->noutput; i++) {
LLVMValueRef *out_ptr =
si_shader_ctx->radeon_bld.soa.outputs[shader->output[i].index];
for (chan = 0; chan < 4; chan++) {
LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
- LLVMValueRef soffset =
- LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- SI_PARAM_GS2VS_OFFSET);
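+ /* Each output component occupies gs_max_out_vertices contiguous slots
+  * in the GSVS ring. */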
LLVMValueRef voffset =
lp_build_const_int32(gallivm, (i * 4 + chan) *
shader->gs_max_out_vertices);
struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct si_pipe_shader *shader = si_shader_ctx->shader;
- LLVMTypeRef params[21], f32, i8, i32, v2i32, v3i32;
+ LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32;
unsigned i, last_sgpr, num_params;
i8 = LLVMInt8TypeInContext(gallivm->context);
params[SI_PARAM_CONST] = LLVMPointerType(
LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), CONST_ADDR_SPACE);
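+ /* The RW buffer list (ring buffers and streamout targets) reuses
+  * the const buffer descriptor type. */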
+ params[SI_PARAM_RW_BUFFERS] = params[SI_PARAM_CONST];
+
/* We assume at most 16 textures per program at the moment.
 * This will probably need to be changed to support bindless textures */
params[SI_PARAM_SAMPLER] = LLVMPointerType(
switch (si_shader_ctx->type) {
case TGSI_PROCESSOR_VERTEX:
params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST];
- params[SI_PARAM_SO_BUFFER] = params[SI_PARAM_CONST];
params[SI_PARAM_START_INSTANCE] = i32;
num_params = SI_PARAM_START_INSTANCE+1;
if (shader->key.vs.as_es) {
struct gallivm_state * gallivm = bld_base->base.gallivm;
unsigned i;
- if (!si_shader_ctx->shader->selector->so.num_outputs)
+ if (si_shader_ctx->type != TGSI_PROCESSOR_VERTEX ||
+ si_shader_ctx->shader->key.vs.as_es ||
+ !si_shader_ctx->shader->selector->so.num_outputs)
return;
LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- SI_PARAM_SO_BUFFER);
+ SI_PARAM_RW_BUFFERS);
/* Load the resources, we rely on the code sinking to do the rest */
for (i = 0; i < 4; ++i) {
if (si_shader_ctx->shader->selector->so.stride[i]) {
- LLVMValueRef offset = lp_build_const_int32(gallivm, i);
+ LLVMValueRef offset = lp_build_const_int32(gallivm,
+ SI_RW_SO + i);
si_shader_ctx->so_buffers[i] = build_indexed_load(si_shader_ctx, buf_ptr, offset);
}
{
unsigned i;
uint32_t *ptr;
- struct radeon_llvm_binary binary;
+ struct radeon_shader_binary binary;
bool dump = r600_can_dump_shader(&sctx->screen->b,
shader->selector ? shader->selector->tokens : NULL);
memset(&binary, 0, sizeof(binary));
}
ptr = (uint32_t*)sctx->b.ws->buffer_map(shader->bo->cs_buf, sctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
- if (0 /*SI_BIG_ENDIAN*/) {
+ if (SI_BIG_ENDIAN) {
for (i = 0; i < binary.code_size / 4; ++i) {
- ptr[i] = util_bswap32(*(uint32_t*)(binary.code + i*4));
+ ptr[i] = util_cpu_to_le32((*(uint32_t*)(binary.code + i*4)));
}
} else {
memcpy(ptr, binary.code, binary.code_size);
struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
struct lp_build_context *base = &bld_base->base;
struct lp_build_context *uint = &bld_base->uint_bld;
+ struct si_shader *shader = &si_shader_ctx->shader->shader;
struct si_shader *gs = &si_shader_ctx->shader->selector->current->shader;
struct si_shader_output_values *outputs;
LLVMValueRef t_list_ptr, t_list;
preload_streamout_buffers(si_shader_ctx);
/* Load the GSVS ring resource descriptor */
- t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
- lp_build_const_int32(gallivm,
- NUM_PIPE_CONST_BUFFERS + 1));
+ lp_build_const_int32(gallivm, SI_RING_GSVS));
args[0] = t_list;
args[1] = lp_build_mul_imm(uint,
struct si_shader_output *out = gs->output + i;
unsigned chan;
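+ /* Mirror the GS outputs into the copy shader's own output list. */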
+ shader->output[i] = *out;
+
outputs[i].name = out->name;
outputs[i].index = out->index;
+ outputs[i].sid = out->sid;
outputs[i].usage = out->usage;
for (chan = 0; chan < 4; chan++) {
base->elem_type, "");
}
}
+ shader->noutput = gs->noutput;
si_llvm_export_vs(bld_base, outputs, gs->noutput);
struct lp_build_tgsi_context * bld_base;
LLVMModuleRef mod;
int r = 0;
- bool dump = r600_can_dump_shader(&sctx->screen->b, shader->selector->tokens);
+ bool dump = r600_can_dump_shader(&sctx->screen->b, sel->tokens);
+
+ /* Dump TGSI code before doing TGSI->LLVM conversion in case the
+ * conversion fails. */
+ if (dump) {
+ tgsi_dump(sel->tokens, 0);
+ si_dump_streamout(&sel->so);
+ }
assert(shader->shader.noutput == 0);
assert(shader->shader.nparam == 0);
preload_samplers(&si_shader_ctx);
preload_streamout_buffers(&si_shader_ctx);
- /* Dump TGSI code before doing TGSI->LLVM conversion in case the
- * conversion fails. */
- if (dump) {
- tgsi_dump(sel->tokens, 0);
- si_dump_streamout(&sel->so);
- }
-
if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
si_shader_ctx.gs_next_vertex =
lp_build_alloca(bld_base->base.gallivm,
if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
shader->gs_copy_shader = CALLOC_STRUCT(si_pipe_shader);
shader->gs_copy_shader->selector = shader->selector;
+ shader->gs_copy_shader->key = shader->key;
si_shader_ctx.shader = shader->gs_copy_shader;
if ((r = si_generate_gs_copy_shader(sctx, &si_shader_ctx, dump))) {
free(shader->gs_copy_shader);