#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
#include "radeonsi_pipe.h"
else
si_shader_ctx->shader->spi_shader_col_format |=
V_028714_SPI_SHADER_32_ABGR << (4 * cbuf);
+
+ si_shader_ctx->shader->cb_shader_mask |= 0xf << (4 * cbuf);
}
}
}
}
+static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
+ unsigned index)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct lp_build_context *base = &bld_base->base;
+ struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+ LLVMValueRef args[9];
+ unsigned reg_index;
+ unsigned chan;
+ unsigned const_chan;
+ LLVMValueRef out_elts[4];
+ LLVMValueRef base_elt;
+ LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ LLVMValueRef const_resource = build_indexed_load(si_shader_ctx, ptr, uint->one);
+
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][chan];
+ out_elts[chan] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
+ }
+
+ for (reg_index = 0; reg_index < 2; reg_index ++) {
+ args[5] =
+ args[6] =
+ args[7] =
+ args[8] = lp_build_const_float(base->gallivm, 0.0f);
+
+ /* Compute dot products of position and user clip plane vectors */
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) {
+ args[0] = const_resource;
+ args[1] = lp_build_const_int32(base->gallivm,
+ ((reg_index * 4 + chan) * 4 +
+ const_chan) * 4);
+ base_elt = build_intrinsic(base->gallivm->builder,
+ "llvm.SI.load.const",
+ base->elem_type,
+ args, 2,
+ LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+ args[5 + chan] =
+ lp_build_add(base, args[5 + chan],
+ lp_build_mul(base, base_elt,
+ out_elts[const_chan]));
+ }
+ }
+
+ args[0] = lp_build_const_int32(base->gallivm, 0xf);
+ args[1] = uint->zero;
+ args[2] = uint->zero;
+ args[3] = lp_build_const_int32(base->gallivm,
+ V_008DFC_SQ_EXP_POS + 2 + reg_index);
+ args[4] = uint->zero;
+ lp_build_intrinsic(base->gallivm->builder,
+ "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9);
+ }
+}
+
/* XXX: This is partially implemented for VS only at this point. It is not complete */
static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
struct tgsi_parse_context *parse = &si_shader_ctx->parse;
LLVMValueRef args[9];
LLVMValueRef last_args[9] = { 0 };
+ unsigned semantic_name;
unsigned color_count = 0;
unsigned param_count = 0;
int depth_index = -1, stencil_index = -1;
switch (d->Declaration.File) {
case TGSI_FILE_INPUT:
i = shader->ninput++;
+ assert(i < Elements(shader->input));
shader->input[i].name = d->Semantic.Name;
shader->input[i].sid = d->Semantic.Index;
shader->input[i].interpolate = d->Interp.Interpolate;
case TGSI_FILE_OUTPUT:
i = shader->noutput++;
+ assert(i < Elements(shader->output));
shader->output[i].name = d->Semantic.Name;
shader->output[i].sid = d->Semantic.Index;
shader->output[i].interpolate = d->Interp.Interpolate;
continue;
}
+ semantic_name = d->Semantic.Name;
+handle_semantic:
for (index = d->Range.First; index <= d->Range.Last; index++) {
/* Select the correct target */
- switch(d->Semantic.Name) {
+ switch(semantic_name) {
case TGSI_SEMANTIC_PSIZE:
- target = V_008DFC_SQ_EXP_POS;
+ shader->vs_out_misc_write = 1;
+ shader->vs_out_point_size = 1;
+ target = V_008DFC_SQ_EXP_POS + 1;
break;
case TGSI_SEMANTIC_POSITION:
if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
color_count++;
}
break;
+ case TGSI_SEMANTIC_CLIPDIST:
+ shader->clip_dist_write |=
+ d->Declaration.UsageMask << (d->Semantic.Index << 2);
+ target = V_008DFC_SQ_EXP_POS + 2 + d->Semantic.Index;
+ break;
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ si_llvm_emit_clipvertex(bld_base, index);
+ shader->clip_dist_write = 0xFF;
+ continue;
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_GENERIC:
target = V_008DFC_SQ_EXP_PARAM + param_count;
target = 0;
fprintf(stderr,
"Warning: SI unhandled output type:%d\n",
- d->Semantic.Name);
+ semantic_name);
}
si_llvm_init_export_args(bld_base, d, index, target, args);
if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ?
- (d->Semantic.Name == TGSI_SEMANTIC_POSITION) :
- (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) {
+ (semantic_name == TGSI_SEMANTIC_POSITION) :
+ (semantic_name == TGSI_SEMANTIC_COLOR)) {
if (last_args[0]) {
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
}
}
+
+ if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
+ semantic_name = TGSI_SEMANTIC_GENERIC;
+ goto handle_semantic;
+ }
}
if (depth_index >= 0 || stencil_index >= 0) {
si_shader_ctx->shader->spi_shader_col_format |=
V_028714_SPI_SHADER_32_ABGR;
+ si_shader_ctx->shader->cb_shader_mask |= S_02823C_OUTPUT0_ENABLE(0xf);
}
/* Specify whether the EXEC mask represents the valid mask */
si_shader_ctx->shader->spi_shader_col_format |=
si_shader_ctx->shader->spi_shader_col_format << 4;
+ si_shader_ctx->shader->cb_shader_mask |=
+ si_shader_ctx->shader->cb_shader_mask << 4;
}
last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);
unsigned target = inst->Texture.Texture;
LLVMValueRef coords[4];
LLVMValueRef address[16];
+ int ref_pos;
+ unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
unsigned count = 0;
unsigned chan;
- /* WriteMask */
- /* XXX: should be optimized using emit_data->inst->Dst[0].Register.WriteMask*/
- emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, 0xf);
-
/* Fetch and project texture coordinates */
coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
for (chan = 0; chan < 3; chan++ ) {
if (opcode == TGSI_OPCODE_TXB)
address[count++] = coords[3];
- if ((target == TGSI_TEXTURE_CUBE || target == TGSI_TEXTURE_SHADOWCUBE) &&
- opcode != TGSI_OPCODE_TXQ)
+ if (target == TGSI_TEXTURE_CUBE || target == TGSI_TEXTURE_SHADOWCUBE)
radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
/* Pack depth comparison value */
case TGSI_TEXTURE_SHADOW1D_ARRAY:
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_SHADOWRECT:
- address[count++] = coords[2];
- break;
case TGSI_TEXTURE_SHADOWCUBE:
case TGSI_TEXTURE_SHADOW2D_ARRAY:
- address[count++] = coords[3];
+ assert(ref_pos >= 0);
+ address[count++] = coords[ref_pos];
break;
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
/* Pack texture coordinates */
address[count++] = coords[0];
- switch (target) {
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_2D_ARRAY:
- case TGSI_TEXTURE_3D:
- case TGSI_TEXTURE_CUBE:
- case TGSI_TEXTURE_RECT:
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_SHADOWRECT:
- case TGSI_TEXTURE_SHADOW2D_ARRAY:
- case TGSI_TEXTURE_SHADOWCUBE:
- case TGSI_TEXTURE_2D_MSAA:
- case TGSI_TEXTURE_2D_ARRAY_MSAA:
- case TGSI_TEXTURE_CUBE_ARRAY:
- case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+ if (num_coords > 1)
address[count++] = coords[1];
- }
- switch (target) {
- case TGSI_TEXTURE_3D:
- case TGSI_TEXTURE_CUBE:
- case TGSI_TEXTURE_SHADOWCUBE:
- case TGSI_TEXTURE_CUBE_ARRAY:
- case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+ if (num_coords > 2)
address[count++] = coords[2];
- }
/* Pack array slice */
switch (target) {
}
/* Pack LOD */
- if (opcode == TGSI_OPCODE_TXL)
+ if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF)
address[count++] = coords[3];
if (count > 16) {
"");
}
- /* Pad to power of two vector */
- while (count < util_next_power_of_two(count))
- address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-
- emit_data->args[1] = lp_build_gather_values(gallivm, address, count);
-
/* Resource */
- emit_data->args[2] = si_shader_ctx->resources[emit_data->inst->Src[1].Register.Index];
+ emit_data->args[1] = si_shader_ctx->resources[emit_data->inst->Src[1].Register.Index];
+
+ if (opcode == TGSI_OPCODE_TXF) {
+ /* add tex offsets */
+ if (inst->Texture.NumOffsets) {
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ const struct tgsi_texture_offset * off = inst->TexOffsets;
+
+ assert(inst->Texture.NumOffsets == 1);
+
+ address[0] =
+ lp_build_add(uint_bld, address[0],
+ bld->immediates[off->Index][off->SwizzleX]);
+ if (num_coords > 1)
+ address[1] =
+ lp_build_add(uint_bld, address[1],
+ bld->immediates[off->Index][off->SwizzleY]);
+ if (num_coords > 2)
+ address[2] =
+ lp_build_add(uint_bld, address[2],
+ bld->immediates[off->Index][off->SwizzleZ]);
+ }
- /* Sampler */
- emit_data->args[3] = si_shader_ctx->samplers[emit_data->inst->Src[1].Register.Index];
+ emit_data->dst_type = LLVMVectorType(
+ LLVMInt32TypeInContext(bld_base->base.gallivm->context),
+ 4);
- /* Dimensions */
- emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm, target);
+ emit_data->arg_count = 3;
+ } else {
+ /* Sampler */
+ emit_data->args[2] = si_shader_ctx->samplers[emit_data->inst->Src[1].Register.Index];
- emit_data->arg_count = 5;
- /* XXX: To optimize, we could use a float or v2f32, if the last bits of
- * the writemask are clear */
- emit_data->dst_type = LLVMVectorType(
+ emit_data->dst_type = LLVMVectorType(
LLVMFloatTypeInContext(bld_base->base.gallivm->context),
4);
+
+ emit_data->arg_count = 4;
+ }
+
+ /* Dimensions */
+ emit_data->args[emit_data->arg_count - 1] =
+ lp_build_const_int32(bld_base->base.gallivm, target);
+
+ /* Pad to power of two vector */
+ while (count < util_next_power_of_two(count))
+ address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
+
+ emit_data->args[0] = lp_build_gather_values(gallivm, address, count);
}
static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
char intr_name[23];
sprintf(intr_name, "%sv%ui32", action->intr_name,
- LLVMGetVectorSize(LLVMTypeOf(emit_data->args[1])));
+ LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
emit_data->output[emit_data->chan] = build_intrinsic(
base->gallivm->builder, intr_name, emit_data->dst_type,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
}
+static void txq_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ const struct tgsi_full_instruction *inst = emit_data->inst;
+
+ /* Mip level */
+ emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
+
+ /* Resource */
+ emit_data->args[1] = si_shader_ctx->resources[inst->Src[1].Register.Index];
+
+ /* Dimensions */
+ emit_data->args[2] = lp_build_const_int32(bld_base->base.gallivm,
+ inst->Texture.Texture);
+
+ emit_data->arg_count = 3;
+
+ emit_data->dst_type = LLVMVectorType(
+ LLVMInt32TypeInContext(bld_base->base.gallivm->context),
+ 4);
+}
+
static const struct lp_build_tgsi_action tex_action = {
.fetch_args = tex_fetch_args,
.emit = build_tex_intrinsic,
.intr_name = "llvm.SI.sampleb."
};
+static const struct lp_build_tgsi_action txf_action = {
+ .fetch_args = tex_fetch_args,
+ .emit = build_tex_intrinsic,
+ .intr_name = "llvm.SI.imageload."
+};
+
static const struct lp_build_tgsi_action txl_action = {
.fetch_args = tex_fetch_args,
.emit = build_tex_intrinsic,
.intr_name = "llvm.SI.samplel."
};
+static const struct lp_build_tgsi_action txq_action = {
+ .fetch_args = txq_fetch_args,
+ .emit = build_tgsi_intrinsic_nomem,
+ .intr_name = "llvm.SI.resinfo"
+};
+
static void create_meta_data(struct si_shader_context *si_shader_ctx)
{
struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader,
LLVMModuleRef mod)
{
- unsigned char *inst_bytes;
- unsigned inst_byte_count;
unsigned i;
uint32_t *ptr;
bool dump;
+ struct radeon_llvm_binary binary;
dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE);
- radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count,
- r600_get_llvm_processor_name(rctx->screen->family),
- dump);
-
+ memset(&binary, 0, sizeof(binary));
+ radeon_llvm_compile(mod, &binary,
+ r600_get_llvm_processor_name(rctx->screen->family), dump);
if (dump) {
fprintf(stderr, "SI CODE:\n");
- for (i = 0; i < inst_byte_count; i+=4 ) {
- fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
- inst_bytes[i + 2], inst_bytes[i + 1],
- inst_bytes[i]);
+ for (i = 0; i < binary.code_size; i+=4 ) {
+ fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3],
+ binary.code[i + 2], binary.code[i + 1],
+ binary.code[i]);
}
}
- shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
- shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
- shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));
+ /* XXX: We may be able to emit some of these values directly rather than
+ * extracting fields to be emitted later.
+ */
+ for (i = 0; i < binary.config_size; i+= 8) {
+ unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i));
+ unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
+ switch (reg) {
+ case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
+ case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
+ case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
+ case R_00B848_COMPUTE_PGM_RSRC1:
+ shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8;
+ shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4;
+ break;
+ case R_0286CC_SPI_PS_INPUT_ENA:
+ shader->spi_ps_input_ena = value;
+ break;
+ default:
+ fprintf(stderr, "Warning: Compiler emitted unknown "
+ "config register: 0x%x\n", reg);
+ break;
+ }
+ }
/* copy new shader */
si_resource_reference(&shader->bo, NULL);
shader->bo = si_resource_create_custom(rctx->context.screen, PIPE_USAGE_IMMUTABLE,
- inst_byte_count - 12);
+ binary.code_size);
if (shader->bo == NULL) {
return -ENOMEM;
}
ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
if (0 /*R600_BIG_ENDIAN*/) {
- for (i = 0; i < (inst_byte_count-12)/4; ++i) {
- ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes+12 + i*4));
+ for (i = 0; i < binary.code_size / 4; ++i) {
+ ptr[i] = util_bswap32(*(uint32_t*)(binary.code + i*4));
}
} else {
- memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
+ memcpy(ptr, binary.code, binary.code_size);
}
rctx->ws->buffer_unmap(shader->bo->cs_buf);
- free(inst_bytes);
+ free(binary.code);
+ free(binary.config);
return 0;
}
bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXB] = txb_action;
+ bld_base->op_actions[TGSI_OPCODE_TXF] = txf_action;
bld_base->op_actions[TGSI_OPCODE_TXL] = txl_action;
bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
+ bld_base->op_actions[TGSI_OPCODE_TXQ] = txq_action;
si_shader_ctx.radeon_bld.load_input = declare_input;
si_shader_ctx.radeon_bld.load_system_value = declare_system_value;