#include "r600_shader.h"
#include "r600d.h"
+#include "sb/sb_public.h"
+
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
struct r600_pipe_shader *pipeshader,
struct r600_shader_key key);
+static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
+ int size, unsigned comp_mask) {
+
+ if (!size)
+ return;
+
+ if (ps->num_arrays == ps->max_arrays) {
+ ps->max_arrays += 64;
+ ps->arrays = realloc(ps->arrays, ps->max_arrays *
+ sizeof(struct r600_shader_array));
+ }
+
+ int n = ps->num_arrays;
+ ++ps->num_arrays;
+
+ ps->arrays[n].comp_mask = comp_mask;
+ ps->arrays[n].gpr_start = start_gpr;
+ ps->arrays[n].gpr_count = size;
+}
+
static unsigned tgsi_get_processor_type(const struct tgsi_token *tokens)
{
struct tgsi_parse_context parse;
int r, i;
uint32_t *ptr;
bool dump = r600_can_dump_shader(rctx->screen, tgsi_get_processor_type(sel->tokens));
+ unsigned use_sb = rctx->screen->debug_flags & DBG_SB;
+ unsigned sb_disasm = use_sb || (rctx->screen->debug_flags & DBG_SB_DISASM);
shader->shader.bc.isa = rctx->isa;
R600_ERR("translation from TGSI failed !\n");
return r;
}
- r = r600_bytecode_build(&shader->shader.bc);
- if (r) {
- R600_ERR("building bytecode failed !\n");
- return r;
+
+ /* Check if the bytecode has already been built. When using the llvm
+ * backend, r600_shader_from_tgsi() will take care of building the
+ * bytecode.
+ */
+ if (!shader->shader.bc.bytecode) {
+ r = r600_bytecode_build(&shader->shader.bc);
+ if (r) {
+ R600_ERR("building bytecode failed !\n");
+ return r;
+ }
}
- if (dump) {
+
+ if (dump && !sb_disasm) {
fprintf(stderr, "--------------------------------------------------------------\n");
r600_bytecode_disasm(&shader->shader.bc);
fprintf(stderr, "______________________________________________________________\n");
+ } else if ((dump && sb_disasm) || use_sb) {
+ r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
+ dump, use_sb);
+ if (r) {
+ R600_ERR("r600_sb_bytecode_process failed !\n");
+ return r;
+ }
}
-
/* Store the shader in a buffer. */
if (shader->bo == NULL) {
shader->bo = (struct r600_resource*)
static int tgsi_endloop(struct r600_shader_ctx *ctx);
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
-/*
- * bytestream -> r600 shader
- *
- * These functions are used to transform the output of the LLVM backend into
- * struct r600_bytecode.
- */
-
-static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
- unsigned char * bytes, unsigned num_bytes);
-
#ifdef HAVE_OPENCL
int r600_compute_shader_create(struct pipe_context * ctx,
LLVMModuleRef mod, struct r600_bytecode * bytecode)
{
struct r600_context *r600_ctx = (struct r600_context *)ctx;
- unsigned char * bytes;
- unsigned byte_count;
struct r600_shader_ctx shader_ctx;
+ boolean use_kill = false;
bool dump = (r600_ctx->screen->debug_flags & DBG_CS) != 0;
+ unsigned use_sb = r600_ctx->screen->debug_flags & DBG_SB_CS;
+ unsigned sb_disasm = use_sb ||
+ (r600_ctx->screen->debug_flags & DBG_SB_DISASM);
- r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
shader_ctx.bc = bytecode;
r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family,
r600_ctx->screen->msaa_texture_support);
shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
shader_ctx.bc->isa = r600_ctx->isa;
- r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
- if (shader_ctx.bc->chip_class == CAYMAN) {
- cm_bytecode_add_cf_end(shader_ctx.bc);
- }
- r600_bytecode_build(shader_ctx.bc);
- if (dump) {
+ r600_llvm_compile(mod, r600_ctx->family,
+ shader_ctx.bc, &use_kill, dump);
+
+ if (dump && !sb_disasm) {
r600_bytecode_disasm(shader_ctx.bc);
+ } else if ((dump && sb_disasm) || use_sb) {
+ if (r600_sb_bytecode_process(r600_ctx, shader_ctx.bc, NULL, dump, use_sb))
+ R600_ERR("r600_sb_bytecode_process failed!\n");
}
- free(bytes);
+
return 1;
}
#endif /* HAVE_OPENCL */
-static uint32_t i32_from_byte_stream(unsigned char * bytes,
- unsigned * bytes_read)
-{
- unsigned i;
- uint32_t out = 0;
- for (i = 0; i < 4; i++) {
- out |= bytes[(*bytes_read)++] << (8 * i);
- }
- return out;
-}
-
-static unsigned r600_src_from_byte_stream(unsigned char * bytes,
- unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx)
-{
- unsigned i;
- unsigned sel0, sel1;
- sel0 = bytes[bytes_read++];
- sel1 = bytes[bytes_read++];
- alu->src[src_idx].sel = sel0 | (sel1 << 8);
- alu->src[src_idx].chan = bytes[bytes_read++];
- alu->src[src_idx].neg = bytes[bytes_read++];
- alu->src[src_idx].abs = bytes[bytes_read++];
- alu->src[src_idx].rel = bytes[bytes_read++];
- alu->src[src_idx].kc_bank = bytes[bytes_read++];
- for (i = 0; i < 4; i++) {
- alu->src[src_idx].value |= bytes[bytes_read++] << (i * 8);
- }
- return bytes_read;
-}
-
-static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx,
- unsigned char * bytes, unsigned bytes_read)
-{
- unsigned src_idx, src_num;
- struct r600_bytecode_alu alu;
- unsigned src_use_sel[3];
- const struct alu_op_info *alu_op;
- unsigned src_sel[3] = {};
- uint32_t word0, word1;
-
- src_num = bytes[bytes_read++];
-
- memset(&alu, 0, sizeof(alu));
- for(src_idx = 0; src_idx < src_num; src_idx++) {
- unsigned i;
- src_use_sel[src_idx] = bytes[bytes_read++];
- for (i = 0; i < 4; i++) {
- src_sel[src_idx] |= bytes[bytes_read++] << (i * 8);
- }
- for (i = 0; i < 4; i++) {
- alu.src[src_idx].value |= bytes[bytes_read++] << (i * 8);
- }
- }
-
- word0 = i32_from_byte_stream(bytes, &bytes_read);
- word1 = i32_from_byte_stream(bytes, &bytes_read);
-
- switch(ctx->bc->chip_class) {
- default:
- case R600:
- r600_bytecode_alu_read(ctx->bc, &alu, word0, word1);
- break;
- case R700:
- case EVERGREEN:
- case CAYMAN:
- r700_bytecode_alu_read(ctx->bc, &alu, word0, word1);
- break;
- }
-
- for(src_idx = 0; src_idx < src_num; src_idx++) {
- if (src_use_sel[src_idx]) {
- unsigned sel = src_sel[src_idx];
-
- alu.src[src_idx].chan = sel & 3;
- sel >>= 2;
-
- if (sel>=512) { /* constant */
- sel -= 512;
- alu.src[src_idx].kc_bank = sel >> 12;
- alu.src[src_idx].sel = (sel & 4095) + 512;
- }
- else {
- alu.src[src_idx].sel = sel;
- }
- }
- }
-
- alu_op = r600_isa_alu(alu.op);
-
-#if HAVE_LLVM < 0x0302
- if ((alu_op->flags & AF_PRED) && alu_op->src_count == 2) {
- alu.update_pred = 1;
- alu.dst.write = 0;
- alu.src[1].sel = V_SQ_ALU_SRC_0;
- alu.src[1].chan = 0;
- alu.last = 1;
- }
-#endif
-
- if (alu_op->flags & AF_MOVA) {
- ctx->bc->ar_reg = alu.src[0].sel;
- ctx->bc->ar_chan = alu.src[0].chan;
- ctx->bc->ar_loaded = 0;
- return bytes_read;
- }
-
- r600_bytecode_add_alu_type(ctx->bc, &alu, ctx->bc->cf_last->op);
-
- /* XXX: Handle other KILL instructions */
- if (alu_op->flags & AF_KILL) {
- ctx->shader->uses_kill = 1;
- /* XXX: This should be enforced in the LLVM backend. */
- ctx->bc->force_add_cf = 1;
- }
- return bytes_read;
-}
-
-static void llvm_if(struct r600_shader_ctx *ctx)
-{
- r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
- fc_pushlevel(ctx, FC_IF);
- callstack_push(ctx, FC_PUSH_VPM);
-}
-
-static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx)
-{
- unsigned opcode = TGSI_OPCODE_BRK;
- if (ctx->bc->chip_class == CAYMAN)
- ctx->inst_info = &cm_shader_tgsi_instruction[opcode];
- else if (ctx->bc->chip_class >= EVERGREEN)
- ctx->inst_info = &eg_shader_tgsi_instruction[opcode];
- else
- ctx->inst_info = &r600_shader_tgsi_instruction[opcode];
- llvm_if(ctx);
- tgsi_loop_brk_cont(ctx);
- tgsi_endif(ctx);
-}
-
-static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
- unsigned char * bytes, unsigned bytes_read)
-{
- struct r600_bytecode_alu alu;
- unsigned inst;
- memset(&alu, 0, sizeof(alu));
- bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0);
- inst = bytes[bytes_read++];
- switch (inst) {
- case 0: /* IF_PREDICATED */
- llvm_if(ctx);
- break;
- case 1: /* ELSE */
- tgsi_else(ctx);
- break;
- case 2: /* ENDIF */
- tgsi_endif(ctx);
- break;
- case 3: /* BGNLOOP */
- tgsi_bgnloop(ctx);
- break;
- case 4: /* ENDLOOP */
- tgsi_endloop(ctx);
- break;
- case 5: /* PREDICATED_BREAK */
- r600_break_from_byte_stream(ctx);
- break;
- case 6: /* CONTINUE */
- {
- unsigned opcode = TGSI_OPCODE_CONT;
- if (ctx->bc->chip_class == CAYMAN) {
- ctx->inst_info =
- &cm_shader_tgsi_instruction[opcode];
- } else if (ctx->bc->chip_class >= EVERGREEN) {
- ctx->inst_info =
- &eg_shader_tgsi_instruction[opcode];
- } else {
- ctx->inst_info =
- &r600_shader_tgsi_instruction[opcode];
- }
- tgsi_loop_brk_cont(ctx);
- }
- break;
- }
-
- return bytes_read;
-}
-
-static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
- unsigned char * bytes, unsigned bytes_read)
-{
- struct r600_bytecode_tex tex;
-
- uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
- uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
- uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read);
-
- tex.op = r600_isa_fetch_by_opcode(ctx->bc->isa, G_SQ_TEX_WORD0_TEX_INST(word0));
- tex.resource_id = G_SQ_TEX_WORD0_RESOURCE_ID(word0);
- tex.src_gpr = G_SQ_TEX_WORD0_SRC_GPR(word0);
- tex.src_rel = G_SQ_TEX_WORD0_SRC_REL(word0);
- tex.dst_gpr = G_SQ_TEX_WORD1_DST_GPR(word1);
- tex.dst_rel = G_SQ_TEX_WORD1_DST_REL(word1);
- tex.dst_sel_x = G_SQ_TEX_WORD1_DST_SEL_X(word1);
- tex.dst_sel_y = G_SQ_TEX_WORD1_DST_SEL_Y(word1);
- tex.dst_sel_z = G_SQ_TEX_WORD1_DST_SEL_Z(word1);
- tex.dst_sel_w = G_SQ_TEX_WORD1_DST_SEL_W(word1);
- tex.lod_bias = G_SQ_TEX_WORD1_LOD_BIAS(word1);
- tex.coord_type_x = G_SQ_TEX_WORD1_COORD_TYPE_X(word1);
- tex.coord_type_y = G_SQ_TEX_WORD1_COORD_TYPE_Y(word1);
- tex.coord_type_z = G_SQ_TEX_WORD1_COORD_TYPE_Z(word1);
- tex.coord_type_w = G_SQ_TEX_WORD1_COORD_TYPE_W(word1);
- tex.offset_x = G_SQ_TEX_WORD2_OFFSET_X(word2);
- tex.offset_y = G_SQ_TEX_WORD2_OFFSET_Y(word2);
- tex.offset_z = G_SQ_TEX_WORD2_OFFSET_Z(word2);
- tex.sampler_id = G_SQ_TEX_WORD2_SAMPLER_ID(word2);
- tex.src_sel_x = G_SQ_TEX_WORD2_SRC_SEL_X(word2);
- tex.src_sel_y = G_SQ_TEX_WORD2_SRC_SEL_Y(word2);
- tex.src_sel_z = G_SQ_TEX_WORD2_SRC_SEL_Z(word2);
- tex.src_sel_w = G_SQ_TEX_WORD2_SRC_SEL_W(word2);
- tex.offset_x <<= 1;
- tex.offset_y <<= 1;
- tex.offset_z <<= 1;
-
- tex.inst_mod = 0;
-
- r600_bytecode_add_tex(ctx->bc, &tex);
-
- return bytes_read;
-}
-
-static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
- unsigned char * bytes, unsigned bytes_read)
-{
- struct r600_bytecode_vtx vtx;
-
- uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
- uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
- uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read);
-
- memset(&vtx, 0, sizeof(vtx));
-
- /* WORD0 */
- vtx.op = r600_isa_fetch_by_opcode(ctx->bc->isa,
- G_SQ_VTX_WORD0_VTX_INST(word0));
- vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0);
- vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0);
- vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0);
- vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0);
- vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0);
-
- /* WORD1 */
- vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1);
- vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1);
- vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1);
- vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1);
- vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1);
- vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1);
- vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1);
- vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1);
- vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1);
- vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1);
-
- /* WORD 2*/
- vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2);
- vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2);
-
- if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
- fprintf(stderr, "Error adding vtx\n");
- }
-
- /* Use the Texture Cache for compute shaders*/
- if (ctx->bc->chip_class >= EVERGREEN &&
- ctx->bc->type == TGSI_PROCESSOR_COMPUTE) {
- ctx->bc->cf_last->op = CF_OP_TEX;
- }
- return bytes_read;
-}
-
-static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx,
- unsigned char * bytes, unsigned bytes_read)
-{
- uint32_t word0 = 0, word1 = 0;
- struct r600_bytecode_output output;
- memset(&output, 0, sizeof(struct r600_bytecode_output));
- word0 = i32_from_byte_stream(bytes, &bytes_read);
- word1 = i32_from_byte_stream(bytes, &bytes_read);
- if (ctx->bc->chip_class >= EVERGREEN)
- eg_bytecode_export_read(ctx->bc, &output, word0,word1);
- else
- r600_bytecode_export_read(ctx->bc, &output, word0,word1);
- r600_bytecode_add_output(ctx->bc, &output);
- return bytes_read;
-}
-
-static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
- unsigned char * bytes, unsigned num_bytes)
-{
- unsigned bytes_read = 0;
- ctx->bc->nstack = bytes[bytes_read++];
- unsigned i, byte;
- while (bytes_read < num_bytes) {
- char inst_type = bytes[bytes_read++];
- switch (inst_type) {
- case 0:
- bytes_read = r600_alu_from_byte_stream(ctx, bytes,
- bytes_read);
- break;
- case 1:
- bytes_read = r600_tex_from_byte_stream(ctx, bytes,
- bytes_read);
- break;
- case 2:
- bytes_read = r600_fc_from_byte_stream(ctx, bytes,
- bytes_read);
- break;
- case 3:
- r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
- for (i = 0; i < 2; i++) {
- for (byte = 0 ; byte < 4; byte++) {
- ctx->bc->cf_last->isa[i] |=
- (bytes[bytes_read++] << (byte * 8));
- }
- }
- break;
-
- case 4:
- bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
- bytes_read);
- break;
- case 5:
- bytes_read = r600_export_from_byte_stream(ctx, bytes,
- bytes_read);
- break;
- case 6: {
- int32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
- int32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
-
- r600_bytecode_add_cf(ctx->bc);
- ctx->bc->cf_last->op = r600_isa_cf_by_opcode(ctx->bc->isa, G_SQ_CF_ALU_WORD1_CF_INST(word1), 1);
- ctx->bc->cf_last->kcache[0].bank = G_SQ_CF_ALU_WORD0_KCACHE_BANK0(word0);
- ctx->bc->cf_last->kcache[0].addr = G_SQ_CF_ALU_WORD1_KCACHE_ADDR0(word1);
- ctx->bc->cf_last->kcache[0].mode = G_SQ_CF_ALU_WORD0_KCACHE_MODE0(word0);
- ctx->bc->cf_last->kcache[1].bank = G_SQ_CF_ALU_WORD0_KCACHE_BANK1(word0);
- ctx->bc->cf_last->kcache[1].addr = G_SQ_CF_ALU_WORD1_KCACHE_ADDR1(word1);
- ctx->bc->cf_last->kcache[1].mode = G_SQ_CF_ALU_WORD1_KCACHE_MODE1(word1);
- break;
- }
- default:
- /* XXX: Error here */
- break;
- }
- }
-}
-
-/* End bytestream -> r600 shader functions*/
-
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
}
}
break;
- case TGSI_FILE_CONSTANT:
case TGSI_FILE_TEMPORARY:
+ if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+ if (d->Array.ArrayID) {
+ r600_add_gpr_array(ctx->shader,
+ ctx->file_offset[TGSI_FILE_TEMPORARY] +
+ d->Range.First,
+ d->Range.Last - d->Range.First + 1, 0x0F);
+ }
+ }
+ break;
+
+ case TGSI_FILE_CONSTANT:
case TGSI_FILE_SAMPLER:
case TGSI_FILE_ADDRESS:
break;
return 0;
}
+
static int r600_shader_from_tgsi(struct r600_screen *rscreen,
struct r600_pipe_shader *pipeshader,
struct r600_shader_key key)
int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
/* Declarations used by llvm code */
bool use_llvm = false;
- unsigned char * inst_bytes = NULL;
- unsigned inst_byte_count = 0;
+ bool indirect_gprs;
#ifdef R600_USE_LLVM
use_llvm = !(rscreen->debug_flags & DBG_NO_LLVM);
rscreen->msaa_texture_support);
ctx.tokens = tokens;
tgsi_scan_shader(tokens, &ctx.info);
+ shader->indirect_files = ctx.info.indirect_files;
+ indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
tgsi_parse_init(&ctx.parse, tokens);
ctx.type = ctx.parse.FullHeader.Processor.Processor;
shader->processor_type = ctx.type;
for (i = 0; i < TGSI_FILE_COUNT; i++) {
ctx.file_offset[i] = 0;
}
- if (ctx.type == TGSI_PROCESSOR_VERTEX) {
- ctx.file_offset[TGSI_FILE_INPUT] = 1;
- r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
- }
- if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
- ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
- }
#ifdef R600_USE_LLVM
if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) {
use_llvm = 0;
}
#endif
+ if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+ ctx.file_offset[TGSI_FILE_INPUT] = 1;
+ if (!use_llvm) {
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
+ }
+ }
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
+ ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
+ }
ctx.use_llvm = use_llvm;
if (use_llvm) {
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
ctx.temp_reg = ctx.bc->ar_reg + 1;
+ if (indirect_gprs) {
+ shader->max_arrays = 0;
+ shader->num_arrays = 0;
+
+ if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) {
+ r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT],
+ ctx.file_offset[TGSI_FILE_OUTPUT] -
+ ctx.file_offset[TGSI_FILE_INPUT],
+ 0x0F);
+ }
+ if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
+ r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT],
+ ctx.file_offset[TGSI_FILE_TEMPORARY] -
+ ctx.file_offset[TGSI_FILE_OUTPUT],
+ 0x0F);
+ }
+ }
+
ctx.nliterals = 0;
ctx.literals = NULL;
shader->fs_write_all = FALSE;
struct radeon_llvm_context radeon_llvm_ctx;
LLVMModuleRef mod;
bool dump = r600_can_dump_shader(rscreen, ctx.type);
+ boolean use_kill = false;
memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
radeon_llvm_ctx.type = ctx.type;
radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
- if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
- rscreen->family, dump)) {
- FREE(inst_bytes);
+ if (r600_llvm_compile(mod, rscreen->family, ctx.bc, &use_kill, dump)) {
radeon_llvm_dispose(&radeon_llvm_ctx);
use_llvm = 0;
fprintf(stderr, "R600 LLVM backend failed to compile "
ctx.file_offset[TGSI_FILE_OUTPUT] =
ctx.file_offset[TGSI_FILE_INPUT];
}
+ if (use_kill)
+ ctx.shader->uses_kill = use_kill;
radeon_llvm_dispose(&radeon_llvm_ctx);
}
#endif
/* Reset the temporary register counter. */
ctx.max_driver_temp_used = 0;
- /* Get instructions if we are using the LLVM backend. */
- if (use_llvm) {
- r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count);
- FREE(inst_bytes);
- }
-
noutput = shader->noutput;
if (ctx.clip_vertex_write) {
}
}
/* add program end */
- if (ctx.bc->chip_class == CAYMAN)
+ if (!use_llvm && ctx.bc->chip_class == CAYMAN)
cm_bytecode_add_cf_end(ctx.bc);
/* check GPR limit - we have 124 = 128 - 4
/* the array index is read from Z */
tex.coord_type_z = 0;
+ /* mask unused source components */
+ if (opcode == FETCH_OP_SAMPLE) {
+ switch (inst->Texture.Texture) {
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ tex.src_sel_z = 7;
+ tex.src_sel_w = 7;
+ break;
+ case TGSI_TEXTURE_1D_ARRAY:
+ tex.src_sel_y = 7;
+ tex.src_sel_w = 7;
+ break;
+ case TGSI_TEXTURE_1D:
+ tex.src_sel_y = 7;
+ tex.src_sel_z = 7;
+ tex.src_sel_w = 7;
+ break;
+ }
+ }
+
r = r600_bytecode_add_tex(ctx->bc, &tex);
if (r)
return r;
return 0;
}
-static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
+static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type)
{
struct r600_bytecode_alu alu;
int r;
alu.last = 1;
- r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);
+ r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
if (r)
return r;
return 0;
}
#endif
-static int tgsi_if(struct r600_shader_ctx *ctx)
+static int emit_if(struct r600_shader_ctx *ctx, int opcode)
{
- emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT);
+ int alu_type = CF_OP_ALU_PUSH_BEFORE;
+
+ /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
+ * LOOP_STARTxxx for nested loops may put the branch stack into a state
+ * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this
+ * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
+ if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
+ r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
+ ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+ alu_type = CF_OP_ALU;
+ }
+
+ emit_logic_pred(ctx, opcode, alu_type);
r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
return 0;
}
+static int tgsi_if(struct r600_shader_ctx *ctx)
+{
+ return emit_if(ctx, ALU_OP2_PRED_SETNE);
+}
+
+static int tgsi_uif(struct r600_shader_ctx *ctx)
+{
+ return emit_if(ctx, ALU_OP2_PRED_SETNE_INT);
+}
+
static int tgsi_else(struct r600_shader_ctx *ctx)
{
r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
{TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
{TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
{TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if},
- /* gap */
- {75, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif},
{76, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else},
{TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
{111, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_IFC, 0, ALU_OP0_NOP, tgsi_unsupported},
+ /* gap */
+ {114, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_KIL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
{TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
{TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
{TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if},
- /* gap */
- {75, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif},
{76, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else},
{TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
{111, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_IFC, 0, ALU_OP0_NOP, tgsi_unsupported},
+ /* gap */
+ {114, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_KIL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
{TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
{TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
{TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if},
- /* gap */
- {75, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif},
{76, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else},
{TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
{111, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_IFC, 0, ALU_OP0_NOP, tgsi_unsupported},
+ /* gap */
+ {114, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_KIL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */