return 0;
}
-static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader);
+static int r600_shader_from_tgsi(struct r600_screen *rscreen,
+ struct r600_pipe_shader *pipeshader,
+ struct r600_shader_key key);
-int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+int r600_pipe_shader_create(struct pipe_context *ctx,
+ struct r600_pipe_shader *shader,
+ struct r600_shader_key key)
{
static int dump_shaders = -1;
struct r600_context *rctx = (struct r600_context *)ctx;
}
}
}
- r = r600_shader_from_tgsi(rctx, shader);
+ r = r600_shader_from_tgsi(rctx->screen, shader, key);
if (r) {
R600_ERR("translation from TGSI failed !\n");
return r;
if (dump) {
r600_bytecode_dump(shader_ctx.bc);
}
+ free(bytes);
return 1;
}
unsigned char * bytes, unsigned bytes_read)
{
unsigned src_idx;
- unsigned inst0, inst1;
- unsigned push_modifier;
struct r600_bytecode_alu alu;
+ unsigned src_const_reg[3];
+ uint32_t word0, word1;
+
memset(&alu, 0, sizeof(alu));
for(src_idx = 0; src_idx < 3; src_idx++) {
- bytes_read = r600_src_from_byte_stream(bytes, bytes_read,
- &alu, src_idx);
- }
-
- alu.dst.sel = bytes[bytes_read++];
- alu.dst.chan = bytes[bytes_read++];
- alu.dst.clamp = bytes[bytes_read++];
- alu.dst.write = bytes[bytes_read++];
- alu.dst.rel = bytes[bytes_read++];
- inst0 = bytes[bytes_read++];
- inst1 = bytes[bytes_read++];
- alu.inst = inst0 | (inst1 << 8);
- alu.last = bytes[bytes_read++];
- alu.is_op3 = bytes[bytes_read++];
- push_modifier = bytes[bytes_read++];
- alu.pred_sel = bytes[bytes_read++];
- alu.bank_swizzle = bytes[bytes_read++];
- alu.bank_swizzle_force = bytes[bytes_read++];
- alu.omod = bytes[bytes_read++];
- alu.index_mode = bytes[bytes_read++];
+ unsigned i;
+ src_const_reg[src_idx] = bytes[bytes_read++];
+ for (i = 0; i < 4; i++) {
+ alu.src[src_idx].value |= bytes[bytes_read++] << (i * 8);
+ }
+ }
+
+ word0 = i32_from_byte_stream(bytes, &bytes_read);
+ word1 = i32_from_byte_stream(bytes, &bytes_read);
+ switch(ctx->bc->chip_class) {
+ case R600:
+ r600_bytecode_alu_read(&alu, word0, word1);
+ break;
+ case R700:
+ case EVERGREEN:
+ case CAYMAN:
+ r700_bytecode_alu_read(&alu, word0, word1);
+ break;
+ }
+
+ for(src_idx = 0; src_idx < 3; src_idx++) {
+ if (src_const_reg[src_idx])
+ alu.src[src_idx].sel += 512;
+ }
if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE) ||
alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE) ||
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = 0;
alu.last = 1;
- }
+ }
- if (push_modifier) {
- alu.pred_sel = 0;
- alu.execute_mask = 1;
+ if (alu.execute_mask) {
+ alu.pred_sel = 0;
r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE));
- } else
+ } else {
r600_bytecode_add_alu(ctx->bc, &alu);
-
+ }
/* XXX: Handle other KILL instructions */
if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) {
return 0;
}
-static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader)
+static int r600_shader_from_tgsi(struct r600_screen *rscreen,
+ struct r600_pipe_shader *pipeshader,
+ struct r600_shader_key key)
{
struct r600_shader *shader = &pipeshader->shader;
struct tgsi_token *tokens = pipeshader->selector->tokens;
ctx.shader = shader;
ctx.native_integers = true;
- r600_bytecode_init(ctx.bc, rctx->chip_class, rctx->family);
+ r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family);
ctx.tokens = tokens;
tgsi_scan_shader(tokens, &ctx.info);
tgsi_parse_init(&ctx.parse, tokens);
shader->nr_ps_color_exports = 0;
shader->nr_ps_max_color_exports = 0;
- shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side;
+ shader->two_side = key.color_two_side;
/* register allocations */
/* Values [0,127] correspond to GPR[0..127].
dump = 1;
}
if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
- rctx->family, dump)) {
+ rscreen->family, dump)) {
FREE(inst_bytes);
radeon_llvm_dispose(&radeon_llvm_ctx);
use_llvm = 0;
shader->fs_write_all = TRUE;
break;
case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
- if (property->u[0].Data == 1)
- shader->vs_prohibit_ucps = TRUE;
+ /* we don't need this one */
break;
}
break;
}
}
- if (shader->fs_write_all && rctx->chip_class >= EVERGREEN)
+ if (shader->fs_write_all && rscreen->chip_class >= EVERGREEN)
shader->nr_ps_max_color_exports = 8;
if (ctx.fragcoord_input >= 0) {
case TGSI_PROCESSOR_FRAGMENT:
if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
/* never export more colors than the number of CBs */
- if (next_pixel_base && next_pixel_base >= (rctx->nr_cbufs + rctx->dual_src_blend * 1)) {
+ if (next_pixel_base && next_pixel_base >= key.nr_cbufs + key.dual_src_blend) {
/* skip export */
j--;
continue;
}
- output[j].swizzle_w = rctx->alpha_to_one && rctx->multisample_enable && !rctx->cb0_is_integer ? 5 : 3;
+ output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
output[j].array_base = next_pixel_base++;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
shader->nr_ps_color_exports++;
- if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) {
- for (k = 1; k < rctx->nr_cbufs; k++) {
+ if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) {
+ for (k = 1; k < key.nr_cbufs; k++) {
j++;
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = shader->output[i].gpr;
output[j].swizzle_x = 0;
output[j].swizzle_y = 1;
output[j].swizzle_z = 2;
- output[j].swizzle_w = rctx->alpha_to_one && rctx->multisample_enable && !rctx->cb0_is_integer ? 5 : 3;
+ output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
output[j].burst_count = 1;
output[j].barrier = 1;
output[j].array_base = next_pixel_base++;
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);
+
+ /* RSQ should take the absolute value of src */
+ if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) {
+ r600_bytecode_src_set_abs(&alu.src[j]);
+ }
}
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
static void fc_poplevel(struct r600_shader_ctx *ctx)
{
struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
- if (sp->mid) {
- free(sp->mid);
- sp->mid = NULL;
- }
+ free(sp->mid);
+ sp->mid = NULL;
sp->num_mid = 0;
sp->start = NULL;
sp->type = 0;
static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
{
- r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
+ /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so unlike the
+ * other LOOP_* instructions it is not limited to 4096 iterations. */
+ r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10));
fc_pushlevel(ctx, FC_LOOP);