R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
return -EINVAL;
}
- if (i->Instruction.Predicate) {
- R600_ERR("predicate unsupported\n");
- return -EINVAL;
- }
#if 0
if (i->Instruction.Label) {
R600_ERR("label unsupported\n");
interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
k = eg_get_interpolator_index(interpolate, location);
- ctx->eg_interpolators[k].enabled = true;
+ if (k >= 0)
+ ctx->eg_interpolators[k].enabled = true;
}
} else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
k = eg_get_interpolator_index(interpolate, location);
- ctx->eg_interpolators[k].enabled = true;
+ if (k >= 0)
+ ctx->eg_interpolators[k].enabled = true;
}
}
}
struct pipe_stream_output_info so = pipeshader->selector->so;
struct tgsi_full_immediate *immediate;
struct r600_shader_ctx ctx;
- struct r600_bytecode_output output[32];
+ struct r600_bytecode_output output[ARRAY_SIZE(shader->output)];
unsigned output_done, noutput;
unsigned opcode;
int i, j, k, r = 0;
int i, j, r, lasti = tgsi_last_instruction(write_mask);
/* use temp register if trans_only and more than one dst component */
int use_tmp = trans_only && (write_mask ^ (1 << lasti));
+ unsigned op = ctx->inst_info->op;
+
+ if (op == ALU_OP2_MUL_IEEE &&
+ ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
+ op = ALU_OP2_MUL;
for (i = 0; i <= lasti; i++) {
if (!(write_mask & (1 << i)))
} else
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- alu.op = ctx->inst_info->op;
+ alu.op = op;
if (!swap) {
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
int i, j, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
int temp_regs[4];
+ unsigned op = ctx->inst_info->op;
+
+ if (op == ALU_OP3_MULADD_IEEE &&
+ ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
+ op = ALU_OP3_MULADD;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
temp_regs[j] = 0;
continue;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ctx->inst_info->op;
+ alu.op = op;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]);
if (r)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int i, j, r;
+ unsigned op = ctx->inst_info->op;
+ if (op == ALU_OP2_DOT4_IEEE &&
+ ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
+ op = ALU_OP2_DOT4;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ctx->inst_info->op;
+ alu.op = op;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
}
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
{
- ctx->bc->fc_sp++;
+ assert(ctx->bc->fc_sp < ARRAY_SIZE(ctx->bc->fc_stack));
ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+ ctx->bc->fc_sp++;
}
static void fc_poplevel(struct r600_shader_ctx *ctx)
{
- struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
+ struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp - 1];
free(sp->mid);
sp->mid = NULL;
sp->num_mid = 0;
r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
ctx->bc->cf_last->pop_count = 1;
- fc_set_mid(ctx, ctx->bc->fc_sp);
- ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
+ fc_set_mid(ctx, ctx->bc->fc_sp - 1);
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id;
return 0;
}
static int tgsi_endif(struct r600_shader_ctx *ctx)
{
pops(ctx, 1);
- if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_IF) {
R600_ERR("if/endif unbalanced in shader\n");
return -1;
}
- if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
- ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
- ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid == NULL) {
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2;
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->pop_count = 1;
} else {
- ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
}
fc_poplevel(ctx);
r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END);
- if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_LOOP) {
R600_ERR("loop/endloop in shader code are not paired.\n");
return -EINVAL;
}
LOOP START point to CF after LOOP END
BRK/CONT point to LOOP END CF
*/
- ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
+ ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->id + 2;
- ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2;
- for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
- ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
+ for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp - 1].num_mid; i++) {
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[i]->cf_addr = ctx->bc->cf_last->id;
}
/* XXX add LOOPRET support */
fc_poplevel(ctx);
for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
{
- if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+ if (FC_LOOP == ctx->bc->fc_stack[fscp - 1].type)
break;
}
if (fscp == 0) {
r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK);
if (r)
return r;
- fc_set_mid(ctx, fscp);
+ fc_set_mid(ctx, fscp - 1);
return tgsi_endif(ctx);
} else {
r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK);
if (r)
return r;
- fc_set_mid(ctx, fscp);
+ fc_set_mid(ctx, fscp - 1);
}
return 0;
for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
{
- if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+ if (FC_LOOP == ctx->bc->fc_stack[fscp - 1].type)
break;
}
r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
- fc_set_mid(ctx, fscp);
+ fc_set_mid(ctx, fscp - 1);
return 0;
}
[TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq},
[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
[22] = { ALU_OP0_NOP, tgsi_unsupported},
[23] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
- [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [25] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
[TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
[TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
[22] = { ALU_OP0_NOP, tgsi_unsupported},
[23] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
- [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [25] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
[TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
[TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
[22] = { ALU_OP0_NOP, tgsi_unsupported},
[23] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
- [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [25] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
[TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},