R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
return -EINVAL;
}
- if (i->Instruction.Predicate) {
- R600_ERR("predicate unsupported\n");
- return -EINVAL;
- }
#if 0
if (i->Instruction.Label) {
R600_ERR("label unsupported\n");
interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
k = eg_get_interpolator_index(interpolate, location);
- ctx->eg_interpolators[k].enabled = true;
+ if (k >= 0)
+ ctx->eg_interpolators[k].enabled = true;
}
} else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
k = eg_get_interpolator_index(interpolate, location);
- ctx->eg_interpolators[k].enabled = true;
+ if (k >= 0)
+ ctx->eg_interpolators[k].enabled = true;
}
}
}
struct pipe_stream_output_info so = pipeshader->selector->so;
struct tgsi_full_immediate *immediate;
struct r600_shader_ctx ctx;
- struct r600_bytecode_output output[32];
+ struct r600_bytecode_output output[ARRAY_SIZE(shader->output)];
unsigned output_done, noutput;
unsigned opcode;
int i, j, k, r = 0;
int i, j, r, lasti = tgsi_last_instruction(write_mask);
/* use temp register if trans_only and more than one dst component */
int use_tmp = trans_only && (write_mask ^ (1 << lasti));
+ unsigned op = ctx->inst_info->op;
+
+ if (op == ALU_OP2_MUL_IEEE &&
+ ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
+ op = ALU_OP2_MUL;
for (i = 0; i <= lasti; i++) {
if (!(write_mask & (1 << i)))
} else
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- alu.op = ctx->inst_info->op;
+ alu.op = op;
if (!swap) {
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
return 0;
}
-static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
+static int cayman_emit_unary_double_raw(struct r600_bytecode *bc,
+ unsigned op,
+ int dst_reg,
+ struct r600_shader_src *src,
+ bool abs)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- int i, r;
struct r600_bytecode_alu alu;
- int last_slot = 3;
- int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- int t1 = ctx->temp_reg;
+ const int last_slot = 3;
+ int r;
/* these have to write the result to X/Y by the looks of it */
- for (i = 0 ; i < last_slot; i++) {
+ for (int i = 0 ; i < last_slot; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ctx->inst_info->op;
-
- /* should only be one src regs */
- assert (inst->Instruction.NumSrcRegs == 1);
+ alu.op = op;
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
- r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);
+ r600_bytecode_src(&alu.src[0], src, 1);
+ r600_bytecode_src(&alu.src[1], src, 0);
- /* RSQ should take the absolute value of src */
- if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
- ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) {
+ if (abs)
r600_bytecode_src_set_abs(&alu.src[1]);
- }
- alu.dst.sel = t1;
+
+ alu.dst.sel = dst_reg;
alu.dst.chan = i;
alu.dst.write = (i == 0 || i == 1);
- if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1)
+ if (bc->chip_class != CAYMAN || i == last_slot - 1)
alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
+ r = r600_bytecode_add_alu(bc, &alu);
if (r)
return r;
}
+ return 0;
+}
+
+static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int i, r;
+ struct r600_bytecode_alu alu;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+ int t1 = ctx->temp_reg;
+
+ /* should only be one src regs */
+ assert(inst->Instruction.NumSrcRegs == 1);
+
+ /* only support one double at a time */
+ assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
+ inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
+
+ r = cayman_emit_unary_double_raw(
+ ctx->bc, ctx->inst_info->op, t1,
+ &ctx->src[0],
+ ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
+ ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT);
+ if (r)
+ return r;
+
for (i = 0 ; i <= lasti; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
int t1 = ctx->temp_reg;
- for (k = 0; k < 2; k++) {
- if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
- continue;
+ /* t1 would get overwritten below if we actually tried to
+ * multiply two pairs of doubles at a time. */
+ assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
+ inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
- for (i = 0; i < 4; i++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ctx->inst_info->op;
- for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
- }
- alu.dst.sel = t1;
- alu.dst.chan = i;
- alu.dst.write = 1;
- if (i == 3)
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+ for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
+ r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
}
+ alu.dst.sel = t1;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
}
for (i = 0; i <= lasti; i++) {
return 0;
}
+/*
+ * Emit RECIP_64 + MUL_64 to implement division.
+ */
+static int cayman_ddiv_instr(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ int r;
+ struct r600_bytecode_alu alu;
+ int t1 = ctx->temp_reg;
+ int k;
+
+ /* Only support one double at a time. This is the same constraint as
+ * in DMUL lowering. */
+ assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
+ inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
+
+ k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
+
+ r = cayman_emit_unary_double_raw(ctx->bc, ALU_OP2_RECIP_64, t1, &ctx->src[1], false);
+ if (r)
+ return r;
+
+ for (int i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_MUL_64;
+
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], k * 2 + ((i == 3) ? 0 : 1));
+
+ alu.src[1].sel = t1;
+ alu.src[1].chan = (i == 3) ? 0 : 1;
+
+ alu.dst.sel = t1;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (int i = 0; i < 2; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = t1;
+ alu.src[0].chan = i;
+ tgsi_dst(ctx, &inst->Dst[0], k * 2 + i, &alu.dst);
+ alu.dst.write = 1;
+ if (i == 1)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
/*
* r600 - trunc to -PI..PI range
* r700 - normalize by dividing by 2PI
int i, j, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
int temp_regs[4];
+ unsigned op = ctx->inst_info->op;
+
+ if (op == ALU_OP3_MULADD_IEEE &&
+ ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
+ op = ALU_OP3_MULADD;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
temp_regs[j] = 0;
continue;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ctx->inst_info->op;
+ alu.op = op;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]);
if (r)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int i, j, r;
+ unsigned op = ctx->inst_info->op;
+ if (op == ALU_OP2_DOT4_IEEE &&
+ ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
+ op = ALU_OP2_DOT4;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ctx->inst_info->op;
+ alu.op = op;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
}
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
{
- ctx->bc->fc_sp++;
+ assert(ctx->bc->fc_sp < ARRAY_SIZE(ctx->bc->fc_stack));
ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+ ctx->bc->fc_sp++;
}
static void fc_poplevel(struct r600_shader_ctx *ctx)
{
- struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
+ struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp - 1];
free(sp->mid);
sp->mid = NULL;
sp->num_mid = 0;
r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
ctx->bc->cf_last->pop_count = 1;
- fc_set_mid(ctx, ctx->bc->fc_sp);
- ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
+ fc_set_mid(ctx, ctx->bc->fc_sp - 1);
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id;
return 0;
}
static int tgsi_endif(struct r600_shader_ctx *ctx)
{
pops(ctx, 1);
- if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_IF) {
R600_ERR("if/endif unbalanced in shader\n");
return -1;
}
- if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
- ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
- ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid == NULL) {
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2;
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->pop_count = 1;
} else {
- ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
}
fc_poplevel(ctx);
r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END);
- if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
+ if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_LOOP) {
R600_ERR("loop/endloop in shader code are not paired.\n");
return -EINVAL;
}
LOOP START point to CF after LOOP END
BRK/CONT point to LOOP END CF
*/
- ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
+ ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->id + 2;
- ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2;
- for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
- ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
+ for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp - 1].num_mid; i++) {
+ ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[i]->cf_addr = ctx->bc->cf_last->id;
}
/* XXX add LOOPRET support */
fc_poplevel(ctx);
for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
{
- if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+ if (FC_LOOP == ctx->bc->fc_stack[fscp - 1].type)
break;
}
if (fscp == 0) {
r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK);
if (r)
return r;
- fc_set_mid(ctx, fscp);
+ fc_set_mid(ctx, fscp - 1);
return tgsi_endif(ctx);
} else {
r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK);
if (r)
return r;
- fc_set_mid(ctx, fscp);
+ fc_set_mid(ctx, fscp - 1);
}
return 0;
for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
{
- if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+ if (FC_LOOP == ctx->bc->fc_stack[fscp - 1].type)
break;
}
r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
- fc_set_mid(ctx, fscp);
+ fc_set_mid(ctx, fscp - 1);
return 0;
}
[TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq},
[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
[22] = { ALU_OP0_NOP, tgsi_unsupported},
[23] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
- [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [25] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
[TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
[TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
[22] = { ALU_OP0_NOP, tgsi_unsupported},
[23] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
- [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [25] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
[TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
[TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
[TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
[TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+ [TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr },
[TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
[TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
[TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
[TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
[TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
[22] = { ALU_OP0_NOP, tgsi_unsupported},
[23] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
- [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [25] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
[TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
[32] = { ALU_OP0_NOP, tgsi_unsupported},
[33] = { ALU_OP0_NOP, tgsi_unsupported},
[34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig},
[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
[69] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
[TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
[TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
[TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+ [TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr },
[TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
[TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
[TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},