case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break;
case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
case TYPE_S32:
- case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break;
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+ res.data.s32 = ((int64_t)a->data.s32 * b->data.s32) >> 32;
+ break;
+ }
+ /* fallthrough */
+ case TYPE_U32:
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+ res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32) >> 32;
+ break;
+ }
+ res.data.u32 = a->data.u32 * b->data.u32; break;
default:
return;
}
{
const int t = !s;
const operation op = i->op;
+ Instruction *newi = i;
switch (i->op) {
case OP_MUL:
if (i->dType == TYPE_F32)
tryCollapseChainedMULs(i, s, imm0);
+ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+ assert(!isFloatType(i->sType));
+ if (imm0.isInteger(1) && i->dType == TYPE_S32) {
+ bld.setPosition(i, false);
+ // Need to set to the sign value, which is a compare.
+ newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
+ TYPE_S32, i->getSrc(t), bld.mkImm(0));
+ delete_Instruction(prog, i);
+ } else if (imm0.isInteger(0) || imm0.isInteger(1)) {
+ // The high bits can't be set in this case (either mul by 0 or
+ // unsigned by 1)
+ i->op = OP_MOV;
+ i->subOp = 0;
+ i->setSrc(0, new_ImmediateValue(prog, 0u));
+ i->src(0).mod = Modifier(0);
+ i->setSrc(1, NULL);
+ } else if (!imm0.isNegative() && imm0.isPow2()) {
+ // Translate into a shift
+ imm0.applyLog2();
+ i->op = OP_SHR;
+ i->subOp = 0;
+ imm0.reg.data.u32 = 32 - imm0.reg.data.u32;
+ i->setSrc(0, i->getSrc(t));
+ i->src(0).mod = i->src(t).mod;
+ i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
+ i->src(1).mod = 0;
+ }
+ } else
if (imm0.isInteger(0)) {
i->op = OP_MOV;
i->setSrc(0, new_ImmediateValue(prog, 0u));
else
tA = tB;
tB = s ? bld.getSSA() : i->getDef(0);
- bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
+ newi = bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
if (s)
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
tA = bld.getSSA();
bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, TYPE_S32, i->getSrc(0), bld.mkImm(0));
tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue();
- bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
+ newi = bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
if (d < 0)
bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
default:
return;
}
- if (i->op != op)
+ if (newi->op != op)
foldCount++;
}