high.offset = inst->dst.offset % REG_SIZE;
if (devinfo->gen >= 7) {
+ if (inst->src[1].abs)
+ lower_src_modifiers(this, block, inst, 1);
+
if (inst->src[1].file == IMM) {
ibld.MUL(low, inst->src[0],
brw_imm_uw(inst->src[1].ud & 0xffff));
subscript(inst->src[1], BRW_REGISTER_TYPE_UW, 1));
}
} else {
+ if (inst->src[0].abs)
+ lower_src_modifiers(this, block, inst, 0);
+
ibld.MUL(low, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 0),
inst->src[1]);
ibld.MUL(high, subscript(inst->src[0], BRW_REGISTER_TYPE_UW, 1),
}
} else if (inst->opcode == SHADER_OPCODE_MULH) {
+ /* According to the BDW+ BSpec page for the "Multiply Accumulate
+ * High" instruction:
+ *
+ * "An added preliminary mov is required for source modification on
+ * src1:
+ * mov (8) r3.0<1>:d -r3<8;8,1>:d
+ * mul (8) acc0:d r2.0<8;8,1>:d r3.0<16;8,2>:uw
+ * mach (8) r5.0<1>:d r2.0<8;8,1>:d r3.0<8;8,1>:d"
+ */
+ if (devinfo->gen >= 8 && (inst->src[1].negate || inst->src[1].abs))
+ lower_src_modifiers(this, block, inst, 1);
+
/* Should have been lowered to 8-wide. */
assert(inst->exec_size <= get_lowered_simd_width(devinfo, inst));
const fs_reg acc = retype(brw_acc_reg(inst->exec_size),
* On Gen8, the multiply instruction does a full 32x32-bit
* multiply, but in order to do a 64-bit multiply we can simulate
* the previous behavior and then use a MACH instruction.
- *
- * FINISHME: Don't use source modifiers on src1.
*/
assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
mul->src[1].type == BRW_REGISTER_TYPE_UD);
return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
}
}
+
+ /**
+ * Remove any modifiers from the \p i-th source region of the instruction,
+ * including negate, abs and any implicit type conversion to the execution
+ * type. Instead any source modifiers will be implemented as a separate
+ * MOV instruction prior to the original instruction.
+ */
+ inline bool
+ lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i)
+ {
+ assert(inst->components_read(i) == 1);
+ const fs_builder ibld(v, block, inst);
+ const fs_reg tmp = ibld.vgrf(get_exec_type(inst));
+
+ ibld.MOV(tmp, inst->src[i]);
+ inst->src[i] = tmp;
+
+ return true;
+ }
}
void shuffle_from_32bit_read(const brw::fs_builder &bld,