{
unsigned vstride, width;
- if (inst->opcode == FS_OPCODE_DDX_FINE) {
-    /* produce accurate derivatives */
-    vstride = BRW_VERTICAL_STRIDE_2;
-    width = BRW_WIDTH_2;
- } else {
-    /* replicate the derivative at the top-left pixel to other pixels */
-    vstride = BRW_VERTICAL_STRIDE_4;
-    width = BRW_WIDTH_4;
- }
- struct brw_reg src0 = byte_offset(src, type_sz(src.type));;
- struct brw_reg src1 = src;
- src0.vstride = vstride;
- src0.width = width;
- src0.hstride = BRW_HORIZONTAL_STRIDE_0;
- src1.vstride = vstride;
- src1.width = width;
- src1.hstride = BRW_HORIZONTAL_STRIDE_0;
- brw_ADD(p, dst, src0, negate(src1));
+ if (devinfo->gen >= 8) {
+    if (inst->opcode == FS_OPCODE_DDX_FINE) {
+       /* produce accurate derivatives */
+       vstride = BRW_VERTICAL_STRIDE_2;
+       width = BRW_WIDTH_2;
+    } else {
+       /* replicate the derivative at the top-left pixel to other pixels */
+       vstride = BRW_VERTICAL_STRIDE_4;
+       width = BRW_WIDTH_4;
+    }
+
+    struct brw_reg src0 = byte_offset(src, type_sz(src.type));
+    struct brw_reg src1 = src;
+
+    src0.vstride = vstride;
+    src0.width = width;
+    src0.hstride = BRW_HORIZONTAL_STRIDE_0;
+    src1.vstride = vstride;
+    src1.width = width;
+    src1.hstride = BRW_HORIZONTAL_STRIDE_0;
+
+    brw_ADD(p, dst, src0, negate(src1));
+ } else {
+    /* On Haswell and earlier, the region used above appears to not work
+     * correctly for compressed instructions. At least on Haswell and
+     * Iron Lake, compressed ALIGN16 instructions do work. Since we
+     * would have to split to SIMD8 no matter which method we choose, we
+     * may as well use ALIGN16 on all platforms gen7 and earlier.
+     */
+    struct brw_reg src0 = stride(src, 4, 4, 1);
+    struct brw_reg src1 = stride(src, 4, 4, 1);
+    if (inst->opcode == FS_OPCODE_DDX_FINE) {
+       src0.swizzle = BRW_SWIZZLE_XXZZ;
+       src1.swizzle = BRW_SWIZZLE_YYWW;
+    } else {
+       src0.swizzle = BRW_SWIZZLE_XXXX;
+       src1.swizzle = BRW_SWIZZLE_YYYY;
+    }
+
+    brw_push_insn_state(p);
+    brw_set_default_access_mode(p, BRW_ALIGN_16);
+    brw_ADD(p, dst, negate(src0), src1);
+    brw_pop_insn_state(p);
+ }
}
}
} else {
/* replicate the derivative at the top-left pixel to other pixels */
- struct brw_reg src0 = byte_offset(stride(src, 4, 4, 0), 0 * type_size);
- struct brw_reg src1 = byte_offset(stride(src, 4, 4, 0), 2 * type_size);
- brw_ADD(p, dst, negate(src0), src1);
+ if (devinfo->gen >= 8) {
+    struct brw_reg src0 = byte_offset(stride(src, 4, 4, 0), 0 * type_size);
+    struct brw_reg src1 = byte_offset(stride(src, 4, 4, 0), 2 * type_size);
+
+    brw_ADD(p, dst, negate(src0), src1);
+ } else {
+    /* On Haswell and earlier, the region used above appears to not work
+     * correctly for compressed instructions. At least on Haswell and
+     * Iron Lake, compressed ALIGN16 instructions do work. Since we
+     * would have to split to SIMD8 no matter which method we choose, we
+     * may as well use ALIGN16 on all platforms gen7 and earlier.
+     */
+    struct brw_reg src0 = stride(src, 4, 4, 1);
+    struct brw_reg src1 = stride(src, 4, 4, 1);
+    src0.swizzle = BRW_SWIZZLE_XXXX;
+    src1.swizzle = BRW_SWIZZLE_ZZZZ;
+
+    brw_push_insn_state(p);
+    brw_set_default_access_mode(p, BRW_ALIGN_16);
+    brw_ADD(p, dst, negate(src0), src1);
+    brw_pop_insn_state(p);
+ }
}
}
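
For context: the hardware shades pixels in 2x2 quads ("subspans"), with
channels 0 and 1 covering the top row and channels 2 and 3 the bottom row,
and brw_ADD(p, dst, negate(a), b) evaluates b - a. The following standalone
C sketch models what the swizzled ALIGN16 ADDs in the patch compute for one
quad; the helper names and sample values are illustrative assumptions, not
driver code.

#include <stdio.h>

/* One 2x2 subspan: v[0] v[1] = top row, v[2] v[3] = bottom row. */

static void ddx_fine(const float v[4], float out[4])
{
   /* XXZZ/YYWW swizzles: right-minus-left difference within each row */
   out[0] = out[1] = v[1] - v[0];
   out[2] = out[3] = v[3] - v[2];
}

static void ddx_coarse(const float v[4], float out[4])
{
   /* XXXX/YYYY swizzles: top-row difference replicated to all channels */
   out[0] = out[1] = out[2] = out[3] = v[1] - v[0];
}

static void ddy_coarse(const float v[4], float out[4])
{
   /* XXXX/ZZZZ swizzles: bottom-minus-top difference replicated */
   out[0] = out[1] = out[2] = out[3] = v[2] - v[0];
}

int main(void)
{
   const float quad[4] = { 1.0f, 4.0f, 2.0f, 8.0f };
   float fine[4], dx[4], dy[4];

   ddx_fine(quad, fine);
   ddx_coarse(quad, dx);
   ddy_coarse(quad, dy);
   printf("ddx_fine rows = %.1f %.1f, ddx_coarse = %.1f, ddy_coarse = %.1f\n",
          fine[0], fine[2], dx[0], dy[0]);
   return 0;
}

The same subtractions explain the gen8+ paths: the DDX path byte-offsets
src0 to the right-hand pixel directly, so its operand order flips to
src0 + negate(src1), while the DDY path keeps negate(src0) + src1 with src1
offset to the bottom row. All paths yield right-minus-left (DDX) or
bottom-minus-top (DDY) differences; only the regioning mechanism differs.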