i965/fs: Implement workaround for IVB CMP dependency race in the SIMD lowering pass.
author Francisco Jerez <currojerez@riseup.net>
Tue, 17 May 2016 22:58:04 +0000 (15:58 -0700)
committer Francisco Jerez <currojerez@riseup.net>
Sat, 28 May 2016 06:19:22 +0000 (23:19 -0700)
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
src/mesa/drivers/dri/i965/brw_fs.cpp

index b1cd0d959fb53cd1a006d3775750b59cbd063e80..0b7c84a9e40dc352573b2d752651b6410deeb643 100644 (file)
@@ -4739,7 +4739,6 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
    case BRW_OPCODE_SHR:
    case BRW_OPCODE_SHL:
    case BRW_OPCODE_ASR:
-   case BRW_OPCODE_CMP:
    case BRW_OPCODE_CMPN:
    case BRW_OPCODE_CSEL:
    case BRW_OPCODE_F32TO16:
@@ -4766,6 +4765,23 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
    case FS_OPCODE_PACK:
       return get_fpu_lowered_simd_width(devinfo, inst);
 
+   case BRW_OPCODE_CMP: {
+      /* The Ivybridge/BayTrail WaCMPInstFlagDepClearedEarly workaround says that
+       * when the destination is a GRF the dependency-clear bit on the flag
+       * register is cleared early.
+       *
+       * Suggested workarounds are to disable coissuing CMP instructions
+       * or to split CMP(16) instructions into two CMP(8) instructions.
+       *
+       * We choose to split into CMP(8) instructions since disabling
+       * coissuing would affect CMP instructions not otherwise affected by
+       * the erratum.
+       */
+      const unsigned max_width = (devinfo->gen == 7 && !devinfo->is_haswell &&
+                                  !inst->dst.is_null() ? 8 : ~0);
+      return MIN2(max_width, get_fpu_lowered_simd_width(devinfo, inst));
+   }
+
    case SHADER_OPCODE_RCP:
    case SHADER_OPCODE_RSQ:
    case SHADER_OPCODE_SQRT: