intel/fs: Remove nasty open-coded CHV/BXT 64-bit workarounds.
author Francisco Jerez <currojerez@riseup.net>
Fri, 7 Dec 2018 23:40:43 +0000 (15:40 -0800)
committer Francisco Jerez <currojerez@riseup.net>
Wed, 9 Jan 2019 20:03:09 +0000 (12:03 -0800)
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/intel/compiler/brw_fs_builder.h
src/intel/compiler/brw_fs_nir.cpp

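diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index 4846820722cca569e1545ade08ce6b48021de4d6..c50af4c1f55053b35cf94b46ba33d3121e69ef06 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h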
@@ -451,43 +451,13 @@ namespace brw {
 
          if (cluster_size > 1) {
             const fs_builder ubld = exec_all().group(dispatch_width() / 2, 0);
-            dst_reg left = horiz_stride(tmp, 2);
-            dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2);
-
-            /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn":
-             *
-             *    "When source or destination datatype is 64b or operation is
-             *    integer DWord multiply, regioning in Align1 must follow
-             *    these rules:
-             *
-             *    [...]
-             *
-             *    3. Source and Destination offset must be the same, except
-             *       the case of scalar source."
-             *
-             * In order to work around this, we create a temporary register
-             * and shift left over to match right.  If we have a 64-bit type,
-             * we have to use two integer MOVs instead of a 64-bit MOV.
-             */
-            if (need_matching_subreg_offset(opcode, tmp.type)) {
-               dst_reg tmp2 = vgrf(tmp.type);
-               dst_reg new_left = horiz_stride(horiz_offset(tmp2, 1), 2);
-               if (type_sz(tmp.type) > 4) {
-                  ubld.MOV(subscript(new_left, BRW_REGISTER_TYPE_D, 0),
-                           subscript(left, BRW_REGISTER_TYPE_D, 0));
-                  ubld.MOV(subscript(new_left, BRW_REGISTER_TYPE_D, 1),
-                           subscript(left, BRW_REGISTER_TYPE_D, 1));
-               } else {
-                  ubld.MOV(new_left, left);
-               }
-               left = new_left;
-            }
+            const dst_reg left = horiz_stride(tmp, 2);
+            const dst_reg right = horiz_stride(horiz_offset(tmp, 1), 2);
             set_condmod(mod, ubld.emit(opcode, right, left, right));
          }
 
          if (cluster_size > 2) {
-            if (type_sz(tmp.type) <= 4 &&
-                !need_matching_subreg_offset(opcode, tmp.type)) {
+            if (type_sz(tmp.type) <= 4) {
                const fs_builder ubld =
                   exec_all().group(dispatch_width() / 4, 0);
                src_reg left = horiz_stride(horiz_offset(tmp, 1), 4);
@@ -787,38 +757,6 @@ namespace brw {
          }
       }
 
-
-      /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn":
-       *
-       *    "When source or destination datatype is 64b or operation is
-       *    integer DWord multiply, regioning in Align1 must follow
-       *    these rules:
-       *
-       *    [...]
-       *
-       *    3. Source and Destination offset must be the same, except
-       *       the case of scalar source."
-       *
-       * This helper just detects when we're in this case.
-       */
-      bool
-      need_matching_subreg_offset(enum opcode opcode,
-                                  enum brw_reg_type type) const
-      {
-         if (!shader->devinfo->is_cherryview &&
-             !gen_device_info_is_9lp(shader->devinfo))
-            return false;
-
-         if (type_sz(type) > 4)
-            return true;
-
-         if (opcode == BRW_OPCODE_MUL &&
-             !brw_reg_type_is_floating_point(type))
-            return true;
-
-         return false;
-      }
-
       bblock_t *block;
       exec_node *cursor;
 
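diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 92ec85a27cc3aa70551d458a7adb06904c1fb0e0..312cd22de799386b924445123131ac7e6223ae8a 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp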
@@ -805,30 +805,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
    case nir_op_i2i64:
    case nir_op_u2f64:
    case nir_op_u2u64:
-      /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions:
-       *
-       *    "When source or destination is 64b (...), regioning in Align1
-       *     must follow these rules:
-       *
-       *     1. Source and destination horizontal stride must be aligned to
-       *        the same qword.
-       *     (...)"
-       *
-       * This means that conversions from bit-sizes smaller than 64-bit to
-       * 64-bit need to have the source data elements aligned to 64-bit.
-       * This restriction does not apply to BDW and later.
-       */
-      if (nir_dest_bit_size(instr->dest.dest) == 64 &&
-          nir_src_bit_size(instr->src[0].src) < 64 &&
-          (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
-         fs_reg tmp = bld.vgrf(result.type, 1);
-         tmp = subscript(tmp, op[0].type, 0);
-         inst = bld.MOV(tmp, op[0]);
-         inst = bld.MOV(result, tmp);
-         inst->saturate = instr->dest.saturate;
-         break;
-      }
-      /* fallthrough */
    case nir_op_f2f32:
    case nir_op_f2i32:
    case nir_op_f2u32:
@@ -1463,36 +1439,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       unreachable("not reached: should have been lowered");
 
    case nir_op_ishl:
+      bld.SHL(result, op[0], op[1]);
+      break;
    case nir_op_ishr:
-   case nir_op_ushr: {
-      fs_reg shift_count = op[1];
-
-      if (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) {
-         if (op[1].file == VGRF &&
-             (result.type == BRW_REGISTER_TYPE_Q ||
-              result.type == BRW_REGISTER_TYPE_UQ)) {
-            shift_count = fs_reg(VGRF, alloc.allocate(dispatch_width / 4),
-                                 BRW_REGISTER_TYPE_UD);
-            shift_count.stride = 2;
-            bld.MOV(shift_count, op[1]);
-         }
-      }
-
-      switch (instr->op) {
-      case nir_op_ishl:
-         bld.SHL(result, op[0], shift_count);
-         break;
-      case nir_op_ishr:
-         bld.ASR(result, op[0], shift_count);
-         break;
-      case nir_op_ushr:
-         bld.SHR(result, op[0], shift_count);
-         break;
-      default:
-         unreachable("not reached");
-      }
+      bld.ASR(result, op[0], op[1]);
+      break;
+   case nir_op_ushr:
+      bld.SHR(result, op[0], op[1]);
       break;
-   }
 
    case nir_op_pack_half_2x16_split:
       bld.emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
@@ -4414,34 +4368,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       const fs_reg tmp_left = horiz_stride(tmp, 2);
       const fs_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2);
 
-      /* From the Cherryview PRM Vol. 7, "Register Region Restrictiosn":
-       *
-       *    "When source or destination datatype is 64b or operation is
-       *    integer DWord multiply, regioning in Align1 must follow
-       *    these rules:
-       *
-       *    [...]
-       *
-       *    3. Source and Destination offset must be the same, except
-       *       the case of scalar source."
-       *
-       * In order to work around this, we have to emit two 32-bit MOVs instead
-       * of a single 64-bit MOV to do the shuffle.
-       */
-      if (type_sz(value.type) > 4 &&
-          (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
-         ubld.MOV(subscript(tmp_left, BRW_REGISTER_TYPE_D, 0),
-                  subscript(src_right, BRW_REGISTER_TYPE_D, 0));
-         ubld.MOV(subscript(tmp_left, BRW_REGISTER_TYPE_D, 1),
-                  subscript(src_right, BRW_REGISTER_TYPE_D, 1));
-         ubld.MOV(subscript(tmp_right, BRW_REGISTER_TYPE_D, 0),
-                  subscript(src_left, BRW_REGISTER_TYPE_D, 0));
-         ubld.MOV(subscript(tmp_right, BRW_REGISTER_TYPE_D, 1),
-                  subscript(src_left, BRW_REGISTER_TYPE_D, 1));
-      } else {
-         ubld.MOV(tmp_left, src_right);
-         ubld.MOV(tmp_right, src_left);
-      }
+      ubld.MOV(tmp_left, src_right);
+      ubld.MOV(tmp_right, src_left);
+
       bld.MOV(retype(dest, value.type), tmp);
       break;
    }