i965/compiler: handle conversion to smaller type in the lowering pass for that

author Iago Toral Quiroga <itoral@igalia.com>

Fri, 4 May 2018 09:33:07 +0000 (11:33 +0200)

committer Iago Toral Quiroga <itoral@igalia.com>

Sat, 5 May 2018 10:41:02 +0000 (12:41 +0200)
author Iago Toral Quiroga <itoral@igalia.com>
Fri, 4 May 2018 09:33:07 +0000 (11:33 +0200)
committer Iago Toral Quiroga <itoral@igalia.com>
Sat, 5 May 2018 10:41:02 +0000 (12:41 +0200)
diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp b/src/intel/compiler/brw_fs_lower_conversions.cpp

index 663c9674c494dd45500dc6504ae8e53f0fa7a19e..e27e2402746e1d21a1c7470dc32e917bc209c7f8 100644 (file)
--- a/src/intel/compiler/brw_fs_lower_conversions.cpp
+++ b/src/intel/compiler/brw_fs_lower_conversions.cpp
@@ -43,6 +43,24 @@ supports_type_conversion(const fs_inst *inst) {
     }
  }
  
+/* From the SKL PRM Vol 2a, "Move":
+ *
+ *    "A mov with the same source and destination type, no source modifier,
+ *     and no saturation is a raw move. A packed byte destination region (B
+ *     or UB type with HorzStride == 1 and ExecSize > 1) can only be written
+ *     using raw move."
+ */
+static bool
+is_byte_raw_mov (const fs_inst *inst)
+{
+   /* Only a MOV that writes a byte-sized destination can qualify. */
+   if (type_sz(inst->dst.type) != 1 || inst->opcode != BRW_OPCODE_MOV)
+      return false;
+   /* Raw: identical src/dst type, no saturation, no source modifiers. */
+   return inst->src[0].type == inst->dst.type &&
+          !(inst->saturate || inst->src[0].negate || inst->src[0].abs);
+}
+
  bool
  fs_visitor::lower_conversions()
  {
@@ -54,7 +72,8 @@ fs_visitor::lower_conversions()
        bool saturate = inst->saturate;
  
        if (supports_type_conversion(inst)) {
-         if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
+         if (type_sz(inst->dst.type) < get_exec_type_size(inst) &&
+             !is_byte_raw_mov(inst)) {
              /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
               * Single Precision Float":
               *
@@ -64,6 +83,9 @@ fs_visitor::lower_conversions()
               * So we need to allocate a temporary that's two registers, and then do
               * a strided MOV to get the lower DWord of every Qword that has the
               * result.
+             *
+             * This restriction applies, in general, whenever we convert to
+             * a type with a smaller bit-size.
               */
              fs_reg temp = ibld.vgrf(get_exec_type(inst));
              fs_reg strided_temp = subscript(temp, dst.type, 0);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp

index c7f7bc21b8a001b25c66e338f185452e43f2b91d..1ce89520bf1b744d748677f829a6f02b0978fba4 100644 (file)
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -755,19 +755,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
         */
  
     case nir_op_f2f16_undef:
-   case nir_op_i2i16:
-   case nir_op_u2u16: {
-      /* TODO: Fixing aligment rules for conversions from 32-bits to
-       * 16-bit types should be moved to lower_conversions
-       */
-      fs_reg tmp = bld.vgrf(op[0].type, 1);
-      tmp = subscript(tmp, result.type, 0);
-      inst = bld.MOV(tmp, op[0]);
-      inst->saturate = instr->dest.saturate;
-      inst = bld.MOV(result, tmp);
+      inst = bld.MOV(result, op[0]);
        inst->saturate = instr->dest.saturate;
        break;
-   }
  
     case nir_op_f2f64:
     case nir_op_f2i64:
@@ -807,6 +797,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
     case nir_op_f2u16:
     case nir_op_i2i32:
     case nir_op_u2u32:
+   case nir_op_i2i16:
+   case nir_op_u2u16:
     case nir_op_i2f16:
     case nir_op_u2f16:
        inst = bld.MOV(result, op[0]);
author	Iago Toral Quiroga <itoral@igalia.com>
	Fri, 4 May 2018 09:33:07 +0000 (11:33 +0200)
committer	Iago Toral Quiroga <itoral@igalia.com>
	Sat, 5 May 2018 10:41:02 +0000 (12:41 +0200)
src/intel/compiler/brw_fs_lower_conversions.cpp		patch \| blob \| history
src/intel/compiler/brw_fs_nir.cpp		patch \| blob \| history