i965: Use source region <1,2,0> when converting to DF.
author Matt Turner <mattst88@gmail.com>
Fri, 13 Jan 2017 02:05:58 +0000 (18:05 -0800)
committer Francisco Jerez <currojerez@riseup.net>
Fri, 14 Apr 2017 21:56:08 +0000 (14:56 -0700)
Doing so allows us to use a single MOV in VEC4_OPCODE_TO_DOUBLE instead
of two.

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
src/intel/compiler/brw_eu_emit.c
src/intel/compiler/brw_vec4_generator.cpp

index 058742d4f6e733cf90e45ca9f179d89bf599701d..8637310a35aeaa789be7e61c50a236044e1996ea 100644 (file)
@@ -1089,7 +1089,6 @@ void brw_##OP(struct brw_codegen *p,                                            \
 }
 
 
-ALU1(MOV)
 ALU2(SEL)
 ALU1(NOT)
 ALU2(AND)
@@ -1123,6 +1122,33 @@ ALU2(SUBB)
 ROUND(RNDZ)
 ROUND(RNDE)
 
+brw_inst *
+brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+
+   /* When converting F->DF on IVB/BYT, every odd source channel is ignored.
+    * To avoid the problems that causes, we use a <1,2,0> source region to read
+    * each element twice.
+    */
+   if (devinfo->gen == 7 && !devinfo->is_haswell &&
+       brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 &&
+       dest.type == BRW_REGISTER_TYPE_DF &&
+       (src0.type == BRW_REGISTER_TYPE_F ||
+        src0.type == BRW_REGISTER_TYPE_D ||
+        src0.type == BRW_REGISTER_TYPE_UD) &&
+       !has_scalar_region(src0)) {
+      assert(src0.vstride == BRW_VERTICAL_STRIDE_4 &&
+             src0.width == BRW_WIDTH_4 &&
+             src0.hstride == BRW_HORIZONTAL_STRIDE_1);
+
+      src0.vstride = BRW_VERTICAL_STRIDE_1;
+      src0.width = BRW_WIDTH_2;
+      src0.hstride = BRW_HORIZONTAL_STRIDE_0;
+   }
+
+   return brw_alu1(p, BRW_OPCODE_MOV, dest, src0);
+}
 
 brw_inst *
 brw_ADD(struct brw_codegen *p, struct brw_reg dest,
index 2ac287f17fa0baffdb0ee21d413e0ecbab73eee6..d3192ab7db3b64b4456c7c6360f3ac0c6b46fce0 100644 (file)
@@ -1958,18 +1958,7 @@ generate_code(struct brw_codegen *p,
 
          brw_set_default_access_mode(p, BRW_ALIGN_1);
 
-         struct brw_reg tmp = retype(dst, src[0].type);
-         tmp.hstride = BRW_HORIZONTAL_STRIDE_2;
-         tmp.width = BRW_WIDTH_4;
-         src[0].vstride = BRW_VERTICAL_STRIDE_4;
-         src[0].hstride = BRW_HORIZONTAL_STRIDE_1;
-         src[0].width = BRW_WIDTH_4;
-         brw_MOV(p, tmp, src[0]);
-
-         tmp.vstride = BRW_VERTICAL_STRIDE_8;
-         tmp.hstride = BRW_HORIZONTAL_STRIDE_2;
-         tmp.width = BRW_WIDTH_4;
-         brw_MOV(p, dst, tmp);
+         brw_MOV(p, dst, src[0]);
 
          brw_set_default_access_mode(p, BRW_ALIGN_16);
          break;