intel/eu: Fix broadcast instruction for 64-bit values on little-core
author: Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 17 Oct 2017 21:45:12 +0000 (14:45 -0700)
committer: Jason Ekstrand <jason.ekstrand@intel.com>
Tue, 7 Nov 2017 18:37:52 +0000 (10:37 -0800)
We're not using broadcast for any 64-bit types right now since we mostly
use it for emit_uniformize on 32-bit buffer indices.  However, SPIR-V
subgroups are going to need it for 64-bit so let's make it work.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/intel/compiler/brw_eu_emit.c

index a18cfa4239f7b1b7729e881d5a6143e9e4eeafe2..fae74cf80abf02e199fb35ae5d493659dcb50ca2 100644 (file)
@@ -3430,8 +3430,30 @@ brw_broadcast(struct brw_codegen *p,
          brw_pop_insn_state(p);
 
          /* Use indirect addressing to fetch the specified component. */
-         brw_MOV(p, dst,
-                 retype(brw_vec1_indirect(addr.subnr, offset), src.type));
+         if (type_sz(src.type) > 4 &&
+             (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
+            /* From the Cherryview PRM Vol 7. "Register Region Restrictions":
+             *
+             *    "When source or destination datatype is 64b or operation is
+             *    integer DWord multiply, indirect addressing must not be
+             *    used."
+             *
+             * To work around this issue, we do two integer MOVs instead of
+             * one 64-bit MOV.  Because no double value should ever
+             * cross a register boundary, it's safe to use the immediate
+             * offset in the indirect here to handle adding 4 bytes to the
+             * offset and avoid the extra ADD to the register file.
+             */
+            brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
+                       retype(brw_vec1_indirect(addr.subnr, offset),
+                              BRW_REGISTER_TYPE_D));
+            brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
+                       retype(brw_vec1_indirect(addr.subnr, offset + 4),
+                              BRW_REGISTER_TYPE_D));
+         } else {
+            brw_MOV(p, dst,
+                    retype(brw_vec1_indirect(addr.subnr, offset), src.type));
+         }
       } else {
          /* In SIMD4x2 mode the index can be either zero or one, replicate it
           * to all bits of a flag register,