i965/fs: indirect addressing with doubles is not supported in CHV/BSW/BXT
author: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Mon, 13 Jun 2016 06:29:53 +0000 (08:29 +0200)
committer: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Fri, 17 Jun 2016 09:33:18 +0000 (11:33 +0200)
From the Cherryview PRM, Volume 7, 3D Media GPGPU Engine, section "Register
Region Restrictions", page 844:

  "When source or destination datatype is 64b or operation is integer DWord
   multiply, indirect addressing must not be used."

v2:
- Fix it for Broxton too.

v3:
- Simplify code by using subscript() and not creating a new num_components
variable (Kenneth).

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Cc: "12.0" <mesa-stable@lists.freedesktop.org>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95462
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index d72b37b5854f32ec62cfff0bb70accc88d6edd21..ad9b421fff4a2fc98af93704d6745e3f29699a13 100644 (file)
@@ -3611,10 +3611,32 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
          unsigned read_size = instr->const_index[1] -
             (instr->num_components - 1) * type_sz(dest.type);
 
+         fs_reg indirect_chv_high_32bit;
+         bool is_chv_bxt_64bit =
+            (devinfo->is_cherryview || devinfo->is_broxton) &&
+            type_sz(dest.type) == 8;
+         if (is_chv_bxt_64bit) {
+            indirect_chv_high_32bit = vgrf(glsl_type::uint_type);
+            /* Calculate indirect address to read high 32 bits */
+            bld.ADD(indirect_chv_high_32bit, indirect, brw_imm_ud(4));
+         }
+
          for (unsigned j = 0; j < instr->num_components; j++) {
-            bld.emit(SHADER_OPCODE_MOV_INDIRECT,
-                     offset(dest, bld, j), offset(src, bld, j),
-                     indirect, brw_imm_ud(read_size));
+            if (!is_chv_bxt_64bit) {
+               bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+                        offset(dest, bld, j), offset(src, bld, j),
+                        indirect, brw_imm_ud(read_size));
+            } else {
+               bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+                        subscript(offset(dest, bld, j), BRW_REGISTER_TYPE_UD, 0),
+                        offset(src, bld, j),
+                        indirect, brw_imm_ud(read_size));
+
+               bld.emit(SHADER_OPCODE_MOV_INDIRECT,
+                        subscript(offset(dest, bld, j), BRW_REGISTER_TYPE_UD, 1),
+                        offset(src, bld, j),
+                        indirect_chv_high_32bit, brw_imm_ud(read_size));
+            }
          }
       }
       break;