i965/fs: Handle nir shared variable store intrinsic
author Jordan Justen <jordan.l.justen@intel.com>
Tue, 28 Jul 2015 22:25:46 +0000 (15:25 -0700)
committer Jordan Justen <jordan.l.justen@intel.com>
Thu, 10 Dec 2015 07:50:38 +0000 (23:50 -0800)
v4:
 * Apply an optimization to shared variable stores similar to the one in
   commit 0cb7d7b4b7c32246d4c4225a1d17d7ff79a7526d. This was causing an
   OpenGLES 3.1 CTS failure, but commit
   867c436ca841b4196b4dde4786f5086c76b20dd7 fixes that.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
src/mesa/drivers/dri/i965/brw_fs_nir.cpp

index c9da49564a7812aef3e587150759e261eface796..4b7ea1756e9a7476cb477da55b12e15422ad03ea 100644 (file)
@@ -2455,6 +2455,54 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_store_shared_indirect:
+      has_indirect = true;
+      /* fallthrough */
+   case nir_intrinsic_store_shared: {
+      assert(devinfo->gen >= 7);
+
+      /* Block index */
+      fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
+
+      /* Value */
+      fs_reg val_reg = get_nir_src(instr->src[0]);
+
+      /* Writemask */
+      unsigned writemask = instr->const_index[1];
+
+      /* Combine groups of consecutive enabled channels in one write
+       * message. We use ffs to find the first enabled channel and then ffs on
+       * the bit-inverse, down-shifted writemask to determine the length of
+       * the block of enabled bits.
+       */
+      while (writemask) {
+         unsigned first_component = ffs(writemask) - 1;
+         unsigned length = ffs(~(writemask >> first_component)) - 1;
+         fs_reg offset_reg;
+
+         if (!has_indirect) {
+            offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component);
+         } else {
+            offset_reg = vgrf(glsl_type::uint_type);
+            bld.ADD(offset_reg,
+                    retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
+                    brw_imm_ud(4 * first_component));
+         }
+
+         emit_untyped_write(bld, surf_index, offset_reg,
+                            offset(val_reg, bld, first_component),
+                            1 /* dims */, length,
+                            BRW_PREDICATE_NONE);
+
+         /* Clear the bits in the writemask that we just wrote, then try
+          * again to see if more channels are left.
+          */
+         writemask &= (15 << (first_component + length));
+      }
+
+      break;
+   }
+
    case nir_intrinsic_load_input_indirect:
       unreachable("Not allowed");
       /* fallthrough */