}
case nir_intrinsic_load_uniform: {
- /* Offsets are in bytes but they should always be multiples of 16 */
- assert(instr->const_index[0] % 16 == 0);
+ /* Offsets are in bytes but they should always be multiples of 4 */
+ assert(nir_intrinsic_base(instr) % 4 == 0);
dest = get_nir_dest(instr->dest);
src = src_reg(dst_reg(UNIFORM, instr->const_index[0] / 16));
src.type = dest.type;
+ /* Uniforms don't actually have to be vec4 aligned. In the case that
+ * it isn't, we have to use a swizzle to shift things around. They
+ * do still have the std140 alignment requirement that vec2's have to
+ * be vec2-aligned and vec3's and vec4's have to be vec4-aligned.
+ *
+ * The swizzle also works in the indirect case as the generator adds
+ * the swizzle to the offset for us.
+ */
+ unsigned shift = (nir_intrinsic_base(instr) % 16) / 4;
+ assert(shift + instr->num_components <= 4);
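+ /* For example, a vec2 uniform at base offset 8 is 4-byte aligned but
+ * not vec4-aligned: shift = (8 % 16) / 4 = 2, so the swizzle below
+ * bumps each channel up by two and the load reads the .zw half of the
+ * 16-byte slot. The assert holds since 2 + 2 <= 4.
+ */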
+
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
- /* Offsets are in bytes but they should always be multiples of 16 */
- assert(const_offset->u32[0] % 16 == 0);
- src.reg_offset = const_offset->u32[0] / 16;
+ /* Offsets are in bytes but they should always be multiples of 4 */
+ assert(const_offset->u32[0] % 4 == 0);
+
+ unsigned offset = const_offset->u32[0] + shift * 4;
+ src.reg_offset = offset / 16;
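+ /* The constant offset only has to be 4-byte aligned, so folding it in
+ * can move the start channel within the 16-byte register. reg_offset
+ * takes the whole registers; recompute the channel shift from the
+ * remainder of the combined offset.
+ */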
+ shift = (offset % 16) / 4;
+ src.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);
emit(MOV(dest, src));
} else {
+ src.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);
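+ /* Assuming src.swizzle is still the identity XYZW here (the default
+ * for a fresh UNIFORM reg), the addition bumps each 2-bit swizzle
+ * channel by shift; the assert above keeps the channels that are
+ * actually read in range.
+ */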
+
src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
+ /* MOV_INDIRECT is going to stomp the whole thing anyway */
+ dest.writemask = WRITEMASK_XYZW;
+
emit(SHADER_OPCODE_MOV_INDIRECT, dest, src,
indirect, brw_imm_ud(instr->const_index[1]));
}