i965/fs: Actually set/use the mlen for gen7 uniform pull constant loads
authorJason Ekstrand <jason.ekstrand@intel.com>
Fri, 19 Jun 2015 19:58:37 +0000 (12:58 -0700)
committerJason Ekstrand <jason.ekstrand@intel.com>
Tue, 30 Jun 2015 22:57:56 +0000 (15:57 -0700)
Previously, we were allocating the payload with different sizes per gen and
then figuring out the mlen in the generator based on gen.  This meant,
among other things, that the higher level passes knew nothing about it.

Acked-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_generator.cpp

index 79ca33e42ed2b53e3966233cebc695aa66823b9e..94f42949ce2afe4e02d445121dcc27ec5b23266c 100644 (file)
@@ -2909,14 +2909,18 @@ fs_visitor::lower_uniform_pull_constant_loads()
          assert(const_offset_reg.file == IMM &&
                 const_offset_reg.type == BRW_REGISTER_TYPE_UD);
          const_offset_reg.fixed_hw_reg.dw1.ud /= 4;
-         fs_reg payload = fs_reg(GRF, alloc.allocate(1));
 
-         /* We have to use a message header on Skylake to get SIMD4x2 mode.
-          * Reserve space for the register.
-          */
+         fs_reg payload, offset;
          if (devinfo->gen >= 9) {
-            payload.reg_offset++;
-            alloc.sizes[payload.reg] = 2;
+            /* We have to use a message header on Skylake to get SIMD4x2
+             * mode.  Reserve space for the register.
+            */
+            offset = payload = fs_reg(GRF, alloc.allocate(2));
+            offset.reg_offset++;
+            inst->mlen = 2;
+         } else {
+            offset = payload = fs_reg(GRF, alloc.allocate(1));
+            inst->mlen = 1;
          }
 
          /* This is actually going to be a MOV, but since only the first dword
@@ -2925,7 +2929,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
           * by live variable analysis, or register allocation will explode.
           */
          fs_inst *setup = new(mem_ctx) fs_inst(FS_OPCODE_SET_SIMD4X2_OFFSET,
-                                               8, payload, const_offset_reg);
+                                               8, offset, const_offset_reg);
          setup->force_writemask_all = true;
 
          setup->ir = inst->ir;
@@ -2938,6 +2942,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
           */
          inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7;
          inst->src[1] = payload;
+         inst->base_mrf = -1;
 
          invalidate_live_intervals();
       } else {
index 2ed0bac6fd9d520d874f9e9e6122e452e44fe218..8d821abbac23f49679308af47de4819a7ee7cebf 100644 (file)
@@ -1054,7 +1054,6 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
                                                        struct brw_reg index,
                                                        struct brw_reg offset)
 {
-   assert(inst->mlen == 0);
    assert(index.type == BRW_REGISTER_TYPE_UD);
 
    assert(offset.file == BRW_GENERAL_REGISTER_FILE);
@@ -1069,12 +1068,10 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
 
    struct brw_reg src = offset;
    bool header_present = false;
-   int mlen = 1;
 
    if (devinfo->gen >= 9) {
       /* Skylake requires a message header in order to use SIMD4x2 mode. */
-      src = retype(brw_vec4_grf(offset.nr - 1, 0), BRW_REGISTER_TYPE_UD);
-      mlen = 2;
+      src = retype(brw_vec4_grf(offset.nr, 0), BRW_REGISTER_TYPE_UD);
       header_present = true;
 
       brw_push_insn_state(p);
@@ -1105,7 +1102,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
                               0, /* LD message ignores sampler unit */
                               GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                               1, /* rlen */
-                              mlen,
+                              inst->mlen,
                               header_present,
                               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                               0);
@@ -1135,7 +1132,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
                               0, /* LD message ignores sampler unit */
                               GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                               1, /* rlen */
-                              mlen,
+                              inst->mlen,
                               header_present,
                               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                               0);