X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vec4_visitor.cpp;h=76b2a05700f6b06d65070f159515186a70c53bb8;hb=cea360a7087f9533ce596f052070195254a28c9e;hp=08a1f8bb77c1905893ec49d8525bd6e0a9804459;hpb=f9a9ba5eac2f1934bd7fecc92cd309f22411164b;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 08a1f8bb77c..76b2a05700f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -23,8 +23,8 @@
 
 #include "brw_vec4.h"
 #include "brw_cfg.h"
-#include "glsl/ir_uniform.h"
-#include "program/sampler.h"
+#include "brw_eu.h"
+#include "brw_program.h"
 
 namespace brw {
 
@@ -183,6 +183,7 @@ ALU3(MAD)
 ALU2_ACC(ADDC)
 ALU2_ACC(SUBB)
 ALU2(MAC)
+ALU1(DIM)
 
 /** Gen4 predicated IF. */
 vec4_instruction *
@@ -566,18 +567,12 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
    emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
 }
 
-/**
- * Returns the minimum number of vec4 elements needed to pack a type.
- *
- * For simple types, it will return 1 (a single vec4); for matrices, the
- * number of columns; for array and struct, the sum of the vec4_size of
- * each of its elements; and for sampler and atomic, zero.
- *
- * This method is useful to calculate how much register space is needed to
- * store a particular type.
+/*
+ * Returns the minimum number of vec4 (as_vec4 == true) or dvec4 (as_vec4 ==
+ * false) elements needed to pack a type.
  */
-extern "C" int
-type_size_vec4(const struct glsl_type *type)
+static int
+type_size_xvec4(const struct glsl_type *type, bool as_vec4)
 {
    unsigned int i;
    int size;
@@ -587,23 +582,27 @@ type_size_vec4(const struct glsl_type *type)
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
    case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_DOUBLE:
       if (type->is_matrix()) {
-	 return type->matrix_columns;
+         const glsl_type *col_type = type->column_type();
+         unsigned col_slots =
+            (as_vec4 && col_type->is_dual_slot()) ? 2 : 1;
+         return type->matrix_columns * col_slots;
       } else {
-	 /* Regardless of size of vector, it gets a vec4. This is bad
-	  * packing for things like floats, but otherwise arrays become a
-	  * mess.  Hopefully a later pass over the code can pack scalars
-	  * down if appropriate.
-	  */
-	 return 1;
+         /* Regardless of size of vector, it gets a vec4. This is bad
+          * packing for things like floats, but otherwise arrays become a
+          * mess.  Hopefully a later pass over the code can pack scalars
+          * down if appropriate.
+          */
+         return (as_vec4 && type->is_dual_slot()) ? 2 : 1;
       }
    case GLSL_TYPE_ARRAY:
       assert(type->length > 0);
-      return type_size_vec4(type->fields.array) * type->length;
+      return type_size_xvec4(type->fields.array, as_vec4) * type->length;
    case GLSL_TYPE_STRUCT:
       size = 0;
       for (i = 0; i < type->length; i++) {
-	 size += type_size_vec4(type->fields.structure[i].type);
+	 size += type_size_xvec4(type->fields.structure[i].type, as_vec4);
       }
       return size;
    case GLSL_TYPE_SUBROUTINE:
@@ -619,15 +618,56 @@ type_size_vec4(const struct glsl_type *type)
    case GLSL_TYPE_IMAGE:
       return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
    case GLSL_TYPE_VOID:
-   case GLSL_TYPE_DOUBLE:
    case GLSL_TYPE_ERROR:
    case GLSL_TYPE_INTERFACE:
+   case GLSL_TYPE_FUNCTION:
       unreachable("not reached");
    }
 
    return 0;
 }
 
+/**
+ * Returns the minimum number of vec4 elements needed to pack a type.
+ *
+ * For simple types, it will return 1 (a single vec4); for matrices, the
+ * number of columns; for array and struct, the sum of the vec4_size of
+ * each of its elements; and for sampler and atomic, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
+ */
+extern "C" int
+type_size_vec4(const struct glsl_type *type)
+{
+   return type_size_xvec4(type, true);
+}
+
+/**
+ * Returns the minimum number of dvec4 elements needed to pack a type.
+ *
+ * For simple types, it will return 1 (a single dvec4); for matrices, the
+ * number of columns; for array and struct, the sum of the dvec4_size of
+ * each of its elements; and for sampler and atomic, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
+ *
+ * Measuring double-precision vertex inputs as dvec4 is required because
+ * ARB_vertex_attrib_64bit states that these uses the same number of locations
+ * than the single-precision version. That is, two consecutives dvec4 would be
+ * located in location "x" and location "x+1", not "x+2".
+ *
+ * In order to map vec4/dvec4 vertex inputs in the proper ATTRs,
+ * remap_vs_attrs() will take in account both the location and also if the
+ * type fits in one or two vec4 slots.
+ */
+extern "C" int
+type_size_dvec4(const struct glsl_type *type)
+{
+   return type_size_xvec4(type, false);
+}
+
 src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
 {
    init();
@@ -678,18 +718,8 @@ vec4_instruction *
 vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                           src_reg src0, src_reg src1)
 {
-   vec4_instruction *inst;
-
-   if (devinfo->gen >= 6) {
-      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
-      inst->conditional_mod = conditionalmod;
-   } else {
-      emit(CMP(dst, src0, src1, conditionalmod));
-
-      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
-      inst->predicate = BRW_PREDICATE_NORMAL;
-   }
-
+   vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
+   inst->conditional_mod = conditionalmod;
    return inst;
 }
 
@@ -767,7 +797,7 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
       pull->mlen = 2;
       pull->header_size = 1;
    } else if (devinfo->gen >= 7) {
-      dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+      dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
 
       grf_offset.type = offset_reg.type;
 
@@ -815,13 +845,14 @@ vec4_visitor::emit_uniformize(const src_reg &src)
 
 src_reg
 vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
-                             src_reg coordinate, src_reg sampler)
+                             src_reg coordinate, src_reg surface)
 {
    vec4_instruction *inst =
       new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
                                     dst_reg(this, glsl_type::uvec4_type));
    inst->base_mrf = 2;
-   inst->src[1] = sampler;
+   inst->src[1] = surface;
+   inst->src[2] = surface;
 
    int param_base;
 
@@ -876,7 +907,8 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
                            uint32_t constant_offset,
                            src_reg offset_value,
                            src_reg mcs,
-                           bool is_cube_array,
+                           uint32_t surface,
+                           src_reg surface_reg,
                            uint32_t sampler,
                            src_reg sampler_reg)
 {
@@ -909,12 +941,18 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
       unreachable("TXB is not valid for vertex shaders.");
    case ir_lod:
       unreachable("LOD is not valid for vertex shaders.");
+   case ir_samples_identical: {
+      /* There are some challenges implementing this for vec4, and it seems
+       * unlikely to be used anyway.  For now, just return false ways.
+       */
+      emit(MOV(dest, brw_imm_ud(0u)));
+      return;
+   }
    default:
       unreachable("Unrecognized tex op");
    }
 
-   vec4_instruction *inst = new(mem_ctx) vec4_instruction(
-      opcode, dst_reg(this, dest_type));
+   vec4_instruction *inst = new(mem_ctx) vec4_instruction(opcode, dest);
 
    inst->offset = constant_offset;
 
@@ -936,7 +974,8 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    inst->dst.writemask = WRITEMASK_XYZW;
    inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
 
-   inst->src[1] = sampler_reg;
+   inst->src[1] = surface_reg;
+   inst->src[2] = sampler_reg;
 
    /* MRF for the first parameter */
    int param_base = inst->base_mrf + inst->header_size;
@@ -1055,18 +1094,23 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    /* fixup num layers (z) for cube arrays: hardware returns faces * layers;
     * spec requires layers.
     */
-   if (op == ir_txs && is_cube_array) {
-      emit_math(SHADER_OPCODE_INT_QUOTIENT,
-                writemask(inst->dst, WRITEMASK_Z),
-                src_reg(inst->dst), brw_imm_d(6));
+   if (op == ir_txs && devinfo->gen < 7) {
+      /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
+      emit_minmax(BRW_CONDITIONAL_GE, writemask(inst->dst, WRITEMASK_Z),
+                  src_reg(inst->dst), brw_imm_d(1));
    }
 
    if (devinfo->gen == 6 && op == ir_tg4) {
-      emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
+      emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst);
    }
 
-   swizzle_result(op, dest,
-                  src_reg(inst->dst), sampler, dest_type);
+   if (op == ir_query_levels) {
+      /* # levels is in .w */
+      src_reg swizzled(dest);
+      swizzled.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W,
+                                      SWIZZLE_W, SWIZZLE_W);
+      emit(MOV(dest, swizzled));
+   }
 }
 
 /**
@@ -1096,87 +1140,6 @@ vec4_visitor::emit_gen6_gather_wa(uint8_t wa, dst_reg dst)
    }
 }
 
-/**
- * Set up the gather channel based on the swizzle, for gather4.
- */
-uint32_t
-vec4_visitor::gather_channel(unsigned gather_component, uint32_t sampler)
-{
-   int swiz = GET_SWZ(key_tex->swizzles[sampler], gather_component);
-   switch (swiz) {
-      case SWIZZLE_X: return 0;
-      case SWIZZLE_Y:
-         /* gather4 sampler is broken for green channel on RG32F --
-          * we must ask for blue instead.
-          */
-         if (key_tex->gather_channel_quirk_mask & (1 << sampler))
-            return 2;
-         return 1;
-      case SWIZZLE_Z: return 2;
-      case SWIZZLE_W: return 3;
-      default:
-         unreachable("Not reached"); /* zero, one swizzles handled already */
-   }
-}
-
-void
-vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest,
-                             src_reg orig_val, uint32_t sampler,
-                             const glsl_type *dest_type)
-{
-   int s = key_tex->swizzles[sampler];
-
-   dst_reg swizzled_result = dest;
-
-   if (op == ir_query_levels) {
-      /* # levels is in .w */
-      orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
-      emit(MOV(swizzled_result, orig_val));
-      return;
-   }
-
-   if (op == ir_txs || dest_type == glsl_type::float_type
-			|| s == SWIZZLE_NOOP || op == ir_tg4) {
-      emit(MOV(swizzled_result, orig_val));
-      return;
-   }
-
-
-   int zero_mask = 0, one_mask = 0, copy_mask = 0;
-   int swizzle[4] = {0};
-
-   for (int i = 0; i < 4; i++) {
-      switch (GET_SWZ(s, i)) {
-      case SWIZZLE_ZERO:
-	 zero_mask |= (1 << i);
-	 break;
-      case SWIZZLE_ONE:
-	 one_mask |= (1 << i);
-	 break;
-      default:
-	 copy_mask |= (1 << i);
-	 swizzle[i] = GET_SWZ(s, i);
-	 break;
-      }
-   }
-
-   if (copy_mask) {
-      orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
-      swizzled_result.writemask = copy_mask;
-      emit(MOV(swizzled_result, orig_val));
-   }
-
-   if (zero_mask) {
-      swizzled_result.writemask = zero_mask;
-      emit(MOV(swizzled_result, brw_imm_f(0.0f)));
-   }
-
-   if (one_mask) {
-      swizzled_result.writemask = one_mask;
-      emit(MOV(swizzled_result, brw_imm_f(1.0f)));
-   }
-}
-
 void
 vec4_visitor::gs_emit_vertex(int stream_id)
 {
@@ -1189,61 +1152,6 @@ vec4_visitor::gs_end_primitive()
    unreachable("not reached");
 }
 
-void
-vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
-                                  dst_reg dst, src_reg surf_offset,
-                                  src_reg src0, src_reg src1)
-{
-   unsigned mlen = 1 + (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
-   src_reg src_payload(this, glsl_type::uint_type, mlen);
-   dst_reg payload(src_payload);
-   payload.writemask = WRITEMASK_X;
-
-   /* Set the atomic operation offset. */
-   emit(MOV(offset(payload, 0), surf_offset));
-   unsigned i = 1;
-
-   /* Set the atomic operation arguments. */
-   if (src0.file != BAD_FILE) {
-      emit(MOV(offset(payload, i), src0));
-      i++;
-   }
-
-   if (src1.file != BAD_FILE) {
-      emit(MOV(offset(payload, i), src1));
-      i++;
-   }
-
-   /* Emit the instruction.  Note that this maps to the normal SIMD8
-    * untyped atomic message on Ivy Bridge, but that's OK because
-    * unused channels will be masked out.
-    */
-   vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
-                                 src_payload,
-                                 brw_imm_ud(surf_index), brw_imm_ud(atomic_op));
-   inst->mlen = mlen;
-}
-
-void
-vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
-                                        src_reg surf_offset)
-{
-   dst_reg offset(this, glsl_type::uint_type);
-   offset.writemask = WRITEMASK_X;
-
-   /* Set the surface read offset. */
-   emit(MOV(offset, surf_offset));
-
-   /* Emit the instruction.  Note that this maps to the normal SIMD8
-    * untyped surface read message, but that's OK because unused
-    * channels will be masked out.
-    */
-   vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
-                                 src_reg(offset),
-                                 brw_imm_ud(surf_index), brw_imm_d(1));
-   inst->mlen = 1;
-}
-
 void
 vec4_visitor::emit_ndc_computation()
 {
@@ -1364,12 +1272,34 @@ vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
    assert(varying < VARYING_SLOT_MAX);
    assert(output_reg[varying].type == reg.type);
    current_annotation = output_reg_annotation[varying];
-   if (output_reg[varying].file != BAD_FILE)
+   if (output_reg[varying].file != BAD_FILE) {
       return emit(MOV(reg, src_reg(output_reg[varying])));
-   else
+   } else
       return NULL;
 }
 
+void
+vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component)
+{
+   assert(varying < VARYING_SLOT_MAX);
+   assert(varying >= VARYING_SLOT_VAR0);
+   varying = varying - VARYING_SLOT_VAR0;
+
+   unsigned num_comps = output_generic_num_components[varying][component];
+   if (num_comps == 0)
+      return;
+
+   assert(output_generic_reg[varying][component].type == reg.type);
+   current_annotation = output_reg_annotation[varying];
+   if (output_generic_reg[varying][component].file != BAD_FILE) {
+      src_reg src = src_reg(output_generic_reg[varying][component]);
+      src.swizzle = BRW_SWZ_COMP_OUTPUT(component);
+      reg.writemask =
+         brw_writemask_for_component_packing(num_comps, component);
+      emit(MOV(reg, src));
+   }
+}
+
 void
 vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
 {
@@ -1409,7 +1339,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
       /* No need to write to this slot */
       break;
    default:
-      emit_generic_urb_slot(reg, varying);
+      if (varying >= VARYING_SLOT_VAR0) {
+         for (int i = 0; i < 4; i++) {
+            emit_generic_urb_slot(reg, varying, i);
+         }
+      } else {
+         emit_generic_urb_slot(reg, varying);
+      }
       break;
    }
 }
@@ -1534,35 +1470,6 @@ vec4_visitor::get_scratch_offset(bblock_t *block, vec4_instruction *inst,
    }
 }
 
-src_reg
-vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
-				       src_reg *reladdr, int reg_offset)
-{
-   if (reladdr) {
-      src_reg index = src_reg(this, glsl_type::int_type);
-
-      emit_before(block, inst, ADD(dst_reg(index), *reladdr,
-                                   brw_imm_d(reg_offset)));
-
-      /* Pre-gen6, the message header uses byte offsets instead of vec4
-       * (16-byte) offset units.
-       */
-      if (devinfo->gen < 6) {
-         emit_before(block, inst, MUL(dst_reg(index), index, brw_imm_d(16)));
-      }
-
-      return index;
-   } else if (devinfo->gen >= 8) {
-      /* Store the offset in a GRF so we can send-from-GRF. */
-      src_reg offset = src_reg(this, glsl_type::int_type);
-      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset)));
-      return offset;
-   } else {
-      int message_header_scale = devinfo->gen < 6 ? 16 : 1;
-      return brw_imm_d(reg_offset * message_header_scale);
-   }
-}
-
 /**
  * Emits an instruction before @inst to load the value named by @orig_src
  * from scratch space at @base_offset to @temp.
@@ -1740,12 +1647,24 @@ vec4_visitor::move_grf_array_access_to_scratch()
 void
 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
 				      dst_reg temp, src_reg orig_src,
-				      int base_offset)
+                                      int base_offset, src_reg indirect)
 {
    int reg_offset = base_offset + orig_src.reg_offset;
    const unsigned index = prog_data->base.binding_table.pull_constants_start;
-   src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
-                                             reg_offset);
+
+   src_reg offset;
+   if (indirect.file != BAD_FILE) {
+      offset = src_reg(this, glsl_type::uint_type);
+
+      emit_before(block, inst, ADD(dst_reg(offset), indirect,
+                                   brw_imm_ud(reg_offset * 16)));
+   } else if (devinfo->gen >= 8) {
+      /* Store the offset in a GRF so we can send-from-GRF. */
+      offset = src_reg(this, glsl_type::uint_type);
+      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16)));
+   } else {
+      offset = brw_imm_d(reg_offset * 16);
+   }
 
    emit_pull_constant_load_reg(temp,
                                brw_imm_ud(index),
@@ -1770,61 +1689,65 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
 void
 vec4_visitor::move_uniform_array_access_to_pull_constants()
 {
+   /* The vulkan dirver doesn't support pull constants other than UBOs so
+    * everything has to be pushed regardless.
+    */
+   if (stage_prog_data->pull_param == NULL) {
+      split_uniform_registers();
+      return;
+   }
+
    int pull_constant_loc[this->uniforms];
    memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
-   bool nested_reladdr;
 
-   /* Walk through and find array access of uniforms.  Put a copy of that
-    * uniform in the pull constant buffer.
-    *
-    * Note that we don't move constant-indexed accesses to arrays.  No
-    * testing has been done of the performance impact of this choice.
+   /* First, walk through the instructions and determine which things need to
+    * be pulled.  We mark something as needing to be pulled by setting
+    * pull_constant_loc to 0.
     */
-   do {
-      nested_reladdr = false;
-
-      foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-         for (int i = 0 ; i < 3; i++) {
-            if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
-               continue;
+   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+      /* We only care about MOV_INDIRECT of a uniform */
+      if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+          inst->src[0].file != UNIFORM)
+         continue;
 
-            int uniform = inst->src[i].nr;
+      int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
 
-            if (inst->src[i].reladdr->reladdr)
-               nested_reladdr = true;  /* will need another pass */
+      for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++)
+         pull_constant_loc[uniform_nr + j] = 0;
+   }
 
-            /* If this array isn't already present in the pull constant buffer,
-             * add it.
-             */
-            if (pull_constant_loc[uniform] == -1) {
-               const gl_constant_value **values =
-                  &stage_prog_data->param[uniform * 4];
+   /* Next, we walk the list of uniforms and assign real pull constant
+    * locations and set their corresponding entries in pull_param.
+    */
+   for (int j = 0; j < this->uniforms; j++) {
+      if (pull_constant_loc[j] < 0)
+         continue;
 
-               pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
+      pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4;
 
-               assert(uniform < uniform_array_size);
-               for (int j = 0; j < uniform_size[uniform] * 4; j++) {
-                  stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
-                     = values[j];
-               }
-            }
+      for (int i = 0; i < 4; i++) {
+         stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
+            = stage_prog_data->param[j * 4 + i];
+      }
+   }
 
-            /* Set up the annotation tracking for new generated instructions. */
-            base_ir = inst->ir;
-            current_annotation = inst->annotation;
+   /* Finally, we can walk through the instructions and lower MOV_INDIRECT
+    * instructions to actual uniform pulls.
+    */
+   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+      /* We only care about MOV_INDIRECT of a uniform */
+      if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+          inst->src[0].file != UNIFORM)
+         continue;
 
-            dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+      int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
 
-            emit_pull_constant_load(block, inst, temp, inst->src[i],
-                                    pull_constant_loc[uniform]);
+      assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP);
 
-            inst->src[i].file = temp.file;
-            inst->src[i].nr = temp.nr;
-            inst->src[i].reg_offset = temp.reg_offset;
-            inst->src[i].reladdr = NULL;
-         }
-      }
-   } while (nested_reladdr);
+      emit_pull_constant_load(block, inst, inst->dst, inst->src[0],
+                              pull_constant_loc[uniform_nr], inst->src[1]);
+      inst->remove(block);
+   }
 
    /* Now there are no accesses of the UNIFORM file with a reladdr, so
     * no need to track them as larger-than-vec4 objects.  This will be
@@ -1870,6 +1793,9 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
    this->current_annotation = NULL;
    memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
 
+   memset(this->output_generic_num_components, 0,
+          sizeof(this->output_generic_num_components));
+
    this->virtual_grf_start = NULL;
    this->virtual_grf_end = NULL;
    this->live_intervals = NULL;
@@ -1877,17 +1803,6 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
    this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
    this->uniforms = 0;
-
-   /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
-    * at least one. See setup_uniforms() in brw_vec4.cpp.
-    */
-   this->uniform_array_size = 1;
-   if (prog_data) {
-      this->uniform_array_size =
-         MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
-   }
-
-   this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
 }
 
 vec4_visitor::~vec4_visitor()