i965/vs: Map scalar VS input locations properly; avoid tons of MOVs.
author Kenneth Graunke <kenneth@whitecape.org>
Fri, 14 Aug 2015 22:15:11 +0000 (15:15 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Sat, 10 Oct 2015 18:40:19 +0000 (11:40 -0700)
Previously, we used nir_lower_io with the scalar type_size function,
which mapped VERT_ATTRIB_* locations to...some numbers.  Then, in
fs_visitor::nir_setup_inputs(), we created temporaries indexed by
those numbers, and emitted MOVs from the actual ATTR registers to
those temporaries.  Virtually all of these were copy propagated away,
but it's still ugly.

This patch reworks our input lowering to produce NIR load_input
intrinsics that properly index into the ATTR file, so we can access
it directly.

No changes in shader-db.

v2: Fix unreachable() message (Ken), update commit message (Matt).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_nir.c

index bc0df6850c41a05b0aced99b2ecbab6ddca1dfec..51189a2d263cd9f9c1e4b8b9c6f17655f613f458 100644 (file)
@@ -56,61 +56,25 @@ fs_visitor::emit_nir_code()
 void
 fs_visitor::nir_setup_inputs()
 {
+   if (stage != MESA_SHADER_FRAGMENT)
+      return;
+
    nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs);
 
    nir_foreach_variable(var, &nir->inputs) {
-      enum brw_reg_type type = brw_type_for_base_type(var->type);
       fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
 
       fs_reg reg;
-      switch (stage) {
-      case MESA_SHADER_VERTEX: {
-         /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value
-          * stored in nir_variable::location.
-          *
-          * However, NIR's load_input intrinsics use a different index - an
-          * offset into a single contiguous array containing all inputs.
-          * This index corresponds to the nir_variable::driver_location field.
-          *
-          * So, we need to copy from fs_reg(ATTR, var->location) to
-          * offset(nir_inputs, var->data.driver_location).
-          */
-         const glsl_type *const t = var->type->without_array();
-         const unsigned components = t->components();
-         const unsigned cols = t->matrix_columns;
-         const unsigned elts = t->vector_elements;
-         unsigned array_length = var->type->is_array() ? var->type->length : 1;
-         for (unsigned i = 0; i < array_length; i++) {
-            for (unsigned j = 0; j < cols; j++) {
-               for (unsigned k = 0; k < elts; k++) {
-                  bld.MOV(offset(retype(input, type), bld,
-                                 components * i + elts * j + k),
-                          offset(fs_reg(ATTR, var->data.location + i, type),
-                                 bld, 4 * j + k));
-               }
-            }
-         }
-         break;
-      }
-      case MESA_SHADER_GEOMETRY:
-      case MESA_SHADER_COMPUTE:
-      case MESA_SHADER_TESS_CTRL:
-      case MESA_SHADER_TESS_EVAL:
-         unreachable("fs_visitor not used for these stages yet.");
-         break;
-      case MESA_SHADER_FRAGMENT:
-         if (var->data.location == VARYING_SLOT_POS) {
-            reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
-                                                var->data.origin_upper_left);
-            emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
-                                      input, reg), 0xF);
-         } else {
-            emit_general_interpolation(input, var->name, var->type,
-                                       (glsl_interp_qualifier) var->data.interpolation,
-                                       var->data.location, var->data.centroid,
-                                       var->data.sample);
-         }
-         break;
+      if (var->data.location == VARYING_SLOT_POS) {
+         reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
+                                             var->data.origin_upper_left);
+         emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
+                                   input, reg), 0xF);
+      } else {
+         emit_general_interpolation(input, var->name, var->type,
+                                    (glsl_interp_qualifier) var->data.interpolation,
+                                    var->data.location, var->data.centroid,
+                                    var->data.sample);
       }
    }
 }
@@ -1575,8 +1539,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
    case nir_intrinsic_load_input: {
       unsigned index = 0;
       for (unsigned j = 0; j < instr->num_components; j++) {
-         fs_reg src = offset(retype(nir_inputs, dest.type), bld,
-                             instr->const_index[0] + index);
+         fs_reg src;
+         if (stage == MESA_SHADER_VERTEX) {
+            src = offset(fs_reg(ATTR, instr->const_index[0], dest.type), bld, index);
+         } else {
+            src = offset(retype(nir_inputs, dest.type), bld,
+                         instr->const_index[0] + index);
+         }
          if (has_indirect)
             src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
          index++;
index 80f36dc2399dd806807d6b17899fdc1a63bf6fb7..15c1b1984a14f2e17fe8c0b0e1a4d9b8dcf4a939 100644 (file)
@@ -31,15 +31,36 @@ static void
 brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
 {
    switch (nir->stage) {
+   case MESA_SHADER_VERTEX:
+      /* For now, leave the vec4 backend doing the old method. */
+      if (!is_scalar) {
+         nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
+                                  type_size_vec4);
+         break;
+      }
+
+      /* Start with the location of the variable's base. */
+      foreach_list_typed(nir_variable, var, node, &nir->inputs) {
+         var->data.driver_location = var->data.location;
+      }
+
+      /* Now use nir_lower_io to walk dereference chains.  Attribute arrays
+       * are loaded as one vec4 per element (or matrix column), so we use
+       * type_size_vec4 here.
+       */
+      nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
+      break;
    case MESA_SHADER_GEOMETRY:
       foreach_list_typed(nir_variable, var, node, &nir->inputs) {
          var->data.driver_location = var->data.location;
       }
       break;
-   default:
+   case MESA_SHADER_FRAGMENT:
       nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
                                is_scalar ? type_size_scalar : type_size_vec4);
       break;
+   default:
+      unreachable("unsupported shader stage");
    }
 }