Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / mesa / drivers / dri / i965 / brw_nir.c
index 10e9273521016feefd2805b5cfd86a294af78c4e..fdfc4f661d1d4247717542839ed0f482c81b64ce 100644 (file)
 
 #include "brw_nir.h"
 #include "brw_shader.h"
-#include "glsl/glsl_parser_extras.h"
 #include "glsl/nir/glsl_to_nir.h"
+#include "glsl/nir/nir_builder.h"
 #include "program/prog_to_nir.h"
 
+struct remap_vs_attrs_state { /* closure handed to remap_vs_attrs() */
+   nir_builder b;         /* cursor is set before each rewritten load_input */
+   uint64_t inputs_read;  /* bitmask copied from nir->info.inputs_read */
+};
+
 static bool
-remap_vs_attrs(nir_block *block, void *closure)
+remap_vs_attrs(nir_block *block, void *void_state)
 {
-   GLbitfield64 inputs_read = *((GLbitfield64 *) closure);
+   struct remap_vs_attrs_state *state = void_state;
 
-   nir_foreach_instr(block, instr) {
+   nir_foreach_instr_safe(block, instr) {
       if (instr->type != nir_instr_type_intrinsic)
          continue;
 
       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 
-      /* We set EmitNoIndirect for VS inputs, so there are no indirects. */
-      assert(intrin->intrinsic != nir_intrinsic_load_input_indirect);
-
       if (intrin->intrinsic == nir_intrinsic_load_input) {
          /* Attributes come in a contiguous block, ordered by their
           * gl_vert_attrib value.  That means we can compute the slot
           * number for an attribute by masking out the enabled attributes
           * before it and counting the bits.
           */
-         int attr = intrin->const_index[0];
-         int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr));
+         nir_const_value *const_offset = nir_src_as_const_value(intrin->src[0]);
+
+         /* We set EmitNoIndirect for VS inputs, so there are no indirects. */
+         assert(const_offset);
+
+         int attr = intrin->const_index[0] + const_offset->u[0];
+         int slot = _mesa_bitcount_64(state->inputs_read &
+                                      BITFIELD64_MASK(attr));
+
+         /* The NIR -> FS pass will just add the base and offset together, so
+          * there's no reason to keep them separate.  Just put it all in
+          * const_index[0] and set the offset src[0] to load_const(0).
+          */
          intrin->const_index[0] = 4 * slot;
+
+         state->b.cursor = nir_before_instr(&intrin->instr);
+         nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
+                               nir_src_for_ssa(nir_imm_int(&state->b, 0)));
       }
    }
    return true;
@@ -62,13 +79,6 @@ brw_nir_lower_inputs(nir_shader *nir,
 {
    switch (nir->stage) {
    case MESA_SHADER_VERTEX:
-      /* For now, leave the vec4 backend doing the old method. */
-      if (!is_scalar) {
-         nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
-                                  type_size_vec4);
-         break;
-      }
-
       /* Start with the location of the variable's base. */
       foreach_list_typed(nir_variable, var, node, &nir->inputs) {
          var->data.driver_location = var->data.location;
@@ -80,15 +90,25 @@ brw_nir_lower_inputs(nir_shader *nir,
        */
       nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
 
-      /* Finally, translate VERT_ATTRIB_* values into the actual registers.
-       *
-       * Note that we can use nir->info.inputs_read instead of key->inputs_read
-       * since the two are identical aside from Gen4-5 edge flag differences.
-       */
-      GLbitfield64 inputs_read = nir->info.inputs_read;
-      nir_foreach_overload(nir, overload) {
-         if (overload->impl) {
-            nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read);
+      if (is_scalar) {
+         /* Finally, translate VERT_ATTRIB_* values into the actual registers.
+          *
+          * Note that we can use nir->info.inputs_read instead of
+          * key->inputs_read since the two are identical aside from Gen4-5
+          * edge flag differences.
+          */
+         struct remap_vs_attrs_state remap_state = {
+            .inputs_read = nir->info.inputs_read,
+         };
+
+         /* This pass needs actual constants */
+         nir_opt_constant_folding(nir);
+
+         nir_foreach_overload(nir, overload) {
+            if (overload->impl) {
+               nir_builder_init(&remap_state.b, overload->impl);
+               nir_foreach_block(overload->impl, remap_vs_attrs, &remap_state);
+            }
          }
       }
       break;
@@ -171,12 +191,40 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
    }
 }
 
+/**
+ * Returns type_size_scalar(type) scaled by 4.
+ *
+ * NOTE(review): going by the _bytes suffix this presumably converts a count
+ * of 4-byte scalar slots into bytes -- confirm against type_size_scalar().
+ */
+static int
+type_size_scalar_bytes(const struct glsl_type *type)
+{
+   const int count = type_size_scalar(type);
+
+   return 4 * count;
+}
+
+/**
+ * Returns type_size_vec4(type) scaled by 16.
+ *
+ * NOTE(review): going by the _bytes suffix this presumably converts a count
+ * of 16-byte vec4 slots into bytes -- confirm against type_size_vec4().
+ */
+static int
+type_size_vec4_bytes(const struct glsl_type *type)
+{
+   const int count = type_size_vec4(type);
+
+   return 16 * count;
+}
+
+/**
+ * Assigns locations to the shader's uniform variables and then lowers
+ * uniform I/O, using the same size callback for both steps.
+ *
+ * \param nir        shader whose uniforms list / num_uniforms are updated
+ * \param is_scalar  selects type_size_scalar_bytes when true,
+ *                   type_size_vec4_bytes otherwise
+ */
+static void
+brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
+{
+   int (*size_in_bytes)(const struct glsl_type *) =
+      is_scalar ? type_size_scalar_bytes : type_size_vec4_bytes;
+
+   nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
+                            size_in_bytes);
+   nir_lower_io(nir, nir_var_uniform, size_in_bytes);
+}
+
+#include "util/debug.h"
+
+/**
+ * Reports whether the NIR_TEST_CLONE environment variable is enabled, as
+ * interpreted by env_var_as_boolean() with a default of false.
+ *
+ * The answer is cached in a function-local static; -1 marks "not queried
+ * yet", so the environment is consulted on the first call only.
+ */
 static bool
-should_clone_nir()
+should_clone_nir(void)
 {
    static int should_clone = -1;
-   if (should_clone < 1)
-      should_clone = brw_env_var_as_boolean("NIR_TEST_CLONE", false);
+   /* Test against 0 rather than 1: a cached "false" (0) must not trigger a
+    * fresh environment lookup on every call.
+    */
+   if (should_clone < 0)
+      should_clone = env_var_as_boolean("NIR_TEST_CLONE", false);
 
    return should_clone;
 }
@@ -298,9 +346,7 @@ brw_lower_nir(nir_shader *nir,
 
    OPT_V(brw_nir_lower_inputs, devinfo, is_scalar);
    OPT_V(brw_nir_lower_outputs, is_scalar);
-   //nir_assign_var_locations(&nir->uniforms,
-   //                         &nir->num_uniforms,
-   //                         is_scalar ? type_size_scalar : type_size_vec4);
+   //OPT_V(brw_nir_lower_uniforms, is_scalar);
    OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4);
 
    if (shader_prog) {