vc4: Lower NIR inputs to scalar as well.
author Eric Anholt <eric@anholt.net>
Thu, 30 Jul 2015 00:16:26 +0000 (17:16 -0700)
committer Eric Anholt <eric@anholt.net>
Thu, 30 Jul 2015 22:47:12 +0000 (15:47 -0700)
For now this is just scalarizing, but it also means we'll get to dump a
bunch of QIR-based lowering in a moment.

src/gallium/drivers/vc4/vc4_nir_lower_io.c
src/gallium/drivers/vc4/vc4_program.c

index 43376888248936ca05ab6c49630cd9e8845d85dd..9882b6b8a35aad4726a27eb374101dac33aefd41 100644 (file)
  * Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
  * something amenable to the VC4 architecture.
  *
- * Currently, it split outputs into scalars, and drops any non-position values
- * in coordinate shaders.
+ * Currently, it split inputs and outputs into scalars, and drops any
+ * non-position outputs in coordinate shaders.
  */
 
+static void
+vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
+                    nir_intrinsic_instr *intr)
+{
+        /* Lower one vec4 load_input intrinsic into four single-component
+         * load_input intrinsics, recombine their results with nir_vec4(),
+         * point every use of the original destination at that vec4, and
+         * remove the original intrinsic.
+         *
+         * All TGSI-to-NIR inputs are vec4.  The assert also keeps the loop
+         * below within the bounds of the four-entry dests[] array.
+         */
+        assert(intr->num_components == 4);
+
+        nir_builder_insert_before_instr(b, &intr->instr);
+
+        /* Generate scalar loads equivalent to the original VEC4.  Each new
+         * load has num_components == 1 and a const_index[0] of
+         * (original const_index[0] * 4 + component), and its SSA
+         * destination is recorded in dests[] for the recombination below.
+         */
+        nir_ssa_def *dests[4];
+        for (unsigned i = 0; i < intr->num_components; i++) {
+                nir_intrinsic_instr *intr_comp =
+                        nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input);
+                intr_comp->num_components = 1;
+                intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
+                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+                nir_builder_instr_insert(b, &intr_comp->instr);
+
+                dests[i] = &intr_comp->dest.ssa;
+        }
+
+        /* Batch things back together into a vec4.  This will get split by the
+         * later ALU scalarization pass.
+         */
+        nir_ssa_def *vec_instr = nir_vec4(b, dests[0], dests[1],
+                                          dests[2], dests[3]);
+
+        /* Replace the old intrinsic with a reference to our reconstructed
+         * vec4: rewrite all uses of the old destination first, then remove
+         * the old instruction itself.
+         */
+        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec_instr),
+                                 ralloc_parent(b->impl));
+        nir_instr_remove(&intr->instr);
+}
+
 static void
 vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
                      nir_intrinsic_instr *intr)
@@ -84,6 +120,10 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 
         switch (intr->intrinsic) {
+        case nir_intrinsic_load_input:
+                vc4_nir_lower_input(c, b, intr);
+                break;
+
         case nir_intrinsic_store_output:
                 vc4_nir_lower_output(c, b, intr);
                 break;
index 85bb1c48780566192e424dad5962a98f9e779ca5..dfc3815c5c1185c9124daeb5a5f20755ba26207f 100644 (file)
@@ -1889,8 +1889,8 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
                 break;
 
         case nir_intrinsic_load_input:
-                for (int i = 0; i < instr->num_components; i++)
-                        dest[i] = c->inputs[instr->const_index[0] * 4 + i];
+                assert(instr->num_components == 1);
+                *dest = c->inputs[instr->const_index[0]];
 
                 break;