pan/bi: Flesh out st_vary IR
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Fri, 20 Mar 2020 16:25:08 +0000 (12:25 -0400)
committer Marge Bot <eric+marge@anholt.net>
Sun, 22 Mar 2020 03:32:35 +0000 (03:32 +0000)
We need to make the semantics of BI_VECTOR a bit more precise -
vectorize only the first argument, not all of them. This is enough for
current and future users, as far as I know.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4276>

src/panfrost/bifrost/bi_ra.c
src/panfrost/bifrost/bifrost_compile.c
src/panfrost/bifrost/bir.c
src/panfrost/bifrost/compiler.h

index 8c0fa92a12e7434bec97e63b41b5296c3aa8dead..a047932ae71ae66ab66d7f52c6d05f3ca9fc0f7c 100644 (file)
@@ -114,7 +114,7 @@ bi_adjust_src_ra(bi_instruction *ins, struct lcra_state *l, unsigned src)
         if (ins->src[src] >= l->node_count)
                 return;
 
-        bool vector = (bi_class_props[ins->type] & BI_VECTOR);
+        bool vector = (bi_class_props[ins->type] & BI_VECTOR) && src == 0;
         unsigned offset = 0;
 
         if (vector) {
@@ -125,7 +125,7 @@ bi_adjust_src_ra(bi_instruction *ins, struct lcra_state *l, unsigned src)
                 unsigned size = nir_alu_type_get_type_size(T);
                 unsigned bytes = (MAX2(size, 8) / 8);
                 unsigned comps_per_reg = 4 / bytes;
-                unsigned components = bi_get_component_count(ins);
+                unsigned components = bi_get_component_count(ins, src);
 
                 for (unsigned i = 0; i < components; ++i) {
                         unsigned off = ins->swizzle[src][i] / comps_per_reg;
index ff1d354b49fc4dbfed2ff8a7a0c82cbfe25472f0..25bb4ab9c58539882f78b5356d14e26ce10297e7 100644 (file)
@@ -178,22 +178,22 @@ bi_emit_st_vary(bi_context *ctx, nir_intrinsic_instr *instr)
         address.src_types[2] = nir_type_uint32;
         address.src_types[3] = nir_intrinsic_type(instr);
         address.dest = bi_make_temp(ctx);
-        address.dest_type = nir_type_uint64;
-        address.writemask = (1 << 8) - 1;
+        address.dest_type = nir_type_uint32;
+        address.writemask = (1 << 12) - 1;
 
         bi_instruction st = {
                 .type = BI_STORE_VAR,
                 .src = {
-                        address.dest,
-                        bir_src_index(&instr->src[0])
+                        bir_src_index(&instr->src[0]),
+                        address.dest, address.dest, address.dest,
                 },
                 .src_types = {
-                        nir_type_uint64,
-                        nir_type_uint32
+                        nir_type_uint32,
+                        nir_type_uint32, nir_type_uint32, nir_type_uint32,
                 },
                 .swizzle = {
-                        { 0 },
-                        { 0, 1, 2, 3 }
+                        { 0, 1, 2, 3 },
+                        { 0 }, { 1 }, { 2}
                 }
         };
 
index 5de9d30fe30fa91e22078ba75ed56a0384afa5aa..c385cfe7587c71f3fe2c515e6a2165068b2aea06 100644 (file)
@@ -93,10 +93,10 @@ bi_from_bytemask(uint16_t bytemask, unsigned bytes)
 }
 
 unsigned
-bi_get_component_count(bi_instruction *ins)
+bi_get_component_count(bi_instruction *ins, unsigned src)
 {
         if (bi_class_props[ins->type] & BI_VECTOR) {
-                return 4;
+                return (src == 0) ? 4 : 1;
         } else {
                 /* Stores imply VECTOR */
                 assert(ins->dest_type);
@@ -119,10 +119,10 @@ uint16_t
 bi_bytemask_of_read_components(bi_instruction *ins, unsigned node)
 {
         uint16_t mask = 0x0;
-        unsigned component_count = bi_get_component_count(ins);
 
         bi_foreach_src(ins, s) {
                 if (ins->src[s] != node) continue;
+                unsigned component_count = bi_get_component_count(ins, s);
                 nir_alu_type T = ins->src_types[s];
                 unsigned size = nir_alu_type_get_type_size(T);
                 unsigned bytes = (MAX2(size, 8) / 8);
index 1a777feaaac1337b1895fc1d09110d12c0948a76..ee868f75d925dee7400a885b2b1bced55f9f28b9 100644 (file)
@@ -111,7 +111,8 @@ extern unsigned bi_class_props[BI_NUM_CLASSES];
  * the end of a clause. Implies ADD */
 #define BI_SCHED_HI_LATENCY (1 << 7)
 
-/* Intrinsic is vectorized and should read 4 components regardless of writemask */
+/* Intrinsic is vectorized and should read 4 components in the first source
+ * regardless of writemask */
 #define BI_VECTOR (1 << 8)
 
 /* Use a data register for src0/dest respectively, bypassing the usual
@@ -530,7 +531,7 @@ bool bi_has_source_mods(bi_instruction *ins);
 bool bi_is_src_swizzled(bi_instruction *ins, unsigned s);
 bool bi_has_arg(bi_instruction *ins, unsigned arg);
 uint16_t bi_from_bytemask(uint16_t bytemask, unsigned bytes);
-unsigned bi_get_component_count(bi_instruction *ins);
+unsigned bi_get_component_count(bi_instruction *ins, unsigned s);
 unsigned bi_load32_components(bi_instruction *ins);
 uint16_t bi_bytemask_of_read_components(bi_instruction *ins, unsigned node);