freedreno/ir3: Simplify the immediates from an array of vec4 to array of dwords.
author Eric Anholt <eric@anholt.net>
Thu, 9 Jul 2020 22:00:33 +0000 (15:00 -0700)
committer Marge Bot <eric+marge@anholt.net>
Wed, 5 Aug 2020 23:06:55 +0000 (23:06 +0000)
We usually had to split the idx/swiz out of the dword index anyway.  Note
that incidentally, immediates_size now increments in vec4s instead of
4*vec4s.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5990>

src/freedreno/computerator/a6xx.c
src/freedreno/ir3/ir3_cp.c
src/freedreno/ir3/ir3_parser.y
src/freedreno/ir3/ir3_shader.c
src/freedreno/ir3/ir3_shader.h
src/freedreno/vulkan/tu_pipeline.c
src/gallium/drivers/freedreno/ir3/ir3_const.h

index baa9b7d8504840848c59a056f9b80bc4557582d1..bf2b9262d6d3b1c92587dfafb2eb4cbdd31074db 100644 (file)
@@ -225,9 +225,9 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t grid[3
        if (ir3_kernel->info.numwg != INVALID_REG) {
                assert((ir3_kernel->info.numwg & 0x3) == 0);
                int idx = ir3_kernel->info.numwg >> 2;
-               const_state->immediates[idx].val[0] = grid[0];
-               const_state->immediates[idx].val[1] = grid[1];
-               const_state->immediates[idx].val[2] = grid[2];
+               const_state->immediates[idx * 4 + 0] = grid[0];
+               const_state->immediates[idx * 4 + 1] = grid[1];
+               const_state->immediates[idx * 4 + 2] = grid[2];
        }
 
        /* truncate size to avoid writing constants that shader
@@ -240,7 +240,7 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t grid[3
        size *= 4;
 
        if (size > 0) {
-               emit_const(ring, base, size, const_state->immediates[0].val);
+               emit_const(ring, base, size, const_state->immediates);
        }
 }
 
index 9420116f536d28bcbf61c30cff60ab4f53bc9c43..12c0433cca3042af20b2d5201930fa089a6ef1fc 100644 (file)
@@ -161,8 +161,6 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
        if (!ir3_valid_flags(instr, n, new_flags))
                return false;
 
-       unsigned swiz, idx, i;
-
        reg = ir3_reg_clone(ctx->shader, reg);
 
        /* Half constant registers seems to handle only 32-bit values
@@ -196,9 +194,12 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
                new_flags &= ~IR3_REG_FNEG;
        }
 
-       /* Reallocate for 4 more elements whenever it's necessary */
+       /* Reallocate for 4 more elements whenever it's necessary.  Note that ir3
+        * printing relies on having groups of 4 dwords, so we fill the unused
+        * slots with a dummy value.
+        */
        struct ir3_const_state *const_state = ir3_const_state(ctx->so);
-       if (const_state->immediates_count == const_state->immediates_size * 4) {
+       if (const_state->immediates_count == const_state->immediates_size) {
                const_state->immediates = rerzalloc(const_state,
                                const_state->immediates,
                                __typeof__(const_state->immediates[0]),
@@ -206,17 +207,14 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
                                const_state->immediates_size + 4);
                const_state->immediates_size += 4;
 
-               for (int i = const_state->immediates_count; i < const_state->immediates_size * 4; i++)
-                       const_state->immediates[i / 4].val[i % 4] = 0xd0d0d0d0;
+               for (int i = const_state->immediates_count; i < const_state->immediates_size; i++)
+                       const_state->immediates[i] = 0xd0d0d0d0;
        }
 
+       int i;
        for (i = 0; i < const_state->immediates_count; i++) {
-               swiz = i % 4;
-               idx  = i / 4;
-
-               if (const_state->immediates[idx].val[swiz] == reg->uim_val) {
+               if (const_state->immediates[i] == reg->uim_val)
                        break;
-               }
        }
 
        if (i == const_state->immediates_count) {
@@ -227,10 +225,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
                                ir3_max_const(ctx->so))
                        return false;
 
-               swiz = i % 4;
-               idx  = i / 4;
-
-               const_state->immediates[idx].val[swiz] = reg->uim_val;
+               const_state->immediates[i] = reg->uim_val;
                const_state->immediates_count++;
        }
 
index 5f79c1b140ccdf7f45097b7e074fcdba5e337d11..e82035d59d15269d39a2179cc751216d6caaa4f8 100644 (file)
@@ -156,7 +156,7 @@ static void add_const(unsigned reg, unsigned c0, unsigned c1, unsigned c2, unsig
        struct ir3_const_state *const_state = ir3_const_state(variant);
        assert((reg & 0x7) == 0);
        int idx = reg >> (1 + 2); /* low bit is half vs full, next two bits are swiz */
-       if (const_state->immediates_count == const_state->immediates_size * 4) {
+       if (const_state->immediates_count == const_state->immediates_size) {
                const_state->immediates = rerzalloc(const_state,
                                const_state->immediates,
                                __typeof__(const_state->immediates[0]),
@@ -164,10 +164,10 @@ static void add_const(unsigned reg, unsigned c0, unsigned c1, unsigned c2, unsig
                                const_state->immediates_size + 4);
                const_state->immediates_size += 4;
        }
-       const_state->immediates[idx].val[0] = c0;
-       const_state->immediates[idx].val[1] = c1;
-       const_state->immediates[idx].val[2] = c2;
-       const_state->immediates[idx].val[3] = c3;
+       const_state->immediates[idx * 4 + 0] = c0;
+       const_state->immediates[idx * 4 + 1] = c1;
+       const_state->immediates[idx * 4 + 2] = c2;
+       const_state->immediates[idx * 4 + 3] = c3;
        const_state->immediates_count++;
 }
 
index 0e6653a25d4888c733f2d3c6649dcb84aaca9159..55d62aedcd4e63656b29ceb24f254fc06095b676 100644 (file)
@@ -574,10 +574,10 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
        for (i = 0; i < DIV_ROUND_UP(const_state->immediates_count, 4); i++) {
                fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i);
                fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
-                               const_state->immediates[i].val[0],
-                               const_state->immediates[i].val[1],
-                               const_state->immediates[i].val[2],
-                               const_state->immediates[i].val[3]);
+                               const_state->immediates[i * 4 + 0],
+                               const_state->immediates[i * 4 + 1],
+                               const_state->immediates[i * 4 + 2],
+                               const_state->immediates[i * 4 + 3]);
        }
 
        disasm_a3xx(bin, so->info.sizedwords, 0, out, ir->compiler->gpu_id);
index 25a4371c52de5edf02264c633d0ed386863f3849..99084e22f8c237445a63490ced5c9cc2d17908ee 100644 (file)
@@ -193,9 +193,7 @@ struct ir3_const_state {
 
        unsigned immediates_count;
        unsigned immediates_size;
-       struct {
-               uint32_t val[4];
-       } *immediates;
+       uint32_t *immediates;
 
        /* State of ubo access lowered to push consts: */
        struct ir3_ubo_analysis_state ubo_state;
index b8eab8564ac56a23ac7c8eab10470f26c5ec20da..922586b9a61bbef05b2be7eaa30117520213ad0e 100644 (file)
@@ -464,12 +464,7 @@ tu6_emit_xs_config(struct tu_cs *cs,
    tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
    tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
 
-   for (unsigned i = 0; i < size; i++) {
-      tu_cs_emit(cs, const_state->immediates[i].val[0]);
-      tu_cs_emit(cs, const_state->immediates[i].val[1]);
-      tu_cs_emit(cs, const_state->immediates[i].val[2]);
-      tu_cs_emit(cs, const_state->immediates[i].val[3]);
-   }
+   tu_cs_emit_array(cs, const_state->immediates, size * 4);
 }
 
 static void
index 7a0386e3f3246239ad0305141bf1347d33aff211..774fd3f168bfffe3e78b2957ed4d25b4d3d58b5f 100644 (file)
@@ -271,7 +271,7 @@ ir3_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v
        size *= 4;
 
        if (size > 0)
-               emit_const(ring, v, base, 0, size, const_state->immediates[0].val, NULL);
+               emit_const(ring, v, base, 0, size, const_state->immediates, NULL);
 }
 
 static inline void