From 51acfe22306fcaf7767a7cfcc64b46049210c5d0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 9 Jul 2020 15:00:33 -0700 Subject: [PATCH] freedreno/ir3: Simplify the immediates from an array of vec4 to array of dwords. We usually had to split the idx/swiz out of the dword index anyway. Note that incidentally, immediates_size now increments in vec4s instead of 4*vec4s. Part-of: --- src/freedreno/computerator/a6xx.c | 8 +++--- src/freedreno/ir3/ir3_cp.c | 25 ++++++++----------- src/freedreno/ir3/ir3_parser.y | 10 ++++---- src/freedreno/ir3/ir3_shader.c | 8 +++--- src/freedreno/ir3/ir3_shader.h | 4 +-- src/freedreno/vulkan/tu_pipeline.c | 7 +----- src/gallium/drivers/freedreno/ir3/ir3_const.h | 2 +- 7 files changed, 26 insertions(+), 38 deletions(-) diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c index baa9b7d8504..bf2b9262d6d 100644 --- a/src/freedreno/computerator/a6xx.c +++ b/src/freedreno/computerator/a6xx.c @@ -225,9 +225,9 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t grid[3 if (ir3_kernel->info.numwg != INVALID_REG) { assert((ir3_kernel->info.numwg & 0x3) == 0); int idx = ir3_kernel->info.numwg >> 2; - const_state->immediates[idx].val[0] = grid[0]; - const_state->immediates[idx].val[1] = grid[1]; - const_state->immediates[idx].val[2] = grid[2]; + const_state->immediates[idx * 4 + 0] = grid[0]; + const_state->immediates[idx * 4 + 1] = grid[1]; + const_state->immediates[idx * 4 + 2] = grid[2]; } /* truncate size to avoid writing constants that shader @@ -240,7 +240,7 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t grid[3 size *= 4; if (size > 0) { - emit_const(ring, base, size, const_state->immediates[0].val); + emit_const(ring, base, size, const_state->immediates); } } diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index 9420116f536..12c0433cca3 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -161,8 +161,6 @@ 
lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n, if (!ir3_valid_flags(instr, n, new_flags)) return false; - unsigned swiz, idx, i; - reg = ir3_reg_clone(ctx->shader, reg); /* Half constant registers seems to handle only 32-bit values @@ -196,9 +194,12 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n, new_flags &= ~IR3_REG_FNEG; } - /* Reallocate for 4 more elements whenever it's necessary */ + /* Reallocate for 4 more elements whenever it's necessary. Note that ir3 + * printing relies on having groups of 4 dwords, so we fill the unused + * slots with a dummy value. + */ struct ir3_const_state *const_state = ir3_const_state(ctx->so); - if (const_state->immediates_count == const_state->immediates_size * 4) { + if (const_state->immediates_count == const_state->immediates_size) { const_state->immediates = rerzalloc(const_state, const_state->immediates, __typeof__(const_state->immediates[0]), @@ -206,17 +207,14 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n, const_state->immediates_size + 4); const_state->immediates_size += 4; - for (int i = const_state->immediates_count; i < const_state->immediates_size * 4; i++) - const_state->immediates[i / 4].val[i % 4] = 0xd0d0d0d0; + for (int i = const_state->immediates_count; i < const_state->immediates_size; i++) + const_state->immediates[i] = 0xd0d0d0d0; } + int i; for (i = 0; i < const_state->immediates_count; i++) { - swiz = i % 4; - idx = i / 4; - - if (const_state->immediates[idx].val[swiz] == reg->uim_val) { + if (const_state->immediates[i] == reg->uim_val) break; - } } if (i == const_state->immediates_count) { @@ -227,10 +225,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n, ir3_max_const(ctx->so)) return false; - swiz = i % 4; - idx = i / 4; - - const_state->immediates[idx].val[swiz] = reg->uim_val; + const_state->immediates[i] = reg->uim_val; const_state->immediates_count++; } diff --git 
a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y index 5f79c1b140c..e82035d59d1 100644 --- a/src/freedreno/ir3/ir3_parser.y +++ b/src/freedreno/ir3/ir3_parser.y @@ -156,7 +156,7 @@ static void add_const(unsigned reg, unsigned c0, unsigned c1, unsigned c2, unsig struct ir3_const_state *const_state = ir3_const_state(variant); assert((reg & 0x7) == 0); int idx = reg >> (1 + 2); /* low bit is half vs full, next two bits are swiz */ - if (const_state->immediates_count == const_state->immediates_size * 4) { + if (const_state->immediates_count == const_state->immediates_size) { const_state->immediates = rerzalloc(const_state, const_state->immediates, __typeof__(const_state->immediates[0]), @@ -164,10 +164,10 @@ static void add_const(unsigned reg, unsigned c0, unsigned c1, unsigned c2, unsig const_state->immediates_size + 4); const_state->immediates_size += 4; } - const_state->immediates[idx].val[0] = c0; - const_state->immediates[idx].val[1] = c1; - const_state->immediates[idx].val[2] = c2; - const_state->immediates[idx].val[3] = c3; + const_state->immediates[idx * 4 + 0] = c0; + const_state->immediates[idx * 4 + 1] = c1; + const_state->immediates[idx * 4 + 2] = c2; + const_state->immediates[idx * 4 + 3] = c3; const_state->immediates_count++; } diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 0e6653a25d4..55d62aedcd4 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -574,10 +574,10 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) for (i = 0; i < DIV_ROUND_UP(const_state->immediates_count, 4); i++) { fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i); fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n", - const_state->immediates[i].val[0], - const_state->immediates[i].val[1], - const_state->immediates[i].val[2], - const_state->immediates[i].val[3]); + const_state->immediates[i * 4 + 0], + const_state->immediates[i * 4 + 1], + 
const_state->immediates[i * 4 + 2], + const_state->immediates[i * 4 + 3]); } disasm_a3xx(bin, so->info.sizedwords, 0, out, ir->compiler->gpu_id); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 25a4371c52d..99084e22f8c 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -193,9 +193,7 @@ struct ir3_const_state { unsigned immediates_count; unsigned immediates_size; - struct { - uint32_t val[4]; - } *immediates; + uint32_t *immediates; /* State of ubo access lowered to push consts: */ struct ir3_ubo_analysis_state ubo_state; diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index b8eab8564ac..922586b9a61 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -464,12 +464,7 @@ tu6_emit_xs_config(struct tu_cs *cs, tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - for (unsigned i = 0; i < size; i++) { - tu_cs_emit(cs, const_state->immediates[i].val[0]); - tu_cs_emit(cs, const_state->immediates[i].val[1]); - tu_cs_emit(cs, const_state->immediates[i].val[2]); - tu_cs_emit(cs, const_state->immediates[i].val[3]); - } + tu_cs_emit_array(cs, const_state->immediates, size * 4); } static void diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h index 7a0386e3f32..774fd3f168b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_const.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_const.h @@ -271,7 +271,7 @@ ir3_emit_immediates(struct fd_screen *screen, const struct ir3_shader_variant *v size *= 4; if (size > 0) - emit_const(ring, v, base, 0, size, const_state->immediates[0].val, NULL); + emit_const(ring, v, base, 0, size, const_state->immediates, NULL); } static inline void -- 2.30.2