From ad02ba42f0931abfdb79af29678267bd063e92f3 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 15 Sep 2014 11:45:56 -0700 Subject: [PATCH] vc4: Move register allocation to a separate file. I'm going to be rewriting it all, and having it mixed up with the QIR-to-QPU opcode translation was messy. --- src/gallium/drivers/vc4/Makefile.sources | 1 + src/gallium/drivers/vc4/vc4_context.h | 1 + src/gallium/drivers/vc4/vc4_qpu_emit.c | 106 +----------- .../drivers/vc4/vc4_register_allocate.c | 157 ++++++++++++++++++ 4 files changed, 165 insertions(+), 100 deletions(-) create mode 100644 src/gallium/drivers/vc4/vc4_register_allocate.c diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index f64848f9468..bcb4209d0d0 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -23,6 +23,7 @@ C_SOURCES := \ vc4_qpu_emit.c \ vc4_qpu.h \ vc4_qpu_validate.c \ + vc4_register_allocate.c \ vc4_resource.c \ vc4_resource.h \ vc4_screen.c \ diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index b82c08c5ff8..e5864333a68 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -230,6 +230,7 @@ void vc4_flush(struct pipe_context *pctx); void vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo); void vc4_emit_state(struct pipe_context *pctx); void vc4_generate_code(struct vc4_compile *c); +struct qpu_reg *vc4_register_allocate(struct vc4_compile *c); void vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode); bool vc4_rt_format_supported(enum pipe_format f); diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 26520fec22f..6c4c76d3217 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -210,43 +210,11 @@ serialize_insts(struct vc4_compile *c) void vc4_generate_code(struct vc4_compile *c) { - 
struct qpu_reg allocate_to_qpu_reg[4 + 32 + 32]; - bool reg_in_use[ARRAY_SIZE(allocate_to_qpu_reg)]; - int *reg_allocated = calloc(c->num_temps, sizeof(*reg_allocated)); - int *reg_uses_remaining = - calloc(c->num_temps, sizeof(*reg_uses_remaining)); + struct qpu_reg *temp_registers = vc4_register_allocate(c); bool discard = false; - for (int i = 0; i < ARRAY_SIZE(reg_in_use); i++) - reg_in_use[i] = false; - for (int i = 0; i < c->num_temps; i++) - reg_allocated[i] = -1; - - uint32_t next_reg = 0; - for (int i = 0; i < 4; i++) - allocate_to_qpu_reg[next_reg++] = qpu_rn(i == 3 ? 4 : i); - for (int i = 0; i < 32; i++) - allocate_to_qpu_reg[next_reg++] = qpu_ra(i); - for (int i = 0; i < 32; i++) - allocate_to_qpu_reg[next_reg++] = qpu_rb(i); - assert(next_reg == ARRAY_SIZE(allocate_to_qpu_reg)); - make_empty_list(&c->qpu_inst_list); - struct simple_node *node; - foreach(node, &c->instructions) { - struct qinst *qinst = (struct qinst *)node; - - if (qinst->dst.file == QFILE_TEMP) - reg_uses_remaining[qinst->dst.index]++; - for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) { - if (qinst->src[i].file == QFILE_TEMP) - reg_uses_remaining[qinst->src[i].index]++; - } - if (qinst->op == QOP_FRAG_Z) - reg_in_use[3 + 32 + QPU_R_FRAG_PAYLOAD_ZW] = true; - } - switch (c->stage) { case QSTAGE_VERT: case QSTAGE_COORD: @@ -259,6 +227,7 @@ vc4_generate_code(struct vc4_compile *c) break; } + struct simple_node *node; foreach(node, &c->instructions) { struct qinst *qinst = (struct qinst *)node; @@ -306,18 +275,7 @@ vc4_generate_code(struct vc4_compile *c) src[i] = qpu_rn(0); break; case QFILE_TEMP: - if (reg_allocated[index] == -1) { - fprintf(stderr, "undefined reg use: "); - qir_dump_inst(qinst); - fprintf(stderr, "\n"); - - src[i] = qpu_rn(0); - } else { - src[i] = allocate_to_qpu_reg[reg_allocated[index]]; - reg_uses_remaining[index]--; - if (reg_uses_remaining[index] == 0) - reg_in_use[reg_allocated[index]] = false; - } + src[i] = temp_registers[index]; break; case QFILE_UNIF: 
src[i] = qpu_unif(); @@ -333,63 +291,9 @@ vc4_generate_code(struct vc4_compile *c) case QFILE_NULL: dst = qpu_ra(QPU_W_NOP); break; - case QFILE_TEMP: - if (reg_allocated[qinst->dst.index] == -1) { - int alloc; - for (alloc = 0; - alloc < ARRAY_SIZE(reg_in_use); - alloc++) { - struct qpu_reg reg = allocate_to_qpu_reg[alloc]; - - switch (qinst->op) { - case QOP_PACK_SCALED: - /* The pack flags require an - * A-file register. - */ - if (reg.mux != QPU_MUX_A) - continue; - break; - case QOP_TEX_RESULT: - case QOP_TLB_COLOR_READ: - /* Only R4-generating - * instructions get to store - * values in R4 for now, until - * we figure out how to do - * interference. - */ - if (reg.mux != QPU_MUX_R4) - continue; - break; - case QOP_FRAG_Z: - if (reg.mux != QPU_MUX_B || - reg.addr != QPU_R_FRAG_PAYLOAD_ZW) { - continue; - } - break; - default: - if (reg.mux == QPU_MUX_R4) - continue; - break; - } - - if (!reg_in_use[alloc]) - break; - } - assert(alloc != ARRAY_SIZE(reg_in_use) && "need better reg alloc"); - reg_in_use[alloc] = true; - reg_allocated[qinst->dst.index] = alloc; - } - - dst = allocate_to_qpu_reg[reg_allocated[qinst->dst.index]]; - - reg_uses_remaining[qinst->dst.index]--; - if (reg_uses_remaining[qinst->dst.index] == 0) { - reg_in_use[reg_allocated[qinst->dst.index]] = - false; - } + dst = temp_registers[qinst->dst.index]; break; - case QFILE_VARY: case QFILE_UNIF: assert(!"not reached"); @@ -645,4 +549,6 @@ vc4_generate_code(struct vc4_compile *c) vc4_dump_program(c); vc4_qpu_validate(c->qpu_insts, c->qpu_inst_count); + + free(temp_registers); } diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c new file mode 100644 index 00000000000..97d4fb3d131 --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -0,0 +1,157 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files 
(the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+
+#include "vc4_context.h"
+#include "vc4_qir.h"
+#include "vc4_qpu.h"
+
+/**
+ * Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
+ *
+ * Registers are handed out in a single linear walk over c->instructions: a
+ * temp receives the first free entry of the candidate table that the opcode
+ * writing it accepts, and that entry is released again once every counted
+ * reference (write or read) to the temp has been walked past.
+ *
+ * Temps that are never written keep the default mapping to qpu_rn(0).
+ *
+ * The return value is an array of c->num_temps entries and should be freed
+ * by the caller.  The scratch arrays used while allocating are freed here.
+ */
+struct qpu_reg *
+vc4_register_allocate(struct vc4_compile *c)
+{
+        struct simple_node *node;
+        /* Candidate registers in preference order: qpu_rn(0), qpu_rn(1),
+         * qpu_rn(2), qpu_rn(4), then qpu_ra(0..31), then qpu_rb(0..31).
+         */
+        struct qpu_reg allocate_to_qpu_reg[4 + 32 + 32];
+        bool reg_in_use[ARRAY_SIZE(allocate_to_qpu_reg)];
+        /* Per temp: index into allocate_to_qpu_reg, or -1 if unallocated. */
+        int *reg_allocated = calloc(c->num_temps, sizeof(*reg_allocated));
+        /* Per temp: references (writes and reads) not yet walked past. */
+        int *reg_uses_remaining =
+                calloc(c->num_temps, sizeof(*reg_uses_remaining));
+        struct qpu_reg *temp_registers = calloc(c->num_temps,
+                                                sizeof(*temp_registers));
+
+        for (int i = 0; i < ARRAY_SIZE(reg_in_use); i++)
+                reg_in_use[i] = false;
+        for (int i = 0; i < c->num_temps; i++)
+                reg_allocated[i] = -1;
+
+        /* If things aren't ever written (undefined values), just read from
+         * r0.
+         */
+        for (int i = 0; i < c->num_temps; i++)
+                temp_registers[i] = qpu_rn(0);
+
+        uint32_t next_reg = 0;
+        for (int i = 0; i < 4; i++)
+                allocate_to_qpu_reg[next_reg++] = qpu_rn(i == 3 ? 4 : i);
+        for (int i = 0; i < 32; i++)
+                allocate_to_qpu_reg[next_reg++] = qpu_ra(i);
+        for (int i = 0; i < 32; i++)
+                allocate_to_qpu_reg[next_reg++] = qpu_rb(i);
+        assert(next_reg == ARRAY_SIZE(allocate_to_qpu_reg));
+
+        /* First pass: count every reference to each temp so the second
+         * pass knows when a register can be released.
+         */
+        foreach(node, &c->instructions) {
+                struct qinst *qinst = (struct qinst *)node;
+
+                if (qinst->dst.file == QFILE_TEMP)
+                        reg_uses_remaining[qinst->dst.index]++;
+                for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
+                        if (qinst->src[i].file == QFILE_TEMP)
+                                reg_uses_remaining[qinst->src[i].index]++;
+                }
+                /* NOTE(review): the qpu_rb() entries start at index 4 + 32
+                 * of the table, so this offset marks the entry just before
+                 * qpu_rb(QPU_R_FRAG_PAYLOAD_ZW) as in use.  The QOP_FRAG_Z
+                 * allocation below needs the QPU_R_FRAG_PAYLOAD_ZW entry
+                 * itself to still be free, so confirm the intent before
+                 * changing the offset.
+                 */
+                if (qinst->op == QOP_FRAG_Z)
+                        reg_in_use[3 + 32 + QPU_R_FRAG_PAYLOAD_ZW] = true;
+        }
+
+        /* Second pass: allocate.  Sources are released before the
+         * destination is allocated, so a destination may reuse the register
+         * of a source whose last reference is this same instruction.
+         */
+        foreach(node, &c->instructions) {
+                struct qinst *qinst = (struct qinst *)node;
+
+                for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
+                        int index = qinst->src[i].index;
+
+                        if (qinst->src[i].file != QFILE_TEMP)
+                                continue;
+
+                        if (reg_allocated[index] == -1) {
+                                fprintf(stderr, "undefined reg use: ");
+                                qir_dump_inst(qinst);
+                                fprintf(stderr, "\n");
+                        } else {
+                                reg_uses_remaining[index]--;
+                                if (reg_uses_remaining[index] == 0)
+                                        reg_in_use[reg_allocated[index]] = false;
+                        }
+                }
+
+                if (qinst->dst.file == QFILE_TEMP) {
+                        if (reg_allocated[qinst->dst.index] == -1) {
+                                int alloc;
+                                for (alloc = 0;
+                                     alloc < ARRAY_SIZE(reg_in_use);
+                                     alloc++) {
+                                        struct qpu_reg reg = allocate_to_qpu_reg[alloc];
+
+                                        switch (qinst->op) {
+                                        case QOP_PACK_SCALED:
+                                                /* The pack flags require an
+                                                 * A-file register.
+                                                 */
+                                                if (reg.mux != QPU_MUX_A)
+                                                        continue;
+                                                break;
+                                        case QOP_TEX_RESULT:
+                                        case QOP_TLB_COLOR_READ:
+                                                /* Only R4-generating
+                                                 * instructions get to store
+                                                 * values in R4 for now, until
+                                                 * we figure out how to do
+                                                 * interference.
+                                                 */
+                                                if (reg.mux != QPU_MUX_R4)
+                                                        continue;
+                                                break;
+                                        case QOP_FRAG_Z:
+                                                if (reg.mux != QPU_MUX_B ||
+                                                    reg.addr != QPU_R_FRAG_PAYLOAD_ZW) {
+                                                        continue;
+                                                }
+                                                break;
+                                        default:
+                                                if (reg.mux == QPU_MUX_R4)
+                                                        continue;
+                                                break;
+                                        }
+
+                                        if (!reg_in_use[alloc])
+                                                break;
+                                }
+                                assert(alloc != ARRAY_SIZE(reg_in_use) && "need better reg alloc");
+                                reg_in_use[alloc] = true;
+                                reg_allocated[qinst->dst.index] = alloc;
+                                temp_registers[qinst->dst.index] = allocate_to_qpu_reg[alloc];
+                        }
+
+                        reg_uses_remaining[qinst->dst.index]--;
+                        if (reg_uses_remaining[qinst->dst.index] == 0) {
+                                reg_in_use[reg_allocated[qinst->dst.index]] =
+                                        false;
+                        }
+                }
+        }
+
+        /* Only temp_registers is handed to the caller; the bookkeeping
+         * arrays are private to this function and must not outlive it.
+         */
+        free(reg_uses_remaining);
+        free(reg_allocated);
+
+        return temp_registers;
+}
-- 
2.30.2