vc4: Move register allocation to a separate file.
author Eric Anholt <eric@anholt.net>
Mon, 15 Sep 2014 18:45:56 +0000 (11:45 -0700)
committer Eric Anholt <eric@anholt.net>
Tue, 16 Sep 2014 22:14:52 +0000 (15:14 -0700)
I'm going to be rewriting it all, and having it mixed up with the
QIR-to-QPU opcode translation was messy.

src/gallium/drivers/vc4/Makefile.sources
src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_qpu_emit.c
src/gallium/drivers/vc4/vc4_register_allocate.c [new file with mode: 0644]

index f64848f9468e3f23040171943b8315c8e02438b9..bcb4209d0d0fe81799a23c3716caf8d851860910 100644 (file)
@@ -23,6 +23,7 @@ C_SOURCES := \
        vc4_qpu_emit.c \
        vc4_qpu.h \
        vc4_qpu_validate.c \
+       vc4_register_allocate.c \
        vc4_resource.c \
        vc4_resource.h \
        vc4_screen.c \
index b82c08c5ff8be82d8b7460d429bea3e43a51024b..e5864333a68d026b9032e0e58296c21b771a3c8d 100644 (file)
@@ -230,6 +230,7 @@ void vc4_flush(struct pipe_context *pctx);
 void vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo);
 void vc4_emit_state(struct pipe_context *pctx);
 void vc4_generate_code(struct vc4_compile *c);
+struct qpu_reg *vc4_register_allocate(struct vc4_compile *c);
 void vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode);
 
 bool vc4_rt_format_supported(enum pipe_format f);
index 26520fec22f5d3e6ae7f6733bb6572462edd7fcf..6c4c76d3217e1fa28508b0737e1ff180ce23008e 100644 (file)
@@ -210,43 +210,11 @@ serialize_insts(struct vc4_compile *c)
 void
 vc4_generate_code(struct vc4_compile *c)
 {
-        struct qpu_reg allocate_to_qpu_reg[4 + 32 + 32];
-        bool reg_in_use[ARRAY_SIZE(allocate_to_qpu_reg)];
-        int *reg_allocated = calloc(c->num_temps, sizeof(*reg_allocated));
-        int *reg_uses_remaining =
-                calloc(c->num_temps, sizeof(*reg_uses_remaining));
+        struct qpu_reg *temp_registers = vc4_register_allocate(c);
         bool discard = false;
 
-        for (int i = 0; i < ARRAY_SIZE(reg_in_use); i++)
-                reg_in_use[i] = false;
-        for (int i = 0; i < c->num_temps; i++)
-                reg_allocated[i] = -1;
-
-        uint32_t next_reg = 0;
-        for (int i = 0; i < 4; i++)
-                allocate_to_qpu_reg[next_reg++] = qpu_rn(i == 3 ? 4 : i);
-        for (int i = 0; i < 32; i++)
-                allocate_to_qpu_reg[next_reg++] = qpu_ra(i);
-        for (int i = 0; i < 32; i++)
-                allocate_to_qpu_reg[next_reg++] = qpu_rb(i);
-        assert(next_reg == ARRAY_SIZE(allocate_to_qpu_reg));
-
         make_empty_list(&c->qpu_inst_list);
 
-        struct simple_node *node;
-        foreach(node, &c->instructions) {
-                struct qinst *qinst = (struct qinst *)node;
-
-                if (qinst->dst.file == QFILE_TEMP)
-                        reg_uses_remaining[qinst->dst.index]++;
-                for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
-                        if (qinst->src[i].file == QFILE_TEMP)
-                                reg_uses_remaining[qinst->src[i].index]++;
-                }
-                if (qinst->op == QOP_FRAG_Z)
-                        reg_in_use[3 + 32 + QPU_R_FRAG_PAYLOAD_ZW] = true;
-        }
-
         switch (c->stage) {
         case QSTAGE_VERT:
         case QSTAGE_COORD:
@@ -259,6 +227,7 @@ vc4_generate_code(struct vc4_compile *c)
                 break;
         }
 
+        struct simple_node *node;
         foreach(node, &c->instructions) {
                 struct qinst *qinst = (struct qinst *)node;
 
@@ -306,18 +275,7 @@ vc4_generate_code(struct vc4_compile *c)
                                 src[i] = qpu_rn(0);
                                 break;
                         case QFILE_TEMP:
-                                if (reg_allocated[index] == -1) {
-                                        fprintf(stderr, "undefined reg use: ");
-                                        qir_dump_inst(qinst);
-                                        fprintf(stderr, "\n");
-
-                                        src[i] = qpu_rn(0);
-                                } else {
-                                        src[i] = allocate_to_qpu_reg[reg_allocated[index]];
-                                        reg_uses_remaining[index]--;
-                                        if (reg_uses_remaining[index] == 0)
-                                                reg_in_use[reg_allocated[index]] = false;
-                                }
+                                src[i] = temp_registers[index];
                                 break;
                         case QFILE_UNIF:
                                 src[i] = qpu_unif();
@@ -333,63 +291,9 @@ vc4_generate_code(struct vc4_compile *c)
                 case QFILE_NULL:
                         dst = qpu_ra(QPU_W_NOP);
                         break;
-
                 case QFILE_TEMP:
-                        if (reg_allocated[qinst->dst.index] == -1) {
-                                int alloc;
-                                for (alloc = 0;
-                                     alloc < ARRAY_SIZE(reg_in_use);
-                                     alloc++) {
-                                        struct qpu_reg reg = allocate_to_qpu_reg[alloc];
-
-                                        switch (qinst->op) {
-                                        case QOP_PACK_SCALED:
-                                                /* The pack flags require an
-                                                 * A-file register.
-                                                 */
-                                                if (reg.mux != QPU_MUX_A)
-                                                        continue;
-                                                break;
-                                        case QOP_TEX_RESULT:
-                                        case QOP_TLB_COLOR_READ:
-                                                /* Only R4-generating
-                                                 * instructions get to store
-                                                 * values in R4 for now, until
-                                                 * we figure out how to do
-                                                 * interference.
-                                                 */
-                                                if (reg.mux != QPU_MUX_R4)
-                                                        continue;
-                                                break;
-                                        case QOP_FRAG_Z:
-                                                if (reg.mux != QPU_MUX_B ||
-                                                    reg.addr != QPU_R_FRAG_PAYLOAD_ZW) {
-                                                        continue;
-                                                }
-                                                break;
-                                        default:
-                                                if (reg.mux == QPU_MUX_R4)
-                                                        continue;
-                                                break;
-                                        }
-
-                                        if (!reg_in_use[alloc])
-                                                break;
-                                }
-                                assert(alloc != ARRAY_SIZE(reg_in_use) && "need better reg alloc");
-                                reg_in_use[alloc] = true;
-                                reg_allocated[qinst->dst.index] = alloc;
-                        }
-
-                        dst = allocate_to_qpu_reg[reg_allocated[qinst->dst.index]];
-
-                        reg_uses_remaining[qinst->dst.index]--;
-                        if (reg_uses_remaining[qinst->dst.index] == 0) {
-                                reg_in_use[reg_allocated[qinst->dst.index]] =
-                                        false;
-                        }
+                        dst = temp_registers[qinst->dst.index];
                         break;
-
                 case QFILE_VARY:
                 case QFILE_UNIF:
                         assert(!"not reached");
@@ -645,4 +549,6 @@ vc4_generate_code(struct vc4_compile *c)
                 vc4_dump_program(c);
 
         vc4_qpu_validate(c->qpu_insts, c->qpu_inst_count);
+
+        free(temp_registers);
 }
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
new file mode 100644 (file)
index 0000000..97d4fb3
--- /dev/null
@@ -0,0 +1,200 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+
+#include "vc4_context.h"
+#include "vc4_qir.h"
+#include "vc4_qpu.h"
+
+/**
+ * Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
+ *
+ * Instructions are walked in program order and each temp is given the
+ * first free entry of a fixed candidate table that satisfies the
+ * constraints of the instruction writing it.  A register is released once
+ * every counted read and write of its temp has been seen.  Temps that are
+ * never written map to r0.
+ *
+ * The return value has c->num_temps entries and should be freed by the
+ * caller.
+ */
+struct qpu_reg *
+vc4_register_allocate(struct vc4_compile *c)
+{
+        struct simple_node *node;
+        struct qpu_reg allocate_to_qpu_reg[4 + 32 + 32];
+        bool reg_in_use[ARRAY_SIZE(allocate_to_qpu_reg)];
+        int *reg_allocated = calloc(c->num_temps, sizeof(*reg_allocated));
+        int *reg_uses_remaining =
+                calloc(c->num_temps, sizeof(*reg_uses_remaining));
+        struct qpu_reg *temp_registers = calloc(c->num_temps,
+                                                sizeof(*temp_registers));
+
+        for (int i = 0; i < ARRAY_SIZE(reg_in_use); i++)
+                reg_in_use[i] = false;
+        for (int i = 0; i < c->num_temps; i++)
+                reg_allocated[i] = -1;
+
+        /* If things aren't ever written (undefined values), just read from
+         * r0.
+         */
+        for (int i = 0; i < c->num_temps; i++)
+                temp_registers[i] = qpu_rn(0);
+
+        /* Candidate registers in the order they are tried: four
+         * qpu_rn() entries, then the 32 qpu_ra() entries, then the 32
+         * qpu_rb() entries.
+         */
+        uint32_t next_reg = 0;
+        for (int i = 0; i < 4; i++)
+                allocate_to_qpu_reg[next_reg++] = qpu_rn(i == 3 ? 4 : i);
+        for (int i = 0; i < 32; i++)
+                allocate_to_qpu_reg[next_reg++] = qpu_ra(i);
+
+        /* Record where the qpu_rb() entries begin so that they can be
+         * indexed without hard-coding the size of what precedes them.
+         */
+        const uint32_t first_rb = next_reg;
+        for (int i = 0; i < 32; i++)
+                allocate_to_qpu_reg[next_reg++] = qpu_rb(i);
+        assert(next_reg == ARRAY_SIZE(allocate_to_qpu_reg));
+
+        const uint32_t frag_payload_zw = first_rb + QPU_R_FRAG_PAYLOAD_ZW;
+
+        /* First pass: count every read and write of each temp so that the
+         * second pass knows when a register can be handed out again.
+         */
+        foreach(node, &c->instructions) {
+                struct qinst *qinst = (struct qinst *)node;
+
+                if (qinst->dst.file == QFILE_TEMP)
+                        reg_uses_remaining[qinst->dst.index]++;
+                for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
+                        if (qinst->src[i].file == QFILE_TEMP)
+                                reg_uses_remaining[qinst->src[i].index]++;
+                }
+
+                /* QOP_FRAG_Z's result has to live in
+                 * qpu_rb(QPU_R_FRAG_PAYLOAD_ZW) (see below), so keep every
+                 * other temp out of that register until QOP_FRAG_Z claims
+                 * it.
+                 */
+                if (qinst->op == QOP_FRAG_Z)
+                        reg_in_use[frag_payload_zw] = true;
+        }
+
+        /* Second pass: release the registers of temps whose last use has
+         * been reached and pick a register for each newly written temp.
+         */
+        foreach(node, &c->instructions) {
+                struct qinst *qinst = (struct qinst *)node;
+
+                for (int i = 0; i < qir_get_op_nsrc(qinst->op); i++) {
+                        int index = qinst->src[i].index;
+
+                        if (qinst->src[i].file != QFILE_TEMP)
+                                continue;
+
+                        if (reg_allocated[index] == -1) {
+                                fprintf(stderr, "undefined reg use: ");
+                                qir_dump_inst(qinst);
+                                fprintf(stderr, "\n");
+                        } else {
+                                reg_uses_remaining[index]--;
+                                if (reg_uses_remaining[index] == 0)
+                                        reg_in_use[reg_allocated[index]] = false;
+                        }
+                }
+
+                if (qinst->dst.file == QFILE_TEMP) {
+                        if (reg_allocated[qinst->dst.index] == -1) {
+                                int alloc;
+                                for (alloc = 0;
+                                     alloc < ARRAY_SIZE(reg_in_use);
+                                     alloc++) {
+                                        struct qpu_reg reg = allocate_to_qpu_reg[alloc];
+
+                                        switch (qinst->op) {
+                                        case QOP_PACK_SCALED:
+                                                /* The pack flags require an
+                                                 * A-file register.
+                                                 */
+                                                if (reg.mux != QPU_MUX_A)
+                                                        continue;
+                                                break;
+                                        case QOP_TEX_RESULT:
+                                        case QOP_TLB_COLOR_READ:
+                                                /* Only R4-generating
+                                                 * instructions get to store
+                                                 * values in R4 for now, until
+                                                 * we figure out how to do
+                                                 * interference.
+                                                 */
+                                                if (reg.mux != QPU_MUX_R4)
+                                                        continue;
+                                                break;
+                                        case QOP_FRAG_Z:
+                                                if (reg.mux != QPU_MUX_B ||
+                                                    reg.addr != QPU_R_FRAG_PAYLOAD_ZW) {
+                                                        continue;
+                                                }
+                                                break;
+                                        default:
+                                                if (reg.mux == QPU_MUX_R4)
+                                                        continue;
+                                                break;
+                                        }
+
+                                        /* QOP_FRAG_Z only gets here
+                                         * for the register that was
+                                         * reserved for it above, so
+                                         * the in-use flag must not
+                                         * turn it away.
+                                         */
+                                        if (!reg_in_use[alloc] ||
+                                            qinst->op == QOP_FRAG_Z)
+                                                break;
+                                }
+                                assert(alloc != ARRAY_SIZE(reg_in_use) && "need better reg alloc");
+                                reg_in_use[alloc] = true;
+                                reg_allocated[qinst->dst.index] = alloc;
+                                temp_registers[qinst->dst.index] = allocate_to_qpu_reg[alloc];
+                        }
+
+                        reg_uses_remaining[qinst->dst.index]--;
+                        if (reg_uses_remaining[qinst->dst.index] == 0) {
+                                reg_in_use[reg_allocated[qinst->dst.index]] =
+                                        false;
+                        }
+                }
+        }
+
+        /* Only the final mapping is handed back; the bookkeeping arrays
+         * are scratch space.
+         */
+        free(reg_allocated);
+        free(reg_uses_remaining);
+
+        return temp_registers;
+}