vc4: Expose r4 to register allocation.
author Eric Anholt <eric@anholt.net>
Sun, 7 Sep 2014 21:04:40 +0000 (14:04 -0700)
committer Eric Anholt <eric@anholt.net>
Wed, 10 Sep 2014 03:38:39 +0000 (20:38 -0700)
We potentially need to be careful that use of a value stored in r4 isn't
copy-propagated (or something) across another r4 write.  That doesn't
appear to happen currently, and this makes the dataflow more obvious.  It
also opens up not unpacking the r4 value, which will be useful for depth
textures.

src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c

index da5453975bbe83545b83a2effbd293ad72a40c4e..2f85cc6ee3cef2104624191e18b25105e73d5536 100644 (file)
@@ -497,11 +497,11 @@ tgsi_to_qir_tex(struct vc4_compile *c,
         }
 
         c->num_texture_samples++;
-        qir_emit(c, qir_inst(QOP_TEX_RESULT, c->undef, c->undef, c->undef));
+        struct qreg r4 = qir_TEX_RESULT(c);
 
         struct qreg unpacked[4];
         for (int i = 0; i < 4; i++)
-                unpacked[i] = qir_R4_UNPACK(c, i);
+                unpacked[i] = qir_R4_UNPACK(c, r4, i);
 
         enum pipe_format format = c->key->tex_format[unit];
         const uint8_t *swiz = vc4_get_format_swizzle(format);
@@ -1096,10 +1096,9 @@ emit_frag_end(struct vc4_compile *c)
         struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
         if (c->fs_key->blend.blend_enable ||
             c->fs_key->blend.colormask != 0xf) {
-                qir_emit(c, qir_inst(QOP_TLB_COLOR_READ, c->undef,
-                                     c->undef, c->undef));
+                struct qreg r4 = qir_TLB_COLOR_READ(c);
                 for (int i = 0; i < 4; i++)
-                        tlb_read_color[i] = qir_R4_UNPACK(c, i);
+                        tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
                 for (int i = 0; i < 4; i++)
                         dst_color[i] = get_swizzled_channel(c,
                                                             tlb_read_color,
index fc54cdb90924f92979b9f44da5e96cd3b5399a91..ef8a4e54d2427268a6b36a6dbe7a8a2a57f981ac 100644 (file)
@@ -78,7 +78,7 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
         [QOP_TLB_PASSTHROUGH_Z_WRITE] = { "tlb_passthrough_z", 0, 0, true },
         [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
-        [QOP_TLB_COLOR_READ] = { "tlb_color_read", 0, 0, true },
+        [QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0, true },
         [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
 
         [QOP_FRAG_X] = { "frag_x", 1, 0 },
@@ -90,11 +90,11 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_TEX_T] = { "tex_t", 0, 2 },
         [QOP_TEX_R] = { "tex_r", 0, 2 },
         [QOP_TEX_B] = { "tex_b", 0, 2 },
-        [QOP_TEX_RESULT] = { "tex_result", 0, 0 },
-        [QOP_R4_UNPACK_A] = { "r4_unpack_a", 1, 0 },
-        [QOP_R4_UNPACK_B] = { "r4_unpack_b", 1, 0 },
-        [QOP_R4_UNPACK_C] = { "r4_unpack_c", 1, 0 },
-        [QOP_R4_UNPACK_D] = { "r4_unpack_d", 1, 0 },
+        [QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
+        [QOP_R4_UNPACK_A] = { "r4_unpack_a", 1, 1 },
+        [QOP_R4_UNPACK_B] = { "r4_unpack_b", 1, 1 },
+        [QOP_R4_UNPACK_C] = { "r4_unpack_c", 1, 1 },
+        [QOP_R4_UNPACK_D] = { "r4_unpack_d", 1, 1 },
 };
 
 static const char *
index 82d5a27fc1ea852cdb7c29abc0e033551fa8151e..05a3249d39b0de017ad2cd85d70b694159cc7e5b 100644 (file)
@@ -353,13 +353,15 @@ QIR_ALU0(FRAG_X)
 QIR_ALU0(FRAG_Y)
 QIR_ALU0(FRAG_Z)
 QIR_ALU0(FRAG_RCP_W)
+QIR_ALU0(TEX_RESULT)
+QIR_ALU0(TLB_COLOR_READ)
 QIR_NODST_1(TLB_DISCARD_SETUP)
 
 static inline struct qreg
-qir_R4_UNPACK(struct vc4_compile *c, int i)
+qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i)
 {
         struct qreg t = qir_get_temp(c);
-        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef));
+        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef));
         return t;
 }
 
index cac2e6bbdca40d434092afea90495aa9e294abf5..4dd8609d9cad56537a9acf1a888e5f65f33225e1 100644 (file)
@@ -210,7 +210,7 @@ serialize_insts(struct vc4_compile *c)
 void
 vc4_generate_code(struct vc4_compile *c)
 {
-        struct qpu_reg allocate_to_qpu_reg[3 + 32 + 32];
+        struct qpu_reg allocate_to_qpu_reg[4 + 32 + 32];
         bool reg_in_use[ARRAY_SIZE(allocate_to_qpu_reg)];
         int *reg_allocated = calloc(c->num_temps, sizeof(*reg_allocated));
         int *reg_uses_remaining =
@@ -221,12 +221,15 @@ vc4_generate_code(struct vc4_compile *c)
                 reg_in_use[i] = false;
         for (int i = 0; i < c->num_temps; i++)
                 reg_allocated[i] = -1;
-        for (int i = 0; i < 3; i++)
-                allocate_to_qpu_reg[i] = qpu_rn(i);
+
+        uint32_t next_reg = 0;
+        for (int i = 0; i < 4; i++)
+                allocate_to_qpu_reg[next_reg++] = qpu_rn(i == 3 ? 4 : i);
         for (int i = 0; i < 32; i++)
-                allocate_to_qpu_reg[i + 3] = qpu_ra(i);
+                allocate_to_qpu_reg[next_reg++] = qpu_ra(i);
         for (int i = 0; i < 32; i++)
-                allocate_to_qpu_reg[i + 3 + 32] = qpu_rb(i);
+                allocate_to_qpu_reg[next_reg++] = qpu_rb(i);
+        assert(next_reg == ARRAY_SIZE(allocate_to_qpu_reg));
 
         make_empty_list(&c->qpu_inst_list);
 
@@ -338,10 +341,31 @@ vc4_generate_code(struct vc4_compile *c)
                                 for (alloc = 0;
                                      alloc < ARRAY_SIZE(reg_in_use);
                                      alloc++) {
-                                        /* The pack flags require an A-file register. */
-                                        if (qinst->op == QOP_PACK_SCALED &&
-                                            allocate_to_qpu_reg[alloc].mux != QPU_MUX_A) {
-                                                continue;
+                                        struct qpu_reg reg = allocate_to_qpu_reg[alloc];
+
+                                        switch (qinst->op) {
+                                        case QOP_PACK_SCALED:
+                                                /* The pack flags require an
+                                                 * A-file register.
+                                                 */
+                                                if (reg.mux != QPU_MUX_A)
+                                                        continue;
+                                                break;
+                                        case QOP_TEX_RESULT:
+                                        case QOP_TLB_COLOR_READ:
+                                                /* Only R4-generating
+                                                 * instructions get to store
+                                                 * values in R4 for now, until
+                                                 * we figure out how to do
+                                                 * interference.
+                                                 */
+                                                if (reg.mux != QPU_MUX_R4)
+                                                        continue;
+                                                break;
+                                        default:
+                                                if (reg.mux == QPU_MUX_R4)
+                                                        continue;
+                                                break;
                                         }
 
                                         if (!reg_in_use[alloc])
@@ -549,7 +573,8 @@ vc4_generate_code(struct vc4_compile *c)
                 case QOP_R4_UNPACK_B:
                 case QOP_R4_UNPACK_C:
                 case QOP_R4_UNPACK_D:
-                        queue(c, qpu_a_MOV(dst, qpu_r4()));
+                        assert(src[0].mux == QPU_MUX_R4);
+                        queue(c, qpu_a_MOV(dst, src[0]));
                         *last_inst(c) |= QPU_PM;
                         *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_R4_8A +
                                                        (qinst->op -