From d952a98c5322e64cb436bd8b0f0064441f37ac77 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 7 Sep 2014 14:04:40 -0700 Subject: [PATCH] vc4: Expose r4 to register allocation. We potentially need to be careful that use of a value stored in r4 isn't copy-propagated (or something) across another r4 write. That doesn't appear to happen currently, and this makes the dataflow more obvious. It also opens up not unpacking the r4 value, which will be useful for depth textures. --- src/gallium/drivers/vc4/vc4_program.c | 9 +++--- src/gallium/drivers/vc4/vc4_qir.c | 12 +++---- src/gallium/drivers/vc4/vc4_qir.h | 6 ++-- src/gallium/drivers/vc4/vc4_qpu_emit.c | 45 ++++++++++++++++++++------ 4 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index da5453975bb..2f85cc6ee3c 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -497,11 +497,11 @@ tgsi_to_qir_tex(struct vc4_compile *c, } c->num_texture_samples++; - qir_emit(c, qir_inst(QOP_TEX_RESULT, c->undef, c->undef, c->undef)); + struct qreg r4 = qir_TEX_RESULT(c); struct qreg unpacked[4]; for (int i = 0; i < 4; i++) - unpacked[i] = qir_R4_UNPACK(c, i); + unpacked[i] = qir_R4_UNPACK(c, r4, i); enum pipe_format format = c->key->tex_format[unit]; const uint8_t *swiz = vc4_get_format_swizzle(format); @@ -1096,10 +1096,9 @@ emit_frag_end(struct vc4_compile *c) struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef }; if (c->fs_key->blend.blend_enable || c->fs_key->blend.colormask != 0xf) { - qir_emit(c, qir_inst(QOP_TLB_COLOR_READ, c->undef, - c->undef, c->undef)); + struct qreg r4 = qir_TLB_COLOR_READ(c); for (int i = 0; i < 4; i++) - tlb_read_color[i] = qir_R4_UNPACK(c, i); + tlb_read_color[i] = qir_R4_UNPACK(c, r4, i); for (int i = 0; i < 4; i++) dst_color[i] = get_swizzled_channel(c, tlb_read_color, diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index fc54cdb9092..ef8a4e54d24 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -78,7 +78,7 @@ static const struct qir_op_info qir_op_info[] = { [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true }, [QOP_TLB_PASSTHROUGH_Z_WRITE] = { "tlb_passthrough_z", 0, 0, true }, [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true }, - [QOP_TLB_COLOR_READ] = { "tlb_color_read", 0, 0, true }, + [QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0, true }, [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 }, [QOP_FRAG_X] = { "frag_x", 1, 0 }, @@ -90,11 +90,11 @@ static const struct qir_op_info qir_op_info[] = { [QOP_TEX_T] = { "tex_t", 0, 2 }, [QOP_TEX_R] = { "tex_r", 0, 2 }, [QOP_TEX_B] = { "tex_b", 0, 2 }, - [QOP_TEX_RESULT] = { "tex_result", 0, 0 }, - [QOP_R4_UNPACK_A] = { "r4_unpack_a", 1, 0 }, - [QOP_R4_UNPACK_B] = { "r4_unpack_b", 1, 0 }, - [QOP_R4_UNPACK_C] = { "r4_unpack_c", 1, 0 }, - [QOP_R4_UNPACK_D] = { "r4_unpack_d", 1, 0 }, + [QOP_TEX_RESULT] = { "tex_result", 1, 0, true }, + [QOP_R4_UNPACK_A] = { "r4_unpack_a", 1, 1 }, + [QOP_R4_UNPACK_B] = { "r4_unpack_b", 1, 1 }, + [QOP_R4_UNPACK_C] = { "r4_unpack_c", 1, 1 }, + [QOP_R4_UNPACK_D] = { "r4_unpack_d", 1, 1 }, }; static const char * diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 82d5a27fc1e..05a3249d39b 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -353,13 +353,15 @@ QIR_ALU0(FRAG_X) QIR_ALU0(FRAG_Y) QIR_ALU0(FRAG_Z) QIR_ALU0(FRAG_RCP_W) +QIR_ALU0(TEX_RESULT) +QIR_ALU0(TLB_COLOR_READ) QIR_NODST_1(TLB_DISCARD_SETUP) static inline struct qreg -qir_R4_UNPACK(struct vc4_compile *c, int i) +qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i) { struct qreg t = qir_get_temp(c); - qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef)); + qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef)); return t; } diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index cac2e6bbdca..4dd8609d9ca 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -210,7 +210,7 @@ serialize_insts(struct vc4_compile *c) void vc4_generate_code(struct vc4_compile *c) { - struct qpu_reg allocate_to_qpu_reg[3 + 32 + 32]; + struct qpu_reg allocate_to_qpu_reg[4 + 32 + 32]; bool reg_in_use[ARRAY_SIZE(allocate_to_qpu_reg)]; int *reg_allocated = calloc(c->num_temps, sizeof(*reg_allocated)); int *reg_uses_remaining = @@ -221,12 +221,15 @@ vc4_generate_code(struct vc4_compile *c) reg_in_use[i] = false; for (int i = 0; i < c->num_temps; i++) reg_allocated[i] = -1; - for (int i = 0; i < 3; i++) - allocate_to_qpu_reg[i] = qpu_rn(i); + + uint32_t next_reg = 0; + for (int i = 0; i < 4; i++) + allocate_to_qpu_reg[next_reg++] = qpu_rn(i == 3 ? 4 : i); for (int i = 0; i < 32; i++) - allocate_to_qpu_reg[i + 3] = qpu_ra(i); + allocate_to_qpu_reg[next_reg++] = qpu_ra(i); for (int i = 0; i < 32; i++) - allocate_to_qpu_reg[i + 3 + 32] = qpu_rb(i); + allocate_to_qpu_reg[next_reg++] = qpu_rb(i); + assert(next_reg == ARRAY_SIZE(allocate_to_qpu_reg)); make_empty_list(&c->qpu_inst_list); @@ -338,10 +341,31 @@ vc4_generate_code(struct vc4_compile *c) for (alloc = 0; alloc < ARRAY_SIZE(reg_in_use); alloc++) { - /* The pack flags require an A-file register. */ - if (qinst->op == QOP_PACK_SCALED && - allocate_to_qpu_reg[alloc].mux != QPU_MUX_A) { - continue; + struct qpu_reg reg = allocate_to_qpu_reg[alloc]; + + switch (qinst->op) { + case QOP_PACK_SCALED: + /* The pack flags require an + * A-file register. + */ + if (reg.mux != QPU_MUX_A) + continue; + break; + case QOP_TEX_RESULT: + case QOP_TLB_COLOR_READ: + /* Only R4-generating + * instructions get to store + * values in R4 for now, until + * we figure out how to do + * interference. + */ + if (reg.mux != QPU_MUX_R4) + continue; + break; + default: + if (reg.mux == QPU_MUX_R4) + continue; + break; } if (!reg_in_use[alloc]) @@ -549,7 +573,8 @@ vc4_generate_code(struct vc4_compile *c) case QOP_R4_UNPACK_B: case QOP_R4_UNPACK_C: case QOP_R4_UNPACK_D: - queue(c, qpu_a_MOV(dst, qpu_r4())); + assert(src[0].mux == QPU_MUX_R4); + queue(c, qpu_a_MOV(dst, src[0])); *last_inst(c) |= QPU_PM; *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_R4_8A + (qinst->op - -- 2.30.2