From 89b1b33f44bc6ce71109ac8668529c30b6d6d910 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 21 Aug 2015 00:08:13 -0700 Subject: [PATCH] vc4: Fold the 16-bit integer pack into the instructions generating it. total instructions in shared programs: 97580 -> 96798 (-0.80%) instructions in affected programs: 52826 -> 52044 (-1.48%) --- src/gallium/drivers/vc4/vc4_program.c | 19 ++++++++++--------- src/gallium/drivers/vc4/vc4_qir.c | 2 -- src/gallium/drivers/vc4/vc4_qir.h | 4 ---- src/gallium/drivers/vc4/vc4_qpu_emit.c | 14 +++++--------- .../drivers/vc4/vc4_register_allocate.c | 13 +++++++------ 5 files changed, 22 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 6bf4c9eab9b..e002983fdbb 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1186,22 +1186,23 @@ emit_frag_end(struct vc4_compile *c) static void emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w) { - struct qreg xyi[2]; + struct qreg packed = qir_get_temp(c); for (int i = 0; i < 2; i++) { struct qreg scale = qir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0); - xyi[i] = qir_FTOI(c, qir_FMUL(c, - qir_FMUL(c, - c->outputs[c->output_position_index + i], - scale), - rcp_w)); + struct qreg packed_chan = packed; + packed_chan.pack = QPU_PACK_A_16A + i; + + qir_FTOI_dest(c, packed_chan, + qir_FMUL(c, + qir_FMUL(c, + c->outputs[c->output_position_index + i], + scale), + rcp_w)); } - struct qreg packed = qir_get_temp(c); - qir_PACK_16A_I(c, packed, xyi[0]); - qir_PACK_16B_I(c, packed, xyi[1]); qir_VPM_WRITE(c, packed); } diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index e5efb7cba7e..9d930715f9b 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -76,8 +76,6 @@ static const struct qir_op_info qir_op_info[] = { [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 }, [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 }, [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 }, - [QOP_PACK_16A_I] = { "pack_16a_i", 1, 1 }, - [QOP_PACK_16B_I] = { "pack_16b_i", 1, 1 }, [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true }, [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true }, [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true }, diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index cbeff43d5be..a2b21fa17bb 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -110,8 +110,6 @@ enum qop { QOP_PACK_8B_F, QOP_PACK_8C_F, QOP_PACK_8D_F, - QOP_PACK_16A_I, - QOP_PACK_16B_I, QOP_TLB_DISCARD_SETUP, QOP_TLB_STENCIL_SETUP, QOP_TLB_Z_WRITE, @@ -604,8 +602,6 @@ QIR_PACK(PACK_8A_F) QIR_PACK(PACK_8B_F) QIR_PACK(PACK_8C_F) QIR_PACK(PACK_8D_F) -QIR_PACK(PACK_16A_I) -QIR_PACK(PACK_16B_I) QIR_ALU1(VARY_ADD_C) QIR_NODST_2(TEX_S) QIR_NODST_2(TEX_T) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index bf614a2c1fd..adf3a8b3658 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -402,15 +402,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) queue(c, qpu_a_FADD(dst, src[0], qpu_r5())); break; - case QOP_PACK_16A_I: - case QOP_PACK_16B_I: - queue(c, - qpu_a_MOV(dst, src[0]) | - QPU_SET_FIELD(qinst->op == QOP_PACK_16A_I ? - QPU_PACK_A_16A : QPU_PACK_A_16B, - QPU_PACK)); - break; - case QOP_TEX_S: case QOP_TEX_T: case QOP_TEX_R: @@ -516,6 +507,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) queue(c, qpu_a_alu2(translate[qinst->op].op, dst, src[0], src[1])); + if (qinst->dst.pack) { + assert(dst.mux == QPU_MUX_A); + *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack, + QPU_PACK); + } } break; diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index 93013147d49..2ea88500227 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -268,16 +268,17 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2); break; - case QOP_PACK_16A_I: - case QOP_PACK_16B_I: - /* The pack flags require an A-file dst register. */ - class_bits[inst->dst.index] &= CLASS_BIT_A; - break; - default: break; } + if (inst->dst.pack && !qir_is_mul(inst)) { + /* The non-MUL pack flags require an A-file dst + * register. + */ + class_bits[inst->dst.index] &= CLASS_BIT_A; + } + if (qir_src_needs_a_file(inst)) { switch (inst->op) { case QOP_UNPACK_8A_F: -- 2.30.2