vc4: Add support for texturing (under simulation)
author Eric Anholt <eric@anholt.net>
Tue, 15 Jul 2014 19:29:32 +0000 (12:29 -0700)
committer Eric Anholt <eric@anholt.net>
Mon, 11 Aug 2014 21:40:45 +0000 (14:40 -0700)
Only rgba8888 works, and only a single texture unit, and it's only under
simulation because I haven't built the kernel interface yet.

v2: Rebase on helpers.
v3: Fold in the don't-break-the-arm-build fix.

src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_draw.c
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_defines.h
src/gallium/drivers/vc4/vc4_qpu_disasm.c
src/gallium/drivers/vc4/vc4_qpu_emit.c

index f36b96b0c25ee2fa31d8442f89c779032a8a793b..18e6264b0b6884be78b9703c06f5f3392e96586d 100644 (file)
@@ -197,6 +197,7 @@ void *vc4_simulator_alloc(struct vc4_screen *screen, uint32_t size);
 void vc4_get_uniform_bo(struct vc4_context *vc4,
                         struct vc4_compiled_shader *shader,
                         struct vc4_constbuf_stateobj *cb,
+                        struct vc4_texture_stateobj *texstate,
                         int shader_index, struct vc4_bo **out_bo,
                         uint32_t *out_offset);
 
index c0fb082c548e81588368f641138c097edda69b20..9e4454b141d9b1d8d2a37bb99f8f56700eb66041 100644 (file)
@@ -171,12 +171,15 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         uint32_t fs_ubo_offset, vs_ubo_offset, cs_ubo_offset;
         vc4_get_uniform_bo(vc4, vc4->prog.fs,
                            &vc4->constbuf[PIPE_SHADER_FRAGMENT],
+                           &vc4->fragtex,
                            0, &fs_ubo, &fs_ubo_offset);
         vc4_get_uniform_bo(vc4, vc4->prog.vs,
                            &vc4->constbuf[PIPE_SHADER_VERTEX],
+                           &vc4->verttex,
                            0, &vs_ubo, &vs_ubo_offset);
         vc4_get_uniform_bo(vc4, vc4->prog.vs,
                            &vc4->constbuf[PIPE_SHADER_VERTEX],
+                           &vc4->verttex,
                            1, &cs_ubo, &cs_ubo_offset);
 
         cl_start_shader_reloc(&vc4->shader_rec, 6 + vtx->num_elements);
index 0b26f5adf5bd351b9bbfdf14802981b4b6fab9c6..b45507d154d7b689ebab1604fd59296913fc8127 100644 (file)
@@ -35,6 +35,9 @@
 #include "vc4_context.h"
 #include "vc4_qpu.h"
 #include "vc4_qir.h"
+#ifdef USE_VC4_SIMULATOR
+#include "simpenrose/simpenrose.h"
+#endif
 
 struct tgsi_to_qir {
         struct tgsi_parse_context parser;
@@ -157,6 +160,10 @@ get_src(struct tgsi_to_qir *trans, struct tgsi_src_register *src, int i)
         case TGSI_FILE_INPUT:
                 r = trans->inputs[src->Index * 4 + s];
                 break;
+        case TGSI_FILE_SAMPLER:
+        case TGSI_FILE_SAMPLER_VIEW:
+                r = c->undef;
+                break;
         default:
                 fprintf(stderr, "unknown src file %d\n", src->File);
                 abort();
@@ -278,6 +285,51 @@ tgsi_to_qir_lrp(struct tgsi_to_qir *trans,
 
 }
 
+/**
+ * Emits the QIR for a TGSI texture-sampling instruction.
+ *
+ * Called from emit_tgsi_instruction() for TGSI_OPCODE_TEX, TGSI_OPCODE_TXP
+ * and TGSI_OPCODE_TXB.  The texture coordinates are taken from the first
+ * source operand, and one unpacked result channel is stored per enabled bit
+ * of the destination write mask.
+ *
+ * @param trans      TGSI-to-QIR translation state.
+ * @param tgsi_inst  The texture instruction being translated.
+ * @param op         QIR op looked up in op_trans[] by the caller; not used
+ *                   by this function.
+ * @param src        Source registers, indexed as operand * 4 + channel.
+ */
+static void
+tgsi_to_qir_tex(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src)
+{
+        struct qcompile *c = trans->c;
+
+        /* Saturated texture results are not handled here. */
+        assert(!tgsi_inst->Instruction.Saturate);
+
+        /* Channels 0 and 1 of operand 0 are the S and T coordinates. */
+        struct qreg s = src[0 * 4 + 0];
+        struct qreg t = src[0 * 4 + 1];
+
+        /* Projective lookup: scale both coordinates by the reciprocal of
+         * channel 3 of operand 0.
+         */
+        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
+                struct qreg proj = qir_RCP(c, src[0 * 4 + 3]);
+                s = qir_FMUL(c, s, proj);
+                t = qir_FMUL(c, t, proj);
+        }
+
+        /* Texture/sampler selector stored as the data of the config
+         * uniforms; hard-coded to 0 for now.
+         */
+        uint32_t tex_and_sampler = 0; /* XXX */
+
+        /* The first parameter write of the sequence carries the texture
+         * config parameter 0 uniform.
+         */
+        qir_TEX_T(c, t, add_uniform(trans, QUNIFORM_TEXTURE_CONFIG_P0,
+                                    tex_and_sampler));
+
+        /* The second parameter write carries config parameter 1.  With a
+         * LOD bias that second write is the bias (channel 3 of operand 0),
+         * and the S write that follows gets a constant 0 uniform instead.
+         */
+        struct qreg sampler_p1 = add_uniform(trans, QUNIFORM_TEXTURE_CONFIG_P1,
+                                             tex_and_sampler);
+        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
+                qir_TEX_B(c, src[0 * 4 + 3], sampler_p1);
+                qir_TEX_S(c, s, add_uniform(trans, QUNIFORM_CONSTANT, 0));
+        } else {
+                qir_TEX_S(c, s, sampler_p1);
+        }
+
+        /* Signal that the texture read is needed; the result lands in r4. */
+        qir_emit(c, qir_inst(QOP_TEX_RESULT, c->undef, c->undef, c->undef));
+
+        /* Unpack one channel per enabled write-mask bit into a fresh temp.
+         * QOP_R4_UNPACK_A + i relies on the four unpack ops being
+         * consecutive in enum qop.
+         */
+        for (int i = 0; i < 4; i++) {
+                if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
+                        continue;
+
+                struct qreg dst = qir_get_temp(c);
+                qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i,
+                                     dst,
+                                     c->undef, c->undef));
+                update_dst(trans, tgsi_inst, i, dst);
+        }
+}
+
 static struct qreg
 tgsi_to_qir_pow(struct tgsi_to_qir *trans,
                 struct tgsi_full_instruction *tgsi_inst,
@@ -577,13 +629,6 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans,
         if (tgsi_op == TGSI_OPCODE_END)
                 return;
 
-        if (tgsi_op > ARRAY_SIZE(op_trans) || !op_trans[tgsi_op].func) {
-                fprintf(stderr, "unknown tgsi inst: ");
-                tgsi_dump_instruction(tgsi_inst, asdf++);
-                fprintf(stderr, "\n");
-                abort();
-        }
-
         struct qreg src_regs[12];
         for (int s = 0; s < 3; s++) {
                 for (int i = 0; i < 4; i++) {
@@ -592,6 +637,24 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans,
                 }
         }
 
+        switch (tgsi_op) {
+        case TGSI_OPCODE_TEX:
+        case TGSI_OPCODE_TXP:
+        case TGSI_OPCODE_TXB:
+                tgsi_to_qir_tex(trans, tgsi_inst,
+                                op_trans[tgsi_op].op, src_regs);
+                return;
+        default:
+                break;
+        }
+
+        if (tgsi_op > ARRAY_SIZE(op_trans) || !(op_trans[tgsi_op].func)) {
+                fprintf(stderr, "unknown tgsi inst: ");
+                tgsi_dump_instruction(tgsi_inst, asdf++);
+                fprintf(stderr, "\n");
+                abort();
+        }
+
         for (int i = 0; i < 4; i++) {
                 if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
                         continue;
@@ -1043,9 +1106,74 @@ vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
         free(so);
 }
 
+/**
+ * Translates a gallium PIPE_TEX_WRAP_* mode into the 2-bit wrap value that
+ * get_texture_p1() packs into texture config parameter 1.
+ *
+ * PIPE_TEX_WRAP_CLAMP is treated the same as PIPE_TEX_WRAP_CLAMP_TO_EDGE.
+ * Unknown modes are reported on stderr and trip an assert; when asserts are
+ * compiled out they fall back to 0, the same value as PIPE_TEX_WRAP_REPEAT.
+ *
+ * @param p_wrap  One of the PIPE_TEX_WRAP_* values.
+ * @return The wrap value in the range 0-3.
+ */
+static uint32_t translate_wrap(uint32_t p_wrap)
+{
+        switch (p_wrap) {
+        case PIPE_TEX_WRAP_REPEAT:
+                return 0;
+        case PIPE_TEX_WRAP_CLAMP:
+        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+                return 1;
+        case PIPE_TEX_WRAP_MIRROR_REPEAT:
+                return 2;
+        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+                return 3;
+        default:
+                /* p_wrap is unsigned, so print it with %u rather than %d. */
+                fprintf(stderr, "Unknown wrap mode %u\n", (unsigned)p_wrap);
+                assert(!"not reached");
+                return 0;
+        }
+}
+
+/**
+ * Computes the value of a QUNIFORM_TEXTURE_CONFIG_P0 uniform.
+ *
+ * The word currently holds the sampler view's last mip level ORed with the
+ * texture's address.  The address is only available under the simulator;
+ * other builds contribute 0 for it.
+ *
+ * @param texstate         Texture state to read the sampler view from.
+ * @param tex_and_sampler  Uniform data; the texture index is in bits 0-7.
+ * @return The packed config parameter 0 word.
+ */
+static uint32_t
+get_texture_p0(struct vc4_texture_stateobj *texstate,
+               uint32_t tex_and_sampler)
+{
+        uint32_t texi = (tex_and_sampler >> 0) & 0xff;
+        struct pipe_sampler_view *texture = texstate->textures[texi];
+        uint32_t p0 = texture->u.tex.last_level; /* XXX: data type */
+
+        /* Use #ifdef to match the guard around the simpenrose.h include,
+         * and keep rsc inside it so non-simulator builds do not end up with
+         * an unused variable.
+         */
+#ifdef USE_VC4_SIMULATOR
+        struct vc4_resource *rsc = vc4_resource(texture->texture);
+
+        p0 |= simpenrose_hw_addr(rsc->bo->map); /* XXX */
+#endif
+
+        return p0;
+}
+
+/**
+ * Computes the value of a QUNIFORM_TEXTURE_CONFIG_P1 uniform.
+ *
+ * The word packs, from the top bit down: a data type bit (always set for
+ * now), the texture height (from bit 20) and width (from bit 8), the
+ * magnification filter (bit 7), the minification filter (from bit 4), and
+ * the T and S wrap modes (from bits 2 and 0).
+ *
+ * @param texstate         Texture state to read the view and sampler from.
+ * @param tex_and_sampler  Uniform data; the texture index is in bits 0-7
+ *                         and the sampler index in bits 8-15.
+ * @return The packed config parameter 1 word.
+ */
+static uint32_t
+get_texture_p1(struct vc4_texture_stateobj *texstate,
+               uint32_t tex_and_sampler)
+{
+        /* The width and height fields are 11 bits wide (a size of 2048 is
+         * encoded as 0), so mask them to keep large sizes from spilling
+         * into the neighbouring fields.
+         */
+        const uint32_t size_mask = 2047;
+
+        uint32_t texi = (tex_and_sampler >> 0) & 0xff;
+        uint32_t sampi = (tex_and_sampler >> 8) & 0xff;
+        struct pipe_sampler_view *texture = texstate->textures[texi];
+        struct pipe_sampler_state *sampler = texstate->samplers[sampi];
+        static const uint32_t mipfilter_map[] = {
+                [PIPE_TEX_MIPFILTER_NEAREST] = 2,
+                [PIPE_TEX_MIPFILTER_LINEAR] = 4,
+                [PIPE_TEX_MIPFILTER_NONE] = 0
+        };
+        static const uint32_t imgfilter_map[] = {
+                [PIPE_TEX_FILTER_NEAREST] = 1,
+                [PIPE_TEX_FILTER_LINEAR] = 0,
+        };
+
+        uint32_t height = texture->texture->height0 & size_mask;
+        uint32_t width = texture->texture->width0 & size_mask;
+
+        /* 1u, not 1: shifting a signed 1 into bit 31 is undefined. */
+        return ((1u << 31) /* XXX: data type */|
+                (height << 20) |
+                (width << 8) |
+                (imgfilter_map[sampler->mag_img_filter] << 7) |
+                ((imgfilter_map[sampler->min_img_filter] +
+                  mipfilter_map[sampler->min_mip_filter]) << 4) |
+                (translate_wrap(sampler->wrap_t) << 2) |
+                (translate_wrap(sampler->wrap_s) << 0));
+}
+
 void
 vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                    struct vc4_constbuf_stateobj *cb,
+                   struct vc4_texture_stateobj *texstate,
                    int shader_index, struct vc4_bo **out_bo,
                    uint32_t *out_offset)
 {
@@ -1055,6 +1183,7 @@ vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
         uint32_t *map = vc4_bo_map(ubo);
 
         for (int i = 0; i < uinfo->count; i++) {
+
                 switch (uinfo->contents[i]) {
                 case QUNIFORM_CONSTANT:
                         map[i] = uinfo->data[i];
@@ -1068,6 +1197,14 @@ vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                 case QUNIFORM_VIEWPORT_Y_SCALE:
                         map[i] = fui(vc4->framebuffer.height * -16.0f / 2.0f);
                         break;
+
+                case QUNIFORM_TEXTURE_CONFIG_P0:
+                        map[i] = get_texture_p0(texstate, uinfo->data[i]);
+                        break;
+
+                case QUNIFORM_TEXTURE_CONFIG_P1:
+                        map[i] = get_texture_p1(texstate, uinfo->data[i]);
+                        break;
                 }
 #if 0
                 fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
index 495d16e2a256bcd176fa8b61c430e77557e248b9..0499eb9406fab21c01f4aa8070c2fb2a61f6c66f 100644 (file)
@@ -63,6 +63,16 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_VPM_READ] = { "vpm_read", 0, 1, true },
         [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
         [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
+
+        [QOP_TEX_S] = { "tex_s", 0, 2 },
+        [QOP_TEX_T] = { "tex_t", 0, 2 },
+        [QOP_TEX_R] = { "tex_r", 0, 2 },
+        [QOP_TEX_B] = { "tex_b", 0, 2 },
+        [QOP_TEX_RESULT] = { "tex_result", 0, 0 },
+        [QOP_R4_UNPACK_A] = { "r4_unpack_a", 1, 0 },
+        [QOP_R4_UNPACK_B] = { "r4_unpack_b", 1, 0 },
+        [QOP_R4_UNPACK_C] = { "r4_unpack_c", 1, 0 },
+        [QOP_R4_UNPACK_D] = { "r4_unpack_d", 1, 0 },
 };
 
 static const char *
index 753f82e5021f6f019620263f17f8a76f78bb38b8..a76d091b327c2b30bd4785b097bb86cef202cc33 100644 (file)
@@ -72,6 +72,24 @@ enum qop {
         QOP_VPM_READ,
         QOP_TLB_COLOR_WRITE,
         QOP_VARY_ADD_C,
+
+        /** Texture x coordinate parameter write */
+        QOP_TEX_S,
+        /** Texture y coordinate parameter write */
+        QOP_TEX_T,
+        /** Texture border color parameter or cube map z coordinate write */
+        QOP_TEX_R,
+        /** Texture LOD bias parameter write */
+        QOP_TEX_B,
+        /**
+         * Signal of texture read being necessary and then reading r4 into
+         * the destination
+         */
+        QOP_TEX_RESULT,
+        QOP_R4_UNPACK_A,
+        QOP_R4_UNPACK_B,
+        QOP_R4_UNPACK_C,
+        QOP_R4_UNPACK_D
 };
 
 struct simple_node {
@@ -120,6 +138,25 @@ enum quniform_contents {
         QUNIFORM_VIEWPORT_X_SCALE,
         QUNIFORM_VIEWPORT_Y_SCALE,
         /** @} */
+
+        /**
+         * A reference to a texture config parameter 0 uniform.
+         *
+         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
+         * defines texture type, miplevels, and such.  It will be found as a
+         * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
+         */
+        QUNIFORM_TEXTURE_CONFIG_P0,
+
+        /**
+         * A reference to a texture config parameter 1 uniform.
+         *
+         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
+         * defines texture width, height, filters, and wrap modes.  It will be
+         * found as a parameter to the second QOP_TEX_[STRB] instruction in a
+         * sequence.
+         */
+        QUNIFORM_TEXTURE_CONFIG_P1,
 };
 
 struct qcompile {
@@ -178,6 +215,20 @@ qir_##name(struct qcompile *c, struct qreg a, struct qreg b)             \
         return t;                                                        \
 }
 
+/**
+ * Defines qir_<name>(), which emits a QOP_<name> instruction taking one
+ * source operand, with c->undef as its destination.
+ */
+#define QIR_NODST_1(name)                                               \
+static inline void                                                      \
+qir_##name(struct qcompile *c, struct qreg src0)                        \
+{                                                                       \
+        qir_emit(c, qir_inst(QOP_##name, c->undef, src0, c->undef));    \
+}
+
+/**
+ * Defines qir_<name>(), which emits a QOP_<name> instruction taking two
+ * source operands, with c->undef as its destination.
+ */
+#define QIR_NODST_2(name)                                               \
+static inline void                                                      \
+qir_##name(struct qcompile *c, struct qreg src0, struct qreg src1)      \
+{                                                                       \
+        qir_emit(c, qir_inst(QOP_##name, c->undef, src0, src1));        \
+}
+
 QIR_ALU1(MOV)
 QIR_ALU2(FADD)
 QIR_ALU2(FSUB)
@@ -194,12 +245,11 @@ QIR_ALU1(EXP2)
 QIR_ALU1(LOG2)
 QIR_ALU2(PACK_SCALED)
 QIR_ALU1(VARY_ADD_C)
-
-static inline void
-qir_VPM_WRITE(struct qcompile *c, struct qreg a)
-{
-        qir_emit(c, qir_inst(QOP_VPM_WRITE, c->undef, a, c->undef));
-}
+QIR_NODST_1(VPM_WRITE)
+QIR_NODST_2(TEX_S)
+QIR_NODST_2(TEX_T)
+QIR_NODST_2(TEX_R)
+QIR_NODST_2(TEX_B)
 
 static inline struct qreg
 qir_CMP(struct qcompile *c, struct qreg cmp, struct qreg a, struct qreg b)
index bdd5d94708ff032d2685c983f708b1c3f9303976..224d9aaa44de2506bab3dbab63f1e2b1a5b04f5a 100644 (file)
@@ -195,6 +195,17 @@ enum qpu_pack_a {
         QPU_PACK_A_8D_SAT,
 };
 
+/**
+ * Unpack modes for reads of r4, as stored in the QPU_UNPACK instruction
+ * field.
+ *
+ * The 8A..8D entries must stay consecutive: the code generator picks one by
+ * adding a channel offset to QPU_UNPACK_R4_8A.
+ */
+enum qpu_unpack_r4 {
+        QPU_UNPACK_R4_NOP         = 0,
+        QPU_UNPACK_R4_F16A_TO_F32 = 1,
+        QPU_UNPACK_R4_F16B_TO_F32 = 2,
+        QPU_UNPACK_R4_8D_REP      = 3,
+        QPU_UNPACK_R4_8A          = 4,
+        QPU_UNPACK_R4_8B          = 5,
+        QPU_UNPACK_R4_8C          = 6,
+        QPU_UNPACK_R4_8D          = 7,
+};
+
 #define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
 /* Using the GNU statement expression extension */
 #define QPU_SET_FIELD(value, field)                                       \
@@ -209,6 +220,9 @@ enum qpu_pack_a {
 #define QPU_SIG_SHIFT                   60
 #define QPU_SIG_MASK                    QPU_MASK(63, 60)
 
+#define QPU_UNPACK_SHIFT                57
+#define QPU_UNPACK_MASK                 QPU_MASK(59, 57)
+
 /**
  * If set, the pack field means PACK_MUL or R4 packing, instead of normal
  * regfile a packing.
index 4ec6d9657b7cac7955be36d0840ca01cab0c75d8..525710585aedc7bbb5d256a56a5580bbfde6b992 100644 (file)
@@ -93,6 +93,17 @@ static const char *qpu_pack_mul[] = {
         [QPU_PACK_MUL_8D] = "8d",
 };
 
+/* Suffix the disassembler prints after r4 for each unpack mode; indexed by
+ * enum qpu_unpack_r4.
+ */
+static const char *qpu_unpack_r4[] = {
+        [QPU_UNPACK_R4_NOP]         = "",
+        [QPU_UNPACK_R4_F16A_TO_F32] = "f16a",
+        [QPU_UNPACK_R4_F16B_TO_F32] = "f16b",
+        [QPU_UNPACK_R4_8D_REP]      = "8d_rep",
+        [QPU_UNPACK_R4_8A]          = "8a",
+        [QPU_UNPACK_R4_8B]          = "8b",
+        [QPU_UNPACK_R4_8C]          = "8c",
+        [QPU_UNPACK_R4_8D]          = "8d",
+};
+
 static const char *special_read_a[] = {
         "uni",
         NULL,
@@ -263,6 +274,7 @@ print_alu_src(uint64_t inst, uint32_t mux)
         uint32_t raddr = (is_a ?
                           QPU_GET_FIELD(inst, QPU_RADDR_A) :
                           QPU_GET_FIELD(inst, QPU_RADDR_B));
+        uint32_t unpack = QPU_GET_FIELD(inst, QPU_UNPACK);
 
         if (mux <= QPU_MUX_R5)
                 fprintf(stderr, "r%d", mux);
@@ -287,6 +299,11 @@ print_alu_src(uint64_t inst, uint32_t mux)
                 else
                         fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
         }
+
+        if (mux == QPU_MUX_R4 && (inst & QPU_PM) &&
+            unpack != QPU_UNPACK_R4_NOP) {
+                fprintf(stderr, ".%s", DESC(qpu_unpack_r4, unpack));
+        }
 }
 
 static void
index 2eebabee419fe6b82cf3f1b6a4677b7a84e95a51..9d55390c67f31d0c55a37cc32da9fec1fbacf181 100644 (file)
@@ -490,6 +490,38 @@ vc4_generate_code(struct qcompile *c)
                         break;
                 }
 
+                case QOP_TEX_S:
+                case QOP_TEX_T:
+                case QOP_TEX_R:
+                case QOP_TEX_B:
+                        queue(c, qpu_inst(qpu_a_MOV(qpu_rb(QPU_W_TMU0_S +
+                                                           (qinst->op -
+                                                            QOP_TEX_S)),
+                                                    src[0]),
+                                          qpu_m_NOP()));
+                        break;
+
+                case QOP_TEX_RESULT:
+                        queue(c, qpu_inst(qpu_a_NOP(), qpu_m_NOP()));
+                        *last_inst(c) = qpu_set_sig(*last_inst(c),
+                                                    QPU_SIG_LOAD_TMU0);
+
+                        break;
+
+                case QOP_R4_UNPACK_A:
+                case QOP_R4_UNPACK_B:
+                case QOP_R4_UNPACK_C:
+                case QOP_R4_UNPACK_D:
+                        queue(c, qpu_inst(qpu_a_MOV(dst, qpu_r4()),
+                                          qpu_m_NOP()));
+                        *last_inst(c) |= QPU_PM;
+                        *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_R4_8A +
+                                                       (qinst->op -
+                                                        QOP_R4_UNPACK_A),
+                                                       QPU_UNPACK);
+
+                        break;
+
                 default:
                         assert(qinst->op < ARRAY_SIZE(translate));
                         assert(translate[qinst->op].op != 0); /* NOPs */