From 66c6c401279aa4152a24681f64d0e101aa004593 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Tue, 15 Jul 2014 12:29:32 -0700
Subject: [PATCH] vc4: Add support for texturing (under simulation)

Only rgba8888 works, and only a single texture unit, and it's only under
simulation because I haven't built the kernel interface yet.

v2: Rebase on helpers.
v3: Fold in the don't-break-the-arm-build fix.
v4: Review fixes: bounds-check the op_trans index with >= instead of >,
    mask the texture width/height to their 11-bit config parameter 1
    fields, build bit 31 from an unsigned constant, print the unknown
    wrap mode with %u, and test USE_VC4_SIMULATOR with #ifdef everywhere.
---
 src/gallium/drivers/vc4/vc4_context.h     |   1 +
 src/gallium/drivers/vc4/vc4_draw.c        |   3 +
 src/gallium/drivers/vc4/vc4_program.c     | 151 +++++++++++++++++++++-
 src/gallium/drivers/vc4/vc4_qir.c         |  10 ++
 src/gallium/drivers/vc4/vc4_qir.h         |  62 ++++++++-
 src/gallium/drivers/vc4/vc4_qpu_defines.h |  14 ++
 src/gallium/drivers/vc4/vc4_qpu_disasm.c  |  17 +++
 src/gallium/drivers/vc4/vc4_qpu_emit.c    |  32 +++++
 8 files changed, 277 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index f36b96b0c25..18e6264b0b6 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -197,6 +197,7 @@ void *vc4_simulator_alloc(struct vc4_screen *screen, uint32_t size);
 void vc4_get_uniform_bo(struct vc4_context *vc4,
                         struct vc4_compiled_shader *shader,
                         struct vc4_constbuf_stateobj *cb,
+                        struct vc4_texture_stateobj *texstate,
                         int shader_index, struct vc4_bo **out_bo,
                         uint32_t *out_offset);
 
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index c0fb082c548..9e4454b141d 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -171,12 +171,15 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         uint32_t fs_ubo_offset, vs_ubo_offset, cs_ubo_offset;
         vc4_get_uniform_bo(vc4, vc4->prog.fs,
                            &vc4->constbuf[PIPE_SHADER_FRAGMENT],
+                           &vc4->fragtex,
                            0, &fs_ubo, &fs_ubo_offset);
         vc4_get_uniform_bo(vc4, vc4->prog.vs,
                            &vc4->constbuf[PIPE_SHADER_VERTEX],
+                           &vc4->verttex,
                            0, &vs_ubo, &vs_ubo_offset);
         vc4_get_uniform_bo(vc4, vc4->prog.vs,
                            &vc4->constbuf[PIPE_SHADER_VERTEX],
+                           &vc4->verttex,
                            1, &cs_ubo, &cs_ubo_offset);
 
         cl_start_shader_reloc(&vc4->shader_rec, 6 + vtx->num_elements);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 0b26f5adf5b..b45507d154d 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -35,6 +35,9 @@
 #include "vc4_context.h"
 #include "vc4_qpu.h"
 #include "vc4_qir.h"
+#ifdef USE_VC4_SIMULATOR
+#include "simpenrose/simpenrose.h"
+#endif
 
 struct tgsi_to_qir {
         struct tgsi_parse_context parser;
@@ -157,6 +160,10 @@ get_src(struct tgsi_to_qir *trans, struct tgsi_src_register *src, int i)
         case TGSI_FILE_INPUT:
                 r = trans->inputs[src->Index * 4 + s];
                 break;
+        case TGSI_FILE_SAMPLER:
+        case TGSI_FILE_SAMPLER_VIEW:
+                r = c->undef;
+                break;
         default:
                 fprintf(stderr, "unknown src file %d\n", src->File);
                 abort();
@@ -278,6 +285,51 @@ tgsi_to_qir_lrp(struct tgsi_to_qir *trans,
 
 }
 
+static void
+tgsi_to_qir_tex(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src)
+{
+        struct qcompile *c = trans->c;
+
+        assert(!tgsi_inst->Instruction.Saturate);
+
+        struct qreg s = src[0 * 4 + 0];
+        struct qreg t = src[0 * 4 + 1];
+
+        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
+                struct qreg proj = qir_RCP(c, src[0 * 4 + 3]);
+                s = qir_FMUL(c, s, proj);
+                t = qir_FMUL(c, t, proj);
+        }
+
+        uint32_t tex_and_sampler = 0; /* XXX */
+        qir_TEX_T(c, t, add_uniform(trans, QUNIFORM_TEXTURE_CONFIG_P0,
+                                    tex_and_sampler));
+
+        struct qreg sampler_p1 = add_uniform(trans, QUNIFORM_TEXTURE_CONFIG_P1,
+                                             tex_and_sampler);
+        if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
+                qir_TEX_B(c, src[0 * 4 + 3], sampler_p1);
+                qir_TEX_S(c, s, add_uniform(trans, QUNIFORM_CONSTANT, 0));
+        } else {
+                qir_TEX_S(c, s, sampler_p1);
+        }
+
+        qir_emit(c, qir_inst(QOP_TEX_RESULT, c->undef, c->undef, c->undef));
+
+        for (int i = 0; i < 4; i++) {
+                if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
+                        continue;
+
+                struct qreg dst = qir_get_temp(c);
+                qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i,
+                                     dst,
+                                     c->undef, c->undef));
+                update_dst(trans, tgsi_inst, i, dst);
+        }
+}
+
 static struct qreg
 tgsi_to_qir_pow(struct tgsi_to_qir *trans,
                 struct tgsi_full_instruction *tgsi_inst,
@@ -577,13 +629,6 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans,
         if (tgsi_op == TGSI_OPCODE_END)
                 return;
 
-        if (tgsi_op > ARRAY_SIZE(op_trans) || !op_trans[tgsi_op].func) {
-                fprintf(stderr, "unknown tgsi inst: ");
-                tgsi_dump_instruction(tgsi_inst, asdf++);
-                fprintf(stderr, "\n");
-                abort();
-        }
-
         struct qreg src_regs[12];
         for (int s = 0; s < 3; s++) {
                 for (int i = 0; i < 4; i++) {
@@ -592,6 +637,24 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans,
                 }
         }
 
+        switch (tgsi_op) {
+        case TGSI_OPCODE_TEX:
+        case TGSI_OPCODE_TXP:
+        case TGSI_OPCODE_TXB:
+                tgsi_to_qir_tex(trans, tgsi_inst,
+                                op_trans[tgsi_op].op, src_regs);
+                return;
+        default:
+                break;
+        }
+
+        if (tgsi_op >= ARRAY_SIZE(op_trans) || !(op_trans[tgsi_op].func)) {
+                fprintf(stderr, "unknown tgsi inst: ");
+                tgsi_dump_instruction(tgsi_inst, asdf++);
+                fprintf(stderr, "\n");
+                abort();
+        }
+
         for (int i = 0; i < 4; i++) {
                 if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
                         continue;
@@ -1043,9 +1106,74 @@ vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
         free(so);
 }
 
+static uint32_t translate_wrap(uint32_t p_wrap)
+{
+        switch (p_wrap) {
+        case PIPE_TEX_WRAP_REPEAT:
+                return 0;
+        case PIPE_TEX_WRAP_CLAMP:
+        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+                return 1;
+        case PIPE_TEX_WRAP_MIRROR_REPEAT:
+                return 2;
+        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+                return 3;
+        default:
+                fprintf(stderr, "Unknown wrap mode %u\n", p_wrap);
+                assert(!"not reached");
+                return 0;
+        }
+}
+
+static uint32_t
+get_texture_p0(struct vc4_texture_stateobj *texstate,
+               uint32_t tex_and_sampler)
+{
+        uint32_t texi = (tex_and_sampler >> 0) & 0xff;
+        struct pipe_sampler_view *texture = texstate->textures[texi];
+        struct vc4_resource *rsc = vc4_resource(texture->texture);
+
+        return (texture->u.tex.last_level |
+#ifdef USE_VC4_SIMULATOR
+                simpenrose_hw_addr(rsc->bo->map) /* XXX */
+#else
+                0 /* XXX */
+#endif
+                /* XXX: data type */);
+}
+
+static uint32_t
+get_texture_p1(struct vc4_texture_stateobj *texstate,
+               uint32_t tex_and_sampler)
+{
+        uint32_t texi = (tex_and_sampler >> 0) & 0xff;
+        uint32_t sampi = (tex_and_sampler >> 8) & 0xff;
+        struct pipe_sampler_view *texture = texstate->textures[texi];
+        struct pipe_sampler_state *sampler = texstate->samplers[sampi];
+        static const uint32_t mipfilter_map[] = {
+                [PIPE_TEX_MIPFILTER_NEAREST] = 2,
+                [PIPE_TEX_MIPFILTER_LINEAR] = 4,
+                [PIPE_TEX_MIPFILTER_NONE] = 0
+        };
+        static const uint32_t imgfilter_map[] = {
+                [PIPE_TEX_FILTER_NEAREST] = 1,
+                [PIPE_TEX_FILTER_LINEAR] = 0,
+        };
+
+        return ((1u << 31) /* XXX: data type */|
+                ((texture->texture->height0 & 2047) << 20) |
+                ((texture->texture->width0 & 2047) << 8) |
+                (imgfilter_map[sampler->mag_img_filter] << 7) |
+                ((imgfilter_map[sampler->min_img_filter] +
+                  mipfilter_map[sampler->min_mip_filter]) << 4) |
+                (translate_wrap(sampler->wrap_t) << 2) |
+                (translate_wrap(sampler->wrap_s) << 0));
+}
+
 void
 vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                    struct vc4_constbuf_stateobj *cb,
+                   struct vc4_texture_stateobj *texstate,
                    int shader_index, struct vc4_bo **out_bo,
                    uint32_t *out_offset)
 {
@@ -1055,6 +1183,7 @@ vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
         uint32_t *map = vc4_bo_map(ubo);
 
         for (int i = 0; i < uinfo->count; i++) {
+
                 switch (uinfo->contents[i]) {
                 case QUNIFORM_CONSTANT:
                         map[i] = uinfo->data[i];
@@ -1068,6 +1197,14 @@ vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                 case QUNIFORM_VIEWPORT_Y_SCALE:
                         map[i] = fui(vc4->framebuffer.height * -16.0f / 2.0f);
                         break;
+
+                case QUNIFORM_TEXTURE_CONFIG_P0:
+                        map[i] = get_texture_p0(texstate, uinfo->data[i]);
+                        break;
+
+                case QUNIFORM_TEXTURE_CONFIG_P1:
+                        map[i] = get_texture_p1(texstate, uinfo->data[i]);
+                        break;
                 }
 #if 0
                 fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 495d16e2a25..0499eb9406f 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -63,6 +63,16 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_VPM_READ] = { "vpm_read", 0, 1, true },
         [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
         [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
+
+        [QOP_TEX_S] = { "tex_s", 0, 2 },
+        [QOP_TEX_T] = { "tex_t", 0, 2 },
+        [QOP_TEX_R] = { "tex_r", 0, 2 },
+        [QOP_TEX_B] = { "tex_b", 0, 2 },
+        [QOP_TEX_RESULT] = { "tex_result", 0, 0 },
+        [QOP_R4_UNPACK_A] = { "r4_unpack_a", 1, 0 },
+        [QOP_R4_UNPACK_B] = { "r4_unpack_b", 1, 0 },
+        [QOP_R4_UNPACK_C] = { "r4_unpack_c", 1, 0 },
+        [QOP_R4_UNPACK_D] = { "r4_unpack_d", 1, 0 },
 };
 
 static const char *
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 753f82e5021..a76d091b327 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -72,6 +72,24 @@ enum qop {
         QOP_VPM_READ,
         QOP_TLB_COLOR_WRITE,
         QOP_VARY_ADD_C,
+
+        /** Texture x coordinate parameter write */
+        QOP_TEX_S,
+        /** Texture y coordinate parameter write */
+        QOP_TEX_T,
+        /** Texture border color parameter or cube map z coordinate write */
+        QOP_TEX_R,
+        /** Texture LOD bias parameter write */
+        QOP_TEX_B,
+        /**
+         * Signal of texture read being necessary and then reading r4 into
+         * the destination
+         */
+        QOP_TEX_RESULT,
+        QOP_R4_UNPACK_A,
+        QOP_R4_UNPACK_B,
+        QOP_R4_UNPACK_C,
+        QOP_R4_UNPACK_D
 };
 
 struct simple_node {
@@ -120,6 +138,25 @@ enum quniform_contents {
         QUNIFORM_VIEWPORT_X_SCALE,
         QUNIFORM_VIEWPORT_Y_SCALE,
         /** @} */
+
+        /**
+         * A reference to a texture config parameter 0 uniform.
+         *
+         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
+         * defines texture type, miplevels, and such. It will be found as a
+         * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
+         */
+        QUNIFORM_TEXTURE_CONFIG_P0,
+
+        /**
+         * A reference to a texture config parameter 1 uniform.
+         *
+         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
+         * defines texture width, height, filters, and wrap modes. It will be
+         * found as a parameter to the second QOP_TEX_[STRB] instruction in a
+         * sequence.
+         */
+        QUNIFORM_TEXTURE_CONFIG_P1,
 };
 
 struct qcompile {
@@ -178,6 +215,20 @@ qir_##name(struct qcompile *c, struct qreg a, struct qreg b) \
         return t; \
 }
 
+#define QIR_NODST_1(name) \
+static inline void \
+qir_##name(struct qcompile *c, struct qreg a) \
+{ \
+        qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef)); \
+}
+
+#define QIR_NODST_2(name) \
+static inline void \
+qir_##name(struct qcompile *c, struct qreg a, struct qreg b) \
+{ \
+        qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \
+}
+
 QIR_ALU1(MOV)
 QIR_ALU2(FADD)
 QIR_ALU2(FSUB)
@@ -194,12 +245,11 @@ QIR_ALU1(EXP2)
 QIR_ALU1(LOG2)
 QIR_ALU2(PACK_SCALED)
 QIR_ALU1(VARY_ADD_C)
-
-static inline void
-qir_VPM_WRITE(struct qcompile *c, struct qreg a)
-{
-        qir_emit(c, qir_inst(QOP_VPM_WRITE, c->undef, a, c->undef));
-}
+QIR_NODST_1(VPM_WRITE)
+QIR_NODST_2(TEX_S)
+QIR_NODST_2(TEX_T)
+QIR_NODST_2(TEX_R)
+QIR_NODST_2(TEX_B)
 
 static inline struct qreg
 qir_CMP(struct qcompile *c, struct qreg cmp, struct qreg a, struct qreg b)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h
index bdd5d94708f..224d9aaa44d 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_defines.h
+++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h
@@ -195,6 +195,17 @@ enum qpu_pack_a {
         QPU_PACK_A_8D_SAT,
 };
 
+enum qpu_unpack_r4 {
+        QPU_UNPACK_R4_NOP,
+        QPU_UNPACK_R4_F16A_TO_F32,
+        QPU_UNPACK_R4_F16B_TO_F32,
+        QPU_UNPACK_R4_8D_REP,
+        QPU_UNPACK_R4_8A,
+        QPU_UNPACK_R4_8B,
+        QPU_UNPACK_R4_8C,
+        QPU_UNPACK_R4_8D,
+};
+
 #define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
 /* Using the GNU statement expression extension */
 #define QPU_SET_FIELD(value, field) \
@@ -209,6 +220,9 @@ enum qpu_pack_a {
 #define QPU_SIG_SHIFT 60
 #define QPU_SIG_MASK QPU_MASK(63, 60)
 
+#define QPU_UNPACK_SHIFT 57
+#define QPU_UNPACK_MASK QPU_MASK(59, 57)
+
 /**
  * If set, the pack field means PACK_MUL or R4 packing, instead of normal
  * regfile a packing.
diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
index 4ec6d9657b7..525710585ae 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
@@ -93,6 +93,17 @@ static const char *qpu_pack_mul[] = {
         [QPU_PACK_MUL_8D] = "8d",
 };
 
+static const char *qpu_unpack_r4[] = {
+        [QPU_UNPACK_R4_NOP] = "",
+        [QPU_UNPACK_R4_F16A_TO_F32] = "f16a",
+        [QPU_UNPACK_R4_F16B_TO_F32] = "f16b",
+        [QPU_UNPACK_R4_8D_REP] = "8d_rep",
+        [QPU_UNPACK_R4_8A] = "8a",
+        [QPU_UNPACK_R4_8B] = "8b",
+        [QPU_UNPACK_R4_8C] = "8c",
+        [QPU_UNPACK_R4_8D] = "8d",
+};
+
 static const char *special_read_a[] = {
         "uni",
         NULL,
@@ -263,6 +274,7 @@ print_alu_src(uint64_t inst, uint32_t mux)
         uint32_t raddr = (is_a ?
                           QPU_GET_FIELD(inst, QPU_RADDR_A) :
                           QPU_GET_FIELD(inst, QPU_RADDR_B));
+        uint32_t unpack = QPU_GET_FIELD(inst, QPU_UNPACK);
 
         if (mux <= QPU_MUX_R5)
                 fprintf(stderr, "r%d", mux);
@@ -287,6 +299,11 @@ print_alu_src(uint64_t inst, uint32_t mux)
                 else
                         fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
         }
+
+        if (mux == QPU_MUX_R4 && (inst & QPU_PM) &&
+            unpack != QPU_UNPACK_R4_NOP) {
+                fprintf(stderr, ".%s", DESC(qpu_unpack_r4, unpack));
+        }
 }
 
 static void
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 2eebabee419..9d55390c67f 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -490,6 +490,38 @@ vc4_generate_code(struct qcompile *c)
                         break;
                 }
 
+                case QOP_TEX_S:
+                case QOP_TEX_T:
+                case QOP_TEX_R:
+                case QOP_TEX_B:
+                        queue(c, qpu_inst(qpu_a_MOV(qpu_rb(QPU_W_TMU0_S +
+                                                           (qinst->op -
+                                                            QOP_TEX_S)),
+                                                    src[0]),
+                                          qpu_m_NOP()));
+                        break;
+
+                case QOP_TEX_RESULT:
+                        queue(c, qpu_inst(qpu_a_NOP(), qpu_m_NOP()));
+                        *last_inst(c) = qpu_set_sig(*last_inst(c),
+                                                    QPU_SIG_LOAD_TMU0);
+
+                        break;
+
+                case QOP_R4_UNPACK_A:
+                case QOP_R4_UNPACK_B:
+                case QOP_R4_UNPACK_C:
+                case QOP_R4_UNPACK_D:
+                        queue(c, qpu_inst(qpu_a_MOV(dst, qpu_r4()),
+                                          qpu_m_NOP()));
+                        *last_inst(c) |= QPU_PM;
+                        *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_R4_8A +
+                                                       (qinst->op -
+                                                        QOP_R4_UNPACK_A),
+                                                       QPU_UNPACK);
+
+                        break;
+
                 default:
                         assert(qinst->op < ARRAY_SIZE(translate));
                         assert(translate[qinst->op].op != 0); /* NOPs */
-- 
2.30.2