From e59890aebbad990a02c2c27531525804de47115d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 24 Jun 2014 16:39:08 +0100 Subject: [PATCH] vc4: Start converting the driver to use vertex shaders. Note: This is the cutoff point where I switched from developing primarily on the Pi to developing on the simulator. As a result, from this point on the code is untested on the Pi (the kernel code I have currently wasn't rendering anything at this commit, though the simulator renders successfully, suggesting kernel bugs). --- src/gallium/drivers/vc4/vc4_context.h | 2 + src/gallium/drivers/vc4/vc4_draw.c | 92 +++++++++--------- src/gallium/drivers/vc4/vc4_program.c | 128 +++++++++++++++++++++++++- 3 files changed, 177 insertions(+), 45 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 73bf05bce19..0c906488756 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -66,6 +66,8 @@ struct vc4_texture_stateobj { struct vc4_shader_state { struct pipe_shader_state base; struct vc4_bo *bo; + + uint32_t coord_shader_offset; }; struct vc4_program_stateobj { diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 4c25dabae7d..34977bb6938 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -31,40 +31,11 @@ static struct vc4_bo * get_vbo(struct vc4_context *vc4, uint32_t width, uint32_t height) { struct { - uint16_t x, y; - float z, rhw, r, g, b; + float x, y, z, w; } verts[] = { - { - // Vertex: Top, red - (-(int)width / 3) << 4, // X in 12.4 fixed point - (-(int)height / 3) << 4, // Y in 12.4 fixed point - 1.0f, // Z - 1.0f, // 1/W - 1.0f, // Varying 0 (Red) - 0.0f, // Varying 1 (Green) - 0.0f, // Varying 2 (Blue) - }, - { - // Vertex: bottom left, Green - (width / 3) << 4, // X in 12.4 fixed point - (-(int)height / 3) << 4, // Y in 12.4 fixed point - 1.0f, // Z - 1.0f, // 1/W - 0.0f, // Varying 0 (Red) - 
1.0f, // Varying 1 (Green) - 0.0f, // Varying 2 (Blue) - }, - - { - // Vertex: bottom right, Blue - (width / 3) << 4, // X in 12.4 fixed point - (height / 3) << 4, // Y in 12.4 fixed point - 1.0f, // Z - 1.0f, // 1/W - 0.0f, // Varying 0 (Red) - 0.0f, // Varying 1 (Green) - 1.0f, // Varying 2 (Blue) - }, + { -1, -1, 1, 1 }, + { 1, -1, 1, 1 }, + { -1, 1, 1, 1 }, }; return vc4_bo_alloc_mem(vc4->screen, verts, sizeof(verts), "verts"); @@ -118,8 +89,22 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) 48 * tilew * tileh, "tilestate"); struct vc4_bo *ibo = get_ibo(vc4); - struct vc4_bo *fs_uniform = vc4_bo_alloc(vc4->screen, 0x1000, "fsu"); struct vc4_bo *vbo = get_vbo(vc4, width, height); + static const uint32_t fs_uni[] = { 0 }; + uint32_t vs_uni[] = { + fui(vc4->framebuffer.width * 16.0f / 2.0f), + fui(vc4->framebuffer.height * 16.0f / 2.0f), + }; + uint32_t cs_uni[] = { + fui(vc4->framebuffer.width * 16.0f / 2.0f), + fui(vc4->framebuffer.height * 16.0f / 2.0f), + }; + struct vc4_bo *fs_ubo = vc4_bo_alloc_mem(vc4->screen, fs_uni, + sizeof(fs_uni), "fs_ubo"); + struct vc4_bo *vs_ubo = vc4_bo_alloc_mem(vc4->screen, vs_uni, + sizeof(vs_uni), "vs_ubo"); + struct vc4_bo *cs_ubo = vc4_bo_alloc_mem(vc4->screen, cs_uni, + sizeof(cs_uni), "cs_ubo"); vc4->needs_flush = true; @@ -137,16 +122,18 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING); cl_u8(&vc4->bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT); - cl_u8(&vc4->bcl, 0x32); // 16 bit triangle + cl_u8(&vc4->bcl, 0x12); // 16 bit triangle vc4_emit_state(pctx); /* the actual draw call. 
*/ - cl_u8(&vc4->bcl, VC4_PACKET_NV_SHADER_STATE); + uint32_t nr_attributes = 1; + cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE); #ifndef USE_VC4_SIMULATOR - cl_u32(&vc4->bcl, 0); /* offset into shader_rec */ + cl_u32(&vc4->bcl, nr_attributes & 0x7); /* offset into shader_rec */ #else - cl_u32(&vc4->bcl, simpenrose_hw_addr(vc4->shader_rec.next)); + cl_u32(&vc4->bcl, simpenrose_hw_addr(vc4->shader_rec.next) | + (nr_attributes & 0x7)); #endif cl_start_reloc(&vc4->bcl, 1); @@ -161,15 +148,32 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_u8(&vc4->bcl, VC4_PACKET_HALT); // Shader Record - cl_start_shader_reloc(&vc4->shader_rec, 3); - cl_u8(&vc4->shader_rec, 0); - cl_u8(&vc4->shader_rec, 6*4); // stride - cl_u8(&vc4->shader_rec, 0xcc); // num uniforms (not used) - cl_u8(&vc4->shader_rec, 3); // num varyings + cl_start_shader_reloc(&vc4->shader_rec, 7); + cl_u16(&vc4->shader_rec, VC4_SHADER_FLAG_ENABLE_CLIPPING); + cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */ + cl_u8(&vc4->shader_rec, 0); /* fs num varyings */ cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0); - cl_reloc(vc4, &vc4->shader_rec, fs_uniform, 0); + cl_reloc(vc4, &vc4->shader_rec, fs_ubo, 0); + + cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */ + cl_u8(&vc4->shader_rec, 1); /* vs attribute array bitfield */ + cl_u8(&vc4->shader_rec, 16); /* vs total attribute size */ + cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0); + cl_reloc(vc4, &vc4->shader_rec, vs_ubo, 0); + + cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */ + cl_u8(&vc4->shader_rec, 1); /* cs attribute array bitfield */ + cl_u8(&vc4->shader_rec, 16); /* vs total attribute size */ + cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, + vc4->prog.vs->coord_shader_offset); + cl_reloc(vc4, &vc4->shader_rec, cs_ubo, 0); + cl_reloc(vc4, &vc4->shader_rec, vbo, 0); + cl_u8(&vc4->shader_rec, 15); /* bytes - 1 in the attribute*/ + cl_u8(&vc4->shader_rec, 16); /* attribute stride */ + 
cl_u8(&vc4->shader_rec, 0); /* VS VPM offset */ + cl_u8(&vc4->shader_rec, 0); /* CS VPM offset */ vc4->shader_rec_count++; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 2fe7c216838..56fe37c5f5f 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -66,7 +66,7 @@ vc4_fs_state_create(struct pipe_context *pctx, uint64_t gen_fsc[100]; uint64_t cur_inst; int gen_fsc_len = 0; -#if 1 +#if 0 cur_inst = qpu_load_imm_f(qpu_r5(), 0.0f); gen_fsc[gen_fsc_len++] = cur_inst; @@ -176,14 +176,140 @@ vc4_fs_state_create(struct pipe_context *pctx, return so; } +static int +gen_vs_cs_code(uint64_t *gen, bool is_vs) +{ + uint32_t count = 0; + uint64_t cur_inst; + struct qpu_reg x = qpu_ra(0); + struct qpu_reg y = qpu_ra(1); + struct qpu_reg z = qpu_ra(2); + struct qpu_reg w = qpu_ra(3); + struct qpu_reg xy = qpu_ra(10); + struct qpu_reg xs = qpu_ra(12); + struct qpu_reg ys = qpu_ra(13); + struct qpu_reg vpmread = qpu_ra(QPU_R_VPM); + struct qpu_reg vpm = qpu_ra(QPU_W_VPM); + + gen[count++] = qpu_load_imm_ui(qpu_vrsetup(), 0x00401a00); + gen[count++] = qpu_load_imm_ui(qpu_vwsetup(), 0x00001a00); + +#if 1 + gen[count++] = qpu_inst(qpu_a_MOV(x, vpmread), qpu_m_NOP()); + gen[count++] = qpu_inst(qpu_a_MOV(y, vpmread), qpu_m_NOP()); + gen[count++] = qpu_inst(qpu_a_MOV(z, vpmread), qpu_m_NOP()); + gen[count++] = qpu_inst(qpu_a_MOV(w, vpmread), qpu_m_NOP()); + + + gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_FMUL(xs, x, + qpu_rb(QPU_R_UNIF))); + gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_FMUL(ys, y, + qpu_rb(QPU_R_UNIF))); + + cur_inst = qpu_inst(qpu_a_FTOI(xy, xs), qpu_m_NOP()); + cur_inst |= QPU_SET_FIELD(QPU_PACK_A_16A, QPU_PACK); + gen[count++] = cur_inst; + cur_inst = qpu_inst(qpu_a_FTOI(xy, ys), qpu_m_NOP()); + cur_inst |= QPU_SET_FIELD(QPU_PACK_A_16B, QPU_PACK); + gen[count++] = cur_inst; + +#else + + struct qpu_reg t = qpu_ra(20); + struct qpu_reg hundred = qpu_rb(21); + gen[count++] = 
qpu_inst(qpu_a_NOP(), + qpu_m_MUL24(t, + qpu_ra(QPU_R_ELEM_QPU), + qpu_ra(QPU_R_ELEM_QPU))); + gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + + gen[count++] = qpu_load_imm_ui(hundred, 400); + gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + + struct qpu_reg xm = qpu_ra(22), ym = qpu_ra(23); + gen[count++] = qpu_inst(qpu_a_NOP(), + qpu_m_MUL24(xm, hundred, qpu_ra(QPU_R_ELEM_QPU))); + gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + + gen[count++] = qpu_inst(qpu_a_NOP(), + qpu_m_MUL24(ym, hundred, t)); + gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + + cur_inst = qpu_inst(qpu_a_MOV(xy, xm), qpu_m_NOP()); + cur_inst |= QPU_SET_FIELD(QPU_PACK_A_16A, QPU_PACK); + gen[count++] = cur_inst; + cur_inst = qpu_inst(qpu_a_MOV(xy, ym), qpu_m_NOP()); + cur_inst |= QPU_SET_FIELD(QPU_PACK_A_16B, QPU_PACK); + gen[count++] = cur_inst; +#endif + + gen[count++] = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + + if (is_vs) { + gen[count++] = qpu_inst(qpu_a_MOV(vpm, xy), qpu_m_NOP()); + + /* XXX */ + gen[count++] = qpu_inst(qpu_a_MOV(vpm, z), qpu_m_NOP()); + gen[count++] = qpu_inst(qpu_a_MOV(vpm, w), qpu_m_NOP()); + + } else { + gen[count++] = qpu_inst(qpu_a_MOV(vpm, x), qpu_m_NOP()); + gen[count++] = qpu_inst(qpu_a_MOV(vpm, y), qpu_m_NOP()); + gen[count++] = qpu_inst(qpu_a_MOV(vpm, z), qpu_m_NOP()); + gen[count++] = qpu_inst(qpu_a_MOV(vpm, w), qpu_m_NOP()); + gen[count++] = qpu_inst(qpu_a_MOV(vpm, xy), qpu_m_NOP()); + + /* XXX */ + gen[count++] = qpu_inst(qpu_a_MOV(vpm, z), qpu_m_NOP()); + gen[count++] = qpu_inst(qpu_a_MOV(vpm, w), qpu_m_NOP()); + } + + /* PROGRAM END */ + cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + cur_inst = (cur_inst & ~QPU_SIG_MASK) | QPU_SET_FIELD(QPU_SIG_PROG_END, QPU_SIG); + gen[count++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + gen[count++] = cur_inst; + + cur_inst = qpu_inst(qpu_a_NOP(), qpu_m_NOP()); + gen[count++] = cur_inst; + + vc4_qpu_validate(gen, count); + + return count; +} + static void * 
vc4_vs_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { + struct vc4_context *vc4 = vc4_context(pctx); struct vc4_shader_state *so = vc4_shader_state_create(pctx, cso); if (!so) return NULL; + uint64_t gen[100]; + uint64_t count = 0; + uint64_t *vsc = gen; + + /* VS */ + count += gen_vs_cs_code(gen + count, true); + fprintf(stderr, "VS:\n"); + vc4_dump_program(vsc, count); + + /* CS */ + + /* XXX alignment? */ + uint64_t *csc = gen + count; + so->coord_shader_offset = count * sizeof(uint64_t); + count += gen_vs_cs_code(gen + count, false); + + fprintf(stderr, "CS:\n"); + vc4_dump_program(csc, count - (csc - gen)); + + so->bo = vc4_bo_alloc_mem(vc4->screen, gen, count * sizeof(uint64_t), + "vs_code"); return so; } -- 2.30.2