X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Fa6xx%2Ffd6_program.c;h=98774a2cfe56de2ede7a507463b7ae109ceda3b8;hb=2447b3b9d3306b33c75d503c9caf9e7322c957bf;hp=6e19deee03ef8cd8ae813378ab93fe668c56fffd;hpb=a64d2661340a659bdd0b729090b3aa6c135e8b4c;p=mesa.git diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 6e19deee03e..98774a2cfe5 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -35,6 +35,7 @@ #include "freedreno_program.h" #include "fd6_program.h" +#include "fd6_const.h" #include "fd6_emit.h" #include "fd6_texture.h" #include "fd6_format.h" @@ -89,7 +90,7 @@ fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(sb) | CP_LOAD_STATE6_0_NUM_UNIT(so->instrlen)); - OUT_RELOCD(ring, so->bo, 0, 0, 0); + OUT_RELOC(ring, so->bo, 0, 0, 0); } /* Add any missing varyings needed for stream-out. Otherwise varyings not @@ -144,20 +145,24 @@ setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_varian struct ir3_shader_linkage *l) { const struct ir3_stream_output_info *strmout = &v->shader->stream_output; - struct fd6_streamout_state *tf = &state->tf; - memset(tf, 0, sizeof(*tf)); + uint32_t ncomp[PIPE_MAX_SO_BUFFERS]; + uint32_t prog[256/2]; + uint32_t prog_count; + + memset(ncomp, 0, sizeof(ncomp)); + memset(prog, 0, sizeof(prog)); - tf->prog_count = align(l->max_loc, 2) / 2; + prog_count = align(l->max_loc, 2) / 2; - debug_assert(tf->prog_count < ARRAY_SIZE(tf->prog)); + debug_assert(prog_count < ARRAY_SIZE(prog)); for (unsigned i = 0; i < strmout->num_outputs; i++) { const struct ir3_stream_output *out = &strmout->output[i]; unsigned k = out->register_index; unsigned idx; - tf->ncomp[out->output_buffer] += out->num_components; + ncomp[out->output_buffer] += out->num_components; /* linkage map sorted by order frag shader wants things, so * a bit less ideal here.. @@ -174,22 +179,40 @@ setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_varian unsigned off = j + out->dst_offset; /* in dwords */ if (loc & 1) { - tf->prog[loc/2] |= A6XX_VPC_SO_PROG_B_EN | + prog[loc/2] |= A6XX_VPC_SO_PROG_B_EN | A6XX_VPC_SO_PROG_B_BUF(out->output_buffer) | A6XX_VPC_SO_PROG_B_OFF(off * 4); } else { - tf->prog[loc/2] |= A6XX_VPC_SO_PROG_A_EN | + prog[loc/2] |= A6XX_VPC_SO_PROG_A_EN | A6XX_VPC_SO_PROG_A_BUF(out->output_buffer) | A6XX_VPC_SO_PROG_A_OFF(off * 4); } } } - tf->vpc_so_buf_cntl = A6XX_VPC_SO_BUF_CNTL_ENABLE | - COND(tf->ncomp[0] > 0, A6XX_VPC_SO_BUF_CNTL_BUF0) | - COND(tf->ncomp[1] > 0, A6XX_VPC_SO_BUF_CNTL_BUF1) | - COND(tf->ncomp[2] > 0, A6XX_VPC_SO_BUF_CNTL_BUF2) | - COND(tf->ncomp[3] > 0, A6XX_VPC_SO_BUF_CNTL_BUF3); + struct fd_ringbuffer *ring = state->streamout_stateobj; + + OUT_PKT7(ring, CP_CONTEXT_REG_BUNCH, 12 + (2 * prog_count)); + OUT_RING(ring, REG_A6XX_VPC_SO_BUF_CNTL); + OUT_RING(ring, A6XX_VPC_SO_BUF_CNTL_ENABLE | + COND(ncomp[0] > 0, A6XX_VPC_SO_BUF_CNTL_BUF0) | + COND(ncomp[1] > 0, A6XX_VPC_SO_BUF_CNTL_BUF1) | + COND(ncomp[2] > 0, A6XX_VPC_SO_BUF_CNTL_BUF2) | + COND(ncomp[3] > 0, A6XX_VPC_SO_BUF_CNTL_BUF3)); + OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(0)); + OUT_RING(ring, ncomp[0]); + OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(1)); + OUT_RING(ring, ncomp[1]); + OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(2)); + OUT_RING(ring, ncomp[2]); + OUT_RING(ring, REG_A6XX_VPC_SO_NCOMP(3)); + OUT_RING(ring, ncomp[3]); + OUT_RING(ring, REG_A6XX_VPC_SO_CNTL); + OUT_RING(ring, A6XX_VPC_SO_CNTL_ENABLE); + for (unsigned i = 0; i < prog_count; i++) { + OUT_RING(ring, REG_A6XX_VPC_SO_PROG); + OUT_RING(ring, prog[i]); + } } static void @@ -425,7 +448,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, COND(vs->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE)); fd6_emit_shader(ring, vs); - ir3_emit_immediates(screen, vs, ring); + fd6_emit_immediates(screen, vs, ring); struct ir3_shader_linkage l = {0}; const struct ir3_shader_variant *last_shader = fd6_last_shader(state); @@ -510,8 +533,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, COND(hs->need_pixlod, A6XX_SP_HS_CTRL_REG0_PIXLODENABLE)); fd6_emit_shader(ring, hs); - ir3_emit_immediates(screen, hs, ring); - ir3_emit_link_map(screen, vs, hs, ring); + fd6_emit_immediates(screen, hs, ring); + fd6_emit_link_map(screen, vs, hs, ring); OUT_PKT4(ring, REG_A6XX_SP_DS_CTRL_REG0, 1); OUT_RING(ring, A6XX_SP_DS_CTRL_REG0_THREADSIZE(TWO_QUADS) | @@ -520,8 +543,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, COND(ds->need_pixlod, A6XX_SP_DS_CTRL_REG0_PIXLODENABLE)); fd6_emit_shader(ring, ds); - ir3_emit_immediates(screen, ds, ring); - ir3_emit_link_map(screen, hs, ds, ring); + fd6_emit_immediates(screen, ds, ring); + fd6_emit_link_map(screen, hs, ds, ring); shader_info *hs_info = &hs->shader->nir->info; OUT_PKT4(ring, REG_A6XX_PC_TESS_NUM_VERTEX, 1); @@ -623,7 +646,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_PKT4(ring, REG_A6XX_SP_FS_CTRL_REG0, 1); OUT_RING(ring, A6XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | COND(enable_varyings, A6XX_SP_FS_CTRL_REG0_VARYING) | - COND(fs->frag_coord, A6XX_SP_FS_CTRL_REG0_VARYING) | 0x1000000 | A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) | A6XX_SP_FS_CTRL_REG0_MERGEDREGS | @@ -643,12 +665,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, CONDREG(ij_samp_regid, A6XX_GRAS_CNTL_PERSAMP_VARYING) | COND(VALIDREG(ij_size_regid) && !sample_shading, A6XX_GRAS_CNTL_SIZE) | COND(VALIDREG(ij_size_regid) && sample_shading, A6XX_GRAS_CNTL_SIZE_PERSAMP) | - COND(fs->frag_coord, - A6XX_GRAS_CNTL_SIZE | - A6XX_GRAS_CNTL_XCOORD | - A6XX_GRAS_CNTL_YCOORD | - A6XX_GRAS_CNTL_ZCOORD | - A6XX_GRAS_CNTL_WCOORD) | + COND(fs->fragcoord_compmask != 0, A6XX_GRAS_CNTL_SIZE | + A6XX_GRAS_CNTL_COORD_MASK(fs->fragcoord_compmask)) | COND(fs->frag_face, A6XX_GRAS_CNTL_SIZE)); OUT_PKT4(ring, REG_A6XX_RB_RENDER_CONTROL0, 2); @@ -659,12 +677,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_UNK10) | COND(VALIDREG(ij_size_regid) && !sample_shading, A6XX_RB_RENDER_CONTROL0_SIZE) | COND(VALIDREG(ij_size_regid) && sample_shading, A6XX_RB_RENDER_CONTROL0_SIZE_PERSAMP) | - COND(fs->frag_coord, - A6XX_RB_RENDER_CONTROL0_SIZE | - A6XX_RB_RENDER_CONTROL0_XCOORD | - A6XX_RB_RENDER_CONTROL0_YCOORD | - A6XX_RB_RENDER_CONTROL0_ZCOORD | - A6XX_RB_RENDER_CONTROL0_WCOORD) | + COND(fs->fragcoord_compmask != 0, A6XX_RB_RENDER_CONTROL0_SIZE | + A6XX_RB_RENDER_CONTROL0_COORD_MASK(fs->fragcoord_compmask)) | COND(fs->frag_face, A6XX_RB_RENDER_CONTROL0_SIZE)); OUT_RING(ring, @@ -701,11 +715,11 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, COND(gs->need_pixlod, A6XX_SP_GS_CTRL_REG0_PIXLODENABLE)); fd6_emit_shader(ring, gs); - ir3_emit_immediates(screen, gs, ring); + fd6_emit_immediates(screen, gs, ring); if (ds) - ir3_emit_link_map(screen, ds, gs, ring); + fd6_emit_link_map(screen, ds, gs, ring); else - ir3_emit_link_map(screen, vs, gs, ring); + fd6_emit_link_map(screen, vs, gs, ring); OUT_PKT4(ring, REG_A6XX_VPC_PACK_GS, 1); OUT_RING(ring, A6XX_VPC_PACK_GS_POSITIONLOC(pos_loc) | @@ -771,12 +785,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9B07, 1); OUT_RING(ring, 0); - OUT_PKT4(ring, REG_A6XX_SP_GS_UNKNOWN_A871, 1); + OUT_PKT4(ring, REG_A6XX_SP_GS_PRIM_SIZE, 1); OUT_RING(ring, prev->shader->output_size); } else { OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1); OUT_RING(ring, 0); - OUT_PKT4(ring, REG_A6XX_SP_GS_UNKNOWN_A871, 1); + OUT_PKT4(ring, REG_A6XX_SP_GS_PRIM_SIZE, 1); OUT_RING(ring, 0); } @@ -792,6 +806,18 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_PKT4(ring, REG_A6XX_PC_PRIMID_CNTL, 1); OUT_RING(ring, COND(primid_passthru, A6XX_PC_PRIMID_CNTL_PRIMID_PASSTHRU)); + uint32_t non_sysval_input_count = 0; + for (uint32_t i = 0; i < vs->inputs_count; i++) + if (!vs->inputs[i].sysval) + non_sysval_input_count++; + + OUT_PKT4(ring, REG_A6XX_VFD_DEST_CNTL(0), non_sysval_input_count); + for (uint32_t i = 0; i < non_sysval_input_count; i++) { + assert(vs->inputs[i].compmask); + OUT_RING(ring, A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vs->inputs[i].compmask) | + A6XX_VFD_DEST_CNTL_INSTR_REGID(vs->inputs[i].regid)); + } + OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_1, 6); OUT_RING(ring, A6XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) | A6XX_VFD_CONTROL_1_REGID4INST(instance_regid) | @@ -818,7 +844,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_RING(ring, COND(fragz, A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z)); if (!binning_pass) - ir3_emit_immediates(screen, fs, ring); + fd6_emit_immediates(screen, fs, ring); } static struct fd_ringbuffer * @@ -981,6 +1007,8 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs, state->config_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000); state->binning_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000); state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000); + state->streamout_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000); + #ifdef DEBUG if (!ds) { @@ -1008,6 +1036,7 @@ fd6_program_destroy(void *data, struct ir3_program_state *state) fd_ringbuffer_del(so->binning_stateobj); fd_ringbuffer_del(so->config_stateobj); fd_ringbuffer_del(so->interp_stateobj); + fd_ringbuffer_del(so->streamout_stateobj); free(so); } @@ -1026,11 +1055,11 @@ fd6_shader_state_create(struct pipe_context *pctx, const struct pipe_shader_stat unsigned packets, size; /* pre-calculate size required for userconst stateobj: */ - ir3_user_consts_size(&shader->ubo_state, &packets, &size); + fd6_user_consts_size(&shader->ubo_state, &packets, &size); /* also account for UBO addresses: */ packets += 1; - size += 2 * shader->const_state.num_ubos; + size += 2 * shader->num_ubos; unsigned sizedwords = (4 * packets) + size; shader->ubo_state.cmdstream_size = sizedwords * 4;