/* NOTE(review): this file is a diff excerpt ('-'/'+' prefixed lines) of a
 * freedreno a3xx state-emit source, not compilable C.  Hunk context between
 * functions has been elided, so the bodies below are incomplete.  Comments
 * only are added here -- no diff tokens changed.
 *
 * emit_constants(): writes shader constant state into the ringbuffer.
 * The diff (a) retypes the variant arg fd3_shader_variant ->
 * ir3_shader_variant, and (b) clamps the user-const range to
 * MIN2(first_immediate, constlen) so a binning-pass shader with a smaller
 * constlen cannot be asked to load more consts than it declares (the added
 * comment says writing too many consts causes an HLSQ lockup).
 */
emit_constants(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
struct fd_constbuf_stateobj *constbuf,
- struct fd3_shader_variant *shader)
+ struct ir3_shader_variant *shader)
{
uint32_t enabled_mask = constbuf->enabled_mask;
+ uint32_t first_immediate;
uint32_t base = 0;
unsigned i;
// they are clobbered by a clear, gmem2mem, or mem2gmem..
constbuf->dirty_mask = enabled_mask;
+ /* in particular, with binning shader and unneeded consts no
+ * longer referenced, we could end up w/ constlen that is smaller
+ * than first_immediate. In that case truncate the user consts
+ * early to avoid HLSQ lockup caused by writing too many consts
+ */
+ first_immediate = MIN2(shader->first_immediate, shader->constlen);
+
/* emit user constants: */
while (enabled_mask) {
unsigned index = ffs(enabled_mask) - 1;
/* gallium could leave const buffers bound above what the
* current shader uses.. don't let that confuse us.
*/
- if (base >= (4 * shader->first_immediate))
+ if (base >= (4 * first_immediate))
break;
if (constbuf->dirty_mask & (1 << index)) {
+ /* and even if the start of the const buffer is before
+ * first_immediate, the end may not be:
+ */
+ size = MIN2(size, (4 * first_immediate) - base);
fd3_emit_constant(ring, sb, base,
cb->buffer_offset, size,
cb->user_buffer, cb->buffer);
/* NOTE(review): hunk boundary -- the lines below belong to a different
 * (texture-state emit) function; the intervening context was elided from
 * this excerpt.
 */
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
/* Unbound sampler slots: substitute a zero-initialized static dummy
 * object instead of dereferencing a NULL tex->samplers[i].
 */
for (i = 0; i < tex->num_samplers; i++) {
- struct fd3_sampler_stateobj *sampler =
- fd3_sampler_stateobj(tex->samplers[i]);
+ static const struct fd3_sampler_stateobj dummy_sampler = {};
+ const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
+ fd3_sampler_stateobj(tex->samplers[i]) :
+ &dummy_sampler;
OUT_RING(ring, sampler->texsamp0);
OUT_RING(ring, sampler->texsamp1);
}
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
/* Same dummy-object guard for unbound sampler views: */
for (i = 0; i < tex->num_textures; i++) {
- struct fd3_pipe_sampler_view *view =
- fd3_pipe_sampler_view(tex->textures[i]);
+ static const struct fd3_pipe_sampler_view dummy_view = {};
+ const struct fd3_pipe_sampler_view *view = tex->textures[i] ?
+ fd3_pipe_sampler_view(tex->textures[i]) :
+ &dummy_view;
OUT_RING(ring, view->texconst0);
OUT_RING(ring, view->texconst1);
OUT_RING(ring, view->texconst2 |
/* NOTE(review): another elided hunk -- a second texture-emit loop (likely
 * the mipaddr/resource emission path) receives the same NULL-view guard.
 */
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_textures; i++) {
- struct fd3_pipe_sampler_view *view =
- fd3_pipe_sampler_view(tex->textures[i]);
+ static const struct fd3_pipe_sampler_view dummy_view = {};
+ const struct fd3_pipe_sampler_view *view = tex->textures[i] ?
+ fd3_pipe_sampler_view(tex->textures[i]) :
+ &dummy_view;
struct fd_resource *rsc = view->tex_resource;
for (j = 0; j < view->mipaddrs; j++) {
}
}
/* NOTE(review): emit_cache_flush() is deleted entirely by this diff.  Its
 * body -- a CACHE_FLUSH event write, a dummy auto-index draw ("probably
 * only really needed on a320"), and 4 NOP dwords -- is re-inlined near the
 * end of this excerpt, with the event now written via the fd_event_write()
 * helper and the dummy draw gated on is_a3xx_p0(ctx->screen).
 */
-static void
-emit_cache_flush(struct fd_ringbuffer *ring)
-{
- OUT_PKT3(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, CACHE_FLUSH);
-
- /* probably only really needed on a320: */
- OUT_PKT3(ring, CP_DRAW_INDX, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
- INDEX_SIZE_IGN, IGNORE_VISIBILITY));
- OUT_RING(ring, 0); /* NumIndices */
-
- OUT_PKT3(ring, CP_NOP, 4);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-}
-
/* NOTE(review): the block comment below is left unterminated because the
 * hunk context (and the function it documents) was elided from this
 * excerpt.
 */
/* emit texture state for mem->gmem restore operation.. eventually it would
 * be good to get rid of this and use normal CSO/etc state for more of these
 * special cases, but for now the compiler is not sufficient..
/* fd3_emit_vertex_bufs(): programs the VFD fetch/decode instructions for
 * up to n vertex buffers.  The diff retypes vp to ir3_shader_variant and
 * reworks the loop: instead of emitting one fetch/decode pair per slot i
 * unconditionally, it first finds the last VS input with a non-empty
 * compmask ('last'), then emits only the used inputs, compacting them
 * into consecutive hw slots indexed by 'j'.  It also accumulates the
 * total input component count and now programs VFD_CONTROL_0/1 here.
 */
void
fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
- struct fd3_shader_variant *vp,
+ struct ir3_shader_variant *vp,
struct fd3_vertex_buf *vbufs, uint32_t n)
{
- uint32_t i;
+ uint32_t i, j, last = 0;
+ uint32_t total_in = 0;
n = MIN2(n, vp->inputs_count);
- for (i = 0; i < n; i++) {
- struct pipe_resource *prsc = vbufs[i].prsc;
- struct fd_resource *rsc = fd_resource(prsc);
- enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(vbufs[i].format);
- bool switchnext = (i != (n - 1));
- uint32_t fs = util_format_get_blocksize(vbufs[i].format);
-
- OUT_PKT0(ring, REG_A3XX_VFD_FETCH(i), 2);
- OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
- A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vbufs[i].stride) |
- COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
- A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) |
- A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
-
- OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1);
- OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
- A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
- A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
- A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
- A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
- A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
- COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
/* Pass 1: locate the last input the shader actually consumes, so
 * SWITCHNEXT can be cleared on the final emitted instruction:
 */
+ for (i = 0; i < n; i++)
+ if (vp->inputs[i].compmask)
+ last = i;
+
/* Pass 2: emit fetch/decode only for inputs with a writemask, using
 * the compacted index j for the hw instruction slots.  New vs old:
 * a debug_assert on the translated vtx format, and a SWAP field
 * derived from fd3_pipe2swap(pfmt) in the decode instruction.
 */
+ for (i = 0, j = 0; i <= last; i++) {
+ if (vp->inputs[i].compmask) {
+ struct pipe_resource *prsc = vbufs[i].prsc;
+ struct fd_resource *rsc = fd_resource(prsc);
+ enum pipe_format pfmt = vbufs[i].format;
+ enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
+ bool switchnext = (i != last);
+ uint32_t fs = util_format_get_blocksize(pfmt);
+
+ debug_assert(fmt != ~0);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2);
+ OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
+ A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vbufs[i].stride) |
+ COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+ A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) |
+ A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
+ OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(j), 1);
+ OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
+ A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
+ A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+ A3XX_VFD_DECODE_INSTR_SWAP(fd3_pipe2swap(pfmt)) |
+ A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
+ A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
+ A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in += vp->inputs[i].ncomp;
+ j++;
+ }
}
+
/* Newly emitted here: VFD_CONTROL_0/1 with the compacted instruction
 * count j and accumulated attribute count (MAXSTORAGE is a placeholder
 * per the XXX marker; vertex/instance id regs are disabled via regid 63).
 */
+ OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
+ OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
+ A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
+ A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
+ A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
+ OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
+ A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
+ A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
}
/* fd3_emit_state(): emits dirty pipeline state to the ringbuffer.  The
 * diff retypes the key/variants to ir3_*, splits RB_RENDER_CONTROL out of
 * the ZSA/stencil-ref block, skips frag-shader work on the binning pass,
 * gates texture emission on the shader actually using samplers, and
 * inlines the old emit_cache_flush().  Several hunks have elided context.
 */
void
fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd_program_stateobj *prog, uint32_t dirty,
- struct fd3_shader_key key)
+ struct ir3_shader_key key)
{
- struct fd3_shader_variant *vp;
- struct fd3_shader_variant *fp;
+ struct ir3_shader_variant *vp;
+ struct ir3_shader_variant *fp;
fp = fd3_shader_variant(prog->fp, key);
vp = fd3_shader_variant(prog->vp, key);
/* NOTE(review): stray context line from an elided MSAA hunk: */
A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
}
/* RB_RENDER_CONTROL now depends on the FS too (FACENESS / frag-coord
 * bits), so it is re-keyed on ZSA|PROG dirty bits and skipped entirely
 * for the binning pass; it is still patch-emitted via OUT_RINGP into
 * rbrc_patches.
 */
- if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
- struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
- struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+ if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !key.binning_pass) {
+ uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control;
- if (!key.binning_pass) {
- struct fd3_context *fd3_ctx = fd3_context(ctx);
+ val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
+ val |= COND(fp->frag_coord, A3XX_RB_RENDER_CONTROL_XCOORD |
+ A3XX_RB_RENDER_CONTROL_YCOORD |
+ A3XX_RB_RENDER_CONTROL_ZCOORD |
+ A3XX_RB_RENDER_CONTROL_WCOORD);
- /* I suppose if we needed to (which I don't *think* we need
- * to), we could emit this for binning pass too. But we
- * would need to keep a different patch-list for binning
- * vs render pass.
- */
+ /* I suppose if we needed to (which I don't *think* we need
+ * to), we could emit this for binning pass too. But we
+ * would need to keep a different patch-list for binning
+ * vs render pass.
+ */
- OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
- OUT_RINGP(ring, zsa->rb_render_control,
- &fd3_ctx->rbrc_patches);
- }
+ OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+ OUT_RINGP(ring, val, &fd3_context(ctx)->rbrc_patches);
+ }
+
/* ZSA/stencil-ref state proper stays keyed on its original dirty bits: */
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
+ struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
+ struct pipe_stencil_ref *sr = &ctx->stencil_ref;
OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
OUT_RING(ring, zsa->rb_alpha_ref);
/* Clip control: if-statement replaced with COND() idiom, and new
 * Z/W coord bits set when the FS reads gl_FragCoord:
 */
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
->gras_cl_clip_cntl;
- if (fp->writes_pos) {
- val |= A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE;
- }
+ val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
+ val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
+ A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, val);
}
/* PC_PRIM_VTX_CNTL: STRIDE_IN_VPC is now computed only for the render
 * pass (binning does not run the FS), and a PSIZE bit is added when
 * the VS writes point size:
 */
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
- struct fd3_rasterizer_stateobj *rasterizer =
- fd3_rasterizer_stateobj(ctx->rasterizer);
- uint32_t stride_in_vpc;
+ uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
+ ->pc_prim_vtx_cntl;
+
+ if (!key.binning_pass) {
+ uint32_t stride_in_vpc = align(fp->total_in, 4) / 4;
+ if (stride_in_vpc > 0)
+ stride_in_vpc = MAX2(stride_in_vpc, 2);
+ val |= A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc);
+ }
- stride_in_vpc = align(fp->total_in, 4) / 4;
- if (stride_in_vpc > 0)
- stride_in_vpc = MAX2(stride_in_vpc, 2);
+ val |= COND(vp->writes_psize, A3XX_PC_PRIM_VTX_CNTL_PSIZE);
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
- OUT_RING(ring, rasterizer->pc_prim_vtx_cntl |
- A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc));
+ OUT_RING(ring, val);
}
if (dirty & FD_DIRTY_SCISSOR) {
}
/* The wait-for-idle moves from the PROG block to precede the viewport
 * register writes:
 */
if (dirty & FD_DIRTY_VIEWPORT) {
+ fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0]));
}
if (dirty & FD_DIRTY_PROG) {
- fd_wfi(ctx, ring);
fd3_program_emit(ring, prog, key);
}
+ /* TODO we should not need this or fd_wfi() before emit_constants():
+ */
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, HLSQ_FLUSH);
emit_constants(ring, SB_VERT_SHADER,
&ctx->constbuf[PIPE_SHADER_VERTEX],
(prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
/* Frag-shader consts are pointless in the binning pass (no FS runs),
 * so skip them there:
 */
- emit_constants(ring, SB_FRAG_SHADER,
- &ctx->constbuf[PIPE_SHADER_FRAGMENT],
- (prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+ if (!key.binning_pass) {
+ emit_constants(ring, SB_FRAG_SHADER,
+ &ctx->constbuf[PIPE_SHADER_FRAGMENT],
+ (prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+ }
}
if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX))
fd_wfi(ctx, ring);
/* Texture state is only emitted if the bound shader actually samples
 * (has_samp); otherwise the dirty bit is cleared from the local mask
 * so it stays pending in ctx->dirty for a later shader that does:
 */
- if (dirty & FD_DIRTY_VERTTEX)
- emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
+ if (dirty & FD_DIRTY_VERTTEX) {
+ if (vp->has_samp)
+ emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
+ else
+ dirty &= ~FD_DIRTY_VERTTEX;
+ }
- if (dirty & FD_DIRTY_FRAGTEX)
- emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
+ if (dirty & FD_DIRTY_FRAGTEX) {
+ if (fp->has_samp)
+ emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
+ else
+ dirty &= ~FD_DIRTY_FRAGTEX;
+ }
ctx->dirty &= ~dirty;
}
/* NOTE(review): elided hunk boundary -- the remainder belongs to the
 * state-restore path, where the deleted emit_cache_flush() body is
 * inlined: the CACHE_FLUSH event goes through fd_event_write(), and the
 * dummy draw workaround is now applied only on a3xx p0 silicon.
 */
OUT_RING(ring, 0x00000000);
}
+ fd_wfi(ctx, ring);
OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00007fff);
OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
OUT_RING(ring, 0x00000000);
- emit_cache_flush(ring);
+ fd_event_write(ctx, ring, CACHE_FLUSH);
+
+ if (is_a3xx_p0(ctx->screen)) {
+ OUT_PKT3(ring, CP_DRAW_INDX, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
+ INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+ OUT_RING(ring, 0); /* NumIndices */
+ }
+
+ OUT_PKT3(ring, CP_NOP, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
fd_wfi(ctx, ring);
+
/* New: force re-emit of RB frame-buffer dimensions on the next draw: */
+ ctx->needs_rb_fbd = true;
}