src = SS_DIRECT;
}
- /* we have this sometimes, not others.. perhaps we could be clever
- * and figure out actually when we need to invalidate cache:
- */
- OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
- OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
- OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
- A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
- A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
-
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
CP_LOAD_STATE_0_STATE_SRC(src) |
emit_constants(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
struct fd_constbuf_stateobj *constbuf,
- struct fd3_shader_stateobj *shader)
+ struct ir3_shader_variant *shader)
{
uint32_t enabled_mask = constbuf->enabled_mask;
+ uint32_t first_immediate;
uint32_t base = 0;
unsigned i;
// they are clobbered by a clear, gmem2mem, or mem2gmem..
constbuf->dirty_mask = enabled_mask;
+	/* in particular, with the binning shader, consts that are no
+	 * longer referenced could leave us with a constlen smaller than
+	 * first_immediate.  In that case truncate the user consts early
+	 * to avoid an HLSQ lockup caused by writing too many consts.
+	 */
+ first_immediate = MIN2(shader->first_immediate, shader->constlen);
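+	/* (illustrative numbers, not from a real trace: constlen 64 with
+	 * first_immediate 68 would otherwise write 4 vec4 of user consts
+	 * past what HLSQ has been told to expect)
+	 */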
+
/* emit user constants: */
while (enabled_mask) {
unsigned index = ffs(enabled_mask) - 1;
// I expect that size should be a multiple of vec4's:
assert(size == align(size, 4));
- /* gallium could have const-buffer still bound, even though the
- * shader is not using it. Writing consts above constlen (or
- * rather, HLSQ_{VS,FS}_CONTROL_REG.CONSTLENGTH) will cause a
- * hang.
+ /* gallium could leave const buffers bound above what the
+ * current shader uses.. don't let that confuse us.
*/
- if ((base / 4) >= shader->constlen)
+ if (base >= (4 * first_immediate))
break;
if (constbuf->dirty_mask & (1 << index)) {
+ /* and even if the start of the const buffer is before
+ * first_immediate, the end may not be:
+ */
+ size = MIN2(size, (4 * first_immediate) - base);
fd3_emit_constant(ring, sb, base,
cb->buffer_offset, size,
cb->user_buffer, cb->buffer);
/* emit shader immediates: */
if (shader) {
for (i = 0; i < shader->immediates_count; i++) {
- fd3_emit_constant(ring, sb,
- 4 * (shader->first_immediate + i),
- 0, 4, shader->immediates[i].val, NULL);
+ base = 4 * (shader->first_immediate + i);
+ if (base >= (4 * shader->constlen))
+ break;
+ fd3_emit_constant(ring, sb, base,
+ 0, 4, shader->immediates[i].val, NULL);
}
}
}
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_samplers; i++) {
- struct fd3_sampler_stateobj *sampler =
- fd3_sampler_stateobj(tex->samplers[i]);
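+		/* nothing bound at this slot: emit zeroed dummy state,
+		 * since the packet payload length is fixed by num_samplers:
+		 */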
+ static const struct fd3_sampler_stateobj dummy_sampler = {};
+ const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
+ fd3_sampler_stateobj(tex->samplers[i]) :
+ &dummy_sampler;
OUT_RING(ring, sampler->texsamp0);
OUT_RING(ring, sampler->texsamp1);
}
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_textures; i++) {
- struct fd3_pipe_sampler_view *view =
- fd3_pipe_sampler_view(tex->textures[i]);
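+		/* likewise, unbound texture slots get dummy state: */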
+ static const struct fd3_pipe_sampler_view dummy_view = {};
+ const struct fd3_pipe_sampler_view *view = tex->textures[i] ?
+ fd3_pipe_sampler_view(tex->textures[i]) :
+ &dummy_view;
OUT_RING(ring, view->texconst0);
OUT_RING(ring, view->texconst1);
OUT_RING(ring, view->texconst2 |
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_textures; i++) {
- struct fd3_pipe_sampler_view *view =
- fd3_pipe_sampler_view(tex->textures[i]);
+ static const struct fd3_pipe_sampler_view dummy_view = {};
+ const struct fd3_pipe_sampler_view *view = tex->textures[i] ?
+ fd3_pipe_sampler_view(tex->textures[i]) :
+ &dummy_view;
struct fd_resource *rsc = view->tex_resource;
for (j = 0; j < view->mipaddrs; j++) {
}
}
-static void
-emit_cache_flush(struct fd_ringbuffer *ring)
-{
- OUT_PKT3(ring, CP_EVENT_WRITE, 1);
- OUT_RING(ring, CACHE_FLUSH);
-
- /* probably only really needed on a320: */
- OUT_PKT3(ring, CP_DRAW_INDX, 3);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
- INDEX_SIZE_IGN, IGNORE_VISIBILITY));
- OUT_RING(ring, 0); /* NumIndices */
-
- OUT_PKT3(ring, CP_NOP, 4);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
-
- OUT_WFI (ring);
-}
-
/* emit texture state for mem->gmem restore operation.. eventually it would
* be good to get rid of this and use normal CSO/etc state for more of these
* special cases, but for now the compiler is not sufficient..
void
fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog,
+ struct ir3_shader_variant *vp,
struct fd3_vertex_buf *vbufs, uint32_t n)
{
- struct fd3_shader_stateobj *vp = prog->vp;
- uint32_t i;
+ uint32_t i, j, last = 0;
+ uint32_t total_in = 0;
n = MIN2(n, vp->inputs_count);
- for (i = 0; i < n; i++) {
- struct pipe_resource *prsc = vbufs[i].prsc;
- struct fd_resource *rsc = fd_resource(prsc);
- enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(vbufs[i].format);
- bool switchnext = (i != (n - 1));
- uint32_t fs = util_format_get_blocksize(vbufs[i].format);
-
- OUT_PKT0(ring, REG_A3XX_VFD_FETCH(i), 2);
- OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
- A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vbufs[i].stride) |
- COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
- A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) |
- A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
- OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
-
- OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1);
- OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
- A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
- A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
- A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
- A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
- A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
- COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+ for (i = 0; i < n; i++)
+ if (vp->inputs[i].compmask)
+ last = i;
+
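+	/* i walks the shader's inputs; j counts only those with a
+	 * non-zero compmask, ie. the VFD fetch/decode slots we
+	 * actually program (and feed into VFD_CONTROL below):
+	 */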
+ for (i = 0, j = 0; i <= last; i++) {
+ if (vp->inputs[i].compmask) {
+ struct pipe_resource *prsc = vbufs[i].prsc;
+ struct fd_resource *rsc = fd_resource(prsc);
+ enum pipe_format pfmt = vbufs[i].format;
+ enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
+ bool switchnext = (i != last);
+ uint32_t fs = util_format_get_blocksize(pfmt);
+
+ debug_assert(fmt != ~0);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2);
+ OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
+ A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vbufs[i].stride) |
+ COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+ A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) |
+ A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
+ OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
+
+ OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(j), 1);
+ OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
+ A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
+ A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+ A3XX_VFD_DECODE_INSTR_SWAP(fd3_pipe2swap(pfmt)) |
+ A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
+ A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
+ A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+ COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+ total_in += vp->inputs[i].ncomp;
+ j++;
+ }
}
+
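+	/* both instruction counts come from j, the number of slots
+	 * actually programmed above; regid(63,0) appears to be the
+	 * "unused register" sentinel for vertexid/instanceid:
+	 */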
+ OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
+ OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
+ A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
+ A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
+ A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
+ OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
+ A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
+ A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
}
void
-fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
+fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ struct fd_program_stateobj *prog, uint32_t dirty,
+ struct ir3_shader_key key)
{
- struct fd_ringbuffer *ring = ctx->ring;
+ struct ir3_shader_variant *vp;
+ struct ir3_shader_variant *fp;
+
+ fp = fd3_shader_variant(prog->fp, key);
+ vp = fd3_shader_variant(prog->vp, key);
emit_marker(ring, 5);
A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
}
+ if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !key.binning_pass) {
+ uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control;
+
+ val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
+ val |= COND(fp->frag_coord, A3XX_RB_RENDER_CONTROL_XCOORD |
+ A3XX_RB_RENDER_CONTROL_YCOORD |
+ A3XX_RB_RENDER_CONTROL_ZCOORD |
+ A3XX_RB_RENDER_CONTROL_WCOORD);
+
+ /* I suppose if we needed to (which I don't *think* we need
+ * to), we could emit this for binning pass too. But we
+ * would need to keep a different patch-list for binning
+ * vs render pass.
+ */
+
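+		/* OUT_RINGP adds this dword to the rbrc patch-list, so
+		 * the value can be fixed up later (cf. the comment above):
+		 */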
+ OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
+ OUT_RINGP(ring, val, &fd3_context(ctx)->rbrc_patches);
+ }
+
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
- fd3_emit_rbrc_draw_state(ring, zsa->rb_render_control);
-
OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
OUT_RING(ring, zsa->rb_alpha_ref);
- OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
- OUT_RING(ring, zsa->rb_depth_control);
-
OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
OUT_RING(ring, zsa->rb_stencil_control);
A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
}
+ if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+ uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control;
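+		/* a frag shader writing Z defeats early-z: */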
+ if (fp->writes_pos) {
+ val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
+ val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+ }
+ OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, val);
+ }
+
if (dirty & FD_DIRTY_RASTERIZER) {
struct fd3_rasterizer_stateobj *rasterizer =
fd3_rasterizer_stateobj(ctx->rasterizer);
OUT_PKT0(ring, REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
+ }
+ if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
+ uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
+ ->gras_cl_clip_cntl;
+ val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
+ val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
+ A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
- OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
+ OUT_RING(ring, val);
}
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
- struct fd3_rasterizer_stateobj *rasterizer =
- fd3_rasterizer_stateobj(ctx->rasterizer);
- struct fd3_shader_stateobj *fp = ctx->prog.fp;
- uint32_t stride_in_vpc;
+ uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
+ ->pc_prim_vtx_cntl;
+
+ if (!key.binning_pass) {
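+			/* stride is in units of vec4; the MAX2() clamp
+			 * suggests the hw wants at least 2 whenever the
+			 * frag shader has any inputs:
+			 */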
+ uint32_t stride_in_vpc = align(fp->total_in, 4) / 4;
+ if (stride_in_vpc > 0)
+ stride_in_vpc = MAX2(stride_in_vpc, 2);
+ val |= A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc);
+ }
- stride_in_vpc = align(fp->total_in, 4) / 4;
- if (stride_in_vpc > 0)
- stride_in_vpc = MAX2(stride_in_vpc, 2);
+ val |= COND(vp->writes_psize, A3XX_PC_PRIM_VTX_CNTL_PSIZE);
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
- OUT_RING(ring, rasterizer->pc_prim_vtx_cntl |
- A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc));
+ OUT_RING(ring, val);
}
if (dirty & FD_DIRTY_SCISSOR) {
}
if (dirty & FD_DIRTY_VIEWPORT) {
+ fd_wfi(ctx, ring);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0]));
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
}
- if (dirty & FD_DIRTY_PROG)
- fd3_program_emit(ring, &ctx->prog);
+ if (dirty & FD_DIRTY_PROG) {
+ fd3_program_emit(ring, prog, key);
+ }
- if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
- struct fd_program_stateobj *prog = &ctx->prog;
+	/* TODO we should not need this HLSQ_FLUSH, nor the fd_wfi()
+	 * before emit_constants():
+	 */
+ OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+ OUT_RING(ring, HLSQ_FLUSH);
+ if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
+ /* evil hack to deal sanely with clear path: */
+ (prog == &ctx->prog)) {
+ fd_wfi(ctx, ring);
emit_constants(ring, SB_VERT_SHADER,
&ctx->constbuf[PIPE_SHADER_VERTEX],
- (prog->dirty & FD_SHADER_DIRTY_VP) ? prog->vp : NULL);
- emit_constants(ring, SB_FRAG_SHADER,
- &ctx->constbuf[PIPE_SHADER_FRAGMENT],
- (prog->dirty & FD_SHADER_DIRTY_FP) ? prog->fp : NULL);
+ (prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
+ if (!key.binning_pass) {
+ emit_constants(ring, SB_FRAG_SHADER,
+ &ctx->constbuf[PIPE_SHADER_FRAGMENT],
+ (prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+ }
}
- if (dirty & FD_DIRTY_BLEND) {
+ if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
uint32_t i;
A3XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
}
- if (dirty & FD_DIRTY_VERTTEX)
- emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
+ if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX))
+ fd_wfi(ctx, ring);
- if (dirty & FD_DIRTY_FRAGTEX)
- emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
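+	/* for a stage with no samplers, clear only the local dirty bit,
+	 * so ctx->dirty keeps FD_DIRTY_*TEX set and the state is emitted
+	 * later, once a shader that actually samples is bound:
+	 */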
+ if (dirty & FD_DIRTY_VERTTEX) {
+ if (vp->has_samp)
+ emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
+ else
+ dirty &= ~FD_DIRTY_VERTTEX;
+ }
+
+ if (dirty & FD_DIRTY_FRAGTEX) {
+ if (fp->has_samp)
+ emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
+ else
+ dirty &= ~FD_DIRTY_FRAGTEX;
+ }
ctx->dirty &= ~dirty;
}
OUT_RING(ring, 0x00000000);
}
+ fd_wfi(ctx, ring);
OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
OUT_RING(ring, 0x00007fff);
OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));
- OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 1);
- OUT_RING(ring, 0x00000001); /* UCHE_CACHE_MODE_CONTROL_REG */
-
- OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
- OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
+ OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+ OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
+ OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
+ A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
+ A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].W */
}
- emit_cache_flush(ring);
+ OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
+ OUT_RING(ring, 0x00000000);
+
+ fd_event_write(ctx, ring, CACHE_FLUSH);
+
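+	/* dummy draw after the flush, probably only really needed
+	 * on a320 (patchlevel zero):
+	 */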
+ if (is_a3xx_p0(ctx->screen)) {
+ OUT_PKT3(ring, CP_DRAW_INDX, 3);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX,
+ INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+ OUT_RING(ring, 0); /* NumIndices */
+ }
+
+ OUT_PKT3(ring, CP_NOP, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ fd_wfi(ctx, ring);
+
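+	/* presumably this flags that RB_FRAME_BUFFER_DIMENSION must be
+	 * re-emitted:
+	 */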
+ ctx->needs_rb_fbd = true;
}