X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Ffreedreno_draw.c;h=66bb1163df234cfc4c5049a4cb6c891aa3242640;hb=e04db879f8933915501bfb9cce0d1359d62766bd;hp=b02b8b9f9f9336c447dd16edab665e9d69601bac;hpb=e9edbf0a688c68ef0896e5d4278f411f6b6f8398;p=mesa.git diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c index b02b8b9f9f9..66bb1163df2 100644 --- a/src/gallium/drivers/freedreno/freedreno_draw.c +++ b/src/gallium/drivers/freedreno/freedreno_draw.c @@ -36,77 +36,42 @@ #include "freedreno_context.h" #include "freedreno_state.h" #include "freedreno_resource.h" +#include "freedreno_query_hw.h" #include "freedreno_util.h" - -static enum pc_di_primtype -mode2primtype(unsigned mode) +static void +resource_used(struct fd_context *ctx, struct pipe_resource *prsc, + enum fd_resource_status status) { - switch (mode) { - case PIPE_PRIM_POINTS: return DI_PT_POINTLIST; - case PIPE_PRIM_LINES: return DI_PT_LINELIST; - case PIPE_PRIM_LINE_STRIP: return DI_PT_LINESTRIP; - case PIPE_PRIM_TRIANGLES: return DI_PT_TRILIST; - case PIPE_PRIM_TRIANGLE_STRIP: return DI_PT_TRISTRIP; - case PIPE_PRIM_TRIANGLE_FAN: return DI_PT_TRIFAN; - case PIPE_PRIM_QUADS: return DI_PT_QUADLIST; - case PIPE_PRIM_QUAD_STRIP: return DI_PT_QUADSTRIP; - case PIPE_PRIM_POLYGON: return DI_PT_POLYGON; - } - DBG("unsupported mode: (%s) %d", u_prim_name(mode), mode); - assert(0); - return DI_PT_NONE; + struct fd_resource *rsc; + + if (!prsc) + return; + + rsc = fd_resource(prsc); + rsc->status |= status; + if (rsc->stencil) + rsc->stencil->status |= status; + + /* TODO resources can actually be shared across contexts, + * so I'm not sure a single list-head will do the trick? + */ + debug_assert((rsc->pending_ctx == ctx) || !rsc->pending_ctx); + list_delinit(&rsc->list); + list_addtail(&rsc->list, &ctx->used_resources); + rsc->pending_ctx = ctx; } -static enum pc_di_index_size -size2indextype(unsigned index_size) +static void +resource_read(struct fd_context *ctx, struct pipe_resource *prsc) { - switch (index_size) { - case 1: return INDEX_SIZE_8_BIT; - case 2: return INDEX_SIZE_16_BIT; - case 4: return INDEX_SIZE_32_BIT; - } - DBG("unsupported index size: %d", index_size); - assert(0); - return INDEX_SIZE_IGN; + resource_used(ctx, prsc, FD_PENDING_READ); } -/* this is same for a2xx/a3xx, so split into helper: */ -void -fd_draw_emit(struct fd_context *ctx, const struct pipe_draw_info *info) +static void +resource_written(struct fd_context *ctx, struct pipe_resource *prsc) { - struct fd_ringbuffer *ring = ctx->ring; - struct pipe_index_buffer *idx = &ctx->indexbuf; - struct fd_bo *idx_bo = NULL; - enum pc_di_index_size idx_type = INDEX_SIZE_IGN; - enum pc_di_src_sel src_sel; - uint32_t idx_size, idx_offset; - - if (info->indexed) { - assert(!idx->user_buffer); - - idx_bo = fd_resource(idx->buffer)->bo; - idx_type = size2indextype(idx->index_size); - idx_size = idx->index_size * info->count; - idx_offset = idx->offset; - src_sel = DI_SRC_SEL_DMA; - } else { - idx_bo = NULL; - idx_type = INDEX_SIZE_IGN; - idx_size = 0; - idx_offset = 0; - src_sel = DI_SRC_SEL_AUTO_INDEX; - } - - OUT_PKT3(ring, CP_DRAW_INDX, info->indexed ? 5 : 3); - OUT_RING(ring, 0x00000000); /* viz query info. */ - OUT_RING(ring, DRAW(mode2primtype(info->mode), - src_sel, idx_type, IGNORE_VISIBILITY)); - OUT_RING(ring, info->count); /* NumIndices */ - if (info->indexed) { - OUT_RELOC(ring, idx_bo, idx_offset, 0); - OUT_RING (ring, idx_size); - } + resource_used(ctx, prsc, FD_PENDING_WRITE); } static void @@ -115,7 +80,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) struct fd_context *ctx = fd_context(pctx); struct pipe_framebuffer_state *pfb = &ctx->framebuffer; struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); - unsigned i, buffers = 0; + unsigned i, prims, buffers = 0; /* if we supported transform feedback, we'd have to disable this: */ if (((scissor->maxx - scissor->minx) * @@ -123,6 +88,20 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) return; } + /* TODO: push down the region versions into the tiles */ + if (!fd_render_condition_check(pctx)) + return; + + /* emulate unsupported primitives: */ + if (!fd_supported_prim(ctx, info->mode)) { + if (ctx->streamout.num_targets > 0) + debug_error("stream-out with emulated prims"); + util_primconvert_save_index_buffer(ctx->primconvert, &ctx->indexbuf); + util_primconvert_save_rasterizer_state(ctx->primconvert, ctx->rasterizer); + util_primconvert_draw_vbo(ctx->primconvert, info); + return; + } + ctx->needs_flush = true; /* @@ -131,13 +110,13 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) if (fd_depth_enabled(ctx)) { buffers |= FD_BUFFER_DEPTH; - fd_resource(pfb->zsbuf->texture)->dirty = true; + resource_written(ctx, pfb->zsbuf->texture); ctx->gmem_reason |= FD_GMEM_DEPTH_ENABLED; } if (fd_stencil_enabled(ctx)) { buffers |= FD_BUFFER_STENCIL; - fd_resource(pfb->zsbuf->texture)->dirty = true; + resource_written(ctx, pfb->zsbuf->texture); ctx->gmem_reason |= FD_GMEM_STENCIL_ENABLED; } @@ -145,10 +124,15 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) ctx->gmem_reason |= FD_GMEM_LOGICOP_ENABLED; for (i = 0; i < pfb->nr_cbufs; i++) { - struct pipe_resource *surf = pfb->cbufs[i]->texture; + struct pipe_resource *surf; + + if (!pfb->cbufs[i]) + continue; - fd_resource(surf)->dirty = true; - buffers |= FD_BUFFER_COLOR; + surf = pfb->cbufs[i]->texture; + + resource_written(ctx, surf); + buffers |= PIPE_CLEAR_COLOR0 << i; if (surf->nr_samples > 1) ctx->gmem_reason |= FD_GMEM_MSAA_ENABLED; @@ -157,14 +141,80 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) ctx->gmem_reason |= FD_GMEM_BLEND_ENABLED; } + /* Skip over buffer 0, that is sent along with the command stream */ + for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + resource_read(ctx, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer); + resource_read(ctx, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer); + } + + /* Mark VBOs as being read */ + for (i = 0; i < ctx->vtx.vertexbuf.count; i++) { + assert(!ctx->vtx.vertexbuf.vb[i].user_buffer); + resource_read(ctx, ctx->vtx.vertexbuf.vb[i].buffer); + } + + /* Mark index buffer as being read */ + resource_read(ctx, ctx->indexbuf.buffer); + + /* Mark textures as being read */ + for (i = 0; i < ctx->verttex.num_textures; i++) + if (ctx->verttex.textures[i]) + resource_read(ctx, ctx->verttex.textures[i]->texture); + for (i = 0; i < ctx->fragtex.num_textures; i++) + if (ctx->fragtex.textures[i]) + resource_read(ctx, ctx->fragtex.textures[i]->texture); + + /* Mark streamout buffers as being written.. */ + for (i = 0; i < ctx->streamout.num_targets; i++) + if (ctx->streamout.targets[i]) + resource_written(ctx, ctx->streamout.targets[i]->buffer); + ctx->num_draws++; - /* any buffers that haven't been cleared, we need to restore: */ + prims = u_reduced_prims_for_vertices(info->mode, info->count); + + ctx->stats.draw_calls++; + + /* TODO prims_emitted should be clipped when the stream-out buffer is + * not large enough. See max_tf_vtx().. probably need to move that + * into common code. Although a bit more annoying since a2xx doesn't + * use ir3 so no common way to get at the pipe_stream_output_info + * which is needed for this calculation. + */ + if (ctx->streamout.num_targets > 0) + ctx->stats.prims_emitted += prims; + ctx->stats.prims_generated += prims; + + /* any buffers that haven't been cleared yet, we need to restore: */ ctx->restore |= buffers & (FD_BUFFER_ALL & ~ctx->cleared); /* and any buffers used, need to be resolved: */ ctx->resolve |= buffers; - ctx->draw(ctx, info); + DBG("%x num_draws=%u (%s/%s)", buffers, ctx->num_draws, + util_format_short_name(pipe_surface_format(pfb->cbufs[0])), + util_format_short_name(pipe_surface_format(pfb->zsbuf))); + + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_DRAW); + ctx->draw_vbo(ctx, info); + + for (i = 0; i < ctx->streamout.num_targets; i++) + ctx->streamout.offsets[i] += info->count; + + if (fd_mesa_debug & FD_DBG_DDRAW) + ctx->dirty = 0xffffffff; + + /* if an app (or, well, piglit test) does many thousands of draws + * without flush (or anything which implicitly flushes, like + * changing render targets), we can exceed the ringbuffer size. + * Since we don't currently have a sane way to wrapparound, and + * we use the same buffer for both draw and tiling commands, for + * now we need to do this hack and trigger flush if we are running + * low on remaining space for cmds: + */ + if (((ctx->ring->cur - ctx->ring->start) > + (ctx->ring->size/4 - FD_TILING_COMMANDS_DWORDS)) || + (fd_mesa_debug & FD_DBG_FLUSH)) + fd_context_render(pctx); } /* TODO figure out how to make better use of existing state mechanism @@ -179,31 +229,64 @@ fd_clear(struct pipe_context *pctx, unsigned buffers, { struct fd_context *ctx = fd_context(pctx); struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); + unsigned cleared_buffers; + int i; + + /* TODO: push down the region versions into the tiles */ + if (!fd_render_condition_check(pctx)) + return; - ctx->cleared |= buffers; + /* for bookkeeping about which buffers have been cleared (and thus + * can fully or partially skip mem2gmem) we need to ignore buffers + * that have already had a draw, in case apps do silly things like + * clear after draw (ie. if you only clear the color buffer, but + * something like alpha-test causes side effects from the draw in + * the depth buffer, etc) + */ + cleared_buffers = buffers & (FD_BUFFER_ALL & ~ctx->restore); + + /* do we have full-screen scissor? */ + if (!memcmp(scissor, &ctx->disabled_scissor, sizeof(*scissor))) { + ctx->cleared |= cleared_buffers; + } else { + ctx->partial_cleared |= cleared_buffers; + if (cleared_buffers & PIPE_CLEAR_COLOR) + ctx->cleared_scissor.color = *scissor; + if (cleared_buffers & PIPE_CLEAR_DEPTH) + ctx->cleared_scissor.depth = *scissor; + if (cleared_buffers & PIPE_CLEAR_STENCIL) + ctx->cleared_scissor.stencil = *scissor; + } ctx->resolve |= buffers; ctx->needs_flush = true; if (buffers & PIPE_CLEAR_COLOR) - fd_resource(pfb->cbufs[0]->texture)->dirty = true; + for (i = 0; i < pfb->nr_cbufs; i++) + if (buffers & (PIPE_CLEAR_COLOR0 << i)) + resource_written(ctx, pfb->cbufs[i]->texture); if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { - fd_resource(pfb->zsbuf->texture)->dirty = true; + resource_written(ctx, pfb->zsbuf->texture); ctx->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL; } DBG("%x depth=%f, stencil=%u (%s/%s)", buffers, depth, stencil, - util_format_name(pfb->cbufs[0]->format), - pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none"); + util_format_short_name(pipe_surface_format(pfb->cbufs[0])), + util_format_short_name(pipe_surface_format(pfb->zsbuf))); + + fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_CLEAR); ctx->clear(ctx, buffers, color, depth, stencil); ctx->dirty |= FD_DIRTY_ZSA | + FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_SAMPLE_MASK | FD_DIRTY_PROG | FD_DIRTY_CONSTBUF | - FD_DIRTY_BLEND; + FD_DIRTY_BLEND | + FD_DIRTY_FRAMEBUFFER; if (fd_mesa_debug & FD_DBG_DCLEAR) ctx->dirty = 0xffffffff; @@ -229,6 +312,8 @@ fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps, void fd_draw_init(struct pipe_context *pctx) { + list_inithead(&fd_context(pctx)->used_resources); + pctx->draw_vbo = fd_draw_vbo; pctx->clear = fd_clear; pctx->clear_render_target = fd_clear_render_target;