*/
#include "freedreno_context.h"
+#include "freedreno_blitter.h"
#include "freedreno_draw.h"
+#include "freedreno_fence.h"
#include "freedreno_program.h"
#include "freedreno_resource.h"
#include "freedreno_texture.h"
#include "freedreno_state.h"
#include "freedreno_gmem.h"
#include "freedreno_query.h"
+#include "freedreno_query_hw.h"
#include "freedreno_util.h"
-
-static struct fd_ringbuffer *next_rb(struct fd_context *ctx)
-{
- struct fd_ringbuffer *ring;
- uint32_t ts;
-
- /* grab next ringbuffer: */
- ring = ctx->rings[(ctx->rings_idx++) % ARRAY_SIZE(ctx->rings)];
-
- /* wait for new rb to be idle: */
- ts = fd_ringbuffer_timestamp(ring);
- if (ts) {
- DBG("wait: %u", ts);
- fd_pipe_wait(ctx->screen->pipe, ts);
- }
-
- fd_ringbuffer_reset(ring);
-
- return ring;
-}
+#include "util/u_upload_mgr.h"
+/**
+ * pipe_context::flush hook: flush the context's current batch and, if
+ * fencep is non-NULL, hand back a reference to that batch's fence.
+ *
+ * PIPE_FLUSH_FENCE_FD marks the batch as needing an out-fence fd.
+ * Without screen->reorder the batch itself is flushed; with reorder the
+ * flush goes through the screen's batch cache, using the deferred
+ * variant when PIPE_FLUSH_DEFERRED is set.
+ *
+ * NOTE(review): when no batch is returned, *fencep is left untouched --
+ * confirm callers pass in an initialized handle.
+ * NOTE(review): the DBG format below ends in "\n" while the other DBG
+ * calls visible in this file do not -- check whether the macro already
+ * appends a newline.
+ */
static void
-fd_context_next_rb(struct pipe_context *pctx)
+fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fencep,
+ unsigned flags)
{
struct fd_context *ctx = fd_context(pctx);
- struct fd_ringbuffer *ring;
+ struct pipe_fence_handle *fence = NULL;
+ // TODO we want to lookup batch if it exists, but not create one if not.
+ struct fd_batch *batch = fd_context_batch(ctx);
- fd_ringmarker_del(ctx->draw_start);
- fd_ringmarker_del(ctx->draw_end);
+ DBG("%p: flush: flags=%x\n", ctx->batch, flags);
+
+ if (!batch)
+ return;
- ring = next_rb(ctx);
+ /* Take a ref to the batch's fence (batch can be unref'd when flushed): */
+ fd_fence_ref(pctx->screen, &fence, batch->fence);
- ctx->draw_start = fd_ringmarker_new(ring);
- ctx->draw_end = fd_ringmarker_new(ring);
+ if (flags & PIPE_FLUSH_FENCE_FD)
+ batch->needs_out_fence_fd = true;
- fd_ringbuffer_set_parent(ring, NULL);
- ctx->ring = ring;
+ if (!ctx->screen->reorder) {
+ fd_batch_flush(batch, true, false);
+ } else if (flags & PIPE_FLUSH_DEFERRED) {
+ fd_bc_flush_deferred(&ctx->screen->batch_cache, ctx);
+ } else {
+ fd_bc_flush(&ctx->screen->batch_cache, ctx);
+ }
- fd_ringmarker_del(ctx->binning_start);
- fd_ringmarker_del(ctx->binning_end);
+ /* pass the fence taken before the flush on to the caller, if wanted: */
+ if (fencep)
+ fd_fence_ref(pctx->screen, fencep, fence);
- ring = next_rb(ctx);
+ /* release the local fence pointer taken above: */
+ fd_fence_ref(pctx->screen, &fence, NULL);
+}
- ctx->binning_start = fd_ringmarker_new(ring);
- ctx->binning_end = fd_ringmarker_new(ring);
+/**
+ * pipe_context::texture_barrier hook.  Implemented as a plain context
+ * flush with no fence requested and no flush flags; the flags argument
+ * is not looked at.
+ */
+static void
+fd_texture_barrier(struct pipe_context *pctx, unsigned flags)
+{
+ /* On devices that could sample from GMEM we could possibly do better.
+ * Or if we knew that we were doing GMEM bypass we could just emit a
+ * cache flush, perhaps? But we don't know if future draws would cause
+ * us to use GMEM, and a flush in bypass isn't the end of the world.
+ */
+ fd_context_flush(pctx, NULL, 0);
+}
- fd_ringbuffer_set_parent(ring, ctx->ring);
- ctx->binning_ring = ring;
+/**
+ * pipe_context::memory_barrier hook.  Implemented as a plain context
+ * flush with no fence requested and no flush flags; the flags argument
+ * is not looked at.
+ */
+static void
+fd_memory_barrier(struct pipe_context *pctx, unsigned flags)
+{
+ fd_context_flush(pctx, NULL, 0);
+ /* TODO do we need to check for persistently mapped buffers and fd_bo_cpu_prep()?? */
}
-/* emit accumulated render cmds, needed for example if render target has
- * changed, or for flush()
+/**
+ * emit marker string as payload of a no-op packet, which can be
+ * decoded by cffdump.
+ *
+ * Does nothing if the context has no current batch.  Otherwise marks
+ * the batch as needing a flush and appends a CP_NOP packet to its draw
+ * ring whose payload is the first len bytes of string (clamped to the
+ * max packet size), with the last dword zero-padded.
+ *
+ * string is plain char data with no alignment guarantee, so dwords are
+ * assembled with memcpy rather than read through a uint32_t pointer.
+ *
+ * NOTE(review): len <= 0 still emits a packet header whose dword count
+ * is zero or less -- confirm callers never pass that.
*/
-void
-fd_context_render(struct pipe_context *pctx)
+static void
+fd_emit_string_marker(struct pipe_context *pctx, const char *string, int len)
{
struct fd_context *ctx = fd_context(pctx);
- struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-
- DBG("needs_flush: %d", ctx->needs_flush);
+ struct fd_ringbuffer *ring;
+ const char *src = string;
- if (!ctx->needs_flush)
+ if (!ctx->batch)
return;
- fd_gmem_render_tiles(pctx);
+ ctx->batch->needs_flush = true;
- DBG("%p/%p/%p", ctx->ring->start, ctx->ring->cur, ctx->ring->end);
+ ring = ctx->batch->draw;
- /* if size in dwords is more than half the buffer size, then wait and
- * wrap around:
- */
- if ((ctx->ring->cur - ctx->ring->start) > ctx->ring->size/8)
- fd_context_next_rb(pctx);
-
- ctx->needs_flush = false;
- ctx->cleared = ctx->restore = ctx->resolve = 0;
- ctx->gmem_reason = 0;
- ctx->num_draws = 0;
-
- if (pfb->cbufs[0])
- fd_resource(pfb->cbufs[0]->texture)->dirty = false;
- if (pfb->zsbuf)
- fd_resource(pfb->zsbuf->texture)->dirty = false;
-}
-
-static void
-fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
- unsigned flags)
-{
- DBG("fence=%p", fence);
+ /* max packet size is 0x3fff dwords: */
+ len = MIN2(len, 0x3fff * 4);
-#if 0
- if (fence) {
- fd_fence_ref(ctx->screen->fence.current,
- (struct fd_fence **)fence);
+ if (ctx->screen->gpu_id >= 500)
+ OUT_PKT7(ring, CP_NOP, align(len, 4) / 4);
+ else
+ OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
+ /* whole dwords: copy out of the byte string instead of dereferencing
+ * a possibly misaligned uint32_t pointer:
+ */
+ while (len >= 4) {
+ uint32_t dword;
+ memcpy(&dword, src, sizeof(dword));
+ OUT_RING(ring, dword);
+ src += sizeof(dword);
+ len -= 4;
}
-#endif
- fd_context_render(pctx);
+ /* copy remainder bytes without reading past end of input string: */
+ if (len > 0) {
+ uint32_t w = 0;
+ memcpy(&w, src, len);
+ OUT_RING(ring, w);
+ }
}
void
DBG("");
- fd_prog_fini(pctx);
+ if (ctx->screen->reorder && util_queue_is_initialized(&ctx->flush_queue))
+ util_queue_destroy(&ctx->flush_queue);
- util_slab_destroy(&ctx->transfer_pool);
+ util_copy_framebuffer_state(&ctx->framebuffer, NULL);
+ fd_batch_reference(&ctx->batch, NULL); /* unref current batch */
+ fd_bc_invalidate_context(ctx);
- util_dynarray_fini(&ctx->draw_patches);
+ fd_prog_fini(pctx);
if (ctx->blitter)
util_blitter_destroy(ctx->blitter);
+ if (pctx->stream_uploader)
+ u_upload_destroy(pctx->stream_uploader);
+
+ if (ctx->clear_rs_state)
+ pctx->delete_rasterizer_state(pctx, ctx->clear_rs_state);
+
if (ctx->primconvert)
util_primconvert_destroy(ctx->primconvert);
- fd_ringmarker_del(ctx->draw_start);
- fd_ringmarker_del(ctx->draw_end);
- fd_ringmarker_del(ctx->binning_start);
- fd_ringmarker_del(ctx->binning_end);
-
- for (i = 0; i < ARRAY_SIZE(ctx->rings); i++)
- fd_ringbuffer_del(ctx->rings[i]);
+ slab_destroy_child(&ctx->transfer_pool);
- for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
- struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+ for (i = 0; i < ARRAY_SIZE(ctx->vsc_pipe); i++) {
+ struct fd_vsc_pipe *pipe = &ctx->vsc_pipe[i];
if (!pipe->bo)
break;
fd_bo_del(pipe->bo);
}
fd_device_del(ctx->dev);
+ fd_pipe_del(ctx->pipe);
- FREE(ctx);
+ if (fd_mesa_debug & (FD_DBG_BSTAT | FD_DBG_MSGS)) {
+ printf("batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_nondraw=%u, batch_restore=%u\n",
+ (uint32_t)ctx->stats.batch_total, (uint32_t)ctx->stats.batch_sysmem,
+ (uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_nondraw,
+ (uint32_t)ctx->stats.batch_restore);
+ }
+}
+
+/**
+ * pipe_context::set_debug_callback hook: store a copy of the callback
+ * description in the context, or zero the stored copy when cb is NULL.
+ */
+static void
+fd_set_debug_callback(struct pipe_context *pctx,
+		const struct pipe_debug_callback *cb)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	/* no callback supplied: clear whatever was stored before */
+	if (!cb) {
+		memset(&ctx->debug, 0, sizeof(ctx->debug));
+		return;
+	}
+
+	ctx->debug = *cb;
+}
+
+/* TODO we could combine a few of these small buffers (solid_vbuf,
+ * blit_texcoord_vbuf, and vsc_size_mem) into a single buffer and
+ * save a tiny bit of memory.
+ */
+
+/**
+ * Create an immutable PIPE_BIND_CUSTOM buffer and fill it with the six
+ * floats of init_shader_const (read with a 12-byte stride by
+ * fd_context_setup_common_vbos()).
+ *
+ * Returns the new resource, or NULL if the buffer could not be created.
+ */
+static struct pipe_resource *
+create_solid_vertexbuf(struct pipe_context *pctx)
+{
+	static const float init_shader_const[] = {
+			-1.000000, +1.000000, +1.000000,
+			+1.000000, -1.000000, +1.000000,
+	};
+	struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
+			PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
+
+	/* don't hand a NULL resource to pipe_buffer_write() if creation failed: */
+	if (!prsc)
+		return NULL;
+
+	pipe_buffer_write(pctx, prsc, 0,
+			sizeof(init_shader_const), init_shader_const);
+	return prsc;
+}
+
+/**
+ * Create the 16-byte PIPE_USAGE_DYNAMIC, PIPE_BIND_CUSTOM buffer used
+ * as ctx->blit_texcoord_vbuf.  The contents are not initialized here.
+ * Returns whatever pipe_buffer_create() returns.
+ */
+static struct pipe_resource *
+create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
+{
+	return pipe_buffer_create(pctx->screen,
+			PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
+}
+
+/**
+ * Create ctx->solid_vbuf and ctx->blit_texcoord_vbuf and build the two
+ * vertex states that reference them:
+ *
+ *  - solid_vbuf_state: one R32G32B32_FLOAT element read from
+ *    solid_vbuf with a 12-byte stride.
+ *  - blit_vbuf_state: an R32G32_FLOAT element read from
+ *    blit_texcoord_vbuf (8-byte stride) plus an R32G32B32_FLOAT element
+ *    read from solid_vbuf (12-byte stride).
+ */
+void
+fd_context_setup_common_vbos(struct fd_context *ctx)
+{
+	struct pipe_context *pctx = &ctx->base;
+	struct pipe_vertex_element solid_elems[] = {{
+		.vertex_buffer_index = 0,
+		.src_offset = 0,
+		.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+	}};
+	struct pipe_vertex_element blit_elems[] = {{
+		.vertex_buffer_index = 0,
+		.src_offset = 0,
+		.src_format = PIPE_FORMAT_R32G32_FLOAT,
+	}, {
+		.vertex_buffer_index = 1,
+		.src_offset = 0,
+		.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+	}};
+
+	/* the backing buffers come first, the states below point at them: */
+	ctx->solid_vbuf = create_solid_vertexbuf(pctx);
+	ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
+
+	/* solid_vbuf_state: a single stream out of solid_vbuf */
+	ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
+			pctx, ARRAY_SIZE(solid_elems), solid_elems);
+	ctx->solid_vbuf_state.vertexbuf.count = 1;
+	ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
+	ctx->solid_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->solid_vbuf;
+
+	/* blit_vbuf_state: stream 0 from blit_texcoord_vbuf, stream 1 from solid_vbuf */
+	ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
+			pctx, ARRAY_SIZE(blit_elems), blit_elems);
+	ctx->blit_vbuf_state.vertexbuf.count = 2;
+	ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
+	ctx->blit_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->blit_texcoord_vbuf;
+	ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
+	ctx->blit_vbuf_state.vertexbuf.vb[1].buffer.resource = ctx->solid_vbuf;
+}
+
+/**
+ * Undo fd_context_setup_common_vbos(): delete the two vertex-element
+ * state objects, then drop the context's references to solid_vbuf and
+ * blit_texcoord_vbuf (leaving both pointers NULL).
+ */
+void
+fd_context_cleanup_common_vbos(struct fd_context *ctx)
+{
+	struct pipe_context *pctx = &ctx->base;
+	void *vtx_states[] = {
+		ctx->solid_vbuf_state.vtx,
+		ctx->blit_vbuf_state.vtx,
+	};
+	unsigned i;
+
+	/* same order as before: solid state first, then blit state */
+	for (i = 0; i < ARRAY_SIZE(vtx_states); i++)
+		pctx->delete_vertex_elements_state(pctx, vtx_states[i]);
+
+	pipe_resource_reference(&ctx->solid_vbuf, NULL);
+	pipe_resource_reference(&ctx->blit_texcoord_vbuf, NULL);
}
struct pipe_context *
fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
- const uint8_t *primtypes, void *priv)
+ const uint8_t *primtypes, void *priv, unsigned flags)
{
struct fd_screen *screen = fd_screen(pscreen);
struct pipe_context *pctx;
+ unsigned prio = 1;
int i;
+ /* lower numerical value == higher priority: */
+ if (fd_mesa_debug & FD_DBG_HIPRIO)
+ prio = 0;
+ else if (flags & PIPE_CONTEXT_HIGH_PRIORITY)
+ prio = 0;
+ else if (flags & PIPE_CONTEXT_LOW_PRIORITY)
+ prio = 2;
+
ctx->screen = screen;
+ ctx->pipe = fd_pipe_new2(screen->dev, FD_PIPE_3D, prio);
ctx->primtypes = primtypes;
ctx->primtype_mask = 0;
pctx->screen = pscreen;
pctx->priv = priv;
pctx->flush = fd_context_flush;
+ pctx->emit_string_marker = fd_emit_string_marker;
+ pctx->set_debug_callback = fd_set_debug_callback;
+ pctx->create_fence_fd = fd_create_fence_fd;
+ pctx->fence_server_sync = fd_fence_server_sync;
+ pctx->texture_barrier = fd_texture_barrier;
+ pctx->memory_barrier = fd_memory_barrier;
+
+ pctx->stream_uploader = u_upload_create_default(pctx);
+ if (!pctx->stream_uploader)
+ goto fail;
+ pctx->const_uploader = pctx->stream_uploader;
- for (i = 0; i < ARRAY_SIZE(ctx->rings); i++) {
- ctx->rings[i] = fd_ringbuffer_new(screen->pipe, 0x100000);
- if (!ctx->rings[i])
- goto fail;
- }
-
- fd_context_next_rb(pctx);
- fd_reset_wfi(ctx);
+ if (!ctx->screen->reorder)
+ ctx->batch = fd_bc_alloc_batch(&screen->batch_cache, ctx);
- util_dynarray_init(&ctx->draw_patches);
+ slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);
- util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer),
- 16, UTIL_SLAB_SINGLETHREADED);
+ if (!ctx->blit)
+ ctx->blit = fd_blitter_blit;
fd_draw_init(pctx);
fd_resource_context_init(pctx);
if (!ctx->primconvert)
goto fail;
+ list_inithead(&ctx->hw_active_queries);
+ list_inithead(&ctx->acc_active_queries);
+
return pctx;
fail: