X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Ffreedreno_context.c;h=47b7a27ad1f6e847f332dbeb5bb87b68a490b3b2;hb=b3b1fa5e2bc4a7df3c68bb0b234d4bcf68ffaa84;hp=4753f58d12d3cae1cdca282f94638abd14d3e5b7;hpb=732b0b5ebc5a30b6e87f67aabfb0e25d3e662d41;p=mesa.git

diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 4753f58d12d..47b7a27ad1f 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -1,5 +1,3 @@
-/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
-
 /*
  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
  *
@@ -27,124 +25,249 @@
  */
 
 #include "freedreno_context.h"
-#include "freedreno_vbo.h"
-#include "freedreno_blend.h"
-#include "freedreno_rasterizer.h"
-#include "freedreno_zsa.h"
-#include "freedreno_state.h"
-#include "freedreno_resource.h"
-#include "freedreno_clear.h"
+#include "freedreno_blitter.h"
+#include "freedreno_draw.h"
+#include "freedreno_fence.h"
 #include "freedreno_program.h"
+#include "freedreno_resource.h"
 #include "freedreno_texture.h"
+#include "freedreno_state.h"
 #include "freedreno_gmem.h"
+#include "freedreno_query.h"
+#include "freedreno_query_hw.h"
 #include "freedreno_util.h"
+#include "util/u_upload_mgr.h"
 
-/* there are two cases where we currently need to wait for render complete:
- * 1) pctx->flush() .. since at the moment we have no way for DDX to sync
- *    the presentation blit with the 3d core
- * 2) wrap-around for ringbuffer.. possibly we can do something more
- *    Intelligent here.  Right now we need to ensure there is enough room
- *    at the end of the drawcmds in the cmdstream buffer for all the per-
- *    tile cmds.  We do this the lamest way possible, by making the ringbuffer
- *    big, and flushing and resetting back to the beginning if we get too
- *    close to the end.
- */
 static void
-fd_context_wait(struct pipe_context *pctx)
+fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fencep,
+		unsigned flags)
 {
 	struct fd_context *ctx = fd_context(pctx);
-	uint32_t ts = fd_ringbuffer_timestamp(ctx->ring);
+	struct pipe_fence_handle *fence = NULL;
+	// TODO we want to look up the batch if it exists, but not create one if not.
+	struct fd_batch *batch = fd_context_batch(ctx);
 
-	DBG("wait: %u", ts);
+	DBG("%p: flush: flags=%x\n", ctx->batch, flags);
 
-	fd_pipe_wait(ctx->screen->pipe, ts);
-	fd_ringbuffer_reset(ctx->ring);
-	fd_ringmarker_mark(ctx->draw_start);
-}
-
-/* emit accumulated render cmds, needed for example if render target has
- * changed, or for flush()
- */
-void
-fd_context_render(struct pipe_context *pctx)
-{
-	struct fd_context *ctx = fd_context(pctx);
-	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+	/* In some sequence of events, we can end up with a last_fence that is
+	 * not an "fd" fence, which results in eglDupNativeFenceFDANDROID()
+	 * errors.  So when a fence fd is requested, drop last_fence here so
+	 * that it is not re-used below.
+	 */
+	if (flags & PIPE_FLUSH_FENCE_FD)
+		fd_fence_ref(&ctx->last_fence, NULL);
 
-	DBG("needs_flush: %d", ctx->needs_flush);
+	/* if no rendering since last flush, i.e. app just decided it needed
+	 * a fence, re-use the last one:
+	 */
+	if (ctx->last_fence) {
+		fd_fence_ref(&fence, ctx->last_fence);
+		goto out;
+	}
 
-	if (!ctx->needs_flush)
+	if (!batch)
 		return;
 
-	fd_gmem_render_tiles(pctx);
+	/* Take a ref to the batch's fence (batch can be unref'd when flushed): */
+	fd_fence_ref(&fence, batch->fence);
 
-	DBG("%p/%p/%p", ctx->ring->start, ctx->ring->cur, ctx->ring->end);
+	if (flags & PIPE_FLUSH_FENCE_FD)
+		batch->needs_out_fence_fd = true;
 
-	/* if size in dwords is more than half the buffer size, then wait and
-	 * wrap around:
-	 */
-	if ((ctx->ring->cur - ctx->ring->start) > ctx->ring->size/8)
-		fd_context_wait(pctx);
+	if (!ctx->screen->reorder) {
+		fd_batch_flush(batch, true);
+	} else if (flags & PIPE_FLUSH_DEFERRED) {
+		fd_bc_flush_deferred(&ctx->screen->batch_cache, ctx);
+	} else {
+		fd_bc_flush(&ctx->screen->batch_cache, ctx);
+	}
+
+out:
+	if (fencep)
+		fd_fence_ref(fencep, fence);
 
-	ctx->needs_flush = false;
-	ctx->cleared = ctx->restore = ctx->resolve = 0;
+	fd_fence_ref(&ctx->last_fence, fence);
 
-	fd_resource(pfb->cbufs[0]->texture)->dirty = false;
-	if (pfb->zsbuf)
-		fd_resource(pfb->zsbuf->texture)->dirty = false;
+	fd_fence_ref(&fence, NULL);
 }
 
 static void
-fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
-		enum pipe_flush_flags flags)
+fd_texture_barrier(struct pipe_context *pctx, unsigned flags)
 {
-	DBG("fence=%p", fence);
+	if (flags == PIPE_TEXTURE_BARRIER_FRAMEBUFFER) {
+		struct fd_context *ctx = fd_context(pctx);
 
-#if 0
-	if (fence) {
-		fd_fence_ref(ctx->screen->fence.current,
-				(struct fd_fence **)fence);
+		if (ctx->framebuffer_barrier) {
+			ctx->framebuffer_barrier(ctx);
+			return;
+		}
 	}
-#endif
 
-	fd_context_render(pctx);
-	fd_context_wait(pctx);
+	/* On devices that could sample from GMEM we could possibly do better.
+	 * Or if we knew that we were doing GMEM bypass we could just emit a
+	 * cache flush, perhaps?  But we don't know if future draws would cause
+	 * us to use GMEM, and a flush in bypass isn't the end of the world.
+	 */
+	fd_context_flush(pctx, NULL, 0);
+}
+
+static void
+fd_memory_barrier(struct pipe_context *pctx, unsigned flags)
+{
+	if (!(flags & ~PIPE_BARRIER_UPDATE))
+		return;
+
+	fd_context_flush(pctx, NULL, 0);
+	/* TODO do we need to check for persistently mapped buffers and fd_bo_cpu_prep()?? */
 }
 
+/**
+ * emit marker string as payload of a no-op packet, which can be
+ * decoded by cffdump.
+ */
 static void
+fd_emit_string_marker(struct pipe_context *pctx, const char *string, int len)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_ringbuffer *ring;
+	const uint32_t *buf = (const void *)string;
+
+	if (!ctx->batch)
+		return;
+
+	ctx->batch->needs_flush = true;
+
+	ring = ctx->batch->draw;
+
+	/* max packet size is 0x3fff dwords: */
+	len = MIN2(len, 0x3fff * 4);
+
+	if (ctx->screen->gpu_id >= 500)
+		OUT_PKT7(ring, CP_NOP, align(len, 4) / 4);
+	else
+		OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
+	while (len >= 4) {
+		OUT_RING(ring, *buf);
+		buf++;
+		len -= 4;
+	}
+
+	/* copy remainder bytes without reading past end of input string: */
+	if (len > 0) {
+		uint32_t w = 0;
+		memcpy(&w, buf, len);
+		OUT_RING(ring, w);
+	}
+}
+
+void
 fd_context_destroy(struct pipe_context *pctx)
 {
 	struct fd_context *ctx = fd_context(pctx);
+	unsigned i;
 
 	DBG("");
 
+	fd_fence_ref(&ctx->last_fence, NULL);
+
+	if (ctx->screen->reorder && util_queue_is_initialized(&ctx->flush_queue))
+		util_queue_destroy(&ctx->flush_queue);
+
+	util_copy_framebuffer_state(&ctx->framebuffer, NULL);
+	fd_batch_reference(&ctx->batch, NULL);  /* unref current batch */
+	fd_bc_invalidate_context(ctx);
+
+	fd_prog_fini(pctx);
+
 	if (ctx->blitter)
 		util_blitter_destroy(ctx->blitter);
 
-	fd_ringmarker_del(ctx->draw_start);
-	fd_ringmarker_del(ctx->draw_end);
-	fd_ringbuffer_del(ctx->ring);
+	if (pctx->stream_uploader)
+		u_upload_destroy(pctx->stream_uploader);
 
-	fd_prog_fini(pctx);
+	if (ctx->clear_rs_state)
+		pctx->delete_rasterizer_state(pctx, ctx->clear_rs_state);
+
+	if (ctx->primconvert)
+		util_primconvert_destroy(ctx->primconvert);
+
+	slab_destroy_child(&ctx->transfer_pool);
+
+	for (i = 0; i < ARRAY_SIZE(ctx->vsc_pipe_bo); i++) {
+		if (!ctx->vsc_pipe_bo[i])
+			break;
+		fd_bo_del(ctx->vsc_pipe_bo[i]);
+	}
 
-	FREE(ctx);
+	fd_device_del(ctx->dev);
+	fd_pipe_del(ctx->pipe);
+
+	mtx_destroy(&ctx->gmem_lock);
+
+	if (fd_mesa_debug & (FD_DBG_BSTAT | FD_DBG_MSGS)) {
+		printf("batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_nondraw=%u, batch_restore=%u\n",
+			(uint32_t)ctx->stats.batch_total, (uint32_t)ctx->stats.batch_sysmem,
+			(uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_nondraw,
+			(uint32_t)ctx->stats.batch_restore);
+	}
 }
 
+static void
+fd_set_debug_callback(struct pipe_context *pctx,
+		const struct pipe_debug_callback *cb)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	if (cb)
+		ctx->debug = *cb;
+	else
+		memset(&ctx->debug, 0, sizeof(ctx->debug));
+}
+
+static uint32_t
+fd_get_reset_count(struct fd_context *ctx, bool per_context)
+{
+	uint64_t val;
+	enum fd_param_id param =
+		per_context ? FD_CTX_FAULTS : FD_GLOBAL_FAULTS;
+	int ret = fd_pipe_get_param(ctx->pipe, param, &val);
+	debug_assert(!ret);
+	return val;
+}
+
+static enum pipe_reset_status
+fd_get_device_reset_status(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	int context_faults = fd_get_reset_count(ctx, true);
+	int global_faults  = fd_get_reset_count(ctx, false);
+	enum pipe_reset_status status;
+
+	if (context_faults != ctx->context_reset_count) {
+		status = PIPE_GUILTY_CONTEXT_RESET;
+	} else if (global_faults != ctx->global_reset_count) {
+		status = PIPE_INNOCENT_CONTEXT_RESET;
+	} else {
+		status = PIPE_NO_RESET;
+	}
+
+	ctx->context_reset_count = context_faults;
+	ctx->global_reset_count = global_faults;
+
+	return status;
+}
+
+/* TODO we could combine a few of these small buffers (solid_vbuf,
+ * blit_texcoord_vbuf, and vsc_size_mem) into a single buffer and
+ * save a tiny bit of memory.
+ */
+
 static struct pipe_resource *
 create_solid_vertexbuf(struct pipe_context *pctx)
 {
 	static const float init_shader_const[] = {
-			/* for clear/gmem2mem: */
-			-1.000000, +1.000000, +1.000000, +1.100000,
-			+1.000000, +1.000000, -1.000000, -1.100000,
-			+1.000000, +1.100000, -1.100000, +1.000000,
-			/* for mem2gmem: (vertices) */
-			-1.000000, +1.000000, +1.000000, +1.000000,
-			+1.000000, +1.000000, -1.000000, -1.000000,
-			+1.000000, +1.000000, -1.000000, +1.000000,
-			/* for mem2gmem: (tex coords) */
-			+0.000000, +0.000000, +1.000000, +0.000000,
-			+0.000000, +1.000000, +1.000000, +1.000000,
+			-1.000000, +1.000000, +1.000000,
+			+1.000000, -1.000000, +1.000000,
 	};
 	struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
 			PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
@@ -153,53 +276,143 @@ create_solid_vertexbuf(struct pipe_context *pctx)
 	return prsc;
 }
 
+static struct pipe_resource *
+create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
+{
+	struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
+			PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
+	return prsc;
+}
+
+void
+fd_context_setup_common_vbos(struct fd_context *ctx)
+{
+	struct pipe_context *pctx = &ctx->base;
+
+	ctx->solid_vbuf = create_solid_vertexbuf(pctx);
+	ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
+
+	/* setup solid_vbuf_state: */
+	ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
+			pctx, 1, (struct pipe_vertex_element[]){{
+				.vertex_buffer_index = 0,
+				.src_offset = 0,
+				.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+			}});
+	ctx->solid_vbuf_state.vertexbuf.count = 1;
+	ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
+	ctx->solid_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->solid_vbuf;
+
+	/* setup blit_vbuf_state: */
+	ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
+			pctx, 2, (struct pipe_vertex_element[]){{
+				.vertex_buffer_index = 0,
+				.src_offset = 0,
+				.src_format = PIPE_FORMAT_R32G32_FLOAT,
+			}, {
+				.vertex_buffer_index = 1,
+				.src_offset = 0,
+				.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+			}});
+	ctx->blit_vbuf_state.vertexbuf.count = 2;
+	ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
+	ctx->blit_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->blit_texcoord_vbuf;
+	ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
+	ctx->blit_vbuf_state.vertexbuf.vb[1].buffer.resource = ctx->solid_vbuf;
+}
+
+void
+fd_context_cleanup_common_vbos(struct fd_context *ctx)
+{
+	struct pipe_context *pctx = &ctx->base;
+
+	pctx->delete_vertex_elements_state(pctx, ctx->solid_vbuf_state.vtx);
+	pctx->delete_vertex_elements_state(pctx, ctx->blit_vbuf_state.vtx);
+
+	pipe_resource_reference(&ctx->solid_vbuf, NULL);
+	pipe_resource_reference(&ctx->blit_texcoord_vbuf, NULL);
+}
+
 struct pipe_context *
-fd_context_create(struct pipe_screen *pscreen, void *priv)
+fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
+		const uint8_t *primtypes, void *priv, unsigned flags)
 {
 	struct fd_screen *screen = fd_screen(pscreen);
-	struct fd_context *ctx = CALLOC_STRUCT(fd_context);
 	struct pipe_context *pctx;
+	unsigned prio = 1;
+	int i;
 
-	if (!ctx)
-		return NULL;
-
-	DBG("");
+	/* lower numerical value == higher priority: */
+	if (fd_mesa_debug & FD_DBG_HIPRIO)
+		prio = 0;
+	else if (flags & PIPE_CONTEXT_HIGH_PRIORITY)
+		prio = 0;
+	else if (flags & PIPE_CONTEXT_LOW_PRIORITY)
+		prio = 2;
 
 	ctx->screen = screen;
+	ctx->pipe = fd_pipe_new2(screen->dev, FD_PIPE_3D, prio);
+
+	if (fd_device_version(screen->dev) >= FD_VERSION_ROBUSTNESS) {
+		ctx->context_reset_count = fd_get_reset_count(ctx, true);
+		ctx->global_reset_count = fd_get_reset_count(ctx, false);
+	}
 
-	ctx->ring = fd_ringbuffer_new(screen->pipe, 0x100000);
-	ctx->draw_start = fd_ringmarker_new(ctx->ring);
-	ctx->draw_end = fd_ringmarker_new(ctx->ring);
+	ctx->primtypes = primtypes;
+	ctx->primtype_mask = 0;
+	for (i = 0; i < PIPE_PRIM_MAX; i++)
+		if (primtypes[i])
+			ctx->primtype_mask |= (1 << i);
+
+	(void) mtx_init(&ctx->gmem_lock, mtx_plain);
+
+	/* need some sane default in case state tracker doesn't
+	 * set some state:
+	 */
+	ctx->sample_mask = 0xffff;
 
 	pctx = &ctx->base;
 	pctx->screen = pscreen;
 	pctx->priv = priv;
 	pctx->flush = fd_context_flush;
-	pctx->destroy = fd_context_destroy;
+	pctx->emit_string_marker = fd_emit_string_marker;
+	pctx->set_debug_callback = fd_set_debug_callback;
+	pctx->get_device_reset_status = fd_get_device_reset_status;
+	pctx->create_fence_fd = fd_create_fence_fd;
+	pctx->fence_server_sync = fd_fence_server_sync;
+	pctx->texture_barrier = fd_texture_barrier;
+	pctx->memory_barrier = fd_memory_barrier;
 
-	util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer),
-			16, UTIL_SLAB_SINGLETHREADED);
+	pctx->stream_uploader = u_upload_create_default(pctx);
+	if (!pctx->stream_uploader)
+		goto fail;
+	pctx->const_uploader = pctx->stream_uploader;
 
-	fd_vbo_init(pctx);
-	fd_blend_init(pctx);
-	fd_rasterizer_init(pctx);
-	fd_zsa_init(pctx);
-	fd_state_init(pctx);
+	if (!ctx->screen->reorder)
+		ctx->batch = fd_bc_alloc_batch(&screen->batch_cache, ctx, false);
+
+	slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);
+
+	fd_draw_init(pctx);
 	fd_resource_context_init(pctx);
-	fd_clear_init(pctx);
-	fd_prog_init(pctx);
+	fd_query_context_init(pctx);
 	fd_texture_init(pctx);
+	fd_state_init(pctx);
 
 	ctx->blitter = util_blitter_create(pctx);
-	if (!ctx->blitter) {
-		fd_context_destroy(pctx);
-		return NULL;
-	}
+	if (!ctx->blitter)
+		goto fail;
 
-	/* construct vertex state used for solid ops (clear, and gmem<->mem) */
-	ctx->solid_vertexbuf = create_solid_vertexbuf(pctx);
+	ctx->primconvert = util_primconvert_create(pctx, ctx->primtype_mask);
+	if (!ctx->primconvert)
+		goto fail;
 
-	fd_state_emit_setup(pctx);
+	list_inithead(&ctx->hw_active_queries);
+	list_inithead(&ctx->acc_active_queries);
 
 	return pctx;
+
+fail:
+	pctx->destroy(pctx);
+	return NULL;
 }