From 21a9ed620764bde2021aa7592cf2695dad8f3f74 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Fri, 15 Apr 2016 14:27:34 -0700
Subject: [PATCH] vc4: Don't flush on read-only access of buffers read by the CL.

Fixes piglit mixed-immediate-and-vbo, and may significantly improve
performance of applications that store a 4-byte IB in the same VBO as
vertex data.
---
 src/gallium/drivers/vc4/vc4_context.c  | 13 ++++++++-----
 src/gallium/drivers/vc4/vc4_context.h  |  3 ++-
 src/gallium/drivers/vc4/vc4_resource.c |  7 ++++++-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index a0888f23265..eeadea0b1db 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -133,7 +133,8 @@ vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
  * This helps avoid flushing the command buffers when unnecessary.
  */
 bool
-vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo)
+vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo,
+                     bool include_reads)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
 
@@ -143,10 +144,12 @@ vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo)
         /* Walk all the referenced BOs in the drawing command list to see if
          * they match.
          */
-        struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
-        for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
-                if (referenced_bos[i] == bo) {
-                        return true;
+        if (include_reads) {
+                struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
+                for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
+                        if (referenced_bos[i] == bo) {
+                                return true;
+                        }
                 }
         }
 
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index aa973bddcda..2457e67941d 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -397,7 +397,8 @@ void vc4_flush(struct pipe_context *pctx);
 void vc4_job_init(struct vc4_context *vc4);
 void vc4_job_submit(struct vc4_context *vc4);
 void vc4_job_reset(struct vc4_context *vc4);
-bool vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo);
+bool vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo,
+                          bool include_reads);
 void vc4_emit_state(struct pipe_context *pctx);
 void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c);
 struct qpu_reg *vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c);
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index 2f89da5f9f1..050941c3e6f 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -171,7 +171,12 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
                         vc4_flush(pctx);
                 }
         } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
-                if (vc4_cl_references_bo(pctx, rsc->bo)) {
+                /* If we're writing and the buffer is being used by the CL, we
+                 * have to flush the CL first. If we're only reading, we need
+                 * to flush if the CL has written our buffer.
+                 */
+                if (vc4_cl_references_bo(pctx, rsc->bo,
+                                         usage & PIPE_TRANSFER_WRITE)) {
                         if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
                             prsc->last_level == 0 &&
                             prsc->width0 == box->width &&
-- 
2.30.2