From: Eric Anholt Date: Wed, 15 Apr 2015 05:42:02 +0000 (-0700) Subject: vc4: Just stream out fallback IB contents. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=10aacf5ae8f3e90e2f0967fbdcf96df93e346e20;p=mesa.git vc4: Just stream out fallback IB contents. The idea I had when I wrote the original shadow code was that you'd see a set_index_buffer to the IB, then a bunch of draws out of it. What's actually happening in openarena is that set_index_buffer occurs at every draw, so we end up making a new shadow BO every time, and converting more of the BO than is actually used in the draw. While I could maybe come up with a better caching scheme, for now just do the simple thing that doesn't result in a new shadow IB allocation per draw. Improves performance of isosurf in drawelements mode by 58.7967% +/- 3.86152% (n=8). --- diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index b394c186efb..a2b1cac952d 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -29,6 +29,7 @@ #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_blitter.h" +#include "util/u_upload_mgr.h" #include "indices/u_primconvert.h" #include "pipe/p_screen.h" @@ -410,6 +411,9 @@ vc4_context_destroy(struct pipe_context *pctx) if (vc4->primconvert) util_primconvert_destroy(vc4->primconvert); + if (vc4->uploader) + u_upload_destroy(vc4->uploader); + util_slab_destroy(&vc4->transfer_pool); pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL); @@ -466,6 +470,9 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv) if (!vc4->primconvert) goto fail; + vc4->uploader = u_upload_create(pctx, 16 * 1024, 4, + PIPE_BIND_INDEX_BUFFER); + vc4_debug |= saved_shaderdb_flag; return &vc4->base; diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index d89f1974e12..41dacb9172d 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -243,6 +243,8 @@ struct vc4_context { /** Seqno of the last CL flush's job. */ uint64_t last_emit_seqno; + struct u_upload_mgr *uploader; + /** @{ Current pipeline state objects */ struct pipe_scissor_state scissor; struct pipe_blend_state *blend; diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 16418bf12da..15743ea7671 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -266,13 +266,17 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) * definitions, up to but not including QUADS. */ if (info->indexed) { - struct vc4_resource *rsc = vc4_resource(vc4->indexbuf.buffer); uint32_t offset = vc4->indexbuf.offset; uint32_t index_size = vc4->indexbuf.index_size; - if (rsc->shadow_parent) { - vc4_update_shadow_index_buffer(pctx, &vc4->indexbuf); - offset = 0; + struct pipe_resource *prsc; + if (vc4->indexbuf.index_size == 4) { + prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf, + info->count, &offset); + index_size = 2; + } else { + prsc = vc4->indexbuf.buffer; } + struct vc4_resource *rsc = vc4_resource(prsc); cl_start_reloc(&vc4->bcl, 1); cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE); @@ -284,6 +288,9 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_u32(&vc4->bcl, info->count); cl_reloc(vc4, &vc4->bcl, rsc->bo, offset); cl_u32(&vc4->bcl, max_index); + + if (vc4->indexbuf.index_size == 4) + pipe_resource_reference(&prsc, NULL); } else { cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE); cl_u8(&vc4->bcl, info->mode); diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 3f180d5845d..14b135e2f45 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -26,6 +26,7 @@ #include "util/u_format.h" #include "util/u_inlines.h" #include "util/u_surface.h" +#include "util/u_upload_mgr.h" #include "vc4_screen.h" #include "vc4_context.h" @@ -638,41 +639,37 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, * was in user memory, it would be nice to not have uploaded it to a VBO * before translating. */ -void -vc4_update_shadow_index_buffer(struct pipe_context *pctx, - const struct pipe_index_buffer *ib) +struct pipe_resource * +vc4_get_shadow_index_buffer(struct pipe_context *pctx, + const struct pipe_index_buffer *ib, + uint32_t count, + uint32_t *shadow_offset) { - struct vc4_resource *shadow = vc4_resource(ib->buffer); - struct vc4_resource *orig = vc4_resource(shadow->shadow_parent); - uint32_t count = shadow->base.b.width0 / 2; - - if (shadow->writes == orig->writes) - return; - + struct vc4_context *vc4 = vc4_context(pctx); + struct vc4_resource *orig = vc4_resource(ib->buffer); perf_debug("Fallback conversion for %d uint indices\n", count); + void *data; + struct pipe_resource *shadow_rsc = NULL; + u_upload_alloc(vc4->uploader, 0, count * 2, + shadow_offset, &shadow_rsc, &data); + uint16_t *dst = data; + struct pipe_transfer *src_transfer; uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b, ib->offset, count * 4, PIPE_TRANSFER_READ, &src_transfer); - struct pipe_transfer *dst_transfer; - uint16_t *dst = pipe_buffer_map_range(pctx, &shadow->base.b, - 0, - count * 2, - PIPE_TRANSFER_WRITE, &dst_transfer); - for (int i = 0; i < count; i++) { uint32_t src_index = src[i]; assert(src_index <= 0xffff); dst[i] = src_index; } - pctx->transfer_unmap(pctx, dst_transfer); pctx->transfer_unmap(pctx, src_transfer); - shadow->writes = orig->writes; + return shadow_rsc; } void diff --git a/src/gallium/drivers/vc4/vc4_resource.h b/src/gallium/drivers/vc4/vc4_resource.h index 2ed848bc7b9..b3cba8f2f65 100644 --- a/src/gallium/drivers/vc4/vc4_resource.h +++ b/src/gallium/drivers/vc4/vc4_resource.h @@ -107,8 +107,10 @@ struct pipe_resource *vc4_resource_create(struct pipe_screen *pscreen, const struct pipe_resource *tmpl); void vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, struct pipe_sampler_view *view); -void vc4_update_shadow_index_buffer(struct pipe_context *pctx, - const struct pipe_index_buffer *ib); +struct pipe_resource *vc4_get_shadow_index_buffer(struct pipe_context *pctx, + const struct pipe_index_buffer *ib, + uint32_t count, + uint32_t *offset); void vc4_dump_surface(struct pipe_surface *psurf); #endif /* VC4_RESOURCE_H */ diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 7875eff0325..4a1d4c3a4d6 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -304,24 +304,8 @@ vc4_set_index_buffer(struct pipe_context *pctx, if (ib) { assert(!ib->user_buffer); - - if (ib->index_size == 4) { - struct pipe_resource tmpl = *ib->buffer; - assert(tmpl.format == PIPE_FORMAT_R8_UNORM); - assert(tmpl.height0 == 1); - tmpl.width0 = (tmpl.width0 - ib->offset) / 2; - struct pipe_resource *pshadow = - vc4_resource_create(&vc4->screen->base, &tmpl); - struct vc4_resource *shadow = vc4_resource(pshadow); - pipe_resource_reference(&shadow->shadow_parent, ib->buffer); - - pipe_resource_reference(&vc4->indexbuf.buffer, NULL); - vc4->indexbuf.buffer = pshadow; - vc4->indexbuf.index_size = 2; - } else { - pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer); - vc4->indexbuf.index_size = ib->index_size; - } + pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer); + vc4->indexbuf.index_size = ib->index_size; vc4->indexbuf.offset = ib->offset; } else { pipe_resource_reference(&vc4->indexbuf.buffer, NULL);