From 4f74b379aa05e133cdd19865662eceacee4f63f4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 31 Aug 2016 14:49:41 -0600 Subject: [PATCH] svga: implement an index buffer translation cache Some OpenGL apps, like Cinebench R15, have many glDrawElements(GL_QUADS) calls. Since we don't directly support quads we have to convert these calls into GL_TRIANGLES which involves generating a new index buffer. This patch saves the new/translated index buffer in the hope that it can be reused for a later draw call. Cinebench R15 increases by about 20% with this change. The NobelClinician Viewer app also hits this code. Tested with full piglit run. Reviewed-by: Charmaine Lee --- src/gallium/drivers/svga/svga_draw_elements.c | 67 +++++++++++++++++-- .../drivers/svga/svga_resource_buffer.c | 9 +++ .../drivers/svga/svga_resource_buffer.h | 11 +++ src/gallium/drivers/svga/svga_screen.c | 2 + src/gallium/drivers/svga/svga_screen.h | 1 + 5 files changed, 84 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c index f7953bcaefe..37d1fdb4f61 100644 --- a/src/gallium/drivers/svga/svga_draw_elements.c +++ b/src/gallium/drivers/svga/svga_draw_elements.c @@ -37,26 +37,66 @@ #include "svga_hw_reg.h" +/** + * Return a new index buffer which contains a translation of the original + * index buffer. An example of a translation is converting from QUAD + * primitives to TRIANGLE primitives. Each set of four indexes for a quad + * will be converted to six indices for two triangles. + * + * Before generating the new index buffer we'll check if the incoming + * buffer already has a translated buffer that can be re-used. + * This benefits demos like Cinebench R15 which has many + * glDrawElements(GL_QUADS) commands (we can't draw quads natively). 
+ * + * \param offset offset in bytes to first index to translate in src buffer + * \param orig_prim original primitive type (like PIPE_PRIM_QUADS) + * \param gen_prim new/generated primitive type (like PIPE_PRIM_TRIANGLES) + * \param orig_nr number of indexes to translate in source buffer + * \param gen_nr number of indexes to write into new/dest buffer + * \param index_size bytes per index (2 or 4) + * \param translate the translation function from the u_translate module + * \param out_buf returns the new/translated index buffer + * \return error code to indicate success or failure + */ static enum pipe_error translate_indices(struct svga_hwtnl *hwtnl, struct pipe_resource *src, - unsigned offset, enum pipe_prim_type prim, unsigned nr, + unsigned offset, + enum pipe_prim_type orig_prim, enum pipe_prim_type gen_prim, + unsigned orig_nr, unsigned gen_nr, unsigned index_size, u_translate_func translate, struct pipe_resource **out_buf) { struct pipe_context *pipe = &hwtnl->svga->pipe; + struct svga_screen *screen = svga_screen(pipe->screen); + struct svga_buffer *src_sbuf = svga_buffer(src); struct pipe_transfer *src_transfer = NULL; struct pipe_transfer *dst_transfer = NULL; - unsigned size = index_size * nr; + unsigned size = index_size * gen_nr; const void *src_map = NULL; struct pipe_resource *dst = NULL; void *dst_map = NULL; + assert(index_size == 2 || index_size == 4); + + if (!screen->debug.no_cache_index_buffers) { + /* Check if we already have a translated index buffer */ + if (src_sbuf->translated_indices.buffer && + src_sbuf->translated_indices.orig_prim == PIPE_PRIM_QUADS && + src_sbuf->translated_indices.new_prim == gen_prim && + src_sbuf->translated_indices.offset == offset && + src_sbuf->translated_indices.count == orig_nr && + src_sbuf->translated_indices.index_size == index_size) { + pipe_resource_reference(out_buf, src_sbuf->translated_indices.buffer); + return PIPE_OK; + } + } + /* Need to trim vertex count to make sure we don't write too much data * 
to the dst buffer in the translate() call. */ - u_trim_pipe_prim(prim, &nr); + u_trim_pipe_prim(gen_prim, &gen_nr); - size = index_size * nr; + size = index_size * gen_nr; dst = pipe_buffer_create(pipe->screen, PIPE_BIND_INDEX_BUFFER, PIPE_USAGE_DEFAULT, size); @@ -71,12 +111,25 @@ translate_indices(struct svga_hwtnl *hwtnl, struct pipe_resource *src, if (!dst_map) goto fail; - translate((const char *) src_map + offset, 0, 0, nr, 0, dst_map); + translate((const char *) src_map + offset, 0, 0, gen_nr, 0, dst_map); pipe_buffer_unmap(pipe, src_transfer); pipe_buffer_unmap(pipe, dst_transfer); *out_buf = dst; + + if (!screen->debug.no_cache_index_buffers) { + /* Save the new, translated index buffer in the hope we can use it + * again in the future. + */ + pipe_resource_reference(&src_sbuf->translated_indices.buffer, dst); + src_sbuf->translated_indices.orig_prim = PIPE_PRIM_QUADS; + src_sbuf->translated_indices.new_prim = gen_prim; + src_sbuf->translated_indices.offset = offset; + src_sbuf->translated_indices.count = orig_nr; + src_sbuf->translated_indices.index_size = index_size; + } + return PIPE_OK; fail: @@ -186,7 +239,9 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl, ret = translate_indices(hwtnl, index_buffer, start * index_size, - gen_prim, gen_nr, gen_size, gen_func, &gen_buf); + prim, gen_prim, + count, gen_nr, gen_size, + gen_func, &gen_buf); if (ret != PIPE_OK) goto done; diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index b47b07be254..99ed1a2e2e0 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -98,6 +98,13 @@ svga_buffer_transfer_map(struct pipe_context *pipe, transfer->stride = 0; transfer->layer_stride = 0; + if (usage & PIPE_TRANSFER_WRITE) { + /* If we write to the buffer for any reason, free any saved translated + * indices. 
+ */ + pipe_resource_reference(&sbuf->translated_indices.buffer, NULL); + } + if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) { enum pipe_error ret; @@ -361,6 +368,8 @@ svga_buffer_destroy( struct pipe_screen *screen, if (sbuf->swbuf && !sbuf->user) align_free(sbuf->swbuf); + pipe_resource_reference(&sbuf->translated_indices.buffer, NULL); + ss->hud.total_resource_bytes -= sbuf->size; assert(ss->hud.num_resources > 0); if (ss->hud.num_resources > 0) diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h index daf9c18a95a..69075888892 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/src/gallium/drivers/svga/svga_resource_buffer.h @@ -192,6 +192,17 @@ struct svga_buffer unsigned size; /**< Approximate size in bytes */ boolean dirty; /**< Need to do a readback before mapping? */ + + /** In some cases we try to keep the results of the translate_indices() + * function from svga_draw_elements.c + */ + struct { + enum pipe_prim_type orig_prim, new_prim; + struct pipe_resource *buffer; + unsigned index_size; + unsigned offset; /**< byte offset of first index */ + unsigned count; /**< num original (untranslated) indices */ + } translated_indices; }; diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index ec29ae9f7c6..4c2d6718816 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -944,6 +944,8 @@ svga_screen_create(struct svga_winsys_screen *sws) debug_get_bool_option("SVGA_NO_SURFACE_VIEW", FALSE); svgascreen->debug.no_sampler_view = debug_get_bool_option("SVGA_NO_SAMPLER_VIEW", FALSE); + svgascreen->debug.no_cache_index_buffers = + debug_get_bool_option("SVGA_NO_CACHE_INDEX_BUFFERS", FALSE); screen = &svgascreen->screen; diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h index 98b56b2a6d1..6cafeba1480 100644 --- a/src/gallium/drivers/svga/svga_screen.h +++ b/src/gallium/drivers/svga/svga_screen.h @@ -62,6 
+62,7 @@ struct svga_screen boolean no_surface_view; boolean force_sampler_view; boolean no_sampler_view; + boolean no_cache_index_buffers; } debug; unsigned texture_timestamp; -- 2.30.2