From 09196355b2b2b6a2d3935eb3c43fe784d226426a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 13 Aug 2020 10:14:11 -0700 Subject: [PATCH] gallium: Use unpack_rgba() instead of fetch_rgba in translate_generic MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is the only user of fetch_rgba outside of llvmpipe, and it's in the fallback path of this fallback path. Looking at an example of these two functions, b8g8r8a8's unpack_rgba is 2.7x as long as fetch_rgba. It feels reasonable to sacrifice some perf in this already slow (VBO readback, and a function pointer call per attribute per vertex) path to reduce our binary size. And, if I ever finish getting unpack codegen to switch to rows instead of rects, that factor will go back down. Saves 40kb of binary on non-llvmpipe gallium drivers. Reviewed-by: Marek Olšák Part-of: --- src/gallium/auxiliary/translate/translate_generic.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 14631da33cd..d48c8f96655 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -50,7 +50,9 @@ struct translate_generic { struct { enum translate_element_type type; - util_format_fetch_rgba_func_ptr fetch; + void (*fetch)(void *dst, unsigned dst_stride, + const uint8_t *src, unsigned src_stride, + unsigned width, unsigned height); unsigned buffer; unsigned input_offset; unsigned instance_divisor; @@ -623,7 +625,7 @@ generic_run_one(struct translate_generic *tg, if (likely(copy_size >= 0)) { memcpy(dst, src, copy_size); } else { - tg->attrib[attr].fetch(data, src, 0, 0); + tg->attrib[attr].fetch(data, 0, src, 0, 1, 1); if (0) debug_printf("Fetch linear attr %d from %p stride %d index %d: " @@ -796,6 +798,8 @@ translate_generic_create(const struct translate_key *key) for (i = 0; i < key->nr_elements; i++) { const struct util_format_description *format_desc = util_format_description(key->element[i].input_format); + const struct util_format_unpack_description *unpack = + util_format_unpack_description(key->element[i].input_format); assert(format_desc); @@ -811,8 +815,7 @@ translate_generic_create(const struct translate_key *key) } } - tg->attrib[i].fetch = - util_format_fetch_rgba_func(key->element[i].input_format); + tg->attrib[i].fetch = unpack->unpack_rgba; tg->attrib[i].buffer = key->element[i].input_buffer; tg->attrib[i].input_offset = key->element[i].input_offset; tg->attrib[i].instance_divisor = key->element[i].instance_divisor; -- 2.30.2