From: Eric Anholt
Date: Wed, 8 Aug 2018 00:53:24 +0000 (-0700)
Subject: vc4: Compile the LT image helper per cpp we might load/store.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3e06b918aab3c6a4ca30f5e935aa6996f7009d56;p=mesa.git

vc4: Compile the LT image helper per cpp we might load/store.

For the partial load/store support I'm about to add, we want the memcpy to
be compiled out to a single load/store. This should also eliminate the
calls to vc4_utile_width/height().

Improves x11perf -putimage100 performance by 3.76344% +/- 1.16978% (n=15)
---

diff --git a/src/gallium/drivers/vc4/vc4_tiling_lt.c b/src/gallium/drivers/vc4/vc4_tiling_lt.c
index b8f4c0405c2..8c875e7bd3a 100644
--- a/src/gallium/drivers/vc4/vc4_tiling_lt.c
+++ b/src/gallium/drivers/vc4/vc4_tiling_lt.c
@@ -289,12 +289,40 @@ vc4_lt_image_helper(void *gpu, uint32_t gpu_stride,
         }
 }
 
+static inline void
+vc4_lt_image_cpp_helper(void *gpu, uint32_t gpu_stride,
+                        void *cpu, uint32_t cpu_stride,
+                        int cpp, const struct pipe_box *box, bool to_cpu)
+{
+        switch (cpp) {
+        case 1:
+                vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 1, box,
+                                    to_cpu);
+                break;
+        case 2:
+                vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 2, box,
+                                    to_cpu);
+                break;
+        case 4:
+                vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 4, box,
+                                    to_cpu);
+                break;
+        case 8:
+                vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 8, box,
+                                    to_cpu);
+                break;
+        default:
+                unreachable("bad cpp");
+        }
+}
+
 void
 NEON_TAG(vc4_load_lt_image)(void *dst, uint32_t dst_stride,
                             void *src, uint32_t src_stride,
                             int cpp, const struct pipe_box *box)
 {
-        vc4_lt_image_helper(src, src_stride, dst, dst_stride, cpp, box, true);
+        vc4_lt_image_cpp_helper(src, src_stride, dst, dst_stride, cpp, box,
+                                true);
 }
 
 void
@@ -302,5 +330,6 @@ NEON_TAG(vc4_store_lt_image)(void *dst, uint32_t dst_stride,
                              void *src, uint32_t src_stride,
                              int cpp, const struct pipe_box *box)
 {
-        vc4_lt_image_helper(dst, dst_stride, src, src_stride, cpp, box, false);
+        vc4_lt_image_cpp_helper(dst, dst_stride, src, src_stride, cpp, box,
+                                false);
 }