vc4: Compile the LT image helper per cpp we might load/store.
author Eric Anholt <eric@anholt.net>
Wed, 8 Aug 2018 00:53:24 +0000 (17:53 -0700)
committer Eric Anholt <eric@anholt.net>
Wed, 8 Aug 2018 22:53:25 +0000 (15:53 -0700)
For the partial load/store support I'm about to add, we want the memcpy to
be compiled out to a single load/store.  This should also eliminate the
calls to vc4_utile_width/height().

Improves x11perf -putimage100 performance by 3.76344% +/- 1.16978% (n=15)

src/gallium/drivers/vc4/vc4_tiling_lt.c

index b8f4c0405c205981d11c5f259f382f5a4f003188..8c875e7bd3a106098a5fb580f4423733704642b4 100644 (file)
@@ -289,12 +289,40 @@ vc4_lt_image_helper(void *gpu, uint32_t gpu_stride,
         }
 }
 
+static inline void
+vc4_lt_image_cpp_helper(void *gpu, uint32_t gpu_stride,
+                        void *cpu, uint32_t cpu_stride,
+                        int cpp, const struct pipe_box *box, bool to_cpu)
+{
+        switch (cpp) {
+        case 1:
+                vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 1, box,
+                                    to_cpu);
+                break;
+        case 2:
+                vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 2, box,
+                                    to_cpu);
+                break;
+        case 4:
+                vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 4, box,
+                                    to_cpu);
+                break;
+        case 8:
+                vc4_lt_image_helper(gpu, gpu_stride, cpu, cpu_stride, 8, box,
+                                    to_cpu);
+                break;
+        default:
+                unreachable("bad cpp");
+        }
+}
+
 void
 NEON_TAG(vc4_load_lt_image)(void *dst, uint32_t dst_stride,
                             void *src, uint32_t src_stride,
                             int cpp, const struct pipe_box *box)
 {
-        vc4_lt_image_helper(src, src_stride, dst, dst_stride, cpp, box, true);
+        vc4_lt_image_cpp_helper(src, src_stride, dst, dst_stride, cpp, box,
+                                true);
 }
 
 void
@@ -302,5 +330,6 @@ NEON_TAG(vc4_store_lt_image)(void *dst, uint32_t dst_stride,
                              void *src, uint32_t src_stride,
                              int cpp, const struct pipe_box *box)
 {
-        vc4_lt_image_helper(dst, dst_stride, src, src_stride, cpp, box, false);
+        vc4_lt_image_cpp_helper(dst, dst_stride, src, src_stride, cpp, box,
+                                false);
 }