vc4: Move LT tiling code to a separate file.
[mesa.git] / src / gallium / drivers / vc4 / vc4_tiling_lt.c
1 /*
2 * Copyright © 2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
/** @file vc4_tiling_lt.c
 *
 * Helper functions split out of vc4_tiling.c so that they can be compiled
 * either with or without NEON assembly.
 */
29
30 #include <string.h>
31 #include "pipe/p_state.h"
32 #include "vc4_tiling.h"
33
/**
 * Returns the stride in bytes of one row of a 64-byte microtile.
 *
 * A cpp of 1 gives 8; a cpp of 2, 4 or 8 gives 16.  Any other cpp is
 * reported through unreachable().
 */
static uint32_t
vc4_utile_stride(int cpp)
{
        if (cpp == 1) {
                return 8;
        }

        if (cpp == 2 || cpp == 4 || cpp == 8) {
                return 16;
        }

        unreachable("bad cpp");
}
49
/**
 * Copies the 64-byte microtile stored contiguously at src into dst, one row
 * at a time.
 *
 * @param dst        Where the first row of the microtile is written.
 * @param src        Start of the 64 contiguous bytes of the microtile.
 * @param dst_stride Distance in bytes between consecutive rows in dst.
 * @param cpp        Passed to vc4_utile_stride() to get the size in bytes of
 *                   each microtile row.
 */
static void
vc4_load_utile(void *dst, const void *src, uint32_t dst_stride, uint32_t cpp)
{
        /* Use byte pointers: arithmetic on void * is a GNU extension and is
         * not valid ISO C.
         */
        uint8_t *dst_row = dst;
        const uint8_t *src_bytes = src;
        uint32_t src_stride = vc4_utile_stride(cpp);

        for (uint32_t src_offset = 0; src_offset < 64;
             src_offset += src_stride) {
                memcpy(dst_row, src_bytes + src_offset, src_stride);
                dst_row += dst_stride;
        }
}
60
/**
 * Gathers rows from src and writes them as one contiguous 64-byte microtile
 * at dst.
 *
 * @param dst        Start of the 64 contiguous bytes of the microtile.
 * @param src        Where the first row of the microtile is read from.
 * @param src_stride Distance in bytes between consecutive rows in src.
 * @param cpp        Passed to vc4_utile_stride() to get the size in bytes of
 *                   each microtile row.
 */
static void
vc4_store_utile(void *dst, const void *src, uint32_t src_stride, uint32_t cpp)
{
        /* Use byte pointers: arithmetic on void * is a GNU extension and is
         * not valid ISO C.
         */
        uint8_t *dst_bytes = dst;
        const uint8_t *src_row = src;
        uint32_t dst_stride = vc4_utile_stride(cpp);

        for (uint32_t dst_offset = 0; dst_offset < 64;
             dst_offset += dst_stride) {
                memcpy(dst_bytes + dst_offset, src_row, dst_stride);
                src_row += src_stride;
        }
}
71
72 void
73 vc4_load_lt_image(void *dst, uint32_t dst_stride,
74 void *src, uint32_t src_stride,
75 int cpp, const struct pipe_box *box)
76 {
77 uint32_t utile_w = vc4_utile_width(cpp);
78 uint32_t utile_h = vc4_utile_height(cpp);
79 uint32_t xstart = box->x;
80 uint32_t ystart = box->y;
81
82 for (uint32_t y = 0; y < box->height; y += utile_h) {
83 for (int x = 0; x < box->width; x += utile_w) {
84 vc4_load_utile(dst + (dst_stride * y +
85 x * cpp),
86 src + ((ystart + y) * src_stride +
87 (xstart + x) * 64 / utile_w),
88 dst_stride, cpp);
89 }
90 }
91 }
92
93 void
94 vc4_store_lt_image(void *dst, uint32_t dst_stride,
95 void *src, uint32_t src_stride,
96 int cpp, const struct pipe_box *box)
97 {
98 uint32_t utile_w = vc4_utile_width(cpp);
99 uint32_t utile_h = vc4_utile_height(cpp);
100 uint32_t xstart = box->x;
101 uint32_t ystart = box->y;
102
103 for (uint32_t y = 0; y < box->height; y += utile_h) {
104 for (int x = 0; x < box->width; x += utile_w) {
105 vc4_store_utile(dst + ((ystart + y) * dst_stride +
106 (xstart + x) * 64 / utile_w),
107 src + (src_stride * y +
108 x * cpp),
109 src_stride, cpp);
110 }
111 }
112 }