v3d: Rename the driver files from "vc5" to "v3d".
[mesa.git] / src / gallium / drivers / v3d / v3d_tiling.c
1 /*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file v3d_tiling.c
25 *
26 * Handles information about the VC5 tiling formats, and loading and storing
27 * from them.
28 */
29
30 #include <stdint.h>
31 #include "v3d_screen.h"
32 #include "v3d_context.h"
33 #include "v3d_tiling.h"
34
/**
 * Returns the width, in pixels, of one 64-byte microtile (utile).
 *
 * @param cpp  Bytes per pixel.  Only 1, 2, 4, 8 and 16 are handled; any
 *             other value reaches unreachable().
 */
uint32_t
vc5_utile_width(int cpp)
{
        if (cpp == 1 || cpp == 2)
                return 8;

        if (cpp == 4 || cpp == 8)
                return 4;

        if (cpp == 16)
                return 2;

        unreachable("unknown cpp");
}
52
/**
 * Returns the height, in pixels, of one 64-byte microtile (utile).
 *
 * @param cpp  Bytes per pixel.  Only 1, 2, 4, 8 and 16 are handled; any
 *             other value reaches unreachable().
 */
uint32_t
vc5_utile_height(int cpp)
{
        if (cpp == 1)
                return 8;

        if (cpp == 2 || cpp == 4)
                return 4;

        if (cpp == 8 || cpp == 16)
                return 2;

        unreachable("unknown cpp");
}
70
/**
 * Computes the byte offset of pixel (x, y) inside a single utile.
 *
 * A utile is a 64-byte block whose pixels are laid out in raster order; at
 * 32bpp (cpp == 4) that is a 4x4 arrangement.
 *
 * @param cpp  Bytes per pixel.
 * @param x    Column within the utile; asserted to be below the utile width.
 * @param y    Row within the utile; asserted to be below the utile height.
 * @return     Byte offset from the start of the utile.
 */
static inline uint32_t
vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
{
        const uint32_t utile_w = vc5_utile_width(cpp);
        const uint32_t utile_h = vc5_utile_height(cpp);

        assert(x < utile_w && y < utile_h);

        /* Raster order: y full rows of utile_w pixels, then x more pixels. */
        return (y * utile_w + x) * cpp;
}
87
/**
 * Computes the byte offset of pixel (x, y) in a LINEARTILE layout.
 *
 * LINEARTILE is a single line of utiles running in either the X or the Y
 * direction, so at least one of the two utile indices must be zero (this is
 * asserted).
 *
 * @param cpp      Bytes per pixel.
 * @param image_h  Not used by this layout; present so the signature matches
 *                 the other pixel-offset callbacks.
 * @param x        Pixel column.
 * @param y        Pixel row.
 * @return         Byte offset from the start of the image.
 */
static inline uint32_t
vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
{
        const uint32_t utile_w = vc5_utile_width(cpp);
        const uint32_t utile_h = vc5_utile_height(cpp);
        const uint32_t utile_index_x = x / utile_w;
        const uint32_t utile_index_y = y / utile_h;

        assert(utile_index_x == 0 || utile_index_y == 0);

        /* Each utile occupies 64 bytes; one of the two indices is zero. */
        const uint32_t utile_base = 64 * (utile_index_x + utile_index_y);
        const uint32_t within_utile =
                vc5_get_utile_pixel_offset(cpp,
                                           x & (utile_w - 1),
                                           y & (utile_h - 1));

        return utile_base + within_utile;
}
108
/**
 * Computes the byte offset of pixel (x, y) in a UBLINEAR layout.
 *
 * UBLINEAR arranges pixels in UIF blocks (2x2 utiles, 256 bytes each), with
 * the UIF blocks laid out in raster order across 1 or 2 columns.
 *
 * @param cpp              Bytes per pixel.
 * @param x                Pixel column.
 * @param y                Pixel row.
 * @param ublinear_number  Number of UIF-block columns per row of blocks.
 * @return                 Byte offset from the start of the image.
 */
static inline uint32_t
vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y,
                              int ublinear_number)
{
        const uint32_t utile_w = vc5_utile_width(cpp);
        const uint32_t utile_h = vc5_utile_height(cpp);

        /* UIF-block coordinates: a block is two utiles wide and two tall. */
        const uint32_t ub_x = x / (utile_w * 2);
        const uint32_t ub_y = y / (utile_h * 2);

        /* Start of the containing 256-byte UIF block. */
        uint32_t offset = 256 * (ub_y * ublinear_number + ub_x);

        /* Pick the utile inside the block: right half +64, bottom half +128. */
        if (x & utile_w)
                offset += 64;
        if (y & utile_h)
                offset += 128;

        offset += vc5_get_utile_pixel_offset(cpp,
                                             x & (utile_w - 1),
                                             y & (utile_h - 1));

        return offset;
}
134
/**
 * Pixel-offset callback for UBLINEAR with two UIF-block columns.
 *
 * image_h is not used; it is only present so the signature matches the
 * get_pixel_offset callback type.
 */
static inline uint32_t
vc5_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        const int columns = 2;

        return vc5_get_ublinear_pixel_offset(cpp, x, y, columns);
}
141
/**
 * Pixel-offset callback for UBLINEAR with a single UIF-block column.
 *
 * image_h is not used; it is only present so the signature matches the
 * get_pixel_offset callback type.
 */
static inline uint32_t
vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        const int columns = 1;

        return vc5_get_ublinear_pixel_offset(cpp, x, y, columns);
}
148
/**
 * Computes the byte offset of pixel (x, y) in a UIF layout.
 *
 * UIF is the general VC5 tiling layout shared across 3D, media, and scanout.
 * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in
 * 4x4 groups, and those 4x4 groups are then stored in raster order.
 *
 * @param cpp      Bytes per pixel.
 * @param image_h  Image height in pixels; it is rounded up to a whole number
 *                 of UIF blocks and used in the block-index computation.
 * @param x        Pixel column.
 * @param y        Pixel row.
 * @param do_xor   When true, bit 4 of the block row index is flipped for
 *                 blocks in odd 4-block-wide columns.
 * @return         Byte offset from the start of the image.
 */
static inline uint32_t
vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y,
                         bool do_xor)
{
        const uint32_t utile_w = vc5_utile_width(cpp);
        const uint32_t utile_h = vc5_utile_height(cpp);

        /* A UIF block is 2x2 utiles, so its dimensions are powers of two. */
        const uint32_t log2_block_w = ffs(utile_w * 2) - 1;
        const uint32_t log2_block_h = ffs(utile_h * 2) - 1;

        /* Which UIF block the pixel falls in. */
        uint32_t block_x = x >> log2_block_w;
        uint32_t block_y = y >> log2_block_h;

        /* Pixel position inside that block (taken before any XOR below). */
        const uint32_t in_block_x = x - (block_x << log2_block_w);
        const uint32_t in_block_y = y - (block_y << log2_block_h);

        /* Index of the 4-block-wide column containing the block. */
        const uint32_t column = block_x / 4;

        if (do_xor && (column & 1))
                block_y ^= 0x10;

        /* Image height in UIF blocks, rounded up. */
        const uint32_t blocks_high =
                align(image_h, 1 << log2_block_h) >> log2_block_h;
        const uint32_t block_id =
                column * ((blocks_high - 1) * 4) + block_x + block_y * 4;

        /* Each UIF block is 256 bytes. */
        uint32_t offset = block_id * 256;

        /* Docs have this in pixels, we do bytes here: the bottom utile row
         * adds 128, the right utile column adds 64.
         */
        if (in_block_y >= utile_h)
                offset += 128;
        if (in_block_x >= utile_w)
                offset += 64;

        offset += vc5_get_utile_pixel_offset(cpp,
                                             in_block_x & (utile_w - 1),
                                             in_block_y & (utile_h - 1));

        return offset;
}
199
/**
 * Pixel-offset callback for the UIF layout with the XOR step enabled.
 */
static inline uint32_t
vc5_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                             uint32_t x, uint32_t y)
{
        const bool do_xor = true;

        return vc5_get_uif_pixel_offset(cpp, image_h, x, y, do_xor);
}
206
/**
 * Pixel-offset callback for the UIF layout with the XOR step disabled.
 */
static inline uint32_t
vc5_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                                uint32_t x, uint32_t y)
{
        const bool do_xor = false;

        return vc5_get_uif_pixel_offset(cpp, image_h, x, y, do_xor);
}
213
214 static inline void
215 vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride,
216 void *cpu, uint32_t cpu_stride,
217 int cpp, uint32_t image_h,
218 const struct pipe_box *box,
219 uint32_t (*get_pixel_offset)(uint32_t cpp,
220 uint32_t image_h,
221 uint32_t x, uint32_t y),
222 bool is_load)
223 {
224 for (uint32_t y = 0; y < box->height; y++) {
225 void *cpu_row = cpu + y * cpu_stride;
226
227 for (int x = 0; x < box->width; x++) {
228 uint32_t pixel_offset = get_pixel_offset(cpp, image_h,
229 box->x + x,
230 box->y + y);
231
232 if (false) {
233 fprintf(stderr, "%3d,%3d -> %d\n",
234 box->x + x, box->y + y,
235 pixel_offset);
236 }
237
238 if (is_load) {
239 memcpy(cpu_row + x * cpp,
240 gpu + pixel_offset,
241 cpp);
242 } else {
243 memcpy(gpu + pixel_offset,
244 cpu_row + x * cpp,
245 cpp);
246 }
247 }
248 }
249 }
250
/**
 * Dispatches to vc5_move_pixels_general_percpp() with \p cpp passed as a
 * literal constant for each supported pixel size.
 *
 * NOTE(review): the per-case literals presumably let the inlined copy loop be
 * specialised for each pixel size -- confirm before collapsing the cases.
 *
 * Supported sizes are 1, 2, 4, 8 and 16 bytes per pixel.  There is no default
 * case, so any other \p cpp value copies nothing.
 *
 * All other arguments are forwarded unchanged.
 */
static inline void
vc5_move_pixels_general(void *gpu, uint32_t gpu_stride,
                        void *cpu, uint32_t cpu_stride,
                        int cpp, uint32_t image_h,
                        const struct pipe_box *box,
                        uint32_t (*get_pixel_offset)(uint32_t cpp,
                                                     uint32_t image_h,
                                                     uint32_t x, uint32_t y),
                        bool is_load)
{
        switch (cpp) {
        case 1:
                vc5_move_pixels_general_percpp(gpu, gpu_stride, cpu, cpu_stride,
                                               1, image_h, box,
                                               get_pixel_offset, is_load);
                return;

        case 2:
                vc5_move_pixels_general_percpp(gpu, gpu_stride, cpu, cpu_stride,
                                               2, image_h, box,
                                               get_pixel_offset, is_load);
                return;

        case 4:
                vc5_move_pixels_general_percpp(gpu, gpu_stride, cpu, cpu_stride,
                                               4, image_h, box,
                                               get_pixel_offset, is_load);
                return;

        case 8:
                vc5_move_pixels_general_percpp(gpu, gpu_stride, cpu, cpu_stride,
                                               8, image_h, box,
                                               get_pixel_offset, is_load);
                return;

        case 16:
                vc5_move_pixels_general_percpp(gpu, gpu_stride, cpu, cpu_stride,
                                               16, image_h, box,
                                               get_pixel_offset, is_load);
                return;
        }
}
299
300 static inline void
301 vc5_move_tiled_image(void *gpu, uint32_t gpu_stride,
302 void *cpu, uint32_t cpu_stride,
303 enum vc5_tiling_mode tiling_format,
304 int cpp,
305 uint32_t image_h,
306 const struct pipe_box *box,
307 bool is_load)
308 {
309 switch (tiling_format) {
310 case VC5_TILING_UIF_XOR:
311 vc5_move_pixels_general(gpu, gpu_stride,
312 cpu, cpu_stride,
313 cpp, image_h, box,
314 vc5_get_uif_xor_pixel_offset,
315 is_load);
316 break;
317 case VC5_TILING_UIF_NO_XOR:
318 vc5_move_pixels_general(gpu, gpu_stride,
319 cpu, cpu_stride,
320 cpp, image_h, box,
321 vc5_get_uif_no_xor_pixel_offset,
322 is_load);
323 break;
324 case VC5_TILING_UBLINEAR_2_COLUMN:
325 vc5_move_pixels_general(gpu, gpu_stride,
326 cpu, cpu_stride,
327 cpp, image_h, box,
328 vc5_get_ublinear_2_column_pixel_offset,
329 is_load);
330 break;
331 case VC5_TILING_UBLINEAR_1_COLUMN:
332 vc5_move_pixels_general(gpu, gpu_stride,
333 cpu, cpu_stride,
334 cpp, image_h, box,
335 vc5_get_ublinear_1_column_pixel_offset,
336 is_load);
337 break;
338 case VC5_TILING_LINEARTILE:
339 vc5_move_pixels_general(gpu, gpu_stride,
340 cpu, cpu_stride,
341 cpp, image_h, box,
342 vc5_get_lt_pixel_offset,
343 is_load);
344 break;
345 default:
346 unreachable("Unsupported tiling format");
347 break;
348 }
349 }
350
351 /**
352 * Loads pixel data from the start (microtile-aligned) box in \p src to the
353 * start of \p dst according to the given tiling format.
354 */
355 void
356 vc5_load_tiled_image(void *dst, uint32_t dst_stride,
357 void *src, uint32_t src_stride,
358 enum vc5_tiling_mode tiling_format, int cpp,
359 uint32_t image_h,
360 const struct pipe_box *box)
361 {
362 vc5_move_tiled_image(src, src_stride,
363 dst, dst_stride,
364 tiling_format,
365 cpp,
366 image_h,
367 box,
368 true);
369 }
370
371 /**
372 * Stores pixel data from the start of \p src into a (microtile-aligned) box in
373 * \p dst according to the given tiling format.
374 */
375 void
376 vc5_store_tiled_image(void *dst, uint32_t dst_stride,
377 void *src, uint32_t src_stride,
378 enum vc5_tiling_mode tiling_format, int cpp,
379 uint32_t image_h,
380 const struct pipe_box *box)
381 {
382 vc5_move_tiled_image(dst, dst_stride,
383 src, src_stride,
384 tiling_format,
385 cpp,
386 image_h,
387 box,
388 false);
389 }