2 * Copyright © 2014-2017 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 /** @file v3d_tiling.c
26 * Handles information about the VC5 tiling formats, and loading and storing
31 #include "v3d_screen.h"
32 #include "v3d_context.h"
33 #include "v3d_tiling.h"
34 #include "broadcom/common/v3d_cpu_tiling.h"
/* v3d_utile_width(cpp): width lookup for a 64-byte microtile (utile).
 * cpp is the pixel size in bytes: the offset helpers in this file multiply
 * pixel coordinates by it to obtain byte addresses.
 * NOTE(review): only the parameter list and the unreachable() fallback for
 * an unknown cpp are visible in this listing. The return type and the
 * per-cpp widths are not shown, so confirm them against the full source
 * before relying on specific values. */
36 /** Return the width in pixels of a 64-byte microtile. */
38 v3d_utile_width(int cpp
)
50 unreachable("unknown cpp");
/* v3d_utile_height(cpp): height lookup for a 64-byte microtile (utile).
 * cpp is the pixel size in bytes, as for the width lookup.
 * NOTE(review): only the parameter list and the unreachable() fallback for
 * an unknown cpp are visible in this listing. The return type and the
 * per-cpp heights are not shown, so confirm them against the full source
 * before relying on specific values. */
54 /** Return the height in pixels of a 64-byte microtile. */
56 v3d_utile_height(int cpp
)
68 unreachable("unknown cpp");
/**
 * Returns the byte address for a given pixel within a utile.
 *
 * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4
 * arrangement of pixels.
 *
 * @param cpp  pixel size in bytes
 * @param x    pixel column relative to the utile; must be below the utile
 *             width for this cpp
 * @param y    pixel row relative to the utile; must be below the utile
 *             height for this cpp
 * @return byte offset of the pixel from the start of its utile
 */
static inline uint32_t
v3d_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
{
        uint32_t utile_w = v3d_utile_width(cpp);
        uint32_t utile_h = v3d_utile_height(cpp);

        /* Callers pass utile-relative coordinates, not image coordinates. */
        assert(x < utile_w && y < utile_h);

        /* Raster order: each row holds utile_w pixels of cpp bytes. */
        return x * cpp + y * utile_w * cpp;
}
/* v3d_get_lt_pixel_offset(cpp, image_h, x, y): byte offset of a pixel in a
 * LINEARTILE image.
 * The image is treated as a single run of utiles. The utile index along x
 * and the one along y are computed by dividing by the utile size, the assert
 * requires at least one of them to be zero, and each utile stepped over
 * contributes 64 bytes. The offset of the pixel inside its utile, from
 * v3d_get_utile_pixel_offset(), is added on top.
 * image_h is not referenced in the visible lines.
 * NOTE(review): the coordinate arguments of the trailing
 * v3d_get_utile_pixel_offset() call and the closing lines are missing from
 * this listing. */
90 * Returns the byte offset for a given pixel in a LINEARTILE layout.
92 * LINEARTILE is a single line of utiles in either the X or Y direction.
94 static inline uint32_t
95 v3d_get_lt_pixel_offset(uint32_t cpp
, uint32_t image_h
, uint32_t x
, uint32_t y
)
97 uint32_t utile_w
= v3d_utile_width(cpp
);
98 uint32_t utile_h
= v3d_utile_height(cpp
);
99 uint32_t utile_index_x
= x
/ utile_w
;
100 uint32_t utile_index_y
= y
/ utile_h
;
102 assert(utile_index_x
== 0 || utile_index_y
== 0);
104 return (64 * (utile_index_x
+ utile_index_y
) +
105 v3d_get_utile_pixel_offset(cpp
,
/* v3d_get_ublinear_pixel_offset(cpp, x, y, ...): byte offset of a pixel in
 * a UBLINEAR image.
 * A UIF block is 2x2 utiles, so ub_w and ub_h are twice the utile size and
 * ub_x and ub_y are the block coordinates of the pixel. Each block is 256
 * bytes, and ublinear_number scales the block row index; the wrappers in
 * this file pass 1 or 2 for it. Inside a block, 64 bytes are added when x
 * has the utile_w bit set and 128 bytes when y has the utile_h bit set.
 * The offset inside the utile comes from v3d_get_utile_pixel_offset().
 * NOTE(review): the declaration of the last parameter, one term of the
 * block index sum, and the arguments of the final helper call are missing
 * from this listing. */
111 * Returns the byte offset for a given pixel in a UBLINEAR layout.
113 * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2
114 * utiles), and the UIF blocks are in 1 or 2 columns in raster order.
116 static inline uint32_t
117 v3d_get_ublinear_pixel_offset(uint32_t cpp
, uint32_t x
, uint32_t y
,
120 uint32_t utile_w
= v3d_utile_width(cpp
);
121 uint32_t utile_h
= v3d_utile_height(cpp
);
122 uint32_t ub_w
= utile_w
* 2;
123 uint32_t ub_h
= utile_h
* 2;
124 uint32_t ub_x
= x
/ ub_w
;
125 uint32_t ub_y
= y
/ ub_h
;
127 return (256 * (ub_y
* ublinear_number
+
129 ((x
& utile_w
) ? 64 : 0) +
130 ((y
& utile_h
) ? 128 : 0) +
131 + v3d_get_utile_pixel_offset(cpp
,
/**
 * Pixel offset callback for the two-column UBLINEAR layout.
 *
 * Forwards to v3d_get_ublinear_pixel_offset() with a column count of 2.
 * The (cpp, image_h, x, y) parameter list matches the get_pixel_offset
 * function pointer used by the move helpers in this file; image_h is
 * accepted for that reason only and is not used.
 *
 * @param cpp      pixel size in bytes
 * @param image_h  unused
 * @param x        pixel column
 * @param y        pixel row
 * @return byte offset of the pixel
 */
static inline uint32_t
v3d_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        return v3d_get_ublinear_pixel_offset(cpp, x, y, 2);
}
/**
 * Pixel offset callback for the one-column UBLINEAR layout.
 *
 * Forwards to v3d_get_ublinear_pixel_offset() with a column count of 1.
 * The (cpp, image_h, x, y) parameter list matches the get_pixel_offset
 * function pointer used by the move helpers in this file; image_h is
 * accepted for that reason only and is not used.
 *
 * @param cpp      pixel size in bytes
 * @param image_h  unused
 * @param x        pixel column
 * @param y        pixel row
 * @return byte offset of the pixel
 */
static inline uint32_t
v3d_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        return v3d_get_ublinear_pixel_offset(cpp, x, y, 1);
}
/* v3d_get_uif_pixel_offset(cpp, image_h, x, y, ...): byte offset of a pixel
 * in a UIF image.
 * Steps visible below:
 *  - A macroblock (UIF block) is 2x2 utiles; its log2 size is derived with
 *    ffs() and used to split x and y into macroblock coordinates (mb_x,
 *    mb_y) and coordinates inside the macroblock (mb_pixel_x, mb_pixel_y).
 *  - When do_xor is set and (mb_x / 4) is odd, an adjustment is applied;
 *    its body is not visible here.
 *  - mb_h is image_h rounded up to whole macroblocks. mb_id linearises the
 *    macroblock position as a column of width 4 macroblocks and height
 *    mb_h, and every macroblock occupies 256 bytes.
 *  - Inside the macroblock the bottom row of utiles adds 128 bytes and the
 *    right column adds 64 bytes.
 *  - utile_x and utile_y are the coordinates inside the utile, obtained by
 *    masking with the utile size minus one.
 * NOTE(review): the declaration of the last parameter (used as do_xor),
 * the body of the XOR branch, and parts of the final sum are missing from
 * this listing. */
151 * Returns the byte offset for a given pixel in a UIF layout.
153 * UIF is the general VC5 tiling layout shared across 3D, media, and scanout.
154 * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in
155 * 4x4 groups, and those 4x4 groups are then stored in raster order.
157 static inline uint32_t
158 v3d_get_uif_pixel_offset(uint32_t cpp
, uint32_t image_h
, uint32_t x
, uint32_t y
,
161 uint32_t utile_w
= v3d_utile_width(cpp
);
162 uint32_t utile_h
= v3d_utile_height(cpp
);
163 uint32_t mb_width
= utile_w
* 2;
164 uint32_t mb_height
= utile_h
* 2;
165 uint32_t log2_mb_width
= ffs(mb_width
) - 1;
166 uint32_t log2_mb_height
= ffs(mb_height
) - 1;
168 /* Macroblock X, y */
169 uint32_t mb_x
= x
>> log2_mb_width
;
170 uint32_t mb_y
= y
>> log2_mb_height
;
171 /* X, y within the macroblock */
172 uint32_t mb_pixel_x
= x
- (mb_x
<< log2_mb_width
);
173 uint32_t mb_pixel_y
= y
- (mb_y
<< log2_mb_height
);
175 if (do_xor
&& (mb_x
/ 4) & 1)
178 uint32_t mb_h
= align(image_h
, 1 << log2_mb_height
) >> log2_mb_height
;
179 uint32_t mb_id
= ((mb_x
/ 4) * ((mb_h
- 1) * 4)) + mb_x
+ mb_y
* 4;
181 uint32_t mb_base_addr
= mb_id
* 256;
183 bool top
= mb_pixel_y
< utile_h
;
184 bool left
= mb_pixel_x
< utile_w
;
186 /* Docs have this in pixels, we do bytes here. */
187 uint32_t mb_tile_offset
= (!top
* 128 + !left
* 64);
189 uint32_t utile_x
= mb_pixel_x
& (utile_w
- 1);
190 uint32_t utile_y
= mb_pixel_y
& (utile_h
- 1);
192 uint32_t mb_pixel_address
= (mb_base_addr
+
194 v3d_get_utile_pixel_offset(cpp
,
198 return mb_pixel_address
;
/**
 * Pixel offset callback for the UIF layout with the XOR adjustment enabled.
 *
 * Forwards to v3d_get_uif_pixel_offset() with its final flag set to true.
 * The (cpp, image_h, x, y) parameter list matches the get_pixel_offset
 * function pointer used by the move helpers in this file.
 *
 * @param cpp      pixel size in bytes
 * @param image_h  image height in pixels
 * @param x        pixel column
 * @param y        pixel row
 * @return byte offset of the pixel
 */
static inline uint32_t
v3d_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                             uint32_t x, uint32_t y)
{
        return v3d_get_uif_pixel_offset(cpp, image_h, x, y, true);
}
/**
 * Pixel offset callback for the UIF layout with the XOR adjustment disabled.
 *
 * Forwards to v3d_get_uif_pixel_offset() with its final flag set to false.
 * The (cpp, image_h, x, y) parameter list matches the get_pixel_offset
 * function pointer used by the move helpers in this file.
 *
 * @param cpp      pixel size in bytes
 * @param image_h  image height in pixels
 * @param x        pixel column
 * @param y        pixel row
 * @return byte offset of the pixel
 */
static inline uint32_t
v3d_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                                uint32_t x, uint32_t y)
{
        return v3d_get_uif_pixel_offset(cpp, image_h, x, y, false);
}
/* v3d_move_pixels_unaligned(): per-pixel copy between a tiled GPU buffer
 * and a linear CPU buffer for boxes that are not utile-aligned.
 * For every row y of the box, cpu_row points at cpu + y * cpu_stride. For
 * every column x the tiled byte offset is obtained through the
 * get_pixel_offset callback. Two memcpy calls are visible: one whose
 * destination is the CPU pixel at cpu_row + x * cpp, and one whose
 * destination is the GPU pixel at gpu + pixel_offset.
 * The fprintf to stderr prints the image coordinates (box origin plus x, y)
 * next to a third value that is not visible.
 * NOTE(review): the return type, the trailing parameter (call sites pass
 * is_load), the coordinate arguments given to the callback, the conditions
 * guarding the fprintf and each memcpy, and the memcpy source and size
 * arguments are missing from this listing. */
215 /* Loads/stores non-utile-aligned boxes by walking over the destination
216 * rectangle, computing the address on the GPU, and storing/loading a pixel at
220 v3d_move_pixels_unaligned(void *gpu
, uint32_t gpu_stride
,
221 void *cpu
, uint32_t cpu_stride
,
222 int cpp
, uint32_t image_h
,
223 const struct pipe_box
*box
,
224 uint32_t (*get_pixel_offset
)(uint32_t cpp
,
226 uint32_t x
, uint32_t y
),
229 for (uint32_t y
= 0; y
< box
->height
; y
++) {
230 void *cpu_row
= cpu
+ y
* cpu_stride
;
232 for (int x
= 0; x
< box
->width
; x
++) {
233 uint32_t pixel_offset
= get_pixel_offset(cpp
, image_h
,
238 fprintf(stderr
, "%3d,%3d -> %d\n",
239 box
->x
+ x
, box
->y
+ y
,
244 memcpy(cpu_row
+ x
* cpp
,
248 memcpy(gpu
+ pixel_offset
,
/* v3d_move_pixels_general_percpp(): copies a box between a tiled GPU buffer
 * and a linear CPU buffer, using whole-utile transfers where possible.
 * Steps visible below:
 *  - x1, y1, x2, y2 are the box bounds. align_x1 and align_y1 round the
 *    start up to a utile multiple with align(); align_x2 and align_y2 round
 *    the end down by masking with the utile size minus one.
 *  - The nested loops walk the aligned region one utile at a time. The GPU
 *    address of each utile comes from get_pixel_offset() for its first
 *    pixel, the CPU address is relative to the box origin, and the utile is
 *    moved with v3d_load_utile() or v3d_store_utile().
 *  - If the aligned region is empty in either direction,
 *    v3d_move_pixels_unaligned() is called instead.
 *  - Otherwise four partial boxes are built: two with heights
 *    align_y1 - y1 and y2 - align_y2, and two with widths align_x1 - x1 and
 *    x2 - align_x2 spanning the aligned rows. Each is passed to
 *    v3d_move_pixels_unaligned() with the CPU pointer offset by the
 *    position of the partial box relative to (x1, y1).
 * NOTE(review): the return type, the trailing parameter (used as is_load),
 * the branch choosing between load and store, most partial-box initialisers,
 * and several call arguments are missing from this listing. */
256 /* Breaks the image down into utiles and calls either the fast whole-utile
257 * load/store functions, or the unaligned fallback case.
260 v3d_move_pixels_general_percpp(void *gpu
, uint32_t gpu_stride
,
261 void *cpu
, uint32_t cpu_stride
,
262 int cpp
, uint32_t image_h
,
263 const struct pipe_box
*box
,
264 uint32_t (*get_pixel_offset
)(uint32_t cpp
,
266 uint32_t x
, uint32_t y
),
269 uint32_t utile_w
= v3d_utile_width(cpp
);
270 uint32_t utile_h
= v3d_utile_height(cpp
);
271 uint32_t utile_gpu_stride
= utile_w
* cpp
;
272 uint32_t x1
= box
->x
;
273 uint32_t y1
= box
->y
;
274 uint32_t x2
= box
->x
+ box
->width
;
275 uint32_t y2
= box
->y
+ box
->height
;
276 uint32_t align_x1
= align(x1
, utile_w
);
277 uint32_t align_y1
= align(y1
, utile_h
);
278 uint32_t align_x2
= x2
& ~(utile_w
- 1);
279 uint32_t align_y2
= y2
& ~(utile_h
- 1);
281 /* Load/store all the whole utiles first. */
282 for (uint32_t y
= align_y1
; y
< align_y2
; y
+= utile_h
) {
283 void *cpu_row
= cpu
+ (y
- box
->y
) * cpu_stride
;
285 for (uint32_t x
= align_x1
; x
< align_x2
; x
+= utile_w
) {
286 void *utile_gpu
= (gpu
+
287 get_pixel_offset(cpp
, image_h
, x
, y
));
288 void *utile_cpu
= cpu_row
+ (x
- box
->x
) * cpp
;
291 v3d_load_utile(utile_cpu
, cpu_stride
,
292 utile_gpu
, utile_gpu_stride
);
294 v3d_store_utile(utile_gpu
, utile_gpu_stride
,
295 utile_cpu
, cpu_stride
);
300 /* If there were no aligned utiles in the middle, load/store the whole
303 if (align_y2
<= align_y1
||
304 align_x2
<= align_x1
) {
305 v3d_move_pixels_unaligned(gpu
, gpu_stride
,
309 get_pixel_offset
, is_load
);
313 /* Load/store the partial utiles. */
314 struct pipe_box partial_boxes
[4] = {
320 .height
= align_y1
- y1
,
327 .height
= y2
- align_y2
,
332 .width
= align_x1
- x1
,
334 .height
= align_y2
- align_y1
,
339 .width
= x2
- align_x2
,
341 .height
= align_y2
- align_y1
,
344 for (int i
= 0; i
< ARRAY_SIZE(partial_boxes
); i
++) {
345 void *partial_cpu
= (cpu
+
346 (partial_boxes
[i
].y
- y1
) * cpu_stride
+
347 (partial_boxes
[i
].x
- x1
) * cpp
);
349 v3d_move_pixels_unaligned(gpu
, gpu_stride
,
350 partial_cpu
, cpu_stride
,
353 get_pixel_offset
, is_load
);
/* v3d_move_pixels_general(): front end for
 * v3d_move_pixels_general_percpp(). It takes the same visible parameter
 * list (gpu, gpu_stride, cpu, cpu_stride, cpp, image_h, box,
 * get_pixel_offset) and contains five calls to the per-cpp worker.
 * NOTE(review): the return type, the trailing parameter, the control flow
 * that selects between the five calls (presumably one per supported cpp
 * value; confirm), and the remaining call arguments are missing from this
 * listing. */
358 v3d_move_pixels_general(void *gpu
, uint32_t gpu_stride
,
359 void *cpu
, uint32_t cpu_stride
,
360 int cpp
, uint32_t image_h
,
361 const struct pipe_box
*box
,
362 uint32_t (*get_pixel_offset
)(uint32_t cpp
,
364 uint32_t x
, uint32_t y
),
369 v3d_move_pixels_general_percpp(gpu
, gpu_stride
,
376 v3d_move_pixels_general_percpp(gpu
, gpu_stride
,
383 v3d_move_pixels_general_percpp(gpu
, gpu_stride
,
390 v3d_move_pixels_general_percpp(gpu
, gpu_stride
,
397 v3d_move_pixels_general_percpp(gpu
, gpu_stride
,
/* v3d_move_tiled_image(): selects the pixel offset function for a tiling
 * format and hands the copy to v3d_move_pixels_general().
 * Mapping visible in the switch:
 *   VC5_TILING_UIF_XOR           -> v3d_get_uif_xor_pixel_offset
 *   VC5_TILING_UIF_NO_XOR        -> v3d_get_uif_no_xor_pixel_offset
 *   VC5_TILING_UBLINEAR_2_COLUMN -> v3d_get_ublinear_2_column_pixel_offset
 *   VC5_TILING_UBLINEAR_1_COLUMN -> v3d_get_ublinear_1_column_pixel_offset
 *   VC5_TILING_LINEARTILE        -> v3d_get_lt_pixel_offset
 * Any other value reaches unreachable() with an unsupported-format message.
 * NOTE(review): the return type, some parameters, the middle arguments of
 * each call, and the statements ending each case are missing from this
 * listing. */
407 v3d_move_tiled_image(void *gpu
, uint32_t gpu_stride
,
408 void *cpu
, uint32_t cpu_stride
,
409 enum v3d_tiling_mode tiling_format
,
412 const struct pipe_box
*box
,
415 switch (tiling_format
) {
416 case VC5_TILING_UIF_XOR
:
417 v3d_move_pixels_general(gpu
, gpu_stride
,
420 v3d_get_uif_xor_pixel_offset
,
423 case VC5_TILING_UIF_NO_XOR
:
424 v3d_move_pixels_general(gpu
, gpu_stride
,
427 v3d_get_uif_no_xor_pixel_offset
,
430 case VC5_TILING_UBLINEAR_2_COLUMN
:
431 v3d_move_pixels_general(gpu
, gpu_stride
,
434 v3d_get_ublinear_2_column_pixel_offset
,
437 case VC5_TILING_UBLINEAR_1_COLUMN
:
438 v3d_move_pixels_general(gpu
, gpu_stride
,
441 v3d_get_ublinear_1_column_pixel_offset
,
444 case VC5_TILING_LINEARTILE
:
445 v3d_move_pixels_general(gpu
, gpu_stride
,
448 v3d_get_lt_pixel_offset
,
452 unreachable("Unsupported tiling format");
/* v3d_load_tiled_image(): entry point for reading a box out of a tiled
 * image. It forwards to v3d_move_tiled_image() and passes src and
 * src_stride in the positions that function names gpu and gpu_stride, so
 * src is the tiled side of the copy.
 * NOTE(review): the return type, one parameter line, and the remaining
 * arguments of the v3d_move_tiled_image() call are missing from this
 * listing. */
458 * Loads pixel data from the start (microtile-aligned) box in \p src to the
459 * start of \p dst according to the given tiling format.
462 v3d_load_tiled_image(void *dst
, uint32_t dst_stride
,
463 void *src
, uint32_t src_stride
,
464 enum v3d_tiling_mode tiling_format
, int cpp
,
466 const struct pipe_box
*box
)
468 v3d_move_tiled_image(src
, src_stride
,
478 * Stores pixel data from the start of \p src into a (microtile-aligned) box in
479 * \p dst according to the given tiling format.
482 v3d_store_tiled_image(void *dst
, uint32_t dst_stride
,
483 void *src
, uint32_t src_stride
,
484 enum v3d_tiling_mode tiling_format
, int cpp
,
486 const struct pipe_box
*box
)
488 v3d_move_tiled_image(dst
, dst_stride
,