2 * Copyright © 2014-2017 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
/** @file v3d_tiling.c
 *
 * Handles information about the VC5 tiling formats, and loading and storing
 * from them.
 */
31 #include "v3d_screen.h"
32 #include "v3d_context.h"
33 #include "v3d_tiling.h"
34 #include "broadcom/common/v3d_cpu_tiling.h"
36 /** Return the width in pixels of a 64-byte microtile. */
38 v3d_utile_width(int cpp
)
50 unreachable("unknown cpp");
54 /** Return the height in pixels of a 64-byte microtile. */
56 v3d_utile_height(int cpp
)
68 unreachable("unknown cpp");
/**
 * Returns the byte address for a given pixel within a utile.
 *
 * Utiles are 64b blocks of pixels stored in raster order; at 32bpp a utile
 * is a 4x4 arrangement of pixels.
 *
 * @param cpp  bytes per pixel, as accepted by v3d_utile_width() and
 *             v3d_utile_height()
 * @param x    pixel column inside the utile; must be < v3d_utile_width(cpp)
 * @param y    pixel row inside the utile; must be < v3d_utile_height(cpp)
 * @return     byte offset of the pixel from the start of the utile
 */
static inline uint32_t
v3d_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
{
        uint32_t utile_w = v3d_utile_width(cpp);

        /* Callers must hand in coordinates already reduced to one utile. */
        assert(x < utile_w && y < v3d_utile_height(cpp));

        /* Raster order: skip y whole rows, then x pixels, scaled to bytes. */
        uint32_t pixel_index = y * utile_w + x;

        return pixel_index * cpp;
}
89 * Returns the byte offset for a given pixel in a LINEARTILE layout.
91 * LINEARTILE is a single line of utiles in either the X or Y direction.
93 static inline uint32_t
94 v3d_get_lt_pixel_offset(uint32_t cpp
, uint32_t image_h
, uint32_t x
, uint32_t y
)
96 uint32_t utile_w
= v3d_utile_width(cpp
);
97 uint32_t utile_h
= v3d_utile_height(cpp
);
98 uint32_t utile_index_x
= x
/ utile_w
;
99 uint32_t utile_index_y
= y
/ utile_h
;
101 assert(utile_index_x
== 0 || utile_index_y
== 0);
103 return (64 * (utile_index_x
+ utile_index_y
) +
104 v3d_get_utile_pixel_offset(cpp
,
110 * Returns the byte offset for a given pixel in a UBLINEAR layout.
112 * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2
113 * utiles), and the UIF blocks are in 1 or 2 columns in raster order.
115 static inline uint32_t
116 v3d_get_ublinear_pixel_offset(uint32_t cpp
, uint32_t x
, uint32_t y
,
119 uint32_t utile_w
= v3d_utile_width(cpp
);
120 uint32_t utile_h
= v3d_utile_height(cpp
);
121 uint32_t ub_w
= utile_w
* 2;
122 uint32_t ub_h
= utile_h
* 2;
123 uint32_t ub_x
= x
/ ub_w
;
124 uint32_t ub_y
= y
/ ub_h
;
126 return (256 * (ub_y
* ublinear_number
+
128 ((x
& utile_w
) ? 64 : 0) +
129 ((y
& utile_h
) ? 128 : 0) +
130 + v3d_get_utile_pixel_offset(cpp
,
/**
 * Pixel-offset callback for the UBLINEAR layout with two columns of UIF
 * blocks.
 *
 * Forwards to v3d_get_ublinear_pixel_offset() with a column count of 2.
 * image_h is not used by this layout; it is part of the signature only so
 * the function matches the get_pixel_offset callback type.
 *
 * @param cpp      bytes per pixel
 * @param image_h  unused
 * @param x        pixel X coordinate
 * @param y        pixel Y coordinate
 * @return         byte offset of the pixel
 */
static inline uint32_t
v3d_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        /* Number of UIF-block columns per row in this layout. */
        const int ublinear_columns = 2;
        uint32_t offset =
                v3d_get_ublinear_pixel_offset(cpp, x, y, ublinear_columns);

        return offset;
}
/**
 * Pixel-offset callback for the UBLINEAR layout with a single column of UIF
 * blocks.
 *
 * Forwards to v3d_get_ublinear_pixel_offset() with a column count of 1.
 * image_h is not used by this layout; it is part of the signature only so
 * the function matches the get_pixel_offset callback type.
 *
 * @param cpp      bytes per pixel
 * @param image_h  unused
 * @param x        pixel X coordinate
 * @param y        pixel Y coordinate
 * @return         byte offset of the pixel
 */
static inline uint32_t
v3d_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                       uint32_t x, uint32_t y)
{
        /* Number of UIF-block columns per row in this layout. */
        const int ublinear_columns = 1;
        uint32_t offset =
                v3d_get_ublinear_pixel_offset(cpp, x, y, ublinear_columns);

        return offset;
}
150 * Returns the byte offset for a given pixel in a UIF layout.
152 * UIF is the general VC5 tiling layout shared across 3D, media, and scanout.
153 * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in
154 * 4x4 groups, and those 4x4 groups are then stored in raster order.
156 static inline uint32_t
157 v3d_get_uif_pixel_offset(uint32_t cpp
, uint32_t image_h
, uint32_t x
, uint32_t y
,
160 uint32_t utile_w
= v3d_utile_width(cpp
);
161 uint32_t utile_h
= v3d_utile_height(cpp
);
162 uint32_t mb_width
= utile_w
* 2;
163 uint32_t mb_height
= utile_h
* 2;
164 uint32_t log2_mb_width
= ffs(mb_width
) - 1;
165 uint32_t log2_mb_height
= ffs(mb_height
) - 1;
167 /* Macroblock X, y */
168 uint32_t mb_x
= x
>> log2_mb_width
;
169 uint32_t mb_y
= y
>> log2_mb_height
;
170 /* X, y within the macroblock */
171 uint32_t mb_pixel_x
= x
- (mb_x
<< log2_mb_width
);
172 uint32_t mb_pixel_y
= y
- (mb_y
<< log2_mb_height
);
174 if (do_xor
&& (mb_x
/ 4) & 1)
177 uint32_t mb_h
= align(image_h
, 1 << log2_mb_height
) >> log2_mb_height
;
178 uint32_t mb_id
= ((mb_x
/ 4) * ((mb_h
- 1) * 4)) + mb_x
+ mb_y
* 4;
180 uint32_t mb_base_addr
= mb_id
* 256;
182 bool top
= mb_pixel_y
< utile_h
;
183 bool left
= mb_pixel_x
< utile_w
;
185 /* Docs have this in pixels, we do bytes here. */
186 uint32_t mb_tile_offset
= (!top
* 128 + !left
* 64);
188 uint32_t utile_x
= mb_pixel_x
& (utile_w
- 1);
189 uint32_t utile_y
= mb_pixel_y
& (utile_h
- 1);
191 uint32_t mb_pixel_address
= (mb_base_addr
+
193 v3d_get_utile_pixel_offset(cpp
,
197 return mb_pixel_address
;
/**
 * Pixel-offset callback for the UIF layout with the XOR step enabled.
 *
 * Forwards every argument to v3d_get_uif_pixel_offset() and passes true
 * for its do_xor flag.
 *
 * @param cpp      bytes per pixel
 * @param image_h  image height in pixels
 * @param x        pixel X coordinate
 * @param y        pixel Y coordinate
 * @return         byte offset of the pixel
 */
static inline uint32_t
v3d_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                             uint32_t x, uint32_t y)
{
        const bool do_xor = true;
        uint32_t offset = v3d_get_uif_pixel_offset(cpp, image_h, x, y, do_xor);

        return offset;
}
/**
 * Pixel-offset callback for the UIF layout with the XOR step disabled.
 *
 * Forwards every argument to v3d_get_uif_pixel_offset() and passes false
 * for its do_xor flag.
 *
 * @param cpp      bytes per pixel
 * @param image_h  image height in pixels
 * @param x        pixel X coordinate
 * @param y        pixel Y coordinate
 * @return         byte offset of the pixel
 */
static inline uint32_t
v3d_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                                uint32_t x, uint32_t y)
{
        const bool do_xor = false;
        uint32_t offset = v3d_get_uif_pixel_offset(cpp, image_h, x, y, do_xor);

        return offset;
}
/* Loads/stores non-utile-aligned boxes by walking over the destination
 * rectangle, computing the address on the GPU, and storing/loading a pixel at
 * a time.
 */
219 v3d_move_pixels_unaligned(void *gpu
, uint32_t gpu_stride
,
220 void *cpu
, uint32_t cpu_stride
,
221 int cpp
, uint32_t image_h
,
222 const struct pipe_box
*box
,
223 uint32_t (*get_pixel_offset
)(uint32_t cpp
,
225 uint32_t x
, uint32_t y
),
228 for (uint32_t y
= 0; y
< box
->height
; y
++) {
229 void *cpu_row
= cpu
+ y
* cpu_stride
;
231 for (int x
= 0; x
< box
->width
; x
++) {
232 uint32_t pixel_offset
= get_pixel_offset(cpp
, image_h
,
237 fprintf(stderr
, "%3d,%3d -> %d\n",
238 box
->x
+ x
, box
->y
+ y
,
243 memcpy(cpu_row
+ x
* cpp
,
247 memcpy(gpu
+ pixel_offset
,
255 /* Breaks the image down into utiles and calls either the fast whole-utile
256 * load/store functions, or the unaligned fallback case.
259 v3d_move_pixels_general_percpp(void *gpu
, uint32_t gpu_stride
,
260 void *cpu
, uint32_t cpu_stride
,
261 int cpp
, uint32_t image_h
,
262 const struct pipe_box
*box
,
263 uint32_t (*get_pixel_offset
)(uint32_t cpp
,
265 uint32_t x
, uint32_t y
),
268 uint32_t utile_w
= v3d_utile_width(cpp
);
269 uint32_t utile_h
= v3d_utile_height(cpp
);
270 uint32_t utile_gpu_stride
= utile_w
* cpp
;
271 uint32_t x1
= box
->x
;
272 uint32_t y1
= box
->y
;
273 uint32_t x2
= box
->x
+ box
->width
;
274 uint32_t y2
= box
->y
+ box
->height
;
275 uint32_t align_x1
= align(x1
, utile_w
);
276 uint32_t align_y1
= align(y1
, utile_h
);
277 uint32_t align_x2
= x2
& ~(utile_w
- 1);
278 uint32_t align_y2
= y2
& ~(utile_h
- 1);
280 /* Load/store all the whole utiles first. */
281 for (uint32_t y
= align_y1
; y
< align_y2
; y
+= utile_h
) {
282 void *cpu_row
= cpu
+ (y
- box
->y
) * cpu_stride
;
284 for (uint32_t x
= align_x1
; x
< align_x2
; x
+= utile_w
) {
285 void *utile_gpu
= (gpu
+
286 get_pixel_offset(cpp
, image_h
, x
, y
));
287 void *utile_cpu
= cpu_row
+ (x
- box
->x
) * cpp
;
290 v3d_load_utile(utile_cpu
, cpu_stride
,
291 utile_gpu
, utile_gpu_stride
);
293 v3d_store_utile(utile_gpu
, utile_gpu_stride
,
294 utile_cpu
, cpu_stride
);
299 /* If there were no aligned utiles in the middle, load/store the whole
302 if (align_y2
<= align_y1
||
303 align_x2
<= align_x1
) {
304 v3d_move_pixels_unaligned(gpu
, gpu_stride
,
308 get_pixel_offset
, is_load
);
312 /* Load/store the partial utiles. */
313 struct pipe_box partial_boxes
[4] = {
319 .height
= align_y1
- y1
,
326 .height
= y2
- align_y2
,
331 .width
= align_x1
- x1
,
333 .height
= align_y2
- align_y1
,
338 .width
= x2
- align_x2
,
340 .height
= align_y2
- align_y1
,
343 for (int i
= 0; i
< ARRAY_SIZE(partial_boxes
); i
++) {
344 void *partial_cpu
= (cpu
+
345 (partial_boxes
[i
].y
- y1
) * cpu_stride
+
346 (partial_boxes
[i
].x
- x1
) * cpp
);
348 v3d_move_pixels_unaligned(gpu
, gpu_stride
,
349 partial_cpu
, cpu_stride
,
352 get_pixel_offset
, is_load
);
357 v3d_move_pixels_general(void *gpu
, uint32_t gpu_stride
,
358 void *cpu
, uint32_t cpu_stride
,
359 int cpp
, uint32_t image_h
,
360 const struct pipe_box
*box
,
361 uint32_t (*get_pixel_offset
)(uint32_t cpp
,
363 uint32_t x
, uint32_t y
),
368 v3d_move_pixels_general_percpp(gpu
, gpu_stride
,
375 v3d_move_pixels_general_percpp(gpu
, gpu_stride
,
382 v3d_move_pixels_general_percpp(gpu
, gpu_stride
,
389 v3d_move_pixels_general_percpp(gpu
, gpu_stride
,
396 v3d_move_pixels_general_percpp(gpu
, gpu_stride
,
406 v3d_move_tiled_image(void *gpu
, uint32_t gpu_stride
,
407 void *cpu
, uint32_t cpu_stride
,
408 enum v3d_tiling_mode tiling_format
,
411 const struct pipe_box
*box
,
414 switch (tiling_format
) {
415 case VC5_TILING_UIF_XOR
:
416 v3d_move_pixels_general(gpu
, gpu_stride
,
419 v3d_get_uif_xor_pixel_offset
,
422 case VC5_TILING_UIF_NO_XOR
:
423 v3d_move_pixels_general(gpu
, gpu_stride
,
426 v3d_get_uif_no_xor_pixel_offset
,
429 case VC5_TILING_UBLINEAR_2_COLUMN
:
430 v3d_move_pixels_general(gpu
, gpu_stride
,
433 v3d_get_ublinear_2_column_pixel_offset
,
436 case VC5_TILING_UBLINEAR_1_COLUMN
:
437 v3d_move_pixels_general(gpu
, gpu_stride
,
440 v3d_get_ublinear_1_column_pixel_offset
,
443 case VC5_TILING_LINEARTILE
:
444 v3d_move_pixels_general(gpu
, gpu_stride
,
447 v3d_get_lt_pixel_offset
,
451 unreachable("Unsupported tiling format");
457 * Loads pixel data from the start (microtile-aligned) box in \p src to the
458 * start of \p dst according to the given tiling format.
461 v3d_load_tiled_image(void *dst
, uint32_t dst_stride
,
462 void *src
, uint32_t src_stride
,
463 enum v3d_tiling_mode tiling_format
, int cpp
,
465 const struct pipe_box
*box
)
467 v3d_move_tiled_image(src
, src_stride
,
477 * Stores pixel data from the start of \p src into a (microtile-aligned) box in
478 * \p dst according to the given tiling format.
481 v3d_store_tiled_image(void *dst
, uint32_t dst_stride
,
482 void *src
, uint32_t src_stride
,
483 enum v3d_tiling_mode tiling_format
, int cpp
,
485 const struct pipe_box
*box
)
487 v3d_move_tiled_image(dst
, dst_stride
,