/*
 * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "radeon_screen.h"
#include "radeon_tile.h"

#include <assert.h>
#include <stdint.h>
#include <string.h>

#include "main/macros.h"
#include "radeon_debug.h"
/* Size of one micro tile in bytes; every tile shape below covers this much. */
#define MICRO_TILE_SIZE 32
39 static void micro_tile_8_x_4_8bit(const void * const src
, unsigned src_pitch
,
40 void * const dst
, unsigned dst_pitch
,
41 unsigned width
, unsigned height
)
43 unsigned row
; /* current source row */
44 unsigned col
; /* current source column */
45 unsigned k
; /* number of processed tiles */
46 const unsigned tile_width
= 8, tile_height
= 4;
47 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
50 for (row
= 0; row
< height
; row
+= tile_height
)
52 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
54 uint8_t *src2
= (uint8_t *)src
+ src_pitch
* row
+ col
;
55 uint8_t *dst2
= (uint8_t *)dst
+ row
* dst_pitch
+
56 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint8_t);
59 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
61 unsigned columns
= MIN2(tile_width
, width
- col
);
62 memcpy(dst2
, src2
, columns
* sizeof(uint8_t));
70 static void micro_tile_4_x_4_16bit(const void * const src
, unsigned src_pitch
,
71 void * const dst
, unsigned dst_pitch
,
72 unsigned width
, unsigned height
)
74 unsigned row
; /* current source row */
75 unsigned col
; /* current source column */
76 unsigned k
; /* number of processed tiles */
77 const unsigned tile_width
= 4, tile_height
= 4;
78 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
81 for (row
= 0; row
< height
; row
+= tile_height
)
83 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
85 uint16_t *src2
= (uint16_t *)src
+ src_pitch
* row
+ col
;
86 uint16_t *dst2
= (uint16_t *)dst
+ row
* dst_pitch
+
87 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint16_t);
90 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
92 unsigned columns
= MIN2(tile_width
, width
- col
);
93 memcpy(dst2
, src2
, columns
* sizeof(uint16_t));
101 static void micro_tile_8_x_2_16bit(const void * const src
, unsigned src_pitch
,
102 void * const dst
, unsigned dst_pitch
,
103 unsigned width
, unsigned height
)
105 unsigned row
; /* current source row */
106 unsigned col
; /* current source column */
107 unsigned k
; /* number of processed tiles */
108 const unsigned tile_width
= 8, tile_height
= 2;
109 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
112 for (row
= 0; row
< height
; row
+= tile_height
)
114 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
116 uint16_t *src2
= (uint16_t *)src
+ src_pitch
* row
+ col
;
117 uint16_t *dst2
= (uint16_t *)dst
+ row
* dst_pitch
+
118 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint16_t);
121 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
123 unsigned columns
= MIN2(tile_width
, width
- col
);
124 memcpy(dst2
, src2
, columns
* sizeof(uint16_t));
132 static void micro_tile_4_x_2_32bit(const void * const src
, unsigned src_pitch
,
133 void * const dst
, unsigned dst_pitch
,
134 unsigned width
, unsigned height
)
136 unsigned row
; /* current source row */
137 unsigned col
; /* current source column */
138 unsigned k
; /* number of processed tiles */
139 const unsigned tile_width
= 4, tile_height
= 2;
140 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
143 for (row
= 0; row
< height
; row
+= tile_height
)
145 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
147 uint32_t *src2
= (uint32_t *)src
+ src_pitch
* row
+ col
;
148 uint32_t *dst2
= (uint32_t *)dst
+ row
* dst_pitch
+
149 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint32_t);
152 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
154 unsigned columns
= MIN2(tile_width
, width
- col
);
155 memcpy(dst2
, src2
, columns
* sizeof(uint32_t));
163 static void micro_tile_2_x_2_64bit(const void * const src
, unsigned src_pitch
,
164 void * const dst
, unsigned dst_pitch
,
165 unsigned width
, unsigned height
)
167 unsigned row
; /* current source row */
168 unsigned col
; /* current source column */
169 unsigned k
; /* number of processed tiles */
170 const unsigned tile_width
= 2, tile_height
= 2;
171 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
174 for (row
= 0; row
< height
; row
+= tile_height
)
176 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
178 uint64_t *src2
= (uint64_t *)src
+ src_pitch
* row
+ col
;
179 uint64_t *dst2
= (uint64_t *)dst
+ row
* dst_pitch
+
180 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint64_t);
183 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
185 unsigned columns
= MIN2(tile_width
, width
- col
);
186 memcpy(dst2
, src2
, columns
* sizeof(uint64_t));
/**
 * Copy an image of 128-bit elements row by row.
 *
 * With a 1x1 tile there is nothing to reorder inside a row, so each row is
 * copied as one contiguous run; only the row pitch may differ between src
 * and dst.  Exactly height rows are read and written.
 *
 * \param src        source image
 * \param src_pitch  distance between source rows, in 16-byte elements
 * \param dst        destination image
 * \param dst_pitch  distance between destination rows, in 16-byte elements
 * \param width      image width in elements
 * \param height     image height in rows
 */
static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                    void * dst, unsigned dst_pitch,
                                    unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* Byte pointers: arithmetic on void * is a compiler extension. */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    for (j = 0; j < height; ++j) {
        memcpy(dst_row, src_row, (size_t)width * elem_size);
        dst_row += (size_t)dst_pitch * elem_size;
        src_row += (size_t)src_pitch * elem_size;
    }
}
212 void tile_image(const void * src
, unsigned src_pitch
,
213 void *dst
, unsigned dst_pitch
,
214 mesa_format format
, unsigned width
, unsigned height
)
216 assert(src_pitch
>= width
);
217 assert(dst_pitch
>= width
);
219 radeon_print(RADEON_TEXTURE
, RADEON_TRACE
,
220 "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
221 src_pitch
, dst_pitch
, width
, height
, _mesa_get_format_bytes(format
));
223 switch (_mesa_get_format_bytes(format
))
226 micro_tile_1_x_1_128bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
229 micro_tile_2_x_2_64bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
232 micro_tile_4_x_2_32bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
235 if (_mesa_get_format_bits(format
, GL_DEPTH_BITS
))
237 micro_tile_4_x_4_16bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
241 micro_tile_8_x_2_16bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
245 micro_tile_8_x_4_8bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
253 static void micro_untile_8_x_4_8bit(const void * const src
, unsigned src_pitch
,
254 void * const dst
, unsigned dst_pitch
,
255 unsigned width
, unsigned height
)
257 unsigned row
; /* current destination row */
258 unsigned col
; /* current destination column */
259 unsigned k
; /* current tile number */
260 const unsigned tile_width
= 8, tile_height
= 4;
261 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
263 assert(src_pitch
% tile_width
== 0);
266 for (row
= 0; row
< height
; row
+= tile_height
)
268 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
270 uint8_t *src2
= (uint8_t *)src
+ row
* src_pitch
+
271 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint8_t);
272 uint8_t *dst2
= (uint8_t *)dst
+ dst_pitch
* row
+ col
;
275 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
277 unsigned columns
= MIN2(tile_width
, width
- col
);
278 memcpy(dst2
, src2
, columns
* sizeof(uint8_t));
286 static void micro_untile_8_x_2_16bit(const void * const src
, unsigned src_pitch
,
287 void * const dst
, unsigned dst_pitch
,
288 unsigned width
, unsigned height
)
290 unsigned row
; /* current destination row */
291 unsigned col
; /* current destination column */
292 unsigned k
; /* current tile number */
293 const unsigned tile_width
= 8, tile_height
= 2;
294 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
296 assert(src_pitch
% tile_width
== 0);
299 for (row
= 0; row
< height
; row
+= tile_height
)
301 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
303 uint16_t *src2
= (uint16_t *)src
+ row
* src_pitch
+
304 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint16_t);
305 uint16_t *dst2
= (uint16_t *)dst
+ dst_pitch
* row
+ col
;
308 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
310 unsigned columns
= MIN2(tile_width
, width
- col
);
311 memcpy(dst2
, src2
, columns
* sizeof(uint16_t));
319 static void micro_untile_4_x_4_16bit(const void * const src
, unsigned src_pitch
,
320 void * const dst
, unsigned dst_pitch
,
321 unsigned width
, unsigned height
)
323 unsigned row
; /* current destination row */
324 unsigned col
; /* current destination column */
325 unsigned k
; /* current tile number */
326 const unsigned tile_width
= 4, tile_height
= 4;
327 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
329 assert(src_pitch
% tile_width
== 0);
332 for (row
= 0; row
< height
; row
+= tile_height
)
334 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
336 uint16_t *src2
= (uint16_t *)src
+ row
* src_pitch
+
337 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint16_t);
338 uint16_t *dst2
= (uint16_t *)dst
+ dst_pitch
* row
+ col
;
341 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
343 unsigned columns
= MIN2(tile_width
, width
- col
);
344 memcpy(dst2
, src2
, columns
* sizeof(uint16_t));
352 static void micro_untile_4_x_2_32bit(const void * const src
, unsigned src_pitch
,
353 void * const dst
, unsigned dst_pitch
,
354 unsigned width
, unsigned height
)
356 unsigned row
; /* current destination row */
357 unsigned col
; /* current destination column */
358 unsigned k
; /* current tile number */
359 const unsigned tile_width
= 4, tile_height
= 2;
360 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
362 assert(src_pitch
% tile_width
== 0);
365 for (row
= 0; row
< height
; row
+= tile_height
)
367 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
369 uint32_t *src2
= (uint32_t *)src
+ row
* src_pitch
+
370 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint32_t);
371 uint32_t *dst2
= (uint32_t *)dst
+ dst_pitch
* row
+ col
;
374 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
376 unsigned columns
= MIN2(tile_width
, width
- col
);
377 memcpy(dst2
, src2
, columns
* sizeof(uint32_t));
385 static void micro_untile_2_x_2_64bit(const void * const src
, unsigned src_pitch
,
386 void * const dst
, unsigned dst_pitch
,
387 unsigned width
, unsigned height
)
389 unsigned row
; /* current destination row */
390 unsigned col
; /* current destination column */
391 unsigned k
; /* current tile number */
392 const unsigned tile_width
= 2, tile_height
= 2;
393 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
395 assert(src_pitch
% tile_width
== 0);
398 for (row
= 0; row
< height
; row
+= tile_height
)
400 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
402 uint64_t *src2
= (uint64_t *)src
+ row
* src_pitch
+
403 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint64_t);
404 uint64_t *dst2
= (uint64_t *)dst
+ dst_pitch
* row
+ col
;
407 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
409 unsigned columns
= MIN2(tile_width
, width
- col
);
410 memcpy(dst2
, src2
, columns
* sizeof(uint64_t));
/**
 * Copy an image of 128-bit elements row by row.
 *
 * With a 1x1 tile there is nothing to reorder inside a row, so each row is
 * copied as one contiguous run; only the row pitch may differ between src
 * and dst.  Exactly height rows are read and written.
 *
 * \param src        source image
 * \param src_pitch  distance between source rows, in 16-byte elements
 * \param dst        destination image
 * \param dst_pitch  distance between destination rows, in 16-byte elements
 * \param width      image width in elements
 * \param height     image height in rows
 */
static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                      void * dst, unsigned dst_pitch,
                                      unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* Byte pointers: arithmetic on void * is a compiler extension. */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    for (j = 0; j < height; ++j) {
        memcpy(dst_row, src_row, (size_t)width * elem_size);
        dst_row += (size_t)dst_pitch * elem_size;
        src_row += (size_t)src_pitch * elem_size;
    }
}
436 void untile_image(const void * src
, unsigned src_pitch
,
437 void *dst
, unsigned dst_pitch
,
438 mesa_format format
, unsigned width
, unsigned height
)
440 assert(src_pitch
>= width
);
441 assert(dst_pitch
>= width
);
443 radeon_print(RADEON_TEXTURE
, RADEON_TRACE
,
444 "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
445 src_pitch
, dst_pitch
, width
, height
, _mesa_get_format_bytes(format
));
447 switch (_mesa_get_format_bytes(format
))
450 micro_untile_1_x_1_128bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
453 micro_untile_2_x_2_64bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
456 micro_untile_4_x_2_32bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
459 if (_mesa_get_format_bits(format
, GL_DEPTH_BITS
))
461 micro_untile_4_x_4_16bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
465 micro_untile_8_x_2_16bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
469 micro_untile_8_x_4_8bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
477 void get_tile_size(mesa_format format
, unsigned *block_width
, unsigned *block_height
)
479 switch (_mesa_get_format_bytes(format
))
494 if (_mesa_get_format_bits(format
, GL_DEPTH_BITS
))