2 * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "radeon_tile.h"
33 #include <main/macros.h>
35 #define MICRO_TILE_SIZE 32
37 static void micro_tile_8_x_4_8bit(const void * const src
, unsigned src_pitch
,
38 void * const dst
, unsigned dst_pitch
,
39 unsigned width
, unsigned height
)
41 unsigned row
; /* current source row */
42 unsigned col
; /* current source column */
43 unsigned k
; /* number of processed tiles */
44 const unsigned tile_width
= 8, tile_height
= 4;
45 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
48 for (row
= 0; row
< height
; row
+= tile_height
)
50 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
52 uint8_t *src2
= (uint8_t *)src
+ src_pitch
* row
+ col
;
53 uint8_t *dst2
= (uint8_t *)dst
+ row
* dst_pitch
+
54 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint8_t);
57 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
59 unsigned columns
= MIN2(tile_width
, width
- col
);
60 memcpy(dst2
, src2
, columns
* sizeof(uint8_t));
68 static void micro_tile_4_x_4_16bit(const void * const src
, unsigned src_pitch
,
69 void * const dst
, unsigned dst_pitch
,
70 unsigned width
, unsigned height
)
72 unsigned row
; /* current source row */
73 unsigned col
; /* current source column */
74 unsigned k
; /* number of processed tiles */
75 const unsigned tile_width
= 4, tile_height
= 4;
76 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
79 for (row
= 0; row
< height
; row
+= tile_height
)
81 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
83 uint16_t *src2
= (uint16_t *)src
+ src_pitch
* row
+ col
;
84 uint16_t *dst2
= (uint16_t *)dst
+ row
* dst_pitch
+
85 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint16_t);
88 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
90 unsigned columns
= MIN2(tile_width
, width
- col
);
91 memcpy(dst2
, src2
, columns
* sizeof(uint16_t));
99 static void micro_tile_8_x_2_16bit(const void * const src
, unsigned src_pitch
,
100 void * const dst
, unsigned dst_pitch
,
101 unsigned width
, unsigned height
)
103 unsigned row
; /* current source row */
104 unsigned col
; /* current source column */
105 unsigned k
; /* number of processed tiles */
106 const unsigned tile_width
= 8, tile_height
= 2;
107 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
110 for (row
= 0; row
< height
; row
+= tile_height
)
112 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
114 uint16_t *src2
= (uint16_t *)src
+ src_pitch
* row
+ col
;
115 uint16_t *dst2
= (uint16_t *)dst
+ row
* dst_pitch
+
116 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint16_t);
119 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
121 unsigned columns
= MIN2(tile_width
, width
- col
);
122 memcpy(dst2
, src2
, columns
* sizeof(uint16_t));
130 static void micro_tile_4_x_2_32bit(const void * const src
, unsigned src_pitch
,
131 void * const dst
, unsigned dst_pitch
,
132 unsigned width
, unsigned height
)
134 unsigned row
; /* current source row */
135 unsigned col
; /* current source column */
136 unsigned k
; /* number of processed tiles */
137 const unsigned tile_width
= 4, tile_height
= 2;
138 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
141 for (row
= 0; row
< height
; row
+= tile_height
)
143 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
145 uint32_t *src2
= (uint32_t *)src
+ src_pitch
* row
+ col
;
146 uint32_t *dst2
= (uint32_t *)dst
+ row
* dst_pitch
+
147 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint32_t);
150 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
152 unsigned columns
= MIN2(tile_width
, width
- col
);
153 memcpy(dst2
, src2
, columns
* sizeof(uint32_t));
161 static void micro_tile_2_x_2_64bit(const void * const src
, unsigned src_pitch
,
162 void * const dst
, unsigned dst_pitch
,
163 unsigned width
, unsigned height
)
165 unsigned row
; /* current source row */
166 unsigned col
; /* current source column */
167 unsigned k
; /* number of processed tiles */
168 const unsigned tile_width
= 2, tile_height
= 2;
169 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
172 for (row
= 0; row
< height
; row
+= tile_height
)
174 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
176 uint64_t *src2
= (uint64_t *)src
+ src_pitch
* row
+ col
;
177 uint64_t *dst2
= (uint64_t *)dst
+ row
* dst_pitch
+
178 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint64_t);
181 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
183 unsigned columns
= MIN2(tile_width
, width
- col
);
184 memcpy(dst2
, src2
, columns
* sizeof(uint64_t));
/**
 * Copy an image of 128-bit elements row by row.
 *
 * With 1x1 tiles the element order inside a row does not change, so each row
 * is moved with a single memcpy; only the row pitch may differ between source
 * and destination.  Both pitches are counted in 16-byte elements.
 *
 * Exactly `height` rows of `width` elements are written; bytes between
 * `width` and `dst_pitch` in each destination row are left untouched.
 */
static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                    void * dst, unsigned dst_pitch,
                                    unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* Byte pointers: arithmetic on void * is not standard C. */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    for (j = 0; j < height; ++j)
    {
        /* One copy per row; a per-column loop would repeat the same copy. */
        memcpy(dst_row, src_row, (size_t)width * elem_size);
        dst_row += (size_t)dst_pitch * elem_size;
        src_row += (size_t)src_pitch * elem_size;
    }
}
210 void tile_image(const void * src
, unsigned src_pitch
,
211 void *dst
, unsigned dst_pitch
,
212 gl_format format
, unsigned width
, unsigned height
)
214 assert(src_pitch
>= width
);
215 assert(dst_pitch
>= width
);
216 assert(dst_pitch
* _mesa_get_format_bytes(format
) % MICRO_TILE_SIZE
== 0);
218 switch (_mesa_get_format_bytes(format
))
221 micro_tile_1_x_1_128bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
224 micro_tile_2_x_2_64bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
227 micro_tile_4_x_2_32bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
230 if (_mesa_get_format_bits(format
, GL_DEPTH_BITS
))
232 micro_tile_4_x_4_16bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
236 micro_tile_8_x_2_16bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
240 micro_tile_8_x_4_8bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
248 static void micro_untile_8_x_4_8bit(const void * const src
, unsigned src_pitch
,
249 void * const dst
, unsigned dst_pitch
,
250 unsigned width
, unsigned height
)
252 unsigned row
; /* current destination row */
253 unsigned col
; /* current destination column */
254 unsigned k
; /* current tile number */
255 const unsigned tile_width
= 8, tile_height
= 4;
256 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
258 assert(src_pitch
% tile_width
== 0);
261 for (row
= 0; row
< height
; row
+= tile_height
)
263 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
265 uint8_t *src2
= (uint8_t *)src
+ row
* src_pitch
+
266 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint8_t);
267 uint8_t *dst2
= (uint8_t *)dst
+ dst_pitch
* row
+ col
;
270 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
272 unsigned columns
= MIN2(tile_width
, width
- col
);
273 memcpy(dst2
, src2
, columns
* sizeof(uint8_t));
281 static void micro_untile_8_x_2_16bit(const void * const src
, unsigned src_pitch
,
282 void * const dst
, unsigned dst_pitch
,
283 unsigned width
, unsigned height
)
285 unsigned row
; /* current destination row */
286 unsigned col
; /* current destination column */
287 unsigned k
; /* current tile number */
288 const unsigned tile_width
= 8, tile_height
= 2;
289 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
291 assert(src_pitch
% tile_width
== 0);
294 for (row
= 0; row
< height
; row
+= tile_height
)
296 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
298 uint16_t *src2
= (uint16_t *)src
+ row
* src_pitch
+
299 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint16_t);
300 uint16_t *dst2
= (uint16_t *)dst
+ dst_pitch
* row
+ col
;
303 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
305 unsigned columns
= MIN2(tile_width
, width
- col
);
306 memcpy(dst2
, src2
, columns
* sizeof(uint16_t));
314 static void micro_untile_4_x_4_16bit(const void * const src
, unsigned src_pitch
,
315 void * const dst
, unsigned dst_pitch
,
316 unsigned width
, unsigned height
)
318 unsigned row
; /* current destination row */
319 unsigned col
; /* current destination column */
320 unsigned k
; /* current tile number */
321 const unsigned tile_width
= 4, tile_height
= 4;
322 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
324 assert(src_pitch
% tile_width
== 0);
327 for (row
= 0; row
< height
; row
+= tile_height
)
329 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
331 uint16_t *src2
= (uint16_t *)src
+ row
* src_pitch
+
332 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint16_t);
333 uint16_t *dst2
= (uint16_t *)dst
+ dst_pitch
* row
+ col
;
336 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
338 unsigned columns
= MIN2(tile_width
, width
- col
);
339 memcpy(dst2
, src2
, columns
* sizeof(uint16_t));
347 static void micro_untile_4_x_2_32bit(const void * const src
, unsigned src_pitch
,
348 void * const dst
, unsigned dst_pitch
,
349 unsigned width
, unsigned height
)
351 unsigned row
; /* current destination row */
352 unsigned col
; /* current destination column */
353 unsigned k
; /* current tile number */
354 const unsigned tile_width
= 4, tile_height
= 2;
355 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
357 assert(src_pitch
% tile_width
== 0);
360 for (row
= 0; row
< height
; row
+= tile_height
)
362 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
364 uint32_t *src2
= (uint32_t *)src
+ row
* src_pitch
+
365 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint32_t);
366 uint32_t *dst2
= (uint32_t *)dst
+ dst_pitch
* row
+ col
;
369 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
371 unsigned columns
= MIN2(tile_width
, width
- col
);
372 memcpy(dst2
, src2
, columns
* sizeof(uint32_t));
380 static void micro_untile_2_x_2_64bit(const void * const src
, unsigned src_pitch
,
381 void * const dst
, unsigned dst_pitch
,
382 unsigned width
, unsigned height
)
384 unsigned row
; /* current destination row */
385 unsigned col
; /* current destination column */
386 unsigned k
; /* current tile number */
387 const unsigned tile_width
= 2, tile_height
= 2;
388 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
390 assert(src_pitch
% tile_width
== 0);
393 for (row
= 0; row
< height
; row
+= tile_height
)
395 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
397 uint64_t *src2
= (uint64_t *)src
+ row
* src_pitch
+
398 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint64_t);
399 uint64_t *dst2
= (uint64_t *)dst
+ dst_pitch
* row
+ col
;
402 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
404 unsigned columns
= MIN2(tile_width
, width
- col
);
405 memcpy(dst2
, src2
, columns
* sizeof(uint64_t));
/**
 * Copy an image of 128-bit elements row by row.
 *
 * With 1x1 tiles the element order inside a row does not change, so each row
 * is moved with a single memcpy; only the row pitch may differ between source
 * and destination.  Both pitches are counted in 16-byte elements.
 *
 * Exactly `height` rows of `width` elements are written; bytes between
 * `width` and `dst_pitch` in each destination row are left untouched.
 */
static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                      void * dst, unsigned dst_pitch,
                                      unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* Byte pointers: arithmetic on void * is not standard C. */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    for (j = 0; j < height; ++j)
    {
        /* One copy per row; a per-column loop would repeat the same copy. */
        memcpy(dst_row, src_row, (size_t)width * elem_size);
        dst_row += (size_t)dst_pitch * elem_size;
        src_row += (size_t)src_pitch * elem_size;
    }
}
431 void untile_image(const void * src
, unsigned src_pitch
,
432 void *dst
, unsigned dst_pitch
,
433 gl_format format
, unsigned width
, unsigned height
)
435 assert(src_pitch
>= width
);
436 assert(dst_pitch
>= width
);
437 assert(src_pitch
* _mesa_get_format_bytes(format
) % MICRO_TILE_SIZE
== 0);
439 switch (_mesa_get_format_bytes(format
))
442 micro_untile_1_x_1_128bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
445 micro_untile_2_x_2_64bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
448 micro_untile_4_x_2_32bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
451 if (_mesa_get_format_bits(format
, GL_DEPTH_BITS
))
453 micro_untile_4_x_4_16bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
457 micro_untile_8_x_2_16bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
461 micro_untile_8_x_4_8bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
469 void get_tile_size(gl_format format
, unsigned *block_width
, unsigned *block_height
)
471 switch (_mesa_get_format_bytes(format
))
486 if (_mesa_get_format_bits(format
, GL_DEPTH_BITS
))