/*
 * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
28 #include "radeon_tile.h"
33 #include "main/macros.h"
34 #include "radeon_debug.h"
36 #define MICRO_TILE_SIZE 32
38 static void micro_tile_8_x_4_8bit(const void * const src
, unsigned src_pitch
,
39 void * const dst
, unsigned dst_pitch
,
40 unsigned width
, unsigned height
)
42 unsigned row
; /* current source row */
43 unsigned col
; /* current source column */
44 unsigned k
; /* number of processed tiles */
45 const unsigned tile_width
= 8, tile_height
= 4;
46 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
49 for (row
= 0; row
< height
; row
+= tile_height
)
51 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
53 uint8_t *src2
= (uint8_t *)src
+ src_pitch
* row
+ col
;
54 uint8_t *dst2
= (uint8_t *)dst
+ row
* dst_pitch
+
55 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint8_t);
58 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
60 unsigned columns
= MIN2(tile_width
, width
- col
);
61 memcpy(dst2
, src2
, columns
* sizeof(uint8_t));
69 static void micro_tile_4_x_4_16bit(const void * const src
, unsigned src_pitch
,
70 void * const dst
, unsigned dst_pitch
,
71 unsigned width
, unsigned height
)
73 unsigned row
; /* current source row */
74 unsigned col
; /* current source column */
75 unsigned k
; /* number of processed tiles */
76 const unsigned tile_width
= 4, tile_height
= 4;
77 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
80 for (row
= 0; row
< height
; row
+= tile_height
)
82 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
84 uint16_t *src2
= (uint16_t *)src
+ src_pitch
* row
+ col
;
85 uint16_t *dst2
= (uint16_t *)dst
+ row
* dst_pitch
+
86 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint16_t);
89 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
91 unsigned columns
= MIN2(tile_width
, width
- col
);
92 memcpy(dst2
, src2
, columns
* sizeof(uint16_t));
100 static void micro_tile_8_x_2_16bit(const void * const src
, unsigned src_pitch
,
101 void * const dst
, unsigned dst_pitch
,
102 unsigned width
, unsigned height
)
104 unsigned row
; /* current source row */
105 unsigned col
; /* current source column */
106 unsigned k
; /* number of processed tiles */
107 const unsigned tile_width
= 8, tile_height
= 2;
108 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
111 for (row
= 0; row
< height
; row
+= tile_height
)
113 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
115 uint16_t *src2
= (uint16_t *)src
+ src_pitch
* row
+ col
;
116 uint16_t *dst2
= (uint16_t *)dst
+ row
* dst_pitch
+
117 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint16_t);
120 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
122 unsigned columns
= MIN2(tile_width
, width
- col
);
123 memcpy(dst2
, src2
, columns
* sizeof(uint16_t));
131 static void micro_tile_4_x_2_32bit(const void * const src
, unsigned src_pitch
,
132 void * const dst
, unsigned dst_pitch
,
133 unsigned width
, unsigned height
)
135 unsigned row
; /* current source row */
136 unsigned col
; /* current source column */
137 unsigned k
; /* number of processed tiles */
138 const unsigned tile_width
= 4, tile_height
= 2;
139 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
142 for (row
= 0; row
< height
; row
+= tile_height
)
144 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
146 uint32_t *src2
= (uint32_t *)src
+ src_pitch
* row
+ col
;
147 uint32_t *dst2
= (uint32_t *)dst
+ row
* dst_pitch
+
148 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint32_t);
151 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
153 unsigned columns
= MIN2(tile_width
, width
- col
);
154 memcpy(dst2
, src2
, columns
* sizeof(uint32_t));
162 static void micro_tile_2_x_2_64bit(const void * const src
, unsigned src_pitch
,
163 void * const dst
, unsigned dst_pitch
,
164 unsigned width
, unsigned height
)
166 unsigned row
; /* current source row */
167 unsigned col
; /* current source column */
168 unsigned k
; /* number of processed tiles */
169 const unsigned tile_width
= 2, tile_height
= 2;
170 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
173 for (row
= 0; row
< height
; row
+= tile_height
)
175 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
177 uint64_t *src2
= (uint64_t *)src
+ src_pitch
* row
+ col
;
178 uint64_t *dst2
= (uint64_t *)dst
+ row
* dst_pitch
+
179 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint64_t);
182 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
184 unsigned columns
= MIN2(tile_width
, width
- col
);
185 memcpy(dst2
, src2
, columns
* sizeof(uint64_t));
/**
 * "Tile" a linear image of 128-bit texels.
 *
 * The tile is a single texel, so the texel order within a row does not
 * change; the image is copied row by row and only the row stride may
 * differ between source and destination.
 *
 * Exactly \p height rows of \p width texels are copied.  Bytes between
 * the end of a row and the next pitch boundary are left untouched.
 *
 * \param src        linear source image
 * \param src_pitch  source row stride, in texels
 * \param dst        destination image
 * \param dst_pitch  destination row stride, in texels
 * \param width      image width, in texels
 * \param height     image height, in texels
 */
static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                    void * dst, unsigned dst_pitch,
                                    unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* byte pointers: arithmetic on void * is not standard C */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    /* One memcpy per image row; a per-texel loop around a whole-row copy
     * would run past the end of both buffers. */
    for (j = 0; j < height; ++j)
    {
        memcpy(dst_row, src_row, (size_t)width * elem_size);
        dst_row += (size_t)dst_pitch * elem_size;
        src_row += (size_t)src_pitch * elem_size;
    }
}
211 void tile_image(const void * src
, unsigned src_pitch
,
212 void *dst
, unsigned dst_pitch
,
213 gl_format format
, unsigned width
, unsigned height
)
215 assert(src_pitch
>= width
);
216 assert(dst_pitch
>= width
);
218 radeon_print(RADEON_TEXTURE
, RADEON_TRACE
,
219 "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
220 src_pitch
, dst_pitch
, width
, height
, _mesa_get_format_bytes(format
));
222 switch (_mesa_get_format_bytes(format
))
225 micro_tile_1_x_1_128bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
228 micro_tile_2_x_2_64bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
231 micro_tile_4_x_2_32bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
234 if (_mesa_get_format_bits(format
, GL_DEPTH_BITS
))
236 micro_tile_4_x_4_16bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
240 micro_tile_8_x_2_16bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
244 micro_tile_8_x_4_8bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
252 static void micro_untile_8_x_4_8bit(const void * const src
, unsigned src_pitch
,
253 void * const dst
, unsigned dst_pitch
,
254 unsigned width
, unsigned height
)
256 unsigned row
; /* current destination row */
257 unsigned col
; /* current destination column */
258 unsigned k
; /* current tile number */
259 const unsigned tile_width
= 8, tile_height
= 4;
260 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
262 assert(src_pitch
% tile_width
== 0);
265 for (row
= 0; row
< height
; row
+= tile_height
)
267 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
269 uint8_t *src2
= (uint8_t *)src
+ row
* src_pitch
+
270 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint8_t);
271 uint8_t *dst2
= (uint8_t *)dst
+ dst_pitch
* row
+ col
;
274 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
276 unsigned columns
= MIN2(tile_width
, width
- col
);
277 memcpy(dst2
, src2
, columns
* sizeof(uint8_t));
285 static void micro_untile_8_x_2_16bit(const void * const src
, unsigned src_pitch
,
286 void * const dst
, unsigned dst_pitch
,
287 unsigned width
, unsigned height
)
289 unsigned row
; /* current destination row */
290 unsigned col
; /* current destination column */
291 unsigned k
; /* current tile number */
292 const unsigned tile_width
= 8, tile_height
= 2;
293 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
295 assert(src_pitch
% tile_width
== 0);
298 for (row
= 0; row
< height
; row
+= tile_height
)
300 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
302 uint16_t *src2
= (uint16_t *)src
+ row
* src_pitch
+
303 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint16_t);
304 uint16_t *dst2
= (uint16_t *)dst
+ dst_pitch
* row
+ col
;
307 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
309 unsigned columns
= MIN2(tile_width
, width
- col
);
310 memcpy(dst2
, src2
, columns
* sizeof(uint16_t));
318 static void micro_untile_4_x_4_16bit(const void * const src
, unsigned src_pitch
,
319 void * const dst
, unsigned dst_pitch
,
320 unsigned width
, unsigned height
)
322 unsigned row
; /* current destination row */
323 unsigned col
; /* current destination column */
324 unsigned k
; /* current tile number */
325 const unsigned tile_width
= 4, tile_height
= 4;
326 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
328 assert(src_pitch
% tile_width
== 0);
331 for (row
= 0; row
< height
; row
+= tile_height
)
333 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
335 uint16_t *src2
= (uint16_t *)src
+ row
* src_pitch
+
336 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint16_t);
337 uint16_t *dst2
= (uint16_t *)dst
+ dst_pitch
* row
+ col
;
340 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
342 unsigned columns
= MIN2(tile_width
, width
- col
);
343 memcpy(dst2
, src2
, columns
* sizeof(uint16_t));
351 static void micro_untile_4_x_2_32bit(const void * const src
, unsigned src_pitch
,
352 void * const dst
, unsigned dst_pitch
,
353 unsigned width
, unsigned height
)
355 unsigned row
; /* current destination row */
356 unsigned col
; /* current destination column */
357 unsigned k
; /* current tile number */
358 const unsigned tile_width
= 4, tile_height
= 2;
359 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
361 assert(src_pitch
% tile_width
== 0);
364 for (row
= 0; row
< height
; row
+= tile_height
)
366 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
368 uint32_t *src2
= (uint32_t *)src
+ row
* src_pitch
+
369 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint32_t);
370 uint32_t *dst2
= (uint32_t *)dst
+ dst_pitch
* row
+ col
;
373 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
375 unsigned columns
= MIN2(tile_width
, width
- col
);
376 memcpy(dst2
, src2
, columns
* sizeof(uint32_t));
384 static void micro_untile_2_x_2_64bit(const void * const src
, unsigned src_pitch
,
385 void * const dst
, unsigned dst_pitch
,
386 unsigned width
, unsigned height
)
388 unsigned row
; /* current destination row */
389 unsigned col
; /* current destination column */
390 unsigned k
; /* current tile number */
391 const unsigned tile_width
= 2, tile_height
= 2;
392 const unsigned tiles_in_row
= (width
+ (tile_width
- 1)) / tile_width
;
394 assert(src_pitch
% tile_width
== 0);
397 for (row
= 0; row
< height
; row
+= tile_height
)
399 for (col
= 0; col
< width
; col
+= tile_width
, ++k
)
401 uint64_t *src2
= (uint64_t *)src
+ row
* src_pitch
+
402 (k
% tiles_in_row
) * MICRO_TILE_SIZE
/ sizeof(uint64_t);
403 uint64_t *dst2
= (uint64_t *)dst
+ dst_pitch
* row
+ col
;
406 for (j
= 0; j
< MIN2(tile_height
, height
- row
); ++j
)
408 unsigned columns
= MIN2(tile_width
, width
- col
);
409 memcpy(dst2
, src2
, columns
* sizeof(uint64_t));
/**
 * "Untile" an image of 128-bit texels.
 *
 * The tile is a single texel, so the texel order within a row does not
 * change; the image is copied row by row and only the row stride may
 * differ between source and destination.
 *
 * Exactly \p height rows of \p width texels are copied.  Bytes between
 * the end of a row and the next pitch boundary are left untouched.
 *
 * \param src        source image
 * \param src_pitch  source row stride, in texels
 * \param dst        linear destination image
 * \param dst_pitch  destination row stride, in texels
 * \param width      image width, in texels
 * \param height     image height, in texels
 */
static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                      void * dst, unsigned dst_pitch,
                                      unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* byte pointers: arithmetic on void * is not standard C */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    /* One memcpy per image row; a per-texel loop around a whole-row copy
     * would run past the end of both buffers. */
    for (j = 0; j < height; ++j)
    {
        memcpy(dst_row, src_row, (size_t)width * elem_size);
        dst_row += (size_t)dst_pitch * elem_size;
        src_row += (size_t)src_pitch * elem_size;
    }
}
435 void untile_image(const void * src
, unsigned src_pitch
,
436 void *dst
, unsigned dst_pitch
,
437 gl_format format
, unsigned width
, unsigned height
)
439 assert(src_pitch
>= width
);
440 assert(dst_pitch
>= width
);
442 radeon_print(RADEON_TEXTURE
, RADEON_TRACE
,
443 "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
444 src_pitch
, dst_pitch
, width
, height
, _mesa_get_format_bytes(format
));
446 switch (_mesa_get_format_bytes(format
))
449 micro_untile_1_x_1_128bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
452 micro_untile_2_x_2_64bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
455 micro_untile_4_x_2_32bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
458 if (_mesa_get_format_bits(format
, GL_DEPTH_BITS
))
460 micro_untile_4_x_4_16bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
464 micro_untile_8_x_2_16bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
468 micro_untile_8_x_4_8bit(src
, src_pitch
, dst
, dst_pitch
, width
, height
);
476 void get_tile_size(gl_format format
, unsigned *block_width
, unsigned *block_height
)
478 switch (_mesa_get_format_bytes(format
))
493 if (_mesa_get_format_bits(format
, GL_DEPTH_BITS
))