radeon: add software untiling functions
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_tile.c
1 /*
2 * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_tile.h"
29
30 #include <stdint.h>
31 #include <string.h>
32
33 #include <main/macros.h>
34
35 #define MICRO_TILE_SIZE 32
36
37 static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch,
38 void * const dst, unsigned dst_pitch,
39 unsigned width, unsigned height)
40 {
41 unsigned row; /* current source row */
42 unsigned col; /* current source column */
43 unsigned k; /* number of processed tiles */
44 const unsigned tile_width = 8, tile_height = 4;
45 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
46
47 k = 0;
48 for (row = 0; row < height; row += tile_height)
49 {
50 for (col = 0; col < width; col += tile_width, ++k)
51 {
52 uint8_t *src2 = (uint8_t *)src + src_pitch * row + col;
53 uint8_t *dst2 = (uint8_t *)dst + row * dst_pitch +
54 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
55 unsigned j;
56
57 for (j = 0; j < MIN2(tile_height, height - row); ++j)
58 {
59 unsigned columns = MIN2(tile_width, width - col);
60 memcpy(dst2, src2, columns * sizeof(uint8_t));
61 dst2 += tile_width;
62 src2 += src_pitch;
63 }
64 }
65 }
66 }
67
68 static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch,
69 void * const dst, unsigned dst_pitch,
70 unsigned width, unsigned height)
71 {
72 unsigned row; /* current source row */
73 unsigned col; /* current source column */
74 unsigned k; /* number of processed tiles */
75 const unsigned tile_width = 4, tile_height = 4;
76 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
77
78 k = 0;
79 for (row = 0; row < height; row += tile_height)
80 {
81 for (col = 0; col < width; col += tile_width, ++k)
82 {
83 uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
84 uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
85 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
86 unsigned j;
87
88 for (j = 0; j < MIN2(tile_height, height - row); ++j)
89 {
90 unsigned columns = MIN2(tile_width, width - col);
91 memcpy(dst2, src2, columns * sizeof(uint16_t));
92 dst2 += tile_width;
93 src2 += src_pitch;
94 }
95 }
96 }
97 }
98
99 static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch,
100 void * const dst, unsigned dst_pitch,
101 unsigned width, unsigned height)
102 {
103 unsigned row; /* current source row */
104 unsigned col; /* current source column */
105 unsigned k; /* number of processed tiles */
106 const unsigned tile_width = 8, tile_height = 2;
107 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
108
109 k = 0;
110 for (row = 0; row < height; row += tile_height)
111 {
112 for (col = 0; col < width; col += tile_width, ++k)
113 {
114 uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
115 uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
116 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
117 unsigned j;
118
119 for (j = 0; j < MIN2(tile_height, height - row); ++j)
120 {
121 unsigned columns = MIN2(tile_width, width - col);
122 memcpy(dst2, src2, columns * sizeof(uint16_t));
123 dst2 += tile_width;
124 src2 += src_pitch;
125 }
126 }
127 }
128 }
129
130 static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch,
131 void * const dst, unsigned dst_pitch,
132 unsigned width, unsigned height)
133 {
134 unsigned row; /* current source row */
135 unsigned col; /* current source column */
136 unsigned k; /* number of processed tiles */
137 const unsigned tile_width = 4, tile_height = 2;
138 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
139
140 k = 0;
141 for (row = 0; row < height; row += tile_height)
142 {
143 for (col = 0; col < width; col += tile_width, ++k)
144 {
145 uint32_t *src2 = (uint32_t *)src + src_pitch * row + col;
146 uint32_t *dst2 = (uint32_t *)dst + row * dst_pitch +
147 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
148 unsigned j;
149
150 for (j = 0; j < MIN2(tile_height, height - row); ++j)
151 {
152 unsigned columns = MIN2(tile_width, width - col);
153 memcpy(dst2, src2, columns * sizeof(uint32_t));
154 dst2 += tile_width;
155 src2 += src_pitch;
156 }
157 }
158 }
159 }
160
161 static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch,
162 void * const dst, unsigned dst_pitch,
163 unsigned width, unsigned height)
164 {
165 unsigned row; /* current source row */
166 unsigned col; /* current source column */
167 unsigned k; /* number of processed tiles */
168 const unsigned tile_width = 2, tile_height = 2;
169 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
170
171 k = 0;
172 for (row = 0; row < height; row += tile_height)
173 {
174 for (col = 0; col < width; col += tile_width, ++k)
175 {
176 uint64_t *src2 = (uint64_t *)src + src_pitch * row + col;
177 uint64_t *dst2 = (uint64_t *)dst + row * dst_pitch +
178 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
179 unsigned j;
180
181 for (j = 0; j < MIN2(tile_height, height - row); ++j)
182 {
183 unsigned columns = MIN2(tile_width, width - col);
184 memcpy(dst2, src2, columns * sizeof(uint64_t));
185 dst2 += tile_width;
186 src2 += src_pitch;
187 }
188 }
189 }
190 }
191
/*
 * Copy a linear image of 128-bit texels into 1x1 micro tiles.
 *
 * With a 1x1 tile the tiled layout equals the linear one, so this is a
 * plain row-by-row copy that honours the two pitches.
 *
 * src, src_pitch - source image and its pitch, in texels
 * dst, dst_pitch - destination image and its pitch, in texels
 * width, height  - size of the area to copy, in texels
 */
static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                    void * dst, unsigned dst_pitch,
                                    unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* byte pointers: arithmetic on void * is not standard C */
    const uint8_t *in = src;
    uint8_t *out = dst;
    unsigned j;

    /*
     * Exactly one memcpy per row. Each memcpy already moves the whole row,
     * so iterating over the columns as well would copy width * height rows
     * and run past the end of both buffers.
     */
    for (j = 0; j < height; ++j)
    {
        memcpy(out, in, (size_t)width * elem_size);
        out += (size_t)dst_pitch * elem_size;
        in += (size_t)src_pitch * elem_size;
    }
}
209
210 void tile_image(const void * src, unsigned src_pitch,
211 void *dst, unsigned dst_pitch,
212 gl_format format, unsigned width, unsigned height)
213 {
214 assert(src_pitch >= width);
215 assert(dst_pitch >= width);
216 assert(dst_pitch * _mesa_get_format_bytes(format) % MICRO_TILE_SIZE == 0);
217
218 switch (_mesa_get_format_bytes(format))
219 {
220 case 16:
221 micro_tile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
222 break;
223 case 8:
224 micro_tile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
225 break;
226 case 4:
227 micro_tile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
228 break;
229 case 2:
230 if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
231 {
232 micro_tile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
233 }
234 else
235 {
236 micro_tile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
237 }
238 break;
239 case 1:
240 micro_tile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
241 break;
242 default:
243 assert(0);
244 break;
245 }
246 }
247
248 static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch,
249 void * const dst, unsigned dst_pitch,
250 unsigned width, unsigned height)
251 {
252 unsigned row; /* current destination row */
253 unsigned col; /* current destination column */
254 unsigned k; /* current tile number */
255 const unsigned tile_width = 8, tile_height = 4;
256 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
257
258 assert(src_pitch % tile_width == 0);
259
260 k = 0;
261 for (row = 0; row < height; row += tile_height)
262 {
263 for (col = 0; col < width; col += tile_width, ++k)
264 {
265 uint8_t *src2 = (uint8_t *)src + row * src_pitch +
266 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
267 uint8_t *dst2 = (uint8_t *)dst + dst_pitch * row + col;
268 unsigned j;
269
270 for (j = 0; j < MIN2(tile_height, height - row); ++j)
271 {
272 unsigned columns = MIN2(tile_width, width - col);
273 memcpy(dst2, src2, columns * sizeof(uint8_t));
274 dst2 += dst_pitch;
275 src2 += tile_width;
276 }
277 }
278 }
279 }
280
281 static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch,
282 void * const dst, unsigned dst_pitch,
283 unsigned width, unsigned height)
284 {
285 unsigned row; /* current destination row */
286 unsigned col; /* current destination column */
287 unsigned k; /* current tile number */
288 const unsigned tile_width = 8, tile_height = 2;
289 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
290
291 assert(src_pitch % tile_width == 0);
292
293 k = 0;
294 for (row = 0; row < height; row += tile_height)
295 {
296 for (col = 0; col < width; col += tile_width, ++k)
297 {
298 uint16_t *src2 = (uint16_t *)src + row * src_pitch +
299 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
300 uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
301 unsigned j;
302
303 for (j = 0; j < MIN2(tile_height, height - row); ++j)
304 {
305 unsigned columns = MIN2(tile_width, width - col);
306 memcpy(dst2, src2, columns * sizeof(uint16_t));
307 dst2 += dst_pitch;
308 src2 += tile_width;
309 }
310 }
311 }
312 }
313
314 static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch,
315 void * const dst, unsigned dst_pitch,
316 unsigned width, unsigned height)
317 {
318 unsigned row; /* current destination row */
319 unsigned col; /* current destination column */
320 unsigned k; /* current tile number */
321 const unsigned tile_width = 4, tile_height = 4;
322 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
323
324 assert(src_pitch % tile_width == 0);
325
326 k = 0;
327 for (row = 0; row < height; row += tile_height)
328 {
329 for (col = 0; col < width; col += tile_width, ++k)
330 {
331 uint16_t *src2 = (uint16_t *)src + row * src_pitch +
332 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
333 uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
334 unsigned j;
335
336 for (j = 0; j < MIN2(tile_height, height - row); ++j)
337 {
338 unsigned columns = MIN2(tile_width, width - col);
339 memcpy(dst2, src2, columns * sizeof(uint16_t));
340 dst2 += dst_pitch;
341 src2 += tile_width;
342 }
343 }
344 }
345 }
346
347 static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch,
348 void * const dst, unsigned dst_pitch,
349 unsigned width, unsigned height)
350 {
351 unsigned row; /* current destination row */
352 unsigned col; /* current destination column */
353 unsigned k; /* current tile number */
354 const unsigned tile_width = 4, tile_height = 2;
355 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
356
357 assert(src_pitch % tile_width == 0);
358
359 k = 0;
360 for (row = 0; row < height; row += tile_height)
361 {
362 for (col = 0; col < width; col += tile_width, ++k)
363 {
364 uint32_t *src2 = (uint32_t *)src + row * src_pitch +
365 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
366 uint32_t *dst2 = (uint32_t *)dst + dst_pitch * row + col;
367 unsigned j;
368
369 for (j = 0; j < MIN2(tile_height, height - row); ++j)
370 {
371 unsigned columns = MIN2(tile_width, width - col);
372 memcpy(dst2, src2, columns * sizeof(uint32_t));
373 dst2 += dst_pitch;
374 src2 += tile_width;
375 }
376 }
377 }
378 }
379
380 static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch,
381 void * const dst, unsigned dst_pitch,
382 unsigned width, unsigned height)
383 {
384 unsigned row; /* current destination row */
385 unsigned col; /* current destination column */
386 unsigned k; /* current tile number */
387 const unsigned tile_width = 2, tile_height = 2;
388 const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
389
390 assert(src_pitch % tile_width == 0);
391
392 k = 0;
393 for (row = 0; row < height; row += tile_height)
394 {
395 for (col = 0; col < width; col += tile_width, ++k)
396 {
397 uint64_t *src2 = (uint64_t *)src + row * src_pitch +
398 (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
399 uint64_t *dst2 = (uint64_t *)dst + dst_pitch * row + col;
400 unsigned j;
401
402 for (j = 0; j < MIN2(tile_height, height - row); ++j)
403 {
404 unsigned columns = MIN2(tile_width, width - col);
405 memcpy(dst2, src2, columns * sizeof(uint64_t));
406 dst2 += dst_pitch;
407 src2 += tile_width;
408 }
409 }
410 }
411 }
412
/*
 * Copy an image of 128-bit texels stored as 1x1 micro tiles into a linear
 * one.
 *
 * With a 1x1 tile the tiled layout equals the linear one, so this is a
 * plain row-by-row copy that honours the two pitches.
 *
 * src, src_pitch - source image and its pitch, in texels
 * dst, dst_pitch - destination image and its pitch, in texels
 * width, height  - size of the area to copy, in texels
 */
static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                      void * dst, unsigned dst_pitch,
                                      unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* byte pointers: arithmetic on void * is not standard C */
    const uint8_t *in = src;
    uint8_t *out = dst;
    unsigned j;

    /*
     * Exactly one memcpy per row. Each memcpy already moves the whole row,
     * so iterating over the columns as well would copy width * height rows
     * and run past the end of both buffers.
     */
    for (j = 0; j < height; ++j)
    {
        memcpy(out, in, (size_t)width * elem_size);
        out += (size_t)dst_pitch * elem_size;
        in += (size_t)src_pitch * elem_size;
    }
}
430
431 void untile_image(const void * src, unsigned src_pitch,
432 void *dst, unsigned dst_pitch,
433 gl_format format, unsigned width, unsigned height)
434 {
435 assert(src_pitch >= width);
436 assert(dst_pitch >= width);
437 assert(src_pitch * _mesa_get_format_bytes(format) % MICRO_TILE_SIZE == 0);
438
439 switch (_mesa_get_format_bytes(format))
440 {
441 case 16:
442 micro_untile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
443 break;
444 case 8:
445 micro_untile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
446 break;
447 case 4:
448 micro_untile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
449 break;
450 case 2:
451 if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
452 {
453 micro_untile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
454 }
455 else
456 {
457 micro_untile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
458 }
459 break;
460 case 1:
461 micro_untile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
462 break;
463 default:
464 assert(0);
465 break;
466 }
467 }
468
469 void get_tile_size(gl_format format, unsigned *block_width, unsigned *block_height)
470 {
471 switch (_mesa_get_format_bytes(format))
472 {
473 case 16:
474 *block_width = 1;
475 *block_height = 1;
476 break;
477 case 8:
478 *block_width = 2;
479 *block_height = 2;
480 break;
481 case 4:
482 *block_width = 4;
483 *block_height = 2;
484 break;
485 case 2:
486 if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
487 {
488 *block_width = 4;
489 *block_height = 4;
490 }
491 else
492 {
493 *block_width = 8;
494 *block_height = 2;
495 }
496 break;
497 case 1:
498 *block_width = 8;
499 *block_height = 4;
500 break;
501 default:
502 assert(0);
503 break;
504 }
505 }