gallivm: Universal format support on lp_build_fetch_rgba_aos via util_format_descript...
[mesa.git] / src / gallium / drivers / llvmpipe / lp_tile_image.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc. All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27
28 /**
29 * Code to convert images from tiled to linear and back.
30 * XXX there are quite a few assumptions about color and z/stencil being
31 * 32bpp.
32 */
33
34
35 #include "util/u_format.h"
36 #include "lp_tile_soa.h"
37 #include "lp_tile_image.h"
38
39
40 #define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4)
41
42
/**
 * Copy one 4x4 block of 32-bit words from tiled storage to a linear image.
 *
 * The 16 tiled words are contiguous and grouped as four 2x2 quads
 * (top-left, top-right, bottom-left, bottom-right); inside a quad the
 * words are stored row by row.
 *
 * \param src         the 16 contiguous tiled words
 * \param dst         top-left word of the block in the linear image
 * \param dst_stride  distance between linear rows, in 32-bit words
 */
static void
untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride)
{
   /* tiled position of the word found at [row][column] of the block */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   unsigned row, col;

   for (row = 0; row < 4; row++) {
      uint32_t *out = dst + (size_t) row * dst_stride;

      for (col = 0; col < 4; col++) {
         out[col] = src[tiled_index[row][col]];
      }
   }
}
60
61
62
/**
 * Copy one 4x4 block of 16-bit words from tiled storage to a linear image.
 *
 * The 16 tiled words are contiguous and grouped as four 2x2 quads
 * (top-left, top-right, bottom-left, bottom-right); inside a quad the
 * words are stored row by row.
 *
 * \param src         the 16 contiguous tiled words
 * \param dst         top-left word of the block in the linear image
 * \param dst_stride  distance between linear rows, in 16-bit words
 */
static void
untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride)
{
   /* tiled position of the word found at [row][column] of the block */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   unsigned row, col;

   for (row = 0; row < 4; row++) {
      uint16_t *out = dst + (size_t) row * dst_stride;

      for (col = 0; col < 4; col++) {
         out[col] = src[tiled_index[row][col]];
      }
   }
}
80
81
82
/**
 * Gather a 4x4 rectangle of 32-bit words from a linear image into tiled
 * storage.
 *
 * The 16 output words are contiguous and grouped as four 2x2 quads
 * (top-left, top-right, bottom-left, bottom-right); inside a quad the
 * words are stored row by row.
 *
 * \param src         top-left word of the rectangle in the linear image
 * \param dst         receives the 16 contiguous tiled words
 * \param src_stride  distance between linear rows, in 32-bit words
 */
static void
tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride)
{
   /* tiled position of the word found at [row][column] of the rectangle */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   unsigned row, col;

   for (row = 0; row < 4; row++) {
      const uint32_t *in = src + (size_t) row * src_stride;

      for (col = 0; col < 4; col++) {
         dst[tiled_index[row][col]] = in[col];
      }
   }
}
100
101
102
/**
 * Gather a 4x4 rectangle of 16-bit words from a linear image into tiled
 * storage.
 *
 * The 16 output words are contiguous and grouped as four 2x2 quads
 * (top-left, top-right, bottom-left, bottom-right); inside a quad the
 * words are stored row by row.
 *
 * \param src         top-left word of the rectangle in the linear image
 * \param dst         receives the 16 contiguous tiled words
 * \param src_stride  distance between linear rows, in 16-bit words
 */
static void
tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride)
{
   /* tiled position of the word found at [row][column] of the rectangle */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   unsigned row, col;

   for (row = 0; row < 4; row++) {
      const uint16_t *in = src + (size_t) row * src_stride;

      for (col = 0; col < 4; col++) {
         dst[tiled_index[row][col]] = in[col];
      }
   }
}
120
121
122
123 /**
124 * Convert a tiled image into a linear image.
125 * \param src_stride source row stride in bytes (bytes per row of tiles)
126 * \param dst_stride dest row stride in bytes
127 */
128 void
129 lp_tiled_to_linear(const void *src, void *dst,
130 unsigned x, unsigned y,
131 unsigned width, unsigned height,
132 enum pipe_format format, unsigned dst_stride)
133 {
134 assert(x % TILE_SIZE == 0);
135 assert(y % TILE_SIZE == 0);
136 /*assert(width % TILE_SIZE == 0);
137 assert(height % TILE_SIZE == 0);*/
138
139 /* Note that Z/stencil surfaces use a different tiling size than
140 * color surfaces.
141 */
142 if (util_format_is_depth_or_stencil(format)) {
143 const uint bpp = util_format_get_blocksize(format);
144 const uint src_stride = dst_stride * TILE_VECTOR_WIDTH;
145 const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
146 const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp);
147
148 dst_stride /= bpp; /* convert from bytes to words */
149
150 if (bpp == 4) {
151 const uint32_t *src32 = (const uint32_t *) src;
152 uint32_t *dst32 = (uint32_t *) dst;
153 uint i, j;
154
155 for (j = 0; j < height; j += tile_h) {
156 for (i = 0; i < width; i += tile_w) {
157 /* compute offsets in 32-bit words */
158 uint ii = i + x, jj = j + y;
159 uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
160 * (tile_w * tile_h);
161 uint dst_offset = jj * dst_stride + ii;
162 untile_4_4_uint32(src32 + src_offset,
163 dst32 + dst_offset,
164 dst_stride);
165 }
166 }
167 }
168 else {
169 const uint16_t *src16 = (const uint16_t *) src;
170 uint16_t *dst16 = (uint16_t *) dst;
171 uint i, j;
172
173 assert(bpp == 2);
174
175 for (j = 0; j < height; j += tile_h) {
176 for (i = 0; i < width; i += tile_w) {
177 /* compute offsets in 16-bit words */
178 uint ii = i + x, jj = j + y;
179 uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
180 * (tile_w * tile_h);
181 uint dst_offset = jj * dst_stride + ii;
182 untile_4_4_uint16(src16 + src_offset,
183 dst16 + dst_offset,
184 dst_stride);
185 }
186 }
187 }
188 }
189 else {
190 /* color image */
191 const uint bpp = 4;
192 const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
193 const uint bytes_per_tile = tile_w * tile_h * bpp;
194 const uint src_stride = dst_stride * tile_w;
195 const uint tiles_per_row = src_stride / bytes_per_tile;
196 uint i, j;
197
198 for (j = 0; j < height; j += tile_h) {
199 for (i = 0; i < width; i += tile_w) {
200 uint ii = i + x, jj = j + y;
201 uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
202 uint byte_offset = tile_offset * bytes_per_tile;
203 const uint8_t *src_tile = (uint8_t *) src + byte_offset;
204
205 lp_tile_write_4ub(format,
206 src_tile,
207 dst, dst_stride,
208 ii, jj, tile_w, tile_h);
209 }
210 }
211 }
212 }
213
214
215 /**
216 * Convert a linear image into a tiled image.
217 * \param src_stride source row stride in bytes
218 * \param dst_stride dest row stride in bytes (bytes per row of tiles)
219 */
220 void
221 lp_linear_to_tiled(const void *src, void *dst,
222 unsigned x, unsigned y,
223 unsigned width, unsigned height,
224 enum pipe_format format, unsigned src_stride)
225 {
226 assert(x % TILE_SIZE == 0);
227 assert(y % TILE_SIZE == 0);
228 /*
229 assert(width % TILE_SIZE == 0);
230 assert(height % TILE_SIZE == 0);
231 */
232
233 if (util_format_is_depth_or_stencil(format)) {
234 const uint bpp = util_format_get_blocksize(format);
235 const uint dst_stride = src_stride * TILE_VECTOR_WIDTH;
236 const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
237 const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp);
238
239 src_stride /= bpp; /* convert from bytes to words */
240
241 if (bpp == 4) {
242 const uint32_t *src32 = (const uint32_t *) src;
243 uint32_t *dst32 = (uint32_t *) dst;
244 uint i, j;
245
246 for (j = 0; j < height; j += tile_h) {
247 for (i = 0; i < width; i += tile_w) {
248 /* compute offsets in 32-bit words */
249 uint ii = i + x, jj = j + y;
250 uint src_offset = jj * src_stride + ii;
251 uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
252 * (tile_w * tile_h);
253 tile_4_4_uint32(src32 + src_offset,
254 dst32 + dst_offset,
255 src_stride);
256 }
257 }
258 }
259 else {
260 const uint16_t *src16 = (const uint16_t *) src;
261 uint16_t *dst16 = (uint16_t *) dst;
262 uint i, j;
263
264 assert(bpp == 2);
265
266 for (j = 0; j < height; j += tile_h) {
267 for (i = 0; i < width; i += tile_w) {
268 /* compute offsets in 16-bit words */
269 uint ii = i + x, jj = j + y;
270 uint src_offset = jj * src_stride + ii;
271 uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
272 * (tile_w * tile_h);
273 tile_4_4_uint16(src16 + src_offset,
274 dst16 + dst_offset,
275 src_stride);
276 }
277 }
278 }
279 }
280 else {
281 const uint bpp = 4;
282 const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
283 const uint bytes_per_tile = tile_w * tile_h * bpp;
284 const uint dst_stride = src_stride * tile_w;
285 const uint tiles_per_row = dst_stride / bytes_per_tile;
286 uint i, j;
287
288 for (j = 0; j < height; j += TILE_SIZE) {
289 for (i = 0; i < width; i += TILE_SIZE) {
290 uint ii = i + x, jj = j + y;
291 uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
292 uint byte_offset = tile_offset * bytes_per_tile;
293 uint8_t *dst_tile = (uint8_t *) dst + byte_offset;
294
295 lp_tile_read_4ub(format,
296 dst_tile,
297 src, src_stride,
298 ii, jj, tile_w, tile_h);
299 }
300 }
301 }
302 }
303
304
305 /**
306 * For testing only.
307 */
308 void
309 test_tiled_linear_conversion(void *data,
310 enum pipe_format format,
311 unsigned width, unsigned height,
312 unsigned stride)
313 {
314 /* size in tiles */
315 unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE;
316 unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE;
317
318 uint8_t *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4);
319
320 /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/
321
322 lp_linear_to_tiled(data, tiled, 0, 0, width, height, format,
323 stride);
324
325 lp_tiled_to_linear(tiled, data, 0, 0, width, height, format,
326 stride);
327
328 free(tiled);
329 }
330