gallivm,llvmpipe,draw: Support multiple constant buffers.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_tile_image.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc. All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27
28 /**
29 * Code to convert images from tiled to linear and back.
30 * XXX there are quite a few assumptions about color and z/stencil being
31 * 32bpp.
32 */
33
34
35 #include "util/u_format.h"
36 #include "util/u_memory.h"
37 #include "lp_limits.h"
38 #include "lp_tile_image.h"
39
40
/**
 * Size in bytes of one TILE_SIZE x TILE_SIZE tile, presumably at 4 bytes
 * (32 bits) per pixel -- TODO confirm against the users of this macro.
 * NOTE(review): nothing in the code visible in this file references it.
 */
#define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4)
42
43
/**
 * Scatter one tiled 4x4 block of 32-bit words into a linear image.
 *
 * The 16 source words are contiguous and arranged as four 2x2 quads
 * (top-left, top-right, bottom-left, bottom-right), each quad stored in
 * row-major order.
 *
 * \param src         the 16 contiguous tiled words
 * \param dst         address of the block's top-left word in the linear image
 * \param dst_stride  distance between linear rows, in 32-bit words
 */
static void
untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride)
{
   /* tiled index of the word that lands at linear position [row][col] */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   uint32_t *line[4];
   unsigned row, col;

   line[0] = dst;
   line[1] = line[0] + dst_stride;
   line[2] = line[1] + dst_stride;
   line[3] = line[2] + dst_stride;

   for (row = 0; row < 4; row++) {
      for (col = 0; col < 4; col++) {
         line[row][col] = src[tiled_index[row][col]];
      }
   }
}
61
62
63
/**
 * Scatter one tiled 4x4 block of 16-bit words into a linear image.
 *
 * The 16 source words are contiguous and arranged as four 2x2 quads
 * (top-left, top-right, bottom-left, bottom-right), each quad stored in
 * row-major order.
 *
 * \param src         the 16 contiguous tiled words
 * \param dst         address of the block's top-left word in the linear image
 * \param dst_stride  distance between linear rows, in 16-bit words
 */
static void
untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride)
{
   /* tiled index of the word that lands at linear position [row][col] */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   uint16_t *line[4];
   unsigned row, col;

   line[0] = dst;
   line[1] = line[0] + dst_stride;
   line[2] = line[1] + dst_stride;
   line[3] = line[2] + dst_stride;

   for (row = 0; row < 4; row++) {
      for (col = 0; col < 4; col++) {
         line[row][col] = src[tiled_index[row][col]];
      }
   }
}
81
82
83
/**
 * Gather a 4x4 rectangle of 32-bit words from a linear image into one
 * tiled block of 16 contiguous words.
 *
 * The tiled block is written as four 2x2 quads (top-left, top-right,
 * bottom-left, bottom-right), each quad in row-major order.
 *
 * \param src         address of the rectangle's top-left word in the linear image
 * \param dst         receives the 16 contiguous tiled words
 * \param src_stride  distance between linear rows, in 32-bit words
 */
static void
tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride)
{
   /* tiled index that receives the word at linear position [row][col] */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   const uint32_t *line[4];
   unsigned row, col;

   line[0] = src;
   line[1] = line[0] + src_stride;
   line[2] = line[1] + src_stride;
   line[3] = line[2] + src_stride;

   for (row = 0; row < 4; row++) {
      for (col = 0; col < 4; col++) {
         dst[tiled_index[row][col]] = line[row][col];
      }
   }
}
101
102
103
/**
 * Gather a 4x4 rectangle of 16-bit words from a linear image into one
 * tiled block of 16 contiguous words.
 *
 * The tiled block is written as four 2x2 quads (top-left, top-right,
 * bottom-left, bottom-right), each quad in row-major order.
 *
 * \param src         address of the rectangle's top-left word in the linear image
 * \param dst         receives the 16 contiguous tiled words
 * \param src_stride  distance between linear rows, in 16-bit words
 */
static void
tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride)
{
   /* tiled index that receives the word at linear position [row][col] */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   const uint16_t *line[4];
   unsigned row, col;

   line[0] = src;
   line[1] = line[0] + src_stride;
   line[2] = line[1] + src_stride;
   line[3] = line[2] + src_stride;

   for (row = 0; row < 4; row++) {
      for (col = 0; col < 4; col++) {
         dst[tiled_index[row][col]] = line[row][col];
      }
   }
}
121
122
123
124 /**
125 * Convert a tiled image into a linear image.
126 * \param dst_stride dest row stride in bytes
127 */
128 void
129 lp_tiled_to_linear(const void *src, void *dst,
130 unsigned x, unsigned y,
131 unsigned width, unsigned height,
132 enum pipe_format format,
133 unsigned dst_stride,
134 unsigned tiles_per_row)
135 {
136 assert(x % TILE_SIZE == 0);
137 assert(y % TILE_SIZE == 0);
138 /*assert(width % TILE_SIZE == 0);
139 assert(height % TILE_SIZE == 0);*/
140
141 /* Note that Z/stencil surfaces use a different tiling size than
142 * color surfaces.
143 */
144 if (util_format_is_depth_or_stencil(format)) {
145 const uint bpp = util_format_get_blocksize(format);
146 const uint src_stride = dst_stride * TILE_VECTOR_WIDTH;
147 const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
148 const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp);
149
150 dst_stride /= bpp; /* convert from bytes to words */
151
152 if (bpp == 4) {
153 const uint32_t *src32 = (const uint32_t *) src;
154 uint32_t *dst32 = (uint32_t *) dst;
155 uint i, j;
156
157 for (j = 0; j < height; j += tile_h) {
158 for (i = 0; i < width; i += tile_w) {
159 /* compute offsets in 32-bit words */
160 uint ii = i + x, jj = j + y;
161 uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
162 * (tile_w * tile_h);
163 uint dst_offset = jj * dst_stride + ii;
164 untile_4_4_uint32(src32 + src_offset,
165 dst32 + dst_offset,
166 dst_stride);
167 }
168 }
169 }
170 else {
171 const uint16_t *src16 = (const uint16_t *) src;
172 uint16_t *dst16 = (uint16_t *) dst;
173 uint i, j;
174
175 assert(bpp == 2);
176
177 for (j = 0; j < height; j += tile_h) {
178 for (i = 0; i < width; i += tile_w) {
179 /* compute offsets in 16-bit words */
180 uint ii = i + x, jj = j + y;
181 uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
182 * (tile_w * tile_h);
183 uint dst_offset = jj * dst_stride + ii;
184 untile_4_4_uint16(src16 + src_offset,
185 dst16 + dst_offset,
186 dst_stride);
187 }
188 }
189 }
190 }
191 else {
192 assert(0);
193 }
194 }
195
196
197 /**
198 * Convert a linear image into a tiled image.
199 * \param src_stride source row stride in bytes
200 */
201 void
202 lp_linear_to_tiled(const void *src, void *dst,
203 unsigned x, unsigned y,
204 unsigned width, unsigned height,
205 enum pipe_format format,
206 unsigned src_stride,
207 unsigned tiles_per_row)
208 {
209 assert(x % TILE_SIZE == 0);
210 assert(y % TILE_SIZE == 0);
211 /*
212 assert(width % TILE_SIZE == 0);
213 assert(height % TILE_SIZE == 0);
214 */
215
216 if (util_format_is_depth_or_stencil(format)) {
217 const uint bpp = util_format_get_blocksize(format);
218 const uint dst_stride = src_stride * TILE_VECTOR_WIDTH;
219 const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
220 const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp);
221
222 src_stride /= bpp; /* convert from bytes to words */
223
224 if (bpp == 4) {
225 const uint32_t *src32 = (const uint32_t *) src;
226 uint32_t *dst32 = (uint32_t *) dst;
227 uint i, j;
228
229 for (j = 0; j < height; j += tile_h) {
230 for (i = 0; i < width; i += tile_w) {
231 /* compute offsets in 32-bit words */
232 uint ii = i + x, jj = j + y;
233 uint src_offset = jj * src_stride + ii;
234 uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
235 * (tile_w * tile_h);
236 tile_4_4_uint32(src32 + src_offset,
237 dst32 + dst_offset,
238 src_stride);
239 }
240 }
241 }
242 else {
243 const uint16_t *src16 = (const uint16_t *) src;
244 uint16_t *dst16 = (uint16_t *) dst;
245 uint i, j;
246
247 assert(bpp == 2);
248
249 for (j = 0; j < height; j += tile_h) {
250 for (i = 0; i < width; i += tile_w) {
251 /* compute offsets in 16-bit words */
252 uint ii = i + x, jj = j + y;
253 uint src_offset = jj * src_stride + ii;
254 uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
255 * (tile_w * tile_h);
256 tile_4_4_uint16(src16 + src_offset,
257 dst16 + dst_offset,
258 src_stride);
259 }
260 }
261 }
262 }
263 else {
264 assert(0);
265 }
266 }
267
268
269 /**
270 * For testing only.
271 */
272 void
273 test_tiled_linear_conversion(void *data,
274 enum pipe_format format,
275 unsigned width, unsigned height,
276 unsigned stride)
277 {
278 /* size in tiles */
279 unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE;
280 unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE;
281
282 uint8_t *tiled = MALLOC(wt * ht * TILE_SIZE * TILE_SIZE * 4);
283
284 /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/
285
286 lp_linear_to_tiled(data, tiled, 0, 0, width, height, format,
287 stride, wt);
288
289 lp_tiled_to_linear(tiled, data, 0, 0, width, height, format,
290 stride, wt);
291
292 FREE(tiled);
293 }
294