u_tile: Skip the packed temporary and just store tiles directly.
[mesa.git] / src / gallium / auxiliary / util / u_tile.c
1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * RGBA/float tile get/put functions.
30 * Usable both by drivers and state trackers.
31 */
32
33
34 #include "pipe/p_defines.h"
35 #include "util/u_inlines.h"
36
37 #include "util/format/u_format.h"
38 #include "util/format/u_format_bptc.h"
39 #include "util/u_math.h"
40 #include "util/u_memory.h"
41 #include "util/u_surface.h"
42 #include "util/u_tile.h"
43
44
45 /**
46 * Move raw block of pixels from transfer object to user memory.
47 */
48 void
49 pipe_get_tile_raw(struct pipe_transfer *pt,
50 const void *src,
51 uint x, uint y, uint w, uint h,
52 void *dst, int dst_stride)
53 {
54 if (dst_stride == 0)
55 dst_stride = util_format_get_stride(pt->resource->format, w);
56
57 if (u_clip_tile(x, y, &w, &h, &pt->box))
58 return;
59
60 util_copy_rect(dst, pt->resource->format, dst_stride, 0, 0, w, h, src, pt->stride, x, y);
61 }
62
63
64 /**
65 * Move raw block of pixels from user memory to transfer object.
66 */
67 void
68 pipe_put_tile_raw(struct pipe_transfer *pt,
69 void *dst,
70 uint x, uint y, uint w, uint h,
71 const void *src, int src_stride)
72 {
73 enum pipe_format format = pt->resource->format;
74
75 if (src_stride == 0)
76 src_stride = util_format_get_stride(format, w);
77
78 if (u_clip_tile(x, y, &w, &h, &pt->box))
79 return;
80
81 util_copy_rect(dst, format, pt->stride, x, y, w, h, src, src_stride, 0, 0);
82 }
83
84
85
86
/**
 * Map a signed 16-bit value in [-32768, 32767] onto a float in [-1.0, 1.0].
 */
#define SHORT_TO_FLOAT(S) \
   ((2.0F * (S) + 1.0F) * (1.0F / 65535.0F))

/**
 * Clamp the float f to [-1.0, 1.0], scale it by 32767 and assign the result,
 * converted to short, to us.  Requires a CLAMP() macro to be in scope.
 */
#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \
   us = ((short) (CLAMP((f), -1.0, 1.0) * 32767.0F))
92
93
94
/*** PIPE_FORMAT_Z16_UNORM ***/

/**
 * Expand 16-bit unorm Z values to floats, replicating each value into all
 * four components of the destination pixel.
 *
 * \param src         w * h consecutive 16-bit values (rows are packed
 *                    back to back, no per-row padding)
 * \param w, h        tile size in pixels
 * \param p           destination, four floats per pixel
 * \param dst_stride  distance between destination rows, counted in floats
 */
static void
z16_get_tile_rgba(const ushort *src,
                  unsigned w, unsigned h,
                  float *p,
                  unsigned dst_stride)
{
   const float scale = 1.0f / 65535.0f;
   unsigned row, col;

   for (row = 0; row < h; row++, p += dst_stride) {
      float *out = p;

      for (col = 0; col < w; col++, src++, out += 4) {
         const float z = *src * scale;

         out[0] = z;
         out[1] = z;
         out[2] = z;
         out[3] = z;
      }
   }
}
120
121
122
123
/*** PIPE_FORMAT_Z32_UNORM ***/

/**
 * Expand 32-bit unorm Z values to floats, replicating each value into all
 * four components of the destination pixel.
 *
 * The scaling is done in double precision and narrowed to float on store.
 *
 * \param src         w * h consecutive 32-bit values (rows packed back to
 *                    back, no per-row padding)
 * \param w, h        tile size in pixels
 * \param p           destination, four floats per pixel
 * \param dst_stride  distance between destination rows, counted in floats
 */
static void
z32_get_tile_rgba(const unsigned *src,
                  unsigned w, unsigned h,
                  float *p,
                  unsigned dst_stride)
{
   const double scale = 1.0 / (double) 0xffffffff;
   unsigned row, col;

   for (row = 0; row < h; row++, p += dst_stride) {
      float *out = p;

      for (col = 0; col < w; col++, src++, out += 4) {
         const float z = (float) (*src * scale);

         out[0] = z;
         out[1] = z;
         out[2] = z;
         out[3] = z;
      }
   }
}
149
150
/*** PIPE_FORMAT_Z24_UNORM_S8_UINT ***/

/**
 * Expand the 24-bit unorm Z value held in the low 24 bits of each 32-bit
 * word to a float, replicated into all four components.  The top 8 bits
 * of each word are ignored.
 *
 * \param src         w * h consecutive 32-bit words (rows packed back to
 *                    back, no per-row padding)
 * \param w, h        tile size in pixels
 * \param p           destination, four floats per pixel
 * \param dst_stride  distance between destination rows, counted in floats
 */
static void
s8z24_get_tile_rgba(const unsigned *src,
                    unsigned w, unsigned h,
                    float *p,
                    unsigned dst_stride)
{
   const double scale = 1.0 / ((1 << 24) - 1);
   unsigned row, col;

   for (row = 0; row < h; row++, p += dst_stride) {
      float *out = p;

      for (col = 0; col < w; col++, src++, out += 4) {
         const float z = (float) (scale * (*src & 0xffffff));

         out[0] = z;
         out[1] = z;
         out[2] = z;
         out[3] = z;
      }
   }
}
176
177
/*** PIPE_FORMAT_S8_UINT_Z24_UNORM ***/

/**
 * Expand the 24-bit unorm Z value held in the high 24 bits of each 32-bit
 * word to a float, replicated into all four components.  The low 8 bits
 * of each word are ignored.
 *
 * \param src         w * h consecutive 32-bit words (rows packed back to
 *                    back, no per-row padding)
 * \param w, h        tile size in pixels
 * \param p           destination, four floats per pixel
 * \param dst_stride  distance between destination rows, counted in floats
 */
static void
z24s8_get_tile_rgba(const unsigned *src,
                    unsigned w, unsigned h,
                    float *p,
                    unsigned dst_stride)
{
   const double scale = 1.0 / ((1 << 24) - 1);
   unsigned row, col;

   for (row = 0; row < h; row++, p += dst_stride) {
      float *out = p;

      for (col = 0; col < w; col++, src++, out += 4) {
         const float z = (float) (scale * (*src >> 8));

         out[0] = z;
         out[1] = z;
         out[2] = z;
         out[3] = z;
      }
   }
}
203
/*** PIPE_FORMAT_X24S8_UINT ***/

/**
 * Extract the 8-bit stencil value held in the top byte (bits 24..31) of
 * each 32-bit word and store it as four uint32_t per pixel.  The low 24
 * bits of each word are ignored.
 *
 * pipe_get_tile_rgba() selects this routine for PIPE_FORMAT_X24S8_UINT.
 *
 * \param src         w * h consecutive 32-bit words (rows packed back to
 *                    back, no per-row padding)
 * \param w, h        tile size in pixels
 * \param p           destination; written as uint32_t, four per pixel
 * \param dst_stride  distance between destination rows, counted in
 *                    float-sized elements
 */
static void
s8x24_get_tile_rgba(const unsigned *src,
                    unsigned w, unsigned h,
                    float *p,
                    unsigned dst_stride)
{
   unsigned row, col;

   for (row = 0; row < h; row++, p += dst_stride) {
      uint32_t *out = (uint32_t *) p;

      for (col = 0; col < w; col++, src++, out += 4) {
         const uint32_t stencil = (*src >> 24) & 0xff;

         out[0] = stencil;
         out[1] = stencil;
         out[2] = stencil;
         out[3] = stencil;
      }
   }
}
230
/*** PIPE_FORMAT_S8X24_UINT ***/

/**
 * Extract the 8-bit stencil value held in the low byte (bits 0..7) of each
 * 32-bit word and store it as four uint32_t per pixel.  The high 24 bits
 * of each word are ignored.
 *
 * pipe_get_tile_rgba() selects this routine for PIPE_FORMAT_S8X24_UINT.
 *
 * \param src         w * h consecutive 32-bit words (rows packed back to
 *                    back, no per-row padding)
 * \param w, h        tile size in pixels
 * \param p           destination; written as uint32_t, four per pixel
 * \param dst_stride  distance between destination rows, counted in
 *                    float-sized elements
 */
static void
x24s8_get_tile_rgba(const unsigned *src,
                    unsigned w, unsigned h,
                    float *p,
                    unsigned dst_stride)
{
   unsigned row, col;

   for (row = 0; row < h; row++, p += dst_stride) {
      uint32_t *out = (uint32_t *) p;

      for (col = 0; col < w; col++, src++, out += 4) {
         const uint32_t stencil = *src & 0xff;

         out[0] = stencil;
         out[1] = stencil;
         out[2] = stencil;
         out[3] = stencil;
      }
   }
}
255
256
/*** PIPE_FORMAT_S8_UINT ***/

/**
 * Widen 8-bit stencil values and store each one as four uint32_t per
 * pixel.
 *
 * \param src         w * h consecutive bytes (rows packed back to back,
 *                    no per-row padding)
 * \param w, h        tile size in pixels
 * \param p           destination; written as uint32_t, four per pixel
 * \param dst_stride  distance between destination rows, counted in
 *                    float-sized elements
 */
static void
s8_get_tile_rgba(const unsigned char *src,
                 unsigned w, unsigned h,
                 float *p,
                 unsigned dst_stride)
{
   unsigned row, col;

   for (row = 0; row < h; row++, p += dst_stride) {
      uint32_t *out = (uint32_t *) p;

      for (col = 0; col < w; col++, src++, out += 4) {
         const uint32_t stencil = *src & 0xff;

         out[0] = stencil;
         out[1] = stencil;
         out[2] = stencil;
         out[3] = stencil;
      }
   }
}
279
/*** PIPE_FORMAT_Z32_FLOAT ***/

/**
 * Replicate each float Z value into all four components of the
 * destination pixel.  Values are copied as-is, with no scaling or
 * clamping.
 *
 * \param src         w * h consecutive floats (rows packed back to back,
 *                    no per-row padding)
 * \param w, h        tile size in pixels
 * \param p           destination, four floats per pixel
 * \param dst_stride  distance between destination rows, counted in floats
 */
static void
z32f_get_tile_rgba(const float *src,
                   unsigned w, unsigned h,
                   float *p,
                   unsigned dst_stride)
{
   unsigned row, col;

   for (row = 0; row < h; row++, p += dst_stride) {
      float *out = p;

      for (col = 0; col < w; col++, src++, out += 4) {
         const float z = *src;

         out[0] = z;
         out[1] = z;
         out[2] = z;
         out[3] = z;
      }
   }
}
304
/*** PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ***/

/**
 * Replicate the float Z value of each source pixel into all four
 * components of the destination pixel.  Each source pixel occupies two
 * float-sized slots; only the first one is read, the second is skipped.
 *
 * \param src         w * h consecutive two-slot pixels (rows packed back
 *                    to back, no per-row padding)
 * \param w, h        tile size in pixels
 * \param p           destination, four floats per pixel
 * \param dst_stride  distance between destination rows, counted in floats
 */
static void
z32f_x24s8_get_tile_rgba(const float *src,
                         unsigned w, unsigned h,
                         float *p,
                         unsigned dst_stride)
{
   unsigned row, col;

   for (row = 0; row < h; row++, p += dst_stride) {
      float *out = p;

      for (col = 0; col < w; col++, src += 2, out += 4) {
         const float z = src[0];

         out[0] = z;
         out[1] = z;
         out[2] = z;
         out[3] = z;
      }
   }
}
330
/*** PIPE_FORMAT_X32_S8X24_UINT ***/

/**
 * Extract the 8-bit stencil value of each source pixel and store it as
 * four uint32_t per pixel.  Each source pixel occupies two 32-bit words:
 * the first is skipped and the low byte of the second is the value kept.
 *
 * \param src         w * h consecutive two-word pixels (rows packed back
 *                    to back, no per-row padding)
 * \param w, h        tile size in pixels
 * \param p           destination; written as uint32_t, four per pixel
 * \param dst_stride  distance between destination rows, counted in
 *                    float-sized elements
 */
static void
x32_s8_get_tile_rgba(const unsigned *src,
                     unsigned w, unsigned h,
                     float *p,
                     unsigned dst_stride)
{
   unsigned row, col;

   for (row = 0; row < h; row++, p += dst_stride) {
      uint32_t *out = (uint32_t *) p;

      for (col = 0; col < w; col++, src += 2, out += 4) {
         const uint32_t stencil = src[1] & 0xff;

         out[0] = stencil;
         out[1] = stencil;
         out[2] = stencil;
         out[3] = stencil;
      }
   }
}
356
357 void
358 pipe_put_tile_rgba(struct pipe_transfer *pt,
359 void *dst,
360 uint x, uint y, uint w, uint h,
361 enum pipe_format format, const void *p)
362 {
363 unsigned src_stride = w * 4;
364
365 if (u_clip_tile(x, y, &w, &h, &pt->box))
366 return;
367
368 /* softpipe's S8_UINT texture cache fetch needs to take the rgba_format
369 * path, not ui (since there's no ui unpack for s8, but it's technically
370 * pure integer).
371 */
372 if (util_format_is_pure_uint(format)) {
373 util_format_write_4ui(format,
374 p, src_stride * sizeof(float),
375 dst, pt->stride,
376 x, y, w, h);
377 } else if (util_format_is_pure_sint(format)) {
378 util_format_write_4i(format,
379 p, src_stride * sizeof(float),
380 dst, pt->stride,
381 x, y, w, h);
382 } else {
383 switch (format) {
384 case PIPE_FORMAT_Z16_UNORM:
385 /*z16_put_tile_rgba((ushort *) dst, w, h, p, src_stride);*/
386 break;
387 case PIPE_FORMAT_Z32_UNORM:
388 /*z32_put_tile_rgba((unsigned *) dst, w, h, p, src_stride);*/
389 break;
390 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
391 case PIPE_FORMAT_Z24X8_UNORM:
392 /*s8z24_put_tile_rgba((unsigned *) dst, w, h, p, src_stride);*/
393 break;
394 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
395 case PIPE_FORMAT_X8Z24_UNORM:
396 /*z24s8_put_tile_rgba((unsigned *) dst, w, h, p, src_stride);*/
397 break;
398 case PIPE_FORMAT_Z32_FLOAT:
399 /*z32f_put_tile_rgba((unsigned *) dst, w, h, p, src_stride);*/
400 break;
401 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
402 /*z32f_s8x24_put_tile_rgba((unsigned *) dst, w, h, p, src_stride);*/
403 break;
404 default:
405 util_format_write_4f(format,
406 p, src_stride * sizeof(float),
407 dst, pt->stride,
408 x, y, w, h);
409 }
410 }
411 }
412
413 void
414 pipe_get_tile_rgba(struct pipe_transfer *pt,
415 const void *src,
416 uint x, uint y, uint w, uint h,
417 enum pipe_format format,
418 void *dst)
419 {
420 unsigned dst_stride = w * 4;
421 void *packed;
422
423 if (u_clip_tile(x, y, &w, &h, &pt->box)) {
424 return;
425 }
426
427 packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format));
428 if (!packed) {
429 return;
430 }
431
432 if (format == PIPE_FORMAT_UYVY || format == PIPE_FORMAT_YUYV) {
433 assert((x & 1) == 0);
434 }
435
436 pipe_get_tile_raw(pt, src, x, y, w, h, packed, 0);
437
438 if (util_format_is_pure_uint(format) &&
439 !util_format_is_depth_or_stencil(format)) {
440 util_format_read_4ui(format,
441 dst, dst_stride * sizeof(float),
442 packed, util_format_get_stride(format, w),
443 0, 0, w, h);
444 } else if (util_format_is_pure_sint(format)) {
445 util_format_read_4i(format,
446 dst, dst_stride * sizeof(float),
447 packed, util_format_get_stride(format, w),
448 0, 0, w, h);
449 } else {
450 switch (format) {
451 case PIPE_FORMAT_Z16_UNORM:
452 z16_get_tile_rgba((ushort *) packed, w, h, dst, dst_stride);
453 break;
454 case PIPE_FORMAT_Z32_UNORM:
455 z32_get_tile_rgba((unsigned *) packed, w, h, dst, dst_stride);
456 break;
457 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
458 case PIPE_FORMAT_Z24X8_UNORM:
459 s8z24_get_tile_rgba((unsigned *) packed, w, h, dst, dst_stride);
460 break;
461 case PIPE_FORMAT_S8_UINT:
462 s8_get_tile_rgba((unsigned char *) packed, w, h, dst, dst_stride);
463 break;
464 case PIPE_FORMAT_X24S8_UINT:
465 s8x24_get_tile_rgba((unsigned *) packed, w, h, dst, dst_stride);
466 break;
467 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
468 case PIPE_FORMAT_X8Z24_UNORM:
469 z24s8_get_tile_rgba((unsigned *) packed, w, h, dst, dst_stride);
470 break;
471 case PIPE_FORMAT_S8X24_UINT:
472 x24s8_get_tile_rgba((unsigned *) packed, w, h, dst, dst_stride);
473 break;
474 case PIPE_FORMAT_Z32_FLOAT:
475 z32f_get_tile_rgba((float *) packed, w, h, dst, dst_stride);
476 break;
477 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
478 z32f_x24s8_get_tile_rgba((float *) packed, w, h, dst, dst_stride);
479 break;
480 case PIPE_FORMAT_X32_S8X24_UINT:
481 x32_s8_get_tile_rgba((unsigned *) packed, w, h, dst, dst_stride);
482 break;
483 default:
484 util_format_read_4f(format,
485 dst, dst_stride * sizeof(float),
486 packed, util_format_get_stride(format, w),
487 0, 0, w, h);
488 }
489 }
490
491 FREE(packed);
492 }