gallivm: handle 16bit float fetches in lp_build_fetch_rgba_soa
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_defines.h"
30
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
34
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_debug.h"
41 #include "lp_bld_format.h"
42 #include "lp_bld_arit.h"
43
44
45 void
46 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
47 struct lp_build_context *bld,
48 const LLVMValueRef *unswizzled,
49 LLVMValueRef swizzled_out[4])
50 {
51 assert(PIPE_SWIZZLE_0 == (int)PIPE_SWIZZLE_0);
52 assert(PIPE_SWIZZLE_1 == (int)PIPE_SWIZZLE_1);
53
54 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
55 enum pipe_swizzle swizzle;
56 LLVMValueRef depth_or_stencil;
57
58 if (util_format_has_stencil(format_desc) &&
59 !util_format_has_depth(format_desc)) {
60 assert(!bld->type.floating);
61 swizzle = format_desc->swizzle[1];
62 }
63 else {
64 assert(bld->type.floating);
65 swizzle = format_desc->swizzle[0];
66 }
67 /*
68 * Return zzz1 or sss1 for depth-stencil formats here.
69 * Correct swizzling will be handled by apply_sampler_swizzle() later.
70 */
71 depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
72
73 swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
74 swizzled_out[3] = bld->one;
75 }
76 else {
77 unsigned chan;
78 for (chan = 0; chan < 4; ++chan) {
79 enum pipe_swizzle swizzle = format_desc->swizzle[chan];
80 swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
81 }
82 }
83 }
84
85
86 /**
87 * Unpack several pixels in SoA.
88 *
89 * It takes a vector of packed pixels:
90 *
91 * packed = {P0, P1, P2, P3, ..., Pn}
92 *
93 * And will produce four vectors:
94 *
95 * red = {R0, R1, R2, R3, ..., Rn}
96 * green = {G0, G1, G2, G3, ..., Gn}
97 * blue = {B0, B1, B2, B3, ..., Bn}
98 * alpha = {A0, A1, A2, A3, ..., An}
99 *
100 * It requires that a packed pixel fits into an element of the output
101 * channels. The common case is when converting pixel with a depth of 32 bit or
102 * less into floats.
103 *
104 * \param format_desc the format of the 'packed' incoming pixel vector
105 * \param type the desired type for rgba_out (type.length = n, above)
106 * \param packed the incoming vector of packed pixels
107 * \param rgba_out returns the SoA R,G,B,A vectors
108 */
109 void
110 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
111 const struct util_format_description *format_desc,
112 struct lp_type type,
113 LLVMValueRef packed,
114 LLVMValueRef rgba_out[4])
115 {
116 LLVMBuilderRef builder = gallivm->builder;
117 struct lp_build_context bld;
118 LLVMValueRef inputs[4];
119 unsigned chan;
120
121 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
122 assert(format_desc->block.width == 1);
123 assert(format_desc->block.height == 1);
124 assert(format_desc->block.bits <= type.width);
125 /* FIXME: Support more output types */
126 assert(type.width == 32);
127
128 lp_build_context_init(&bld, gallivm, type);
129
130 /* Decode the input vector components */
131 for (chan = 0; chan < format_desc->nr_channels; ++chan) {
132 const unsigned width = format_desc->channel[chan].size;
133 const unsigned start = format_desc->channel[chan].shift;
134 const unsigned stop = start + width;
135 LLVMValueRef input;
136
137 input = packed;
138
139 switch(format_desc->channel[chan].type) {
140 case UTIL_FORMAT_TYPE_VOID:
141 input = lp_build_undef(gallivm, type);
142 break;
143
144 case UTIL_FORMAT_TYPE_UNSIGNED:
145 /*
146 * Align the LSB
147 */
148
149 if (start) {
150 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
151 }
152
153 /*
154 * Zero the MSBs
155 */
156
157 if (stop < format_desc->block.bits) {
158 unsigned mask = ((unsigned long long)1 << width) - 1;
159 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
160 }
161
162 /*
163 * Type conversion
164 */
165
166 if (type.floating) {
167 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
168 if (format_desc->swizzle[3] == chan) {
169 input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
170 }
171 else {
172 struct lp_type conv_type = lp_uint_type(type);
173 input = lp_build_srgb_to_linear(gallivm, conv_type, width, input);
174 }
175 }
176 else {
177 if(format_desc->channel[chan].normalized)
178 input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
179 else
180 input = LLVMBuildSIToFP(builder, input,
181 lp_build_vec_type(gallivm, type), "");
182 }
183 }
184 else if (format_desc->channel[chan].pure_integer) {
185 /* Nothing to do */
186 } else {
187 /* FIXME */
188 assert(0);
189 }
190
191 break;
192
193 case UTIL_FORMAT_TYPE_SIGNED:
194 /*
195 * Align the sign bit first.
196 */
197
198 if (stop < type.width) {
199 unsigned bits = type.width - stop;
200 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
201 input = LLVMBuildShl(builder, input, bits_val, "");
202 }
203
204 /*
205 * Align the LSB (with an arithmetic shift to preserve the sign)
206 */
207
208 if (format_desc->channel[chan].size < type.width) {
209 unsigned bits = type.width - format_desc->channel[chan].size;
210 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
211 input = LLVMBuildAShr(builder, input, bits_val, "");
212 }
213
214 /*
215 * Type conversion
216 */
217
218 if (type.floating) {
219 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
220 if (format_desc->channel[chan].normalized) {
221 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
222 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
223 input = LLVMBuildFMul(builder, input, scale_val, "");
224 /* the formula above will produce value below -1.0 for most negative
225 * value but everything seems happy with that hence disable for now */
226 if (0)
227 input = lp_build_max(&bld, input,
228 lp_build_const_vec(gallivm, type, -1.0f));
229 }
230 }
231 else if (format_desc->channel[chan].pure_integer) {
232 /* Nothing to do */
233 } else {
234 /* FIXME */
235 assert(0);
236 }
237
238 break;
239
240 case UTIL_FORMAT_TYPE_FLOAT:
241 if (type.floating) {
242 if (format_desc->channel[chan].size == 16) {
243 struct lp_type f16i_type = type;
244 f16i_type.width /= 2;
245 f16i_type.floating = 0;
246 if (start) {
247 input = LLVMBuildLShr(builder, input,
248 lp_build_const_int_vec(gallivm, type, start), "");
249 }
250 input = LLVMBuildTrunc(builder, input,
251 lp_build_vec_type(gallivm, f16i_type), "");
252 input = lp_build_half_to_float(gallivm, input);
253 } else {
254 assert(start == 0);
255 assert(stop == 32);
256 assert(type.width == 32);
257 }
258 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
259 }
260 else {
261 /* FIXME */
262 assert(0);
263 input = lp_build_undef(gallivm, type);
264 }
265 break;
266
267 case UTIL_FORMAT_TYPE_FIXED:
268 if (type.floating) {
269 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
270 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
271 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
272 input = LLVMBuildFMul(builder, input, scale_val, "");
273 }
274 else {
275 /* FIXME */
276 assert(0);
277 input = lp_build_undef(gallivm, type);
278 }
279 break;
280
281 default:
282 assert(0);
283 input = lp_build_undef(gallivm, type);
284 break;
285 }
286
287 inputs[chan] = input;
288 }
289
290 lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
291 }
292
293
294 /**
295 * Convert a vector of rgba8 values into 32bit wide SoA vectors.
296 *
297 * \param dst_type The desired return type. For pure integer formats
298 * this should be a 32bit wide int or uint vector type,
299 * otherwise a float vector type.
300 *
301 * \param packed The rgba8 values to pack.
302 *
303 * \param rgba The 4 SoA return vectors.
304 */
305 void
306 lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
307 struct lp_type dst_type,
308 LLVMValueRef packed,
309 LLVMValueRef *rgba)
310 {
311 LLVMBuilderRef builder = gallivm->builder;
312 LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
313 unsigned chan;
314
315 /* XXX technically shouldn't use that for uint dst_type */
316 packed = LLVMBuildBitCast(builder, packed,
317 lp_build_int_vec_type(gallivm, dst_type), "");
318
319 /* Decode the input vector components */
320 for (chan = 0; chan < 4; ++chan) {
321 #ifdef PIPE_ARCH_LITTLE_ENDIAN
322 unsigned start = chan*8;
323 #else
324 unsigned start = (3-chan)*8;
325 #endif
326 unsigned stop = start + 8;
327 LLVMValueRef input;
328
329 input = packed;
330
331 if (start)
332 input = LLVMBuildLShr(builder, input,
333 lp_build_const_int_vec(gallivm, dst_type, start), "");
334
335 if (stop < 32)
336 input = LLVMBuildAnd(builder, input, mask, "");
337
338 if (dst_type.floating)
339 input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
340
341 rgba[chan] = input;
342 }
343 }
344
345
346
347 /**
348 * Fetch a texels from a texture, returning them in SoA layout.
349 *
350 * \param type the desired return type for 'rgba'. The vector length
351 * is the number of texels to fetch
352 *
353 * \param base_ptr points to the base of the texture mip tree.
354 * \param offset offset to start of the texture image block. For non-
355 * compressed formats, this simply is an offset to the texel.
356 * For compressed formats, it is an offset to the start of the
357 * compressed data block.
358 *
359 * \param i, j the sub-block pixel coordinates. For non-compressed formats
360 * these will always be (0,0). For compressed formats, i will
361 * be in [0, block_width-1] and j will be in [0, block_height-1].
362 * \param cache optional value pointing to a lp_build_format_cache structure
363 */
364 void
365 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
366 const struct util_format_description *format_desc,
367 struct lp_type type,
368 LLVMValueRef base_ptr,
369 LLVMValueRef offset,
370 LLVMValueRef i,
371 LLVMValueRef j,
372 LLVMValueRef cache,
373 LLVMValueRef rgba_out[4])
374 {
375 LLVMBuilderRef builder = gallivm->builder;
376
377 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
378 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
379 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
380 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
381 format_desc->block.width == 1 &&
382 format_desc->block.height == 1 &&
383 format_desc->block.bits <= type.width &&
384 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
385 format_desc->channel[0].size == 32 ||
386 format_desc->channel[0].size == 16))
387 {
388 /*
389 * The packed pixel fits into an element of the destination format. Put
390 * the packed pixels into a vector and extract each component for all
391 * vector elements in parallel.
392 */
393
394 LLVMValueRef packed;
395
396 /*
397 * gather the texels from the texture
398 * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
399 */
400 assert(format_desc->block.bits <= type.width);
401 packed = lp_build_gather(gallivm,
402 type.length,
403 format_desc->block.bits,
404 type.width,
405 TRUE,
406 base_ptr, offset, FALSE);
407
408 /*
409 * convert texels to float rgba
410 */
411 lp_build_unpack_rgba_soa(gallivm,
412 format_desc,
413 type,
414 packed, rgba_out);
415 return;
416 }
417
418 if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
419 format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
420 /*
421 * similar conceptually to above but requiring special
422 * AoS packed -> SoA float conversion code.
423 */
424 LLVMValueRef packed;
425
426 assert(type.floating);
427 assert(type.width == 32);
428
429 packed = lp_build_gather(gallivm, type.length,
430 format_desc->block.bits,
431 type.width, TRUE,
432 base_ptr, offset, FALSE);
433 if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
434 lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
435 }
436 else {
437 lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
438 }
439 return;
440 }
441
442 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
443 format_desc->block.bits == 64) {
444 /*
445 * special case the format is 64 bits but we only require
446 * 32bit (or 8bit) from each block.
447 */
448 LLVMValueRef packed;
449
450 if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
451 /*
452 * for stencil simply fix up offsets - could in fact change
453 * base_ptr instead even outside the shader.
454 */
455 unsigned mask = (1 << 8) - 1;
456 LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
457 offset = LLVMBuildAdd(builder, offset, s_offset, "");
458 packed = lp_build_gather(gallivm, type.length, 32, type.width,
459 TRUE, base_ptr, offset, FALSE);
460 packed = LLVMBuildAnd(builder, packed,
461 lp_build_const_int_vec(gallivm, type, mask), "");
462 }
463 else {
464 assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
465 packed = lp_build_gather(gallivm, type.length, 32, type.width,
466 TRUE, base_ptr, offset, TRUE);
467 packed = LLVMBuildBitCast(builder, packed,
468 lp_build_vec_type(gallivm, type), "");
469 }
470 /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
471 rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
472 rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
473 return;
474 }
475
476 /*
477 * Try calling lp_build_fetch_rgba_aos for all pixels.
478 */
479
480 if (util_format_fits_8unorm(format_desc) &&
481 type.floating && type.width == 32 &&
482 (type.length == 1 || (type.length % 4 == 0))) {
483 struct lp_type tmp_type;
484 LLVMValueRef tmp;
485
486 memset(&tmp_type, 0, sizeof tmp_type);
487 tmp_type.width = 8;
488 tmp_type.length = type.length * 4;
489 tmp_type.norm = TRUE;
490
491 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
492 TRUE, base_ptr, offset, i, j, cache);
493
494 lp_build_rgba8_to_fi32_soa(gallivm,
495 type,
496 tmp,
497 rgba_out);
498
499 return;
500 }
501
502 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
503 /* non-srgb case is already handled above */
504 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB &&
505 type.floating && type.width == 32 &&
506 (type.length == 1 || (type.length % 4 == 0)) &&
507 cache) {
508 const struct util_format_description *format_decompressed;
509 const struct util_format_description *flinear_desc;
510 LLVMValueRef packed;
511 flinear_desc = util_format_description(util_format_linear(format_desc->format));
512 packed = lp_build_fetch_cached_texels(gallivm,
513 flinear_desc,
514 type.length,
515 base_ptr,
516 offset,
517 i, j,
518 cache);
519 packed = LLVMBuildBitCast(builder, packed,
520 lp_build_int_vec_type(gallivm, type), "");
521 /*
522 * The values are now packed so they match ordinary srgb RGBA8 format,
523 * hence need to use matching format for unpack.
524 */
525 format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB);
526
527 lp_build_unpack_rgba_soa(gallivm,
528 format_decompressed,
529 type,
530 packed, rgba_out);
531
532 return;
533 }
534
535 /*
536 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
537 *
538 * This is not the most efficient way of fetching pixels, as we
539 * miss some opportunities to do vectorization, but this is
540 * convenient for formats or scenarios for which there was no
541 * opportunity or incentive to optimize.
542 */
543
544 {
545 unsigned k, chan;
546 struct lp_type tmp_type;
547
548 if (gallivm_debug & GALLIVM_DEBUG_PERF) {
549 debug_printf("%s: scalar unpacking of %s\n",
550 __FUNCTION__, format_desc->short_name);
551 }
552
553 tmp_type = type;
554 tmp_type.length = 4;
555
556 for (chan = 0; chan < 4; ++chan) {
557 rgba_out[chan] = lp_build_undef(gallivm, type);
558 }
559
560 /* loop over number of pixels */
561 for(k = 0; k < type.length; ++k) {
562 LLVMValueRef index = lp_build_const_int32(gallivm, k);
563 LLVMValueRef offset_elem;
564 LLVMValueRef i_elem, j_elem;
565 LLVMValueRef tmp;
566
567 offset_elem = LLVMBuildExtractElement(builder, offset,
568 index, "");
569
570 i_elem = LLVMBuildExtractElement(builder, i, index, "");
571 j_elem = LLVMBuildExtractElement(builder, j, index, "");
572
573 /* Get a single float[4]={R,G,B,A} pixel */
574 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
575 TRUE, base_ptr, offset_elem,
576 i_elem, j_elem, cache);
577
578 /*
579 * Insert the AoS tmp value channels into the SoA result vectors at
580 * position = 'index'.
581 */
582 for (chan = 0; chan < 4; ++chan) {
583 LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
584 tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
585 rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
586 tmp_chan, index, "");
587 }
588 }
589 }
590 }