freedreno/a3xx: shadow sampler support
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_defines.h"
30
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
34
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_debug.h"
41 #include "lp_bld_format.h"
42 #include "lp_bld_arit.h"
43
44
45 void
46 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
47 struct lp_build_context *bld,
48 const LLVMValueRef *unswizzled,
49 LLVMValueRef swizzled_out[4])
50 {
51 assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
52 assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
53
54 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
55 enum util_format_swizzle swizzle;
56 LLVMValueRef depth_or_stencil;
57
58 if (util_format_has_stencil(format_desc) &&
59 !util_format_has_depth(format_desc)) {
60 assert(!bld->type.floating);
61 swizzle = format_desc->swizzle[1];
62 }
63 else {
64 assert(bld->type.floating);
65 swizzle = format_desc->swizzle[0];
66 }
67 /*
68 * Return zzz1 or sss1 for depth-stencil formats here.
69 * Correct swizzling will be handled by apply_sampler_swizzle() later.
70 */
71 depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
72
73 swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
74 swizzled_out[3] = bld->one;
75 }
76 else {
77 unsigned chan;
78 for (chan = 0; chan < 4; ++chan) {
79 enum util_format_swizzle swizzle = format_desc->swizzle[chan];
80 swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
81 }
82 }
83 }
84
85
86 /**
87 * Unpack several pixels in SoA.
88 *
89 * It takes a vector of packed pixels:
90 *
91 * packed = {P0, P1, P2, P3, ..., Pn}
92 *
93 * And will produce four vectors:
94 *
95 * red = {R0, R1, R2, R3, ..., Rn}
96 * green = {G0, G1, G2, G3, ..., Gn}
97 * blue = {B0, B1, B2, B3, ..., Bn}
98 * alpha = {A0, A1, A2, A3, ..., An}
99 *
100 * It requires that a packed pixel fits into an element of the output
101 * channels. The common case is when converting pixel with a depth of 32 bit or
102 * less into floats.
103 *
104 * \param format_desc the format of the 'packed' incoming pixel vector
105 * \param type the desired type for rgba_out (type.length = n, above)
106 * \param packed the incoming vector of packed pixels
107 * \param rgba_out returns the SoA R,G,B,A vectors
108 */
109 void
110 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
111 const struct util_format_description *format_desc,
112 struct lp_type type,
113 LLVMValueRef packed,
114 LLVMValueRef rgba_out[4])
115 {
116 LLVMBuilderRef builder = gallivm->builder;
117 struct lp_build_context bld;
118 LLVMValueRef inputs[4];
119 unsigned chan;
120
121 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
122 assert(format_desc->block.width == 1);
123 assert(format_desc->block.height == 1);
124 assert(format_desc->block.bits <= type.width);
125 /* FIXME: Support more output types */
126 assert(type.width == 32);
127
128 lp_build_context_init(&bld, gallivm, type);
129
130 /* Decode the input vector components */
131 for (chan = 0; chan < format_desc->nr_channels; ++chan) {
132 const unsigned width = format_desc->channel[chan].size;
133 const unsigned start = format_desc->channel[chan].shift;
134 const unsigned stop = start + width;
135 LLVMValueRef input;
136
137 input = packed;
138
139 switch(format_desc->channel[chan].type) {
140 case UTIL_FORMAT_TYPE_VOID:
141 input = lp_build_undef(gallivm, type);
142 break;
143
144 case UTIL_FORMAT_TYPE_UNSIGNED:
145 /*
146 * Align the LSB
147 */
148
149 if (start) {
150 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
151 }
152
153 /*
154 * Zero the MSBs
155 */
156
157 if (stop < format_desc->block.bits) {
158 unsigned mask = ((unsigned long long)1 << width) - 1;
159 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
160 }
161
162 /*
163 * Type conversion
164 */
165
166 if (type.floating) {
167 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
168 if (format_desc->swizzle[3] == chan) {
169 input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
170 }
171 else {
172 struct lp_type conv_type = lp_uint_type(type);
173 input = lp_build_srgb_to_linear(gallivm, conv_type, width, input);
174 }
175 }
176 else {
177 if(format_desc->channel[chan].normalized)
178 input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
179 else
180 input = LLVMBuildSIToFP(builder, input,
181 lp_build_vec_type(gallivm, type), "");
182 }
183 }
184 else if (format_desc->channel[chan].pure_integer) {
185 /* Nothing to do */
186 } else {
187 /* FIXME */
188 assert(0);
189 }
190
191 break;
192
193 case UTIL_FORMAT_TYPE_SIGNED:
194 /*
195 * Align the sign bit first.
196 */
197
198 if (stop < type.width) {
199 unsigned bits = type.width - stop;
200 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
201 input = LLVMBuildShl(builder, input, bits_val, "");
202 }
203
204 /*
205 * Align the LSB (with an arithmetic shift to preserve the sign)
206 */
207
208 if (format_desc->channel[chan].size < type.width) {
209 unsigned bits = type.width - format_desc->channel[chan].size;
210 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
211 input = LLVMBuildAShr(builder, input, bits_val, "");
212 }
213
214 /*
215 * Type conversion
216 */
217
218 if (type.floating) {
219 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
220 if (format_desc->channel[chan].normalized) {
221 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
222 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
223 input = LLVMBuildFMul(builder, input, scale_val, "");
224 /* the formula above will produce value below -1.0 for most negative
225 * value but everything seems happy with that hence disable for now */
226 if (0)
227 input = lp_build_max(&bld, input,
228 lp_build_const_vec(gallivm, type, -1.0f));
229 }
230 }
231 else if (format_desc->channel[chan].pure_integer) {
232 /* Nothing to do */
233 } else {
234 /* FIXME */
235 assert(0);
236 }
237
238 break;
239
240 case UTIL_FORMAT_TYPE_FLOAT:
241 if (type.floating) {
242 assert(start == 0);
243 assert(stop == 32);
244 assert(type.width == 32);
245 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
246 }
247 else {
248 /* FIXME */
249 assert(0);
250 input = lp_build_undef(gallivm, type);
251 }
252 break;
253
254 case UTIL_FORMAT_TYPE_FIXED:
255 if (type.floating) {
256 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
257 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
258 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
259 input = LLVMBuildFMul(builder, input, scale_val, "");
260 }
261 else {
262 /* FIXME */
263 assert(0);
264 input = lp_build_undef(gallivm, type);
265 }
266 break;
267
268 default:
269 assert(0);
270 input = lp_build_undef(gallivm, type);
271 break;
272 }
273
274 inputs[chan] = input;
275 }
276
277 lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
278 }
279
280
281 /**
282 * Convert a vector of rgba8 values into 32bit wide SoA vectors.
283 *
284 * \param dst_type The desired return type. For pure integer formats
285 * this should be a 32bit wide int or uint vector type,
286 * otherwise a float vector type.
287 *
288 * \param packed The rgba8 values to pack.
289 *
290 * \param rgba The 4 SoA return vectors.
291 */
292 void
293 lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
294 struct lp_type dst_type,
295 LLVMValueRef packed,
296 LLVMValueRef *rgba)
297 {
298 LLVMBuilderRef builder = gallivm->builder;
299 LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
300 unsigned chan;
301
302 /* XXX technically shouldn't use that for uint dst_type */
303 packed = LLVMBuildBitCast(builder, packed,
304 lp_build_int_vec_type(gallivm, dst_type), "");
305
306 /* Decode the input vector components */
307 for (chan = 0; chan < 4; ++chan) {
308 #ifdef PIPE_ARCH_LITTLE_ENDIAN
309 unsigned start = chan*8;
310 #else
311 unsigned start = (3-chan)*8;
312 #endif
313 unsigned stop = start + 8;
314 LLVMValueRef input;
315
316 input = packed;
317
318 if (start)
319 input = LLVMBuildLShr(builder, input,
320 lp_build_const_int_vec(gallivm, dst_type, start), "");
321
322 if (stop < 32)
323 input = LLVMBuildAnd(builder, input, mask, "");
324
325 if (dst_type.floating)
326 input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
327
328 rgba[chan] = input;
329 }
330 }
331
332
333
334 /**
335 * Fetch a texels from a texture, returning them in SoA layout.
336 *
337 * \param type the desired return type for 'rgba'. The vector length
338 * is the number of texels to fetch
339 *
340 * \param base_ptr points to the base of the texture mip tree.
341 * \param offset offset to start of the texture image block. For non-
342 * compressed formats, this simply is an offset to the texel.
343 * For compressed formats, it is an offset to the start of the
344 * compressed data block.
345 *
346 * \param i, j the sub-block pixel coordinates. For non-compressed formats
347 * these will always be (0,0). For compressed formats, i will
348 * be in [0, block_width-1] and j will be in [0, block_height-1].
349 */
350 void
351 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
352 const struct util_format_description *format_desc,
353 struct lp_type type,
354 LLVMValueRef base_ptr,
355 LLVMValueRef offset,
356 LLVMValueRef i,
357 LLVMValueRef j,
358 LLVMValueRef rgba_out[4])
359 {
360 LLVMBuilderRef builder = gallivm->builder;
361
362 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
363 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
364 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
365 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
366 format_desc->block.width == 1 &&
367 format_desc->block.height == 1 &&
368 format_desc->block.bits <= type.width &&
369 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
370 format_desc->channel[0].size == 32))
371 {
372 /*
373 * The packed pixel fits into an element of the destination format. Put
374 * the packed pixels into a vector and extract each component for all
375 * vector elements in parallel.
376 */
377
378 LLVMValueRef packed;
379
380 /*
381 * gather the texels from the texture
382 * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
383 */
384 assert(format_desc->block.bits <= type.width);
385 packed = lp_build_gather(gallivm,
386 type.length,
387 format_desc->block.bits,
388 type.width,
389 base_ptr, offset, FALSE);
390
391 /*
392 * convert texels to float rgba
393 */
394 lp_build_unpack_rgba_soa(gallivm,
395 format_desc,
396 type,
397 packed, rgba_out);
398 return;
399 }
400
401 if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
402 format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
403 /*
404 * similar conceptually to above but requiring special
405 * AoS packed -> SoA float conversion code.
406 */
407 LLVMValueRef packed;
408
409 assert(type.floating);
410 assert(type.width == 32);
411
412 packed = lp_build_gather(gallivm, type.length,
413 format_desc->block.bits,
414 type.width, base_ptr, offset,
415 FALSE);
416 if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
417 lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
418 }
419 else {
420 lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
421 }
422 return;
423 }
424
425 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
426 format_desc->block.bits == 64) {
427 /*
428 * special case the format is 64 bits but we only require
429 * 32bit (or 8bit) from each block.
430 */
431 LLVMValueRef packed;
432
433 if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
434 /*
435 * for stencil simply fix up offsets - could in fact change
436 * base_ptr instead even outside the shader.
437 */
438 unsigned mask = (1 << 8) - 1;
439 LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
440 offset = LLVMBuildAdd(builder, offset, s_offset, "");
441 packed = lp_build_gather(gallivm, type.length,
442 32, type.width, base_ptr, offset, FALSE);
443 packed = LLVMBuildAnd(builder, packed,
444 lp_build_const_int_vec(gallivm, type, mask), "");
445 }
446 else {
447 assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
448 packed = lp_build_gather(gallivm, type.length,
449 32, type.width, base_ptr, offset, TRUE);
450 packed = LLVMBuildBitCast(builder, packed,
451 lp_build_vec_type(gallivm, type), "");
452 }
453 /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
454 rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
455 rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
456 return;
457 }
458
459 /*
460 * Try calling lp_build_fetch_rgba_aos for all pixels.
461 */
462
463 if (util_format_fits_8unorm(format_desc) &&
464 type.floating && type.width == 32 &&
465 (type.length == 1 || (type.length % 4 == 0))) {
466 struct lp_type tmp_type;
467 LLVMValueRef tmp;
468
469 memset(&tmp_type, 0, sizeof tmp_type);
470 tmp_type.width = 8;
471 tmp_type.length = type.length * 4;
472 tmp_type.norm = TRUE;
473
474 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
475 base_ptr, offset, i, j);
476
477 lp_build_rgba8_to_fi32_soa(gallivm,
478 type,
479 tmp,
480 rgba_out);
481
482 return;
483 }
484
485 /*
486 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
487 *
488 * This is not the most efficient way of fetching pixels, as we
489 * miss some opportunities to do vectorization, but this is
490 * convenient for formats or scenarios for which there was no
491 * opportunity or incentive to optimize.
492 */
493
494 {
495 unsigned k, chan;
496 struct lp_type tmp_type;
497
498 if (gallivm_debug & GALLIVM_DEBUG_PERF) {
499 debug_printf("%s: scalar unpacking of %s\n",
500 __FUNCTION__, format_desc->short_name);
501 }
502
503 tmp_type = type;
504 tmp_type.length = 4;
505
506 for (chan = 0; chan < 4; ++chan) {
507 rgba_out[chan] = lp_build_undef(gallivm, type);
508 }
509
510 /* loop over number of pixels */
511 for(k = 0; k < type.length; ++k) {
512 LLVMValueRef index = lp_build_const_int32(gallivm, k);
513 LLVMValueRef offset_elem;
514 LLVMValueRef i_elem, j_elem;
515 LLVMValueRef tmp;
516
517 offset_elem = LLVMBuildExtractElement(builder, offset,
518 index, "");
519
520 i_elem = LLVMBuildExtractElement(builder, i, index, "");
521 j_elem = LLVMBuildExtractElement(builder, j, index, "");
522
523 /* Get a single float[4]={R,G,B,A} pixel */
524 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
525 base_ptr, offset_elem,
526 i_elem, j_elem);
527
528 /*
529 * Insert the AoS tmp value channels into the SoA result vectors at
530 * position = 'index'.
531 */
532 for (chan = 0; chan < 4; ++chan) {
533 LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
534 tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
535 rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
536 tmp_chan, index, "");
537 }
538 }
539 }
540 }