freedreno/ir3: array rework
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_defines.h"
30
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
34
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_debug.h"
41 #include "lp_bld_format.h"
42 #include "lp_bld_arit.h"
43
44
45 void
46 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
47 struct lp_build_context *bld,
48 const LLVMValueRef *unswizzled,
49 LLVMValueRef swizzled_out[4])
50 {
51 assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
52 assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
53
54 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
55 enum util_format_swizzle swizzle;
56 LLVMValueRef depth_or_stencil;
57
58 if (util_format_has_stencil(format_desc) &&
59 !util_format_has_depth(format_desc)) {
60 assert(!bld->type.floating);
61 swizzle = format_desc->swizzle[1];
62 }
63 else {
64 assert(bld->type.floating);
65 swizzle = format_desc->swizzle[0];
66 }
67 /*
68 * Return zzz1 or sss1 for depth-stencil formats here.
69 * Correct swizzling will be handled by apply_sampler_swizzle() later.
70 */
71 depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
72
73 swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
74 swizzled_out[3] = bld->one;
75 }
76 else {
77 unsigned chan;
78 for (chan = 0; chan < 4; ++chan) {
79 enum util_format_swizzle swizzle = format_desc->swizzle[chan];
80 swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
81 }
82 }
83 }
84
85
86 /**
87 * Unpack several pixels in SoA.
88 *
89 * It takes a vector of packed pixels:
90 *
91 * packed = {P0, P1, P2, P3, ..., Pn}
92 *
93 * And will produce four vectors:
94 *
95 * red = {R0, R1, R2, R3, ..., Rn}
96 * green = {G0, G1, G2, G3, ..., Gn}
97 * blue = {B0, B1, B2, B3, ..., Bn}
98 * alpha = {A0, A1, A2, A3, ..., An}
99 *
100 * It requires that a packed pixel fits into an element of the output
101 * channels. The common case is when converting pixel with a depth of 32 bit or
102 * less into floats.
103 *
104 * \param format_desc the format of the 'packed' incoming pixel vector
105 * \param type the desired type for rgba_out (type.length = n, above)
106 * \param packed the incoming vector of packed pixels
107 * \param rgba_out returns the SoA R,G,B,A vectors
108 */
109 void
110 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
111 const struct util_format_description *format_desc,
112 struct lp_type type,
113 LLVMValueRef packed,
114 LLVMValueRef rgba_out[4])
115 {
116 LLVMBuilderRef builder = gallivm->builder;
117 struct lp_build_context bld;
118 LLVMValueRef inputs[4];
119 unsigned chan;
120
121 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
122 assert(format_desc->block.width == 1);
123 assert(format_desc->block.height == 1);
124 assert(format_desc->block.bits <= type.width);
125 /* FIXME: Support more output types */
126 assert(type.width == 32);
127
128 lp_build_context_init(&bld, gallivm, type);
129
130 /* Decode the input vector components */
131 for (chan = 0; chan < format_desc->nr_channels; ++chan) {
132 const unsigned width = format_desc->channel[chan].size;
133 const unsigned start = format_desc->channel[chan].shift;
134 const unsigned stop = start + width;
135 LLVMValueRef input;
136
137 input = packed;
138
139 switch(format_desc->channel[chan].type) {
140 case UTIL_FORMAT_TYPE_VOID:
141 input = lp_build_undef(gallivm, type);
142 break;
143
144 case UTIL_FORMAT_TYPE_UNSIGNED:
145 /*
146 * Align the LSB
147 */
148
149 if (start) {
150 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
151 }
152
153 /*
154 * Zero the MSBs
155 */
156
157 if (stop < format_desc->block.bits) {
158 unsigned mask = ((unsigned long long)1 << width) - 1;
159 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
160 }
161
162 /*
163 * Type conversion
164 */
165
166 if (type.floating) {
167 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
168 if (format_desc->swizzle[3] == chan) {
169 input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
170 }
171 else {
172 struct lp_type conv_type = lp_uint_type(type);
173 input = lp_build_srgb_to_linear(gallivm, conv_type, width, input);
174 }
175 }
176 else {
177 if(format_desc->channel[chan].normalized)
178 input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
179 else
180 input = LLVMBuildSIToFP(builder, input,
181 lp_build_vec_type(gallivm, type), "");
182 }
183 }
184 else if (format_desc->channel[chan].pure_integer) {
185 /* Nothing to do */
186 } else {
187 /* FIXME */
188 assert(0);
189 }
190
191 break;
192
193 case UTIL_FORMAT_TYPE_SIGNED:
194 /*
195 * Align the sign bit first.
196 */
197
198 if (stop < type.width) {
199 unsigned bits = type.width - stop;
200 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
201 input = LLVMBuildShl(builder, input, bits_val, "");
202 }
203
204 /*
205 * Align the LSB (with an arithmetic shift to preserve the sign)
206 */
207
208 if (format_desc->channel[chan].size < type.width) {
209 unsigned bits = type.width - format_desc->channel[chan].size;
210 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
211 input = LLVMBuildAShr(builder, input, bits_val, "");
212 }
213
214 /*
215 * Type conversion
216 */
217
218 if (type.floating) {
219 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
220 if (format_desc->channel[chan].normalized) {
221 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
222 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
223 input = LLVMBuildFMul(builder, input, scale_val, "");
224 /* the formula above will produce value below -1.0 for most negative
225 * value but everything seems happy with that hence disable for now */
226 if (0)
227 input = lp_build_max(&bld, input,
228 lp_build_const_vec(gallivm, type, -1.0f));
229 }
230 }
231 else if (format_desc->channel[chan].pure_integer) {
232 /* Nothing to do */
233 } else {
234 /* FIXME */
235 assert(0);
236 }
237
238 break;
239
240 case UTIL_FORMAT_TYPE_FLOAT:
241 if (type.floating) {
242 assert(start == 0);
243 assert(stop == 32);
244 assert(type.width == 32);
245 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
246 }
247 else {
248 /* FIXME */
249 assert(0);
250 input = lp_build_undef(gallivm, type);
251 }
252 break;
253
254 case UTIL_FORMAT_TYPE_FIXED:
255 if (type.floating) {
256 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
257 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
258 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
259 input = LLVMBuildFMul(builder, input, scale_val, "");
260 }
261 else {
262 /* FIXME */
263 assert(0);
264 input = lp_build_undef(gallivm, type);
265 }
266 break;
267
268 default:
269 assert(0);
270 input = lp_build_undef(gallivm, type);
271 break;
272 }
273
274 inputs[chan] = input;
275 }
276
277 lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
278 }
279
280
281 /**
282 * Convert a vector of rgba8 values into 32bit wide SoA vectors.
283 *
284 * \param dst_type The desired return type. For pure integer formats
285 * this should be a 32bit wide int or uint vector type,
286 * otherwise a float vector type.
287 *
288 * \param packed The rgba8 values to pack.
289 *
290 * \param rgba The 4 SoA return vectors.
291 */
292 void
293 lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
294 struct lp_type dst_type,
295 LLVMValueRef packed,
296 LLVMValueRef *rgba)
297 {
298 LLVMBuilderRef builder = gallivm->builder;
299 LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
300 unsigned chan;
301
302 /* XXX technically shouldn't use that for uint dst_type */
303 packed = LLVMBuildBitCast(builder, packed,
304 lp_build_int_vec_type(gallivm, dst_type), "");
305
306 /* Decode the input vector components */
307 for (chan = 0; chan < 4; ++chan) {
308 #ifdef PIPE_ARCH_LITTLE_ENDIAN
309 unsigned start = chan*8;
310 #else
311 unsigned start = (3-chan)*8;
312 #endif
313 unsigned stop = start + 8;
314 LLVMValueRef input;
315
316 input = packed;
317
318 if (start)
319 input = LLVMBuildLShr(builder, input,
320 lp_build_const_int_vec(gallivm, dst_type, start), "");
321
322 if (stop < 32)
323 input = LLVMBuildAnd(builder, input, mask, "");
324
325 if (dst_type.floating)
326 input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
327
328 rgba[chan] = input;
329 }
330 }
331
332
333
334 /**
335 * Fetch a texels from a texture, returning them in SoA layout.
336 *
337 * \param type the desired return type for 'rgba'. The vector length
338 * is the number of texels to fetch
339 *
340 * \param base_ptr points to the base of the texture mip tree.
341 * \param offset offset to start of the texture image block. For non-
342 * compressed formats, this simply is an offset to the texel.
343 * For compressed formats, it is an offset to the start of the
344 * compressed data block.
345 *
346 * \param i, j the sub-block pixel coordinates. For non-compressed formats
347 * these will always be (0,0). For compressed formats, i will
348 * be in [0, block_width-1] and j will be in [0, block_height-1].
349 * \param cache optional value pointing to a lp_build_format_cache structure
350 */
351 void
352 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
353 const struct util_format_description *format_desc,
354 struct lp_type type,
355 LLVMValueRef base_ptr,
356 LLVMValueRef offset,
357 LLVMValueRef i,
358 LLVMValueRef j,
359 LLVMValueRef cache,
360 LLVMValueRef rgba_out[4])
361 {
362 LLVMBuilderRef builder = gallivm->builder;
363
364 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
365 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
366 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
367 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
368 format_desc->block.width == 1 &&
369 format_desc->block.height == 1 &&
370 format_desc->block.bits <= type.width &&
371 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
372 format_desc->channel[0].size == 32))
373 {
374 /*
375 * The packed pixel fits into an element of the destination format. Put
376 * the packed pixels into a vector and extract each component for all
377 * vector elements in parallel.
378 */
379
380 LLVMValueRef packed;
381
382 /*
383 * gather the texels from the texture
384 * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
385 */
386 assert(format_desc->block.bits <= type.width);
387 packed = lp_build_gather(gallivm,
388 type.length,
389 format_desc->block.bits,
390 type.width,
391 TRUE,
392 base_ptr, offset, FALSE);
393
394 /*
395 * convert texels to float rgba
396 */
397 lp_build_unpack_rgba_soa(gallivm,
398 format_desc,
399 type,
400 packed, rgba_out);
401 return;
402 }
403
404 if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
405 format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
406 /*
407 * similar conceptually to above but requiring special
408 * AoS packed -> SoA float conversion code.
409 */
410 LLVMValueRef packed;
411
412 assert(type.floating);
413 assert(type.width == 32);
414
415 packed = lp_build_gather(gallivm, type.length,
416 format_desc->block.bits,
417 type.width, TRUE,
418 base_ptr, offset, FALSE);
419 if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
420 lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
421 }
422 else {
423 lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
424 }
425 return;
426 }
427
428 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
429 format_desc->block.bits == 64) {
430 /*
431 * special case the format is 64 bits but we only require
432 * 32bit (or 8bit) from each block.
433 */
434 LLVMValueRef packed;
435
436 if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
437 /*
438 * for stencil simply fix up offsets - could in fact change
439 * base_ptr instead even outside the shader.
440 */
441 unsigned mask = (1 << 8) - 1;
442 LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
443 offset = LLVMBuildAdd(builder, offset, s_offset, "");
444 packed = lp_build_gather(gallivm, type.length, 32, type.width,
445 TRUE, base_ptr, offset, FALSE);
446 packed = LLVMBuildAnd(builder, packed,
447 lp_build_const_int_vec(gallivm, type, mask), "");
448 }
449 else {
450 assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
451 packed = lp_build_gather(gallivm, type.length, 32, type.width,
452 TRUE, base_ptr, offset, TRUE);
453 packed = LLVMBuildBitCast(builder, packed,
454 lp_build_vec_type(gallivm, type), "");
455 }
456 /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
457 rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
458 rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
459 return;
460 }
461
462 /*
463 * Try calling lp_build_fetch_rgba_aos for all pixels.
464 */
465
466 if (util_format_fits_8unorm(format_desc) &&
467 type.floating && type.width == 32 &&
468 (type.length == 1 || (type.length % 4 == 0))) {
469 struct lp_type tmp_type;
470 LLVMValueRef tmp;
471
472 memset(&tmp_type, 0, sizeof tmp_type);
473 tmp_type.width = 8;
474 tmp_type.length = type.length * 4;
475 tmp_type.norm = TRUE;
476
477 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
478 TRUE, base_ptr, offset, i, j, cache);
479
480 lp_build_rgba8_to_fi32_soa(gallivm,
481 type,
482 tmp,
483 rgba_out);
484
485 return;
486 }
487
488 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
489 /* non-srgb case is already handled above */
490 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB &&
491 type.floating && type.width == 32 &&
492 (type.length == 1 || (type.length % 4 == 0)) &&
493 cache) {
494 const struct util_format_description *format_decompressed;
495 const struct util_format_description *flinear_desc;
496 LLVMValueRef packed;
497 flinear_desc = util_format_description(util_format_linear(format_desc->format));
498 packed = lp_build_fetch_cached_texels(gallivm,
499 flinear_desc,
500 type.length,
501 base_ptr,
502 offset,
503 i, j,
504 cache);
505 packed = LLVMBuildBitCast(builder, packed,
506 lp_build_int_vec_type(gallivm, type), "");
507 /*
508 * The values are now packed so they match ordinary srgb RGBA8 format,
509 * hence need to use matching format for unpack.
510 */
511 format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB);
512
513 lp_build_unpack_rgba_soa(gallivm,
514 format_decompressed,
515 type,
516 packed, rgba_out);
517
518 return;
519 }
520
521 /*
522 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
523 *
524 * This is not the most efficient way of fetching pixels, as we
525 * miss some opportunities to do vectorization, but this is
526 * convenient for formats or scenarios for which there was no
527 * opportunity or incentive to optimize.
528 */
529
530 {
531 unsigned k, chan;
532 struct lp_type tmp_type;
533
534 if (gallivm_debug & GALLIVM_DEBUG_PERF) {
535 debug_printf("%s: scalar unpacking of %s\n",
536 __FUNCTION__, format_desc->short_name);
537 }
538
539 tmp_type = type;
540 tmp_type.length = 4;
541
542 for (chan = 0; chan < 4; ++chan) {
543 rgba_out[chan] = lp_build_undef(gallivm, type);
544 }
545
546 /* loop over number of pixels */
547 for(k = 0; k < type.length; ++k) {
548 LLVMValueRef index = lp_build_const_int32(gallivm, k);
549 LLVMValueRef offset_elem;
550 LLVMValueRef i_elem, j_elem;
551 LLVMValueRef tmp;
552
553 offset_elem = LLVMBuildExtractElement(builder, offset,
554 index, "");
555
556 i_elem = LLVMBuildExtractElement(builder, i, index, "");
557 j_elem = LLVMBuildExtractElement(builder, j, index, "");
558
559 /* Get a single float[4]={R,G,B,A} pixel */
560 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
561 TRUE, base_ptr, offset_elem,
562 i_elem, j_elem, cache);
563
564 /*
565 * Insert the AoS tmp value channels into the SoA result vectors at
566 * position = 'index'.
567 */
568 for (chan = 0; chan < 4; ++chan) {
569 LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
570 tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
571 rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
572 tmp_chan, index, "");
573 }
574 }
575 }
576 }