gallium: Fix llvmpipe on big-endian machines
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_defines.h"
30
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
34
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_debug.h"
41 #include "lp_bld_format.h"
42
43
44 void
45 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
46 struct lp_build_context *bld,
47 const LLVMValueRef *unswizzled,
48 LLVMValueRef swizzled_out[4])
49 {
50 assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
51 assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
52
53 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
54 enum util_format_swizzle swizzle;
55 LLVMValueRef depth_or_stencil;
56
57 if (util_format_has_stencil(format_desc) &&
58 !util_format_has_depth(format_desc)) {
59 assert(!bld->type.floating);
60 swizzle = format_desc->swizzle[1];
61 }
62 else {
63 assert(bld->type.floating);
64 swizzle = format_desc->swizzle[0];
65 }
66 /*
67 * Return zzz1 or sss1 for depth-stencil formats here.
68 * Correct swizzling will be handled by apply_sampler_swizzle() later.
69 */
70 depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
71
72 swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
73 swizzled_out[3] = bld->one;
74 }
75 else {
76 unsigned chan;
77 for (chan = 0; chan < 4; ++chan) {
78 enum util_format_swizzle swizzle = format_desc->swizzle[chan];
79 swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
80 }
81 }
82 }
83
84
85 /**
86 * Unpack several pixels in SoA.
87 *
88 * It takes a vector of packed pixels:
89 *
90 * packed = {P0, P1, P2, P3, ..., Pn}
91 *
92 * And will produce four vectors:
93 *
94 * red = {R0, R1, R2, R3, ..., Rn}
95 * green = {G0, G1, G2, G3, ..., Gn}
96 * blue = {B0, B1, B2, B3, ..., Bn}
97 * alpha = {A0, A1, A2, A3, ..., An}
98 *
99 * It requires that a packed pixel fits into an element of the output
100 * channels. The common case is when converting pixel with a depth of 32 bit or
101 * less into floats.
102 *
103 * \param format_desc the format of the 'packed' incoming pixel vector
104 * \param type the desired type for rgba_out (type.length = n, above)
105 * \param packed the incoming vector of packed pixels
106 * \param rgba_out returns the SoA R,G,B,A vectors
107 */
108 void
109 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
110 const struct util_format_description *format_desc,
111 struct lp_type type,
112 LLVMValueRef packed,
113 LLVMValueRef rgba_out[4])
114 {
115 LLVMBuilderRef builder = gallivm->builder;
116 struct lp_build_context bld;
117 LLVMValueRef inputs[4];
118 unsigned chan;
119
120 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
121 assert(format_desc->block.width == 1);
122 assert(format_desc->block.height == 1);
123 assert(format_desc->block.bits <= type.width);
124 /* FIXME: Support more output types */
125 assert(type.width == 32);
126
127 lp_build_context_init(&bld, gallivm, type);
128
129 /* Decode the input vector components */
130 for (chan = 0; chan < format_desc->nr_channels; ++chan) {
131 const unsigned width = format_desc->channel[chan].size;
132 const unsigned start = format_desc->channel[chan].shift;
133 const unsigned stop = start + width;
134 LLVMValueRef input;
135
136 input = packed;
137
138 switch(format_desc->channel[chan].type) {
139 case UTIL_FORMAT_TYPE_VOID:
140 input = lp_build_undef(gallivm, type);
141 break;
142
143 case UTIL_FORMAT_TYPE_UNSIGNED:
144 /*
145 * Align the LSB
146 */
147
148 if (start) {
149 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
150 }
151
152 /*
153 * Zero the MSBs
154 */
155
156 if (stop < format_desc->block.bits) {
157 unsigned mask = ((unsigned long long)1 << width) - 1;
158 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
159 }
160
161 /*
162 * Type conversion
163 */
164
165 if (type.floating) {
166 if(format_desc->channel[chan].normalized)
167 input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
168 else
169 input = LLVMBuildSIToFP(builder, input,
170 lp_build_vec_type(gallivm, type), "");
171 }
172 else if (format_desc->channel[chan].pure_integer) {
173 /* Nothing to do */
174 } else {
175 /* FIXME */
176 assert(0);
177 }
178
179 break;
180
181 case UTIL_FORMAT_TYPE_SIGNED:
182 /*
183 * Align the sign bit first.
184 */
185
186 if (stop < type.width) {
187 unsigned bits = type.width - stop;
188 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
189 input = LLVMBuildShl(builder, input, bits_val, "");
190 }
191
192 /*
193 * Align the LSB (with an arithmetic shift to preserve the sign)
194 */
195
196 if (format_desc->channel[chan].size < type.width) {
197 unsigned bits = type.width - format_desc->channel[chan].size;
198 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
199 input = LLVMBuildAShr(builder, input, bits_val, "");
200 }
201
202 /*
203 * Type conversion
204 */
205
206 if (type.floating) {
207 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
208 if (format_desc->channel[chan].normalized) {
209 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
210 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
211 input = LLVMBuildFMul(builder, input, scale_val, "");
212 }
213 }
214 else if (format_desc->channel[chan].pure_integer) {
215 /* Nothing to do */
216 } else {
217 /* FIXME */
218 assert(0);
219 }
220
221 break;
222
223 case UTIL_FORMAT_TYPE_FLOAT:
224 if (type.floating) {
225 assert(start == 0);
226 assert(stop == 32);
227 assert(type.width == 32);
228 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
229 }
230 else {
231 /* FIXME */
232 assert(0);
233 input = lp_build_undef(gallivm, type);
234 }
235 break;
236
237 case UTIL_FORMAT_TYPE_FIXED:
238 if (type.floating) {
239 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
240 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
241 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
242 input = LLVMBuildFMul(builder, input, scale_val, "");
243 }
244 else {
245 /* FIXME */
246 assert(0);
247 input = lp_build_undef(gallivm, type);
248 }
249 break;
250
251 default:
252 assert(0);
253 input = lp_build_undef(gallivm, type);
254 break;
255 }
256
257 inputs[chan] = input;
258 }
259
260 lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
261 }
262
263
264 /**
265 * Convert a vector of rgba8 values into 32bit wide SoA vectors.
266 *
267 * \param dst_type The desired return type. For pure integer formats
268 * this should be a 32bit wide int or uint vector type,
269 * otherwise a float vector type.
270 *
271 * \param packed The rgba8 values to pack.
272 *
273 * \param rgba The 4 SoA return vectors.
274 */
275 void
276 lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
277 struct lp_type dst_type,
278 LLVMValueRef packed,
279 LLVMValueRef *rgba)
280 {
281 LLVMBuilderRef builder = gallivm->builder;
282 LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
283 unsigned chan;
284
285 /* XXX technically shouldn't use that for uint dst_type */
286 packed = LLVMBuildBitCast(builder, packed,
287 lp_build_int_vec_type(gallivm, dst_type), "");
288
289 /* Decode the input vector components */
290 for (chan = 0; chan < 4; ++chan) {
291 #ifdef PIPE_ARCH_LITTLE_ENDIAN
292 unsigned start = chan*8;
293 #else
294 unsigned start = (3-chan)*8;
295 #endif
296 unsigned stop = start + 8;
297 LLVMValueRef input;
298
299 input = packed;
300
301 if (start)
302 input = LLVMBuildLShr(builder, input,
303 lp_build_const_int_vec(gallivm, dst_type, start), "");
304
305 if (stop < 32)
306 input = LLVMBuildAnd(builder, input, mask, "");
307
308 if (dst_type.floating)
309 input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
310
311 rgba[chan] = input;
312 }
313 }
314
315
316
317 /**
318 * Fetch a texels from a texture, returning them in SoA layout.
319 *
320 * \param type the desired return type for 'rgba'. The vector length
321 * is the number of texels to fetch
322 *
323 * \param base_ptr points to the base of the texture mip tree.
324 * \param offset offset to start of the texture image block. For non-
325 * compressed formats, this simply is an offset to the texel.
326 * For compressed formats, it is an offset to the start of the
327 * compressed data block.
328 *
329 * \param i, j the sub-block pixel coordinates. For non-compressed formats
330 * these will always be (0,0). For compressed formats, i will
331 * be in [0, block_width-1] and j will be in [0, block_height-1].
332 */
333 void
334 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
335 const struct util_format_description *format_desc,
336 struct lp_type type,
337 LLVMValueRef base_ptr,
338 LLVMValueRef offset,
339 LLVMValueRef i,
340 LLVMValueRef j,
341 LLVMValueRef rgba_out[4])
342 {
343 LLVMBuilderRef builder = gallivm->builder;
344
345 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
346 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
347 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
348 format_desc->block.width == 1 &&
349 format_desc->block.height == 1 &&
350 format_desc->block.bits <= type.width &&
351 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
352 format_desc->channel[0].size == 32))
353 {
354 /*
355 * The packed pixel fits into an element of the destination format. Put
356 * the packed pixels into a vector and extract each component for all
357 * vector elements in parallel.
358 */
359
360 LLVMValueRef packed;
361
362 /*
363 * gather the texels from the texture
364 * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
365 */
366 assert(format_desc->block.bits <= type.width);
367 packed = lp_build_gather(gallivm,
368 type.length,
369 format_desc->block.bits,
370 type.width,
371 base_ptr, offset, FALSE);
372
373 /*
374 * convert texels to float rgba
375 */
376 lp_build_unpack_rgba_soa(gallivm,
377 format_desc,
378 type,
379 packed, rgba_out);
380 return;
381 }
382
383 if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
384 format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
385 /*
386 * similar conceptually to above but requiring special
387 * AoS packed -> SoA float conversion code.
388 */
389 LLVMValueRef packed;
390
391 assert(type.floating);
392 assert(type.width == 32);
393
394 packed = lp_build_gather(gallivm, type.length,
395 format_desc->block.bits,
396 type.width, base_ptr, offset,
397 FALSE);
398 if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
399 lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
400 }
401 else {
402 lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
403 }
404 return;
405 }
406
407 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
408 format_desc->block.bits == 64) {
409 /*
410 * special case the format is 64 bits but we only require
411 * 32bit (or 8bit) from each block.
412 */
413 LLVMValueRef packed;
414
415 if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
416 /*
417 * for stencil simply fix up offsets - could in fact change
418 * base_ptr instead even outside the shader.
419 */
420 unsigned mask = (1 << 8) - 1;
421 LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
422 offset = LLVMBuildAdd(builder, offset, s_offset, "");
423 packed = lp_build_gather(gallivm, type.length,
424 32, type.width, base_ptr, offset, FALSE);
425 packed = LLVMBuildAnd(builder, packed,
426 lp_build_const_int_vec(gallivm, type, mask), "");
427 }
428 else {
429 assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
430 packed = lp_build_gather(gallivm, type.length,
431 32, type.width, base_ptr, offset, TRUE);
432 packed = LLVMBuildBitCast(builder, packed,
433 lp_build_vec_type(gallivm, type), "");
434 }
435 /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
436 rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
437 rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
438 return;
439 }
440
441 /*
442 * Try calling lp_build_fetch_rgba_aos for all pixels.
443 */
444
445 if (util_format_fits_8unorm(format_desc) &&
446 type.floating && type.width == 32 &&
447 (type.length == 1 || (type.length % 4 == 0))) {
448 struct lp_type tmp_type;
449 LLVMValueRef tmp;
450
451 memset(&tmp_type, 0, sizeof tmp_type);
452 tmp_type.width = 8;
453 tmp_type.length = type.length * 4;
454 tmp_type.norm = TRUE;
455
456 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
457 base_ptr, offset, i, j);
458
459 lp_build_rgba8_to_fi32_soa(gallivm,
460 type,
461 tmp,
462 rgba_out);
463
464 return;
465 }
466
467 /*
468 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
469 *
470 * This is not the most efficient way of fetching pixels, as we
471 * miss some opportunities to do vectorization, but this is
472 * convenient for formats or scenarios for which there was no
473 * opportunity or incentive to optimize.
474 */
475
476 {
477 unsigned k, chan;
478 struct lp_type tmp_type;
479
480 if (gallivm_debug & GALLIVM_DEBUG_PERF) {
481 debug_printf("%s: scalar unpacking of %s\n",
482 __FUNCTION__, format_desc->short_name);
483 }
484
485 tmp_type = type;
486 tmp_type.length = 4;
487
488 for (chan = 0; chan < 4; ++chan) {
489 rgba_out[chan] = lp_build_undef(gallivm, type);
490 }
491
492 /* loop over number of pixels */
493 for(k = 0; k < type.length; ++k) {
494 LLVMValueRef index = lp_build_const_int32(gallivm, k);
495 LLVMValueRef offset_elem;
496 LLVMValueRef i_elem, j_elem;
497 LLVMValueRef tmp;
498
499 offset_elem = LLVMBuildExtractElement(builder, offset,
500 index, "");
501
502 i_elem = LLVMBuildExtractElement(builder, i, index, "");
503 j_elem = LLVMBuildExtractElement(builder, j, index, "");
504
505 /* Get a single float[4]={R,G,B,A} pixel */
506 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
507 base_ptr, offset_elem,
508 i_elem, j_elem);
509
510 /*
511 * Insert the AoS tmp value channels into the SoA result vectors at
512 * position = 'index'.
513 */
514 for (chan = 0; chan < 4; ++chan) {
515 LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
516 tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
517 rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
518 tmp_chan, index, "");
519 }
520 }
521 }
522 }