1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "pipe/p_defines.h"
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_debug.h"
41 #include "lp_bld_format.h"
42 #include "lp_bld_arit.h"
46 lp_build_format_swizzle_soa(const struct util_format_description
*format_desc
,
47 struct lp_build_context
*bld
,
48 const LLVMValueRef
*unswizzled
,
49 LLVMValueRef swizzled_out
[4])
51 assert(UTIL_FORMAT_SWIZZLE_0
== PIPE_SWIZZLE_ZERO
);
52 assert(UTIL_FORMAT_SWIZZLE_1
== PIPE_SWIZZLE_ONE
);
54 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) {
55 enum util_format_swizzle swizzle
;
56 LLVMValueRef depth_or_stencil
;
58 if (util_format_has_stencil(format_desc
) &&
59 !util_format_has_depth(format_desc
)) {
60 assert(!bld
->type
.floating
);
61 swizzle
= format_desc
->swizzle
[1];
64 assert(bld
->type
.floating
);
65 swizzle
= format_desc
->swizzle
[0];
68 * Return zzz1 or sss1 for depth-stencil formats here.
69 * Correct swizzling will be handled by apply_sampler_swizzle() later.
71 depth_or_stencil
= lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
73 swizzled_out
[2] = swizzled_out
[1] = swizzled_out
[0] = depth_or_stencil
;
74 swizzled_out
[3] = bld
->one
;
78 for (chan
= 0; chan
< 4; ++chan
) {
79 enum util_format_swizzle swizzle
= format_desc
->swizzle
[chan
];
80 swizzled_out
[chan
] = lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
87 * Unpack several pixels in SoA.
89 * It takes a vector of packed pixels:
91 * packed = {P0, P1, P2, P3, ..., Pn}
93 * And will produce four vectors:
95 * red = {R0, R1, R2, R3, ..., Rn}
96 * green = {G0, G1, G2, G3, ..., Gn}
97 * blue = {B0, B1, B2, B3, ..., Bn}
98 * alpha = {A0, A1, A2, A3, ..., An}
100 * It requires that a packed pixel fits into an element of the output
101 * channels. The common case is when converting pixel with a depth of 32 bit or
104 * \param format_desc the format of the 'packed' incoming pixel vector
105 * \param type the desired type for rgba_out (type.length = n, above)
106 * \param packed the incoming vector of packed pixels
107 * \param rgba_out returns the SoA R,G,B,A vectors
110 lp_build_unpack_rgba_soa(struct gallivm_state
*gallivm
,
111 const struct util_format_description
*format_desc
,
114 LLVMValueRef rgba_out
[4])
116 LLVMBuilderRef builder
= gallivm
->builder
;
117 struct lp_build_context bld
;
118 LLVMValueRef inputs
[4];
121 assert(format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
122 assert(format_desc
->block
.width
== 1);
123 assert(format_desc
->block
.height
== 1);
124 assert(format_desc
->block
.bits
<= type
.width
);
125 /* FIXME: Support more output types */
126 assert(type
.width
== 32);
128 lp_build_context_init(&bld
, gallivm
, type
);
130 /* Decode the input vector components */
131 for (chan
= 0; chan
< format_desc
->nr_channels
; ++chan
) {
132 const unsigned width
= format_desc
->channel
[chan
].size
;
133 const unsigned start
= format_desc
->channel
[chan
].shift
;
134 const unsigned stop
= start
+ width
;
139 switch(format_desc
->channel
[chan
].type
) {
140 case UTIL_FORMAT_TYPE_VOID
:
141 input
= lp_build_undef(gallivm
, type
);
144 case UTIL_FORMAT_TYPE_UNSIGNED
:
150 input
= LLVMBuildLShr(builder
, input
, lp_build_const_int_vec(gallivm
, type
, start
), "");
157 if (stop
< format_desc
->block
.bits
) {
158 unsigned mask
= ((unsigned long long)1 << width
) - 1;
159 input
= LLVMBuildAnd(builder
, input
, lp_build_const_int_vec(gallivm
, type
, mask
), "");
167 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_SRGB
) {
168 if (format_desc
->swizzle
[3] == chan
) {
169 input
= lp_build_unsigned_norm_to_float(gallivm
, width
, type
, input
);
172 struct lp_type conv_type
= lp_uint_type(type
);
173 input
= lp_build_srgb_to_linear(gallivm
, conv_type
, width
, input
);
177 if(format_desc
->channel
[chan
].normalized
)
178 input
= lp_build_unsigned_norm_to_float(gallivm
, width
, type
, input
);
180 input
= LLVMBuildSIToFP(builder
, input
,
181 lp_build_vec_type(gallivm
, type
), "");
184 else if (format_desc
->channel
[chan
].pure_integer
) {
193 case UTIL_FORMAT_TYPE_SIGNED
:
195 * Align the sign bit first.
198 if (stop
< type
.width
) {
199 unsigned bits
= type
.width
- stop
;
200 LLVMValueRef bits_val
= lp_build_const_int_vec(gallivm
, type
, bits
);
201 input
= LLVMBuildShl(builder
, input
, bits_val
, "");
205 * Align the LSB (with an arithmetic shift to preserve the sign)
208 if (format_desc
->channel
[chan
].size
< type
.width
) {
209 unsigned bits
= type
.width
- format_desc
->channel
[chan
].size
;
210 LLVMValueRef bits_val
= lp_build_const_int_vec(gallivm
, type
, bits
);
211 input
= LLVMBuildAShr(builder
, input
, bits_val
, "");
219 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(gallivm
, type
), "");
220 if (format_desc
->channel
[chan
].normalized
) {
221 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
- 1)) - 1);
222 LLVMValueRef scale_val
= lp_build_const_vec(gallivm
, type
, scale
);
223 input
= LLVMBuildFMul(builder
, input
, scale_val
, "");
224 /* the formula above will produce value below -1.0 for most negative
225 * value but everything seems happy with that hence disable for now */
227 input
= lp_build_max(&bld
, input
,
228 lp_build_const_vec(gallivm
, type
, -1.0f
));
231 else if (format_desc
->channel
[chan
].pure_integer
) {
240 case UTIL_FORMAT_TYPE_FLOAT
:
244 assert(type
.width
== 32);
245 input
= LLVMBuildBitCast(builder
, input
, lp_build_vec_type(gallivm
, type
), "");
250 input
= lp_build_undef(gallivm
, type
);
254 case UTIL_FORMAT_TYPE_FIXED
:
256 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
/2)) - 1);
257 LLVMValueRef scale_val
= lp_build_const_vec(gallivm
, type
, scale
);
258 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(gallivm
, type
), "");
259 input
= LLVMBuildFMul(builder
, input
, scale_val
, "");
264 input
= lp_build_undef(gallivm
, type
);
270 input
= lp_build_undef(gallivm
, type
);
274 inputs
[chan
] = input
;
277 lp_build_format_swizzle_soa(format_desc
, &bld
, inputs
, rgba_out
);
282 * Convert a vector of rgba8 values into 32bit wide SoA vectors.
284 * \param dst_type The desired return type. For pure integer formats
285 * this should be a 32bit wide int or uint vector type,
286 * otherwise a float vector type.
288 * \param packed The rgba8 values to pack.
290 * \param rgba The 4 SoA return vectors.
293 lp_build_rgba8_to_fi32_soa(struct gallivm_state
*gallivm
,
294 struct lp_type dst_type
,
298 LLVMBuilderRef builder
= gallivm
->builder
;
299 LLVMValueRef mask
= lp_build_const_int_vec(gallivm
, dst_type
, 0xff);
302 /* XXX technically shouldn't use that for uint dst_type */
303 packed
= LLVMBuildBitCast(builder
, packed
,
304 lp_build_int_vec_type(gallivm
, dst_type
), "");
306 /* Decode the input vector components */
307 for (chan
= 0; chan
< 4; ++chan
) {
308 #ifdef PIPE_ARCH_LITTLE_ENDIAN
309 unsigned start
= chan
*8;
311 unsigned start
= (3-chan
)*8;
313 unsigned stop
= start
+ 8;
319 input
= LLVMBuildLShr(builder
, input
,
320 lp_build_const_int_vec(gallivm
, dst_type
, start
), "");
323 input
= LLVMBuildAnd(builder
, input
, mask
, "");
325 if (dst_type
.floating
)
326 input
= lp_build_unsigned_norm_to_float(gallivm
, 8, dst_type
, input
);
335 * Fetch a texels from a texture, returning them in SoA layout.
337 * \param type the desired return type for 'rgba'. The vector length
338 * is the number of texels to fetch
340 * \param base_ptr points to the base of the texture mip tree.
341 * \param offset offset to start of the texture image block. For non-
342 * compressed formats, this simply is an offset to the texel.
343 * For compressed formats, it is an offset to the start of the
344 * compressed data block.
346 * \param i, j the sub-block pixel coordinates. For non-compressed formats
347 * these will always be (0,0). For compressed formats, i will
348 * be in [0, block_width-1] and j will be in [0, block_height-1].
351 lp_build_fetch_rgba_soa(struct gallivm_state
*gallivm
,
352 const struct util_format_description
*format_desc
,
354 LLVMValueRef base_ptr
,
358 LLVMValueRef rgba_out
[4])
360 LLVMBuilderRef builder
= gallivm
->builder
;
362 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
&&
363 (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
||
364 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_SRGB
||
365 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) &&
366 format_desc
->block
.width
== 1 &&
367 format_desc
->block
.height
== 1 &&
368 format_desc
->block
.bits
<= type
.width
&&
369 (format_desc
->channel
[0].type
!= UTIL_FORMAT_TYPE_FLOAT
||
370 format_desc
->channel
[0].size
== 32))
373 * The packed pixel fits into an element of the destination format. Put
374 * the packed pixels into a vector and extract each component for all
375 * vector elements in parallel.
381 * gather the texels from the texture
382 * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
384 assert(format_desc
->block
.bits
<= type
.width
);
385 packed
= lp_build_gather(gallivm
,
387 format_desc
->block
.bits
,
389 base_ptr
, offset
, FALSE
);
392 * convert texels to float rgba
394 lp_build_unpack_rgba_soa(gallivm
,
401 if (format_desc
->format
== PIPE_FORMAT_R11G11B10_FLOAT
||
402 format_desc
->format
== PIPE_FORMAT_R9G9B9E5_FLOAT
) {
404 * similar conceptually to above but requiring special
405 * AoS packed -> SoA float conversion code.
409 assert(type
.floating
);
410 assert(type
.width
== 32);
412 packed
= lp_build_gather(gallivm
, type
.length
,
413 format_desc
->block
.bits
,
414 type
.width
, base_ptr
, offset
,
416 if (format_desc
->format
== PIPE_FORMAT_R11G11B10_FLOAT
) {
417 lp_build_r11g11b10_to_float(gallivm
, packed
, rgba_out
);
420 lp_build_rgb9e5_to_float(gallivm
, packed
, rgba_out
);
425 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
&&
426 format_desc
->block
.bits
== 64) {
428 * special case the format is 64 bits but we only require
429 * 32bit (or 8bit) from each block.
433 if (format_desc
->format
== PIPE_FORMAT_X32_S8X24_UINT
) {
435 * for stencil simply fix up offsets - could in fact change
436 * base_ptr instead even outside the shader.
438 unsigned mask
= (1 << 8) - 1;
439 LLVMValueRef s_offset
= lp_build_const_int_vec(gallivm
, type
, 4);
440 offset
= LLVMBuildAdd(builder
, offset
, s_offset
, "");
441 packed
= lp_build_gather(gallivm
, type
.length
,
442 32, type
.width
, base_ptr
, offset
, FALSE
);
443 packed
= LLVMBuildAnd(builder
, packed
,
444 lp_build_const_int_vec(gallivm
, type
, mask
), "");
447 assert (format_desc
->format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
);
448 packed
= lp_build_gather(gallivm
, type
.length
,
449 32, type
.width
, base_ptr
, offset
, TRUE
);
450 packed
= LLVMBuildBitCast(builder
, packed
,
451 lp_build_vec_type(gallivm
, type
), "");
453 /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
454 rgba_out
[0] = rgba_out
[1] = rgba_out
[2] = packed
;
455 rgba_out
[3] = lp_build_const_vec(gallivm
, type
, 1.0f
);
460 * Try calling lp_build_fetch_rgba_aos for all pixels.
463 if (util_format_fits_8unorm(format_desc
) &&
464 type
.floating
&& type
.width
== 32 &&
465 (type
.length
== 1 || (type
.length
% 4 == 0))) {
466 struct lp_type tmp_type
;
469 memset(&tmp_type
, 0, sizeof tmp_type
);
471 tmp_type
.length
= type
.length
* 4;
472 tmp_type
.norm
= TRUE
;
474 tmp
= lp_build_fetch_rgba_aos(gallivm
, format_desc
, tmp_type
,
475 base_ptr
, offset
, i
, j
);
477 lp_build_rgba8_to_fi32_soa(gallivm
,
486 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
488 * This is not the most efficient way of fetching pixels, as we
489 * miss some opportunities to do vectorization, but this is
490 * convenient for formats or scenarios for which there was no
491 * opportunity or incentive to optimize.
496 struct lp_type tmp_type
;
498 if (gallivm_debug
& GALLIVM_DEBUG_PERF
) {
499 debug_printf("%s: scalar unpacking of %s\n",
500 __FUNCTION__
, format_desc
->short_name
);
506 for (chan
= 0; chan
< 4; ++chan
) {
507 rgba_out
[chan
] = lp_build_undef(gallivm
, type
);
510 /* loop over number of pixels */
511 for(k
= 0; k
< type
.length
; ++k
) {
512 LLVMValueRef index
= lp_build_const_int32(gallivm
, k
);
513 LLVMValueRef offset_elem
;
514 LLVMValueRef i_elem
, j_elem
;
517 offset_elem
= LLVMBuildExtractElement(builder
, offset
,
520 i_elem
= LLVMBuildExtractElement(builder
, i
, index
, "");
521 j_elem
= LLVMBuildExtractElement(builder
, j
, index
, "");
523 /* Get a single float[4]={R,G,B,A} pixel */
524 tmp
= lp_build_fetch_rgba_aos(gallivm
, format_desc
, tmp_type
,
525 base_ptr
, offset_elem
,
529 * Insert the AoS tmp value channels into the SoA result vectors at
530 * position = 'index'.
532 for (chan
= 0; chan
< 4; ++chan
) {
533 LLVMValueRef chan_val
= lp_build_const_int32(gallivm
, chan
),
534 tmp_chan
= LLVMBuildExtractElement(builder
, tmp
, chan_val
, "");
535 rgba_out
[chan
] = LLVMBuildInsertElement(builder
, rgba_out
[chan
],
536 tmp_chan
, index
, "");