/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
29 #include "pipe/p_defines.h"
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_debug.h"
41 #include "lp_bld_format.h"
42 #include "lp_bld_arit.h"
43 #include "lp_bld_pack.h"
47 convert_to_soa(struct gallivm_state
*gallivm
,
48 LLVMValueRef src_aos
[LP_MAX_VECTOR_WIDTH
/ 32],
49 LLVMValueRef dst_soa
[4],
50 const struct lp_type soa_type
)
53 struct lp_type aos_channel_type
= soa_type
;
55 LLVMValueRef aos_channels
[4];
56 unsigned pixels_per_channel
= soa_type
.length
/ 4;
58 debug_assert((soa_type
.length
% 4) == 0);
60 aos_channel_type
.length
>>= 1;
62 for (j
= 0; j
< 4; ++j
) {
63 LLVMValueRef channel
[LP_MAX_VECTOR_LENGTH
] = { 0 };
65 assert(pixels_per_channel
<= LP_MAX_VECTOR_LENGTH
);
67 for (k
= 0; k
< pixels_per_channel
; ++k
) {
68 channel
[k
] = src_aos
[j
+ 4 * k
];
71 aos_channels
[j
] = lp_build_concat(gallivm
, channel
, aos_channel_type
, pixels_per_channel
);
74 lp_build_transpose_aos(gallivm
, soa_type
, aos_channels
, dst_soa
);
79 lp_build_format_swizzle_soa(const struct util_format_description
*format_desc
,
80 struct lp_build_context
*bld
,
81 const LLVMValueRef
*unswizzled
,
82 LLVMValueRef swizzled_out
[4])
84 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) {
85 enum pipe_swizzle swizzle
;
86 LLVMValueRef depth_or_stencil
;
88 if (util_format_has_stencil(format_desc
) &&
89 !util_format_has_depth(format_desc
)) {
90 assert(!bld
->type
.floating
);
91 swizzle
= format_desc
->swizzle
[1];
94 assert(bld
->type
.floating
);
95 swizzle
= format_desc
->swizzle
[0];
98 * Return zzz1 or sss1 for depth-stencil formats here.
99 * Correct swizzling will be handled by apply_sampler_swizzle() later.
101 depth_or_stencil
= lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
103 swizzled_out
[2] = swizzled_out
[1] = swizzled_out
[0] = depth_or_stencil
;
104 swizzled_out
[3] = bld
->one
;
108 for (chan
= 0; chan
< 4; ++chan
) {
109 enum pipe_swizzle swizzle
= format_desc
->swizzle
[chan
];
110 swizzled_out
[chan
] = lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
117 * Unpack several pixels in SoA.
119 * It takes a vector of packed pixels:
121 * packed = {P0, P1, P2, P3, ..., Pn}
123 * And will produce four vectors:
125 * red = {R0, R1, R2, R3, ..., Rn}
126 * green = {G0, G1, G2, G3, ..., Gn}
127 * blue = {B0, B1, B2, B3, ..., Bn}
128 * alpha = {A0, A1, A2, A3, ..., An}
130 * It requires that a packed pixel fits into an element of the output
131 * channels. The common case is when converting pixel with a depth of 32 bit or
134 * \param format_desc the format of the 'packed' incoming pixel vector
135 * \param type the desired type for rgba_out (type.length = n, above)
136 * \param packed the incoming vector of packed pixels
137 * \param rgba_out returns the SoA R,G,B,A vectors
140 lp_build_unpack_rgba_soa(struct gallivm_state
*gallivm
,
141 const struct util_format_description
*format_desc
,
144 LLVMValueRef rgba_out
[4])
146 LLVMBuilderRef builder
= gallivm
->builder
;
147 struct lp_build_context bld
;
148 LLVMValueRef inputs
[4];
151 assert(format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
152 assert(format_desc
->block
.width
== 1);
153 assert(format_desc
->block
.height
== 1);
154 assert(format_desc
->block
.bits
<= type
.width
);
155 /* FIXME: Support more output types */
156 assert(type
.width
== 32);
158 lp_build_context_init(&bld
, gallivm
, type
);
160 /* Decode the input vector components */
161 for (chan
= 0; chan
< format_desc
->nr_channels
; ++chan
) {
162 const unsigned width
= format_desc
->channel
[chan
].size
;
163 const unsigned start
= format_desc
->channel
[chan
].shift
;
164 const unsigned stop
= start
+ width
;
169 switch(format_desc
->channel
[chan
].type
) {
170 case UTIL_FORMAT_TYPE_VOID
:
171 input
= lp_build_undef(gallivm
, type
);
174 case UTIL_FORMAT_TYPE_UNSIGNED
:
180 input
= LLVMBuildLShr(builder
, input
, lp_build_const_int_vec(gallivm
, type
, start
), "");
187 if (stop
< format_desc
->block
.bits
) {
188 unsigned mask
= ((unsigned long long)1 << width
) - 1;
189 input
= LLVMBuildAnd(builder
, input
, lp_build_const_int_vec(gallivm
, type
, mask
), "");
197 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_SRGB
) {
198 if (format_desc
->swizzle
[3] == chan
) {
199 input
= lp_build_unsigned_norm_to_float(gallivm
, width
, type
, input
);
202 struct lp_type conv_type
= lp_uint_type(type
);
203 input
= lp_build_srgb_to_linear(gallivm
, conv_type
, width
, input
);
207 if(format_desc
->channel
[chan
].normalized
)
208 input
= lp_build_unsigned_norm_to_float(gallivm
, width
, type
, input
);
210 input
= LLVMBuildSIToFP(builder
, input
,
211 lp_build_vec_type(gallivm
, type
), "");
214 else if (format_desc
->channel
[chan
].pure_integer
) {
223 case UTIL_FORMAT_TYPE_SIGNED
:
225 * Align the sign bit first.
228 if (stop
< type
.width
) {
229 unsigned bits
= type
.width
- stop
;
230 LLVMValueRef bits_val
= lp_build_const_int_vec(gallivm
, type
, bits
);
231 input
= LLVMBuildShl(builder
, input
, bits_val
, "");
235 * Align the LSB (with an arithmetic shift to preserve the sign)
238 if (format_desc
->channel
[chan
].size
< type
.width
) {
239 unsigned bits
= type
.width
- format_desc
->channel
[chan
].size
;
240 LLVMValueRef bits_val
= lp_build_const_int_vec(gallivm
, type
, bits
);
241 input
= LLVMBuildAShr(builder
, input
, bits_val
, "");
249 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(gallivm
, type
), "");
250 if (format_desc
->channel
[chan
].normalized
) {
251 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
- 1)) - 1);
252 LLVMValueRef scale_val
= lp_build_const_vec(gallivm
, type
, scale
);
253 input
= LLVMBuildFMul(builder
, input
, scale_val
, "");
254 /* the formula above will produce value below -1.0 for most negative
255 * value but everything seems happy with that hence disable for now */
257 input
= lp_build_max(&bld
, input
,
258 lp_build_const_vec(gallivm
, type
, -1.0f
));
261 else if (format_desc
->channel
[chan
].pure_integer
) {
270 case UTIL_FORMAT_TYPE_FLOAT
:
272 if (format_desc
->channel
[chan
].size
== 16) {
273 struct lp_type f16i_type
= type
;
274 f16i_type
.width
/= 2;
275 f16i_type
.floating
= 0;
277 input
= LLVMBuildLShr(builder
, input
,
278 lp_build_const_int_vec(gallivm
, type
, start
), "");
280 input
= LLVMBuildTrunc(builder
, input
,
281 lp_build_vec_type(gallivm
, f16i_type
), "");
282 input
= lp_build_half_to_float(gallivm
, input
);
286 assert(type
.width
== 32);
288 input
= LLVMBuildBitCast(builder
, input
, lp_build_vec_type(gallivm
, type
), "");
293 input
= lp_build_undef(gallivm
, type
);
297 case UTIL_FORMAT_TYPE_FIXED
:
299 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
/2)) - 1);
300 LLVMValueRef scale_val
= lp_build_const_vec(gallivm
, type
, scale
);
301 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(gallivm
, type
), "");
302 input
= LLVMBuildFMul(builder
, input
, scale_val
, "");
307 input
= lp_build_undef(gallivm
, type
);
313 input
= lp_build_undef(gallivm
, type
);
317 inputs
[chan
] = input
;
320 lp_build_format_swizzle_soa(format_desc
, &bld
, inputs
, rgba_out
);
325 * Convert a vector of rgba8 values into 32bit wide SoA vectors.
327 * \param dst_type The desired return type. For pure integer formats
328 * this should be a 32bit wide int or uint vector type,
329 * otherwise a float vector type.
331 * \param packed The rgba8 values to pack.
333 * \param rgba The 4 SoA return vectors.
336 lp_build_rgba8_to_fi32_soa(struct gallivm_state
*gallivm
,
337 struct lp_type dst_type
,
341 LLVMBuilderRef builder
= gallivm
->builder
;
342 LLVMValueRef mask
= lp_build_const_int_vec(gallivm
, dst_type
, 0xff);
345 /* XXX technically shouldn't use that for uint dst_type */
346 packed
= LLVMBuildBitCast(builder
, packed
,
347 lp_build_int_vec_type(gallivm
, dst_type
), "");
349 /* Decode the input vector components */
350 for (chan
= 0; chan
< 4; ++chan
) {
351 #ifdef PIPE_ARCH_LITTLE_ENDIAN
352 unsigned start
= chan
*8;
354 unsigned start
= (3-chan
)*8;
356 unsigned stop
= start
+ 8;
362 input
= LLVMBuildLShr(builder
, input
,
363 lp_build_const_int_vec(gallivm
, dst_type
, start
), "");
366 input
= LLVMBuildAnd(builder
, input
, mask
, "");
368 if (dst_type
.floating
)
369 input
= lp_build_unsigned_norm_to_float(gallivm
, 8, dst_type
, input
);
378 * Fetch a texels from a texture, returning them in SoA layout.
380 * \param type the desired return type for 'rgba'. The vector length
381 * is the number of texels to fetch
382 * \param aligned if the offset is guaranteed to be aligned to element width
384 * \param base_ptr points to the base of the texture mip tree.
385 * \param offset offset to start of the texture image block. For non-
386 * compressed formats, this simply is an offset to the texel.
387 * For compressed formats, it is an offset to the start of the
388 * compressed data block.
390 * \param i, j the sub-block pixel coordinates. For non-compressed formats
391 * these will always be (0,0). For compressed formats, i will
392 * be in [0, block_width-1] and j will be in [0, block_height-1].
393 * \param cache optional value pointing to a lp_build_format_cache structure
396 lp_build_fetch_rgba_soa(struct gallivm_state
*gallivm
,
397 const struct util_format_description
*format_desc
,
400 LLVMValueRef base_ptr
,
405 LLVMValueRef rgba_out
[4])
407 LLVMBuilderRef builder
= gallivm
->builder
;
409 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
&&
410 (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
||
411 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_SRGB
||
412 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) &&
413 format_desc
->block
.width
== 1 &&
414 format_desc
->block
.height
== 1 &&
415 format_desc
->block
.bits
<= type
.width
&&
416 (format_desc
->channel
[0].type
!= UTIL_FORMAT_TYPE_FLOAT
||
417 format_desc
->channel
[0].size
== 32 ||
418 format_desc
->channel
[0].size
== 16))
421 * The packed pixel fits into an element of the destination format. Put
422 * the packed pixels into a vector and extract each component for all
423 * vector elements in parallel.
429 * gather the texels from the texture
430 * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
432 assert(format_desc
->block
.bits
<= type
.width
);
433 packed
= lp_build_gather(gallivm
,
435 format_desc
->block
.bits
,
438 base_ptr
, offset
, FALSE
);
441 * convert texels to float rgba
443 lp_build_unpack_rgba_soa(gallivm
,
450 if (format_desc
->format
== PIPE_FORMAT_R11G11B10_FLOAT
||
451 format_desc
->format
== PIPE_FORMAT_R9G9B9E5_FLOAT
) {
453 * similar conceptually to above but requiring special
454 * AoS packed -> SoA float conversion code.
458 assert(type
.floating
);
459 assert(type
.width
== 32);
461 packed
= lp_build_gather(gallivm
, type
.length
,
462 format_desc
->block
.bits
,
464 base_ptr
, offset
, FALSE
);
465 if (format_desc
->format
== PIPE_FORMAT_R11G11B10_FLOAT
) {
466 lp_build_r11g11b10_to_float(gallivm
, packed
, rgba_out
);
469 lp_build_rgb9e5_to_float(gallivm
, packed
, rgba_out
);
474 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
&&
475 format_desc
->block
.bits
== 64) {
477 * special case the format is 64 bits but we only require
478 * 32bit (or 8bit) from each block.
482 if (format_desc
->format
== PIPE_FORMAT_X32_S8X24_UINT
) {
484 * for stencil simply fix up offsets - could in fact change
485 * base_ptr instead even outside the shader.
487 unsigned mask
= (1 << 8) - 1;
488 LLVMValueRef s_offset
= lp_build_const_int_vec(gallivm
, type
, 4);
489 offset
= LLVMBuildAdd(builder
, offset
, s_offset
, "");
490 packed
= lp_build_gather(gallivm
, type
.length
, 32, type
.width
,
491 aligned
, base_ptr
, offset
, FALSE
);
492 packed
= LLVMBuildAnd(builder
, packed
,
493 lp_build_const_int_vec(gallivm
, type
, mask
), "");
496 assert (format_desc
->format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
);
497 packed
= lp_build_gather(gallivm
, type
.length
, 32, type
.width
,
498 aligned
, base_ptr
, offset
, TRUE
);
499 packed
= LLVMBuildBitCast(builder
, packed
,
500 lp_build_vec_type(gallivm
, type
), "");
502 /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
503 rgba_out
[0] = rgba_out
[1] = rgba_out
[2] = packed
;
504 rgba_out
[3] = lp_build_const_vec(gallivm
, type
, 1.0f
);
509 * Try calling lp_build_fetch_rgba_aos for all pixels.
512 if (util_format_fits_8unorm(format_desc
) &&
513 type
.floating
&& type
.width
== 32 &&
514 (type
.length
== 1 || (type
.length
% 4 == 0))) {
515 struct lp_type tmp_type
;
518 memset(&tmp_type
, 0, sizeof tmp_type
);
520 tmp_type
.length
= type
.length
* 4;
521 tmp_type
.norm
= TRUE
;
523 tmp
= lp_build_fetch_rgba_aos(gallivm
, format_desc
, tmp_type
,
524 aligned
, base_ptr
, offset
, i
, j
, cache
);
526 lp_build_rgba8_to_fi32_soa(gallivm
,
534 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_S3TC
&&
535 /* non-srgb case is already handled above */
536 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_SRGB
&&
537 type
.floating
&& type
.width
== 32 &&
538 (type
.length
== 1 || (type
.length
% 4 == 0)) &&
540 const struct util_format_description
*format_decompressed
;
541 const struct util_format_description
*flinear_desc
;
543 flinear_desc
= util_format_description(util_format_linear(format_desc
->format
));
544 /* This probably only works with aligned data */
545 packed
= lp_build_fetch_cached_texels(gallivm
,
552 packed
= LLVMBuildBitCast(builder
, packed
,
553 lp_build_int_vec_type(gallivm
, type
), "");
555 * The values are now packed so they match ordinary srgb RGBA8 format,
556 * hence need to use matching format for unpack.
558 format_decompressed
= util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB
);
560 lp_build_unpack_rgba_soa(gallivm
,
569 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
571 * This is not the most efficient way of fetching pixels, as we
572 * miss some opportunities to do vectorization, but this is
573 * convenient for formats or scenarios for which there was no
574 * opportunity or incentive to optimize.
579 struct lp_type tmp_type
;
580 LLVMValueRef aos_fetch
[LP_MAX_VECTOR_WIDTH
/ 32];
582 if (gallivm_debug
& GALLIVM_DEBUG_PERF
) {
583 debug_printf("%s: AoS fetch fallback for %s\n",
584 __FUNCTION__
, format_desc
->short_name
);
591 * Note that vector transpose can be worse compared to insert/extract
592 * for aos->soa conversion (for formats with 1 or 2 channels). However,
593 * we should try to avoid getting here for just about all formats, so
597 /* loop over number of pixels */
598 for(k
= 0; k
< type
.length
; ++k
) {
599 LLVMValueRef index
= lp_build_const_int32(gallivm
, k
);
600 LLVMValueRef offset_elem
;
601 LLVMValueRef i_elem
, j_elem
;
603 offset_elem
= LLVMBuildExtractElement(builder
, offset
,
606 i_elem
= LLVMBuildExtractElement(builder
, i
, index
, "");
607 j_elem
= LLVMBuildExtractElement(builder
, j
, index
, "");
609 /* Get a single float[4]={R,G,B,A} pixel */
610 aos_fetch
[k
] = lp_build_fetch_rgba_aos(gallivm
, format_desc
, tmp_type
,
611 aligned
, base_ptr
, offset_elem
,
612 i_elem
, j_elem
, cache
);
615 convert_to_soa(gallivm
, aos_fetch
, rgba_out
, type
);