1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "pipe/p_defines.h"
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_debug.h"
41 #include "lp_bld_format.h"
45 lp_build_format_swizzle_soa(const struct util_format_description
*format_desc
,
46 struct lp_build_context
*bld
,
47 const LLVMValueRef
*unswizzled
,
48 LLVMValueRef swizzled_out
[4])
50 assert(UTIL_FORMAT_SWIZZLE_0
== PIPE_SWIZZLE_ZERO
);
51 assert(UTIL_FORMAT_SWIZZLE_1
== PIPE_SWIZZLE_ONE
);
53 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) {
54 enum util_format_swizzle swizzle
;
55 LLVMValueRef depth_or_stencil
;
57 if (util_format_has_stencil(format_desc
) &&
58 !util_format_has_depth(format_desc
)) {
59 assert(!bld
->type
.floating
);
60 swizzle
= format_desc
->swizzle
[1];
63 assert(bld
->type
.floating
);
64 swizzle
= format_desc
->swizzle
[0];
67 * Return zzz1 or sss1 for depth-stencil formats here.
68 * Correct swizzling will be handled by apply_sampler_swizzle() later.
70 depth_or_stencil
= lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
72 swizzled_out
[2] = swizzled_out
[1] = swizzled_out
[0] = depth_or_stencil
;
73 swizzled_out
[3] = bld
->one
;
77 for (chan
= 0; chan
< 4; ++chan
) {
78 enum util_format_swizzle swizzle
= format_desc
->swizzle
[chan
];
79 swizzled_out
[chan
] = lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
86 * Unpack several pixels in SoA.
88 * It takes a vector of packed pixels:
90 * packed = {P0, P1, P2, P3, ..., Pn}
92 * And will produce four vectors:
94 * red = {R0, R1, R2, R3, ..., Rn}
95 * green = {G0, G1, G2, G3, ..., Gn}
96 * blue = {B0, B1, B2, B3, ..., Bn}
97 * alpha = {A0, A1, A2, A3, ..., An}
99 * It requires that a packed pixel fits into an element of the output
100 * channels. The common case is when converting pixel with a depth of 32 bit or
103 * \param format_desc the format of the 'packed' incoming pixel vector
104 * \param type the desired type for rgba_out (type.length = n, above)
105 * \param packed the incoming vector of packed pixels
106 * \param rgba_out returns the SoA R,G,B,A vectors
109 lp_build_unpack_rgba_soa(struct gallivm_state
*gallivm
,
110 const struct util_format_description
*format_desc
,
113 LLVMValueRef rgba_out
[4])
115 LLVMBuilderRef builder
= gallivm
->builder
;
116 struct lp_build_context bld
;
117 LLVMValueRef inputs
[4];
120 assert(format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
121 assert(format_desc
->block
.width
== 1);
122 assert(format_desc
->block
.height
== 1);
123 assert(format_desc
->block
.bits
<= type
.width
);
124 /* FIXME: Support more output types */
125 assert(type
.width
== 32);
127 lp_build_context_init(&bld
, gallivm
, type
);
129 /* Decode the input vector components */
130 for (chan
= 0; chan
< format_desc
->nr_channels
; ++chan
) {
131 const unsigned width
= format_desc
->channel
[chan
].size
;
132 const unsigned start
= format_desc
->channel
[chan
].shift
;
133 const unsigned stop
= start
+ width
;
138 switch(format_desc
->channel
[chan
].type
) {
139 case UTIL_FORMAT_TYPE_VOID
:
140 input
= lp_build_undef(gallivm
, type
);
143 case UTIL_FORMAT_TYPE_UNSIGNED
:
149 input
= LLVMBuildLShr(builder
, input
, lp_build_const_int_vec(gallivm
, type
, start
), "");
156 if (stop
< format_desc
->block
.bits
) {
157 unsigned mask
= ((unsigned long long)1 << width
) - 1;
158 input
= LLVMBuildAnd(builder
, input
, lp_build_const_int_vec(gallivm
, type
, mask
), "");
166 if(format_desc
->channel
[chan
].normalized
)
167 input
= lp_build_unsigned_norm_to_float(gallivm
, width
, type
, input
);
169 input
= LLVMBuildSIToFP(builder
, input
,
170 lp_build_vec_type(gallivm
, type
), "");
172 else if (format_desc
->channel
[chan
].pure_integer
) {
181 case UTIL_FORMAT_TYPE_SIGNED
:
183 * Align the sign bit first.
186 if (stop
< type
.width
) {
187 unsigned bits
= type
.width
- stop
;
188 LLVMValueRef bits_val
= lp_build_const_int_vec(gallivm
, type
, bits
);
189 input
= LLVMBuildShl(builder
, input
, bits_val
, "");
193 * Align the LSB (with an arithmetic shift to preserve the sign)
196 if (format_desc
->channel
[chan
].size
< type
.width
) {
197 unsigned bits
= type
.width
- format_desc
->channel
[chan
].size
;
198 LLVMValueRef bits_val
= lp_build_const_int_vec(gallivm
, type
, bits
);
199 input
= LLVMBuildAShr(builder
, input
, bits_val
, "");
207 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(gallivm
, type
), "");
208 if (format_desc
->channel
[chan
].normalized
) {
209 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
- 1)) - 1);
210 LLVMValueRef scale_val
= lp_build_const_vec(gallivm
, type
, scale
);
211 input
= LLVMBuildFMul(builder
, input
, scale_val
, "");
214 else if (format_desc
->channel
[chan
].pure_integer
) {
223 case UTIL_FORMAT_TYPE_FLOAT
:
227 assert(type
.width
== 32);
228 input
= LLVMBuildBitCast(builder
, input
, lp_build_vec_type(gallivm
, type
), "");
233 input
= lp_build_undef(gallivm
, type
);
237 case UTIL_FORMAT_TYPE_FIXED
:
239 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
/2)) - 1);
240 LLVMValueRef scale_val
= lp_build_const_vec(gallivm
, type
, scale
);
241 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(gallivm
, type
), "");
242 input
= LLVMBuildFMul(builder
, input
, scale_val
, "");
247 input
= lp_build_undef(gallivm
, type
);
253 input
= lp_build_undef(gallivm
, type
);
257 inputs
[chan
] = input
;
260 lp_build_format_swizzle_soa(format_desc
, &bld
, inputs
, rgba_out
);
265 * Convert a vector of rgba8 values into 32bit wide SoA vectors.
267 * \param dst_type The desired return type. For pure integer formats
268 * this should be a 32bit wide int or uint vector type,
269 * otherwise a float vector type.
271 * \param packed The rgba8 values to pack.
273 * \param rgba The 4 SoA return vectors.
276 lp_build_rgba8_to_fi32_soa(struct gallivm_state
*gallivm
,
277 struct lp_type dst_type
,
281 LLVMBuilderRef builder
= gallivm
->builder
;
282 LLVMValueRef mask
= lp_build_const_int_vec(gallivm
, dst_type
, 0xff);
285 /* XXX technically shouldn't use that for uint dst_type */
286 packed
= LLVMBuildBitCast(builder
, packed
,
287 lp_build_int_vec_type(gallivm
, dst_type
), "");
289 /* Decode the input vector components */
290 for (chan
= 0; chan
< 4; ++chan
) {
291 #ifdef PIPE_ARCH_LITTLE_ENDIAN
292 unsigned start
= chan
*8;
294 unsigned start
= (3-chan
)*8;
296 unsigned stop
= start
+ 8;
302 input
= LLVMBuildLShr(builder
, input
,
303 lp_build_const_int_vec(gallivm
, dst_type
, start
), "");
306 input
= LLVMBuildAnd(builder
, input
, mask
, "");
308 if (dst_type
.floating
)
309 input
= lp_build_unsigned_norm_to_float(gallivm
, 8, dst_type
, input
);
318 * Fetch a texels from a texture, returning them in SoA layout.
320 * \param type the desired return type for 'rgba'. The vector length
321 * is the number of texels to fetch
323 * \param base_ptr points to the base of the texture mip tree.
324 * \param offset offset to start of the texture image block. For non-
325 * compressed formats, this simply is an offset to the texel.
326 * For compressed formats, it is an offset to the start of the
327 * compressed data block.
329 * \param i, j the sub-block pixel coordinates. For non-compressed formats
330 * these will always be (0,0). For compressed formats, i will
331 * be in [0, block_width-1] and j will be in [0, block_height-1].
334 lp_build_fetch_rgba_soa(struct gallivm_state
*gallivm
,
335 const struct util_format_description
*format_desc
,
337 LLVMValueRef base_ptr
,
341 LLVMValueRef rgba_out
[4])
343 LLVMBuilderRef builder
= gallivm
->builder
;
345 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
&&
346 (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
||
347 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) &&
348 format_desc
->block
.width
== 1 &&
349 format_desc
->block
.height
== 1 &&
350 format_desc
->block
.bits
<= type
.width
&&
351 (format_desc
->channel
[0].type
!= UTIL_FORMAT_TYPE_FLOAT
||
352 format_desc
->channel
[0].size
== 32))
355 * The packed pixel fits into an element of the destination format. Put
356 * the packed pixels into a vector and extract each component for all
357 * vector elements in parallel.
363 * gather the texels from the texture
364 * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
366 assert(format_desc
->block
.bits
<= type
.width
);
367 packed
= lp_build_gather(gallivm
,
369 format_desc
->block
.bits
,
371 base_ptr
, offset
, FALSE
);
374 * convert texels to float rgba
376 lp_build_unpack_rgba_soa(gallivm
,
383 if (format_desc
->format
== PIPE_FORMAT_R11G11B10_FLOAT
||
384 format_desc
->format
== PIPE_FORMAT_R9G9B9E5_FLOAT
) {
386 * similar conceptually to above but requiring special
387 * AoS packed -> SoA float conversion code.
391 assert(type
.floating
);
392 assert(type
.width
== 32);
394 packed
= lp_build_gather(gallivm
, type
.length
,
395 format_desc
->block
.bits
,
396 type
.width
, base_ptr
, offset
,
398 if (format_desc
->format
== PIPE_FORMAT_R11G11B10_FLOAT
) {
399 lp_build_r11g11b10_to_float(gallivm
, packed
, rgba_out
);
402 lp_build_rgb9e5_to_float(gallivm
, packed
, rgba_out
);
407 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
&&
408 format_desc
->block
.bits
== 64) {
410 * special case the format is 64 bits but we only require
411 * 32bit (or 8bit) from each block.
415 if (format_desc
->format
== PIPE_FORMAT_X32_S8X24_UINT
) {
417 * for stencil simply fix up offsets - could in fact change
418 * base_ptr instead even outside the shader.
420 unsigned mask
= (1 << 8) - 1;
421 LLVMValueRef s_offset
= lp_build_const_int_vec(gallivm
, type
, 4);
422 offset
= LLVMBuildAdd(builder
, offset
, s_offset
, "");
423 packed
= lp_build_gather(gallivm
, type
.length
,
424 32, type
.width
, base_ptr
, offset
, FALSE
);
425 packed
= LLVMBuildAnd(builder
, packed
,
426 lp_build_const_int_vec(gallivm
, type
, mask
), "");
429 assert (format_desc
->format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
);
430 packed
= lp_build_gather(gallivm
, type
.length
,
431 32, type
.width
, base_ptr
, offset
, TRUE
);
432 packed
= LLVMBuildBitCast(builder
, packed
,
433 lp_build_vec_type(gallivm
, type
), "");
435 /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
436 rgba_out
[0] = rgba_out
[1] = rgba_out
[2] = packed
;
437 rgba_out
[3] = lp_build_const_vec(gallivm
, type
, 1.0f
);
442 * Try calling lp_build_fetch_rgba_aos for all pixels.
445 if (util_format_fits_8unorm(format_desc
) &&
446 type
.floating
&& type
.width
== 32 &&
447 (type
.length
== 1 || (type
.length
% 4 == 0))) {
448 struct lp_type tmp_type
;
451 memset(&tmp_type
, 0, sizeof tmp_type
);
453 tmp_type
.length
= type
.length
* 4;
454 tmp_type
.norm
= TRUE
;
456 tmp
= lp_build_fetch_rgba_aos(gallivm
, format_desc
, tmp_type
,
457 base_ptr
, offset
, i
, j
);
459 lp_build_rgba8_to_fi32_soa(gallivm
,
468 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
470 * This is not the most efficient way of fetching pixels, as we
471 * miss some opportunities to do vectorization, but this is
472 * convenient for formats or scenarios for which there was no
473 * opportunity or incentive to optimize.
478 struct lp_type tmp_type
;
480 if (gallivm_debug
& GALLIVM_DEBUG_PERF
) {
481 debug_printf("%s: scalar unpacking of %s\n",
482 __FUNCTION__
, format_desc
->short_name
);
488 for (chan
= 0; chan
< 4; ++chan
) {
489 rgba_out
[chan
] = lp_build_undef(gallivm
, type
);
492 /* loop over number of pixels */
493 for(k
= 0; k
< type
.length
; ++k
) {
494 LLVMValueRef index
= lp_build_const_int32(gallivm
, k
);
495 LLVMValueRef offset_elem
;
496 LLVMValueRef i_elem
, j_elem
;
499 offset_elem
= LLVMBuildExtractElement(builder
, offset
,
502 i_elem
= LLVMBuildExtractElement(builder
, i
, index
, "");
503 j_elem
= LLVMBuildExtractElement(builder
, j
, index
, "");
505 /* Get a single float[4]={R,G,B,A} pixel */
506 tmp
= lp_build_fetch_rgba_aos(gallivm
, format_desc
, tmp_type
,
507 base_ptr
, offset_elem
,
511 * Insert the AoS tmp value channels into the SoA result vectors at
512 * position = 'index'.
514 for (chan
= 0; chan
< 4; ++chan
) {
515 LLVMValueRef chan_val
= lp_build_const_int32(gallivm
, chan
),
516 tmp_chan
= LLVMBuildExtractElement(builder
, tmp
, chan_val
, "");
517 rgba_out
[chan
] = LLVMBuildInsertElement(builder
, rgba_out
[chan
],
518 tmp_chan
, index
, "");