1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "pipe/p_defines.h"
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_sample.h" /* for lp_build_gather */
40 #include "lp_bld_format.h"
44 lp_build_format_swizzle_soa(const struct util_format_description
*format_desc
,
45 struct lp_build_context
*bld
,
46 const LLVMValueRef
*unswizzled
,
47 LLVMValueRef swizzled_out
[4])
49 assert(UTIL_FORMAT_SWIZZLE_0
== PIPE_SWIZZLE_ZERO
);
50 assert(UTIL_FORMAT_SWIZZLE_1
== PIPE_SWIZZLE_ONE
);
52 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) {
54 * Return zzz1 for depth-stencil formats.
56 * XXX: Allow to control the depth swizzle with an additional parameter,
57 * as the caller may wish another depth swizzle, or retain the stencil
60 enum util_format_swizzle swizzle
= format_desc
->swizzle
[0];
61 LLVMValueRef depth
= lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
62 swizzled_out
[2] = swizzled_out
[1] = swizzled_out
[0] = depth
;
63 swizzled_out
[3] = bld
->one
;
67 for (chan
= 0; chan
< 4; ++chan
) {
68 enum util_format_swizzle swizzle
= format_desc
->swizzle
[chan
];
69 swizzled_out
[chan
] = lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
76 * Unpack several pixels in SoA.
78 * It takes a vector of packed pixels:
80 * packed = {P0, P1, P2, P3, ..., Pn}
82 * And will produce four vectors:
84 * red = {R0, R1, R2, R3, ..., Rn}
85 * green = {G0, G1, G2, G3, ..., Gn}
86 * blue = {B0, B1, B2, B3, ..., Bn}
87 * alpha = {A0, A1, A2, A3, ..., An}
89 * It requires that a packed pixel fits into an element of the output
90 * channels. The common case is when converting pixel with a depth of 32 bit or
93 * \param format_desc the format of the 'packed' incoming pixel vector
94 * \param type the desired type for rgba_out (type.length = n, above)
95 * \param packed the incoming vector of packed pixels
96 * \param rgba_out returns the SoA R,G,B,A vectors
99 lp_build_unpack_rgba_soa(LLVMBuilderRef builder
,
100 const struct util_format_description
*format_desc
,
103 LLVMValueRef rgba_out
[4])
105 struct lp_build_context bld
;
106 LLVMValueRef inputs
[4];
110 assert(format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
111 assert(format_desc
->block
.width
== 1);
112 assert(format_desc
->block
.height
== 1);
113 assert(format_desc
->block
.bits
<= type
.width
);
114 /* FIXME: Support more output types */
115 assert(type
.floating
);
116 assert(type
.width
== 32);
118 lp_build_context_init(&bld
, builder
, type
);
120 /* Decode the input vector components */
122 for (chan
= 0; chan
< format_desc
->nr_channels
; ++chan
) {
123 const unsigned width
= format_desc
->channel
[chan
].size
;
124 const unsigned stop
= start
+ width
;
129 switch(format_desc
->channel
[chan
].type
) {
130 case UTIL_FORMAT_TYPE_VOID
:
131 input
= lp_build_undef(type
);
134 case UTIL_FORMAT_TYPE_UNSIGNED
:
140 input
= LLVMBuildLShr(builder
, input
, lp_build_const_int_vec(type
, start
), "");
147 if (stop
< format_desc
->block
.bits
) {
148 unsigned mask
= ((unsigned long long)1 << width
) - 1;
149 input
= LLVMBuildAnd(builder
, input
, lp_build_const_int_vec(type
, mask
), "");
157 if(format_desc
->channel
[chan
].normalized
)
158 input
= lp_build_unsigned_norm_to_float(builder
, width
, type
, input
);
160 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(type
), "");
165 input
= lp_build_undef(type
);
170 case UTIL_FORMAT_TYPE_SIGNED
:
172 * Align the sign bit first.
175 if (stop
< type
.width
) {
176 unsigned bits
= type
.width
- stop
;
177 LLVMValueRef bits_val
= lp_build_const_int_vec(type
, bits
);
178 input
= LLVMBuildShl(builder
, input
, bits_val
, "");
182 * Align the LSB (with an arithmetic shift to preserve the sign)
185 if (format_desc
->channel
[chan
].size
< type
.width
) {
186 unsigned bits
= type
.width
- format_desc
->channel
[chan
].size
;
187 LLVMValueRef bits_val
= lp_build_const_int_vec(type
, bits
);
188 input
= LLVMBuildAShr(builder
, input
, bits_val
, "");
196 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(type
), "");
197 if (format_desc
->channel
[chan
].normalized
) {
198 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
- 1)) - 1);
199 LLVMValueRef scale_val
= lp_build_const_vec(type
, scale
);
200 input
= LLVMBuildMul(builder
, input
, scale_val
, "");
206 input
= lp_build_undef(type
);
211 case UTIL_FORMAT_TYPE_FLOAT
:
215 assert(type
.width
== 32);
216 input
= LLVMBuildBitCast(builder
, input
, lp_build_vec_type(type
), "");
221 input
= lp_build_undef(type
);
225 case UTIL_FORMAT_TYPE_FIXED
:
227 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
/2)) - 1);
228 LLVMValueRef scale_val
= lp_build_const_vec(type
, scale
);
229 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(type
), "");
230 input
= LLVMBuildMul(builder
, input
, scale_val
, "");
235 input
= lp_build_undef(type
);
241 input
= lp_build_undef(type
);
245 inputs
[chan
] = input
;
250 lp_build_format_swizzle_soa(format_desc
, &bld
, inputs
, rgba_out
);
255 * Fetch a texels from a texture, returning them in SoA layout.
257 * \param type the desired return type for 'rgba'. The vector length
258 * is the number of texels to fetch
260 * \param base_ptr points to start of the texture image block. For non-
261 * compressed formats, this simply points to the texel.
262 * For compressed formats, it points to the start of the
263 * compressed data block.
265 * \param i, j the sub-block pixel coordinates. For non-compressed formats
266 * these will always be (0,0). For compressed formats, i will
267 * be in [0, block_width-1] and j will be in [0, block_height-1].
270 lp_build_fetch_rgba_soa(LLVMBuilderRef builder
,
271 const struct util_format_description
*format_desc
,
273 LLVMValueRef base_ptr
,
277 LLVMValueRef rgba_out
[4])
280 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
&&
281 (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
||
282 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) &&
283 format_desc
->block
.width
== 1 &&
284 format_desc
->block
.height
== 1 &&
285 format_desc
->block
.bits
<= type
.width
&&
286 (format_desc
->channel
[0].type
!= UTIL_FORMAT_TYPE_FLOAT
||
287 format_desc
->channel
[0].size
== 32))
290 * The packed pixel fits into an element of the destination format. Put
291 * the packed pixels into a vector and extract each component for all
292 * vector elements in parallel.
298 * gather the texels from the texture
299 * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
301 packed
= lp_build_gather(builder
,
303 format_desc
->block
.bits
,
308 * convert texels to float rgba
310 lp_build_unpack_rgba_soa(builder
,
317 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
319 * This is not the most efficient way of fetching pixels, as we
320 * miss some opportunities to do vectorization, but this is
321 * convenient for formats or scenarios for which there was no
322 * opportunity or incentive to optimize.
327 assert(type
.floating
);
329 for (chan
= 0; chan
< 4; ++chan
) {
330 rgba_out
[chan
] = lp_build_undef(type
);
333 /* loop over number of pixels */
334 for(k
= 0; k
< type
.length
; ++k
) {
335 LLVMValueRef index
= LLVMConstInt(LLVMInt32Type(), k
, 0);
336 LLVMValueRef offset_elem
;
338 LLVMValueRef i_elem
, j_elem
;
341 offset_elem
= LLVMBuildExtractElement(builder
, offset
, index
, "");
342 ptr
= LLVMBuildGEP(builder
, base_ptr
, &offset_elem
, 1, "");
344 i_elem
= LLVMBuildExtractElement(builder
, i
, index
, "");
345 j_elem
= LLVMBuildExtractElement(builder
, j
, index
, "");
347 /* Get a single float[4]={R,G,B,A} pixel */
348 tmp
= lp_build_fetch_rgba_aos(builder
, format_desc
, ptr
,
352 * Insert the AoS tmp value channels into the SoA result vectors at
353 * position = 'index'.
355 for (chan
= 0; chan
< 4; ++chan
) {
356 LLVMValueRef chan_val
= LLVMConstInt(LLVMInt32Type(), chan
, 0),
357 tmp_chan
= LLVMBuildExtractElement(builder
, tmp
, chan_val
, "");
358 rgba_out
[chan
] = LLVMBuildInsertElement(builder
, rgba_out
[chan
],
359 tmp_chan
, index
, "");