1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "pipe/p_defines.h"
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_format.h"
44 lp_build_format_swizzle_soa(const struct util_format_description
*format_desc
,
45 struct lp_build_context
*bld
,
46 const LLVMValueRef
*unswizzled
,
47 LLVMValueRef swizzled_out
[4])
49 assert(UTIL_FORMAT_SWIZZLE_0
== PIPE_SWIZZLE_ZERO
);
50 assert(UTIL_FORMAT_SWIZZLE_1
== PIPE_SWIZZLE_ONE
);
52 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) {
54 * Return zzz1 for depth-stencil formats.
56 * XXX: Allow to control the depth swizzle with an additional parameter,
57 * as the caller may wish another depth swizzle, or retain the stencil
60 enum util_format_swizzle swizzle
= format_desc
->swizzle
[0];
61 LLVMValueRef depth
= lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
62 swizzled_out
[2] = swizzled_out
[1] = swizzled_out
[0] = depth
;
63 swizzled_out
[3] = bld
->one
;
67 for (chan
= 0; chan
< 4; ++chan
) {
68 enum util_format_swizzle swizzle
= format_desc
->swizzle
[chan
];
69 swizzled_out
[chan
] = lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
76 * Unpack several pixels in SoA.
78 * It takes a vector of packed pixels:
80 * packed = {P0, P1, P2, P3, ..., Pn}
82 * And will produce four vectors:
84 * red = {R0, R1, R2, R3, ..., Rn}
85 * green = {G0, G1, G2, G3, ..., Gn}
86 * blue = {B0, B1, B2, B3, ..., Bn}
87 * alpha = {A0, A1, A2, A3, ..., An}
89 * It requires that a packed pixel fits into an element of the output
90 * channels. The common case is when converting pixel with a depth of 32 bit or
93 * \param format_desc the format of the 'packed' incoming pixel vector
94 * \param type the desired type for rgba_out (type.length = n, above)
95 * \param packed the incoming vector of packed pixels
96 * \param rgba_out returns the SoA R,G,B,A vectors
99 lp_build_unpack_rgba_soa(LLVMBuilderRef builder
,
100 const struct util_format_description
*format_desc
,
103 LLVMValueRef rgba_out
[4])
105 struct lp_build_context bld
;
106 LLVMValueRef inputs
[4];
110 assert(format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
111 assert(format_desc
->block
.width
== 1);
112 assert(format_desc
->block
.height
== 1);
113 assert(format_desc
->block
.bits
<= type
.width
);
114 /* FIXME: Support more output types */
115 assert(type
.floating
);
116 assert(type
.width
== 32);
118 lp_build_context_init(&bld
, builder
, type
);
120 /* Decode the input vector components */
122 for (chan
= 0; chan
< format_desc
->nr_channels
; ++chan
) {
123 const unsigned width
= format_desc
->channel
[chan
].size
;
124 const unsigned stop
= start
+ width
;
129 switch(format_desc
->channel
[chan
].type
) {
130 case UTIL_FORMAT_TYPE_VOID
:
131 input
= lp_build_undef(type
);
134 case UTIL_FORMAT_TYPE_UNSIGNED
:
140 input
= LLVMBuildLShr(builder
, input
, lp_build_const_int_vec(type
, start
), "");
147 if (stop
< format_desc
->block
.bits
) {
148 unsigned mask
= ((unsigned long long)1 << width
) - 1;
149 input
= LLVMBuildAnd(builder
, input
, lp_build_const_int_vec(type
, mask
), "");
157 if(format_desc
->channel
[chan
].normalized
)
158 input
= lp_build_unsigned_norm_to_float(builder
, width
, type
, input
);
160 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(type
), "");
165 input
= lp_build_undef(type
);
170 case UTIL_FORMAT_TYPE_SIGNED
:
172 * Align the sign bit first.
175 if (stop
< type
.width
) {
176 unsigned bits
= type
.width
- stop
;
177 LLVMValueRef bits_val
= lp_build_const_int_vec(type
, bits
);
178 input
= LLVMBuildShl(builder
, input
, bits_val
, "");
182 * Align the LSB (with an arithmetic shift to preserve the sign)
185 if (format_desc
->channel
[chan
].size
< type
.width
) {
186 unsigned bits
= type
.width
- format_desc
->channel
[chan
].size
;
187 LLVMValueRef bits_val
= lp_build_const_int_vec(type
, bits
);
188 input
= LLVMBuildAShr(builder
, input
, bits_val
, "");
196 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(type
), "");
197 if (format_desc
->channel
[chan
].normalized
) {
198 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
- 1)) - 1);
199 LLVMValueRef scale_val
= lp_build_const_vec(type
, scale
);
200 input
= LLVMBuildMul(builder
, input
, scale_val
, "");
206 input
= lp_build_undef(type
);
211 case UTIL_FORMAT_TYPE_FLOAT
:
215 assert(type
.width
== 32);
216 input
= LLVMBuildBitCast(builder
, input
, lp_build_vec_type(type
), "");
221 input
= lp_build_undef(type
);
225 case UTIL_FORMAT_TYPE_FIXED
:
227 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
/2)) - 1);
228 LLVMValueRef scale_val
= lp_build_const_vec(type
, scale
);
229 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(type
), "");
230 input
= LLVMBuildMul(builder
, input
, scale_val
, "");
235 input
= lp_build_undef(type
);
241 input
= lp_build_undef(type
);
245 inputs
[chan
] = input
;
250 lp_build_format_swizzle_soa(format_desc
, &bld
, inputs
, rgba_out
);
255 lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder
,
256 struct lp_type dst_type
,
260 LLVMValueRef mask
= lp_build_const_int_vec(dst_type
, 0xff);
263 packed
= LLVMBuildBitCast(builder
, packed
,
264 lp_build_int_vec_type(dst_type
), "");
266 /* Decode the input vector components */
267 for (chan
= 0; chan
< 4; ++chan
) {
268 unsigned start
= chan
*8;
269 unsigned stop
= start
+ 8;
275 input
= LLVMBuildLShr(builder
, input
,
276 lp_build_const_int_vec(dst_type
, start
), "");
279 input
= LLVMBuildAnd(builder
, input
, mask
, "");
281 input
= lp_build_unsigned_norm_to_float(builder
, 8, dst_type
, input
);
290 * Fetch a texels from a texture, returning them in SoA layout.
292 * \param type the desired return type for 'rgba'. The vector length
293 * is the number of texels to fetch
295 * \param base_ptr points to start of the texture image block. For non-
296 * compressed formats, this simply points to the texel.
297 * For compressed formats, it points to the start of the
298 * compressed data block.
300 * \param i, j the sub-block pixel coordinates. For non-compressed formats
301 * these will always be (0,0). For compressed formats, i will
302 * be in [0, block_width-1] and j will be in [0, block_height-1].
305 lp_build_fetch_rgba_soa(LLVMBuilderRef builder
,
306 const struct util_format_description
*format_desc
,
308 LLVMValueRef base_ptr
,
312 LLVMValueRef rgba_out
[4])
315 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
&&
316 (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
||
317 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) &&
318 format_desc
->block
.width
== 1 &&
319 format_desc
->block
.height
== 1 &&
320 format_desc
->block
.bits
<= type
.width
&&
321 (format_desc
->channel
[0].type
!= UTIL_FORMAT_TYPE_FLOAT
||
322 format_desc
->channel
[0].size
== 32))
325 * The packed pixel fits into an element of the destination format. Put
326 * the packed pixels into a vector and extract each component for all
327 * vector elements in parallel.
333 * gather the texels from the texture
334 * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
336 packed
= lp_build_gather(builder
,
338 format_desc
->block
.bits
,
343 * convert texels to float rgba
345 lp_build_unpack_rgba_soa(builder
,
353 * Try calling lp_build_fetch_rgba_aos for all pixels.
356 if (util_format_fits_8unorm(format_desc
) &&
357 type
.floating
&& type
.width
== 32 && type
.length
== 4) {
358 struct lp_type tmp_type
;
361 memset(&tmp_type
, 0, sizeof tmp_type
);
363 tmp_type
.length
= type
.length
* 4;
364 tmp_type
.norm
= TRUE
;
366 tmp
= lp_build_fetch_rgba_aos(builder
, format_desc
, tmp_type
,
367 base_ptr
, offset
, i
, j
);
369 lp_build_rgba8_to_f32_soa(builder
,
378 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
380 * This is not the most efficient way of fetching pixels, as we
381 * miss some opportunities to do vectorization, but this is
382 * convenient for formats or scenarios for which there was no
383 * opportunity or incentive to optimize.
388 struct lp_type tmp_type
;
393 for (chan
= 0; chan
< 4; ++chan
) {
394 rgba_out
[chan
] = lp_build_undef(type
);
397 /* loop over number of pixels */
398 for(k
= 0; k
< type
.length
; ++k
) {
399 LLVMValueRef index
= LLVMConstInt(LLVMInt32Type(), k
, 0);
400 LLVMValueRef offset_elem
;
401 LLVMValueRef i_elem
, j_elem
;
404 offset_elem
= LLVMBuildExtractElement(builder
, offset
, index
, "");
406 i_elem
= LLVMBuildExtractElement(builder
, i
, index
, "");
407 j_elem
= LLVMBuildExtractElement(builder
, j
, index
, "");
409 /* Get a single float[4]={R,G,B,A} pixel */
410 tmp
= lp_build_fetch_rgba_aos(builder
, format_desc
, tmp_type
,
411 base_ptr
, offset_elem
,
415 * Insert the AoS tmp value channels into the SoA result vectors at
416 * position = 'index'.
418 for (chan
= 0; chan
< 4; ++chan
) {
419 LLVMValueRef chan_val
= LLVMConstInt(LLVMInt32Type(), chan
, 0),
420 tmp_chan
= LLVMBuildExtractElement(builder
, tmp
, chan_val
, "");
421 rgba_out
[chan
] = LLVMBuildInsertElement(builder
, rgba_out
[chan
],
422 tmp_chan
, index
, "");