/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
29 #include "pipe/p_defines.h"
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_debug.h"
41 #include "lp_bld_format.h"
45 lp_build_format_swizzle_soa(const struct util_format_description
*format_desc
,
46 struct lp_build_context
*bld
,
47 const LLVMValueRef
*unswizzled
,
48 LLVMValueRef swizzled_out
[4])
50 assert(UTIL_FORMAT_SWIZZLE_0
== PIPE_SWIZZLE_ZERO
);
51 assert(UTIL_FORMAT_SWIZZLE_1
== PIPE_SWIZZLE_ONE
);
53 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) {
55 * Return zzz1 for depth-stencil formats.
57 * XXX: Allow to control the depth swizzle with an additional parameter,
58 * as the caller may wish another depth swizzle, or retain the stencil
61 enum util_format_swizzle swizzle
= format_desc
->swizzle
[0];
62 LLVMValueRef depth
= lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
63 swizzled_out
[2] = swizzled_out
[1] = swizzled_out
[0] = depth
;
64 swizzled_out
[3] = bld
->one
;
68 for (chan
= 0; chan
< 4; ++chan
) {
69 enum util_format_swizzle swizzle
= format_desc
->swizzle
[chan
];
70 swizzled_out
[chan
] = lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
77 * Unpack several pixels in SoA.
79 * It takes a vector of packed pixels:
81 * packed = {P0, P1, P2, P3, ..., Pn}
83 * And will produce four vectors:
85 * red = {R0, R1, R2, R3, ..., Rn}
86 * green = {G0, G1, G2, G3, ..., Gn}
87 * blue = {B0, B1, B2, B3, ..., Bn}
88 * alpha = {A0, A1, A2, A3, ..., An}
90 * It requires that a packed pixel fits into an element of the output
91 * channels. The common case is when converting pixel with a depth of 32 bit or
94 * \param format_desc the format of the 'packed' incoming pixel vector
95 * \param type the desired type for rgba_out (type.length = n, above)
96 * \param packed the incoming vector of packed pixels
97 * \param rgba_out returns the SoA R,G,B,A vectors
100 lp_build_unpack_rgba_soa(struct gallivm_state
*gallivm
,
101 const struct util_format_description
*format_desc
,
104 LLVMValueRef rgba_out
[4])
106 LLVMBuilderRef builder
= gallivm
->builder
;
107 struct lp_build_context bld
;
108 LLVMValueRef inputs
[4];
112 assert(format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
113 assert(format_desc
->block
.width
== 1);
114 assert(format_desc
->block
.height
== 1);
115 assert(format_desc
->block
.bits
<= type
.width
);
116 /* FIXME: Support more output types */
117 assert(type
.floating
);
118 assert(type
.width
== 32);
120 lp_build_context_init(&bld
, gallivm
, type
);
122 /* Decode the input vector components */
124 for (chan
= 0; chan
< format_desc
->nr_channels
; ++chan
) {
125 const unsigned width
= format_desc
->channel
[chan
].size
;
126 const unsigned stop
= start
+ width
;
131 switch(format_desc
->channel
[chan
].type
) {
132 case UTIL_FORMAT_TYPE_VOID
:
133 input
= lp_build_undef(gallivm
, type
);
136 case UTIL_FORMAT_TYPE_UNSIGNED
:
142 input
= LLVMBuildLShr(builder
, input
, lp_build_const_int_vec(gallivm
, type
, start
), "");
149 if (stop
< format_desc
->block
.bits
) {
150 unsigned mask
= ((unsigned long long)1 << width
) - 1;
151 input
= LLVMBuildAnd(builder
, input
, lp_build_const_int_vec(gallivm
, type
, mask
), "");
159 if(format_desc
->channel
[chan
].normalized
)
160 input
= lp_build_unsigned_norm_to_float(gallivm
, width
, type
, input
);
162 input
= LLVMBuildSIToFP(builder
, input
,
163 lp_build_vec_type(gallivm
, type
), "");
168 input
= lp_build_undef(gallivm
, type
);
173 case UTIL_FORMAT_TYPE_SIGNED
:
175 * Align the sign bit first.
178 if (stop
< type
.width
) {
179 unsigned bits
= type
.width
- stop
;
180 LLVMValueRef bits_val
= lp_build_const_int_vec(gallivm
, type
, bits
);
181 input
= LLVMBuildShl(builder
, input
, bits_val
, "");
185 * Align the LSB (with an arithmetic shift to preserve the sign)
188 if (format_desc
->channel
[chan
].size
< type
.width
) {
189 unsigned bits
= type
.width
- format_desc
->channel
[chan
].size
;
190 LLVMValueRef bits_val
= lp_build_const_int_vec(gallivm
, type
, bits
);
191 input
= LLVMBuildAShr(builder
, input
, bits_val
, "");
199 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(gallivm
, type
), "");
200 if (format_desc
->channel
[chan
].normalized
) {
201 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
- 1)) - 1);
202 LLVMValueRef scale_val
= lp_build_const_vec(gallivm
, type
, scale
);
203 input
= LLVMBuildFMul(builder
, input
, scale_val
, "");
209 input
= lp_build_undef(gallivm
, type
);
214 case UTIL_FORMAT_TYPE_FLOAT
:
218 assert(type
.width
== 32);
219 input
= LLVMBuildBitCast(builder
, input
, lp_build_vec_type(gallivm
, type
), "");
224 input
= lp_build_undef(gallivm
, type
);
228 case UTIL_FORMAT_TYPE_FIXED
:
230 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
/2)) - 1);
231 LLVMValueRef scale_val
= lp_build_const_vec(gallivm
, type
, scale
);
232 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(gallivm
, type
), "");
233 input
= LLVMBuildFMul(builder
, input
, scale_val
, "");
238 input
= lp_build_undef(gallivm
, type
);
244 input
= lp_build_undef(gallivm
, type
);
248 inputs
[chan
] = input
;
253 lp_build_format_swizzle_soa(format_desc
, &bld
, inputs
, rgba_out
);
258 lp_build_rgba8_to_f32_soa(struct gallivm_state
*gallivm
,
259 struct lp_type dst_type
,
263 LLVMBuilderRef builder
= gallivm
->builder
;
264 LLVMValueRef mask
= lp_build_const_int_vec(gallivm
, dst_type
, 0xff);
267 packed
= LLVMBuildBitCast(builder
, packed
,
268 lp_build_int_vec_type(gallivm
, dst_type
), "");
270 /* Decode the input vector components */
271 for (chan
= 0; chan
< 4; ++chan
) {
272 unsigned start
= chan
*8;
273 unsigned stop
= start
+ 8;
279 input
= LLVMBuildLShr(builder
, input
,
280 lp_build_const_int_vec(gallivm
, dst_type
, start
), "");
283 input
= LLVMBuildAnd(builder
, input
, mask
, "");
285 input
= lp_build_unsigned_norm_to_float(gallivm
, 8, dst_type
, input
);
294 * Fetch a texels from a texture, returning them in SoA layout.
296 * \param type the desired return type for 'rgba'. The vector length
297 * is the number of texels to fetch
299 * \param base_ptr points to start of the texture image block. For non-
300 * compressed formats, this simply points to the texel.
301 * For compressed formats, it points to the start of the
302 * compressed data block.
304 * \param i, j the sub-block pixel coordinates. For non-compressed formats
305 * these will always be (0,0). For compressed formats, i will
306 * be in [0, block_width-1] and j will be in [0, block_height-1].
309 lp_build_fetch_rgba_soa(struct gallivm_state
*gallivm
,
310 const struct util_format_description
*format_desc
,
312 LLVMValueRef base_ptr
,
316 LLVMValueRef rgba_out
[4])
318 LLVMBuilderRef builder
= gallivm
->builder
;
320 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
&&
321 (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
||
322 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) &&
323 format_desc
->block
.width
== 1 &&
324 format_desc
->block
.height
== 1 &&
325 format_desc
->block
.bits
<= type
.width
&&
326 (format_desc
->channel
[0].type
!= UTIL_FORMAT_TYPE_FLOAT
||
327 format_desc
->channel
[0].size
== 32))
330 * The packed pixel fits into an element of the destination format. Put
331 * the packed pixels into a vector and extract each component for all
332 * vector elements in parallel.
338 * gather the texels from the texture
339 * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
341 packed
= lp_build_gather(gallivm
,
343 format_desc
->block
.bits
,
348 * convert texels to float rgba
350 lp_build_unpack_rgba_soa(gallivm
,
358 * Try calling lp_build_fetch_rgba_aos for all pixels.
361 if (util_format_fits_8unorm(format_desc
) &&
362 type
.floating
&& type
.width
== 32 && type
.length
== 4) {
363 struct lp_type tmp_type
;
366 memset(&tmp_type
, 0, sizeof tmp_type
);
368 tmp_type
.length
= type
.length
* 4;
369 tmp_type
.norm
= TRUE
;
371 tmp
= lp_build_fetch_rgba_aos(gallivm
, format_desc
, tmp_type
,
372 base_ptr
, offset
, i
, j
);
374 lp_build_rgba8_to_f32_soa(gallivm
,
383 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
385 * This is not the most efficient way of fetching pixels, as we
386 * miss some opportunities to do vectorization, but this is
387 * convenient for formats or scenarios for which there was no
388 * opportunity or incentive to optimize.
393 struct lp_type tmp_type
;
395 if (gallivm_debug
& GALLIVM_DEBUG_PERF
) {
396 debug_printf("%s: scalar unpacking of %s\n",
397 __FUNCTION__
, format_desc
->short_name
);
403 for (chan
= 0; chan
< 4; ++chan
) {
404 rgba_out
[chan
] = lp_build_undef(gallivm
, type
);
407 /* loop over number of pixels */
408 for(k
= 0; k
< type
.length
; ++k
) {
409 LLVMValueRef index
= lp_build_const_int32(gallivm
, k
);
410 LLVMValueRef offset_elem
;
411 LLVMValueRef i_elem
, j_elem
;
414 offset_elem
= LLVMBuildExtractElement(builder
, offset
,
417 i_elem
= LLVMBuildExtractElement(builder
, i
, index
, "");
418 j_elem
= LLVMBuildExtractElement(builder
, j
, index
, "");
420 /* Get a single float[4]={R,G,B,A} pixel */
421 tmp
= lp_build_fetch_rgba_aos(gallivm
, format_desc
, tmp_type
,
422 base_ptr
, offset_elem
,
426 * Insert the AoS tmp value channels into the SoA result vectors at
427 * position = 'index'.
429 for (chan
= 0; chan
< 4; ++chan
) {
430 LLVMValueRef chan_val
= lp_build_const_int32(gallivm
, chan
),
431 tmp_chan
= LLVMBuildExtractElement(builder
, tmp
, chan_val
, "");
432 rgba_out
[chan
] = LLVMBuildInsertElement(builder
, rgba_out
[chan
],
433 tmp_chan
, index
, "");