1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "pipe/p_defines.h"
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_debug.h"
41 #include "lp_bld_format.h"
45 lp_build_format_swizzle_soa(const struct util_format_description
*format_desc
,
46 struct lp_build_context
*bld
,
47 const LLVMValueRef
*unswizzled
,
48 LLVMValueRef swizzled_out
[4])
50 assert(UTIL_FORMAT_SWIZZLE_0
== PIPE_SWIZZLE_ZERO
);
51 assert(UTIL_FORMAT_SWIZZLE_1
== PIPE_SWIZZLE_ONE
);
53 if (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) {
55 * Return zzz1 for depth-stencil formats.
57 * XXX: Allow to control the depth swizzle with an additional parameter,
58 * as the caller may wish another depth swizzle, or retain the stencil
61 enum util_format_swizzle swizzle
= format_desc
->swizzle
[0];
62 LLVMValueRef depth
= lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
63 swizzled_out
[2] = swizzled_out
[1] = swizzled_out
[0] = depth
;
64 swizzled_out
[3] = bld
->one
;
68 for (chan
= 0; chan
< 4; ++chan
) {
69 enum util_format_swizzle swizzle
= format_desc
->swizzle
[chan
];
70 swizzled_out
[chan
] = lp_build_swizzle_soa_channel(bld
, unswizzled
, swizzle
);
77 * Unpack several pixels in SoA.
79 * It takes a vector of packed pixels:
81 * packed = {P0, P1, P2, P3, ..., Pn}
83 * And will produce four vectors:
85 * red = {R0, R1, R2, R3, ..., Rn}
86 * green = {G0, G1, G2, G3, ..., Gn}
87 * blue = {B0, B1, B2, B3, ..., Bn}
88 * alpha = {A0, A1, A2, A3, ..., An}
90 * It requires that a packed pixel fits into an element of the output
91 * channels. The common case is when converting pixel with a depth of 32 bit or
94 * \param format_desc the format of the 'packed' incoming pixel vector
95 * \param type the desired type for rgba_out (type.length = n, above)
96 * \param packed the incoming vector of packed pixels
97 * \param rgba_out returns the SoA R,G,B,A vectors
100 lp_build_unpack_rgba_soa(LLVMBuilderRef builder
,
101 const struct util_format_description
*format_desc
,
104 LLVMValueRef rgba_out
[4])
106 struct lp_build_context bld
;
107 LLVMValueRef inputs
[4];
111 assert(format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
112 assert(format_desc
->block
.width
== 1);
113 assert(format_desc
->block
.height
== 1);
114 assert(format_desc
->block
.bits
<= type
.width
);
115 /* FIXME: Support more output types */
116 assert(type
.floating
);
117 assert(type
.width
== 32);
119 lp_build_context_init(&bld
, builder
, type
);
121 /* Decode the input vector components */
123 for (chan
= 0; chan
< format_desc
->nr_channels
; ++chan
) {
124 const unsigned width
= format_desc
->channel
[chan
].size
;
125 const unsigned stop
= start
+ width
;
130 switch(format_desc
->channel
[chan
].type
) {
131 case UTIL_FORMAT_TYPE_VOID
:
132 input
= lp_build_undef(type
);
135 case UTIL_FORMAT_TYPE_UNSIGNED
:
141 input
= LLVMBuildLShr(builder
, input
, lp_build_const_int_vec(type
, start
), "");
148 if (stop
< format_desc
->block
.bits
) {
149 unsigned mask
= ((unsigned long long)1 << width
) - 1;
150 input
= LLVMBuildAnd(builder
, input
, lp_build_const_int_vec(type
, mask
), "");
158 if(format_desc
->channel
[chan
].normalized
)
159 input
= lp_build_unsigned_norm_to_float(builder
, width
, type
, input
);
161 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(type
), "");
166 input
= lp_build_undef(type
);
171 case UTIL_FORMAT_TYPE_SIGNED
:
173 * Align the sign bit first.
176 if (stop
< type
.width
) {
177 unsigned bits
= type
.width
- stop
;
178 LLVMValueRef bits_val
= lp_build_const_int_vec(type
, bits
);
179 input
= LLVMBuildShl(builder
, input
, bits_val
, "");
183 * Align the LSB (with an arithmetic shift to preserve the sign)
186 if (format_desc
->channel
[chan
].size
< type
.width
) {
187 unsigned bits
= type
.width
- format_desc
->channel
[chan
].size
;
188 LLVMValueRef bits_val
= lp_build_const_int_vec(type
, bits
);
189 input
= LLVMBuildAShr(builder
, input
, bits_val
, "");
197 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(type
), "");
198 if (format_desc
->channel
[chan
].normalized
) {
199 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
- 1)) - 1);
200 LLVMValueRef scale_val
= lp_build_const_vec(type
, scale
);
201 input
= LLVMBuildFMul(builder
, input
, scale_val
, "");
207 input
= lp_build_undef(type
);
212 case UTIL_FORMAT_TYPE_FLOAT
:
216 assert(type
.width
== 32);
217 input
= LLVMBuildBitCast(builder
, input
, lp_build_vec_type(type
), "");
222 input
= lp_build_undef(type
);
226 case UTIL_FORMAT_TYPE_FIXED
:
228 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
/2)) - 1);
229 LLVMValueRef scale_val
= lp_build_const_vec(type
, scale
);
230 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(type
), "");
231 input
= LLVMBuildFMul(builder
, input
, scale_val
, "");
236 input
= lp_build_undef(type
);
242 input
= lp_build_undef(type
);
246 inputs
[chan
] = input
;
251 lp_build_format_swizzle_soa(format_desc
, &bld
, inputs
, rgba_out
);
256 lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder
,
257 struct lp_type dst_type
,
261 LLVMValueRef mask
= lp_build_const_int_vec(dst_type
, 0xff);
264 packed
= LLVMBuildBitCast(builder
, packed
,
265 lp_build_int_vec_type(dst_type
), "");
267 /* Decode the input vector components */
268 for (chan
= 0; chan
< 4; ++chan
) {
269 unsigned start
= chan
*8;
270 unsigned stop
= start
+ 8;
276 input
= LLVMBuildLShr(builder
, input
,
277 lp_build_const_int_vec(dst_type
, start
), "");
280 input
= LLVMBuildAnd(builder
, input
, mask
, "");
282 input
= lp_build_unsigned_norm_to_float(builder
, 8, dst_type
, input
);
291 * Fetch a texels from a texture, returning them in SoA layout.
293 * \param type the desired return type for 'rgba'. The vector length
294 * is the number of texels to fetch
296 * \param base_ptr points to start of the texture image block. For non-
297 * compressed formats, this simply points to the texel.
298 * For compressed formats, it points to the start of the
299 * compressed data block.
301 * \param i, j the sub-block pixel coordinates. For non-compressed formats
302 * these will always be (0,0). For compressed formats, i will
303 * be in [0, block_width-1] and j will be in [0, block_height-1].
306 lp_build_fetch_rgba_soa(LLVMBuilderRef builder
,
307 const struct util_format_description
*format_desc
,
309 LLVMValueRef base_ptr
,
313 LLVMValueRef rgba_out
[4])
316 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
&&
317 (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
||
318 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) &&
319 format_desc
->block
.width
== 1 &&
320 format_desc
->block
.height
== 1 &&
321 format_desc
->block
.bits
<= type
.width
&&
322 (format_desc
->channel
[0].type
!= UTIL_FORMAT_TYPE_FLOAT
||
323 format_desc
->channel
[0].size
== 32))
326 * The packed pixel fits into an element of the destination format. Put
327 * the packed pixels into a vector and extract each component for all
328 * vector elements in parallel.
334 * gather the texels from the texture
335 * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
337 packed
= lp_build_gather(builder
,
339 format_desc
->block
.bits
,
344 * convert texels to float rgba
346 lp_build_unpack_rgba_soa(builder
,
354 * Try calling lp_build_fetch_rgba_aos for all pixels.
357 if (util_format_fits_8unorm(format_desc
) &&
358 type
.floating
&& type
.width
== 32 && type
.length
== 4) {
359 struct lp_type tmp_type
;
362 memset(&tmp_type
, 0, sizeof tmp_type
);
364 tmp_type
.length
= type
.length
* 4;
365 tmp_type
.norm
= TRUE
;
367 tmp
= lp_build_fetch_rgba_aos(builder
, format_desc
, tmp_type
,
368 base_ptr
, offset
, i
, j
);
370 lp_build_rgba8_to_f32_soa(builder
,
379 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
381 * This is not the most efficient way of fetching pixels, as we
382 * miss some opportunities to do vectorization, but this is
383 * convenient for formats or scenarios for which there was no
384 * opportunity or incentive to optimize.
389 struct lp_type tmp_type
;
391 if (gallivm_debug
& GALLIVM_DEBUG_PERF
) {
392 debug_printf("%s: scalar unpacking of %s\n",
393 __FUNCTION__
, format_desc
->short_name
);
399 for (chan
= 0; chan
< 4; ++chan
) {
400 rgba_out
[chan
] = lp_build_undef(type
);
403 /* loop over number of pixels */
404 for(k
= 0; k
< type
.length
; ++k
) {
405 LLVMValueRef index
= LLVMConstInt(LLVMInt32Type(), k
, 0);
406 LLVMValueRef offset_elem
;
407 LLVMValueRef i_elem
, j_elem
;
410 offset_elem
= LLVMBuildExtractElement(builder
, offset
, index
, "");
412 i_elem
= LLVMBuildExtractElement(builder
, i
, index
, "");
413 j_elem
= LLVMBuildExtractElement(builder
, j
, index
, "");
415 /* Get a single float[4]={R,G,B,A} pixel */
416 tmp
= lp_build_fetch_rgba_aos(builder
, format_desc
, tmp_type
,
417 base_ptr
, offset_elem
,
421 * Insert the AoS tmp value channels into the SoA result vectors at
422 * position = 'index'.
424 for (chan
= 0; chan
< 4; ++chan
) {
425 LLVMValueRef chan_val
= LLVMConstInt(LLVMInt32Type(), chan
, 0),
426 tmp_chan
= LLVMBuildExtractElement(builder
, tmp
, chan_val
, "");
427 rgba_out
[chan
] = LLVMBuildInsertElement(builder
, rgba_out
[chan
],
428 tmp_chan
, index
, "");