1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_format.h"
30 #include "util/u_memory.h"
31 #include "util/u_string.h"
33 #include "lp_bld_type.h"
34 #include "lp_bld_const.h"
35 #include "lp_bld_conv.h"
36 #include "lp_bld_sample.h" /* for lp_build_gather */
37 #include "lp_bld_init.h"
38 #include "lp_bld_format.h"
42 lp_build_format_swizzle_chan_soa(struct lp_type type
,
43 const LLVMValueRef
*unswizzled
,
44 enum util_format_swizzle swizzle
)
47 case UTIL_FORMAT_SWIZZLE_X
:
48 case UTIL_FORMAT_SWIZZLE_Y
:
49 case UTIL_FORMAT_SWIZZLE_Z
:
50 case UTIL_FORMAT_SWIZZLE_W
:
51 return unswizzled
[swizzle
];
52 case UTIL_FORMAT_SWIZZLE_0
:
53 return lp_build_zero(type
);
54 case UTIL_FORMAT_SWIZZLE_1
:
55 return lp_build_one(type
);
56 case UTIL_FORMAT_SWIZZLE_NONE
:
57 return lp_build_undef(type
);
60 return lp_build_undef(type
);
66 lp_build_format_swizzle_soa(const struct util_format_description
*format_desc
,
68 const LLVMValueRef
*unswizzled
,
69 LLVMValueRef
*swizzled
)
71 if(format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) {
72 enum util_format_swizzle swizzle
= format_desc
->swizzle
[0];
73 LLVMValueRef depth
= lp_build_format_swizzle_chan_soa(type
, unswizzled
, swizzle
);
74 swizzled
[2] = swizzled
[1] = swizzled
[0] = depth
;
75 swizzled
[3] = lp_build_one(type
);
79 for (chan
= 0; chan
< 4; ++chan
) {
80 enum util_format_swizzle swizzle
= format_desc
->swizzle
[chan
];
81 swizzled
[chan
] = lp_build_format_swizzle_chan_soa(type
, unswizzled
, swizzle
);
88 * Unpack several pixels in SoA.
90 * It takes a vector of packed pixels:
92 * packed = {P0, P1, P2, P3, ..., Pn}
94 * And will produce four vectors:
96 * red = {R0, R1, R2, R3, ..., Rn}
97 * green = {G0, G1, G2, G3, ..., Gn}
98 * blue = {B0, B1, B2, B3, ..., Bn}
99 * alpha = {A0, A1, A2, A3, ..., An}
101 * It requires that a packed pixel fits into an element of the output
102 * channels. The common case is converting pixels with a depth of 32 bits or less to floats.
106 lp_build_unpack_rgba_soa(LLVMBuilderRef builder
,
107 const struct util_format_description
*format_desc
,
112 LLVMValueRef inputs
[4];
116 assert(format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
117 assert(format_desc
->block
.width
== 1);
118 assert(format_desc
->block
.height
== 1);
119 assert(format_desc
->block
.bits
<= type
.width
);
120 /* FIXME: Support more output types */
121 assert(type
.floating
);
122 assert(type
.width
== 32);
124 /* Decode the input vector components */
126 for (chan
= 0; chan
< format_desc
->nr_channels
; ++chan
) {
127 unsigned width
= format_desc
->channel
[chan
].size
;
128 unsigned stop
= start
+ width
;
133 switch(format_desc
->channel
[chan
].type
) {
134 case UTIL_FORMAT_TYPE_VOID
:
135 input
= lp_build_undef(type
);
138 case UTIL_FORMAT_TYPE_UNSIGNED
:
144 input
= LLVMBuildLShr(builder
, input
, lp_build_const_int_vec(type
, start
), "");
151 if (stop
< format_desc
->block
.bits
) {
152 unsigned mask
= ((unsigned long long)1 << width
) - 1;
153 input
= LLVMBuildAnd(builder
, input
, lp_build_const_int_vec(type
, mask
), "");
161 if(format_desc
->channel
[chan
].normalized
)
162 input
= lp_build_unsigned_norm_to_float(builder
, width
, type
, input
);
164 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(type
), "");
169 input
= lp_build_undef(type
);
174 case UTIL_FORMAT_TYPE_SIGNED
:
176 * Align the sign bit first.
179 if (stop
< type
.width
) {
180 unsigned bits
= type
.width
- stop
;
181 LLVMValueRef bits_val
= lp_build_const_int_vec(type
, bits
);
182 input
= LLVMBuildShl(builder
, input
, bits_val
, "");
186 * Align the LSB (with an arithmetic shift to preserve the sign)
189 if (format_desc
->channel
[chan
].size
< type
.width
) {
190 unsigned bits
= type
.width
- format_desc
->channel
[chan
].size
;
191 LLVMValueRef bits_val
= lp_build_const_int_vec(type
, bits
);
192 input
= LLVMBuildAShr(builder
, input
, bits_val
, "");
200 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(type
), "");
201 if (format_desc
->channel
[chan
].normalized
) {
202 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
- 1)) - 1);
203 LLVMValueRef scale_val
= lp_build_const_vec(type
, scale
);
204 input
= LLVMBuildMul(builder
, input
, scale_val
, "");
210 input
= lp_build_undef(type
);
215 case UTIL_FORMAT_TYPE_FLOAT
:
219 assert(type
.width
== 32);
220 input
= LLVMBuildBitCast(builder
, input
, lp_build_vec_type(type
), "");
225 input
= lp_build_undef(type
);
229 case UTIL_FORMAT_TYPE_FIXED
:
231 double scale
= 1.0 / ((1 << (format_desc
->channel
[chan
].size
/2)) - 1);
232 LLVMValueRef scale_val
= lp_build_const_vec(type
, scale
);
233 input
= LLVMBuildSIToFP(builder
, input
, lp_build_vec_type(type
), "");
234 input
= LLVMBuildMul(builder
, input
, scale_val
, "");
239 input
= lp_build_undef(type
);
245 input
= lp_build_undef(type
);
249 inputs
[chan
] = input
;
254 lp_build_format_swizzle_soa(format_desc
, type
, inputs
, rgba
);
259 * Fetch a pixel into a SoA.
261 * i and j are the sub-block pixel coordinates.
264 lp_build_fetch_rgba_soa(LLVMBuilderRef builder
,
265 const struct util_format_description
*format_desc
,
267 LLVMValueRef base_ptr
,
274 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
&&
275 (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
||
276 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) &&
277 format_desc
->block
.width
== 1 &&
278 format_desc
->block
.height
== 1 &&
279 format_desc
->block
.bits
<= type
.width
&&
280 (format_desc
->channel
[0].type
!= UTIL_FORMAT_TYPE_FLOAT
||
281 format_desc
->channel
[0].size
== 32))
284 * The packed pixel fits into an element of the destination format. Put
285 * the packed pixels into a vector and extract each component for all
286 * vector elements in parallel.
292 * gather the texels from the texture
294 packed
= lp_build_gather(builder
,
296 format_desc
->block
.bits
,
301 * convert texels to float rgba
303 lp_build_unpack_rgba_soa(builder
,
310 * Fall back to calling util_format_description::fetch_float for each pixel.
313 * This is definitely not the most efficient way of fetching pixels, as
314 * we miss the opportunity to do vectorization, but it is
315 * convenient for formats or scenarios for which there was no opportunity
316 * or incentive to optimize.
319 LLVMModuleRef module
= LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder
)));
321 LLVMValueRef function
;
325 assert(type
.floating
);
327 util_snprintf(name
, sizeof name
, "util_format_%s_fetch_float", format_desc
->short_name
);
330 * Declare and bind format_desc->fetch_float().
333 function
= LLVMGetNamedFunction(module
, name
);
335 LLVMTypeRef ret_type
;
336 LLVMTypeRef arg_types
[4];
337 LLVMTypeRef function_type
;
339 ret_type
= LLVMVoidType();
340 arg_types
[0] = LLVMPointerType(LLVMFloatType(), 0);
341 arg_types
[1] = LLVMPointerType(LLVMInt8Type(), 0);
342 arg_types
[3] = arg_types
[2] = LLVMIntType(sizeof(unsigned) * 8);
343 function_type
= LLVMFunctionType(ret_type
, arg_types
, Elements(arg_types
), 0);
344 function
= LLVMAddFunction(module
, name
, function_type
);
346 LLVMSetFunctionCallConv(function
, LLVMCCallConv
);
347 LLVMSetLinkage(function
, LLVMExternalLinkage
);
349 assert(LLVMIsDeclaration(function
));
351 LLVMAddGlobalMapping(lp_build_engine
, function
, format_desc
->fetch_float
);
354 for (chan
= 0; chan
< 4; ++chan
) {
355 rgba
[chan
] = lp_build_undef(type
);
358 tmp
= LLVMBuildArrayAlloca(builder
,
360 LLVMConstInt(LLVMInt32Type(), 4, 0),
364 * Invoke format_desc->fetch_float() for each pixel and insert the result
365 * in the SoA vectors.
368 for(k
= 0; k
< type
.length
; ++k
) {
369 LLVMValueRef index
= LLVMConstInt(LLVMInt32Type(), k
, 0);
370 LLVMValueRef offset_elem
;
372 LLVMValueRef i_elem
, j_elem
;
373 LLVMValueRef args
[4];
375 offset_elem
= LLVMBuildExtractElement(builder
, offset
, index
, "");
376 ptr
= LLVMBuildGEP(builder
, base_ptr
, &offset_elem
, 1, "");
378 i_elem
= LLVMBuildExtractElement(builder
, i
, index
, "");
379 j_elem
= LLVMBuildExtractElement(builder
, j
, index
, "");
386 LLVMBuildCall(builder
, function
, args
, 4, "");
388 for (chan
= 0; chan
< 4; ++chan
) {
389 LLVMValueRef chan_val
= LLVMConstInt(LLVMInt32Type(), chan
, 0),
390 tmp_chan
= LLVMBuildGEP(builder
, tmp
, &chan_val
, 1, "");
391 tmp_chan
= LLVMBuildLoad(builder
, tmp_chan
, "");
392 rgba
[chan
] = LLVMBuildInsertElement(builder
, rgba
[chan
], tmp_chan
, index
, "");