/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/**
 * @file
 * AoS pixel format manipulation.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
36 #include "util/u_format.h"
37 #include "util/u_memory.h"
38 #include "util/u_math.h"
39 #include "util/u_string.h"
41 #include "lp_bld_init.h"
42 #include "lp_bld_type.h"
43 #include "lp_bld_flow.h"
44 #include "lp_bld_format.h"
48 * Unpack a single pixel into its RGBA components.
50 * @param desc the pixel format for the packed pixel value
51 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
53 * @return RGBA in a 4 floats vector.
56 lp_build_unpack_rgba_aos(LLVMBuilderRef builder
,
57 const struct util_format_description
*desc
,
60 LLVMValueRef shifted
, casted
, scaled
, masked
;
61 LLVMValueRef shifts
[4];
62 LLVMValueRef masks
[4];
63 LLVMValueRef scales
[4];
64 LLVMValueRef swizzles
[4];
72 /* TODO: Support more formats */
73 assert(desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
74 assert(desc
->block
.width
== 1);
75 assert(desc
->block
.height
== 1);
76 assert(desc
->block
.bits
<= 32);
78 /* Do the intermediate integer computations with 32bit integers since it
79 * matches floating point size */
80 if (desc
->block
.bits
< 32)
81 packed
= LLVMBuildZExt(builder
, packed
, LLVMInt32Type(), "");
83 /* Broadcast the packed value to all four channels
84 * before: packed = BGRA
85 * after: packed = {BGRA, BGRA, BGRA, BGRA}
87 packed
= LLVMBuildInsertElement(builder
,
88 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
90 LLVMConstNull(LLVMInt32Type()),
92 packed
= LLVMBuildShuffleVector(builder
,
94 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
95 LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
98 /* Initialize vector constants */
100 needs_uitofp
= FALSE
;
104 /* Loop over 4 color components */
105 for (i
= 0; i
< 4; ++i
) {
106 unsigned bits
= desc
->channel
[i
].size
;
108 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_VOID
) {
109 shifts
[i
] = LLVMGetUndef(LLVMInt32Type());
110 masks
[i
] = LLVMConstNull(LLVMInt32Type());
111 scales
[i
] = LLVMConstNull(LLVMFloatType());
115 unsigned long long mask
= (1ULL << bits
) - 1;
117 assert(desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_UNSIGNED
);
123 shifts
[i
] = LLVMConstInt(LLVMInt32Type(), shift
, 0);
124 masks
[i
] = LLVMConstInt(LLVMInt32Type(), mask
, 0);
126 if (desc
->channel
[i
].normalized
) {
127 scales
[i
] = LLVMConstReal(LLVMFloatType(), 1.0/mask
);
131 scales
[i
] = LLVMConstReal(LLVMFloatType(), 1.0);
137 /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
138 * into masked = {B, G, R, A}
140 shifted
= LLVMBuildLShr(builder
, packed
, LLVMConstVector(shifts
, 4), "");
141 masked
= LLVMBuildAnd(builder
, shifted
, LLVMConstVector(masks
, 4), "");
145 /* UIToFP can't be expressed in SSE2 */
146 casted
= LLVMBuildSIToFP(builder
, masked
, LLVMVectorType(LLVMFloatType(), 4), "");
148 casted
= LLVMBuildUIToFP(builder
, masked
, LLVMVectorType(LLVMFloatType(), 4), "");
151 /* At this point 'casted' may be a vector of floats such as
152 * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized
153 * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
157 scaled
= LLVMBuildMul(builder
, casted
, LLVMConstVector(scales
, 4), "");
161 for (i
= 0; i
< 4; ++i
)
162 aux
[i
] = LLVMGetUndef(LLVMFloatType());
164 /* Build swizzles vector to put components into R,G,B,A order */
165 for (i
= 0; i
< 4; ++i
) {
166 enum util_format_swizzle swizzle
;
168 if (desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) {
170 * For ZS formats do RGBA = ZZZ1
173 swizzle
= UTIL_FORMAT_SWIZZLE_1
;
174 } else if (desc
->swizzle
[0] == UTIL_FORMAT_SWIZZLE_NONE
) {
175 swizzle
= UTIL_FORMAT_SWIZZLE_0
;
177 swizzle
= desc
->swizzle
[0];
180 swizzle
= desc
->swizzle
[i
];
184 case UTIL_FORMAT_SWIZZLE_X
:
185 case UTIL_FORMAT_SWIZZLE_Y
:
186 case UTIL_FORMAT_SWIZZLE_Z
:
187 case UTIL_FORMAT_SWIZZLE_W
:
188 swizzles
[i
] = LLVMConstInt(LLVMInt32Type(), swizzle
, 0);
190 case UTIL_FORMAT_SWIZZLE_0
:
191 assert(empty_channel
>= 0);
192 swizzles
[i
] = LLVMConstInt(LLVMInt32Type(), empty_channel
, 0);
194 case UTIL_FORMAT_SWIZZLE_1
:
195 swizzles
[i
] = LLVMConstInt(LLVMInt32Type(), 4, 0);
196 aux
[0] = LLVMConstReal(LLVMFloatType(), 1.0);
198 case UTIL_FORMAT_SWIZZLE_NONE
:
199 swizzles
[i
] = LLVMGetUndef(LLVMFloatType());
205 return LLVMBuildShuffleVector(builder
, scaled
, LLVMConstVector(aux
, 4),
206 LLVMConstVector(swizzles
, 4), "");
211 * Pack a single pixel.
213 * @param rgba 4 float vector with the unpacked components.
215 * XXX: This is mostly for reference and testing -- operating a single pixel at
216 * a time is rarely if ever needed.
219 lp_build_pack_rgba_aos(LLVMBuilderRef builder
,
220 const struct util_format_description
*desc
,
224 LLVMValueRef packed
= NULL
;
225 LLVMValueRef swizzles
[4];
226 LLVMValueRef shifted
, casted
, scaled
, unswizzled
;
227 LLVMValueRef shifts
[4];
228 LLVMValueRef scales
[4];
233 assert(desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
234 assert(desc
->block
.width
== 1);
235 assert(desc
->block
.height
== 1);
237 type
= LLVMIntType(desc
->block
.bits
);
239 /* Unswizzle the color components into the source vector. */
240 for (i
= 0; i
< 4; ++i
) {
241 for (j
= 0; j
< 4; ++j
) {
242 if (desc
->swizzle
[j
] == i
)
246 swizzles
[i
] = LLVMConstInt(LLVMInt32Type(), j
, 0);
248 swizzles
[i
] = LLVMGetUndef(LLVMInt32Type());
251 unswizzled
= LLVMBuildShuffleVector(builder
, rgba
,
252 LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
253 LLVMConstVector(swizzles
, 4), "");
257 for (i
= 0; i
< 4; ++i
) {
258 unsigned bits
= desc
->channel
[i
].size
;
260 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_VOID
) {
261 shifts
[i
] = LLVMGetUndef(LLVMInt32Type());
262 scales
[i
] = LLVMGetUndef(LLVMFloatType());
265 unsigned mask
= (1 << bits
) - 1;
267 assert(desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_UNSIGNED
);
270 shifts
[i
] = LLVMConstInt(LLVMInt32Type(), shift
, 0);
272 if (desc
->channel
[i
].normalized
) {
273 scales
[i
] = LLVMConstReal(LLVMFloatType(), mask
);
277 scales
[i
] = LLVMConstReal(LLVMFloatType(), 1.0);
284 scaled
= LLVMBuildMul(builder
, unswizzled
, LLVMConstVector(scales
, 4), "");
288 casted
= LLVMBuildFPToSI(builder
, scaled
, LLVMVectorType(LLVMInt32Type(), 4), "");
290 shifted
= LLVMBuildShl(builder
, casted
, LLVMConstVector(shifts
, 4), "");
292 /* Bitwise or all components */
293 for (i
= 0; i
< 4; ++i
) {
294 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_UNSIGNED
) {
295 LLVMValueRef component
= LLVMBuildExtractElement(builder
, shifted
, LLVMConstInt(LLVMInt32Type(), i
, 0), "");
297 packed
= LLVMBuildOr(builder
, packed
, component
, "");
304 packed
= LLVMGetUndef(LLVMInt32Type());
306 if (desc
->block
.bits
< 32)
307 packed
= LLVMBuildTrunc(builder
, packed
, type
, "");
314 * Fetch a pixel into a 4 float AoS.
316 * \param format_desc describes format of the image we're fetching from
317 * \param ptr address of the pixel block (or the texel if uncompressed)
318 * \param i, j the sub-block pixel coordinates. For non-compressed formats
319 * these will always be (0,).
320 * \return valueRef with the float[4] RGBA pixel
323 lp_build_fetch_rgba_aos(LLVMBuilderRef builder
,
324 const struct util_format_description
*format_desc
,
330 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
&&
331 (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
||
332 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) &&
333 format_desc
->block
.width
== 1 &&
334 format_desc
->block
.height
== 1 &&
335 util_is_pot(format_desc
->block
.bits
) &&
336 format_desc
->block
.bits
<= 32 &&
337 format_desc
->is_bitmask
&&
338 !format_desc
->is_mixed
&&
339 (format_desc
->channel
[0].type
== UTIL_FORMAT_TYPE_UNSIGNED
||
340 format_desc
->channel
[1].type
== UTIL_FORMAT_TYPE_UNSIGNED
))
344 ptr
= LLVMBuildBitCast(builder
, ptr
,
345 LLVMPointerType(LLVMIntType(format_desc
->block
.bits
), 0) ,
348 packed
= LLVMBuildLoad(builder
, ptr
, "packed");
350 return lp_build_unpack_rgba_aos(builder
, format_desc
, packed
);
352 else if (format_desc
->fetch_rgba_float
) {
354 * Fallback to calling util_format_description::fetch_rgba_float.
356 * This is definitely not the most efficient way of fetching pixels, as
357 * we miss the opportunity to do vectorization, but this it is a
358 * convenient for formats or scenarios for which there was no opportunity
359 * or incentive to optimize.
362 LLVMModuleRef module
= LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder
)));
364 LLVMValueRef function
;
366 LLVMValueRef args
[4];
368 util_snprintf(name
, sizeof name
, "util_format_%s_fetch_rgba_float",
369 format_desc
->short_name
);
372 * Declare and bind format_desc->fetch_rgba_float().
375 function
= LLVMGetNamedFunction(module
, name
);
377 LLVMTypeRef ret_type
;
378 LLVMTypeRef arg_types
[4];
379 LLVMTypeRef function_type
;
381 ret_type
= LLVMVoidType();
382 arg_types
[0] = LLVMPointerType(LLVMFloatType(), 0);
383 arg_types
[1] = LLVMPointerType(LLVMInt8Type(), 0);
384 arg_types
[3] = arg_types
[2] = LLVMIntType(sizeof(unsigned) * 8);
385 function_type
= LLVMFunctionType(ret_type
, arg_types
, Elements(arg_types
), 0);
386 function
= LLVMAddFunction(module
, name
, function_type
);
388 LLVMSetFunctionCallConv(function
, LLVMCCallConv
);
389 LLVMSetLinkage(function
, LLVMExternalLinkage
);
391 assert(LLVMIsDeclaration(function
));
393 LLVMAddGlobalMapping(lp_build_engine
, function
,
394 func_to_pointer((func_pointer
)format_desc
->fetch_rgba_float
));
397 tmp
= lp_build_alloca(builder
, LLVMVectorType(LLVMFloatType(), 4), "");
400 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
401 * in the SoA vectors.
404 args
[0] = LLVMBuildBitCast(builder
, tmp
,
405 LLVMPointerType(LLVMFloatType(), 0), "");
410 LLVMBuildCall(builder
, function
, args
, Elements(args
), "");
412 return LLVMBuildLoad(builder
, tmp
, "");
416 return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));