/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/**
 * @file
 * AoS pixel format manipulation.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
36 #include "util/u_format.h"
37 #include "util/u_memory.h"
38 #include "util/u_math.h"
39 #include "util/u_string.h"
41 #include "lp_bld_arit.h"
42 #include "lp_bld_init.h"
43 #include "lp_bld_type.h"
44 #include "lp_bld_flow.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_swizzle.h"
48 #include "lp_bld_format.h"
52 * Basic swizzling. Rearrange the order of the unswizzled array elements
53 * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported
55 * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
58 lp_build_format_swizzle_aos(const struct util_format_description
*desc
,
59 struct lp_build_context
*bld
,
60 LLVMValueRef unswizzled
)
62 unsigned char swizzles
[4];
65 assert(bld
->type
.length
% 4 == 0);
67 for (chan
= 0; chan
< 4; ++chan
) {
68 enum util_format_swizzle swizzle
;
70 if (desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) {
72 * For ZS formats do RGBA = ZZZ1
75 swizzle
= UTIL_FORMAT_SWIZZLE_1
;
76 } else if (desc
->swizzle
[0] == UTIL_FORMAT_SWIZZLE_NONE
) {
77 swizzle
= UTIL_FORMAT_SWIZZLE_0
;
79 swizzle
= desc
->swizzle
[0];
82 swizzle
= desc
->swizzle
[chan
];
84 swizzles
[chan
] = swizzle
;
87 return lp_build_swizzle_aos(bld
, unswizzled
, swizzles
);
92 * Whether the format matches the vector type, apart of swizzles.
95 format_matches_type(const struct util_format_description
*desc
,
98 enum util_format_type chan_type
;
101 assert(type
.length
% 4 == 0);
103 if (desc
->layout
!= UTIL_FORMAT_LAYOUT_PLAIN
||
104 desc
->colorspace
!= UTIL_FORMAT_COLORSPACE_RGB
) {
109 chan_type
= UTIL_FORMAT_TYPE_FLOAT
;
110 } else if (type
.fixed
) {
111 chan_type
= UTIL_FORMAT_TYPE_FIXED
;
112 } else if (type
.sign
) {
113 chan_type
= UTIL_FORMAT_TYPE_SIGNED
;
115 chan_type
= UTIL_FORMAT_TYPE_UNSIGNED
;
118 for (chan
= 0; chan
< desc
->nr_channels
; ++chan
) {
119 if (desc
->channel
[chan
].size
!= type
.width
) {
123 if (desc
->channel
[chan
].type
!= UTIL_FORMAT_TYPE_VOID
) {
124 if (desc
->channel
[chan
].type
!= chan_type
||
125 desc
->channel
[chan
].normalized
!= type
.norm
) {
136 * Unpack a single pixel into its RGBA components.
138 * @param desc the pixel format for the packed pixel value
139 * @param type the desired return type (float[4] vs. ubyte[4])
140 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
142 * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
144 static INLINE LLVMValueRef
145 lp_build_unpack_rgba_aos(const struct util_format_description
*desc
,
146 struct lp_build_context
*bld
,
149 LLVMBuilderRef builder
= bld
->builder
;
150 struct lp_type type
= bld
->type
;
151 LLVMValueRef shifted
, casted
, scaled
, masked
;
152 LLVMValueRef shifts
[4];
153 LLVMValueRef masks
[4];
154 LLVMValueRef scales
[4];
157 boolean needs_uitofp
;
161 /* TODO: Support more formats */
162 assert(desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
163 assert(desc
->block
.width
== 1);
164 assert(desc
->block
.height
== 1);
165 assert(desc
->block
.bits
<= 32);
167 /* Do the intermediate integer computations with 32bit integers since it
168 * matches floating point size */
169 if (desc
->block
.bits
< 32)
170 packed
= LLVMBuildZExt(builder
, packed
, LLVMInt32Type(), "");
172 /* Broadcast the packed value to all four channels
173 * before: packed = BGRA
174 * after: packed = {BGRA, BGRA, BGRA, BGRA}
176 packed
= LLVMBuildInsertElement(builder
,
177 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
179 LLVMConstNull(LLVMInt32Type()),
181 packed
= LLVMBuildShuffleVector(builder
,
183 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
184 LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
187 /* Initialize vector constants */
189 needs_uitofp
= FALSE
;
192 /* Loop over 4 color components */
193 for (i
= 0; i
< 4; ++i
) {
194 unsigned bits
= desc
->channel
[i
].size
;
196 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_VOID
) {
197 shifts
[i
] = LLVMGetUndef(LLVMInt32Type());
198 masks
[i
] = LLVMConstNull(LLVMInt32Type());
199 scales
[i
] = LLVMConstNull(LLVMFloatType());
202 unsigned long long mask
= (1ULL << bits
) - 1;
204 assert(desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_UNSIGNED
);
210 shifts
[i
] = LLVMConstInt(LLVMInt32Type(), shift
, 0);
211 masks
[i
] = LLVMConstInt(LLVMInt32Type(), mask
, 0);
213 if (desc
->channel
[i
].normalized
) {
214 scales
[i
] = LLVMConstReal(LLVMFloatType(), 1.0/mask
);
218 scales
[i
] = LLVMConstReal(LLVMFloatType(), 1.0);
224 /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
225 * into masked = {B, G, R, A}
227 shifted
= LLVMBuildLShr(builder
, packed
, LLVMConstVector(shifts
, 4), "");
228 masked
= LLVMBuildAnd(builder
, shifted
, LLVMConstVector(masks
, 4), "");
232 /* UIToFP can't be expressed in SSE2 */
233 casted
= LLVMBuildSIToFP(builder
, masked
, LLVMVectorType(LLVMFloatType(), 4), "");
235 casted
= LLVMBuildUIToFP(builder
, masked
, LLVMVectorType(LLVMFloatType(), 4), "");
238 /* At this point 'casted' may be a vector of floats such as
239 * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized
240 * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
244 scaled
= LLVMBuildMul(builder
, casted
, LLVMConstVector(scales
, 4), "");
251 * TODO: We could avoid floating conversion for integer to
252 * integer conversions.
255 lp_build_conv(builder
,
256 lp_float32_vec4_type(),
258 &scaled
, 1, &scaled
, 1);
260 scaled
= lp_build_format_swizzle_aos(desc
, bld
, scaled
);
267 * Pack a single pixel.
269 * @param rgba 4 float vector with the unpacked components.
271 * XXX: This is mostly for reference and testing -- operating a single pixel at
272 * a time is rarely if ever needed.
275 lp_build_pack_rgba_aos(LLVMBuilderRef builder
,
276 const struct util_format_description
*desc
,
280 LLVMValueRef packed
= NULL
;
281 LLVMValueRef swizzles
[4];
282 LLVMValueRef shifted
, casted
, scaled
, unswizzled
;
283 LLVMValueRef shifts
[4];
284 LLVMValueRef scales
[4];
289 assert(desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
290 assert(desc
->block
.width
== 1);
291 assert(desc
->block
.height
== 1);
293 type
= LLVMIntType(desc
->block
.bits
);
295 /* Unswizzle the color components into the source vector. */
296 for (i
= 0; i
< 4; ++i
) {
297 for (j
= 0; j
< 4; ++j
) {
298 if (desc
->swizzle
[j
] == i
)
302 swizzles
[i
] = LLVMConstInt(LLVMInt32Type(), j
, 0);
304 swizzles
[i
] = LLVMGetUndef(LLVMInt32Type());
307 unswizzled
= LLVMBuildShuffleVector(builder
, rgba
,
308 LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
309 LLVMConstVector(swizzles
, 4), "");
313 for (i
= 0; i
< 4; ++i
) {
314 unsigned bits
= desc
->channel
[i
].size
;
316 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_VOID
) {
317 shifts
[i
] = LLVMGetUndef(LLVMInt32Type());
318 scales
[i
] = LLVMGetUndef(LLVMFloatType());
321 unsigned mask
= (1 << bits
) - 1;
323 assert(desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_UNSIGNED
);
326 shifts
[i
] = LLVMConstInt(LLVMInt32Type(), shift
, 0);
328 if (desc
->channel
[i
].normalized
) {
329 scales
[i
] = LLVMConstReal(LLVMFloatType(), mask
);
333 scales
[i
] = LLVMConstReal(LLVMFloatType(), 1.0);
340 scaled
= LLVMBuildMul(builder
, unswizzled
, LLVMConstVector(scales
, 4), "");
344 casted
= LLVMBuildFPToSI(builder
, scaled
, LLVMVectorType(LLVMInt32Type(), 4), "");
346 shifted
= LLVMBuildShl(builder
, casted
, LLVMConstVector(shifts
, 4), "");
348 /* Bitwise or all components */
349 for (i
= 0; i
< 4; ++i
) {
350 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_UNSIGNED
) {
351 LLVMValueRef component
= LLVMBuildExtractElement(builder
, shifted
, LLVMConstInt(LLVMInt32Type(), i
, 0), "");
353 packed
= LLVMBuildOr(builder
, packed
, component
, "");
360 packed
= LLVMGetUndef(LLVMInt32Type());
362 if (desc
->block
.bits
< 32)
363 packed
= LLVMBuildTrunc(builder
, packed
, type
, "");
372 * Fetch a pixel into a 4 float AoS.
374 * \param format_desc describes format of the image we're fetching from
375 * \param ptr address of the pixel block (or the texel if uncompressed)
376 * \param i, j the sub-block pixel coordinates. For non-compressed formats
377 * these will always be (0, 0).
378 * \return a 4 element vector with the pixel's RGBA values.
381 lp_build_fetch_rgba_aos(LLVMBuilderRef builder
,
382 const struct util_format_description
*format_desc
,
388 struct lp_build_context bld
;
390 /* XXX: For now we only support one pixel at a time */
391 assert(type
.length
== 4);
393 lp_build_context_init(&bld
, builder
, type
);
395 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
&&
396 (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
||
397 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) &&
398 format_desc
->block
.width
== 1 &&
399 format_desc
->block
.height
== 1 &&
400 util_is_pot(format_desc
->block
.bits
) &&
401 format_desc
->block
.bits
<= 32 &&
402 format_desc
->is_bitmask
&&
403 !format_desc
->is_mixed
&&
404 (format_desc
->channel
[0].type
== UTIL_FORMAT_TYPE_UNSIGNED
||
405 format_desc
->channel
[1].type
== UTIL_FORMAT_TYPE_UNSIGNED
))
409 ptr
= LLVMBuildBitCast(builder
, ptr
,
410 LLVMPointerType(LLVMIntType(format_desc
->block
.bits
), 0) ,
413 packed
= LLVMBuildLoad(builder
, ptr
, "packed");
415 if (format_matches_type(format_desc
, type
)) {
417 * The format matches the type (apart of a swizzle) so no need for
418 * scaling or converting.
421 assert(format_desc
->block
.bits
<= type
.width
* type
.length
);
422 if (format_desc
->block
.bits
< type
.width
* type
.length
) {
423 packed
= LLVMBuildZExt(builder
, packed
,
424 LLVMIntType(type
.width
* type
.length
), "");
427 packed
= LLVMBuildBitCast(builder
, packed
, lp_build_vec_type(type
), "");
429 return lp_build_format_swizzle_aos(format_desc
, &bld
, packed
);
431 return lp_build_unpack_rgba_aos(format_desc
, &bld
, packed
);
434 else if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_SUBSAMPLED
) {
438 ptr
= LLVMBuildBitCast(builder
, ptr
,
439 LLVMPointerType(LLVMInt32Type(), 0),
442 packed
= LLVMBuildLoad(builder
, ptr
, "packed");
444 rgba
= lp_build_unpack_subsampled_to_rgba_aos(builder
, format_desc
,
447 lp_build_conv(builder
,
448 lp_unorm8_vec4_type(),
454 else if (format_desc
->fetch_rgba_float
) {
456 * Fallback to calling util_format_description::fetch_rgba_float.
458 * This is definitely not the most efficient way of fetching pixels, as
459 * we miss the opportunity to do vectorization, but this it is a
460 * convenient for formats or scenarios for which there was no opportunity
461 * or incentive to optimize.
464 LLVMModuleRef module
= LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder
)));
466 LLVMTypeRef f32t
= LLVMFloatType();
467 LLVMTypeRef f32x4t
= LLVMVectorType(f32t
, 4);
468 LLVMTypeRef pf32t
= LLVMPointerType(f32t
, 0);
469 LLVMValueRef function
;
470 LLVMValueRef tmp_ptr
;
471 LLVMValueRef tmp_val
;
472 LLVMValueRef args
[4];
474 util_snprintf(name
, sizeof name
, "util_format_%s_fetch_rgba_float",
475 format_desc
->short_name
);
478 * Declare and bind format_desc->fetch_rgba_float().
481 function
= LLVMGetNamedFunction(module
, name
);
483 LLVMTypeRef ret_type
;
484 LLVMTypeRef arg_types
[4];
485 LLVMTypeRef function_type
;
487 ret_type
= LLVMVoidType();
488 arg_types
[0] = pf32t
;
489 arg_types
[1] = LLVMPointerType(LLVMInt8Type(), 0);
490 arg_types
[3] = arg_types
[2] = LLVMIntType(sizeof(unsigned) * 8);
491 function_type
= LLVMFunctionType(ret_type
, arg_types
, Elements(arg_types
), 0);
492 function
= LLVMAddFunction(module
, name
, function_type
);
494 LLVMSetFunctionCallConv(function
, LLVMCCallConv
);
495 LLVMSetLinkage(function
, LLVMExternalLinkage
);
497 assert(LLVMIsDeclaration(function
));
499 LLVMAddGlobalMapping(lp_build_engine
, function
,
500 func_to_pointer((func_pointer
)format_desc
->fetch_rgba_float
));
503 tmp_ptr
= lp_build_alloca(builder
, f32x4t
, "");
506 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
507 * in the SoA vectors.
510 args
[0] = LLVMBuildBitCast(builder
, tmp_ptr
, pf32t
, "");
515 LLVMBuildCall(builder
, function
, args
, Elements(args
), "");
517 tmp_val
= LLVMBuildLoad(builder
, tmp_ptr
, "");
520 /* No further conversion necessary */
522 lp_build_conv(builder
,
523 lp_float32_vec4_type(),
525 &tmp_val
, 1, &tmp_val
, 1);
532 return lp_build_undef(type
);