1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * AoS pixel format manipulation.
32 * @author Jose Fonseca <jfonseca@vmware.com>
36 #include "util/u_format.h"
37 #include "util/u_memory.h"
38 #include "util/u_math.h"
39 #include "util/u_string.h"
41 #include "lp_bld_arit.h"
42 #include "lp_bld_init.h"
43 #include "lp_bld_type.h"
44 #include "lp_bld_flow.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_swizzle.h"
48 #include "lp_bld_gather.h"
49 #include "lp_bld_debug.h"
50 #include "lp_bld_format.h"
54 * Basic swizzling. Rearrange the order of the unswizzled array elements
55 * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported
57 * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
/*
 * lp_build_format_swizzle_aos: fill a four-entry swizzle table from the
 * format description and hand it, together with the input vector, to
 * lp_build_swizzle_aos().
 *
 * desc        format description; its colorspace and swizzle[] are read
 * bld         build context; bld->type.length is asserted to be a multiple
 *             of 4
 * unswizzled  value passed through unchanged to lp_build_swizzle_aos()
 *
 * Returns the result of lp_build_swizzle_aos(bld, unswizzled, swizzles).
 *
 * NOTE(review): this listing carries embedded line numbers with gaps
 * (63, 65-66, 73, 75-76, 80, 82-83, 85, 87-88).  The return type, the
 * declaration of 'chan', the condition guarding the SWIZZLE_1 assignment
 * and several brace/else lines are not visible here -- confirm against
 * the real file before changing any logic.
 */
60 lp_build_format_swizzle_aos(const struct util_format_description
*desc
,
61 struct lp_build_context
*bld
,
62 LLVMValueRef unswizzled
)
64 unsigned char swizzles
[4];
67 assert(bld
->type
.length
% 4 == 0);
/* One swizzle selector is chosen per output channel. */
69 for (chan
= 0; chan
< 4; ++chan
) {
70 enum util_format_swizzle swizzle
;
/*
 * Depth/stencil formats are special-cased: the visible assignments use
 * SWIZZLE_1, SWIZZLE_0 (when desc->swizzle[0] is NONE) or desc->swizzle[0].
 * Presumably the first applies to the last channel only, matching the
 * "ZZZ1" remark below -- the guarding condition is missing from this
 * listing, confirm.
 */
72 if (desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) {
74 * For ZS formats do RGBA = ZZZ1
77 swizzle
= UTIL_FORMAT_SWIZZLE_1
;
78 } else if (desc
->swizzle
[0] == UTIL_FORMAT_SWIZZLE_NONE
) {
79 swizzle
= UTIL_FORMAT_SWIZZLE_0
;
81 swizzle
= desc
->swizzle
[0];
/* Presumably the non-ZS branch: take the format's own per-channel swizzle. */
84 swizzle
= desc
->swizzle
[chan
];
86 swizzles
[chan
] = swizzle
;
89 return lp_build_swizzle_aos(bld
, unswizzled
, swizzles
);
94 * Whether the format matches the vector type, apart from swizzles.
/*
 * format_matches_type: compare a format description with a vector type,
 * ignoring swizzles.
 *
 * desc  format description to test
 * type  vector type to test against; its length, fixed, sign, width and
 *       norm fields are read (the parameter declaration itself is not
 *       visible in this listing)
 *
 * Visible checks, in order:
 *  - type.length is asserted to be a multiple of 4;
 *  - layout != PLAIN, colorspace != RGB, or a block that is not 1x1 enters
 *    an early branch whose body is not visible;
 *  - a channel type is derived from the vector type: FLOAT, FIXED (when
 *    type.fixed), SIGNED (when type.sign) or UNSIGNED;
 *  - every channel's size is compared with type.width, and every non-VOID
 *    channel's type and normalized flag are compared with the derived
 *    channel type and type.norm.
 *
 * NOTE(review): the return type, the declaration of 'chan', the condition
 * selecting FLOAT and every return statement fall in gaps of this listing.
 * Presumably each mismatch returns false and falling through returns true
 * -- confirm against the real file.
 */
97 format_matches_type(const struct util_format_description
*desc
,
100 enum util_format_type chan_type
;
103 assert(type
.length
% 4 == 0);
/* Only plain, RGB, one-pixel-per-block formats pass this gate. */
105 if (desc
->layout
!= UTIL_FORMAT_LAYOUT_PLAIN
||
106 desc
->colorspace
!= UTIL_FORMAT_COLORSPACE_RGB
||
107 desc
->block
.width
!= 1 ||
108 desc
->block
.height
!= 1) {
/* Map the vector type's flags onto the matching util_format channel type. */
113 chan_type
= UTIL_FORMAT_TYPE_FLOAT
;
114 } else if (type
.fixed
) {
115 chan_type
= UTIL_FORMAT_TYPE_FIXED
;
116 } else if (type
.sign
) {
117 chan_type
= UTIL_FORMAT_TYPE_SIGNED
;
119 chan_type
= UTIL_FORMAT_TYPE_UNSIGNED
;
/* Per-channel comparison over the channels the format declares. */
122 for (chan
= 0; chan
< desc
->nr_channels
; ++chan
) {
123 if (desc
->channel
[chan
].size
!= type
.width
) {
/* VOID channels are exempt from the type / normalization comparison. */
127 if (desc
->channel
[chan
].type
!= UTIL_FORMAT_TYPE_VOID
) {
128 if (desc
->channel
[chan
].type
!= chan_type
||
129 desc
->channel
[chan
].normalized
!= type
.norm
) {
140 * Unpack a single pixel into its RGBA components.
142 * @param desc the pixel format for the packed pixel value
143 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
145 * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
/*
 * lp_build_unpack_arith_rgba_aos: emit LLVM IR that turns one packed pixel
 * into a four-lane float vector using shift / mask / scale arithmetic.
 *
 * gallivm  supplies the LLVM builder and context
 * desc     pixel format; asserted to be PLAIN layout, 1x1 block, and at
 *          most 32 bits per block
 * packed   the pixel value; asserted to have LLVM type i32 (its parameter
 *          declaration is not visible in this listing)
 *
 * Visible steps:
 *  1. insert 'packed' into a <4 x i32> vector and shuffle it so every lane
 *     holds the same value (see the "Broadcast" remark in the body);
 *  2. build per-channel constants: a VOID channel gets an undef shift, a
 *     zero mask and a zero scale; any other channel is asserted UNSIGNED
 *     and gets shift = 'shift', mask = (1 << bits) - 1, and scale =
 *     1.0 / mask when normalized (1.0 on the other visible assignment);
 *  3. logical-shift-right by the shifts, AND with the masks;
 *  4. convert to <4 x float> with SIToFP or UIToFP;
 *  5. multiply by the scale vector.
 *
 * NOTE(review): listing gaps hide the declarations of 'i', 'shift' and any
 * other flags, how 'shift' advances between channels, when needs_uitofp
 * becomes true, the condition choosing SIToFP versus UIToFP, whether the
 * final multiply is conditional, and the return statement -- confirm
 * against the real file.
 */
147 static INLINE LLVMValueRef
148 lp_build_unpack_arith_rgba_aos(struct gallivm_state
*gallivm
,
149 const struct util_format_description
*desc
,
152 LLVMBuilderRef builder
= gallivm
->builder
;
153 LLVMValueRef shifted
, casted
, scaled
, masked
;
154 LLVMValueRef shifts
[4];
155 LLVMValueRef masks
[4];
156 LLVMValueRef scales
[4];
159 boolean needs_uitofp
;
163 /* TODO: Support more formats */
164 assert(desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
165 assert(desc
->block
.width
== 1);
166 assert(desc
->block
.height
== 1);
167 assert(desc
->block
.bits
<= 32);
169 /* Do the intermediate integer computations with 32bit integers since it
170 * matches floating point size */
171 assert (LLVMTypeOf(packed
) == LLVMInt32TypeInContext(gallivm
->context
));
173 /* Broadcast the packed value to all four channels
174 * before: packed = BGRA
175 * after: packed = {BGRA, BGRA, BGRA, BGRA}
177 packed
= LLVMBuildInsertElement(builder
,
178 LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm
->context
), 4)),
180 LLVMConstNull(LLVMInt32TypeInContext(gallivm
->context
)),
182 packed
= LLVMBuildShuffleVector(builder
,
184 LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm
->context
), 4)),
185 LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm
->context
), 4)),
188 /* Initialize vector constants */
190 needs_uitofp
= FALSE
;
193 /* Loop over 4 color components */
194 for (i
= 0; i
< 4; ++i
) {
195 unsigned bits
= desc
->channel
[i
].size
;
/* Unused (VOID) channel: shift is don't-care, mask and scale are zero. */
197 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_VOID
) {
198 shifts
[i
] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm
->context
));
199 masks
[i
] = LLVMConstNull(LLVMInt32TypeInContext(gallivm
->context
));
200 scales
[i
] = LLVMConstNull(LLVMFloatTypeInContext(gallivm
->context
));
/* The 1ULL literal keeps this shift well defined even when bits is 32. */
203 unsigned long long mask
= (1ULL << bits
) - 1;
205 assert(desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_UNSIGNED
);
211 shifts
[i
] = lp_build_const_int32(gallivm
, shift
);
212 masks
[i
] = lp_build_const_int32(gallivm
, mask
);
/* Normalized channels are scaled by 1/mask so the maximum value maps to 1.0. */
214 if (desc
->channel
[i
].normalized
) {
215 scales
[i
] = lp_build_const_float(gallivm
, 1.0 / mask
);
219 scales
[i
] = lp_build_const_float(gallivm
, 1.0);
225 /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
226 * into masked = {B, G, R, A}
228 shifted
= LLVMBuildLShr(builder
, packed
, LLVMConstVector(shifts
, 4), "");
229 masked
= LLVMBuildAnd(builder
, shifted
, LLVMConstVector(masks
, 4), "");
233 /* UIToFP can't be expressed in SSE2 */
/*
 * NOTE(review): the condition choosing between the two conversions below
 * is not visible in this listing; presumably needs_uitofp selects the
 * unsigned one -- confirm.
 */
234 casted
= LLVMBuildSIToFP(builder
, masked
, LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), 4), "");
236 casted
= LLVMBuildUIToFP(builder
, masked
, LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), 4), "");
239 /* At this point 'casted' may be a vector of floats such as
240 * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized
241 * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
245 scaled
= LLVMBuildFMul(builder
, casted
, LLVMConstVector(scales
, 4), "");
254 * Pack a single pixel.
256 * @param rgba 4 float vector with the unpacked components.
258 * XXX: This is mostly for reference and testing -- operating a single pixel at
259 * a time is rarely if ever needed.
/*
 * lp_build_pack_rgba_aos: emit LLVM IR that packs a four-lane float RGBA
 * vector into a single integer pixel of the given format.
 *
 * gallivm  supplies the LLVM builder and context
 * desc     pixel format; asserted to be PLAIN layout with a 1x1 block
 * rgba     the unpacked components (its parameter declaration is not
 *          visible in this listing)
 *
 * Visible steps:
 *  1. 'type' is the LLVM integer type of desc->block.bits bits;
 *  2. for each storage channel i, desc->swizzle[] is searched for an entry
 *     equal to i; swizzles[i] becomes that index j, or undef;
 *  3. 'rgba' is shuffled with those indices into storage order;
 *  4. per-channel constants: a VOID channel gets undef shift and scale; any
 *     other channel is asserted UNSIGNED and gets shift = 'shift' and
 *     scale = mask when normalized (1.0 on the other visible assignment);
 *  5. multiply by the scales, convert with FPToSI to <4 x i32>, shift left;
 *  6. extract the lanes of UNSIGNED channels and OR them into 'packed';
 *  7. truncate 'packed' to 'type' when the block is narrower than 32 bits.
 *
 * NOTE(review): listing gaps hide the return type, the declarations of
 * 'type', 'i', 'j' and 'shift', how 'shift' advances, the test that picks
 * between the found index and undef in step 2, how the first component
 * seeds 'packed' (it starts as NULL), the condition under which 'packed'
 * is set to undef, and the return statement -- confirm against the real
 * file.
 */
262 lp_build_pack_rgba_aos(struct gallivm_state
*gallivm
,
263 const struct util_format_description
*desc
,
266 LLVMBuilderRef builder
= gallivm
->builder
;
268 LLVMValueRef packed
= NULL
;
269 LLVMValueRef swizzles
[4];
270 LLVMValueRef shifted
, casted
, scaled
, unswizzled
;
271 LLVMValueRef shifts
[4];
272 LLVMValueRef scales
[4];
277 assert(desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
278 assert(desc
->block
.width
== 1);
279 assert(desc
->block
.height
== 1);
281 type
= LLVMIntTypeInContext(gallivm
->context
, desc
->block
.bits
);
283 /* Unswizzle the color components into the source vector. */
284 for (i
= 0; i
< 4; ++i
) {
285 for (j
= 0; j
< 4; ++j
) {
286 if (desc
->swizzle
[j
] == i
)
290 swizzles
[i
] = lp_build_const_int32(gallivm
, j
);
292 swizzles
[i
] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm
->context
));
295 unswizzled
= LLVMBuildShuffleVector(builder
, rgba
,
296 LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), 4)),
297 LLVMConstVector(swizzles
, 4), "");
301 for (i
= 0; i
< 4; ++i
) {
302 unsigned bits
= desc
->channel
[i
].size
;
/* Unused (VOID) channel: both constants are don't-care. */
304 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_VOID
) {
305 shifts
[i
] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm
->context
));
306 scales
[i
] = LLVMGetUndef(LLVMFloatTypeInContext(gallivm
->context
));
/*
 * NOTE(review): this is an 'int' shift, so bits == 32 would be undefined
 * behavior; lp_build_unpack_arith_rgba_aos() uses 1ULL for the same mask.
 * No visible assert limits the channel size here -- confirm whether a
 * 32-bit channel can reach this point.
 */
309 unsigned mask
= (1 << bits
) - 1;
311 assert(desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_UNSIGNED
);
314 shifts
[i
] = lp_build_const_int32(gallivm
, shift
);
/* Normalized channels are scaled by the channel's maximum integer value. */
316 if (desc
->channel
[i
].normalized
) {
317 scales
[i
] = lp_build_const_float(gallivm
, mask
);
321 scales
[i
] = lp_build_const_float(gallivm
, 1.0);
328 scaled
= LLVMBuildFMul(builder
, unswizzled
, LLVMConstVector(scales
, 4), "");
/* Signed conversion: FPToSI into 32-bit integer lanes. */
332 casted
= LLVMBuildFPToSI(builder
, scaled
, LLVMVectorType(LLVMInt32TypeInContext(gallivm
->context
), 4), "");
334 shifted
= LLVMBuildShl(builder
, casted
, LLVMConstVector(shifts
, 4), "");
336 /* Bitwise or all components */
337 for (i
= 0; i
< 4; ++i
) {
338 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_UNSIGNED
) {
339 LLVMValueRef component
= LLVMBuildExtractElement(builder
, shifted
,
340 lp_build_const_int32(gallivm
, i
), "");
342 packed
= LLVMBuildOr(builder
, packed
, component
, "");
349 packed
= LLVMGetUndef(LLVMInt32TypeInContext(gallivm
->context
));
/* Narrow the 32-bit intermediate down to the format's block size. */
351 if (desc
->block
.bits
< 32)
352 packed
= LLVMBuildTrunc(builder
, packed
, type
, "");
361 * Fetch a pixel into a 4 float AoS.
363 * \param format_desc describes format of the image we're fetching from
364 * \param ptr address of the pixel block (or the texel if uncompressed)
365 * \param i, j the sub-block pixel coordinates. For non-compressed formats
366 * these will always be (0, 0).
367 * \return a 4 element vector with the pixel's RGBA values.
/*
 * lp_build_fetch_rgba_aos: emit LLVM IR that fetches type.length / 4
 * pixels of the given format and returns them as an AoS vector of 'type'.
 *
 * gallivm      supplies the LLVM builder, context and execution engine
 * format_desc  format of the image being fetched from
 * base_ptr     base address used by the gather helpers
 * type, offset, i, j
 *              used throughout the body, but their parameter declarations
 *              are not visible in this listing
 *
 * type.length is asserted to be at most LP_MAX_VECTOR_LENGTH and a
 * multiple of 4.  The visible code tries these strategies in order:
 *
 *  1. format_matches_type() holds, block.bits <= type.width * 4 and
 *     block.bits is a power of two: lp_build_gather(), bitcast to the
 *     vector type, then lp_build_format_swizzle_aos().
 *  2. PLAIN layout, RGB or ZS colorspace, 1x1 block, power-of-two
 *     block.bits <= 32, is_bitmask, not is_mixed, and channel 0 or 1 is
 *     UNSIGNED: per pixel lp_build_gather_elem() followed by
 *     lp_build_unpack_arith_rgba_aos(), then lp_build_conv() from
 *     lp_float32_vec4_type() and lp_build_format_swizzle_aos().
 *  3. SUBSAMPLED layout: lp_build_fetch_subsampled_rgba_aos() followed by
 *     lp_build_conv().
 *  4. format_desc->fetch_rgba_8unorm exists and 'type' is non-floating,
 *     8 bits wide, unsigned and normalized: declare and bind
 *     "util_format_<short_name>_fetch_rgba_8unorm" in the module, call it
 *     once per pixel through an i32 stack slot, and collect the results.
 *  5. format_desc->fetch_rgba_float exists: same scheme through a
 *     <4 x float> stack slot, then lp_build_conv().
 *  6. otherwise: lp_build_undef(gallivm, type).
 *
 * NOTE(review): listing gaps hide the return type, most of the parameter
 * list, declarations of 'packed', 'res', 'tmp', 'name' and 'k', several
 * call arguments, the guard around the function declaration in paths 4
 * and 5 (presumably "only when LLVMGetNamedFunction() found nothing"), the
 * bodies of the num_pixels == 1 branches, and the return statements of
 * paths 3-5 -- confirm against the real file.
 */
370 lp_build_fetch_rgba_aos(struct gallivm_state
*gallivm
,
371 const struct util_format_description
*format_desc
,
373 LLVMValueRef base_ptr
,
378 LLVMBuilderRef builder
= gallivm
->builder
;
/* Four vector elements per pixel. */
379 unsigned num_pixels
= type
.length
/ 4;
380 struct lp_build_context bld
;
382 assert(type
.length
<= LP_MAX_VECTOR_LENGTH
);
383 assert(type
.length
% 4 == 0);
385 lp_build_context_init(&bld
, gallivm
, type
);
390 * The format matches the type (apart of a swizzle) so no need for
391 * scaling or converting.
/* Path 1: direct gather, only a swizzle is applied afterwards. */
394 if (format_matches_type(format_desc
, type
) &&
395 format_desc
->block
.bits
<= type
.width
* 4 &&
396 util_is_power_of_two(format_desc
->block
.bits
)) {
400 * The format matches the type (apart of a swizzle) so no need for
401 * scaling or converting.
404 packed
= lp_build_gather(gallivm
, type
.length
/4,
405 format_desc
->block
.bits
, type
.width
*4,
408 assert(format_desc
->block
.bits
<= type
.width
* type
.length
);
410 packed
= LLVMBuildBitCast(gallivm
->builder
, packed
,
411 lp_build_vec_type(gallivm
, type
), "");
413 return lp_build_format_swizzle_aos(format_desc
, &bld
, packed
);
/* Path 2: bit-field formats unpacked arithmetically, one pixel at a time. */
420 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
&&
421 (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
||
422 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) &&
423 format_desc
->block
.width
== 1 &&
424 format_desc
->block
.height
== 1 &&
425 util_is_power_of_two(format_desc
->block
.bits
) &&
426 format_desc
->block
.bits
<= 32 &&
427 format_desc
->is_bitmask
&&
428 !format_desc
->is_mixed
&&
429 (format_desc
->channel
[0].type
== UTIL_FORMAT_TYPE_UNSIGNED
||
430 format_desc
->channel
[1].type
== UTIL_FORMAT_TYPE_UNSIGNED
)) {
432 LLVMValueRef tmps
[LP_MAX_VECTOR_LENGTH
/4];
437 * Unpack a pixel at a time into a <4 x float> RGBA vector
440 for (k
= 0; k
< num_pixels
; ++k
) {
/* Each pixel is gathered as a 32-bit value, the width the unpack helper asserts. */
443 packed
= lp_build_gather_elem(gallivm
, num_pixels
,
444 format_desc
->block
.bits
, 32,
445 base_ptr
, offset
, k
);
447 tmps
[k
] = lp_build_unpack_arith_rgba_aos(gallivm
,
455 * TODO: We could avoid floating conversion for integer to
456 * integer conversions.
459 if (gallivm_debug
& GALLIVM_DEBUG_PERF
&& !type
.floating
) {
460 debug_printf("%s: unpacking %s with floating point\n",
461 __FUNCTION__
, format_desc
->short_name
);
/* Convert the per-pixel float vectors into one vector of the requested type. */
464 lp_build_conv(gallivm
,
465 lp_float32_vec4_type(),
467 tmps
, num_pixels
, &res
, 1);
469 return lp_build_format_swizzle_aos(format_desc
, &bld
, res
);
473 * YUV / subsampled formats
/* Path 3: subsampled formats go through a dedicated helper, then a conversion. */
476 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_SUBSAMPLED
) {
477 struct lp_type tmp_type
;
480 memset(&tmp_type
, 0, sizeof tmp_type
);
482 tmp_type
.length
= num_pixels
* 4;
483 tmp_type
.norm
= TRUE
;
485 tmp
= lp_build_fetch_subsampled_rgba_aos(gallivm
,
492 lp_build_conv(gallivm
,
500 * Fallback to util_format_description::fetch_rgba_8unorm().
/* Path 4: only taken when the requested type is exactly 8-bit unsigned normalized. */
503 if (format_desc
->fetch_rgba_8unorm
&&
504 !type
.floating
&& type
.width
== 8 && !type
.sign
&& type
.norm
) {
506 * Fallback to calling util_format_description::fetch_rgba_8unorm.
508 * This is definitely not the most efficient way of fetching pixels, as
509 * we miss the opportunity to do vectorization, but this it is a
510 * convenient for formats or scenarios for which there was no opportunity
511 * or incentive to optimize.
/* The module is reached from the block the builder is currently inserting into. */
514 LLVMModuleRef module
= LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm
->builder
)));
516 LLVMTypeRef i8t
= LLVMInt8TypeInContext(gallivm
->context
);
517 LLVMTypeRef pi8t
= LLVMPointerType(i8t
, 0);
518 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
519 LLVMValueRef function
;
520 LLVMValueRef tmp_ptr
;
525 util_snprintf(name
, sizeof name
, "util_format_%s_fetch_rgba_8unorm",
526 format_desc
->short_name
);
528 if (gallivm_debug
& GALLIVM_DEBUG_PERF
) {
529 debug_printf("%s: falling back to %s\n", __FUNCTION__
, name
);
533 * Declare and bind format_desc->fetch_rgba_8unorm().
536 function
= LLVMGetNamedFunction(module
, name
);
538 LLVMTypeRef ret_type
;
539 LLVMTypeRef arg_types
[4];
540 LLVMTypeRef function_type
;
542 ret_type
= LLVMVoidTypeInContext(gallivm
->context
);
/* The last two arguments are integers as wide as the host's 'unsigned'. */
545 arg_types
[3] = arg_types
[2] = LLVMIntTypeInContext(gallivm
->context
, sizeof(unsigned) * 8);
546 function_type
= LLVMFunctionType(ret_type
, arg_types
, Elements(arg_types
), 0);
547 function
= LLVMAddFunction(module
, name
, function_type
);
549 LLVMSetFunctionCallConv(function
, LLVMCCallConv
);
550 LLVMSetLinkage(function
, LLVMExternalLinkage
);
552 assert(LLVMIsDeclaration(function
));
/* Bind the declaration to the C function pointer stored in the format description. */
554 LLVMAddGlobalMapping(gallivm
->engine
, function
,
555 func_to_pointer((func_pointer
)format_desc
->fetch_rgba_8unorm
));
/* One i32 stack slot receives each call's output and is reused for every pixel. */
558 tmp_ptr
= lp_build_alloca(gallivm
, i32t
, "");
560 res
= LLVMGetUndef(LLVMVectorType(i32t
, num_pixels
));
563 * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
564 * in the SoA vectors.
567 for (k
= 0; k
< num_pixels
; ++k
) {
568 LLVMValueRef index
= lp_build_const_int32(gallivm
, k
);
569 LLVMValueRef args
[4];
571 args
[0] = LLVMBuildBitCast(builder
, tmp_ptr
, pi8t
, "");
572 args
[1] = lp_build_gather_elem_ptr(gallivm
, num_pixels
,
573 base_ptr
, offset
, k
);
/* NOTE(review): the single-pixel branch body is missing from this listing. */
575 if (num_pixels
== 1) {
580 args
[2] = LLVMBuildExtractElement(builder
, i
, index
, "");
581 args
[3] = LLVMBuildExtractElement(builder
, j
, index
, "");
584 LLVMBuildCall(builder
, function
, args
, Elements(args
), "");
586 tmp
= LLVMBuildLoad(builder
, tmp_ptr
, "");
588 if (num_pixels
== 1) {
592 res
= LLVMBuildInsertElement(builder
, res
, tmp
, index
, "");
596 /* Bitcast from <n x i32> to <4n x i8> */
597 res
= LLVMBuildBitCast(builder
, res
, bld
.vec_type
, "");
604 * Fallback to util_format_description::fetch_rgba_float().
/* Path 5: generic float fallback; results are converted to 'type' afterwards. */
607 if (format_desc
->fetch_rgba_float
) {
609 * Fallback to calling util_format_description::fetch_rgba_float.
611 * This is definitely not the most efficient way of fetching pixels, as
612 * we miss the opportunity to do vectorization, but this it is a
613 * convenient for formats or scenarios for which there was no opportunity
614 * or incentive to optimize.
617 LLVMModuleRef module
= LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder
)));
619 LLVMTypeRef f32t
= LLVMFloatTypeInContext(gallivm
->context
);
620 LLVMTypeRef f32x4t
= LLVMVectorType(f32t
, 4);
621 LLVMTypeRef pf32t
= LLVMPointerType(f32t
, 0);
622 LLVMValueRef function
;
623 LLVMValueRef tmp_ptr
;
624 LLVMValueRef tmps
[LP_MAX_VECTOR_LENGTH
/4];
628 util_snprintf(name
, sizeof name
, "util_format_%s_fetch_rgba_float",
629 format_desc
->short_name
);
631 if (gallivm_debug
& GALLIVM_DEBUG_PERF
) {
632 debug_printf("%s: falling back to %s\n", __FUNCTION__
, name
);
636 * Declare and bind format_desc->fetch_rgba_float().
639 function
= LLVMGetNamedFunction(module
, name
);
641 LLVMTypeRef ret_type
;
642 LLVMTypeRef arg_types
[4];
643 LLVMTypeRef function_type
;
/* Signature: void (float *, i8 *, unsigned-width int, unsigned-width int). */
645 ret_type
= LLVMVoidTypeInContext(gallivm
->context
);
646 arg_types
[0] = pf32t
;
647 arg_types
[1] = LLVMPointerType(LLVMInt8TypeInContext(gallivm
->context
), 0);
648 arg_types
[3] = arg_types
[2] = LLVMIntTypeInContext(gallivm
->context
, sizeof(unsigned) * 8);
649 function_type
= LLVMFunctionType(ret_type
, arg_types
, Elements(arg_types
), 0);
650 function
= LLVMAddFunction(module
, name
, function_type
);
652 LLVMSetFunctionCallConv(function
, LLVMCCallConv
);
653 LLVMSetLinkage(function
, LLVMExternalLinkage
);
655 assert(LLVMIsDeclaration(function
));
657 LLVMAddGlobalMapping(gallivm
->engine
, function
,
658 func_to_pointer((func_pointer
)format_desc
->fetch_rgba_float
));
/* One <4 x float> stack slot receives each call's output and is reused for every pixel. */
661 tmp_ptr
= lp_build_alloca(gallivm
, f32x4t
, "");
664 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
665 * in the SoA vectors.
668 for (k
= 0; k
< num_pixels
; ++k
) {
669 LLVMValueRef args
[4];
671 args
[0] = LLVMBuildBitCast(builder
, tmp_ptr
, pf32t
, "");
672 args
[1] = lp_build_gather_elem_ptr(gallivm
, num_pixels
,
673 base_ptr
, offset
, k
);
/* NOTE(review): the single-pixel branch body is missing from this listing. */
675 if (num_pixels
== 1) {
680 LLVMValueRef index
= lp_build_const_int32(gallivm
, k
);
681 args
[2] = LLVMBuildExtractElement(builder
, i
, index
, "");
682 args
[3] = LLVMBuildExtractElement(builder
, j
, index
, "");
685 LLVMBuildCall(builder
, function
, args
, Elements(args
), "");
687 tmps
[k
] = LLVMBuildLoad(builder
, tmp_ptr
, "");
690 lp_build_conv(gallivm
,
691 lp_float32_vec4_type(),
693 tmps
, num_pixels
, &res
, 1);
/* Path 6: no strategy applied, so an undefined value of the requested type is returned. */
699 return lp_build_undef(gallivm
, type
);