/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/**
 * @file
 * AoS pixel format manipulation.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
36 #include "util/u_format.h"
37 #include "util/u_memory.h"
38 #include "util/u_math.h"
39 #include "util/u_pointer.h"
40 #include "util/u_string.h"
42 #include "lp_bld_arit.h"
43 #include "lp_bld_init.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_flow.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_swizzle.h"
49 #include "lp_bld_gather.h"
50 #include "lp_bld_debug.h"
51 #include "lp_bld_format.h"
52 #include "lp_bld_intr.h"
56 * Basic swizzling. Rearrange the order of the unswizzled array elements
57 * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported
59 * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
62 lp_build_format_swizzle_aos(const struct util_format_description
*desc
,
63 struct lp_build_context
*bld
,
64 LLVMValueRef unswizzled
)
66 unsigned char swizzles
[4];
69 assert(bld
->type
.length
% 4 == 0);
71 for (chan
= 0; chan
< 4; ++chan
) {
72 enum util_format_swizzle swizzle
;
74 if (desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) {
76 * For ZS formats do RGBA = ZZZ1
79 swizzle
= UTIL_FORMAT_SWIZZLE_1
;
80 } else if (desc
->swizzle
[0] == UTIL_FORMAT_SWIZZLE_NONE
) {
81 swizzle
= UTIL_FORMAT_SWIZZLE_0
;
83 swizzle
= desc
->swizzle
[0];
86 swizzle
= desc
->swizzle
[chan
];
88 swizzles
[chan
] = swizzle
;
91 return lp_build_swizzle_aos(bld
, unswizzled
, swizzles
);
96 * Whether the format matches the vector type, apart of swizzles.
99 format_matches_type(const struct util_format_description
*desc
,
102 enum util_format_type chan_type
;
105 assert(type
.length
% 4 == 0);
107 if (desc
->layout
!= UTIL_FORMAT_LAYOUT_PLAIN
||
108 desc
->colorspace
!= UTIL_FORMAT_COLORSPACE_RGB
||
109 desc
->block
.width
!= 1 ||
110 desc
->block
.height
!= 1) {
115 chan_type
= UTIL_FORMAT_TYPE_FLOAT
;
116 } else if (type
.fixed
) {
117 chan_type
= UTIL_FORMAT_TYPE_FIXED
;
118 } else if (type
.sign
) {
119 chan_type
= UTIL_FORMAT_TYPE_SIGNED
;
121 chan_type
= UTIL_FORMAT_TYPE_UNSIGNED
;
124 for (chan
= 0; chan
< desc
->nr_channels
; ++chan
) {
125 if (desc
->channel
[chan
].size
!= type
.width
) {
129 if (desc
->channel
[chan
].type
!= UTIL_FORMAT_TYPE_VOID
) {
130 if (desc
->channel
[chan
].type
!= chan_type
||
131 desc
->channel
[chan
].normalized
!= type
.norm
) {
142 * Unpack a single pixel into its RGBA components.
144 * @param desc the pixel format for the packed pixel value
145 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
147 * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
149 static INLINE LLVMValueRef
150 lp_build_unpack_arith_rgba_aos(struct gallivm_state
*gallivm
,
151 const struct util_format_description
*desc
,
154 LLVMBuilderRef builder
= gallivm
->builder
;
155 LLVMValueRef shifted
, casted
, scaled
, masked
;
156 LLVMValueRef shifts
[4];
157 LLVMValueRef masks
[4];
158 LLVMValueRef scales
[4];
161 boolean needs_uitofp
;
165 /* TODO: Support more formats */
166 assert(desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
167 assert(desc
->block
.width
== 1);
168 assert(desc
->block
.height
== 1);
169 assert(desc
->block
.bits
<= 32);
171 /* Do the intermediate integer computations with 32bit integers since it
172 * matches floating point size */
173 assert (LLVMTypeOf(packed
) == LLVMInt32TypeInContext(gallivm
->context
));
175 #ifdef PIPE_ARCH_BIG_ENDIAN
176 packed
= lp_build_bswap(gallivm
, packed
, lp_type_uint(32));
179 /* Broadcast the packed value to all four channels
180 * before: packed = BGRA
181 * after: packed = {BGRA, BGRA, BGRA, BGRA}
183 packed
= LLVMBuildInsertElement(builder
,
184 LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm
->context
), 4)),
186 LLVMConstNull(LLVMInt32TypeInContext(gallivm
->context
)),
188 packed
= LLVMBuildShuffleVector(builder
,
190 LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm
->context
), 4)),
191 LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm
->context
), 4)),
194 /* Initialize vector constants */
196 needs_uitofp
= FALSE
;
199 /* Loop over 4 color components */
200 for (i
= 0; i
< 4; ++i
) {
201 unsigned bits
= desc
->channel
[i
].size
;
203 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_VOID
) {
204 shifts
[i
] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm
->context
));
205 masks
[i
] = LLVMConstNull(LLVMInt32TypeInContext(gallivm
->context
));
206 scales
[i
] = LLVMConstNull(LLVMFloatTypeInContext(gallivm
->context
));
209 unsigned long long mask
= (1ULL << bits
) - 1;
211 assert(desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_UNSIGNED
);
217 shifts
[i
] = lp_build_const_int32(gallivm
, shift
);
218 masks
[i
] = lp_build_const_int32(gallivm
, mask
);
220 if (desc
->channel
[i
].normalized
) {
221 scales
[i
] = lp_build_const_float(gallivm
, 1.0 / mask
);
225 scales
[i
] = lp_build_const_float(gallivm
, 1.0);
231 /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
232 * into masked = {B, G, R, A}
234 shifted
= LLVMBuildLShr(builder
, packed
, LLVMConstVector(shifts
, 4), "");
235 masked
= LLVMBuildAnd(builder
, shifted
, LLVMConstVector(masks
, 4), "");
239 /* UIToFP can't be expressed in SSE2 */
240 casted
= LLVMBuildSIToFP(builder
, masked
, LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), 4), "");
242 casted
= LLVMBuildUIToFP(builder
, masked
, LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), 4), "");
245 /* At this point 'casted' may be a vector of floats such as
246 * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized
247 * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
251 scaled
= LLVMBuildFMul(builder
, casted
, LLVMConstVector(scales
, 4), "");
260 * Pack a single pixel.
262 * @param rgba 4 float vector with the unpacked components.
264 * XXX: This is mostly for reference and testing -- operating a single pixel at
265 * a time is rarely if ever needed.
268 lp_build_pack_rgba_aos(struct gallivm_state
*gallivm
,
269 const struct util_format_description
*desc
,
272 LLVMBuilderRef builder
= gallivm
->builder
;
274 LLVMValueRef packed
= NULL
;
275 LLVMValueRef swizzles
[4];
276 LLVMValueRef shifted
, casted
, scaled
, unswizzled
;
277 LLVMValueRef shifts
[4];
278 LLVMValueRef scales
[4];
283 assert(desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
);
284 assert(desc
->block
.width
== 1);
285 assert(desc
->block
.height
== 1);
287 type
= LLVMIntTypeInContext(gallivm
->context
, desc
->block
.bits
);
289 /* Unswizzle the color components into the source vector. */
290 for (i
= 0; i
< 4; ++i
) {
291 for (j
= 0; j
< 4; ++j
) {
292 if (desc
->swizzle
[j
] == i
)
296 swizzles
[i
] = lp_build_const_int32(gallivm
, j
);
298 swizzles
[i
] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm
->context
));
301 unswizzled
= LLVMBuildShuffleVector(builder
, rgba
,
302 LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), 4)),
303 LLVMConstVector(swizzles
, 4), "");
307 for (i
= 0; i
< 4; ++i
) {
308 unsigned bits
= desc
->channel
[i
].size
;
310 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_VOID
) {
311 shifts
[i
] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm
->context
));
312 scales
[i
] = LLVMGetUndef(LLVMFloatTypeInContext(gallivm
->context
));
315 unsigned mask
= (1 << bits
) - 1;
317 assert(desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_UNSIGNED
);
320 shifts
[i
] = lp_build_const_int32(gallivm
, shift
);
322 if (desc
->channel
[i
].normalized
) {
323 scales
[i
] = lp_build_const_float(gallivm
, mask
);
327 scales
[i
] = lp_build_const_float(gallivm
, 1.0);
334 scaled
= LLVMBuildFMul(builder
, unswizzled
, LLVMConstVector(scales
, 4), "");
338 casted
= LLVMBuildFPToSI(builder
, scaled
, LLVMVectorType(LLVMInt32TypeInContext(gallivm
->context
), 4), "");
340 shifted
= LLVMBuildShl(builder
, casted
, LLVMConstVector(shifts
, 4), "");
342 /* Bitwise or all components */
343 for (i
= 0; i
< 4; ++i
) {
344 if (desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_UNSIGNED
) {
345 LLVMValueRef component
= LLVMBuildExtractElement(builder
, shifted
,
346 lp_build_const_int32(gallivm
, i
), "");
348 packed
= LLVMBuildOr(builder
, packed
, component
, "");
355 packed
= LLVMGetUndef(LLVMInt32TypeInContext(gallivm
->context
));
357 if (desc
->block
.bits
< 32)
358 packed
= LLVMBuildTrunc(builder
, packed
, type
, "");
367 * Fetch a pixel into a 4 float AoS.
369 * \param format_desc describes format of the image we're fetching from
370 * \param ptr address of the pixel block (or the texel if uncompressed)
371 * \param i, j the sub-block pixel coordinates. For non-compressed formats
372 * these will always be (0, 0).
373 * \return a 4 element vector with the pixel's RGBA values.
376 lp_build_fetch_rgba_aos(struct gallivm_state
*gallivm
,
377 const struct util_format_description
*format_desc
,
379 LLVMValueRef base_ptr
,
384 LLVMBuilderRef builder
= gallivm
->builder
;
385 unsigned num_pixels
= type
.length
/ 4;
386 struct lp_build_context bld
;
388 assert(type
.length
<= LP_MAX_VECTOR_LENGTH
);
389 assert(type
.length
% 4 == 0);
391 lp_build_context_init(&bld
, gallivm
, type
);
396 * The format matches the type (apart of a swizzle) so no need for
397 * scaling or converting.
400 if (format_matches_type(format_desc
, type
) &&
401 format_desc
->block
.bits
<= type
.width
* 4 &&
402 util_is_power_of_two(format_desc
->block
.bits
)) {
404 LLVMTypeRef dst_vec_type
= lp_build_vec_type(gallivm
, type
);
405 unsigned vec_len
= type
.width
* type
.length
;
408 * The format matches the type (apart of a swizzle) so no need for
409 * scaling or converting.
412 packed
= lp_build_gather(gallivm
, type
.length
/4,
413 format_desc
->block
.bits
, type
.width
*4,
416 assert(format_desc
->block
.bits
<= vec_len
);
418 packed
= LLVMBuildBitCast(gallivm
->builder
, packed
, dst_vec_type
, "");
419 #ifdef PIPE_ARCH_BIG_ENDIAN
421 packed
= lp_build_bswap_vec(gallivm
, packed
, type
,
422 lp_type_float_vec(type
.width
, vec_len
));
424 return lp_build_format_swizzle_aos(format_desc
, &bld
, packed
);
431 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_PLAIN
&&
432 (format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_RGB
||
433 format_desc
->colorspace
== UTIL_FORMAT_COLORSPACE_ZS
) &&
434 format_desc
->block
.width
== 1 &&
435 format_desc
->block
.height
== 1 &&
436 util_is_power_of_two(format_desc
->block
.bits
) &&
437 format_desc
->block
.bits
<= 32 &&
438 format_desc
->is_bitmask
&&
439 !format_desc
->is_mixed
&&
440 (format_desc
->channel
[0].type
== UTIL_FORMAT_TYPE_UNSIGNED
||
441 format_desc
->channel
[1].type
== UTIL_FORMAT_TYPE_UNSIGNED
)) {
443 LLVMValueRef tmps
[LP_MAX_VECTOR_LENGTH
/4];
448 * Unpack a pixel at a time into a <4 x float> RGBA vector
451 for (k
= 0; k
< num_pixels
; ++k
) {
454 packed
= lp_build_gather_elem(gallivm
, num_pixels
,
455 format_desc
->block
.bits
, 32,
456 base_ptr
, offset
, k
);
458 tmps
[k
] = lp_build_unpack_arith_rgba_aos(gallivm
,
466 * TODO: We could avoid floating conversion for integer to
467 * integer conversions.
470 if (gallivm_debug
& GALLIVM_DEBUG_PERF
&& !type
.floating
) {
471 debug_printf("%s: unpacking %s with floating point\n",
472 __FUNCTION__
, format_desc
->short_name
);
475 lp_build_conv(gallivm
,
476 lp_float32_vec4_type(),
478 tmps
, num_pixels
, &res
, 1);
480 return lp_build_format_swizzle_aos(format_desc
, &bld
, res
);
483 /* If all channels are of same type and we are not using half-floats */
484 if (util_format_is_array(format_desc
)) {
485 return lp_build_fetch_rgba_aos_array(gallivm
, format_desc
, type
, base_ptr
, offset
);
489 * YUV / subsampled formats
492 if (format_desc
->layout
== UTIL_FORMAT_LAYOUT_SUBSAMPLED
) {
493 struct lp_type tmp_type
;
496 memset(&tmp_type
, 0, sizeof tmp_type
);
498 tmp_type
.length
= num_pixels
* 4;
499 tmp_type
.norm
= TRUE
;
501 tmp
= lp_build_fetch_subsampled_rgba_aos(gallivm
,
508 lp_build_conv(gallivm
,
516 * Fallback to util_format_description::fetch_rgba_8unorm().
519 if (format_desc
->fetch_rgba_8unorm
&&
520 !type
.floating
&& type
.width
== 8 && !type
.sign
&& type
.norm
) {
522 * Fallback to calling util_format_description::fetch_rgba_8unorm.
524 * This is definitely not the most efficient way of fetching pixels, as
525 * we miss the opportunity to do vectorization, but this it is a
526 * convenient for formats or scenarios for which there was no opportunity
527 * or incentive to optimize.
530 LLVMTypeRef i8t
= LLVMInt8TypeInContext(gallivm
->context
);
531 LLVMTypeRef pi8t
= LLVMPointerType(i8t
, 0);
532 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
533 LLVMValueRef function
;
534 LLVMValueRef tmp_ptr
;
539 if (gallivm_debug
& GALLIVM_DEBUG_PERF
) {
540 debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n",
541 __FUNCTION__
, format_desc
->short_name
);
545 * Declare and bind format_desc->fetch_rgba_8unorm().
550 * Function to call looks like:
551 * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
553 LLVMTypeRef ret_type
;
554 LLVMTypeRef arg_types
[4];
555 LLVMTypeRef function_type
;
557 ret_type
= LLVMVoidTypeInContext(gallivm
->context
);
562 function_type
= LLVMFunctionType(ret_type
, arg_types
,
563 Elements(arg_types
), 0);
565 /* make const pointer for the C fetch_rgba_8unorm function */
566 function
= lp_build_const_int_pointer(gallivm
,
567 func_to_pointer((func_pointer
) format_desc
->fetch_rgba_8unorm
));
569 /* cast the callee pointer to the function's type */
570 function
= LLVMBuildBitCast(builder
, function
,
571 LLVMPointerType(function_type
, 0),
575 tmp_ptr
= lp_build_alloca(gallivm
, i32t
, "");
577 res
= LLVMGetUndef(LLVMVectorType(i32t
, num_pixels
));
580 * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
581 * in the SoA vectors.
584 for (k
= 0; k
< num_pixels
; ++k
) {
585 LLVMValueRef index
= lp_build_const_int32(gallivm
, k
);
586 LLVMValueRef args
[4];
588 args
[0] = LLVMBuildBitCast(builder
, tmp_ptr
, pi8t
, "");
589 args
[1] = lp_build_gather_elem_ptr(gallivm
, num_pixels
,
590 base_ptr
, offset
, k
);
592 if (num_pixels
== 1) {
597 args
[2] = LLVMBuildExtractElement(builder
, i
, index
, "");
598 args
[3] = LLVMBuildExtractElement(builder
, j
, index
, "");
601 LLVMBuildCall(builder
, function
, args
, Elements(args
), "");
603 tmp
= LLVMBuildLoad(builder
, tmp_ptr
, "");
605 if (num_pixels
== 1) {
609 res
= LLVMBuildInsertElement(builder
, res
, tmp
, index
, "");
613 /* Bitcast from <n x i32> to <4n x i8> */
614 res
= LLVMBuildBitCast(builder
, res
, bld
.vec_type
, "");
620 * Fallback to util_format_description::fetch_rgba_float().
623 if (format_desc
->fetch_rgba_float
) {
625 * Fallback to calling util_format_description::fetch_rgba_float.
627 * This is definitely not the most efficient way of fetching pixels, as
628 * we miss the opportunity to do vectorization, but this it is a
629 * convenient for formats or scenarios for which there was no opportunity
630 * or incentive to optimize.
633 LLVMTypeRef f32t
= LLVMFloatTypeInContext(gallivm
->context
);
634 LLVMTypeRef f32x4t
= LLVMVectorType(f32t
, 4);
635 LLVMTypeRef pf32t
= LLVMPointerType(f32t
, 0);
636 LLVMTypeRef pi8t
= LLVMPointerType(LLVMInt8TypeInContext(gallivm
->context
), 0);
637 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
638 LLVMValueRef function
;
639 LLVMValueRef tmp_ptr
;
640 LLVMValueRef tmps
[LP_MAX_VECTOR_LENGTH
/4];
644 if (gallivm_debug
& GALLIVM_DEBUG_PERF
) {
645 debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n",
646 __FUNCTION__
, format_desc
->short_name
);
650 * Declare and bind format_desc->fetch_rgba_float().
655 * Function to call looks like:
656 * fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
658 LLVMTypeRef ret_type
;
659 LLVMTypeRef arg_types
[4];
661 ret_type
= LLVMVoidTypeInContext(gallivm
->context
);
662 arg_types
[0] = pf32t
;
667 function
= lp_build_const_func_pointer(gallivm
,
668 func_to_pointer((func_pointer
) format_desc
->fetch_rgba_float
),
670 arg_types
, Elements(arg_types
),
671 format_desc
->short_name
);
674 tmp_ptr
= lp_build_alloca(gallivm
, f32x4t
, "");
677 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
678 * in the SoA vectors.
681 for (k
= 0; k
< num_pixels
; ++k
) {
682 LLVMValueRef args
[4];
684 args
[0] = LLVMBuildBitCast(builder
, tmp_ptr
, pf32t
, "");
685 args
[1] = lp_build_gather_elem_ptr(gallivm
, num_pixels
,
686 base_ptr
, offset
, k
);
688 if (num_pixels
== 1) {
693 LLVMValueRef index
= lp_build_const_int32(gallivm
, k
);
694 args
[2] = LLVMBuildExtractElement(builder
, i
, index
, "");
695 args
[3] = LLVMBuildExtractElement(builder
, j
, index
, "");
698 LLVMBuildCall(builder
, function
, args
, Elements(args
), "");
700 tmps
[k
] = LLVMBuildLoad(builder
, tmp_ptr
, "");
703 lp_build_conv(gallivm
,
704 lp_float32_vec4_type(),
706 tmps
, num_pixels
, &res
, 1);
712 return lp_build_undef(gallivm
, type
);