/**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 **************************************************************************/

/**
 * YUV pixel format manipulation.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
37 #include "util/u_format.h"
38 #include "util/u_cpu_detect.h"
40 #include "lp_bld_arit.h"
41 #include "lp_bld_type.h"
42 #include "lp_bld_const.h"
43 #include "lp_bld_conv.h"
44 #include "lp_bld_gather.h"
45 #include "lp_bld_format.h"
46 #include "lp_bld_init.h"
47 #include "lp_bld_logic.h"
50 * Extract Y, U, V channels from packed UYVY.
51 * @param packed is a <n x i32> vector with the packed UYVY blocks
52 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
55 uyvy_to_yuv_soa(struct gallivm_state
*gallivm
,
63 LLVMBuilderRef builder
= gallivm
->builder
;
67 memset(&type
, 0, sizeof type
);
71 assert(lp_check_value(type
, packed
));
72 assert(lp_check_value(type
, i
));
75 * y = (uyvy >> (16*i + 8)) & 0xff
77 * v = (uyvy >> 16 ) & 0xff
80 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
82 * Avoid shift with per-element count.
83 * No support on x86, gets translated to roughly 5 instructions
84 * per element. Didn't measure performance but cuts shader size
85 * by quite a bit (less difference if cpu has no sse4.1 support).
87 if (util_cpu_caps
.has_sse2
&& n
== 4) {
88 LLVMValueRef sel
, tmp
, tmp2
;
89 struct lp_build_context bld32
;
91 lp_build_context_init(&bld32
, gallivm
, type
);
93 tmp
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 8), "");
94 tmp2
= LLVMBuildLShr(builder
, tmp
, lp_build_const_int_vec(gallivm
, type
, 16), "");
95 sel
= lp_build_compare(gallivm
, type
, PIPE_FUNC_EQUAL
, i
, lp_build_const_int_vec(gallivm
, type
, 0));
96 *y
= lp_build_select(&bld32
, sel
, tmp
, tmp2
);
101 shift
= LLVMBuildMul(builder
, i
, lp_build_const_int_vec(gallivm
, type
, 16), "");
102 shift
= LLVMBuildAdd(builder
, shift
, lp_build_const_int_vec(gallivm
, type
, 8), "");
103 *y
= LLVMBuildLShr(builder
, packed
, shift
, "");
107 *v
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 16), "");
109 mask
= lp_build_const_int_vec(gallivm
, type
, 0xff);
111 *y
= LLVMBuildAnd(builder
, *y
, mask
, "y");
112 *u
= LLVMBuildAnd(builder
, *u
, mask
, "u");
113 *v
= LLVMBuildAnd(builder
, *v
, mask
, "v");
118 * Extract Y, U, V channels from packed YUYV.
119 * @param packed is a <n x i32> vector with the packed YUYV blocks
120 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
123 yuyv_to_yuv_soa(struct gallivm_state
*gallivm
,
131 LLVMBuilderRef builder
= gallivm
->builder
;
135 memset(&type
, 0, sizeof type
);
139 assert(lp_check_value(type
, packed
));
140 assert(lp_check_value(type
, i
));
143 * y = (yuyv >> 16*i) & 0xff
144 * u = (yuyv >> 8 ) & 0xff
145 * v = (yuyv >> 24 ) & 0xff
148 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
150 * Avoid shift with per-element count.
151 * No support on x86, gets translated to roughly 5 instructions
152 * per element. Didn't measure performance but cuts shader size
153 * by quite a bit (less difference if cpu has no sse4.1 support).
155 if (util_cpu_caps
.has_sse2
&& n
== 4) {
156 LLVMValueRef sel
, tmp
;
157 struct lp_build_context bld32
;
159 lp_build_context_init(&bld32
, gallivm
, type
);
161 tmp
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 16), "");
162 sel
= lp_build_compare(gallivm
, type
, PIPE_FUNC_EQUAL
, i
, lp_build_const_int_vec(gallivm
, type
, 0));
163 *y
= lp_build_select(&bld32
, sel
, packed
, tmp
);
168 shift
= LLVMBuildMul(builder
, i
, lp_build_const_int_vec(gallivm
, type
, 16), "");
169 *y
= LLVMBuildLShr(builder
, packed
, shift
, "");
172 *u
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 8), "");
173 *v
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 24), "");
175 mask
= lp_build_const_int_vec(gallivm
, type
, 0xff);
177 *y
= LLVMBuildAnd(builder
, *y
, mask
, "y");
178 *u
= LLVMBuildAnd(builder
, *u
, mask
, "u");
179 *v
= LLVMBuildAnd(builder
, *v
, mask
, "v");
184 yuv_to_rgb_soa(struct gallivm_state
*gallivm
,
186 LLVMValueRef y
, LLVMValueRef u
, LLVMValueRef v
,
187 LLVMValueRef
*r
, LLVMValueRef
*g
, LLVMValueRef
*b
)
189 LLVMBuilderRef builder
= gallivm
->builder
;
191 struct lp_build_context bld
;
205 memset(&type
, 0, sizeof type
);
210 lp_build_context_init(&bld
, gallivm
, type
);
212 assert(lp_check_value(type
, y
));
213 assert(lp_check_value(type
, u
));
214 assert(lp_check_value(type
, v
));
220 c0
= lp_build_const_int_vec(gallivm
, type
, 0);
221 c8
= lp_build_const_int_vec(gallivm
, type
, 8);
222 c16
= lp_build_const_int_vec(gallivm
, type
, 16);
223 c128
= lp_build_const_int_vec(gallivm
, type
, 128);
224 c255
= lp_build_const_int_vec(gallivm
, type
, 255);
226 cy
= lp_build_const_int_vec(gallivm
, type
, 298);
227 cug
= lp_build_const_int_vec(gallivm
, type
, -100);
228 cub
= lp_build_const_int_vec(gallivm
, type
, 516);
229 cvr
= lp_build_const_int_vec(gallivm
, type
, 409);
230 cvg
= lp_build_const_int_vec(gallivm
, type
, -208);
238 y
= LLVMBuildSub(builder
, y
, c16
, "");
239 u
= LLVMBuildSub(builder
, u
, c128
, "");
240 v
= LLVMBuildSub(builder
, v
, c128
, "");
243 * r = 298 * _y + 409 * _v + 128;
244 * g = 298 * _y - 100 * _u - 208 * _v + 128;
245 * b = 298 * _y + 516 * _u + 128;
248 y
= LLVMBuildMul(builder
, y
, cy
, "");
249 y
= LLVMBuildAdd(builder
, y
, c128
, "");
251 *r
= LLVMBuildMul(builder
, v
, cvr
, "");
252 *g
= LLVMBuildAdd(builder
,
253 LLVMBuildMul(builder
, u
, cug
, ""),
254 LLVMBuildMul(builder
, v
, cvg
, ""),
256 *b
= LLVMBuildMul(builder
, u
, cub
, "");
258 *r
= LLVMBuildAdd(builder
, *r
, y
, "");
259 *g
= LLVMBuildAdd(builder
, *g
, y
, "");
260 *b
= LLVMBuildAdd(builder
, *b
, y
, "");
268 *r
= LLVMBuildAShr(builder
, *r
, c8
, "r");
269 *g
= LLVMBuildAShr(builder
, *g
, c8
, "g");
270 *b
= LLVMBuildAShr(builder
, *b
, c8
, "b");
276 *r
= lp_build_clamp(&bld
, *r
, c0
, c255
);
277 *g
= lp_build_clamp(&bld
, *g
, c0
, c255
);
278 *b
= lp_build_clamp(&bld
, *b
, c0
, c255
);
283 rgb_to_rgba_aos(struct gallivm_state
*gallivm
,
285 LLVMValueRef r
, LLVMValueRef g
, LLVMValueRef b
)
287 LLVMBuilderRef builder
= gallivm
->builder
;
292 memset(&type
, 0, sizeof type
);
297 assert(lp_check_value(type
, r
));
298 assert(lp_check_value(type
, g
));
299 assert(lp_check_value(type
, b
));
302 * Make a 4 x unorm8 vector
306 g
= LLVMBuildShl(builder
, g
, lp_build_const_int_vec(gallivm
, type
, 8), "");
307 b
= LLVMBuildShl(builder
, b
, lp_build_const_int_vec(gallivm
, type
, 16), "");
308 a
= lp_build_const_int_vec(gallivm
, type
, 0xff000000);
311 rgba
= LLVMBuildOr(builder
, rgba
, g
, "");
312 rgba
= LLVMBuildOr(builder
, rgba
, b
, "");
313 rgba
= LLVMBuildOr(builder
, rgba
, a
, "");
315 rgba
= LLVMBuildBitCast(builder
, rgba
,
316 LLVMVectorType(LLVMInt8TypeInContext(gallivm
->context
), 4*n
), "");
323 * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
326 uyvy_to_rgba_aos(struct gallivm_state
*gallivm
,
331 LLVMValueRef y
, u
, v
;
332 LLVMValueRef r
, g
, b
;
335 uyvy_to_yuv_soa(gallivm
, n
, packed
, i
, &y
, &u
, &v
);
336 yuv_to_rgb_soa(gallivm
, n
, y
, u
, v
, &r
, &g
, &b
);
337 rgba
= rgb_to_rgba_aos(gallivm
, n
, r
, g
, b
);
344 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
347 yuyv_to_rgba_aos(struct gallivm_state
*gallivm
,
352 LLVMValueRef y
, u
, v
;
353 LLVMValueRef r
, g
, b
;
356 yuyv_to_yuv_soa(gallivm
, n
, packed
, i
, &y
, &u
, &v
);
357 yuv_to_rgb_soa(gallivm
, n
, y
, u
, v
, &r
, &g
, &b
);
358 rgba
= rgb_to_rgba_aos(gallivm
, n
, r
, g
, b
);
365 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
368 rgbg_to_rgba_aos(struct gallivm_state
*gallivm
,
373 LLVMValueRef r
, g
, b
;
376 uyvy_to_yuv_soa(gallivm
, n
, packed
, i
, &g
, &r
, &b
);
377 rgba
= rgb_to_rgba_aos(gallivm
, n
, r
, g
, b
);
384 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
387 grgb_to_rgba_aos(struct gallivm_state
*gallivm
,
392 LLVMValueRef r
, g
, b
;
395 yuyv_to_yuv_soa(gallivm
, n
, packed
, i
, &g
, &r
, &b
);
396 rgba
= rgb_to_rgba_aos(gallivm
, n
, r
, g
, b
);
402 * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
405 grbr_to_rgba_aos(struct gallivm_state
*gallivm
,
410 LLVMValueRef r
, g
, b
;
413 uyvy_to_yuv_soa(gallivm
, n
, packed
, i
, &r
, &g
, &b
);
414 rgba
= rgb_to_rgba_aos(gallivm
, n
, r
, g
, b
);
421 * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
424 rgrb_to_rgba_aos(struct gallivm_state
*gallivm
,
429 LLVMValueRef r
, g
, b
;
432 yuyv_to_yuv_soa(gallivm
, n
, packed
, i
, &r
, &g
, &b
);
433 rgba
= rgb_to_rgba_aos(gallivm
, n
, r
, g
, b
);
/**
 * @param n  is the number of pixels processed
 * @param packed  is a <n x i32> vector with the packed YUYV blocks
 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 * @return  a <4*n x i8> vector with the pixel RGBA values in AoS
 */
445 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state
*gallivm
,
446 const struct util_format_description
*format_desc
,
448 LLVMValueRef base_ptr
,
456 assert(format_desc
->layout
== UTIL_FORMAT_LAYOUT_SUBSAMPLED
);
457 assert(format_desc
->block
.bits
== 32);
458 assert(format_desc
->block
.width
== 2);
459 assert(format_desc
->block
.height
== 1);
461 packed
= lp_build_gather(gallivm
, n
, 32, 32, base_ptr
, offset
);
465 switch (format_desc
->format
) {
466 case PIPE_FORMAT_UYVY
:
467 rgba
= uyvy_to_rgba_aos(gallivm
, n
, packed
, i
);
469 case PIPE_FORMAT_YUYV
:
470 rgba
= yuyv_to_rgba_aos(gallivm
, n
, packed
, i
);
472 case PIPE_FORMAT_R8G8_B8G8_UNORM
:
473 rgba
= rgbg_to_rgba_aos(gallivm
, n
, packed
, i
);
475 case PIPE_FORMAT_G8R8_G8B8_UNORM
:
476 rgba
= grgb_to_rgba_aos(gallivm
, n
, packed
, i
);
478 case PIPE_FORMAT_G8R8_B8R8_UNORM
:
479 rgba
= grbr_to_rgba_aos(gallivm
, n
, packed
, i
);
481 case PIPE_FORMAT_R8G8_R8B8_UNORM
:
482 rgba
= rgrb_to_rgba_aos(gallivm
, n
, packed
, i
);
486 rgba
= LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm
->context
), 4*n
));