1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
26 **************************************************************************/
31 * YUV pixel format manipulation.
33 * @author Jose Fonseca <jfonseca@vmware.com>
37 #include "util/u_format.h"
38 #include "util/u_cpu_detect.h"
40 #include "lp_bld_arit.h"
41 #include "lp_bld_type.h"
42 #include "lp_bld_const.h"
43 #include "lp_bld_conv.h"
44 #include "lp_bld_gather.h"
45 #include "lp_bld_format.h"
46 #include "lp_bld_logic.h"
49 * Extract Y, U, V channels from packed UYVY.
50 * @param packed is a <n x i32> vector with the packed UYVY blocks
51 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
/*
 * uyvy_to_yuv_soa: emit LLVM IR that pulls the Y, U and V bytes out of
 * packed UYVY words and stores the results through the y, u and v
 * out-pointers.
 *
 * Visible formulas: y = (uyvy >> (16*i + 8)) & 0xff and
 * v = (uyvy >> 16) & 0xff; all three outputs are finally masked with 0xff.
 *
 * NOTE(review): the embedded line numbers in this listing are not
 * contiguous, so some original lines are missing here -- in particular the
 * rest of the parameter list, the declaration/field setup of `type`, the
 * statement that first assigns *u, and the else/#endif that separates the
 * two ways of computing *y. Confirm against the complete file.
 */
54 uyvy_to_yuv_soa(LLVMBuilderRef builder
,
/* Start from an all-zero type descriptor. */
65 memset(&type
, 0, sizeof type
);
/* Debug-only sanity checks that both inputs match `type`. */
69 assert(lp_check_value(type
, packed
));
70 assert(lp_check_value(type
, i
));
73 * y = (uyvy >> (16*i + 8)) & 0xff
75 * v = (uyvy >> 16 ) & 0xff
78 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
80 * Avoid shift with per-element count.
81 * No support on x86, gets translated to roughly 5 instructions
82 * per element. Didn't measure performance but cuts shader size
83 * by quite a bit (less difference if cpu has no sse4.1 support).
/*
 * x86 path, taken only when SSE2 is reported and n == 4: compute both
 * candidate values with constant shifts (packed >> 8, and that result
 * >> 16 again, i.e. packed >> 24 overall) and choose between them using
 * the i == 0 comparison.
 */
85 if (util_cpu_caps
.has_sse2
&& n
== 4) {
86 LLVMValueRef sel
, tmp
, tmp2
;
87 struct lp_build_context bld32
;
89 lp_build_context_init(&bld32
, builder
, type
);
91 tmp
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(type
, 8), "");
92 tmp2
= LLVMBuildLShr(builder
, tmp
, lp_build_const_int_vec(type
, 16), "");
93 sel
= lp_build_compare(builder
, type
, PIPE_FUNC_EQUAL
, i
, lp_build_const_int_vec(type
, 0));
/* presumably yields tmp where sel is set (i == 0) and tmp2 otherwise;
 * confirm lp_build_select's operand order */
94 *y
= lp_build_select(&bld32
, sel
, tmp
, tmp2
);
/* Generic computation: per-element shift count of 16*i + 8. */
99 shift
= LLVMBuildMul(builder
, i
, lp_build_const_int_vec(type
, 16), "");
100 shift
= LLVMBuildAdd(builder
, shift
, lp_build_const_int_vec(type
, 8), "");
101 *y
= LLVMBuildLShr(builder
, packed
, shift
, "");
/* V is obtained with a constant shift of 16. */
105 *v
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(type
, 16), "");
/* Keep only the low 8 bits of every extracted value. */
107 mask
= lp_build_const_int_vec(type
, 0xff);
109 *y
= LLVMBuildAnd(builder
, *y
, mask
, "y");
110 *u
= LLVMBuildAnd(builder
, *u
, mask
, "u");
111 *v
= LLVMBuildAnd(builder
, *v
, mask
, "v");
116 * Extract Y, U, V channels from packed YUYV.
117 * @param packed is a <n x i32> vector with the packed YUYV blocks
118 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
/*
 * yuyv_to_yuv_soa: emit LLVM IR that pulls the Y, U and V bytes out of
 * packed YUYV words and stores the results through the y, u and v
 * out-pointers.
 *
 * Formulas listed below: y = (yuyv >> 16*i) & 0xff,
 * u = (yuyv >> 8) & 0xff, v = (yuyv >> 24) & 0xff.
 *
 * NOTE(review): the embedded line numbers in this listing are not
 * contiguous, so some original lines are missing here -- the rest of the
 * parameter list, the declaration/field setup of `type`, and the
 * else/#endif separating the two ways of computing *y. Confirm against
 * the complete file.
 */
121 yuyv_to_yuv_soa(LLVMBuilderRef builder
,
/* Start from an all-zero type descriptor. */
132 memset(&type
, 0, sizeof type
);
/* Debug-only sanity checks that both inputs match `type`. */
136 assert(lp_check_value(type
, packed
));
137 assert(lp_check_value(type
, i
));
140 * y = (yuyv >> 16*i) & 0xff
141 * u = (yuyv >> 8 ) & 0xff
142 * v = (yuyv >> 24 ) & 0xff
145 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
147 * Avoid shift with per-element count.
148 * No support on x86, gets translated to roughly 5 instructions
149 * per element. Didn't measure performance but cuts shader size
150 * by quite a bit (less difference if cpu has no sse4.1 support).
/*
 * x86 path, taken only when SSE2 is reported and n == 4: choose between
 * the unshifted word and the word shifted right by 16, using the i == 0
 * comparison, instead of shifting by a per-element count.
 */
152 if (util_cpu_caps
.has_sse2
&& n
== 4) {
153 LLVMValueRef sel
, tmp
;
154 struct lp_build_context bld32
;
156 lp_build_context_init(&bld32
, builder
, type
);
158 tmp
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(type
, 16), "");
159 sel
= lp_build_compare(builder
, type
, PIPE_FUNC_EQUAL
, i
, lp_build_const_int_vec(type
, 0));
/* presumably yields packed where sel is set (i == 0) and tmp otherwise;
 * confirm lp_build_select's operand order */
160 *y
= lp_build_select(&bld32
, sel
, packed
, tmp
);
/* Generic computation: per-element shift count of 16*i. */
165 shift
= LLVMBuildMul(builder
, i
, lp_build_const_int_vec(type
, 16), "");
166 *y
= LLVMBuildLShr(builder
, packed
, shift
, "");
/* U and V use constant shifts of 8 and 24 respectively. */
169 *u
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(type
, 8), "");
170 *v
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(type
, 24), "");
/* Keep only the low 8 bits of every extracted value. */
172 mask
= lp_build_const_int_vec(type
, 0xff);
174 *y
= LLVMBuildAnd(builder
, *y
, mask
, "y");
175 *u
= LLVMBuildAnd(builder
, *u
, mask
, "u");
176 *v
= LLVMBuildAnd(builder
, *v
, mask
, "v");
/*
 * yuv_to_rgb_soa: emit LLVM IR converting y, u, v values into r, g, b
 * values (stored through the r, g, b out-pointers) using integer
 * arithmetic with 8 fractional bits:
 *
 *    _y = y - 16, _u = u - 128, _v = v - 128
 *    r = (298 * _y             + 409 * _v + 128) >> 8
 *    g = (298 * _y - 100 * _u  - 208 * _v + 128) >> 8
 *    b = (298 * _y + 516 * _u             + 128) >> 8
 *
 * followed by a clamp of each result to [0, 255].
 *
 * NOTE(review): the embedded line numbers in this listing are not
 * contiguous; the remaining parameters, the local declarations, the field
 * setup of `type` and the final argument of the *g addition are not
 * visible here. Confirm against the complete file.
 */
181 yuv_to_rgb_soa(LLVMBuilderRef builder
,
183 LLVMValueRef y
, LLVMValueRef u
, LLVMValueRef v
,
184 LLVMValueRef
*r
, LLVMValueRef
*g
, LLVMValueRef
*b
)
187 struct lp_build_context bld
;
/* Start from an all-zero type descriptor. */
201 memset(&type
, 0, sizeof type
);
206 lp_build_context_init(&bld
, builder
, type
);
/* Debug-only sanity checks that the three inputs match `type`. */
208 assert(lp_check_value(type
, y
));
209 assert(lp_check_value(type
, u
));
210 assert(lp_check_value(type
, v
));
/* Constants: clamp bounds (0, 255), the shift amount (8), the input
 * offsets (16, 128; 128 is also reused as the value added to the scaled
 * luma below) and the conversion coefficients. */
216 c0
= lp_build_const_int_vec(type
, 0);
217 c8
= lp_build_const_int_vec(type
, 8);
218 c16
= lp_build_const_int_vec(type
, 16);
219 c128
= lp_build_const_int_vec(type
, 128);
220 c255
= lp_build_const_int_vec(type
, 255);
222 cy
= lp_build_const_int_vec(type
, 298);
223 cug
= lp_build_const_int_vec(type
, -100);
224 cub
= lp_build_const_int_vec(type
, 516);
225 cvr
= lp_build_const_int_vec(type
, 409);
226 cvg
= lp_build_const_int_vec(type
, -208);
/* Remove the offsets: y - 16, u - 128, v - 128. */
234 y
= LLVMBuildSub(builder
, y
, c16
, "");
235 u
= LLVMBuildSub(builder
, u
, c128
, "");
236 v
= LLVMBuildSub(builder
, v
, c128
, "");
239 * r = 298 * _y + 409 * _v + 128;
240 * g = 298 * _y - 100 * _u - 208 * _v + 128;
241 * b = 298 * _y + 516 * _u + 128;
/* Shared luma term 298 * _y + 128, computed once and added to each
 * channel below. */
244 y
= LLVMBuildMul(builder
, y
, cy
, "");
245 y
= LLVMBuildAdd(builder
, y
, c128
, "");
/* Per-channel chroma contributions. */
247 *r
= LLVMBuildMul(builder
, v
, cvr
, "");
248 *g
= LLVMBuildAdd(builder
,
249 LLVMBuildMul(builder
, u
, cug
, ""),
250 LLVMBuildMul(builder
, v
, cvg
, ""),
252 *b
= LLVMBuildMul(builder
, u
, cub
, "");
/* Add the shared luma term to every channel. */
254 *r
= LLVMBuildAdd(builder
, *r
, y
, "");
255 *g
= LLVMBuildAdd(builder
, *g
, y
, "");
256 *b
= LLVMBuildAdd(builder
, *b
, y
, "");
/* Arithmetic shift right by 8 drops the fractional bits. */
264 *r
= LLVMBuildAShr(builder
, *r
, c8
, "r");
265 *g
= LLVMBuildAShr(builder
, *g
, c8
, "g");
266 *b
= LLVMBuildAShr(builder
, *b
, c8
, "b");
/* Clamp each channel to the range [0, 255]. */
272 *r
= lp_build_clamp(&bld
, *r
, c0
, c255
);
273 *g
= lp_build_clamp(&bld
, *g
, c0
, c255
);
274 *b
= lp_build_clamp(&bld
, *b
, c0
, c255
);
/*
 * rgb_to_rgba_aos: emit LLVM IR that packs separate r, g, b values into
 * one word per pixel and reinterprets the result as a vector of 4*n i8.
 *
 * Visible packing: g is shifted left by 8, b by 16, and a constant
 * 0xff000000 alpha term is OR-ed in.
 *
 * NOTE(review): the embedded line numbers in this listing are not
 * contiguous; the remaining parameters, the field setup of `type` and the
 * initial assignment of `rgba` (before the first OR) are not visible
 * here. Confirm against the complete file.
 */
279 rgb_to_rgba_aos(LLVMBuilderRef builder
,
281 LLVMValueRef r
, LLVMValueRef g
, LLVMValueRef b
)
/* Start from an all-zero type descriptor. */
287 memset(&type
, 0, sizeof type
);
/* Debug-only sanity checks that the three inputs match `type`. */
292 assert(lp_check_value(type
, r
));
293 assert(lp_check_value(type
, g
));
294 assert(lp_check_value(type
, b
));
297 * Make a 4 x unorm8 vector
/* Move g and b to bit offsets 8 and 16; alpha occupies the top byte. */
301 g
= LLVMBuildShl(builder
, g
, lp_build_const_int_vec(type
, 8), "");
302 b
= LLVMBuildShl(builder
, b
, lp_build_const_int_vec(type
, 16), "");
303 a
= lp_build_const_int_vec(type
, 0xff000000);
/* Merge the channels with bitwise OR. */
306 rgba
= LLVMBuildOr(builder
, rgba
, g
, "");
307 rgba
= LLVMBuildOr(builder
, rgba
, b
, "");
308 rgba
= LLVMBuildOr(builder
, rgba
, a
, "");
/* Reinterpret the packed words as a vector of 4*n bytes. */
310 rgba
= LLVMBuildBitCast(builder
, rgba
,
311 LLVMVectorType(LLVMInt8Type(), 4*n
), "");
318 * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
/*
 * uyvy_to_rgba_aos: chain the three helpers for packed UYVY input --
 * uyvy_to_yuv_soa (channel extraction), yuv_to_rgb_soa (colour
 * conversion) and rgb_to_rgba_aos (packing) -- and keep the packed result
 * in `rgba`.
 *
 * NOTE(review): the remaining parameters, the declaration of `rgba` and
 * the return statement are not visible in this listing.
 */
321 uyvy_to_rgba_aos(LLVMBuilderRef builder
,
326 LLVMValueRef y
, u
, v
;
327 LLVMValueRef r
, g
, b
;
/* extract -> convert -> pack */
330 uyvy_to_yuv_soa(builder
, n
, packed
, i
, &y
, &u
, &v
);
331 yuv_to_rgb_soa(builder
, n
, y
, u
, v
, &r
, &g
, &b
);
332 rgba
= rgb_to_rgba_aos(builder
, n
, r
, g
, b
);
339 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
/*
 * yuyv_to_rgba_aos: chain the three helpers for packed YUYV input --
 * yuyv_to_yuv_soa (channel extraction), yuv_to_rgb_soa (colour
 * conversion) and rgb_to_rgba_aos (packing) -- and keep the packed result
 * in `rgba`.
 *
 * NOTE(review): the remaining parameters, the declaration of `rgba` and
 * the return statement are not visible in this listing.
 */
342 yuyv_to_rgba_aos(LLVMBuilderRef builder
,
347 LLVMValueRef y
, u
, v
;
348 LLVMValueRef r
, g
, b
;
/* extract -> convert -> pack */
351 yuyv_to_yuv_soa(builder
, n
, packed
, i
, &y
, &u
, &v
);
352 yuv_to_rgb_soa(builder
, n
, y
, u
, v
, &r
, &g
, &b
);
353 rgba
= rgb_to_rgba_aos(builder
, n
, r
, g
, b
);
360 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
/*
 * rgbg_to_rgba_aos: unpack RG_BG data by reusing the UYVY extractor, with
 * no colour-space conversion step. The output slots are remapped at the
 * call: the extractor's y output lands in g, its u output in r and its v
 * output in b. The channels are then packed with rgb_to_rgba_aos.
 *
 * NOTE(review): the remaining parameters, the declaration of `rgba` and
 * the return statement are not visible in this listing.
 */
363 rgbg_to_rgba_aos(LLVMBuilderRef builder
,
368 LLVMValueRef r
, g
, b
;
/* note the (&g, &r, &b) ordering of the out-pointers */
371 uyvy_to_yuv_soa(builder
, n
, packed
, i
, &g
, &r
, &b
);
372 rgba
= rgb_to_rgba_aos(builder
, n
, r
, g
, b
);
379 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
/*
 * grgb_to_rgba_aos: unpack GR_GB data by reusing the YUYV extractor, with
 * no colour-space conversion step. The output slots are remapped at the
 * call: the extractor's y output lands in g, its u output in r and its v
 * output in b. The channels are then packed with rgb_to_rgba_aos.
 *
 * NOTE(review): the remaining parameters, the declaration of `rgba` and
 * the return statement are not visible in this listing.
 */
382 grgb_to_rgba_aos(LLVMBuilderRef builder
,
387 LLVMValueRef r
, g
, b
;
/* note the (&g, &r, &b) ordering of the out-pointers */
390 yuyv_to_yuv_soa(builder
, n
, packed
, i
, &g
, &r
, &b
);
391 rgba
= rgb_to_rgba_aos(builder
, n
, r
, g
, b
);
398 * @param n is the number of pixels processed
399 * @param packed is a <n x i32> vector with the packed YUYV blocks
400 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
401 * @return a <4*n x i8> vector with the pixel RGBA values in AoS
/*
 * lp_build_fetch_subsampled_rgba_aos: fetch entry point for subsampled
 * formats. Asserts that the format uses the subsampled layout with
 * 32-bit, 2x1 blocks, gathers the packed blocks from base_ptr + offset,
 * and hands them to the per-format converter selected by
 * format_desc->format. The last visible statement assigns an undefined
 * <4*n x i8> vector to `rgba`.
 *
 * NOTE(review): the remaining parameters, the local declarations, any
 * break/default labels in the switch and the return statement are not
 * visible in this listing -- confirm against the complete file (the undef
 * assignment is presumably the fallback for unhandled formats).
 */
404 lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder
,
405 const struct util_format_description
*format_desc
,
407 LLVMValueRef base_ptr
,
/* Debug-only preconditions on the format's block layout. */
415 assert(format_desc
->layout
== UTIL_FORMAT_LAYOUT_SUBSAMPLED
);
416 assert(format_desc
->block
.bits
== 32);
417 assert(format_desc
->block
.width
== 2);
418 assert(format_desc
->block
.height
== 1);
/* Load the packed blocks; both width arguments passed to the gather
 * are 32. */
420 packed
= lp_build_gather(builder
, n
, 32, 32, base_ptr
, offset
);
/* Dispatch to the converter matching the pipe format. */
424 switch (format_desc
->format
) {
425 case PIPE_FORMAT_UYVY
:
426 rgba
= uyvy_to_rgba_aos(builder
, n
, packed
, i
);
428 case PIPE_FORMAT_YUYV
:
429 rgba
= yuyv_to_rgba_aos(builder
, n
, packed
, i
);
431 case PIPE_FORMAT_R8G8_B8G8_UNORM
:
432 rgba
= rgbg_to_rgba_aos(builder
, n
, packed
, i
);
434 case PIPE_FORMAT_G8R8_G8B8_UNORM
:
435 rgba
= grgb_to_rgba_aos(builder
, n
, packed
, i
);
439 rgba
= LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n
));