1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
26 **************************************************************************/
/**
 * @file
 * YUV pixel format manipulation.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
37 #include "util/u_format.h"
38 #include "util/u_cpu_detect.h"
40 #include "lp_bld_arit.h"
41 #include "lp_bld_type.h"
42 #include "lp_bld_const.h"
43 #include "lp_bld_conv.h"
44 #include "lp_bld_gather.h"
45 #include "lp_bld_format.h"
46 #include "lp_bld_init.h"
47 #include "lp_bld_logic.h"
50 * Extract Y, U, V channels from packed UYVY.
51 * @param packed is a <n x i32> vector with the packed UYVY blocks
52 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
55 uyvy_to_yuv_soa(struct gallivm_state
*gallivm
,
63 LLVMBuilderRef builder
= gallivm
->builder
;
67 memset(&type
, 0, sizeof type
);
71 assert(lp_check_value(type
, packed
));
72 assert(lp_check_value(type
, i
));
76 * y = (uyvy >> (16*i + 8)) & 0xff
78 * v = (uyvy >> 16 ) & 0xff
81 * y = (uyvy >> (-16*i + 16)) & 0xff
82 * u = (uyvy >> 24) & 0xff
83 * v = (uyvy >> 8) & 0xff
86 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
88 * Avoid shift with per-element count.
89 * No support on x86, gets translated to roughly 5 instructions
90 * per element. Didn't measure performance but cuts shader size
91 * by quite a bit (less difference if cpu has no sse4.1 support).
93 if (util_cpu_caps
.has_sse2
&& n
> 1) {
94 LLVMValueRef sel
, tmp
, tmp2
;
95 struct lp_build_context bld32
;
97 lp_build_context_init(&bld32
, gallivm
, type
);
99 tmp
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 8), "");
100 tmp2
= LLVMBuildLShr(builder
, tmp
, lp_build_const_int_vec(gallivm
, type
, 16), "");
101 sel
= lp_build_compare(gallivm
, type
, PIPE_FUNC_EQUAL
, i
, lp_build_const_int_vec(gallivm
, type
, 0));
102 *y
= lp_build_select(&bld32
, sel
, tmp
, tmp2
);
107 #ifdef PIPE_ARCH_LITTLE_ENDIAN
108 shift
= LLVMBuildMul(builder
, i
, lp_build_const_int_vec(gallivm
, type
, 16), "");
109 shift
= LLVMBuildAdd(builder
, shift
, lp_build_const_int_vec(gallivm
, type
, 8), "");
111 shift
= LLVMBuildMul(builder
, i
, lp_build_const_int_vec(gallivm
, type
, -16), "");
112 shift
= LLVMBuildAdd(builder
, shift
, lp_build_const_int_vec(gallivm
, type
, 16), "");
114 *y
= LLVMBuildLShr(builder
, packed
, shift
, "");
117 #ifdef PIPE_ARCH_LITTLE_ENDIAN
119 *v
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 16), "");
121 *u
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 24), "");
122 *v
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 8), "");
125 mask
= lp_build_const_int_vec(gallivm
, type
, 0xff);
127 *y
= LLVMBuildAnd(builder
, *y
, mask
, "y");
128 *u
= LLVMBuildAnd(builder
, *u
, mask
, "u");
129 *v
= LLVMBuildAnd(builder
, *v
, mask
, "v");
134 * Extract Y, U, V channels from packed YUYV.
135 * @param packed is a <n x i32> vector with the packed YUYV blocks
136 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
139 yuyv_to_yuv_soa(struct gallivm_state
*gallivm
,
147 LLVMBuilderRef builder
= gallivm
->builder
;
151 memset(&type
, 0, sizeof type
);
155 assert(lp_check_value(type
, packed
));
156 assert(lp_check_value(type
, i
));
160 * y = (yuyv >> 16*i) & 0xff
161 * u = (yuyv >> 8 ) & 0xff
162 * v = (yuyv >> 24 ) & 0xff
165 * y = (yuyv >> (-16*i + 24) & 0xff
166 * u = (yuyv >> 16) & 0xff
170 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
172 * Avoid shift with per-element count.
173 * No support on x86, gets translated to roughly 5 instructions
174 * per element. Didn't measure performance but cuts shader size
175 * by quite a bit (less difference if cpu has no sse4.1 support).
177 if (util_cpu_caps
.has_sse2
&& n
> 1) {
178 LLVMValueRef sel
, tmp
;
179 struct lp_build_context bld32
;
181 lp_build_context_init(&bld32
, gallivm
, type
);
183 tmp
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 16), "");
184 sel
= lp_build_compare(gallivm
, type
, PIPE_FUNC_EQUAL
, i
, lp_build_const_int_vec(gallivm
, type
, 0));
185 *y
= lp_build_select(&bld32
, sel
, packed
, tmp
);
190 #ifdef PIPE_ARCH_LITTLE_ENDIAN
191 shift
= LLVMBuildMul(builder
, i
, lp_build_const_int_vec(gallivm
, type
, 16), "");
193 shift
= LLVMBuildMul(builder
, i
, lp_build_const_int_vec(gallivm
, type
, -16), "");
194 shift
= LLVMBuildAdd(builder
, shift
, lp_build_const_int_vec(gallivm
, type
, 24), "");
196 *y
= LLVMBuildLShr(builder
, packed
, shift
, "");
199 #ifdef PIPE_ARCH_LITTLE_ENDIAN
200 *u
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 8), "");
201 *v
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 24), "");
203 *u
= LLVMBuildLShr(builder
, packed
, lp_build_const_int_vec(gallivm
, type
, 16), "");
207 mask
= lp_build_const_int_vec(gallivm
, type
, 0xff);
209 *y
= LLVMBuildAnd(builder
, *y
, mask
, "y");
210 *u
= LLVMBuildAnd(builder
, *u
, mask
, "u");
211 *v
= LLVMBuildAnd(builder
, *v
, mask
, "v");
216 yuv_to_rgb_soa(struct gallivm_state
*gallivm
,
218 LLVMValueRef y
, LLVMValueRef u
, LLVMValueRef v
,
219 LLVMValueRef
*r
, LLVMValueRef
*g
, LLVMValueRef
*b
)
221 LLVMBuilderRef builder
= gallivm
->builder
;
223 struct lp_build_context bld
;
237 memset(&type
, 0, sizeof type
);
242 lp_build_context_init(&bld
, gallivm
, type
);
244 assert(lp_check_value(type
, y
));
245 assert(lp_check_value(type
, u
));
246 assert(lp_check_value(type
, v
));
252 c0
= lp_build_const_int_vec(gallivm
, type
, 0);
253 c8
= lp_build_const_int_vec(gallivm
, type
, 8);
254 c16
= lp_build_const_int_vec(gallivm
, type
, 16);
255 c128
= lp_build_const_int_vec(gallivm
, type
, 128);
256 c255
= lp_build_const_int_vec(gallivm
, type
, 255);
258 cy
= lp_build_const_int_vec(gallivm
, type
, 298);
259 cug
= lp_build_const_int_vec(gallivm
, type
, -100);
260 cub
= lp_build_const_int_vec(gallivm
, type
, 516);
261 cvr
= lp_build_const_int_vec(gallivm
, type
, 409);
262 cvg
= lp_build_const_int_vec(gallivm
, type
, -208);
270 y
= LLVMBuildSub(builder
, y
, c16
, "");
271 u
= LLVMBuildSub(builder
, u
, c128
, "");
272 v
= LLVMBuildSub(builder
, v
, c128
, "");
275 * r = 298 * _y + 409 * _v + 128;
276 * g = 298 * _y - 100 * _u - 208 * _v + 128;
277 * b = 298 * _y + 516 * _u + 128;
280 y
= LLVMBuildMul(builder
, y
, cy
, "");
281 y
= LLVMBuildAdd(builder
, y
, c128
, "");
283 *r
= LLVMBuildMul(builder
, v
, cvr
, "");
284 *g
= LLVMBuildAdd(builder
,
285 LLVMBuildMul(builder
, u
, cug
, ""),
286 LLVMBuildMul(builder
, v
, cvg
, ""),
288 *b
= LLVMBuildMul(builder
, u
, cub
, "");
290 *r
= LLVMBuildAdd(builder
, *r
, y
, "");
291 *g
= LLVMBuildAdd(builder
, *g
, y
, "");
292 *b
= LLVMBuildAdd(builder
, *b
, y
, "");
300 *r
= LLVMBuildAShr(builder
, *r
, c8
, "r");
301 *g
= LLVMBuildAShr(builder
, *g
, c8
, "g");
302 *b
= LLVMBuildAShr(builder
, *b
, c8
, "b");
308 *r
= lp_build_clamp(&bld
, *r
, c0
, c255
);
309 *g
= lp_build_clamp(&bld
, *g
, c0
, c255
);
310 *b
= lp_build_clamp(&bld
, *b
, c0
, c255
);
315 rgb_to_rgba_aos(struct gallivm_state
*gallivm
,
317 LLVMValueRef r
, LLVMValueRef g
, LLVMValueRef b
)
319 LLVMBuilderRef builder
= gallivm
->builder
;
324 memset(&type
, 0, sizeof type
);
329 assert(lp_check_value(type
, r
));
330 assert(lp_check_value(type
, g
));
331 assert(lp_check_value(type
, b
));
334 * Make a 4 x unorm8 vector
337 #ifdef PIPE_ARCH_LITTLE_ENDIAN
339 g
= LLVMBuildShl(builder
, g
, lp_build_const_int_vec(gallivm
, type
, 8), "");
340 b
= LLVMBuildShl(builder
, b
, lp_build_const_int_vec(gallivm
, type
, 16), "");
341 a
= lp_build_const_int_vec(gallivm
, type
, 0xff000000);
343 r
= LLVMBuildShl(builder
, r
, lp_build_const_int_vec(gallivm
, type
, 24), "");
344 g
= LLVMBuildShl(builder
, g
, lp_build_const_int_vec(gallivm
, type
, 16), "");
345 b
= LLVMBuildShl(builder
, b
, lp_build_const_int_vec(gallivm
, type
, 8), "");
346 a
= lp_build_const_int_vec(gallivm
, type
, 0x000000ff);
350 rgba
= LLVMBuildOr(builder
, rgba
, g
, "");
351 rgba
= LLVMBuildOr(builder
, rgba
, b
, "");
352 rgba
= LLVMBuildOr(builder
, rgba
, a
, "");
354 rgba
= LLVMBuildBitCast(builder
, rgba
,
355 LLVMVectorType(LLVMInt8TypeInContext(gallivm
->context
), 4*n
), "");
362 * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
365 uyvy_to_rgba_aos(struct gallivm_state
*gallivm
,
370 LLVMValueRef y
, u
, v
;
371 LLVMValueRef r
, g
, b
;
374 uyvy_to_yuv_soa(gallivm
, n
, packed
, i
, &y
, &u
, &v
);
375 yuv_to_rgb_soa(gallivm
, n
, y
, u
, v
, &r
, &g
, &b
);
376 rgba
= rgb_to_rgba_aos(gallivm
, n
, r
, g
, b
);
383 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
386 yuyv_to_rgba_aos(struct gallivm_state
*gallivm
,
391 LLVMValueRef y
, u
, v
;
392 LLVMValueRef r
, g
, b
;
395 yuyv_to_yuv_soa(gallivm
, n
, packed
, i
, &y
, &u
, &v
);
396 yuv_to_rgb_soa(gallivm
, n
, y
, u
, v
, &r
, &g
, &b
);
397 rgba
= rgb_to_rgba_aos(gallivm
, n
, r
, g
, b
);
404 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
407 rgbg_to_rgba_aos(struct gallivm_state
*gallivm
,
412 LLVMValueRef r
, g
, b
;
415 uyvy_to_yuv_soa(gallivm
, n
, packed
, i
, &g
, &r
, &b
);
416 rgba
= rgb_to_rgba_aos(gallivm
, n
, r
, g
, b
);
423 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
426 grgb_to_rgba_aos(struct gallivm_state
*gallivm
,
431 LLVMValueRef r
, g
, b
;
434 yuyv_to_yuv_soa(gallivm
, n
, packed
, i
, &g
, &r
, &b
);
435 rgba
= rgb_to_rgba_aos(gallivm
, n
, r
, g
, b
);
441 * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
444 grbr_to_rgba_aos(struct gallivm_state
*gallivm
,
449 LLVMValueRef r
, g
, b
;
452 uyvy_to_yuv_soa(gallivm
, n
, packed
, i
, &r
, &g
, &b
);
453 rgba
= rgb_to_rgba_aos(gallivm
, n
, r
, g
, b
);
460 * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
463 rgrb_to_rgba_aos(struct gallivm_state
*gallivm
,
468 LLVMValueRef r
, g
, b
;
471 yuyv_to_yuv_soa(gallivm
, n
, packed
, i
, &r
, &g
, &b
);
472 rgba
= rgb_to_rgba_aos(gallivm
, n
, r
, g
, b
);
478 * @param n is the number of pixels processed
479 * @param packed is a <n x i32> vector with the packed YUYV blocks
480 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
481 * @return a <4*n x i8> vector with the pixel RGBA values in AoS
484 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state
*gallivm
,
485 const struct util_format_description
*format_desc
,
487 LLVMValueRef base_ptr
,
495 assert(format_desc
->layout
== UTIL_FORMAT_LAYOUT_SUBSAMPLED
);
496 assert(format_desc
->block
.bits
== 32);
497 assert(format_desc
->block
.width
== 2);
498 assert(format_desc
->block
.height
== 1);
500 packed
= lp_build_gather(gallivm
, n
, 32, 32, base_ptr
, offset
, FALSE
);
504 switch (format_desc
->format
) {
505 case PIPE_FORMAT_UYVY
:
506 rgba
= uyvy_to_rgba_aos(gallivm
, n
, packed
, i
);
508 case PIPE_FORMAT_YUYV
:
509 rgba
= yuyv_to_rgba_aos(gallivm
, n
, packed
, i
);
511 case PIPE_FORMAT_R8G8_B8G8_UNORM
:
512 rgba
= rgbg_to_rgba_aos(gallivm
, n
, packed
, i
);
514 case PIPE_FORMAT_G8R8_G8B8_UNORM
:
515 rgba
= grgb_to_rgba_aos(gallivm
, n
, packed
, i
);
517 case PIPE_FORMAT_G8R8_B8R8_UNORM
:
518 rgba
= grbr_to_rgba_aos(gallivm
, n
, packed
, i
);
520 case PIPE_FORMAT_R8G8_R8B8_UNORM
:
521 rgba
= rgrb_to_rgba_aos(gallivm
, n
, packed
, i
);
525 rgba
= LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm
->context
), 4*n
));