5adf2815034f26bb8a29b2d7093ff215e913333b
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_yuv.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * YUV pixel format manipulation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36
37 #include "util/u_format.h"
38 #include "util/u_cpu_detect.h"
39
40 #include "lp_bld_arit.h"
41 #include "lp_bld_type.h"
42 #include "lp_bld_const.h"
43 #include "lp_bld_conv.h"
44 #include "lp_bld_gather.h"
45 #include "lp_bld_format.h"
46 #include "lp_bld_init.h"
47 #include "lp_bld_logic.h"
48
49 /**
50 * Extract Y, U, V channels from packed UYVY.
51 * @param packed is a <n x i32> vector with the packed UYVY blocks
52 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
53 */
54 static void
55 uyvy_to_yuv_soa(struct gallivm_state *gallivm,
56 unsigned n,
57 LLVMValueRef packed,
58 LLVMValueRef i,
59 LLVMValueRef *y,
60 LLVMValueRef *u,
61 LLVMValueRef *v)
62 {
63 LLVMBuilderRef builder = gallivm->builder;
64 struct lp_type type;
65 LLVMValueRef mask;
66
67 memset(&type, 0, sizeof type);
68 type.width = 32;
69 type.length = n;
70
71 assert(lp_check_value(type, packed));
72 assert(lp_check_value(type, i));
73
74 /*
75 * Little endian:
76 * y = (uyvy >> (16*i + 8)) & 0xff
77 * u = (uyvy ) & 0xff
78 * v = (uyvy >> 16 ) & 0xff
79 *
80 * Big endian:
81 * y = (uyvy >> (-16*i + 16)) & 0xff
82 * u = (uyvy >> 24) & 0xff
83 * v = (uyvy >> 8) & 0xff
84 */
85
86 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
87 /*
88 * Avoid shift with per-element count.
89 * No support on x86, gets translated to roughly 5 instructions
90 * per element. Didn't measure performance but cuts shader size
91 * by quite a bit (less difference if cpu has no sse4.1 support).
92 */
93 if (util_cpu_caps.has_sse2 && n > 1) {
94 LLVMValueRef sel, tmp, tmp2;
95 struct lp_build_context bld32;
96
97 lp_build_context_init(&bld32, gallivm, type);
98
99 tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
100 tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
101 sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
102 *y = lp_build_select(&bld32, sel, tmp, tmp2);
103 } else
104 #endif
105 {
106 LLVMValueRef shift;
107 #if PIPE_ARCH_LITTLE_ENDIAN
108 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
109 shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
110 #else
111 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
112 shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), "");
113 #endif
114 *y = LLVMBuildLShr(builder, packed, shift, "");
115 }
116
117 #if PIPE_ARCH_LITTLE_ENDIAN
118 *u = packed;
119 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
120 #else
121 *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
122 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
123 #endif
124
125 mask = lp_build_const_int_vec(gallivm, type, 0xff);
126
127 *y = LLVMBuildAnd(builder, *y, mask, "y");
128 *u = LLVMBuildAnd(builder, *u, mask, "u");
129 *v = LLVMBuildAnd(builder, *v, mask, "v");
130 }
131
132
133 /**
134 * Extract Y, U, V channels from packed YUYV.
135 * @param packed is a <n x i32> vector with the packed YUYV blocks
136 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
137 */
138 static void
139 yuyv_to_yuv_soa(struct gallivm_state *gallivm,
140 unsigned n,
141 LLVMValueRef packed,
142 LLVMValueRef i,
143 LLVMValueRef *y,
144 LLVMValueRef *u,
145 LLVMValueRef *v)
146 {
147 LLVMBuilderRef builder = gallivm->builder;
148 struct lp_type type;
149 LLVMValueRef mask;
150
151 memset(&type, 0, sizeof type);
152 type.width = 32;
153 type.length = n;
154
155 assert(lp_check_value(type, packed));
156 assert(lp_check_value(type, i));
157
158 /*
159 * Little endian:
160 * y = (yuyv >> 16*i) & 0xff
161 * u = (yuyv >> 8 ) & 0xff
162 * v = (yuyv >> 24 ) & 0xff
163 *
164 * Big endian:
165 * y = (yuyv >> (-16*i + 24) & 0xff
166 * u = (yuyv >> 16) & 0xff
167 * v = (yuyv) & 0xff
168 */
169
170 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
171 /*
172 * Avoid shift with per-element count.
173 * No support on x86, gets translated to roughly 5 instructions
174 * per element. Didn't measure performance but cuts shader size
175 * by quite a bit (less difference if cpu has no sse4.1 support).
176 */
177 if (util_cpu_caps.has_sse2 && n > 1) {
178 LLVMValueRef sel, tmp;
179 struct lp_build_context bld32;
180
181 lp_build_context_init(&bld32, gallivm, type);
182
183 tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
184 sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
185 *y = lp_build_select(&bld32, sel, packed, tmp);
186 } else
187 #endif
188 {
189 LLVMValueRef shift;
190 #if PIPE_ARCH_LITTLE_ENDIAN
191 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
192 #else
193 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), "");
194 shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), "");
195 #endif
196 *y = LLVMBuildLShr(builder, packed, shift, "");
197 }
198
199 #if PIPE_ARCH_LITTLE_ENDIAN
200 *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
201 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
202 #else
203 *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
204 *v = packed;
205 #endif
206
207 mask = lp_build_const_int_vec(gallivm, type, 0xff);
208
209 *y = LLVMBuildAnd(builder, *y, mask, "y");
210 *u = LLVMBuildAnd(builder, *u, mask, "u");
211 *v = LLVMBuildAnd(builder, *v, mask, "v");
212 }
213
214
215 static inline void
216 yuv_to_rgb_soa(struct gallivm_state *gallivm,
217 unsigned n,
218 LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
219 LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
220 {
221 LLVMBuilderRef builder = gallivm->builder;
222 struct lp_type type;
223 struct lp_build_context bld;
224
225 LLVMValueRef c0;
226 LLVMValueRef c8;
227 LLVMValueRef c16;
228 LLVMValueRef c128;
229 LLVMValueRef c255;
230
231 LLVMValueRef cy;
232 LLVMValueRef cug;
233 LLVMValueRef cub;
234 LLVMValueRef cvr;
235 LLVMValueRef cvg;
236
237 memset(&type, 0, sizeof type);
238 type.sign = TRUE;
239 type.width = 32;
240 type.length = n;
241
242 lp_build_context_init(&bld, gallivm, type);
243
244 assert(lp_check_value(type, y));
245 assert(lp_check_value(type, u));
246 assert(lp_check_value(type, v));
247
248 /*
249 * Constants
250 */
251
252 c0 = lp_build_const_int_vec(gallivm, type, 0);
253 c8 = lp_build_const_int_vec(gallivm, type, 8);
254 c16 = lp_build_const_int_vec(gallivm, type, 16);
255 c128 = lp_build_const_int_vec(gallivm, type, 128);
256 c255 = lp_build_const_int_vec(gallivm, type, 255);
257
258 cy = lp_build_const_int_vec(gallivm, type, 298);
259 cug = lp_build_const_int_vec(gallivm, type, -100);
260 cub = lp_build_const_int_vec(gallivm, type, 516);
261 cvr = lp_build_const_int_vec(gallivm, type, 409);
262 cvg = lp_build_const_int_vec(gallivm, type, -208);
263
264 /*
265 * y -= 16;
266 * u -= 128;
267 * v -= 128;
268 */
269
270 y = LLVMBuildSub(builder, y, c16, "");
271 u = LLVMBuildSub(builder, u, c128, "");
272 v = LLVMBuildSub(builder, v, c128, "");
273
274 /*
275 * r = 298 * _y + 409 * _v + 128;
276 * g = 298 * _y - 100 * _u - 208 * _v + 128;
277 * b = 298 * _y + 516 * _u + 128;
278 */
279
280 y = LLVMBuildMul(builder, y, cy, "");
281 y = LLVMBuildAdd(builder, y, c128, "");
282
283 *r = LLVMBuildMul(builder, v, cvr, "");
284 *g = LLVMBuildAdd(builder,
285 LLVMBuildMul(builder, u, cug, ""),
286 LLVMBuildMul(builder, v, cvg, ""),
287 "");
288 *b = LLVMBuildMul(builder, u, cub, "");
289
290 *r = LLVMBuildAdd(builder, *r, y, "");
291 *g = LLVMBuildAdd(builder, *g, y, "");
292 *b = LLVMBuildAdd(builder, *b, y, "");
293
294 /*
295 * r >>= 8;
296 * g >>= 8;
297 * b >>= 8;
298 */
299
300 *r = LLVMBuildAShr(builder, *r, c8, "r");
301 *g = LLVMBuildAShr(builder, *g, c8, "g");
302 *b = LLVMBuildAShr(builder, *b, c8, "b");
303
304 /*
305 * Clamp
306 */
307
308 *r = lp_build_clamp(&bld, *r, c0, c255);
309 *g = lp_build_clamp(&bld, *g, c0, c255);
310 *b = lp_build_clamp(&bld, *b, c0, c255);
311 }
312
313
314 static LLVMValueRef
315 rgb_to_rgba_aos(struct gallivm_state *gallivm,
316 unsigned n,
317 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
318 {
319 LLVMBuilderRef builder = gallivm->builder;
320 struct lp_type type;
321 LLVMValueRef a;
322 LLVMValueRef rgba;
323
324 memset(&type, 0, sizeof type);
325 type.sign = TRUE;
326 type.width = 32;
327 type.length = n;
328
329 assert(lp_check_value(type, r));
330 assert(lp_check_value(type, g));
331 assert(lp_check_value(type, b));
332
333 /*
334 * Make a 4 x unorm8 vector
335 */
336
337 #if PIPE_ARCH_LITTLE_ENDIAN
338 r = r;
339 g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
340 b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
341 a = lp_build_const_int_vec(gallivm, type, 0xff000000);
342 #else
343 r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), "");
344 g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), "");
345 b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), "");
346 a = lp_build_const_int_vec(gallivm, type, 0x000000ff);
347 #endif
348
349 rgba = r;
350 rgba = LLVMBuildOr(builder, rgba, g, "");
351 rgba = LLVMBuildOr(builder, rgba, b, "");
352 rgba = LLVMBuildOr(builder, rgba, a, "");
353
354 rgba = LLVMBuildBitCast(builder, rgba,
355 LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
356
357 return rgba;
358 }
359
360
361 /**
362 * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
363 */
364 static LLVMValueRef
365 uyvy_to_rgba_aos(struct gallivm_state *gallivm,
366 unsigned n,
367 LLVMValueRef packed,
368 LLVMValueRef i)
369 {
370 LLVMValueRef y, u, v;
371 LLVMValueRef r, g, b;
372 LLVMValueRef rgba;
373
374 uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
375 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
376 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
377
378 return rgba;
379 }
380
381
382 /**
383 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
384 */
385 static LLVMValueRef
386 yuyv_to_rgba_aos(struct gallivm_state *gallivm,
387 unsigned n,
388 LLVMValueRef packed,
389 LLVMValueRef i)
390 {
391 LLVMValueRef y, u, v;
392 LLVMValueRef r, g, b;
393 LLVMValueRef rgba;
394
395 yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
396 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
397 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
398
399 return rgba;
400 }
401
402
403 /**
404 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
405 */
406 static LLVMValueRef
407 rgbg_to_rgba_aos(struct gallivm_state *gallivm,
408 unsigned n,
409 LLVMValueRef packed,
410 LLVMValueRef i)
411 {
412 LLVMValueRef r, g, b;
413 LLVMValueRef rgba;
414
415 uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
416 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
417
418 return rgba;
419 }
420
421
422 /**
423 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
424 */
425 static LLVMValueRef
426 grgb_to_rgba_aos(struct gallivm_state *gallivm,
427 unsigned n,
428 LLVMValueRef packed,
429 LLVMValueRef i)
430 {
431 LLVMValueRef r, g, b;
432 LLVMValueRef rgba;
433
434 yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
435 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
436
437 return rgba;
438 }
439
440 /**
441 * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS
442 */
443 static LLVMValueRef
444 grbr_to_rgba_aos(struct gallivm_state *gallivm,
445 unsigned n,
446 LLVMValueRef packed,
447 LLVMValueRef i)
448 {
449 LLVMValueRef r, g, b;
450 LLVMValueRef rgba;
451
452 uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
453 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
454
455 return rgba;
456 }
457
458
459 /**
460 * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS
461 */
462 static LLVMValueRef
463 rgrb_to_rgba_aos(struct gallivm_state *gallivm,
464 unsigned n,
465 LLVMValueRef packed,
466 LLVMValueRef i)
467 {
468 LLVMValueRef r, g, b;
469 LLVMValueRef rgba;
470
471 yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b);
472 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
473
474 return rgba;
475 }
476
477 /**
478 * @param n is the number of pixels processed
479 * @param packed is a <n x i32> vector with the packed YUYV blocks
480 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
481 * @return a <4*n x i8> vector with the pixel RGBA values in AoS
482 */
483 LLVMValueRef
484 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
485 const struct util_format_description *format_desc,
486 unsigned n,
487 LLVMValueRef base_ptr,
488 LLVMValueRef offset,
489 LLVMValueRef i,
490 LLVMValueRef j)
491 {
492 LLVMValueRef packed;
493 LLVMValueRef rgba;
494 struct lp_type fetch_type;
495
496 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
497 assert(format_desc->block.bits == 32);
498 assert(format_desc->block.width == 2);
499 assert(format_desc->block.height == 1);
500
501 fetch_type = lp_type_uint(32);
502 packed = lp_build_gather(gallivm, n, 32, fetch_type, TRUE, base_ptr, offset, FALSE);
503
504 (void)j;
505
506 switch (format_desc->format) {
507 case PIPE_FORMAT_UYVY:
508 rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
509 break;
510 case PIPE_FORMAT_YUYV:
511 rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
512 break;
513 case PIPE_FORMAT_R8G8_B8G8_UNORM:
514 rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
515 break;
516 case PIPE_FORMAT_G8R8_G8B8_UNORM:
517 rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
518 break;
519 case PIPE_FORMAT_G8R8_B8R8_UNORM:
520 rgba = grbr_to_rgba_aos(gallivm, n, packed, i);
521 break;
522 case PIPE_FORMAT_R8G8_R8B8_UNORM:
523 rgba = rgrb_to_rgba_aos(gallivm, n, packed, i);
524 break;
525 default:
526 assert(0);
527 rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
528 break;
529 }
530
531 return rgba;
532 }
533