gallivm/llvmpipe: squash merge of the llvm-context branch
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_yuv.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * YUV pixel format manipulation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36
37 #include "util/u_format.h"
38 #include "util/u_cpu_detect.h"
39
40 #include "lp_bld_arit.h"
41 #include "lp_bld_type.h"
42 #include "lp_bld_const.h"
43 #include "lp_bld_conv.h"
44 #include "lp_bld_gather.h"
45 #include "lp_bld_format.h"
46 #include "lp_bld_init.h"
47 #include "lp_bld_logic.h"
48
49 /**
50 * Extract Y, U, V channels from packed UYVY.
51 * @param packed is a <n x i32> vector with the packed UYVY blocks
52 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
53 */
54 static void
55 uyvy_to_yuv_soa(struct gallivm_state *gallivm,
56 unsigned n,
57 LLVMValueRef packed,
58 LLVMValueRef i,
59 LLVMValueRef *y,
60 LLVMValueRef *u,
61 LLVMValueRef *v)
62 {
63 LLVMBuilderRef builder = gallivm->builder;
64 struct lp_type type;
65 LLVMValueRef mask;
66
67 memset(&type, 0, sizeof type);
68 type.width = 32;
69 type.length = n;
70
71 assert(lp_check_value(type, packed));
72 assert(lp_check_value(type, i));
73
74 /*
75 * y = (uyvy >> (16*i + 8)) & 0xff
76 * u = (uyvy ) & 0xff
77 * v = (uyvy >> 16 ) & 0xff
78 */
79
80 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
81 /*
82 * Avoid shift with per-element count.
83 * No support on x86, gets translated to roughly 5 instructions
84 * per element. Didn't measure performance but cuts shader size
85 * by quite a bit (less difference if cpu has no sse4.1 support).
86 */
87 if (util_cpu_caps.has_sse2 && n == 4) {
88 LLVMValueRef sel, tmp, tmp2;
89 struct lp_build_context bld32;
90
91 lp_build_context_init(&bld32, gallivm, type);
92
93 tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
94 tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), "");
95 sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
96 *y = lp_build_select(&bld32, sel, tmp, tmp2);
97 } else
98 #endif
99 {
100 LLVMValueRef shift;
101 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
102 shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), "");
103 *y = LLVMBuildLShr(builder, packed, shift, "");
104 }
105
106 *u = packed;
107 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
108
109 mask = lp_build_const_int_vec(gallivm, type, 0xff);
110
111 *y = LLVMBuildAnd(builder, *y, mask, "y");
112 *u = LLVMBuildAnd(builder, *u, mask, "u");
113 *v = LLVMBuildAnd(builder, *v, mask, "v");
114 }
115
116
117 /**
118 * Extract Y, U, V channels from packed YUYV.
119 * @param packed is a <n x i32> vector with the packed YUYV blocks
120 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
121 */
122 static void
123 yuyv_to_yuv_soa(struct gallivm_state *gallivm,
124 unsigned n,
125 LLVMValueRef packed,
126 LLVMValueRef i,
127 LLVMValueRef *y,
128 LLVMValueRef *u,
129 LLVMValueRef *v)
130 {
131 LLVMBuilderRef builder = gallivm->builder;
132 struct lp_type type;
133 LLVMValueRef mask;
134
135 memset(&type, 0, sizeof type);
136 type.width = 32;
137 type.length = n;
138
139 assert(lp_check_value(type, packed));
140 assert(lp_check_value(type, i));
141
142 /*
143 * y = (yuyv >> 16*i) & 0xff
144 * u = (yuyv >> 8 ) & 0xff
145 * v = (yuyv >> 24 ) & 0xff
146 */
147
148 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
149 /*
150 * Avoid shift with per-element count.
151 * No support on x86, gets translated to roughly 5 instructions
152 * per element. Didn't measure performance but cuts shader size
153 * by quite a bit (less difference if cpu has no sse4.1 support).
154 */
155 if (util_cpu_caps.has_sse2 && n == 4) {
156 LLVMValueRef sel, tmp;
157 struct lp_build_context bld32;
158
159 lp_build_context_init(&bld32, gallivm, type);
160
161 tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
162 sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
163 *y = lp_build_select(&bld32, sel, packed, tmp);
164 } else
165 #endif
166 {
167 LLVMValueRef shift;
168 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
169 *y = LLVMBuildLShr(builder, packed, shift, "");
170 }
171
172 *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
173 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");
174
175 mask = lp_build_const_int_vec(gallivm, type, 0xff);
176
177 *y = LLVMBuildAnd(builder, *y, mask, "y");
178 *u = LLVMBuildAnd(builder, *u, mask, "u");
179 *v = LLVMBuildAnd(builder, *v, mask, "v");
180 }
181
182
183 static INLINE void
184 yuv_to_rgb_soa(struct gallivm_state *gallivm,
185 unsigned n,
186 LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
187 LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
188 {
189 LLVMBuilderRef builder = gallivm->builder;
190 struct lp_type type;
191 struct lp_build_context bld;
192
193 LLVMValueRef c0;
194 LLVMValueRef c8;
195 LLVMValueRef c16;
196 LLVMValueRef c128;
197 LLVMValueRef c255;
198
199 LLVMValueRef cy;
200 LLVMValueRef cug;
201 LLVMValueRef cub;
202 LLVMValueRef cvr;
203 LLVMValueRef cvg;
204
205 memset(&type, 0, sizeof type);
206 type.sign = TRUE;
207 type.width = 32;
208 type.length = n;
209
210 lp_build_context_init(&bld, gallivm, type);
211
212 assert(lp_check_value(type, y));
213 assert(lp_check_value(type, u));
214 assert(lp_check_value(type, v));
215
216 /*
217 * Constants
218 */
219
220 c0 = lp_build_const_int_vec(gallivm, type, 0);
221 c8 = lp_build_const_int_vec(gallivm, type, 8);
222 c16 = lp_build_const_int_vec(gallivm, type, 16);
223 c128 = lp_build_const_int_vec(gallivm, type, 128);
224 c255 = lp_build_const_int_vec(gallivm, type, 255);
225
226 cy = lp_build_const_int_vec(gallivm, type, 298);
227 cug = lp_build_const_int_vec(gallivm, type, -100);
228 cub = lp_build_const_int_vec(gallivm, type, 516);
229 cvr = lp_build_const_int_vec(gallivm, type, 409);
230 cvg = lp_build_const_int_vec(gallivm, type, -208);
231
232 /*
233 * y -= 16;
234 * u -= 128;
235 * v -= 128;
236 */
237
238 y = LLVMBuildSub(builder, y, c16, "");
239 u = LLVMBuildSub(builder, u, c128, "");
240 v = LLVMBuildSub(builder, v, c128, "");
241
242 /*
243 * r = 298 * _y + 409 * _v + 128;
244 * g = 298 * _y - 100 * _u - 208 * _v + 128;
245 * b = 298 * _y + 516 * _u + 128;
246 */
247
248 y = LLVMBuildMul(builder, y, cy, "");
249 y = LLVMBuildAdd(builder, y, c128, "");
250
251 *r = LLVMBuildMul(builder, v, cvr, "");
252 *g = LLVMBuildAdd(builder,
253 LLVMBuildMul(builder, u, cug, ""),
254 LLVMBuildMul(builder, v, cvg, ""),
255 "");
256 *b = LLVMBuildMul(builder, u, cub, "");
257
258 *r = LLVMBuildAdd(builder, *r, y, "");
259 *g = LLVMBuildAdd(builder, *g, y, "");
260 *b = LLVMBuildAdd(builder, *b, y, "");
261
262 /*
263 * r >>= 8;
264 * g >>= 8;
265 * b >>= 8;
266 */
267
268 *r = LLVMBuildAShr(builder, *r, c8, "r");
269 *g = LLVMBuildAShr(builder, *g, c8, "g");
270 *b = LLVMBuildAShr(builder, *b, c8, "b");
271
272 /*
273 * Clamp
274 */
275
276 *r = lp_build_clamp(&bld, *r, c0, c255);
277 *g = lp_build_clamp(&bld, *g, c0, c255);
278 *b = lp_build_clamp(&bld, *b, c0, c255);
279 }
280
281
282 static LLVMValueRef
283 rgb_to_rgba_aos(struct gallivm_state *gallivm,
284 unsigned n,
285 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
286 {
287 LLVMBuilderRef builder = gallivm->builder;
288 struct lp_type type;
289 LLVMValueRef a;
290 LLVMValueRef rgba;
291
292 memset(&type, 0, sizeof type);
293 type.sign = TRUE;
294 type.width = 32;
295 type.length = n;
296
297 assert(lp_check_value(type, r));
298 assert(lp_check_value(type, g));
299 assert(lp_check_value(type, b));
300
301 /*
302 * Make a 4 x unorm8 vector
303 */
304
305 r = r;
306 g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), "");
307 b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), "");
308 a = lp_build_const_int_vec(gallivm, type, 0xff000000);
309
310 rgba = r;
311 rgba = LLVMBuildOr(builder, rgba, g, "");
312 rgba = LLVMBuildOr(builder, rgba, b, "");
313 rgba = LLVMBuildOr(builder, rgba, a, "");
314
315 rgba = LLVMBuildBitCast(builder, rgba,
316 LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), "");
317
318 return rgba;
319 }
320
321
322 /**
323 * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
324 */
325 static LLVMValueRef
326 uyvy_to_rgba_aos(struct gallivm_state *gallivm,
327 unsigned n,
328 LLVMValueRef packed,
329 LLVMValueRef i)
330 {
331 LLVMValueRef y, u, v;
332 LLVMValueRef r, g, b;
333 LLVMValueRef rgba;
334
335 uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
336 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
337 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
338
339 return rgba;
340 }
341
342
343 /**
344 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
345 */
346 static LLVMValueRef
347 yuyv_to_rgba_aos(struct gallivm_state *gallivm,
348 unsigned n,
349 LLVMValueRef packed,
350 LLVMValueRef i)
351 {
352 LLVMValueRef y, u, v;
353 LLVMValueRef r, g, b;
354 LLVMValueRef rgba;
355
356 yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v);
357 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b);
358 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
359
360 return rgba;
361 }
362
363
364 /**
365 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
366 */
367 static LLVMValueRef
368 rgbg_to_rgba_aos(struct gallivm_state *gallivm,
369 unsigned n,
370 LLVMValueRef packed,
371 LLVMValueRef i)
372 {
373 LLVMValueRef r, g, b;
374 LLVMValueRef rgba;
375
376 uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
377 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
378
379 return rgba;
380 }
381
382
383 /**
384 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
385 */
386 static LLVMValueRef
387 grgb_to_rgba_aos(struct gallivm_state *gallivm,
388 unsigned n,
389 LLVMValueRef packed,
390 LLVMValueRef i)
391 {
392 LLVMValueRef r, g, b;
393 LLVMValueRef rgba;
394
395 yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b);
396 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b);
397
398 return rgba;
399 }
400
401
402 /**
403 * @param n is the number of pixels processed
404 * @param packed is a <n x i32> vector with the packed YUYV blocks
405 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
406 * @return a <4*n x i8> vector with the pixel RGBA values in AoS
407 */
408 LLVMValueRef
409 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
410 const struct util_format_description *format_desc,
411 unsigned n,
412 LLVMValueRef base_ptr,
413 LLVMValueRef offset,
414 LLVMValueRef i,
415 LLVMValueRef j)
416 {
417 LLVMValueRef packed;
418 LLVMValueRef rgba;
419
420 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
421 assert(format_desc->block.bits == 32);
422 assert(format_desc->block.width == 2);
423 assert(format_desc->block.height == 1);
424
425 packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset);
426
427 (void)j;
428
429 switch (format_desc->format) {
430 case PIPE_FORMAT_UYVY:
431 rgba = uyvy_to_rgba_aos(gallivm, n, packed, i);
432 break;
433 case PIPE_FORMAT_YUYV:
434 rgba = yuyv_to_rgba_aos(gallivm, n, packed, i);
435 break;
436 case PIPE_FORMAT_R8G8_B8G8_UNORM:
437 rgba = rgbg_to_rgba_aos(gallivm, n, packed, i);
438 break;
439 case PIPE_FORMAT_G8R8_G8B8_UNORM:
440 rgba = grgb_to_rgba_aos(gallivm, n, packed, i);
441 break;
442 default:
443 assert(0);
444 rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n));
445 break;
446 }
447
448 return rgba;
449 }
450