gallivm: Support multiple pixels in lp_build_fetch_rgba_aos().
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_yuv.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * YUV pixel format manipulation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36
37 #include "util/u_format.h"
38
39 #include "lp_bld_arit.h"
40 #include "lp_bld_init.h"
41 #include "lp_bld_type.h"
42 #include "lp_bld_const.h"
43 #include "lp_bld_conv.h"
44 #include "lp_bld_gather.h"
45 #include "lp_bld_format.h"
46
47
48 /**
49 * Extract Y, U, V channels from packed UYVY.
50 * @param packed is a <n x i32> vector with the packed UYVY blocks
51 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
52 */
53 static void
54 uyvy_to_yuv_soa(LLVMBuilderRef builder,
55 unsigned n,
56 LLVMValueRef packed,
57 LLVMValueRef i,
58 LLVMValueRef *y,
59 LLVMValueRef *u,
60 LLVMValueRef *v)
61 {
62 struct lp_type type;
63 LLVMValueRef shift, mask;
64
65 memset(&type, 0, sizeof type);
66 type.width = 32;
67 type.length = n;
68
69 assert(lp_check_value(type, packed));
70 assert(lp_check_value(type, i));
71
72 /*
73 * y = (uyvy >> 16*i) & 0xff
74 * u = (uyvy ) & 0xff
75 * v = (uyvy >> 16 ) & 0xff
76 */
77
78 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
79 shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
80 *y = LLVMBuildLShr(builder, packed, shift, "");
81 *u = packed;
82 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
83
84 mask = lp_build_const_int_vec(type, 0xff);
85
86 *y = LLVMBuildAnd(builder, *y, mask, "y");
87 *u = LLVMBuildAnd(builder, *u, mask, "u");
88 *v = LLVMBuildAnd(builder, *v, mask, "v");
89 }
90
91
92 /**
93 * Extract Y, U, V channels from packed YUYV.
94 * @param packed is a <n x i32> vector with the packed YUYV blocks
95 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
96 */
97 static void
98 yuyv_to_yuv_soa(LLVMBuilderRef builder,
99 unsigned n,
100 LLVMValueRef packed,
101 LLVMValueRef i,
102 LLVMValueRef *y,
103 LLVMValueRef *u,
104 LLVMValueRef *v)
105 {
106 struct lp_type type;
107 LLVMValueRef shift, mask;
108
109 memset(&type, 0, sizeof type);
110 type.width = 32;
111 type.length = n;
112
113 assert(lp_check_value(type, packed));
114 assert(lp_check_value(type, i));
115
116 /*
117 * y = (yuyv >> 16*i) & 0xff
118 * u = (yuyv >> 8 ) & 0xff
119 * v = (yuyv >> 24 ) & 0xff
120 */
121
122 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
123 *y = LLVMBuildLShr(builder, packed, shift, "");
124 *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
125 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), "");
126
127 mask = lp_build_const_int_vec(type, 0xff);
128
129 *y = LLVMBuildAnd(builder, *y, mask, "y");
130 *u = LLVMBuildAnd(builder, *u, mask, "u");
131 *v = LLVMBuildAnd(builder, *v, mask, "v");
132 }
133
134
135 static INLINE void
136 yuv_to_rgb_soa(LLVMBuilderRef builder,
137 unsigned n,
138 LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
139 LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
140 {
141 struct lp_type type;
142 struct lp_build_context bld;
143
144 LLVMValueRef c0;
145 LLVMValueRef c8;
146 LLVMValueRef c16;
147 LLVMValueRef c128;
148 LLVMValueRef c255;
149
150 LLVMValueRef cy;
151 LLVMValueRef cug;
152 LLVMValueRef cub;
153 LLVMValueRef cvr;
154 LLVMValueRef cvg;
155
156 memset(&type, 0, sizeof type);
157 type.sign = TRUE;
158 type.width = 32;
159 type.length = n;
160
161 lp_build_context_init(&bld, builder, type);
162
163 assert(lp_check_value(type, y));
164 assert(lp_check_value(type, u));
165 assert(lp_check_value(type, v));
166
167 /*
168 * Constants
169 */
170
171 c0 = lp_build_const_int_vec(type, 0);
172 c8 = lp_build_const_int_vec(type, 8);
173 c16 = lp_build_const_int_vec(type, 16);
174 c128 = lp_build_const_int_vec(type, 128);
175 c255 = lp_build_const_int_vec(type, 255);
176
177 cy = lp_build_const_int_vec(type, 298);
178 cug = lp_build_const_int_vec(type, -100);
179 cub = lp_build_const_int_vec(type, 516);
180 cvr = lp_build_const_int_vec(type, 409);
181 cvg = lp_build_const_int_vec(type, -208);
182
183 /*
184 * y -= 16;
185 * u -= 128;
186 * v -= 128;
187 */
188
189 y = LLVMBuildSub(builder, y, c16, "");
190 u = LLVMBuildSub(builder, u, c128, "");
191 v = LLVMBuildSub(builder, v, c128, "");
192
193 /*
194 * r = 298 * _y + 409 * _v + 128;
195 * g = 298 * _y - 100 * _u - 208 * _v + 128;
196 * b = 298 * _y + 516 * _u + 128;
197 */
198
199 y = LLVMBuildMul(builder, y, cy, "");
200 y = LLVMBuildAdd(builder, y, c128, "");
201
202 *r = LLVMBuildMul(builder, v, cvr, "");
203 *g = LLVMBuildAdd(builder,
204 LLVMBuildMul(builder, u, cug, ""),
205 LLVMBuildMul(builder, v, cvg, ""),
206 "");
207 *b = LLVMBuildMul(builder, u, cub, "");
208
209 *r = LLVMBuildAdd(builder, *r, y, "");
210 *g = LLVMBuildAdd(builder, *g, y, "");
211 *b = LLVMBuildAdd(builder, *b, y, "");
212
213 /*
214 * r >>= 8;
215 * g >>= 8;
216 * b >>= 8;
217 */
218
219 *r = LLVMBuildAShr(builder, *r, c8, "r");
220 *g = LLVMBuildAShr(builder, *g, c8, "g");
221 *b = LLVMBuildAShr(builder, *b, c8, "b");
222
223 /*
224 * Clamp
225 */
226
227 *r = lp_build_clamp(&bld, *r, c0, c255);
228 *g = lp_build_clamp(&bld, *g, c0, c255);
229 *b = lp_build_clamp(&bld, *b, c0, c255);
230 }
231
232
233 static LLVMValueRef
234 rgb_to_rgba_aos(LLVMBuilderRef builder,
235 unsigned n,
236 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
237 {
238 struct lp_type type;
239 LLVMValueRef a;
240 LLVMValueRef rgba;
241
242 memset(&type, 0, sizeof type);
243 type.sign = TRUE;
244 type.width = 32;
245 type.length = n;
246
247 assert(lp_check_value(type, r));
248 assert(lp_check_value(type, g));
249 assert(lp_check_value(type, b));
250
251 /*
252 * Make a 4 x unorm8 vector
253 */
254
255 r = r;
256 g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), "");
257 b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), "");
258 a = lp_build_const_int_vec(type, 0xff000000);
259
260 rgba = r;
261 rgba = LLVMBuildOr(builder, rgba, g, "");
262 rgba = LLVMBuildOr(builder, rgba, b, "");
263 rgba = LLVMBuildOr(builder, rgba, a, "");
264
265 rgba = LLVMBuildBitCast(builder, rgba,
266 LLVMVectorType(LLVMInt8Type(), 4*n), "");
267
268 return rgba;
269 }
270
271
272 /**
273 * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
274 */
275 static LLVMValueRef
276 uyvy_to_rgba_aos(LLVMBuilderRef builder,
277 unsigned n,
278 LLVMValueRef packed,
279 LLVMValueRef i)
280 {
281 LLVMValueRef y, u, v;
282 LLVMValueRef r, g, b;
283 LLVMValueRef rgba;
284
285 uyvy_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
286 yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
287 rgba = rgb_to_rgba_aos(builder, n, r, g, b);
288
289 return rgba;
290 }
291
292
293 /**
294 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
295 */
296 static LLVMValueRef
297 yuyv_to_rgba_aos(LLVMBuilderRef builder,
298 unsigned n,
299 LLVMValueRef packed,
300 LLVMValueRef i)
301 {
302 LLVMValueRef y, u, v;
303 LLVMValueRef r, g, b;
304 LLVMValueRef rgba;
305
306 yuyv_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
307 yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
308 rgba = rgb_to_rgba_aos(builder, n, r, g, b);
309
310 return rgba;
311 }
312
313
314 /**
315 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
316 */
317 static LLVMValueRef
318 rgbg_to_rgba_aos(LLVMBuilderRef builder,
319 unsigned n,
320 LLVMValueRef packed,
321 LLVMValueRef i)
322 {
323 LLVMValueRef r, g, b;
324 LLVMValueRef rgba;
325
326 uyvy_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
327 rgba = rgb_to_rgba_aos(builder, n, r, g, b);
328
329 return rgba;
330 }
331
332
333 /**
334 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
335 */
336 static LLVMValueRef
337 grgb_to_rgba_aos(LLVMBuilderRef builder,
338 unsigned n,
339 LLVMValueRef packed,
340 LLVMValueRef i)
341 {
342 LLVMValueRef r, g, b;
343 LLVMValueRef rgba;
344
345 yuyv_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
346 rgba = rgb_to_rgba_aos(builder, n, r, g, b);
347
348 return rgba;
349 }
350
351
352 /**
353 * @param n is the number of pixels processed
354 * @param packed is a <n x i32> vector with the packed YUYV blocks
355 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
356 * @return a <4*n x i8> vector with the pixel RGBA values in AoS
357 */
358 LLVMValueRef
359 lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
360 const struct util_format_description *format_desc,
361 unsigned n,
362 LLVMValueRef base_ptr,
363 LLVMValueRef offset,
364 LLVMValueRef i,
365 LLVMValueRef j)
366 {
367 LLVMValueRef packed;
368 LLVMValueRef rgba;
369
370 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
371 assert(format_desc->block.bits == 32);
372 assert(format_desc->block.width == 2);
373 assert(format_desc->block.height == 1);
374
375 packed = lp_build_gather(builder, n, 32, 32, base_ptr, offset);
376
377 (void)j;
378
379 switch (format_desc->format) {
380 case PIPE_FORMAT_UYVY:
381 rgba = uyvy_to_rgba_aos(builder, n, packed, i);
382 break;
383 case PIPE_FORMAT_YUYV:
384 rgba = yuyv_to_rgba_aos(builder, n, packed, i);
385 break;
386 case PIPE_FORMAT_R8G8_B8G8_UNORM:
387 rgba = rgbg_to_rgba_aos(builder, n, packed, i);
388 break;
389 case PIPE_FORMAT_G8R8_G8B8_UNORM:
390 rgba = grgb_to_rgba_aos(builder, n, packed, i);
391 break;
392 default:
393 assert(0);
394 rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n));
395 break;
396 }
397
398 return rgba;
399 }
400