Merge branch 'glsl2-head' into glsl2
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * AoS pixel format manipulation.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35
36 #include "util/u_format.h"
37 #include "util/u_memory.h"
38 #include "util/u_math.h"
39 #include "util/u_string.h"
40
41 #include "lp_bld_init.h"
42 #include "lp_bld_type.h"
43 #include "lp_bld_flow.h"
44 #include "lp_bld_format.h"
45
46
47 /**
48 * Unpack a single pixel into its RGBA components.
49 *
50 * @param desc the pixel format for the packed pixel value
51 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
52 *
53 * @return RGBA in a 4 floats vector.
54 */
55 LLVMValueRef
56 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
57 const struct util_format_description *desc,
58 LLVMValueRef packed)
59 {
60 LLVMValueRef shifted, casted, scaled, masked;
61 LLVMValueRef shifts[4];
62 LLVMValueRef masks[4];
63 LLVMValueRef scales[4];
64 LLVMValueRef swizzles[4];
65 LLVMValueRef aux[4];
66 boolean normalized;
67 int empty_channel;
68 boolean needs_uitofp;
69 unsigned shift;
70 unsigned i;
71
72 /* TODO: Support more formats */
73 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
74 assert(desc->block.width == 1);
75 assert(desc->block.height == 1);
76 assert(desc->block.bits <= 32);
77
78 /* Do the intermediate integer computations with 32bit integers since it
79 * matches floating point size */
80 if (desc->block.bits < 32)
81 packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
82
83 /* Broadcast the packed value to all four channels
84 * before: packed = BGRA
85 * after: packed = {BGRA, BGRA, BGRA, BGRA}
86 */
87 packed = LLVMBuildInsertElement(builder,
88 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
89 packed,
90 LLVMConstNull(LLVMInt32Type()),
91 "");
92 packed = LLVMBuildShuffleVector(builder,
93 packed,
94 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
95 LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
96 "");
97
98 /* Initialize vector constants */
99 normalized = FALSE;
100 needs_uitofp = FALSE;
101 empty_channel = -1;
102 shift = 0;
103
104 /* Loop over 4 color components */
105 for (i = 0; i < 4; ++i) {
106 unsigned bits = desc->channel[i].size;
107
108 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
109 shifts[i] = LLVMGetUndef(LLVMInt32Type());
110 masks[i] = LLVMConstNull(LLVMInt32Type());
111 scales[i] = LLVMConstNull(LLVMFloatType());
112 empty_channel = i;
113 }
114 else {
115 unsigned long long mask = (1ULL << bits) - 1;
116
117 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
118
119 if (bits == 32) {
120 needs_uitofp = TRUE;
121 }
122
123 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
124 masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
125
126 if (desc->channel[i].normalized) {
127 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
128 normalized = TRUE;
129 }
130 else
131 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0);
132 }
133
134 shift += bits;
135 }
136
137 /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
138 * into masked = {B, G, R, A}
139 */
140 shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
141 masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
142
143
144 if (!needs_uitofp) {
145 /* UIToFP can't be expressed in SSE2 */
146 casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
147 } else {
148 casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
149 }
150
151 /* At this point 'casted' may be a vector of floats such as
152 * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized
153 * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
154 */
155
156 if (normalized)
157 scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
158 else
159 scaled = casted;
160
161 for (i = 0; i < 4; ++i)
162 aux[i] = LLVMGetUndef(LLVMFloatType());
163
164 /* Build swizzles vector to put components into R,G,B,A order */
165 for (i = 0; i < 4; ++i) {
166 enum util_format_swizzle swizzle;
167
168 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
169 /*
170 * For ZS formats do RGBA = ZZZ1
171 */
172 if (i == 3) {
173 swizzle = UTIL_FORMAT_SWIZZLE_1;
174 } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
175 swizzle = UTIL_FORMAT_SWIZZLE_0;
176 } else {
177 swizzle = desc->swizzle[0];
178 }
179 } else {
180 swizzle = desc->swizzle[i];
181 }
182
183 switch (swizzle) {
184 case UTIL_FORMAT_SWIZZLE_X:
185 case UTIL_FORMAT_SWIZZLE_Y:
186 case UTIL_FORMAT_SWIZZLE_Z:
187 case UTIL_FORMAT_SWIZZLE_W:
188 swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
189 break;
190 case UTIL_FORMAT_SWIZZLE_0:
191 assert(empty_channel >= 0);
192 swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
193 break;
194 case UTIL_FORMAT_SWIZZLE_1:
195 swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
196 aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
197 break;
198 case UTIL_FORMAT_SWIZZLE_NONE:
199 swizzles[i] = LLVMGetUndef(LLVMFloatType());
200 assert(0);
201 break;
202 }
203 }
204
205 return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4),
206 LLVMConstVector(swizzles, 4), "");
207 }
208
209
210 /**
211 * Pack a single pixel.
212 *
213 * @param rgba 4 float vector with the unpacked components.
214 *
215 * XXX: This is mostly for reference and testing -- operating a single pixel at
216 * a time is rarely if ever needed.
217 */
218 LLVMValueRef
219 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
220 const struct util_format_description *desc,
221 LLVMValueRef rgba)
222 {
223 LLVMTypeRef type;
224 LLVMValueRef packed = NULL;
225 LLVMValueRef swizzles[4];
226 LLVMValueRef shifted, casted, scaled, unswizzled;
227 LLVMValueRef shifts[4];
228 LLVMValueRef scales[4];
229 boolean normalized;
230 unsigned shift;
231 unsigned i, j;
232
233 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
234 assert(desc->block.width == 1);
235 assert(desc->block.height == 1);
236
237 type = LLVMIntType(desc->block.bits);
238
239 /* Unswizzle the color components into the source vector. */
240 for (i = 0; i < 4; ++i) {
241 for (j = 0; j < 4; ++j) {
242 if (desc->swizzle[j] == i)
243 break;
244 }
245 if (j < 4)
246 swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
247 else
248 swizzles[i] = LLVMGetUndef(LLVMInt32Type());
249 }
250
251 unswizzled = LLVMBuildShuffleVector(builder, rgba,
252 LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
253 LLVMConstVector(swizzles, 4), "");
254
255 normalized = FALSE;
256 shift = 0;
257 for (i = 0; i < 4; ++i) {
258 unsigned bits = desc->channel[i].size;
259
260 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
261 shifts[i] = LLVMGetUndef(LLVMInt32Type());
262 scales[i] = LLVMGetUndef(LLVMFloatType());
263 }
264 else {
265 unsigned mask = (1 << bits) - 1;
266
267 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
268 assert(bits < 32);
269
270 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
271
272 if (desc->channel[i].normalized) {
273 scales[i] = LLVMConstReal(LLVMFloatType(), mask);
274 normalized = TRUE;
275 }
276 else
277 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0);
278 }
279
280 shift += bits;
281 }
282
283 if (normalized)
284 scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
285 else
286 scaled = unswizzled;
287
288 casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
289
290 shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
291
292 /* Bitwise or all components */
293 for (i = 0; i < 4; ++i) {
294 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
295 LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
296 if (packed)
297 packed = LLVMBuildOr(builder, packed, component, "");
298 else
299 packed = component;
300 }
301 }
302
303 if (!packed)
304 packed = LLVMGetUndef(LLVMInt32Type());
305
306 if (desc->block.bits < 32)
307 packed = LLVMBuildTrunc(builder, packed, type, "");
308
309 return packed;
310 }
311
312
313 /**
314 * Fetch a pixel into a 4 float AoS.
315 *
316 * \param format_desc describes format of the image we're fetching from
317 * \param ptr address of the pixel block (or the texel if uncompressed)
318 * \param i, j the sub-block pixel coordinates. For non-compressed formats
319 * these will always be (0,).
320 * \return valueRef with the float[4] RGBA pixel
321 */
322 LLVMValueRef
323 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
324 const struct util_format_description *format_desc,
325 LLVMValueRef ptr,
326 LLVMValueRef i,
327 LLVMValueRef j)
328 {
329
330 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
331 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
332 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
333 format_desc->block.width == 1 &&
334 format_desc->block.height == 1 &&
335 util_is_pot(format_desc->block.bits) &&
336 format_desc->block.bits <= 32 &&
337 format_desc->is_bitmask &&
338 !format_desc->is_mixed &&
339 (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
340 format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
341 {
342 LLVMValueRef packed;
343
344 ptr = LLVMBuildBitCast(builder, ptr,
345 LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
346 "");
347
348 packed = LLVMBuildLoad(builder, ptr, "packed");
349
350 return lp_build_unpack_rgba_aos(builder, format_desc, packed);
351 }
352 else if (format_desc->fetch_rgba_float) {
353 /*
354 * Fallback to calling util_format_description::fetch_rgba_float.
355 *
356 * This is definitely not the most efficient way of fetching pixels, as
357 * we miss the opportunity to do vectorization, but this it is a
358 * convenient for formats or scenarios for which there was no opportunity
359 * or incentive to optimize.
360 */
361
362 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
363 char name[256];
364 LLVMValueRef function;
365 LLVMValueRef tmp;
366 LLVMValueRef args[4];
367
368 util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
369 format_desc->short_name);
370
371 /*
372 * Declare and bind format_desc->fetch_rgba_float().
373 */
374
375 function = LLVMGetNamedFunction(module, name);
376 if (!function) {
377 LLVMTypeRef ret_type;
378 LLVMTypeRef arg_types[4];
379 LLVMTypeRef function_type;
380
381 ret_type = LLVMVoidType();
382 arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
383 arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
384 arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
385 function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
386 function = LLVMAddFunction(module, name, function_type);
387
388 LLVMSetFunctionCallConv(function, LLVMCCallConv);
389 LLVMSetLinkage(function, LLVMExternalLinkage);
390
391 assert(LLVMIsDeclaration(function));
392
393 LLVMAddGlobalMapping(lp_build_engine, function,
394 func_to_pointer((func_pointer)format_desc->fetch_rgba_float));
395 }
396
397 tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
398
399 /*
400 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
401 * in the SoA vectors.
402 */
403
404 args[0] = LLVMBuildBitCast(builder, tmp,
405 LLVMPointerType(LLVMFloatType(), 0), "");
406 args[1] = ptr;
407 args[2] = i;
408 args[3] = j;
409
410 LLVMBuildCall(builder, function, args, Elements(args), "");
411
412 return LLVMBuildLoad(builder, tmp, "");
413 }
414 else {
415 assert(0);
416 return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
417 }
418 }