gallivm: Universal format support on lp_build_fetch_rgba_aos via util_format_descript...
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * AoS pixel format manipulation.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35
36 #include "util/u_format.h"
37 #include "util/u_memory.h"
38 #include "util/u_math.h"
39 #include "util/u_string.h"
40
41 #include "lp_bld_init.h"
42 #include "lp_bld_type.h"
43 #include "lp_bld_const.h"
44 #include "lp_bld_swizzle.h"
45 #include "lp_bld_format.h"
46
47
48 /**
49 * Unpack a single pixel into its RGBA components.
50 *
51 * @param packed integer.
52 *
53 * @return RGBA in a 4 floats vector.
54 */
55 LLVMValueRef
56 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
57 const struct util_format_description *desc,
58 LLVMValueRef packed)
59 {
60 LLVMTypeRef type;
61 LLVMValueRef shifted, casted, scaled, masked;
62 LLVMValueRef shifts[4];
63 LLVMValueRef masks[4];
64 LLVMValueRef scales[4];
65 LLVMValueRef swizzles[4];
66 LLVMValueRef aux[4];
67 bool normalized;
68 int empty_channel;
69 bool needs_uitofp;
70 unsigned shift;
71 unsigned i;
72
73 /* TODO: Support more formats */
74 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
75 assert(desc->block.width == 1);
76 assert(desc->block.height == 1);
77 assert(desc->block.bits <= 32);
78
79 type = LLVMIntType(desc->block.bits);
80
81 /* Do the intermediate integer computations with 32bit integers since it
82 * matches floating point size */
83 if (desc->block.bits < 32)
84 packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
85
86 /* Broadcast the packed value to all four channels */
87 packed = LLVMBuildInsertElement(builder,
88 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
89 packed,
90 LLVMConstNull(LLVMInt32Type()),
91 "");
92 packed = LLVMBuildShuffleVector(builder,
93 packed,
94 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
95 LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
96 "");
97
98 /* Initialize vector constants */
99 normalized = FALSE;
100 needs_uitofp = FALSE;
101 empty_channel = -1;
102 shift = 0;
103 for (i = 0; i < 4; ++i) {
104 unsigned bits = desc->channel[i].size;
105
106 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
107 shifts[i] = LLVMGetUndef(LLVMInt32Type());
108 masks[i] = LLVMConstNull(LLVMInt32Type());
109 scales[i] = LLVMConstNull(LLVMFloatType());
110 empty_channel = i;
111 }
112 else {
113 unsigned long long mask = (1ULL << bits) - 1;
114
115 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
116
117 if (bits == 32) {
118 needs_uitofp = TRUE;
119 }
120
121 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
122 masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
123
124 if (desc->channel[i].normalized) {
125 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
126 normalized = TRUE;
127 }
128 else
129 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0);
130 }
131
132 shift += bits;
133 }
134
135 shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
136 masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
137 if (!needs_uitofp) {
138 /* UIToFP can't be expressed in SSE2 */
139 casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
140 } else {
141 casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
142 }
143
144 if (normalized)
145 scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
146 else
147 scaled = casted;
148
149 for (i = 0; i < 4; ++i)
150 aux[i] = LLVMGetUndef(LLVMFloatType());
151
152 for (i = 0; i < 4; ++i) {
153 enum util_format_swizzle swizzle;
154
155 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
156 /*
157 * For ZS formats do RGBA = ZZZ1
158 */
159 if (i == 3) {
160 swizzle = UTIL_FORMAT_SWIZZLE_1;
161 } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
162 swizzle = UTIL_FORMAT_SWIZZLE_0;
163 } else {
164 swizzle = desc->swizzle[0];
165 }
166 } else {
167 swizzle = desc->swizzle[i];
168 }
169
170 switch (swizzle) {
171 case UTIL_FORMAT_SWIZZLE_X:
172 case UTIL_FORMAT_SWIZZLE_Y:
173 case UTIL_FORMAT_SWIZZLE_Z:
174 case UTIL_FORMAT_SWIZZLE_W:
175 swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
176 break;
177 case UTIL_FORMAT_SWIZZLE_0:
178 assert(empty_channel >= 0);
179 swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
180 break;
181 case UTIL_FORMAT_SWIZZLE_1:
182 swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
183 aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
184 break;
185 case UTIL_FORMAT_SWIZZLE_NONE:
186 swizzles[i] = LLVMGetUndef(LLVMFloatType());
187 assert(0);
188 break;
189 }
190 }
191
192 return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), "");
193 }
194
195
196 /**
197 * Pack a single pixel.
198 *
199 * @param rgba 4 float vector with the unpacked components.
200 *
201 * XXX: This is mostly for reference and testing -- operating a single pixel at
202 * a time is rarely if ever needed.
203 */
204 LLVMValueRef
205 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
206 const struct util_format_description *desc,
207 LLVMValueRef rgba)
208 {
209 LLVMTypeRef type;
210 LLVMValueRef packed = NULL;
211 LLVMValueRef swizzles[4];
212 LLVMValueRef shifted, casted, scaled, unswizzled;
213 LLVMValueRef shifts[4];
214 LLVMValueRef scales[4];
215 bool normalized;
216 unsigned shift;
217 unsigned i, j;
218
219 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
220 assert(desc->block.width == 1);
221 assert(desc->block.height == 1);
222
223 type = LLVMIntType(desc->block.bits);
224
225 /* Unswizzle the color components into the source vector. */
226 for (i = 0; i < 4; ++i) {
227 for (j = 0; j < 4; ++j) {
228 if (desc->swizzle[j] == i)
229 break;
230 }
231 if (j < 4)
232 swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
233 else
234 swizzles[i] = LLVMGetUndef(LLVMInt32Type());
235 }
236
237 unswizzled = LLVMBuildShuffleVector(builder, rgba,
238 LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
239 LLVMConstVector(swizzles, 4), "");
240
241 normalized = FALSE;
242 shift = 0;
243 for (i = 0; i < 4; ++i) {
244 unsigned bits = desc->channel[i].size;
245
246 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
247 shifts[i] = LLVMGetUndef(LLVMInt32Type());
248 scales[i] = LLVMGetUndef(LLVMFloatType());
249 }
250 else {
251 unsigned mask = (1 << bits) - 1;
252
253 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
254 assert(bits < 32);
255
256 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
257
258 if (desc->channel[i].normalized) {
259 scales[i] = LLVMConstReal(LLVMFloatType(), mask);
260 normalized = TRUE;
261 }
262 else
263 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0);
264 }
265
266 shift += bits;
267 }
268
269 if (normalized)
270 scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
271 else
272 scaled = unswizzled;
273
274 casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
275
276 shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
277
278 /* Bitwise or all components */
279 for (i = 0; i < 4; ++i) {
280 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
281 LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
282 if (packed)
283 packed = LLVMBuildOr(builder, packed, component, "");
284 else
285 packed = component;
286 }
287 }
288
289 if (!packed)
290 packed = LLVMGetUndef(LLVMInt32Type());
291
292 if (desc->block.bits < 32)
293 packed = LLVMBuildTrunc(builder, packed, type, "");
294
295 return packed;
296 }
297
298
299 /**
300 * Fetch a pixel into a 4 float AoS.
301 *
302 * i and j are the sub-block pixel coordinates.
303 */
304 LLVMValueRef
305 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
306 const struct util_format_description *format_desc,
307 LLVMValueRef ptr,
308 LLVMValueRef i,
309 LLVMValueRef j)
310 {
311
312 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
313 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
314 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
315 format_desc->block.width == 1 &&
316 format_desc->block.height == 1 &&
317 util_is_pot(format_desc->block.bits) &&
318 format_desc->block.bits <= 32 &&
319 format_desc->is_bitmask &&
320 !format_desc->is_mixed &&
321 (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
322 format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
323 {
324 LLVMValueRef packed;
325
326 ptr = LLVMBuildBitCast(builder, ptr,
327 LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
328 "");
329
330 packed = LLVMBuildLoad(builder, ptr, "packed");
331
332 return lp_build_unpack_rgba_aos(builder, format_desc, packed);
333 }
334 else if (format_desc->fetch_rgba_float) {
335 /*
336 * Fallback to calling util_format_description::fetch_rgba_float.
337 *
338 * This is definitely not the most efficient way of fetching pixels, as
339 * we miss the opportunity to do vectorization, but this it is a
340 * convenient for formats or scenarios for which there was no opportunity
341 * or incentive to optimize.
342 */
343
344 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
345 char name[256];
346 LLVMValueRef function;
347 LLVMValueRef tmp;
348 LLVMValueRef args[4];
349
350 util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
351 format_desc->short_name);
352
353 /*
354 * Declare and bind format_desc->fetch_rgba_float().
355 */
356
357 function = LLVMGetNamedFunction(module, name);
358 if (!function) {
359 LLVMTypeRef ret_type;
360 LLVMTypeRef arg_types[4];
361 LLVMTypeRef function_type;
362
363 ret_type = LLVMVoidType();
364 arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
365 arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
366 arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
367 function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
368 function = LLVMAddFunction(module, name, function_type);
369
370 LLVMSetFunctionCallConv(function, LLVMCCallConv);
371 LLVMSetLinkage(function, LLVMExternalLinkage);
372
373 assert(LLVMIsDeclaration(function));
374
375 LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
376 }
377
378 /*
379 * XXX: this should better go to the first block in the function
380 */
381
382 tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
383
384 /*
385 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
386 * in the SoA vectors.
387 */
388
389 args[0] = LLVMBuildBitCast(builder, tmp,
390 LLVMPointerType(LLVMFloatType(), 0), "");
391 args[1] = ptr;
392 args[2] = i;
393 args[3] = j;
394
395 LLVMBuildCall(builder, function, args, 4, "");
396
397 return LLVMBuildLoad(builder, tmp, "");
398 }
399 else {
400 assert(0);
401 return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
402 }
403 }