Merge remote branch 'origin/7.8'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * AoS pixel format manipulation.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35
36 #include "util/u_format.h"
37 #include "util/u_memory.h"
38 #include "util/u_math.h"
39 #include "util/u_string.h"
40
41 #include "lp_bld_init.h"
42 #include "lp_bld_type.h"
43 #include "lp_bld_format.h"
44
45
46 /**
47 * Unpack a single pixel into its RGBA components.
48 *
49 * @param packed integer.
50 *
51 * @return RGBA in a 4 floats vector.
52 */
53 LLVMValueRef
54 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
55 const struct util_format_description *desc,
56 LLVMValueRef packed)
57 {
58 LLVMValueRef shifted, casted, scaled, masked;
59 LLVMValueRef shifts[4];
60 LLVMValueRef masks[4];
61 LLVMValueRef scales[4];
62 LLVMValueRef swizzles[4];
63 LLVMValueRef aux[4];
64 bool normalized;
65 int empty_channel;
66 bool needs_uitofp;
67 unsigned shift;
68 unsigned i;
69
70 /* TODO: Support more formats */
71 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
72 assert(desc->block.width == 1);
73 assert(desc->block.height == 1);
74 assert(desc->block.bits <= 32);
75
76 /* Do the intermediate integer computations with 32bit integers since it
77 * matches floating point size */
78 if (desc->block.bits < 32)
79 packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
80
81 /* Broadcast the packed value to all four channels */
82 packed = LLVMBuildInsertElement(builder,
83 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
84 packed,
85 LLVMConstNull(LLVMInt32Type()),
86 "");
87 packed = LLVMBuildShuffleVector(builder,
88 packed,
89 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
90 LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
91 "");
92
93 /* Initialize vector constants */
94 normalized = FALSE;
95 needs_uitofp = FALSE;
96 empty_channel = -1;
97 shift = 0;
98 for (i = 0; i < 4; ++i) {
99 unsigned bits = desc->channel[i].size;
100
101 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
102 shifts[i] = LLVMGetUndef(LLVMInt32Type());
103 masks[i] = LLVMConstNull(LLVMInt32Type());
104 scales[i] = LLVMConstNull(LLVMFloatType());
105 empty_channel = i;
106 }
107 else {
108 unsigned long long mask = (1ULL << bits) - 1;
109
110 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
111
112 if (bits == 32) {
113 needs_uitofp = TRUE;
114 }
115
116 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
117 masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
118
119 if (desc->channel[i].normalized) {
120 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
121 normalized = TRUE;
122 }
123 else
124 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0);
125 }
126
127 shift += bits;
128 }
129
130 shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
131 masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
132 if (!needs_uitofp) {
133 /* UIToFP can't be expressed in SSE2 */
134 casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
135 } else {
136 casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
137 }
138
139 if (normalized)
140 scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
141 else
142 scaled = casted;
143
144 for (i = 0; i < 4; ++i)
145 aux[i] = LLVMGetUndef(LLVMFloatType());
146
147 for (i = 0; i < 4; ++i) {
148 enum util_format_swizzle swizzle;
149
150 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
151 /*
152 * For ZS formats do RGBA = ZZZ1
153 */
154 if (i == 3) {
155 swizzle = UTIL_FORMAT_SWIZZLE_1;
156 } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
157 swizzle = UTIL_FORMAT_SWIZZLE_0;
158 } else {
159 swizzle = desc->swizzle[0];
160 }
161 } else {
162 swizzle = desc->swizzle[i];
163 }
164
165 switch (swizzle) {
166 case UTIL_FORMAT_SWIZZLE_X:
167 case UTIL_FORMAT_SWIZZLE_Y:
168 case UTIL_FORMAT_SWIZZLE_Z:
169 case UTIL_FORMAT_SWIZZLE_W:
170 swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
171 break;
172 case UTIL_FORMAT_SWIZZLE_0:
173 assert(empty_channel >= 0);
174 swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
175 break;
176 case UTIL_FORMAT_SWIZZLE_1:
177 swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
178 aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
179 break;
180 case UTIL_FORMAT_SWIZZLE_NONE:
181 swizzles[i] = LLVMGetUndef(LLVMFloatType());
182 assert(0);
183 break;
184 }
185 }
186
187 return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), "");
188 }
189
190
191 /**
192 * Pack a single pixel.
193 *
194 * @param rgba 4 float vector with the unpacked components.
195 *
196 * XXX: This is mostly for reference and testing -- operating a single pixel at
197 * a time is rarely if ever needed.
198 */
199 LLVMValueRef
200 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
201 const struct util_format_description *desc,
202 LLVMValueRef rgba)
203 {
204 LLVMTypeRef type;
205 LLVMValueRef packed = NULL;
206 LLVMValueRef swizzles[4];
207 LLVMValueRef shifted, casted, scaled, unswizzled;
208 LLVMValueRef shifts[4];
209 LLVMValueRef scales[4];
210 bool normalized;
211 unsigned shift;
212 unsigned i, j;
213
214 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
215 assert(desc->block.width == 1);
216 assert(desc->block.height == 1);
217
218 type = LLVMIntType(desc->block.bits);
219
220 /* Unswizzle the color components into the source vector. */
221 for (i = 0; i < 4; ++i) {
222 for (j = 0; j < 4; ++j) {
223 if (desc->swizzle[j] == i)
224 break;
225 }
226 if (j < 4)
227 swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
228 else
229 swizzles[i] = LLVMGetUndef(LLVMInt32Type());
230 }
231
232 unswizzled = LLVMBuildShuffleVector(builder, rgba,
233 LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
234 LLVMConstVector(swizzles, 4), "");
235
236 normalized = FALSE;
237 shift = 0;
238 for (i = 0; i < 4; ++i) {
239 unsigned bits = desc->channel[i].size;
240
241 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
242 shifts[i] = LLVMGetUndef(LLVMInt32Type());
243 scales[i] = LLVMGetUndef(LLVMFloatType());
244 }
245 else {
246 unsigned mask = (1 << bits) - 1;
247
248 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
249 assert(bits < 32);
250
251 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
252
253 if (desc->channel[i].normalized) {
254 scales[i] = LLVMConstReal(LLVMFloatType(), mask);
255 normalized = TRUE;
256 }
257 else
258 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0);
259 }
260
261 shift += bits;
262 }
263
264 if (normalized)
265 scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
266 else
267 scaled = unswizzled;
268
269 casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
270
271 shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
272
273 /* Bitwise or all components */
274 for (i = 0; i < 4; ++i) {
275 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
276 LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
277 if (packed)
278 packed = LLVMBuildOr(builder, packed, component, "");
279 else
280 packed = component;
281 }
282 }
283
284 if (!packed)
285 packed = LLVMGetUndef(LLVMInt32Type());
286
287 if (desc->block.bits < 32)
288 packed = LLVMBuildTrunc(builder, packed, type, "");
289
290 return packed;
291 }
292
293
294 /**
295 * Fetch a pixel into a 4 float AoS.
296 *
297 * i and j are the sub-block pixel coordinates.
298 */
299 LLVMValueRef
300 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
301 const struct util_format_description *format_desc,
302 LLVMValueRef ptr,
303 LLVMValueRef i,
304 LLVMValueRef j)
305 {
306
307 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
308 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
309 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
310 format_desc->block.width == 1 &&
311 format_desc->block.height == 1 &&
312 util_is_pot(format_desc->block.bits) &&
313 format_desc->block.bits <= 32 &&
314 format_desc->is_bitmask &&
315 !format_desc->is_mixed &&
316 (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
317 format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
318 {
319 LLVMValueRef packed;
320
321 ptr = LLVMBuildBitCast(builder, ptr,
322 LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
323 "");
324
325 packed = LLVMBuildLoad(builder, ptr, "packed");
326
327 return lp_build_unpack_rgba_aos(builder, format_desc, packed);
328 }
329 else if (format_desc->fetch_rgba_float) {
330 /*
331 * Fallback to calling util_format_description::fetch_rgba_float.
332 *
333 * This is definitely not the most efficient way of fetching pixels, as
334 * we miss the opportunity to do vectorization, but this it is a
335 * convenient for formats or scenarios for which there was no opportunity
336 * or incentive to optimize.
337 */
338
339 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
340 char name[256];
341 LLVMValueRef function;
342 LLVMValueRef tmp;
343 LLVMValueRef args[4];
344
345 util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
346 format_desc->short_name);
347
348 /*
349 * Declare and bind format_desc->fetch_rgba_float().
350 */
351
352 function = LLVMGetNamedFunction(module, name);
353 if (!function) {
354 LLVMTypeRef ret_type;
355 LLVMTypeRef arg_types[4];
356 LLVMTypeRef function_type;
357
358 ret_type = LLVMVoidType();
359 arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
360 arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
361 arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
362 function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
363 function = LLVMAddFunction(module, name, function_type);
364
365 LLVMSetFunctionCallConv(function, LLVMCCallConv);
366 LLVMSetLinkage(function, LLVMExternalLinkage);
367
368 assert(LLVMIsDeclaration(function));
369
370 LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
371 }
372
373 /*
374 * XXX: this should better go to the first block in the function
375 */
376
377 tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
378
379 /*
380 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
381 * in the SoA vectors.
382 */
383
384 args[0] = LLVMBuildBitCast(builder, tmp,
385 LLVMPointerType(LLVMFloatType(), 0), "");
386 args[1] = ptr;
387 args[2] = i;
388 args[3] = j;
389
390 LLVMBuildCall(builder, function, args, 4, "");
391
392 return LLVMBuildLoad(builder, tmp, "");
393 }
394 else {
395 assert(0);
396 return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
397 }
398 }