Merge branch '7.8'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * AoS pixel format manipulation.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35
36 #include "util/u_format.h"
37 #include "util/u_memory.h"
38 #include "util/u_math.h"
39 #include "util/u_string.h"
40
41 #include "lp_bld_init.h"
42 #include "lp_bld_type.h"
43 #include "lp_bld_flow.h"
44 #include "lp_bld_format.h"
45
46
47 /**
48 * Unpack a single pixel into its RGBA components.
49 *
50 * @param packed integer.
51 *
52 * @return RGBA in a 4 floats vector.
53 */
54 LLVMValueRef
55 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
56 const struct util_format_description *desc,
57 LLVMValueRef packed)
58 {
59 LLVMValueRef shifted, casted, scaled, masked;
60 LLVMValueRef shifts[4];
61 LLVMValueRef masks[4];
62 LLVMValueRef scales[4];
63 LLVMValueRef swizzles[4];
64 LLVMValueRef aux[4];
65 bool normalized;
66 int empty_channel;
67 bool needs_uitofp;
68 unsigned shift;
69 unsigned i;
70
71 /* TODO: Support more formats */
72 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
73 assert(desc->block.width == 1);
74 assert(desc->block.height == 1);
75 assert(desc->block.bits <= 32);
76
77 /* Do the intermediate integer computations with 32bit integers since it
78 * matches floating point size */
79 if (desc->block.bits < 32)
80 packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
81
82 /* Broadcast the packed value to all four channels */
83 packed = LLVMBuildInsertElement(builder,
84 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
85 packed,
86 LLVMConstNull(LLVMInt32Type()),
87 "");
88 packed = LLVMBuildShuffleVector(builder,
89 packed,
90 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
91 LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
92 "");
93
94 /* Initialize vector constants */
95 normalized = FALSE;
96 needs_uitofp = FALSE;
97 empty_channel = -1;
98 shift = 0;
99 for (i = 0; i < 4; ++i) {
100 unsigned bits = desc->channel[i].size;
101
102 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
103 shifts[i] = LLVMGetUndef(LLVMInt32Type());
104 masks[i] = LLVMConstNull(LLVMInt32Type());
105 scales[i] = LLVMConstNull(LLVMFloatType());
106 empty_channel = i;
107 }
108 else {
109 unsigned long long mask = (1ULL << bits) - 1;
110
111 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
112
113 if (bits == 32) {
114 needs_uitofp = TRUE;
115 }
116
117 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
118 masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
119
120 if (desc->channel[i].normalized) {
121 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
122 normalized = TRUE;
123 }
124 else
125 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0);
126 }
127
128 shift += bits;
129 }
130
131 shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
132 masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
133 if (!needs_uitofp) {
134 /* UIToFP can't be expressed in SSE2 */
135 casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
136 } else {
137 casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
138 }
139
140 if (normalized)
141 scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
142 else
143 scaled = casted;
144
145 for (i = 0; i < 4; ++i)
146 aux[i] = LLVMGetUndef(LLVMFloatType());
147
148 for (i = 0; i < 4; ++i) {
149 enum util_format_swizzle swizzle;
150
151 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
152 /*
153 * For ZS formats do RGBA = ZZZ1
154 */
155 if (i == 3) {
156 swizzle = UTIL_FORMAT_SWIZZLE_1;
157 } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
158 swizzle = UTIL_FORMAT_SWIZZLE_0;
159 } else {
160 swizzle = desc->swizzle[0];
161 }
162 } else {
163 swizzle = desc->swizzle[i];
164 }
165
166 switch (swizzle) {
167 case UTIL_FORMAT_SWIZZLE_X:
168 case UTIL_FORMAT_SWIZZLE_Y:
169 case UTIL_FORMAT_SWIZZLE_Z:
170 case UTIL_FORMAT_SWIZZLE_W:
171 swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
172 break;
173 case UTIL_FORMAT_SWIZZLE_0:
174 assert(empty_channel >= 0);
175 swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
176 break;
177 case UTIL_FORMAT_SWIZZLE_1:
178 swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
179 aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
180 break;
181 case UTIL_FORMAT_SWIZZLE_NONE:
182 swizzles[i] = LLVMGetUndef(LLVMFloatType());
183 assert(0);
184 break;
185 }
186 }
187
188 return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), "");
189 }
190
191
192 /**
193 * Pack a single pixel.
194 *
195 * @param rgba 4 float vector with the unpacked components.
196 *
197 * XXX: This is mostly for reference and testing -- operating a single pixel at
198 * a time is rarely if ever needed.
199 */
200 LLVMValueRef
201 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
202 const struct util_format_description *desc,
203 LLVMValueRef rgba)
204 {
205 LLVMTypeRef type;
206 LLVMValueRef packed = NULL;
207 LLVMValueRef swizzles[4];
208 LLVMValueRef shifted, casted, scaled, unswizzled;
209 LLVMValueRef shifts[4];
210 LLVMValueRef scales[4];
211 bool normalized;
212 unsigned shift;
213 unsigned i, j;
214
215 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
216 assert(desc->block.width == 1);
217 assert(desc->block.height == 1);
218
219 type = LLVMIntType(desc->block.bits);
220
221 /* Unswizzle the color components into the source vector. */
222 for (i = 0; i < 4; ++i) {
223 for (j = 0; j < 4; ++j) {
224 if (desc->swizzle[j] == i)
225 break;
226 }
227 if (j < 4)
228 swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
229 else
230 swizzles[i] = LLVMGetUndef(LLVMInt32Type());
231 }
232
233 unswizzled = LLVMBuildShuffleVector(builder, rgba,
234 LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
235 LLVMConstVector(swizzles, 4), "");
236
237 normalized = FALSE;
238 shift = 0;
239 for (i = 0; i < 4; ++i) {
240 unsigned bits = desc->channel[i].size;
241
242 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
243 shifts[i] = LLVMGetUndef(LLVMInt32Type());
244 scales[i] = LLVMGetUndef(LLVMFloatType());
245 }
246 else {
247 unsigned mask = (1 << bits) - 1;
248
249 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
250 assert(bits < 32);
251
252 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
253
254 if (desc->channel[i].normalized) {
255 scales[i] = LLVMConstReal(LLVMFloatType(), mask);
256 normalized = TRUE;
257 }
258 else
259 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0);
260 }
261
262 shift += bits;
263 }
264
265 if (normalized)
266 scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
267 else
268 scaled = unswizzled;
269
270 casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
271
272 shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
273
274 /* Bitwise or all components */
275 for (i = 0; i < 4; ++i) {
276 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
277 LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
278 if (packed)
279 packed = LLVMBuildOr(builder, packed, component, "");
280 else
281 packed = component;
282 }
283 }
284
285 if (!packed)
286 packed = LLVMGetUndef(LLVMInt32Type());
287
288 if (desc->block.bits < 32)
289 packed = LLVMBuildTrunc(builder, packed, type, "");
290
291 return packed;
292 }
293
294
295 /**
296 * Fetch a pixel into a 4 float AoS.
297 *
298 * i and j are the sub-block pixel coordinates.
299 */
300 LLVMValueRef
301 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
302 const struct util_format_description *format_desc,
303 LLVMValueRef ptr,
304 LLVMValueRef i,
305 LLVMValueRef j)
306 {
307
308 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
309 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
310 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
311 format_desc->block.width == 1 &&
312 format_desc->block.height == 1 &&
313 util_is_pot(format_desc->block.bits) &&
314 format_desc->block.bits <= 32 &&
315 format_desc->is_bitmask &&
316 !format_desc->is_mixed &&
317 (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
318 format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
319 {
320 LLVMValueRef packed;
321
322 ptr = LLVMBuildBitCast(builder, ptr,
323 LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
324 "");
325
326 packed = LLVMBuildLoad(builder, ptr, "packed");
327
328 return lp_build_unpack_rgba_aos(builder, format_desc, packed);
329 }
330 else if (format_desc->fetch_rgba_float) {
331 /*
332 * Fallback to calling util_format_description::fetch_rgba_float.
333 *
334 * This is definitely not the most efficient way of fetching pixels, as
335 * we miss the opportunity to do vectorization, but this it is a
336 * convenient for formats or scenarios for which there was no opportunity
337 * or incentive to optimize.
338 */
339
340 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
341 char name[256];
342 LLVMValueRef function;
343 LLVMValueRef tmp;
344 LLVMValueRef args[4];
345
346 util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
347 format_desc->short_name);
348
349 /*
350 * Declare and bind format_desc->fetch_rgba_float().
351 */
352
353 function = LLVMGetNamedFunction(module, name);
354 if (!function) {
355 LLVMTypeRef ret_type;
356 LLVMTypeRef arg_types[4];
357 LLVMTypeRef function_type;
358
359 ret_type = LLVMVoidType();
360 arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
361 arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
362 arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
363 function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
364 function = LLVMAddFunction(module, name, function_type);
365
366 LLVMSetFunctionCallConv(function, LLVMCCallConv);
367 LLVMSetLinkage(function, LLVMExternalLinkage);
368
369 assert(LLVMIsDeclaration(function));
370
371 LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
372 }
373
374 tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
375
376 /*
377 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
378 * in the SoA vectors.
379 */
380
381 args[0] = LLVMBuildBitCast(builder, tmp,
382 LLVMPointerType(LLVMFloatType(), 0), "");
383 args[1] = ptr;
384 args[2] = i;
385 args[3] = j;
386
387 LLVMBuildCall(builder, function, args, 4, "");
388
389 return LLVMBuildLoad(builder, tmp, "");
390 }
391 else {
392 assert(0);
393 return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
394 }
395 }