gallivm: Bring aos format back to life.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * AoS pixel format manipulation.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35
36 #include "util/u_cpu_detect.h"
37 #include "util/u_format.h"
38
39 #include "lp_bld_type.h"
40 #include "lp_bld_const.h"
41 #include "lp_bld_swizzle.h"
42 #include "lp_bld_format.h"
43
44
45 /**
46 * Unpack a single pixel into its RGBA components.
47 *
48 * @param packed integer.
49 *
50 * @return RGBA in a 4 floats vector.
51 */
52 LLVMValueRef
53 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
54 const struct util_format_description *desc,
55 LLVMValueRef packed)
56 {
57 LLVMTypeRef type;
58 LLVMValueRef shifted, casted, scaled, masked;
59 LLVMValueRef shifts[4];
60 LLVMValueRef masks[4];
61 LLVMValueRef scales[4];
62 LLVMValueRef swizzles[4];
63 LLVMValueRef aux[4];
64 bool normalized;
65 int empty_channel;
66 unsigned shift;
67 unsigned i;
68
69 /* FIXME: Support more formats */
70 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
71 assert(desc->block.width == 1);
72 assert(desc->block.height == 1);
73 assert(desc->block.bits <= 32);
74
75 type = LLVMIntType(desc->block.bits);
76
77 /* Do the intermediate integer computations with 32bit integers since it
78 * matches floating point size */
79 if (desc->block.bits < 32)
80 packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
81
82 /* Broadcast the packed value to all four channels */
83 packed = LLVMBuildInsertElement(builder,
84 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
85 packed,
86 LLVMConstNull(LLVMInt32Type()),
87 "");
88 packed = LLVMBuildShuffleVector(builder,
89 packed,
90 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
91 LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
92 "");
93
94 /* Initialize vector constants */
95 normalized = FALSE;
96 empty_channel = -1;
97 shift = 0;
98 for (i = 0; i < 4; ++i) {
99 unsigned bits = desc->channel[i].size;
100
101 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
102 shifts[i] = LLVMGetUndef(LLVMInt32Type());
103 masks[i] = LLVMConstNull(LLVMInt32Type());
104 scales[i] = LLVMConstNull(LLVMFloatType());
105 empty_channel = i;
106 }
107 else {
108 unsigned mask = (1 << bits) - 1;
109
110 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
111 assert(bits < 32);
112
113 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
114 masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
115
116 if (desc->channel[i].normalized) {
117 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
118 normalized = TRUE;
119 }
120 else
121 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0);
122 }
123
124 shift += bits;
125 }
126
127 shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
128 masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
129 /* UIToFP can't be expressed in SSE2 */
130 casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
131
132 if (normalized)
133 scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
134 else
135 scaled = casted;
136
137 for (i = 0; i < 4; ++i)
138 aux[i] = LLVMGetUndef(LLVMFloatType());
139
140 for (i = 0; i < 4; ++i) {
141 enum util_format_swizzle swizzle = desc->swizzle[i];
142
143 switch (swizzle) {
144 case UTIL_FORMAT_SWIZZLE_X:
145 case UTIL_FORMAT_SWIZZLE_Y:
146 case UTIL_FORMAT_SWIZZLE_Z:
147 case UTIL_FORMAT_SWIZZLE_W:
148 swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
149 break;
150 case UTIL_FORMAT_SWIZZLE_0:
151 assert(empty_channel >= 0);
152 swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
153 break;
154 case UTIL_FORMAT_SWIZZLE_1:
155 swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
156 aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
157 break;
158 case UTIL_FORMAT_SWIZZLE_NONE:
159 swizzles[i] = LLVMGetUndef(LLVMFloatType());
160 assert(0);
161 break;
162 }
163 }
164
165 return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), "");
166 }
167
168
169 /**
170 * Take a vector with packed pixels and unpack into a rgba8 vector.
171 *
172 * Formats with bit depth smaller than 32bits are accepted, but they must be
173 * padded to 32bits.
174 */
175 LLVMValueRef
176 lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
177 const struct util_format_description *desc,
178 struct lp_type type,
179 LLVMValueRef packed)
180 {
181 struct lp_build_context bld;
182 bool rgba8;
183 LLVMValueRef res;
184 unsigned i;
185
186 lp_build_context_init(&bld, builder, type);
187
188 /* FIXME: Support more formats */
189 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
190 assert(desc->block.width == 1);
191 assert(desc->block.height == 1);
192 assert(desc->block.bits <= 32);
193
194 assert(!type.floating);
195 assert(!type.fixed);
196 assert(type.norm);
197 assert(type.width == 8);
198 assert(type.length % 4 == 0);
199
200 rgba8 = TRUE;
201 for(i = 0; i < 4; ++i) {
202 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
203 desc->channel[i].type == UTIL_FORMAT_TYPE_VOID);
204 if(desc->channel[0].size != 8)
205 rgba8 = FALSE;
206 }
207
208 if(rgba8) {
209 /*
210 * The pixel is already in a rgba8 format variant. All it is necessary
211 * is to swizzle the channels.
212 */
213
214 unsigned char swizzles[4];
215 boolean zeros[4]; /* bitwise AND mask */
216 boolean ones[4]; /* bitwise OR mask */
217 boolean swizzles_needed = FALSE;
218 boolean zeros_needed = FALSE;
219 boolean ones_needed = FALSE;
220
221 for(i = 0; i < 4; ++i) {
222 enum util_format_swizzle swizzle = desc->swizzle[i];
223
224 /* Initialize with the no-op case */
225 swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i;
226 zeros[i] = TRUE;
227 ones[i] = FALSE;
228
229 switch (swizzle) {
230 case UTIL_FORMAT_SWIZZLE_X:
231 case UTIL_FORMAT_SWIZZLE_Y:
232 case UTIL_FORMAT_SWIZZLE_Z:
233 case UTIL_FORMAT_SWIZZLE_W:
234 if(swizzle != swizzles[i]) {
235 swizzles[i] = swizzle;
236 swizzles_needed = TRUE;
237 }
238 break;
239 case UTIL_FORMAT_SWIZZLE_0:
240 zeros[i] = FALSE;
241 zeros_needed = TRUE;
242 break;
243 case UTIL_FORMAT_SWIZZLE_1:
244 ones[i] = TRUE;
245 ones_needed = TRUE;
246 break;
247 case UTIL_FORMAT_SWIZZLE_NONE:
248 assert(0);
249 break;
250 }
251 }
252
253 res = packed;
254
255 if(swizzles_needed)
256 res = lp_build_swizzle1_aos(&bld, res, swizzles);
257
258 if(zeros_needed) {
259 /* Mask out zero channels */
260 LLVMValueRef mask = lp_build_const_mask_aos(type, zeros);
261 res = LLVMBuildAnd(builder, res, mask, "");
262 }
263
264 if(ones_needed) {
265 /* Or one channels */
266 LLVMValueRef mask = lp_build_const_mask_aos(type, ones);
267 res = LLVMBuildOr(builder, res, mask, "");
268 }
269 }
270 else {
271 /* FIXME */
272 assert(0);
273 res = lp_build_undef(type);
274 }
275
276 return res;
277 }
278
279
280 /**
281 * Pack a single pixel.
282 *
283 * @param rgba 4 float vector with the unpacked components.
284 *
285 * XXX: This is mostly for reference and testing -- operating a single pixel at
286 * a time is rarely if ever needed.
287 */
288 LLVMValueRef
289 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
290 const struct util_format_description *desc,
291 LLVMValueRef rgba)
292 {
293 LLVMTypeRef type;
294 LLVMValueRef packed = NULL;
295 LLVMValueRef swizzles[4];
296 LLVMValueRef shifted, casted, scaled, unswizzled;
297 LLVMValueRef shifts[4];
298 LLVMValueRef scales[4];
299 bool normalized;
300 unsigned shift;
301 unsigned i, j;
302
303 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
304 assert(desc->block.width == 1);
305 assert(desc->block.height == 1);
306
307 type = LLVMIntType(desc->block.bits);
308
309 /* Unswizzle the color components into the source vector. */
310 for (i = 0; i < 4; ++i) {
311 for (j = 0; j < 4; ++j) {
312 if (desc->swizzle[j] == i)
313 break;
314 }
315 if (j < 4)
316 swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
317 else
318 swizzles[i] = LLVMGetUndef(LLVMInt32Type());
319 }
320
321 unswizzled = LLVMBuildShuffleVector(builder, rgba,
322 LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
323 LLVMConstVector(swizzles, 4), "");
324
325 normalized = FALSE;
326 shift = 0;
327 for (i = 0; i < 4; ++i) {
328 unsigned bits = desc->channel[i].size;
329
330 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
331 shifts[i] = LLVMGetUndef(LLVMInt32Type());
332 scales[i] = LLVMGetUndef(LLVMFloatType());
333 }
334 else {
335 unsigned mask = (1 << bits) - 1;
336
337 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
338 assert(bits < 32);
339
340 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
341
342 if (desc->channel[i].normalized) {
343 scales[i] = LLVMConstReal(LLVMFloatType(), mask);
344 normalized = TRUE;
345 }
346 else
347 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0);
348 }
349
350 shift += bits;
351 }
352
353 if (normalized)
354 scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
355 else
356 scaled = unswizzled;
357
358 casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
359
360 shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
361
362 /* Bitwise or all components */
363 for (i = 0; i < 4; ++i) {
364 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
365 LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
366 if (packed)
367 packed = LLVMBuildOr(builder, packed, component, "");
368 else
369 packed = component;
370 }
371 }
372
373 if (!packed)
374 packed = LLVMGetUndef(LLVMInt32Type());
375
376 if (desc->block.bits < 32)
377 packed = LLVMBuildTrunc(builder, packed, type, "");
378
379 return packed;
380 }