gallivm: Always use floating-point operators for floating-point types
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_defines.h"
30
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
34
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_format.h"
41
42
43 void
44 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
45 struct lp_build_context *bld,
46 const LLVMValueRef *unswizzled,
47 LLVMValueRef swizzled_out[4])
48 {
49 assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
50 assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
51
52 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
53 /*
54 * Return zzz1 for depth-stencil formats.
55 *
56 * XXX: Allow to control the depth swizzle with an additional parameter,
57 * as the caller may wish another depth swizzle, or retain the stencil
58 * value.
59 */
60 enum util_format_swizzle swizzle = format_desc->swizzle[0];
61 LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
62 swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
63 swizzled_out[3] = bld->one;
64 }
65 else {
66 unsigned chan;
67 for (chan = 0; chan < 4; ++chan) {
68 enum util_format_swizzle swizzle = format_desc->swizzle[chan];
69 swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
70 }
71 }
72 }
73
74
75 /**
76 * Unpack several pixels in SoA.
77 *
78 * It takes a vector of packed pixels:
79 *
80 * packed = {P0, P1, P2, P3, ..., Pn}
81 *
82 * And will produce four vectors:
83 *
84 * red = {R0, R1, R2, R3, ..., Rn}
85 * green = {G0, G1, G2, G3, ..., Gn}
86 * blue = {B0, B1, B2, B3, ..., Bn}
87 * alpha = {A0, A1, A2, A3, ..., An}
88 *
89 * It requires that a packed pixel fits into an element of the output
90 * channels. The common case is when converting pixel with a depth of 32 bit or
91 * less into floats.
92 *
93 * \param format_desc the format of the 'packed' incoming pixel vector
94 * \param type the desired type for rgba_out (type.length = n, above)
95 * \param packed the incoming vector of packed pixels
96 * \param rgba_out returns the SoA R,G,B,A vectors
97 */
98 void
99 lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
100 const struct util_format_description *format_desc,
101 struct lp_type type,
102 LLVMValueRef packed,
103 LLVMValueRef rgba_out[4])
104 {
105 struct lp_build_context bld;
106 LLVMValueRef inputs[4];
107 unsigned start;
108 unsigned chan;
109
110 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
111 assert(format_desc->block.width == 1);
112 assert(format_desc->block.height == 1);
113 assert(format_desc->block.bits <= type.width);
114 /* FIXME: Support more output types */
115 assert(type.floating);
116 assert(type.width == 32);
117
118 lp_build_context_init(&bld, builder, type);
119
120 /* Decode the input vector components */
121 start = 0;
122 for (chan = 0; chan < format_desc->nr_channels; ++chan) {
123 const unsigned width = format_desc->channel[chan].size;
124 const unsigned stop = start + width;
125 LLVMValueRef input;
126
127 input = packed;
128
129 switch(format_desc->channel[chan].type) {
130 case UTIL_FORMAT_TYPE_VOID:
131 input = lp_build_undef(type);
132 break;
133
134 case UTIL_FORMAT_TYPE_UNSIGNED:
135 /*
136 * Align the LSB
137 */
138
139 if (start) {
140 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), "");
141 }
142
143 /*
144 * Zero the MSBs
145 */
146
147 if (stop < format_desc->block.bits) {
148 unsigned mask = ((unsigned long long)1 << width) - 1;
149 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), "");
150 }
151
152 /*
153 * Type conversion
154 */
155
156 if (type.floating) {
157 if(format_desc->channel[chan].normalized)
158 input = lp_build_unsigned_norm_to_float(builder, width, type, input);
159 else
160 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
161 }
162 else {
163 /* FIXME */
164 assert(0);
165 input = lp_build_undef(type);
166 }
167
168 break;
169
170 case UTIL_FORMAT_TYPE_SIGNED:
171 /*
172 * Align the sign bit first.
173 */
174
175 if (stop < type.width) {
176 unsigned bits = type.width - stop;
177 LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
178 input = LLVMBuildShl(builder, input, bits_val, "");
179 }
180
181 /*
182 * Align the LSB (with an arithmetic shift to preserve the sign)
183 */
184
185 if (format_desc->channel[chan].size < type.width) {
186 unsigned bits = type.width - format_desc->channel[chan].size;
187 LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
188 input = LLVMBuildAShr(builder, input, bits_val, "");
189 }
190
191 /*
192 * Type conversion
193 */
194
195 if (type.floating) {
196 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
197 if (format_desc->channel[chan].normalized) {
198 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
199 LLVMValueRef scale_val = lp_build_const_vec(type, scale);
200 input = LLVMBuildFMul(builder, input, scale_val, "");
201 }
202 }
203 else {
204 /* FIXME */
205 assert(0);
206 input = lp_build_undef(type);
207 }
208
209 break;
210
211 case UTIL_FORMAT_TYPE_FLOAT:
212 if (type.floating) {
213 assert(start == 0);
214 assert(stop == 32);
215 assert(type.width == 32);
216 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), "");
217 }
218 else {
219 /* FIXME */
220 assert(0);
221 input = lp_build_undef(type);
222 }
223 break;
224
225 case UTIL_FORMAT_TYPE_FIXED:
226 if (type.floating) {
227 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
228 LLVMValueRef scale_val = lp_build_const_vec(type, scale);
229 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
230 input = LLVMBuildFMul(builder, input, scale_val, "");
231 }
232 else {
233 /* FIXME */
234 assert(0);
235 input = lp_build_undef(type);
236 }
237 break;
238
239 default:
240 assert(0);
241 input = lp_build_undef(type);
242 break;
243 }
244
245 inputs[chan] = input;
246
247 start = stop;
248 }
249
250 lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
251 }
252
253
254 void
255 lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
256 struct lp_type dst_type,
257 LLVMValueRef packed,
258 LLVMValueRef *rgba)
259 {
260 LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
261 unsigned chan;
262
263 packed = LLVMBuildBitCast(builder, packed,
264 lp_build_int_vec_type(dst_type), "");
265
266 /* Decode the input vector components */
267 for (chan = 0; chan < 4; ++chan) {
268 unsigned start = chan*8;
269 unsigned stop = start + 8;
270 LLVMValueRef input;
271
272 input = packed;
273
274 if (start)
275 input = LLVMBuildLShr(builder, input,
276 lp_build_const_int_vec(dst_type, start), "");
277
278 if (stop < 32)
279 input = LLVMBuildAnd(builder, input, mask, "");
280
281 input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
282
283 rgba[chan] = input;
284 }
285 }
286
287
288
289 /**
290 * Fetch a texels from a texture, returning them in SoA layout.
291 *
292 * \param type the desired return type for 'rgba'. The vector length
293 * is the number of texels to fetch
294 *
295 * \param base_ptr points to start of the texture image block. For non-
296 * compressed formats, this simply points to the texel.
297 * For compressed formats, it points to the start of the
298 * compressed data block.
299 *
300 * \param i, j the sub-block pixel coordinates. For non-compressed formats
301 * these will always be (0,0). For compressed formats, i will
302 * be in [0, block_width-1] and j will be in [0, block_height-1].
303 */
304 void
305 lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
306 const struct util_format_description *format_desc,
307 struct lp_type type,
308 LLVMValueRef base_ptr,
309 LLVMValueRef offset,
310 LLVMValueRef i,
311 LLVMValueRef j,
312 LLVMValueRef rgba_out[4])
313 {
314
315 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
316 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
317 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
318 format_desc->block.width == 1 &&
319 format_desc->block.height == 1 &&
320 format_desc->block.bits <= type.width &&
321 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
322 format_desc->channel[0].size == 32))
323 {
324 /*
325 * The packed pixel fits into an element of the destination format. Put
326 * the packed pixels into a vector and extract each component for all
327 * vector elements in parallel.
328 */
329
330 LLVMValueRef packed;
331
332 /*
333 * gather the texels from the texture
334 * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
335 */
336 packed = lp_build_gather(builder,
337 type.length,
338 format_desc->block.bits,
339 type.width,
340 base_ptr, offset);
341
342 /*
343 * convert texels to float rgba
344 */
345 lp_build_unpack_rgba_soa(builder,
346 format_desc,
347 type,
348 packed, rgba_out);
349 return;
350 }
351
352 /*
353 * Try calling lp_build_fetch_rgba_aos for all pixels.
354 */
355
356 if (util_format_fits_8unorm(format_desc) &&
357 type.floating && type.width == 32 && type.length == 4) {
358 struct lp_type tmp_type;
359 LLVMValueRef tmp;
360
361 memset(&tmp_type, 0, sizeof tmp_type);
362 tmp_type.width = 8;
363 tmp_type.length = type.length * 4;
364 tmp_type.norm = TRUE;
365
366 tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
367 base_ptr, offset, i, j);
368
369 lp_build_rgba8_to_f32_soa(builder,
370 type,
371 tmp,
372 rgba_out);
373
374 return;
375 }
376
377 /*
378 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
379 *
380 * This is not the most efficient way of fetching pixels, as we
381 * miss some opportunities to do vectorization, but this is
382 * convenient for formats or scenarios for which there was no
383 * opportunity or incentive to optimize.
384 */
385
386 {
387 unsigned k, chan;
388 struct lp_type tmp_type;
389
390 tmp_type = type;
391 tmp_type.length = 4;
392
393 for (chan = 0; chan < 4; ++chan) {
394 rgba_out[chan] = lp_build_undef(type);
395 }
396
397 /* loop over number of pixels */
398 for(k = 0; k < type.length; ++k) {
399 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
400 LLVMValueRef offset_elem;
401 LLVMValueRef i_elem, j_elem;
402 LLVMValueRef tmp;
403
404 offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
405
406 i_elem = LLVMBuildExtractElement(builder, i, index, "");
407 j_elem = LLVMBuildExtractElement(builder, j, index, "");
408
409 /* Get a single float[4]={R,G,B,A} pixel */
410 tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
411 base_ptr, offset_elem,
412 i_elem, j_elem);
413
414 /*
415 * Insert the AoS tmp value channels into the SoA result vectors at
416 * position = 'index'.
417 */
418 for (chan = 0; chan < 4; ++chan) {
419 LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
420 tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
421 rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
422 tmp_chan, index, "");
423 }
424 }
425 }
426 }