Merge commit 'origin/graw-tests'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_defines.h"
30
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
34
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_sample.h" /* for lp_build_gather */
40 #include "lp_bld_format.h"
41
42
43 void
44 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
45 struct lp_build_context *bld,
46 const LLVMValueRef *unswizzled,
47 LLVMValueRef swizzled_out[4])
48 {
49 assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
50 assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
51
52 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
53 /*
54 * Return zzz1 for depth-stencil formats.
55 *
56 * XXX: Allow to control the depth swizzle with an additional parameter,
57 * as the caller may wish another depth swizzle, or retain the stencil
58 * value.
59 */
60 enum util_format_swizzle swizzle = format_desc->swizzle[0];
61 LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
62 swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
63 swizzled_out[3] = bld->one;
64 }
65 else {
66 unsigned chan;
67 for (chan = 0; chan < 4; ++chan) {
68 enum util_format_swizzle swizzle = format_desc->swizzle[chan];
69 swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
70 }
71 }
72 }
73
74
75 /**
76 * Unpack several pixels in SoA.
77 *
78 * It takes a vector of packed pixels:
79 *
80 * packed = {P0, P1, P2, P3, ..., Pn}
81 *
82 * And will produce four vectors:
83 *
84 * red = {R0, R1, R2, R3, ..., Rn}
85 * green = {G0, G1, G2, G3, ..., Gn}
86 * blue = {B0, B1, B2, B3, ..., Bn}
87 * alpha = {A0, A1, A2, A3, ..., An}
88 *
89 * It requires that a packed pixel fits into an element of the output
90 * channels. The common case is when converting pixel with a depth of 32 bit or
91 * less into floats.
92 *
93 * \param format_desc the format of the 'packed' incoming pixel vector
94 * \param type the desired type for rgba_out (type.length = n, above)
95 * \param packed the incoming vector of packed pixels
96 * \param rgba_out returns the SoA R,G,B,A vectors
97 */
98 void
99 lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
100 const struct util_format_description *format_desc,
101 struct lp_type type,
102 LLVMValueRef packed,
103 LLVMValueRef rgba_out[4])
104 {
105 struct lp_build_context bld;
106 LLVMValueRef inputs[4];
107 unsigned start;
108 unsigned chan;
109
110 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
111 assert(format_desc->block.width == 1);
112 assert(format_desc->block.height == 1);
113 assert(format_desc->block.bits <= type.width);
114 /* FIXME: Support more output types */
115 assert(type.floating);
116 assert(type.width == 32);
117
118 lp_build_context_init(&bld, builder, type);
119
120 /* Decode the input vector components */
121 start = 0;
122 for (chan = 0; chan < format_desc->nr_channels; ++chan) {
123 const unsigned width = format_desc->channel[chan].size;
124 const unsigned stop = start + width;
125 LLVMValueRef input;
126
127 input = packed;
128
129 switch(format_desc->channel[chan].type) {
130 case UTIL_FORMAT_TYPE_VOID:
131 input = lp_build_undef(type);
132 break;
133
134 case UTIL_FORMAT_TYPE_UNSIGNED:
135 /*
136 * Align the LSB
137 */
138
139 if (start) {
140 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), "");
141 }
142
143 /*
144 * Zero the MSBs
145 */
146
147 if (stop < format_desc->block.bits) {
148 unsigned mask = ((unsigned long long)1 << width) - 1;
149 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), "");
150 }
151
152 /*
153 * Type conversion
154 */
155
156 if (type.floating) {
157 if(format_desc->channel[chan].normalized)
158 input = lp_build_unsigned_norm_to_float(builder, width, type, input);
159 else
160 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
161 }
162 else {
163 /* FIXME */
164 assert(0);
165 input = lp_build_undef(type);
166 }
167
168 break;
169
170 case UTIL_FORMAT_TYPE_SIGNED:
171 /*
172 * Align the sign bit first.
173 */
174
175 if (stop < type.width) {
176 unsigned bits = type.width - stop;
177 LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
178 input = LLVMBuildShl(builder, input, bits_val, "");
179 }
180
181 /*
182 * Align the LSB (with an arithmetic shift to preserve the sign)
183 */
184
185 if (format_desc->channel[chan].size < type.width) {
186 unsigned bits = type.width - format_desc->channel[chan].size;
187 LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
188 input = LLVMBuildAShr(builder, input, bits_val, "");
189 }
190
191 /*
192 * Type conversion
193 */
194
195 if (type.floating) {
196 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
197 if (format_desc->channel[chan].normalized) {
198 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
199 LLVMValueRef scale_val = lp_build_const_vec(type, scale);
200 input = LLVMBuildMul(builder, input, scale_val, "");
201 }
202 }
203 else {
204 /* FIXME */
205 assert(0);
206 input = lp_build_undef(type);
207 }
208
209 break;
210
211 case UTIL_FORMAT_TYPE_FLOAT:
212 if (type.floating) {
213 assert(start == 0);
214 assert(stop == 32);
215 assert(type.width == 32);
216 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), "");
217 }
218 else {
219 /* FIXME */
220 assert(0);
221 input = lp_build_undef(type);
222 }
223 break;
224
225 case UTIL_FORMAT_TYPE_FIXED:
226 if (type.floating) {
227 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
228 LLVMValueRef scale_val = lp_build_const_vec(type, scale);
229 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
230 input = LLVMBuildMul(builder, input, scale_val, "");
231 }
232 else {
233 /* FIXME */
234 assert(0);
235 input = lp_build_undef(type);
236 }
237 break;
238
239 default:
240 assert(0);
241 input = lp_build_undef(type);
242 break;
243 }
244
245 inputs[chan] = input;
246
247 start = stop;
248 }
249
250 lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
251 }
252
253
254 /**
255 * Fetch a texels from a texture, returning them in SoA layout.
256 *
257 * \param type the desired return type for 'rgba'. The vector length
258 * is the number of texels to fetch
259 *
260 * \param base_ptr points to start of the texture image block. For non-
261 * compressed formats, this simply points to the texel.
262 * For compressed formats, it points to the start of the
263 * compressed data block.
264 *
265 * \param i, j the sub-block pixel coordinates. For non-compressed formats
266 * these will always be (0,0). For compressed formats, i will
267 * be in [0, block_width-1] and j will be in [0, block_height-1].
268 */
269 void
270 lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
271 const struct util_format_description *format_desc,
272 struct lp_type type,
273 LLVMValueRef base_ptr,
274 LLVMValueRef offset,
275 LLVMValueRef i,
276 LLVMValueRef j,
277 LLVMValueRef rgba_out[4])
278 {
279
280 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
281 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
282 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
283 format_desc->block.width == 1 &&
284 format_desc->block.height == 1 &&
285 format_desc->block.bits <= type.width &&
286 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
287 format_desc->channel[0].size == 32))
288 {
289 /*
290 * The packed pixel fits into an element of the destination format. Put
291 * the packed pixels into a vector and extract each component for all
292 * vector elements in parallel.
293 */
294
295 LLVMValueRef packed;
296
297 /*
298 * gather the texels from the texture
299 * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
300 */
301 packed = lp_build_gather(builder,
302 type.length,
303 format_desc->block.bits,
304 type.width,
305 base_ptr, offset);
306
307 /*
308 * convert texels to float rgba
309 */
310 lp_build_unpack_rgba_soa(builder,
311 format_desc,
312 type,
313 packed, rgba_out);
314 }
315 else {
316 /*
317 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
318 *
319 * This is not the most efficient way of fetching pixels, as we
320 * miss some opportunities to do vectorization, but this is
321 * convenient for formats or scenarios for which there was no
322 * opportunity or incentive to optimize.
323 */
324
325 unsigned k, chan;
326
327 assert(type.floating);
328
329 for (chan = 0; chan < 4; ++chan) {
330 rgba_out[chan] = lp_build_undef(type);
331 }
332
333 /* loop over number of pixels */
334 for(k = 0; k < type.length; ++k) {
335 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
336 LLVMValueRef offset_elem;
337 LLVMValueRef ptr;
338 LLVMValueRef i_elem, j_elem;
339 LLVMValueRef tmp;
340
341 offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
342 ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
343
344 i_elem = LLVMBuildExtractElement(builder, i, index, "");
345 j_elem = LLVMBuildExtractElement(builder, j, index, "");
346
347 /* Get a single float[4]={R,G,B,A} pixel */
348 tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr,
349 i_elem, j_elem);
350
351 /*
352 * Insert the AoS tmp value channels into the SoA result vectors at
353 * position = 'index'.
354 */
355 for (chan = 0; chan < 4; ++chan) {
356 LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
357 tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
358 rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
359 tmp_chan, index, "");
360 }
361 }
362 }
363 }