Merge remote branch 'origin/7.8'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "util/u_format.h"
30 #include "util/u_memory.h"
31 #include "util/u_string.h"
32
33 #include "lp_bld_type.h"
34 #include "lp_bld_const.h"
35 #include "lp_bld_conv.h"
36 #include "lp_bld_sample.h" /* for lp_build_gather */
37 #include "lp_bld_format.h"
38
39
40 static LLVMValueRef
41 lp_build_format_swizzle_chan_soa(struct lp_type type,
42 const LLVMValueRef *unswizzled,
43 enum util_format_swizzle swizzle)
44 {
45 switch (swizzle) {
46 case UTIL_FORMAT_SWIZZLE_X:
47 case UTIL_FORMAT_SWIZZLE_Y:
48 case UTIL_FORMAT_SWIZZLE_Z:
49 case UTIL_FORMAT_SWIZZLE_W:
50 return unswizzled[swizzle];
51 case UTIL_FORMAT_SWIZZLE_0:
52 return lp_build_zero(type);
53 case UTIL_FORMAT_SWIZZLE_1:
54 return lp_build_one(type);
55 case UTIL_FORMAT_SWIZZLE_NONE:
56 return lp_build_undef(type);
57 default:
58 assert(0);
59 return lp_build_undef(type);
60 }
61 }
62
63
64 void
65 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
66 struct lp_type type,
67 const LLVMValueRef *unswizzled,
68 LLVMValueRef *swizzled)
69 {
70 if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
71 enum util_format_swizzle swizzle = format_desc->swizzle[0];
72 LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
73 swizzled[2] = swizzled[1] = swizzled[0] = depth;
74 swizzled[3] = lp_build_one(type);
75 }
76 else {
77 unsigned chan;
78 for (chan = 0; chan < 4; ++chan) {
79 enum util_format_swizzle swizzle = format_desc->swizzle[chan];
80 swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
81 }
82 }
83 }
84
85
86 /**
87 * Unpack several pixels in SoA.
88 *
89 * It takes a vector of packed pixels:
90 *
91 * packed = {P0, P1, P2, P3, ..., Pn}
92 *
93 * And will produce four vectors:
94 *
95 * red = {R0, R1, R2, R3, ..., Rn}
96 * green = {G0, G1, G2, G3, ..., Gn}
97 * blue = {B0, B1, B2, B3, ..., Bn}
98 * alpha = {A0, A1, A2, A3, ..., An}
99 *
100 * It requires that a packed pixel fits into an element of the output
101 * channels. The common case is when converting pixel with a depth of 32 bit or
102 * less into floats.
103 */
104 void
105 lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
106 const struct util_format_description *format_desc,
107 struct lp_type type,
108 LLVMValueRef packed,
109 LLVMValueRef *rgba)
110 {
111 LLVMValueRef inputs[4];
112 unsigned start;
113 unsigned chan;
114
115 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
116 assert(format_desc->block.width == 1);
117 assert(format_desc->block.height == 1);
118 assert(format_desc->block.bits <= type.width);
119 /* FIXME: Support more output types */
120 assert(type.floating);
121 assert(type.width == 32);
122
123 /* Decode the input vector components */
124 start = 0;
125 for (chan = 0; chan < format_desc->nr_channels; ++chan) {
126 unsigned width = format_desc->channel[chan].size;
127 unsigned stop = start + width;
128 LLVMValueRef input;
129
130 input = packed;
131
132 switch(format_desc->channel[chan].type) {
133 case UTIL_FORMAT_TYPE_VOID:
134 input = lp_build_undef(type);
135 break;
136
137 case UTIL_FORMAT_TYPE_UNSIGNED:
138 /*
139 * Align the LSB
140 */
141
142 if (start) {
143 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), "");
144 }
145
146 /*
147 * Zero the MSBs
148 */
149
150 if (stop < format_desc->block.bits) {
151 unsigned mask = ((unsigned long long)1 << width) - 1;
152 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), "");
153 }
154
155 /*
156 * Type conversion
157 */
158
159 if (type.floating) {
160 if(format_desc->channel[chan].normalized)
161 input = lp_build_unsigned_norm_to_float(builder, width, type, input);
162 else
163 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
164 }
165 else {
166 /* FIXME */
167 assert(0);
168 input = lp_build_undef(type);
169 }
170
171 break;
172
173 case UTIL_FORMAT_TYPE_SIGNED:
174 /*
175 * Align the sign bit first.
176 */
177
178 if (stop < type.width) {
179 unsigned bits = type.width - stop;
180 LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
181 input = LLVMBuildShl(builder, input, bits_val, "");
182 }
183
184 /*
185 * Align the LSB (with an arithmetic shift to preserve the sign)
186 */
187
188 if (format_desc->channel[chan].size < type.width) {
189 unsigned bits = type.width - format_desc->channel[chan].size;
190 LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
191 input = LLVMBuildAShr(builder, input, bits_val, "");
192 }
193
194 /*
195 * Type conversion
196 */
197
198 if (type.floating) {
199 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
200 if (format_desc->channel[chan].normalized) {
201 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
202 LLVMValueRef scale_val = lp_build_const_vec(type, scale);
203 input = LLVMBuildMul(builder, input, scale_val, "");
204 }
205 }
206 else {
207 /* FIXME */
208 assert(0);
209 input = lp_build_undef(type);
210 }
211
212 break;
213
214 case UTIL_FORMAT_TYPE_FLOAT:
215 if (type.floating) {
216 assert(start == 0);
217 assert(stop == 32);
218 assert(type.width == 32);
219 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), "");
220 }
221 else {
222 /* FIXME */
223 assert(0);
224 input = lp_build_undef(type);
225 }
226 break;
227
228 case UTIL_FORMAT_TYPE_FIXED:
229 if (type.floating) {
230 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
231 LLVMValueRef scale_val = lp_build_const_vec(type, scale);
232 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
233 input = LLVMBuildMul(builder, input, scale_val, "");
234 }
235 else {
236 /* FIXME */
237 assert(0);
238 input = lp_build_undef(type);
239 }
240 break;
241
242 default:
243 assert(0);
244 input = lp_build_undef(type);
245 break;
246 }
247
248 inputs[chan] = input;
249
250 start = stop;
251 }
252
253 lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
254 }
255
256
257 /**
258 * Fetch a pixel into a SoA.
259 *
260 * i and j are the sub-block pixel coordinates.
261 */
262 void
263 lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
264 const struct util_format_description *format_desc,
265 struct lp_type type,
266 LLVMValueRef base_ptr,
267 LLVMValueRef offset,
268 LLVMValueRef i,
269 LLVMValueRef j,
270 LLVMValueRef *rgba)
271 {
272
273 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
274 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
275 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
276 format_desc->block.width == 1 &&
277 format_desc->block.height == 1 &&
278 format_desc->block.bits <= type.width &&
279 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
280 format_desc->channel[0].size == 32))
281 {
282 /*
283 * The packed pixel fits into an element of the destination format. Put
284 * the packed pixels into a vector and estract each component for all
285 * vector elements in parallel.
286 */
287
288 LLVMValueRef packed;
289
290 /*
291 * gather the texels from the texture
292 */
293 packed = lp_build_gather(builder,
294 type.length,
295 format_desc->block.bits,
296 type.width,
297 base_ptr, offset);
298
299 /*
300 * convert texels to float rgba
301 */
302 lp_build_unpack_rgba_soa(builder,
303 format_desc,
304 type,
305 packed, rgba);
306 }
307 else {
308 /*
309 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
310 *
311 * This is not the most efficient way of fetching pixels, as
312 * we miss some opportunities to do vectorization, but this it is a
313 * convenient for formats or scenarios for which there was no opportunity
314 * or incentive to optimize.
315 */
316
317 unsigned k, chan;
318
319 assert(type.floating);
320
321 for (chan = 0; chan < 4; ++chan) {
322 rgba[chan] = lp_build_undef(type);
323 }
324
325 for(k = 0; k < type.length; ++k) {
326 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
327 LLVMValueRef offset_elem;
328 LLVMValueRef ptr;
329 LLVMValueRef i_elem, j_elem;
330 LLVMValueRef tmp;
331
332 offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
333 ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
334
335 i_elem = LLVMBuildExtractElement(builder, i, index, "");
336 j_elem = LLVMBuildExtractElement(builder, j, index, "");
337
338 tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, i_elem, j_elem);
339
340 /*
341 * AoS to SoA
342 */
343
344 for (chan = 0; chan < 4; ++chan) {
345 LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
346 tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
347 rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, "");
348 }
349 }
350 }
351 }