Merge branch '7.8'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "util/u_format.h"
30 #include "util/u_memory.h"
31 #include "util/u_string.h"
32
33 #include "lp_bld_type.h"
34 #include "lp_bld_const.h"
35 #include "lp_bld_conv.h"
36 #include "lp_bld_sample.h" /* for lp_build_gather */
37 #include "lp_bld_init.h"
38 #include "lp_bld_format.h"
39
40
41 static LLVMValueRef
42 lp_build_format_swizzle_chan_soa(struct lp_type type,
43 const LLVMValueRef *unswizzled,
44 enum util_format_swizzle swizzle)
45 {
46 switch (swizzle) {
47 case UTIL_FORMAT_SWIZZLE_X:
48 case UTIL_FORMAT_SWIZZLE_Y:
49 case UTIL_FORMAT_SWIZZLE_Z:
50 case UTIL_FORMAT_SWIZZLE_W:
51 return unswizzled[swizzle];
52 case UTIL_FORMAT_SWIZZLE_0:
53 return lp_build_zero(type);
54 case UTIL_FORMAT_SWIZZLE_1:
55 return lp_build_one(type);
56 case UTIL_FORMAT_SWIZZLE_NONE:
57 return lp_build_undef(type);
58 default:
59 assert(0);
60 return lp_build_undef(type);
61 }
62 }
63
64
65 void
66 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
67 struct lp_type type,
68 const LLVMValueRef *unswizzled,
69 LLVMValueRef *swizzled)
70 {
71 if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
72 enum util_format_swizzle swizzle = format_desc->swizzle[0];
73 LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
74 swizzled[2] = swizzled[1] = swizzled[0] = depth;
75 swizzled[3] = lp_build_one(type);
76 }
77 else {
78 unsigned chan;
79 for (chan = 0; chan < 4; ++chan) {
80 enum util_format_swizzle swizzle = format_desc->swizzle[chan];
81 swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
82 }
83 }
84 }
85
86
87 /**
88 * Unpack several pixels in SoA.
89 *
90 * It takes a vector of packed pixels:
91 *
92 * packed = {P0, P1, P2, P3, ..., Pn}
93 *
94 * And will produce four vectors:
95 *
96 * red = {R0, R1, R2, R3, ..., Rn}
97 * green = {G0, G1, G2, G3, ..., Gn}
98 * blue = {B0, B1, B2, B3, ..., Bn}
99 * alpha = {A0, A1, A2, A3, ..., An}
100 *
101 * It requires that a packed pixel fits into an element of the output
102 * channels. The common case is when converting pixel with a depth of 32 bit or
103 * less into floats.
104 */
105 void
106 lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
107 const struct util_format_description *format_desc,
108 struct lp_type type,
109 LLVMValueRef packed,
110 LLVMValueRef *rgba)
111 {
112 LLVMValueRef inputs[4];
113 unsigned start;
114 unsigned chan;
115
116 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
117 assert(format_desc->block.width == 1);
118 assert(format_desc->block.height == 1);
119 assert(format_desc->block.bits <= type.width);
120 /* FIXME: Support more output types */
121 assert(type.floating);
122 assert(type.width == 32);
123
124 /* Decode the input vector components */
125 start = 0;
126 for (chan = 0; chan < format_desc->nr_channels; ++chan) {
127 unsigned width = format_desc->channel[chan].size;
128 unsigned stop = start + width;
129 LLVMValueRef input;
130
131 input = packed;
132
133 switch(format_desc->channel[chan].type) {
134 case UTIL_FORMAT_TYPE_VOID:
135 input = lp_build_undef(type);
136 break;
137
138 case UTIL_FORMAT_TYPE_UNSIGNED:
139 /*
140 * Align the LSB
141 */
142
143 if (start) {
144 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), "");
145 }
146
147 /*
148 * Zero the MSBs
149 */
150
151 if (stop < format_desc->block.bits) {
152 unsigned mask = ((unsigned long long)1 << width) - 1;
153 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), "");
154 }
155
156 /*
157 * Type conversion
158 */
159
160 if (type.floating) {
161 if(format_desc->channel[chan].normalized)
162 input = lp_build_unsigned_norm_to_float(builder, width, type, input);
163 else
164 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
165 }
166 else {
167 /* FIXME */
168 assert(0);
169 input = lp_build_undef(type);
170 }
171
172 break;
173
174 case UTIL_FORMAT_TYPE_SIGNED:
175 /*
176 * Align the sign bit first.
177 */
178
179 if (stop < type.width) {
180 unsigned bits = type.width - stop;
181 LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
182 input = LLVMBuildShl(builder, input, bits_val, "");
183 }
184
185 /*
186 * Align the LSB (with an arithmetic shift to preserve the sign)
187 */
188
189 if (format_desc->channel[chan].size < type.width) {
190 unsigned bits = type.width - format_desc->channel[chan].size;
191 LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
192 input = LLVMBuildAShr(builder, input, bits_val, "");
193 }
194
195 /*
196 * Type conversion
197 */
198
199 if (type.floating) {
200 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
201 if (format_desc->channel[chan].normalized) {
202 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
203 LLVMValueRef scale_val = lp_build_const_vec(type, scale);
204 input = LLVMBuildMul(builder, input, scale_val, "");
205 }
206 }
207 else {
208 /* FIXME */
209 assert(0);
210 input = lp_build_undef(type);
211 }
212
213 break;
214
215 case UTIL_FORMAT_TYPE_FLOAT:
216 if (type.floating) {
217 assert(start == 0);
218 assert(stop == 32);
219 assert(type.width == 32);
220 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), "");
221 }
222 else {
223 /* FIXME */
224 assert(0);
225 input = lp_build_undef(type);
226 }
227 break;
228
229 case UTIL_FORMAT_TYPE_FIXED:
230 if (type.floating) {
231 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
232 LLVMValueRef scale_val = lp_build_const_vec(type, scale);
233 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
234 input = LLVMBuildMul(builder, input, scale_val, "");
235 }
236 else {
237 /* FIXME */
238 assert(0);
239 input = lp_build_undef(type);
240 }
241 break;
242
243 default:
244 assert(0);
245 input = lp_build_undef(type);
246 break;
247 }
248
249 inputs[chan] = input;
250
251 start = stop;
252 }
253
254 lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
255 }
256
257
258 /**
259 * Fetch a pixel into a SoA.
260 *
261 * i and j are the sub-block pixel coordinates.
262 */
263 void
264 lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
265 const struct util_format_description *format_desc,
266 struct lp_type type,
267 LLVMValueRef base_ptr,
268 LLVMValueRef offset,
269 LLVMValueRef i,
270 LLVMValueRef j,
271 LLVMValueRef *rgba)
272 {
273
274 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
275 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
276 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
277 format_desc->block.width == 1 &&
278 format_desc->block.height == 1 &&
279 format_desc->block.bits <= type.width &&
280 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
281 format_desc->channel[0].size == 32))
282 {
283 /*
284 * The packed pixel fits into an element of the destination format. Put
285 * the packed pixels into a vector and estract each component for all
286 * vector elements in parallel.
287 */
288
289 LLVMValueRef packed;
290
291 /*
292 * gather the texels from the texture
293 */
294 packed = lp_build_gather(builder,
295 type.length,
296 format_desc->block.bits,
297 type.width,
298 base_ptr, offset);
299
300 /*
301 * convert texels to float rgba
302 */
303 lp_build_unpack_rgba_soa(builder,
304 format_desc,
305 type,
306 packed, rgba);
307 }
308 else {
309 /*
310 * Fallback to calling util_format_description::fetch_rgba_float for each
311 * pixel.
312 *
313 * This is definitely not the most efficient way of fetching pixels, as
314 * we miss the opportunity to do vectorization, but this it is a
315 * convenient for formats or scenarios for which there was no opportunity
316 * or incentive to optimize.
317 */
318
319 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
320 char name[256];
321 LLVMValueRef function;
322 LLVMValueRef tmp;
323 unsigned k, chan;
324
325 assert(type.floating);
326
327 util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name);
328
329 /*
330 * Declare and bind format_desc->fetch_rgba_float().
331 */
332
333 function = LLVMGetNamedFunction(module, name);
334 if (!function) {
335 LLVMTypeRef ret_type;
336 LLVMTypeRef arg_types[4];
337 LLVMTypeRef function_type;
338
339 ret_type = LLVMVoidType();
340 arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
341 arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
342 arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
343 function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
344 function = LLVMAddFunction(module, name, function_type);
345
346 LLVMSetFunctionCallConv(function, LLVMCCallConv);
347 LLVMSetLinkage(function, LLVMExternalLinkage);
348
349 assert(LLVMIsDeclaration(function));
350
351 LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
352 }
353
354 for (chan = 0; chan < 4; ++chan) {
355 rgba[chan] = lp_build_undef(type);
356 }
357
358 tmp = LLVMBuildArrayAlloca(builder,
359 LLVMFloatType(),
360 LLVMConstInt(LLVMInt32Type(), 4, 0),
361 "");
362
363 /*
364 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
365 * in the SoA vectors.
366 */
367
368 for(k = 0; k < type.length; ++k) {
369 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
370 LLVMValueRef offset_elem;
371 LLVMValueRef ptr;
372 LLVMValueRef i_elem, j_elem;
373 LLVMValueRef args[4];
374
375 offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
376 ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
377
378 i_elem = LLVMBuildExtractElement(builder, i, index, "");
379 j_elem = LLVMBuildExtractElement(builder, j, index, "");
380
381 args[0] = tmp;
382 args[1] = ptr;
383 args[2] = i_elem;
384 args[3] = j_elem;
385
386 LLVMBuildCall(builder, function, args, 4, "");
387
388 for (chan = 0; chan < 4; ++chan) {
389 LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
390 tmp_chan = LLVMBuildGEP(builder, tmp, &chan_val, 1, "");
391 tmp_chan = LLVMBuildLoad(builder, tmp_chan, "");
392 rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, "");
393 }
394 }
395 }
396 }