gallium: fix tgsi SAMPLE_L opcode to use separate source for explicit lod
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_defines.h"
30
31 #include "util/u_format.h"
32 #include "util/u_memory.h"
33 #include "util/u_string.h"
34
35 #include "lp_bld_type.h"
36 #include "lp_bld_const.h"
37 #include "lp_bld_conv.h"
38 #include "lp_bld_swizzle.h"
39 #include "lp_bld_gather.h"
40 #include "lp_bld_debug.h"
41 #include "lp_bld_format.h"
42
43
44 void
45 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
46 struct lp_build_context *bld,
47 const LLVMValueRef *unswizzled,
48 LLVMValueRef swizzled_out[4])
49 {
50 assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
51 assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
52
53 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
54 /*
55 * Return zzz1 for depth-stencil formats.
56 *
57 * XXX: Allow to control the depth swizzle with an additional parameter,
58 * as the caller may wish another depth swizzle, or retain the stencil
59 * value.
60 */
61 enum util_format_swizzle swizzle = format_desc->swizzle[0];
62 LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
63 swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
64 swizzled_out[3] = bld->one;
65 }
66 else {
67 unsigned chan;
68 for (chan = 0; chan < 4; ++chan) {
69 enum util_format_swizzle swizzle = format_desc->swizzle[chan];
70 swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
71 }
72 }
73 }
74
75
76 /**
77 * Unpack several pixels in SoA.
78 *
79 * It takes a vector of packed pixels:
80 *
81 * packed = {P0, P1, P2, P3, ..., Pn}
82 *
83 * And will produce four vectors:
84 *
85 * red = {R0, R1, R2, R3, ..., Rn}
86 * green = {G0, G1, G2, G3, ..., Gn}
87 * blue = {B0, B1, B2, B3, ..., Bn}
88 * alpha = {A0, A1, A2, A3, ..., An}
89 *
90 * It requires that a packed pixel fits into an element of the output
91 * channels. The common case is when converting pixel with a depth of 32 bit or
92 * less into floats.
93 *
94 * \param format_desc the format of the 'packed' incoming pixel vector
95 * \param type the desired type for rgba_out (type.length = n, above)
96 * \param packed the incoming vector of packed pixels
97 * \param rgba_out returns the SoA R,G,B,A vectors
98 */
99 void
100 lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
101 const struct util_format_description *format_desc,
102 struct lp_type type,
103 LLVMValueRef packed,
104 LLVMValueRef rgba_out[4])
105 {
106 LLVMBuilderRef builder = gallivm->builder;
107 struct lp_build_context bld;
108 LLVMValueRef inputs[4];
109 unsigned start;
110 unsigned chan;
111
112 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
113 assert(format_desc->block.width == 1);
114 assert(format_desc->block.height == 1);
115 assert(format_desc->block.bits <= type.width);
116 /* FIXME: Support more output types */
117 assert(type.width == 32);
118
119 lp_build_context_init(&bld, gallivm, type);
120
121 /* Decode the input vector components */
122 start = 0;
123 for (chan = 0; chan < format_desc->nr_channels; ++chan) {
124 const unsigned width = format_desc->channel[chan].size;
125 const unsigned stop = start + width;
126 LLVMValueRef input;
127
128 input = packed;
129
130 switch(format_desc->channel[chan].type) {
131 case UTIL_FORMAT_TYPE_VOID:
132 input = lp_build_undef(gallivm, type);
133 break;
134
135 case UTIL_FORMAT_TYPE_UNSIGNED:
136 /*
137 * Align the LSB
138 */
139
140 if (start) {
141 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
142 }
143
144 /*
145 * Zero the MSBs
146 */
147
148 if (stop < format_desc->block.bits) {
149 unsigned mask = ((unsigned long long)1 << width) - 1;
150 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
151 }
152
153 /*
154 * Type conversion
155 */
156
157 if (type.floating) {
158 if(format_desc->channel[chan].normalized)
159 input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
160 else
161 input = LLVMBuildSIToFP(builder, input,
162 lp_build_vec_type(gallivm, type), "");
163 }
164 else if (format_desc->channel[chan].pure_integer) {
165 /* Nothing to do */
166 } else {
167 /* FIXME */
168 assert(0);
169 }
170
171 break;
172
173 case UTIL_FORMAT_TYPE_SIGNED:
174 /*
175 * Align the sign bit first.
176 */
177
178 if (stop < type.width) {
179 unsigned bits = type.width - stop;
180 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
181 input = LLVMBuildShl(builder, input, bits_val, "");
182 }
183
184 /*
185 * Align the LSB (with an arithmetic shift to preserve the sign)
186 */
187
188 if (format_desc->channel[chan].size < type.width) {
189 unsigned bits = type.width - format_desc->channel[chan].size;
190 LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
191 input = LLVMBuildAShr(builder, input, bits_val, "");
192 }
193
194 /*
195 * Type conversion
196 */
197
198 if (type.floating) {
199 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
200 if (format_desc->channel[chan].normalized) {
201 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
202 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
203 input = LLVMBuildFMul(builder, input, scale_val, "");
204 }
205 }
206 else if (format_desc->channel[chan].pure_integer) {
207 /* Nothing to do */
208 } else {
209 /* FIXME */
210 assert(0);
211 }
212
213 break;
214
215 case UTIL_FORMAT_TYPE_FLOAT:
216 if (type.floating) {
217 assert(start == 0);
218 assert(stop == 32);
219 assert(type.width == 32);
220 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
221 }
222 else {
223 /* FIXME */
224 assert(0);
225 input = lp_build_undef(gallivm, type);
226 }
227 break;
228
229 case UTIL_FORMAT_TYPE_FIXED:
230 if (type.floating) {
231 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
232 LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
233 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
234 input = LLVMBuildFMul(builder, input, scale_val, "");
235 }
236 else {
237 /* FIXME */
238 assert(0);
239 input = lp_build_undef(gallivm, type);
240 }
241 break;
242
243 default:
244 assert(0);
245 input = lp_build_undef(gallivm, type);
246 break;
247 }
248
249 inputs[chan] = input;
250
251 start = stop;
252 }
253
254 lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
255 }
256
257
258 /**
259 * Convert a vector of rgba8 values into 32bit wide SoA vectors.
260 *
261 * \param dst_type The desired return type. For pure integer formats
262 * this should be a 32bit wide int or uint vector type,
263 * otherwise a float vector type.
264 *
265 * \param packed The rgba8 values to pack.
266 *
267 * \param rgba The 4 SoA return vectors.
268 */
269 void
270 lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
271 struct lp_type dst_type,
272 LLVMValueRef packed,
273 LLVMValueRef *rgba)
274 {
275 LLVMBuilderRef builder = gallivm->builder;
276 LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
277 unsigned chan;
278
279 /* XXX technically shouldn't use that for uint dst_type */
280 packed = LLVMBuildBitCast(builder, packed,
281 lp_build_int_vec_type(gallivm, dst_type), "");
282
283 /* Decode the input vector components */
284 for (chan = 0; chan < 4; ++chan) {
285 unsigned start = chan*8;
286 unsigned stop = start + 8;
287 LLVMValueRef input;
288
289 input = packed;
290
291 if (start)
292 input = LLVMBuildLShr(builder, input,
293 lp_build_const_int_vec(gallivm, dst_type, start), "");
294
295 if (stop < 32)
296 input = LLVMBuildAnd(builder, input, mask, "");
297
298 if (dst_type.floating)
299 input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
300
301 rgba[chan] = input;
302 }
303 }
304
305
306
307 /**
308 * Fetch a texels from a texture, returning them in SoA layout.
309 *
310 * \param type the desired return type for 'rgba'. The vector length
311 * is the number of texels to fetch
312 *
313 * \param base_ptr points to start of the texture image block. For non-
314 * compressed formats, this simply points to the texel.
315 * For compressed formats, it points to the start of the
316 * compressed data block.
317 *
318 * \param i, j the sub-block pixel coordinates. For non-compressed formats
319 * these will always be (0,0). For compressed formats, i will
320 * be in [0, block_width-1] and j will be in [0, block_height-1].
321 */
322 void
323 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
324 const struct util_format_description *format_desc,
325 struct lp_type type,
326 LLVMValueRef base_ptr,
327 LLVMValueRef offset,
328 LLVMValueRef i,
329 LLVMValueRef j,
330 LLVMValueRef rgba_out[4])
331 {
332 LLVMBuilderRef builder = gallivm->builder;
333
334 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
335 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
336 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
337 format_desc->block.width == 1 &&
338 format_desc->block.height == 1 &&
339 format_desc->block.bits <= type.width &&
340 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
341 format_desc->channel[0].size == 32))
342 {
343 /*
344 * The packed pixel fits into an element of the destination format. Put
345 * the packed pixels into a vector and extract each component for all
346 * vector elements in parallel.
347 */
348
349 LLVMValueRef packed;
350
351 /*
352 * gather the texels from the texture
353 * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
354 */
355 packed = lp_build_gather(gallivm,
356 type.length,
357 format_desc->block.bits,
358 type.width,
359 base_ptr, offset);
360
361 /*
362 * convert texels to float rgba
363 */
364 lp_build_unpack_rgba_soa(gallivm,
365 format_desc,
366 type,
367 packed, rgba_out);
368 return;
369 }
370
371 /*
372 * Try calling lp_build_fetch_rgba_aos for all pixels.
373 */
374
375 if (util_format_fits_8unorm(format_desc) &&
376 type.floating && type.width == 32 &&
377 (type.length == 1 || (type.length % 4 == 0))) {
378 struct lp_type tmp_type;
379 LLVMValueRef tmp;
380
381 memset(&tmp_type, 0, sizeof tmp_type);
382 tmp_type.width = 8;
383 tmp_type.length = type.length * 4;
384 tmp_type.norm = TRUE;
385
386 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
387 base_ptr, offset, i, j);
388
389 lp_build_rgba8_to_fi32_soa(gallivm,
390 type,
391 tmp,
392 rgba_out);
393
394 return;
395 }
396
397 /*
398 * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
399 *
400 * This is not the most efficient way of fetching pixels, as we
401 * miss some opportunities to do vectorization, but this is
402 * convenient for formats or scenarios for which there was no
403 * opportunity or incentive to optimize.
404 */
405
406 {
407 unsigned k, chan;
408 struct lp_type tmp_type;
409
410 if (gallivm_debug & GALLIVM_DEBUG_PERF) {
411 debug_printf("%s: scalar unpacking of %s\n",
412 __FUNCTION__, format_desc->short_name);
413 }
414
415 tmp_type = type;
416 tmp_type.length = 4;
417
418 for (chan = 0; chan < 4; ++chan) {
419 rgba_out[chan] = lp_build_undef(gallivm, type);
420 }
421
422 /* loop over number of pixels */
423 for(k = 0; k < type.length; ++k) {
424 LLVMValueRef index = lp_build_const_int32(gallivm, k);
425 LLVMValueRef offset_elem;
426 LLVMValueRef i_elem, j_elem;
427 LLVMValueRef tmp;
428
429 offset_elem = LLVMBuildExtractElement(builder, offset,
430 index, "");
431
432 i_elem = LLVMBuildExtractElement(builder, i, index, "");
433 j_elem = LLVMBuildExtractElement(builder, j, index, "");
434
435 /* Get a single float[4]={R,G,B,A} pixel */
436 tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
437 base_ptr, offset_elem,
438 i_elem, j_elem);
439
440 /*
441 * Insert the AoS tmp value channels into the SoA result vectors at
442 * position = 'index'.
443 */
444 for (chan = 0; chan < 4; ++chan) {
445 LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
446 tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
447 rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
448 tmp_chan, index, "");
449 }
450 }
451 }
452 }