gallivm: fix an issue with NaNs with seamless cube filtering
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Texture sampling -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
#include "util/u_cpu_detect.h"
#include "util/format_rgb9e5.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_logic.h"
#include "lp_bld_printf.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
#include "lp_bld_sample_aos.h"
#include "lp_bld_struct.h"
#include "lp_bld_quad.h"
#include "lp_bld_pack.h"
#include "lp_bld_intr.h"


/**
 * Generate code to fetch a texel from a texture at int coords (x, y, z).
 * The computation depends on whether the texture is 1D, 2D or 3D.
 * The result, texel, will be float vectors:
 *   texel[0] = red values
 *   texel[1] = green values
 *   texel[2] = blue values
 *   texel[3] = alpha values
 */
static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                          LLVMValueRef width,
                          LLVMValueRef height,
                          LLVMValueRef depth,
                          LLVMValueRef x,
                          LLVMValueRef y,
                          LLVMValueRef z,
                          LLVMValueRef y_stride,
                          LLVMValueRef z_stride,
                          LLVMValueRef data_ptr,
                          LLVMValueRef mipoffsets,
                          LLVMValueRef texel_out[4])
{
   const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
   const unsigned dims = bld->dims;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef i, j;
   LLVMValueRef use_border = NULL;

   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
   if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
      use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
   }

   if (dims >= 2 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   if (dims == 3 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* convert x,y,z coords to linear offset from start of texture, in bytes */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x, y, z, y_stride, z_stride,
                          &offset, &i, &j);
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }
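
   /*
    * Conceptually, for a simple (non-compressed) format this computes
    * something like x * bytes_per_texel + y * y_stride + z * z_stride
    * (+ mip offset); i/j are the sub-block coords used by the fetch for
    * compressed formats.
    */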

   if (use_border) {
      /* If we can sample the border color, it means that texcoords may
       * lie outside the bounds of the texture image. We need to do
       * something to prevent reading out of bounds and causing a segfault.
       *
       * Simply AND the texture offset with !use_border. This will cause
       * out-of-bounds lanes to fetch from offset zero, which is guaranteed
       * to be inside the texture image.
       */
      offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
   }

   lp_build_fetch_rgba_soa(bld->gallivm,
                           bld->format_desc,
                           bld->texel_type, TRUE,
                           data_ptr, offset,
                           i, j,
                           bld->cache,
                           texel_out);

   /*
    * Note: if we find an app which frequently samples the texture border
    * we might want to implement a true conditional here to avoid sampling
    * the texture whenever possible (since that's quite a bit of code).
    * Ex:
    *   if (use_border) {
    *      texel = border_color;
    *   }
    *   else {
    *      texel = sample_texture(coord);
    *   }
    * As it is now, we always sample the texture, then selectively replace
    * the texel color results with the border color.
    */

   if (use_border) {
      /* select texel color or border color depending on use_border. */
      const struct util_format_description *format_desc = bld->format_desc;
      int chan;
      struct lp_type border_type = bld->texel_type;
      border_type.length = 4;
      /*
       * Only replace channels which are actually present. The others should
       * get optimized away eventually by the sampler_view swizzle anyway,
       * and it's easier this way too.
       */
      for (chan = 0; chan < 4; chan++) {
         unsigned chan_s;
         /* reverse-map channel... */
         for (chan_s = 0; chan_s < 4; chan_s++) {
            if (chan_s == format_desc->swizzle[chan]) {
               break;
            }
         }
         if (chan_s <= 3) {
            /* use the already clamped color */
            LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
            LLVMValueRef border_chan;

            border_chan = lp_build_extract_broadcast(bld->gallivm,
                                                     border_type,
                                                     bld->texel_type,
                                                     bld->border_color_clamped,
                                                     idx);
            texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
                                              border_chan, texel_out[chan]);
         }
      }
   }
}


/**
 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR_REPEAT mode.
 * (Note that with pot sizes we could do this much more easily post-scale
 * with some bit arithmetic.)
 */
static LLVMValueRef
lp_build_coord_mirror(struct lp_build_sample_context *bld,
                      LLVMValueRef coord, boolean posOnly)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMValueRef fract;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);

   /*
    * We can just use 2*(x - round(0.5*x)) to do all the mirroring;
    * it all works out. (The result is in range [-1.0, 1.0], negative if
    * the coord is in the "odd" section, otherwise positive.)
    */

   coord = lp_build_mul(coord_bld, coord, half);
   fract = lp_build_round(coord_bld, coord);
   fract = lp_build_sub(coord_bld, coord, fract);
   coord = lp_build_add(coord_bld, fract, fract);

   if (posOnly) {
      /*
       * Theoretically it's not quite 100% accurate because the spec says
       * that ultimately a scaled coord of -x.0 should map to int coord
       * -x + 1 with mirroring, not -x (this does not matter for bilinear
       * filtering).
       */
      coord = lp_build_abs(coord_bld, coord);
      /* kill off NaNs */
      /* XXX: not safe without arch rounding, fract can be anything. */
      coord = lp_build_max_ext(coord_bld, coord, coord_bld->zero,
                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
   }

   return coord;
}

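/*
 * A scalar sketch of the computation above, for illustration only
 * (hypothetical helper, not part of this file):
 *
 *    float mirror(float x)
 *    {
 *       x *= 0.5f;
 *       x = 2.0f * (x - roundf(x));  // in [-1,1], negative in "odd" section
 *       return fabsf(x);             // posOnly: fold into [0,1]
 *    }
 *
 * E.g. coord 1.25: 0.625 -> 2 * (0.625 - 1.0) = -0.75 -> abs = 0.75,
 * the expected mirrored coord.
 */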

/**
 * Helper to compute the first coord and the weight for
 * linear wrap repeat npot textures
 */
void
lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
                                  LLVMValueRef coord_f,
                                  LLVMValueRef length_i,
                                  LLVMValueRef length_f,
                                  LLVMValueRef *coord0_i,
                                  LLVMValueRef *weight_f)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
                                                int_coord_bld->one);
   LLVMValueRef mask;
   /* wrap with normalized floats is just fract */
   coord_f = lp_build_fract(coord_bld, coord_f);
   /* mul by size and subtract 0.5 */
   coord_f = lp_build_mul(coord_bld, coord_f, length_f);
   coord_f = lp_build_sub(coord_bld, coord_f, half);
   /*
    * we avoided the 0.5/length division before the repeat wrap,
    * now need to fix up edge cases with selects
    */
   /*
    * Note we do a float (unordered) compare so we can eliminate NaNs.
    * (Otherwise we would need fract_safe above.)
    */
   mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                           PIPE_FUNC_LESS, coord_f, coord_bld->zero);

   /* convert to int, compute lerp weight */
   lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
   *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
}

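/*
 * Scalar equivalent of the above, for illustration (assuming a finite
 * normalized coord; names are hypothetical):
 *
 *    float u = fract(coord_f) * length_f - 0.5f;
 *    int   coord0 = (int)floorf(u);
 *    float weight = u - floorf(u);
 *    if (u < 0.0f)
 *       coord0 = length_i - 1;   // wrap left of texel 0 to the last texel
 *
 * E.g. length 3, coord 0.1: u = -0.2, weight = 0.8, coord0 wraps to 2,
 * so the caller blends texel 2 with texel 0 (after its own +1 wrap).
 */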

/**
 * Build LLVM code for texture wrap mode for linear filtering.
 * \param x0_out returns first integer texcoord
 * \param x1_out returns second integer texcoord
 * \param weight_out returns linear interpolation weight
 */
static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
                            boolean is_gather,
                            LLVMValueRef coord,
                            LLVMValueRef length,
                            LLVMValueRef length_f,
                            LLVMValueRef offset,
                            boolean is_pot,
                            unsigned wrap_mode,
                            LLVMValueRef *x0_out,
                            LLVMValueRef *x1_out,
                            LLVMValueRef *weight_out)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef coord0, coord1, weight;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* repeat wrap */
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
         coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
      }
      else {
         LLVMValueRef mask;
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         lp_build_coord_repeat_npot_linear(bld, coord,
                                           length, length_f,
                                           &coord0, &weight);
         mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                                 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
         coord1 = LLVMBuildAnd(builder,
                               lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
                               mask, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }

      /*
       * clamp to [0, length]
       *
       * Unlike some other wrap modes, this should be correct for gather
       * too. GL_CLAMP explicitly does this clamp on the coord prior to
       * actual wrapping (which is per sample).
       */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      {
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* mul by tex size */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }

         /* clamp to length max */
         coord = lp_build_min_ext(coord_bld, coord, length_f,
                                  GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
         if (!is_gather) {
            /* subtract 0.5 */
            coord = lp_build_sub(coord_bld, coord, half);
            /* clamp to [0, length - 0.5] */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);
            /* convert to int, compute lerp weight */
            lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         } else {
            /*
             * The non-gather path will end up with coords 0, 1 if coord was
             * smaller than 0.5 (with corresponding weight 0.0 so it doesn't
             * really matter what the second coord is). But for gather, we
             * really need to end up with coords 0, 0.
             */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);
            coord0 = lp_build_sub(coord_bld, coord, half);
            coord1 = lp_build_add(coord_bld, coord, half);
            /* values range in ([-0.5, length_f - 0.5], [0.5, length_f + 0.5]) */
            coord0 = lp_build_itrunc(coord_bld, coord0);
            coord1 = lp_build_itrunc(coord_bld, coord1);
            weight = coord_bld->undef;
         }
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         break;
      }

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /*
       * We don't need any clamp. Technically, for very large (pos or neg)
       * (or infinite) values, clamp against [-length, length] would be
       * correct, but we don't need to guarantee any specific
       * result for such coords (the ifloor will be undefined, but for modes
       * requiring border all resulting coords are safe).
       */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      if (!is_gather) {
         /* compute mirror function */
         coord = lp_build_coord_mirror(bld, coord, TRUE);

         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

         /* coord0 = max(coord0, 0) */
         coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      } else {
         /*
          * This is pretty reasonable in the end, all the tests care
          * about are nasty edge cases (scaled coords x.5, so the individual
          * coords are actually integers, which is REALLY tricky to get right
          * due to this working differently both for negative numbers as well
          * as for even/odd cases). But with enough magic it's not too complex
          * after all.
          * Maybe we should try a bit-arithmetic version for POT textures
          * though...
          */
         LLVMValueRef isNeg;
         /*
          * Wrapping just once still works, even though it means we can
          * get "wrong" sign due to performing mirror in the middle of the
          * two coords (because this can only happen very near the odd/even
          * edges, so both coords will actually end up as 0 or length - 1
          * in the end).
          * For GL4 gather with per-sample offsets we'd need to do the
          * mirroring per coord too.
          */
         coord = lp_build_coord_mirror(bld, coord, FALSE);
         coord = lp_build_mul(coord_bld, coord, length_f);

         /*
          * NaNs should be safe here, we'll do away with them with
          * the ones' complement plus min.
          */
         coord0 = lp_build_sub(coord_bld, coord, half);
         coord0 = lp_build_ifloor(coord_bld, coord0);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* ones' complement for neg numbers (mirror(negX) = X - 1) */
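         /* (e.g. coord0 = -1 -> ~(-1) = 0, coord0 = -2 -> ~(-2) = 1) */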
         isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
                              coord0, int_coord_bld->zero);
         coord0 = lp_build_xor(int_coord_bld, coord0, isNeg);
         isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
                              coord1, int_coord_bld->zero);
         coord1 = lp_build_xor(int_coord_bld, coord1, isNeg);
         coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);

         weight = coord_bld->undef;
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /*
       * XXX: probably not correct for gather, albeit I'm not
       * entirely sure as it's poorly specified. The wrapping looks
       * correct according to the GL 1.2.1 spec; however, negative values
       * will be swapped - GL re-specified wrapping in newer versions
       * (no more pre-clamp except with GL_CLAMP).
       */
      coord = lp_build_abs(coord_bld, coord);

      /* clamp to [0, length] */
      coord = lp_build_min_ext(coord_bld, coord, length_f,
                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      {
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         if (!is_gather) {
            coord = lp_build_abs(coord_bld, coord);

            /* clamp to length max */
            coord = lp_build_min_ext(coord_bld, coord, length_f,
                                     GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
            /* subtract 0.5 */
            coord = lp_build_sub(coord_bld, coord, half);
            /* clamp to [0, length - 0.5] */
            coord = lp_build_max(coord_bld, coord, coord_bld->zero);

            /* convert to int, compute lerp weight */
            lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            /* coord1 = min(coord1, length-1) */
            coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         } else {
            /*
             * The non-gather path will swap coord0/1 if coord was negative,
             * which is ok for filtering since the filter weight matches
             * accordingly. Also, if coord is close to zero, coord0/1 will
             * be 0 and 1, instead of 0 and 0 (again ok due to filter
             * weight being 0.0). Both issues need to be fixed for gather.
             */
            LLVMValueRef isNeg;

            /*
             * Actually wanted to cheat here and use:
             * coord1 = lp_build_iround(coord_bld, coord);
             * but it's not good enough for some tests (even piglit
             * textureGather is set up in a way so the coords are always
             * .5, that is right at the crossover points).
             * So do ordinary sub/floor, then do ones' complement
             * for negative numbers.
             * (Note we can't just do sub|add/abs/itrunc per coord either -
             * because the spec demands that mirror(3.0) = 3 but
             * mirror(-3.0) = 2.)
             */
            coord = lp_build_sub(coord_bld, coord, half);
            coord0 = lp_build_ifloor(coord_bld, coord);
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord0,
                                 int_coord_bld->zero);
            coord0 = lp_build_xor(int_coord_bld, isNeg, coord0);
            coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);

            isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord1,
                                 int_coord_bld->zero);
            coord1 = lp_build_xor(int_coord_bld, isNeg, coord1);
            coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);

            weight = coord_bld->undef;
         }
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      {
         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /*
          * XXX: probably not correct for gather due to swapped
          * order if coord is negative (same rationale as for
          * MIRROR_CLAMP).
          */
         coord = lp_build_abs(coord_bld, coord);

         /*
          * We don't need any clamp. Technically, for very large
          * (or infinite) values, clamp against length would be
          * correct, but we don't need to guarantee any specific
          * result for such coords (the ifloor will be undefined, but
          * for modes requiring border all resulting coords are safe).
          */
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      }
      break;

   default:
      assert(0);
      coord0 = NULL;
      coord1 = NULL;
      weight = NULL;
   }

   *x0_out = coord0;
   *x1_out = coord1;
   *weight_out = weight;
}

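/*
 * Scalar sketch of the CLAMP_TO_EDGE branch above, for illustration
 * (assuming normalized coords, no offset, non-gather):
 *
 *    u = fminf(coord * length_f, length_f);   // C99 fminf picks the
 *                                             // non-NaN operand too
 *    u = fmaxf(u - 0.5f, 0.0f);               // now in [0, length - 0.5]
 *    coord0 = (int)floorf(u);
 *    weight = u - coord0;
 *    coord1 = MIN2(coord0 + 1, length - 1);
 *
 * The NaN-selecting min is what lp_build_min_ext with
 * GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN requests here.
 */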

/**
 * Build LLVM code for texture wrap mode for nearest filtering.
 * \param coord the incoming texcoord (nominally in [0,1])
 * \param length the texture size along one dimension, as int vector
 * \param length_f the texture size along one dimension, as float vector
 * \param offset texel offset along one dimension (as int vector)
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 */
static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
                             LLVMValueRef coord,
                             LLVMValueRef length,
                             LLVMValueRef length_f,
                             LLVMValueRef offset,
                             boolean is_pot,
                             unsigned wrap_mode)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef icoord;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_ifloor(coord_bld, coord);
         if (offset) {
            icoord = lp_build_add(int_coord_bld, icoord, offset);
         }
         icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
      }
      else {
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* take fraction, unnormalize */
         coord = lp_build_fract_safe(coord_bld, coord);
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* floor */
      /* use itrunc instead since we clamp to 0 anyway */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1]. */
      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
                              length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      /* no clamp necessary, border masking will handle this */
      icoord = lp_build_ifloor(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord, TRUE);

      /* scale coord to length */
      assert(bld->static_sampler_state->normalized_coords);
      coord = lp_build_mul(coord_bld, coord, length_f);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      /*
       * Use unsigned min due to possible undef values (NaNs, overflow)
       */
      {
         struct lp_build_context abs_coord_bld = *int_coord_bld;
         abs_coord_bld.type.sign = FALSE;
         /* clamp to [0, length - 1] */
         icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      break;

   default:
      assert(0);
      icoord = NULL;
   }

   return icoord;
}

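/*
 * For the POT repeat case above this boils down to, per element:
 *
 *    icoord = ((int)floorf(coord * length_f) + offset) & (length - 1);
 *
 * E.g. length 8, coord 1.3, no offset: floor(10.4) = 10, and 10 & 7 = 2,
 * the same texel as for coord 0.3.
 */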

/**
 * Do shadow test/comparison.
 * \param p shadow ref value
 * \param texel the texel to compare against
 */
static LLVMValueRef
lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
                            LLVMValueRef p,
                            LLVMValueRef texel)
{
   struct lp_build_context *texel_bld = &bld->texel_bld;
   LLVMValueRef res;

   if (0) {
      //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
      lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
   }

   /* result = (p FUNC texel) ? 1 : 0 */
   /*
    * honor d3d10 floating point rules here, which state that comparisons
    * are ordered except NOT_EQUAL which is unordered.
    */
   if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
      res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
                                 p, texel);
   }
   else {
      res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
                         p, texel);
   }
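   /*
    * E.g. with a NaN texel, the ordered LESS compare yields false while
    * the unordered NOTEQUAL compare yields true, matching the d3d10 rules
    * mentioned above. The result is a 0 / ~0 per-element mask.
    */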
   return res;
}


/**
 * Generate code to sample a mipmap level with nearest filtering.
 * If sampling a cube texture, r = cube face in [0,5].
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef *coords,
                              const LLVMValueRef *offsets,
                              LLVMValueRef colors_out[4])
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec;
   LLVMValueRef height_vec;
   LLVMValueRef depth_vec;
   LLVMValueRef flt_size;
   LLVMValueRef flt_width_vec;
   LLVMValueRef flt_height_vec;
   LLVMValueRef flt_depth_vec;
   LLVMValueRef x, y = NULL, z = NULL;

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                size,
                                &width_vec, &height_vec, &depth_vec);

   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);

   /*
    * Compute integer texcoords.
    */
   x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
                                    flt_width_vec, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s);
   lp_build_name(x, "tex.x.wrapped");

   if (dims >= 2) {
      y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
                                       flt_height_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t);
      lp_build_name(y, "tex.y.wrapped");

      if (dims == 3) {
         z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
                                          flt_depth_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r);
         lp_build_name(z, "tex.z.wrapped");
      }
   }
   if (has_layer_coord(bld->static_texture_state->target)) {
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* add cube layer to face */
         z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
      }
      else {
         z = coords[2];
      }
      lp_build_name(z, "tex.z.layer");
   }

   /*
    * Get texture colors.
    */
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x, y, z,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, colors_out);

   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
      LLVMValueRef cmpval;
      cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
      /* this is really just an AND of 1.0 with cmpval, but llvm is clever enough */
      colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
                                      bld->texel_bld.one, bld->texel_bld.zero);
      colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
   }

}


/**
 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
 */
static LLVMValueRef
lp_build_masklerp(struct lp_build_context *bld,
                  LLVMValueRef weight,
                  LLVMValueRef mask0,
                  LLVMValueRef mask1)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef weight2;

   weight2 = lp_build_sub(bld, bld->one, weight);
   weight = LLVMBuildBitCast(builder, weight,
                             lp_build_int_vec_type(gallivm, bld->type), "");
   weight2 = LLVMBuildBitCast(builder, weight2,
                              lp_build_int_vec_type(gallivm, bld->type), "");
   weight = LLVMBuildAnd(builder, weight, mask1, "");
   weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
   weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
   weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
   return lp_build_add(bld, weight, weight2);
}

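/*
 * I.e. with m0/m1 being all-0 or all-1 bit patterns this computes
 *
 *    (weight & m1) + ((1 - weight) & m0)
 *
 * which equals lerp(weight, m0 ? 1 : 0, m1 ? 1 : 0) without any
 * multiplies.
 */
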
/**
 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
 */
static LLVMValueRef
lp_build_masklerp2d(struct lp_build_context *bld,
                    LLVMValueRef weight0,
                    LLVMValueRef weight1,
                    LLVMValueRef mask00,
                    LLVMValueRef mask01,
                    LLVMValueRef mask10,
                    LLVMValueRef mask11)
{
   LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
   LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
   return lp_build_lerp(bld, weight1, val0, val1, 0);
}

/*
 * This is a fair bit of code for something OpenGL just recommends
 * but does not require.
 */
#define ACCURATE_CUBE_CORNERS 1

/**
 * Generate code to sample a mipmap level with linear filtering.
 * If sampling a cube texture, r = cube face in [0,5].
 * If linear_mask is present, only pixels having their mask set
 * will receive linear filtering, the rest will use nearest.
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             boolean is_gather,
                             LLVMValueRef size,
                             LLVMValueRef linear_mask,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef mipoffsets,
                             LLVMValueRef *coords,
                             const LLVMValueRef *offsets,
                             LLVMValueRef colors_out[4])
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *ivec_bld = &bld->int_coord_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *texel_bld = &bld->texel_bld;
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec;
   LLVMValueRef height_vec;
   LLVMValueRef depth_vec;
   LLVMValueRef flt_size;
   LLVMValueRef flt_width_vec;
   LLVMValueRef flt_height_vec;
   LLVMValueRef flt_depth_vec;
   LLVMValueRef fall_off[4], have_corners;
   LLVMValueRef z1 = NULL;
   LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
   LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
   LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
   LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
   LLVMValueRef xs[4], ys[4], zs[4];
   LLVMValueRef neighbors[2][2][4];
   int chan, texel_index;
   boolean seamless_cube_filter, accurate_cube_corners;

   seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
                           bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
                          bld->static_sampler_state->seamless_cube_map;
   /*
    * XXX I don't know how this is really supposed to work with gather. From GL
    * spec wording (not gather specific) it sounds like the 4th missing texel
    * should be an average of the other 3, hence gather could return this.
    * This is however NOT how the code here works, which just fixes up the
    * weights used for filtering instead. And of course for gather there is
    * no filter to tweak...
    */
   accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
                           !is_gather;

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                size,
                                &width_vec, &height_vec, &depth_vec);

   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);

   /*
    * Compute integer texcoords.
    */

   if (!seamless_cube_filter) {
      lp_build_sample_wrap_linear(bld, is_gather, coords[0], width_vec,
                                  flt_width_vec, offsets[0],
                                  bld->static_texture_state->pot_width,
                                  bld->static_sampler_state->wrap_s,
                                  &x00, &x01, &s_fpart);
      lp_build_name(x00, "tex.x0.wrapped");
      lp_build_name(x01, "tex.x1.wrapped");
      x10 = x00;
      x11 = x01;

      if (dims >= 2) {
         lp_build_sample_wrap_linear(bld, is_gather, coords[1], height_vec,
                                     flt_height_vec, offsets[1],
                                     bld->static_texture_state->pot_height,
                                     bld->static_sampler_state->wrap_t,
                                     &y00, &y10, &t_fpart);
         lp_build_name(y00, "tex.y0.wrapped");
         lp_build_name(y10, "tex.y1.wrapped");
         y01 = y00;
         y11 = y10;

         if (dims == 3) {
            lp_build_sample_wrap_linear(bld, is_gather, coords[2], depth_vec,
                                        flt_depth_vec, offsets[2],
                                        bld->static_texture_state->pot_depth,
                                        bld->static_sampler_state->wrap_r,
                                        &z00, &z1, &r_fpart);
            z01 = z10 = z11 = z00;
            lp_build_name(z00, "tex.z0.wrapped");
            lp_build_name(z1, "tex.z1.wrapped");
         }
      }
      if (has_layer_coord(bld->static_texture_state->target)) {
         if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
            /* add cube layer to face */
            z00 = z01 = z10 = z11 = z1 =
               lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
         }
         else {
            z00 = z01 = z10 = z11 = z1 = coords[2];  /* cube face or layer */
         }
         lp_build_name(z00, "tex.z0.layer");
         lp_build_name(z1, "tex.z1.layer");
      }
   }
   else {
      struct lp_build_if_state edge_if;
      LLVMTypeRef int1t;
      LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
      LLVMValueRef coord, have_edge, have_corner;
      LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
      LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
      LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
      LLVMValueRef face = coords[2];
      LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
      LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
      /* XXX drop height calcs. Could (should) do this without seamless filtering too */
      height_vec = width_vec;
      flt_height_vec = flt_width_vec;

      /* XXX the overflow logic is actually sort of duplicated with trilinear,
       * since an overflow in one mip should also have a corresponding overflow
       * in another.
       */
      /* should always have normalized coords, and offsets are undefined */
      assert(bld->static_sampler_state->normalized_coords);
      /*
       * The coords should all be between [0, 1]; however, we can have NaNs,
       * which will wreak havoc. In particular the y1_clamped value below
       * can be -INT_MAX (on x86) and be propagated right through (probably
       * other values might be bogus in the end too).
       * So kill off the NaNs here.
       */
      coords[0] = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
                                   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
      coords[1] = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
                                   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
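      /*
       * lp_build_max_ext with GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN
       * returns the second (known non-NaN) operand when the first is NaN,
       * i.e. per element this behaves like:
       *    coord = isnan(coord) ? 0.0f : fmaxf(coord, 0.0f);
       */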
      coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
      /* instead of clamp, build mask if overflowed */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight */
      /* not ideal with AVX (and no AVX2) */
      lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart);
      x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
      coord = lp_build_mul(coord_bld, coords[1], flt_height_vec);
      coord = lp_build_sub(coord_bld, coord, half);
      lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart);
      y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);

      fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
      fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
      fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
      fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);

      fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
      fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
      have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
      have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);

      /* needed for accurate corner filtering branch later, rely on 0 init */
      int1t = LLVMInt1TypeInContext(bld->gallivm->context);
      have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");

      for (texel_index = 0; texel_index < 4; texel_index++) {
         xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
         ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
         zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
      }

      lp_build_if(&edge_if, bld->gallivm, have_edge);

      have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
      have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
      LLVMBuildStore(builder, have_corner, have_corners);

      /*
       * Need to feed clamped values here for cheap corner handling,
       * but only for y coord (as when falling off both edges we only
       * fall off the x one) - this should be sufficient.
       */
      y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
      y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);

      /*
       * Get all possible new coords.
       */
      lp_build_cube_new_coords(ivec_bld, face,
                               x0, x1, y0_clamped, y1_clamped,
                               length_minus_one,
                               new_faces, new_xcoords, new_ycoords);

      /* handle fall off x-, x+ direction */
      /* determine new coords, face (the two fall_off vars cannot both be true at the same time) */
      x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
      y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
      x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
      y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
      x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
      y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
      x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
      y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);

      z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
      z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);

      /* handle fall off y-, y+ direction */
      /*
       * Cheap corner logic: just hack up things so a texel doesn't fall
       * off both sides (which means filter weights will be wrong but we'll only
       * use valid texels in the filter).
       * This means however (y) coords must additionally be clamped (see above).
       * This corner handling should be fully OpenGL (but not d3d10) compliant.
       */
      fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
      fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
      fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
      fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);

      x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
      y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
      x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
      y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
      x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
      y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
      x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
      y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);

      z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
      z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
      z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
      z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);

      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* now can add cube layer to face (per sample) */
         z00 = lp_build_add(ivec_bld, z00, coords[3]);
         z01 = lp_build_add(ivec_bld, z01, coords[3]);
         z10 = lp_build_add(ivec_bld, z10, coords[3]);
         z11 = lp_build_add(ivec_bld, z11, coords[3]);
      }

      LLVMBuildStore(builder, x00, xs[0]);
      LLVMBuildStore(builder, x01, xs[1]);
      LLVMBuildStore(builder, x10, xs[2]);
      LLVMBuildStore(builder, x11, xs[3]);
      LLVMBuildStore(builder, y00, ys[0]);
      LLVMBuildStore(builder, y01, ys[1]);
      LLVMBuildStore(builder, y10, ys[2]);
      LLVMBuildStore(builder, y11, ys[3]);
      LLVMBuildStore(builder, z00, zs[0]);
      LLVMBuildStore(builder, z01, zs[1]);
      LLVMBuildStore(builder, z10, zs[2]);
      LLVMBuildStore(builder, z11, zs[3]);

      lp_build_else(&edge_if);

      LLVMBuildStore(builder, x0, xs[0]);
      LLVMBuildStore(builder, x1, xs[1]);
      LLVMBuildStore(builder, x0, xs[2]);
      LLVMBuildStore(builder, x1, xs[3]);
      LLVMBuildStore(builder, y0, ys[0]);
      LLVMBuildStore(builder, y0, ys[1]);
      LLVMBuildStore(builder, y1, ys[2]);
      LLVMBuildStore(builder, y1, ys[3]);
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
         LLVMBuildStore(builder, cube_layer, zs[0]);
         LLVMBuildStore(builder, cube_layer, zs[1]);
         LLVMBuildStore(builder, cube_layer, zs[2]);
         LLVMBuildStore(builder, cube_layer, zs[3]);
      }
      else {
         LLVMBuildStore(builder, face, zs[0]);
         LLVMBuildStore(builder, face, zs[1]);
         LLVMBuildStore(builder, face, zs[2]);
         LLVMBuildStore(builder, face, zs[3]);
      }

      lp_build_endif(&edge_if);

      x00 = LLVMBuildLoad(builder, xs[0], "");
      x01 = LLVMBuildLoad(builder, xs[1], "");
      x10 = LLVMBuildLoad(builder, xs[2], "");
      x11 = LLVMBuildLoad(builder, xs[3], "");
      y00 = LLVMBuildLoad(builder, ys[0], "");
      y01 = LLVMBuildLoad(builder, ys[1], "");
      y10 = LLVMBuildLoad(builder, ys[2], "");
      y11 = LLVMBuildLoad(builder, ys[3], "");
      z00 = LLVMBuildLoad(builder, zs[0], "");
      z01 = LLVMBuildLoad(builder, zs[1], "");
      z10 = LLVMBuildLoad(builder, zs[2], "");
      z11 = LLVMBuildLoad(builder, zs[3], "");
   }

   if (linear_mask) {
      /*
       * Whack filter weights into place. Whatever texel had more weight is
       * the one which should have been selected by nearest filtering, hence
       * just use 100% weight for it.
       */
      struct lp_build_context *c_bld = &bld->coord_bld;
      LLVMValueRef w1_mask, w1_weight;
      LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);

      w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
      /* this select is really just an "and" */
      w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
      s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
      if (dims >= 2) {
         w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
         w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
         t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
         if (dims == 3) {
            w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
            w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
            r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
         }
      }
   }
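   /*
    * I.e. for lanes with linear_mask unset the weight snaps to 0.0 or 1.0
    * (whichever texel is nearer), so the lerps below degenerate to a
    * nearest-neighbor select for those lanes.
    */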

   /*
    * Get texture colors.
    */
   /* get x0/x1 texels */
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x00, y00, z00,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, neighbors[0][0]);
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x01, y01, z01,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, neighbors[0][1]);

   if (dims == 1) {
      assert(!is_gather);
      if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
         /* Interpolate two samples from 1D image to produce one color */
         for (chan = 0; chan < 4; chan++) {
            colors_out[chan] = lp_build_lerp(texel_bld, s_fpart,
                                             neighbors[0][0][chan],
                                             neighbors[0][1][chan],
                                             0);
         }
      }
      else {
         LLVMValueRef cmpval0, cmpval1;
         cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
         cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
         /* simplified lerp, AND mask with weight and add */
         colors_out[0] = lp_build_masklerp(texel_bld, s_fpart,
                                           cmpval0, cmpval1);
         colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
      }
   }
   else {
      /* 2D/3D texture */
      struct lp_build_if_state corner_if;
      LLVMValueRef colors0[4], colorss[4];

      /* get x0/x1 texels at y1 */
      lp_build_sample_texel_soa(bld,
                                width_vec, height_vec, depth_vec,
                                x10, y10, z10,
                                row_stride_vec, img_stride_vec,
                                data_ptr, mipoffsets, neighbors[1][0]);
      lp_build_sample_texel_soa(bld,
                                width_vec, height_vec, depth_vec,
                                x11, y11, z11,
                                row_stride_vec, img_stride_vec,
                                data_ptr, mipoffsets, neighbors[1][1]);

      /*
       * To avoid having to duplicate the linear_mask / fetch code, use
       * another branch here (with the corner condition, though edge would
       * work as well).
       */
      if (accurate_cube_corners) {
         LLVMValueRef w00, w01, w10, w11, wx0, wy0;
         LLVMValueRef c_weight, c00, c01, c10, c11;
         LLVMValueRef have_corner, one_third, tmp;

         colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
         colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
         colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
         colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");

         have_corner = LLVMBuildLoad(builder, have_corners, "");

         lp_build_if(&corner_if, bld->gallivm, have_corner);

         /*
          * We can't use the standard 2d lerp as we need per-element weights
          * in case of corners, so just calculate the bilinear result as
          * w00*s00 + w01*s01 + w10*s10 + w11*s11.
          * (This is actually less work than a 2d lerp, 7 vs. 9 instructions;
          * however, calculating the weights needs another 6. Still, it is
          * probably no slower than a 2d lerp even for just 4 channels, since
          * the weights only need to be calculated once - though fixing up
          * the weights has additional cost.)
          */
         wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
         wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
         w00 = lp_build_mul(coord_bld, wx0, wy0);
         w01 = lp_build_mul(coord_bld, s_fpart, wy0);
         w10 = lp_build_mul(coord_bld, wx0, t_fpart);
         w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);

         /* find corner weight */
         c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
         c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
         c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
         c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
         c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
         c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
         c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
         c_weight = lp_build_select(coord_bld, c11, w11, c_weight);

         /*
          * add 1/3 of the corner weight to each of the 3 other samples
          * and null out corner weight
          */
         one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f);
         c_weight = lp_build_mul(coord_bld, c_weight, one_third);
         w00 = lp_build_add(coord_bld, w00, c_weight);
         c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
         w00 = lp_build_andnot(coord_bld, w00, c00);
         w01 = lp_build_add(coord_bld, w01, c_weight);
         c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
         w01 = lp_build_andnot(coord_bld, w01, c01);
         w10 = lp_build_add(coord_bld, w10, c_weight);
         c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
         w10 = lp_build_andnot(coord_bld, w10, c10);
         w11 = lp_build_add(coord_bld, w11, c_weight);
         c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
         w11 = lp_build_andnot(coord_bld, w11, c11);
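         /*
          * E.g. if the corner sample is s00 with w00 = 0.6: the other three
          * weights each gain 0.2 and w00 is zeroed, so the weights still
          * sum to 1 and only valid texels contribute to the filtered result.
          */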
1442
1443 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1444 for (chan = 0; chan < 4; chan++) {
1445 colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
1446 tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
1447 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1448 tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
1449 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1450 tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
1451 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1452 }
1453 }
1454 else {
1455 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1456 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1457 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1458 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1459 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1460 /* inputs to interpolation are just masks so just add masked weights together */
1461 cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
1462 cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
1463 cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
1464 cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
1465 colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
1466 tmp = lp_build_and(coord_bld, w01, cmpval01);
1467 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1468 tmp = lp_build_and(coord_bld, w10, cmpval10);
1469 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1470 tmp = lp_build_and(coord_bld, w11, cmpval11);
1471 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1472 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1473 }
1474
1475 LLVMBuildStore(builder, colors0[0], colorss[0]);
1476 LLVMBuildStore(builder, colors0[1], colorss[1]);
1477 LLVMBuildStore(builder, colors0[2], colorss[2]);
1478 LLVMBuildStore(builder, colors0[3], colorss[3]);
1479
1480 lp_build_else(&corner_if);
1481 }
1482
1483 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1484 if (is_gather) {
1485 /*
1486 * Just assign the red channel (no component selection yet).
1487 * This is a bit hackish, we usually do the swizzle at the
1488 * end of sampling (much less values to swizzle), but this
1489 * obviously cannot work when using gather.
1490 */
1491 unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1492 colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
1493 neighbors[1][0],
1494 chan_swiz);
1495 colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
1496 neighbors[1][1],
1497 chan_swiz);
1498 colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
1499 neighbors[0][1],
1500 chan_swiz);
1501 colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
1502 neighbors[0][0],
1503 chan_swiz);
1504 }
1505 else {
1506 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1507 for (chan = 0; chan < 4; chan++) {
1508 colors0[chan] = lp_build_lerp_2d(texel_bld,
1509 s_fpart, t_fpart,
1510 neighbors[0][0][chan],
1511 neighbors[0][1][chan],
1512 neighbors[1][0][chan],
1513 neighbors[1][1][chan],
1514 0);
1515 }
1516 }
1517 }
1518 else {
1519 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1520 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1521 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1522 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1523 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1524
1525 if (is_gather) {
1526 /* more hacks for swizzling, should be X, ONE or ZERO... */
1527 unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1528 if (chan_swiz <= PIPE_SWIZZLE_W) {
1529 colors0[0] = lp_build_select(texel_bld, cmpval10,
1530 texel_bld->one, texel_bld->zero);
1531 colors0[1] = lp_build_select(texel_bld, cmpval11,
1532 texel_bld->one, texel_bld->zero);
1533 colors0[2] = lp_build_select(texel_bld, cmpval01,
1534 texel_bld->one, texel_bld->zero);
1535 colors0[3] = lp_build_select(texel_bld, cmpval00,
1536 texel_bld->one, texel_bld->zero);
1537 }
1538 else if (chan_swiz == PIPE_SWIZZLE_0) {
1539 colors0[0] = colors0[1] = colors0[2] = colors0[3] =
1540 texel_bld->zero;
1541 }
1542 else {
1543 colors0[0] = colors0[1] = colors0[2] = colors0[3] =
1544 texel_bld->one;
1545 }
1546 }
1547 else {
1548 colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1549 cmpval00, cmpval01, cmpval10, cmpval11);
1550 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1551 }
1552 }
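   /*
    * Illustrative aside (not from the original code): the gather assignments
    * above return texels in the order neighbors[1][0], [1][1], [0][1], [0][0],
    * i.e. (i, j+1), (i+1, j+1), (i+1, j), (i, j), which appears to match the
    * x/y/z/w component order that GL's textureGather() / d3d10's gather4
    * specify.
    */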
1553
1554 if (accurate_cube_corners) {
1555 LLVMBuildStore(builder, colors0[0], colorss[0]);
1556 LLVMBuildStore(builder, colors0[1], colorss[1]);
1557 LLVMBuildStore(builder, colors0[2], colorss[2]);
1558 LLVMBuildStore(builder, colors0[3], colorss[3]);
1559
1560 lp_build_endif(&corner_if);
1561
1562 colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
1563 colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
1564 colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
1565 colors0[3] = LLVMBuildLoad(builder, colorss[3], "");
1566 }
1567
1568 if (dims == 3) {
1569 LLVMValueRef neighbors1[2][2][4];
1570 LLVMValueRef colors1[4];
1571
1572 assert(!is_gather);
1573
1574 /* get x0/x1/y0/y1 texels at z1 */
1575 lp_build_sample_texel_soa(bld,
1576 width_vec, height_vec, depth_vec,
1577 x00, y00, z1,
1578 row_stride_vec, img_stride_vec,
1579 data_ptr, mipoffsets, neighbors1[0][0]);
1580 lp_build_sample_texel_soa(bld,
1581 width_vec, height_vec, depth_vec,
1582 x01, y01, z1,
1583 row_stride_vec, img_stride_vec,
1584 data_ptr, mipoffsets, neighbors1[0][1]);
1585 lp_build_sample_texel_soa(bld,
1586 width_vec, height_vec, depth_vec,
1587 x10, y10, z1,
1588 row_stride_vec, img_stride_vec,
1589 data_ptr, mipoffsets, neighbors1[1][0]);
1590 lp_build_sample_texel_soa(bld,
1591 width_vec, height_vec, depth_vec,
1592 x11, y11, z1,
1593 row_stride_vec, img_stride_vec,
1594 data_ptr, mipoffsets, neighbors1[1][1]);
1595
1596 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1597 /* Bilinear interpolate the four samples from the second Z slice */
1598 for (chan = 0; chan < 4; chan++) {
1599 colors1[chan] = lp_build_lerp_2d(texel_bld,
1600 s_fpart, t_fpart,
1601 neighbors1[0][0][chan],
1602 neighbors1[0][1][chan],
1603 neighbors1[1][0][chan],
1604 neighbors1[1][1][chan],
1605 0);
1606 }
1607 /* Linearly interpolate the two samples from the two 3D slices */
1608 for (chan = 0; chan < 4; chan++) {
1609 colors_out[chan] = lp_build_lerp(texel_bld,
1610 r_fpart,
1611 colors0[chan], colors1[chan],
1612 0);
1613 }
1614 }
1615 else {
1616 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1617          cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[0][0][0]);
1618          cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[0][1][0]);
1619          cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[1][0][0]);
1620          cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors1[1][1][0]);
1621 colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1622 cmpval00, cmpval01, cmpval10, cmpval11);
1623 /* Linearly interpolate the two samples from the two 3D slices */
1624 colors_out[0] = lp_build_lerp(texel_bld,
1625 r_fpart,
1626 colors0[0], colors1[0],
1627 0);
1628 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1629 }
1630 }
1631 else {
1632 /* 2D tex */
1633 for (chan = 0; chan < 4; chan++) {
1634 colors_out[chan] = colors0[chan];
1635 }
1636 }
1637 }
1638 }
1639
1640
1641 /**
1642 * Sample the texture/mipmap using given image filter and mip filter.
1643 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1644 * from (vectors or scalars).
1645  * If we're using nearest miplevel sampling, the '1' values will be null/unused.
1646 */
1647 static void
1648 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1649 unsigned img_filter,
1650 unsigned mip_filter,
1651 boolean is_gather,
1652 LLVMValueRef *coords,
1653 const LLVMValueRef *offsets,
1654 LLVMValueRef ilevel0,
1655 LLVMValueRef ilevel1,
1656 LLVMValueRef lod_fpart,
1657 LLVMValueRef *colors_out)
1658 {
1659 LLVMBuilderRef builder = bld->gallivm->builder;
1660 LLVMValueRef size0 = NULL;
1661 LLVMValueRef size1 = NULL;
1662 LLVMValueRef row_stride0_vec = NULL;
1663 LLVMValueRef row_stride1_vec = NULL;
1664 LLVMValueRef img_stride0_vec = NULL;
1665 LLVMValueRef img_stride1_vec = NULL;
1666 LLVMValueRef data_ptr0 = NULL;
1667 LLVMValueRef data_ptr1 = NULL;
1668 LLVMValueRef mipoff0 = NULL;
1669 LLVMValueRef mipoff1 = NULL;
1670 LLVMValueRef colors0[4], colors1[4];
1671 unsigned chan;
1672
1673 /* sample the first mipmap level */
1674 lp_build_mipmap_level_sizes(bld, ilevel0,
1675 &size0,
1676 &row_stride0_vec, &img_stride0_vec);
1677 if (bld->num_mips == 1) {
1678 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1679 }
1680 else {
1681       /* This path should work for num_lods == 1 too, but is slightly less efficient */
1682 data_ptr0 = bld->base_ptr;
1683 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1684 }
1685 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1686 lp_build_sample_image_nearest(bld, size0,
1687 row_stride0_vec, img_stride0_vec,
1688 data_ptr0, mipoff0, coords, offsets,
1689 colors0);
1690 }
1691 else {
1692 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1693 lp_build_sample_image_linear(bld, is_gather, size0, NULL,
1694 row_stride0_vec, img_stride0_vec,
1695 data_ptr0, mipoff0, coords, offsets,
1696 colors0);
1697 }
1698
1699 /* Store the first level's colors in the output variables */
1700 for (chan = 0; chan < 4; chan++) {
1701 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1702 }
1703
1704 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1705 struct lp_build_if_state if_ctx;
1706 LLVMValueRef need_lerp;
1707
1708 /* need_lerp = lod_fpart > 0 */
1709 if (bld->num_lods == 1) {
1710 need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1711 lod_fpart, bld->lodf_bld.zero,
1712 "need_lerp");
1713 }
1714 else {
1715 /*
1716           * We'll do mip filtering if any of the quads (or individual
1717           * pixels, in case of per-pixel lod) need it.
1718 * It might be better to split the vectors here and only fetch/filter
1719 * quads which need it (if there's one lod per quad).
1720 */
1721 need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
1722 PIPE_FUNC_GREATER,
1723 lod_fpart, bld->lodf_bld.zero);
1724 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
1725 }
1726
1727 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1728 {
1729 /*
1730 * We unfortunately need to clamp lod_fpart here since we can get
1731           * negative values, which would screw up filtering if not all
1732           * lod_fpart values have the same sign.
1733 */
1734 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1735 bld->lodf_bld.zero);
1736 /* sample the second mipmap level */
1737 lp_build_mipmap_level_sizes(bld, ilevel1,
1738 &size1,
1739 &row_stride1_vec, &img_stride1_vec);
1740 if (bld->num_mips == 1) {
1741 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1742 }
1743 else {
1744 data_ptr1 = bld->base_ptr;
1745 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1746 }
1747 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1748 lp_build_sample_image_nearest(bld, size1,
1749 row_stride1_vec, img_stride1_vec,
1750 data_ptr1, mipoff1, coords, offsets,
1751 colors1);
1752 }
1753 else {
1754 lp_build_sample_image_linear(bld, FALSE, size1, NULL,
1755 row_stride1_vec, img_stride1_vec,
1756 data_ptr1, mipoff1, coords, offsets,
1757 colors1);
1758 }
1759
1760 /* interpolate samples from the two mipmap levels */
1761
1762 if (bld->num_lods != bld->coord_type.length)
1763 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1764 bld->lodf_bld.type,
1765 bld->texel_bld.type,
1766 lod_fpart);
1767
1768 for (chan = 0; chan < 4; chan++) {
1769 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1770 colors0[chan], colors1[chan],
1771 0);
1772 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1773 }
1774 }
1775 lp_build_endif(&if_ctx);
1776 }
1777 }
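
#if 0
/*
 * Purely illustrative scalar sketch of the per-channel math generated by
 * lp_build_sample_mipmap() above for PIPE_TEX_MIPFILTER_LINEAR; the helper
 * and its names are hypothetical, not part of gallivm.
 */
static float
example_mip_linear(float color0, float color1, float lod_fpart)
{
   /*
    * The vector code takes the lerp branch if *any* lane has lod_fpart > 0,
    * so individual lanes may still carry negative values and get clamped.
    */
   if (lod_fpart <= 0.0f)
      return color0;
   return color0 + lod_fpart * (color1 - color0);
}
#endif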
1778
1779
1780 /**
1781 * Sample the texture/mipmap using given mip filter, and using
1782 * both nearest and linear filtering at the same time depending
1783 * on linear_mask.
1784 * lod can be per quad but linear_mask is always per pixel.
1785 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1786 * from (vectors or scalars).
1787  * If we're using nearest miplevel sampling, the '1' values will be null/unused.
1788 */
1789 static void
1790 lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
1791 LLVMValueRef linear_mask,
1792 unsigned mip_filter,
1793 LLVMValueRef *coords,
1794 const LLVMValueRef *offsets,
1795 LLVMValueRef ilevel0,
1796 LLVMValueRef ilevel1,
1797 LLVMValueRef lod_fpart,
1798 LLVMValueRef lod_positive,
1799 LLVMValueRef *colors_out)
1800 {
1801 LLVMBuilderRef builder = bld->gallivm->builder;
1802 LLVMValueRef size0 = NULL;
1803 LLVMValueRef size1 = NULL;
1804 LLVMValueRef row_stride0_vec = NULL;
1805 LLVMValueRef row_stride1_vec = NULL;
1806 LLVMValueRef img_stride0_vec = NULL;
1807 LLVMValueRef img_stride1_vec = NULL;
1808 LLVMValueRef data_ptr0 = NULL;
1809 LLVMValueRef data_ptr1 = NULL;
1810 LLVMValueRef mipoff0 = NULL;
1811 LLVMValueRef mipoff1 = NULL;
1812 LLVMValueRef colors0[4], colors1[4];
1813 unsigned chan;
1814
1815 /* sample the first mipmap level */
1816 lp_build_mipmap_level_sizes(bld, ilevel0,
1817 &size0,
1818 &row_stride0_vec, &img_stride0_vec);
1819 if (bld->num_mips == 1) {
1820 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1821 }
1822 else {
1823       /* This path should work for num_lods == 1 too, but is slightly less efficient */
1824 data_ptr0 = bld->base_ptr;
1825 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1826 }
1827
1828 lp_build_sample_image_linear(bld, FALSE, size0, linear_mask,
1829 row_stride0_vec, img_stride0_vec,
1830 data_ptr0, mipoff0, coords, offsets,
1831 colors0);
1832
1833 /* Store the first level's colors in the output variables */
1834 for (chan = 0; chan < 4; chan++) {
1835 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1836 }
1837
1838 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1839 struct lp_build_if_state if_ctx;
1840 LLVMValueRef need_lerp;
1841
1842 /*
1843 * We'll do mip filtering if any of the quads (or individual
1844        * pixels, in case of per-pixel lod) need it.
1845        * Note we use lod_positive here, not lod_fpart, since it may be the
1846        * same condition as the one used in the outer "if" in the caller;
1847        * llvm should then be able to merge the branches in this case.
1848 */
1849 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
1850
1851 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1852 {
1853 /*
1854 * We unfortunately need to clamp lod_fpart here since we can get
1855           * negative values, which would screw up filtering if not all
1856           * lod_fpart values have the same sign.
1857 */
1858 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1859 bld->lodf_bld.zero);
1860 /* sample the second mipmap level */
1861 lp_build_mipmap_level_sizes(bld, ilevel1,
1862 &size1,
1863 &row_stride1_vec, &img_stride1_vec);
1864 if (bld->num_mips == 1) {
1865 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1866 }
1867 else {
1868 data_ptr1 = bld->base_ptr;
1869 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1870 }
1871
1872 lp_build_sample_image_linear(bld, FALSE, size1, linear_mask,
1873 row_stride1_vec, img_stride1_vec,
1874 data_ptr1, mipoff1, coords, offsets,
1875 colors1);
1876
1877 /* interpolate samples from the two mipmap levels */
1878
1879 if (bld->num_lods != bld->coord_type.length)
1880 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1881 bld->lodf_bld.type,
1882 bld->texel_bld.type,
1883 lod_fpart);
1884
1885 for (chan = 0; chan < 4; chan++) {
1886 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1887 colors0[chan], colors1[chan],
1888 0);
1889 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1890 }
1891 }
1892 lp_build_endif(&if_ctx);
1893 }
1894 }
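
/*
 * Illustrative note (an interpretation, not from the original code): in the
 * combined path above, lanes whose linear_mask bit is set get genuine
 * bilinear weights, while the remaining lanes have their weights collapsed
 * to a 0/1 selection so the shared linear code degenerates to nearest
 * filtering for them; these are the "tricks (with weights)" the callers
 * rely on.
 */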
1895
1896
1897 /**
1898 * Build (per-coord) layer value.
1899  * Either clamp the layer to valid values, or fill in the optional
1900  * out_of_bounds value and return the layer unclamped.
1901 */
1902 static LLVMValueRef
1903 lp_build_layer_coord(struct lp_build_sample_context *bld,
1904 unsigned texture_unit,
1905 boolean is_cube_array,
1906 LLVMValueRef layer,
1907 LLVMValueRef *out_of_bounds)
1908 {
1909 LLVMValueRef num_layers;
1910 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1911
1912 num_layers = bld->dynamic_state->depth(bld->dynamic_state, bld->gallivm,
1913 bld->context_ptr, texture_unit);
1914
1915 if (out_of_bounds) {
1916 LLVMValueRef out1, out;
1917 assert(!is_cube_array);
1918 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
1919 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
1920 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
1921 *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
1922 return layer;
1923 }
1924 else {
1925 LLVMValueRef maxlayer;
1926 LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
1927 bld->int_bld.one;
1928 maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
1929 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
1930 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
1931 }
1932 }
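
#if 0
/*
 * Illustrative scalar equivalent of lp_build_layer_coord() above
 * (hypothetical sketch, not part of gallivm):
 */
static int
example_layer_coord(int layer, int num_layers, boolean is_cube_array,
                    boolean *out_of_bounds)
{
   if (out_of_bounds) {
      *out_of_bounds = layer < 0 || layer >= num_layers;
      return layer;   /* returned unclamped */
   }
   else {
      /* clamp to the first layer of the last valid (cube) slot */
      int maxlayer = num_layers - (is_cube_array ? 6 : 1);
      return CLAMP(layer, 0, maxlayer);
   }
}
#endif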
1933
1934
1935 /**
1936 * Calculate cube face, lod, mip levels.
1937 */
1938 static void
1939 lp_build_sample_common(struct lp_build_sample_context *bld,
1940 boolean is_lodq,
1941 unsigned texture_index,
1942 unsigned sampler_index,
1943 LLVMValueRef *coords,
1944 const struct lp_derivatives *derivs, /* optional */
1945 LLVMValueRef lod_bias, /* optional */
1946 LLVMValueRef explicit_lod, /* optional */
1947 LLVMValueRef *lod_pos_or_zero,
1948 LLVMValueRef *lod,
1949 LLVMValueRef *lod_fpart,
1950 LLVMValueRef *ilevel0,
1951 LLVMValueRef *ilevel1)
1952 {
1953 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1954 const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1955 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1956 const unsigned target = bld->static_texture_state->target;
1957 LLVMValueRef first_level, cube_rho = NULL;
1958 LLVMValueRef lod_ipart = NULL;
1959 struct lp_derivatives cube_derivs;
1960
1961 /*
1962 printf("%s mip %d min %d mag %d\n", __FUNCTION__,
1963 mip_filter, min_filter, mag_filter);
1964 */
1965
1966 /*
1967 * Choose cube face, recompute texcoords for the chosen face and
1968 * compute rho here too (as it requires transform of derivatives).
1969 */
1970 if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
1971 boolean need_derivs;
1972 need_derivs = ((min_filter != mag_filter ||
1973 mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
1974 !bld->static_sampler_state->min_max_lod_equal &&
1975 !explicit_lod);
1976 lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
1977 derivs = &cube_derivs;
1978 if (target == PIPE_TEXTURE_CUBE_ARRAY) {
1979 /* calculate cube layer coord now */
1980 LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
1981 LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
1982 layer = lp_build_mul(&bld->int_coord_bld, layer, six);
1983 coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL);
1984          /* because of seamless filtering we can't add it to the face (coords[2]) here. */
1985 }
1986 }
1987 else if (target == PIPE_TEXTURE_1D_ARRAY ||
1988 target == PIPE_TEXTURE_2D_ARRAY) {
1989 coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
1990 coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL);
1991 }
1992
1993 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
1994 /*
1995        * Clamp p coords to [0,1] for fixed-function depth texture formats here.
1996        * Technically this is not entirely correct for unorm depth, as the ref
1997        * value should be converted to the depth format (quantization!) and the
1998        * comparison then done in the texture format. That would actually help
1999        * performance (we'd only need to do it once and could skip the per-sample
2000        * conversion of texels to floats), but it would need messier code (we'd
2001        * need to push at least some bits down to the actual fetch so conversion
2002        * could be skipped, and it would interact badly with border color: we'd
2003        * need to convert border color to that format too, or use other tricks).
2004 */
2005 const struct util_format_description *format_desc = bld->format_desc;
2006 unsigned chan_type;
2007       /* not entirely sure we couldn't end up with an invalid swizzle here */
2008 chan_type = format_desc->swizzle[0] <= PIPE_SWIZZLE_W ?
2009 format_desc->channel[format_desc->swizzle[0]].type :
2010 UTIL_FORMAT_TYPE_FLOAT;
2011 if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2012 coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
2013 bld->coord_bld.zero, bld->coord_bld.one);
2014 }
2015 }
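
   /*
    * (Illustrative example: with PIPE_FORMAT_Z16_UNORM the texels can only
    * represent [0,1], so clamping the ref value above mirrors what a
    * quantized comparison would yield for out-of-range refs.)
    */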
2016
2017 /*
2018 * Compute the level of detail (float).
2019 */
2020 if (min_filter != mag_filter ||
2021 mip_filter != PIPE_TEX_MIPFILTER_NONE || is_lodq) {
2022 /* Need to compute lod either to choose mipmap levels or to
2023 * distinguish between minification/magnification with one mipmap level.
2024 */
2025 lp_build_lod_selector(bld, is_lodq, texture_index, sampler_index,
2026 coords[0], coords[1], coords[2], cube_rho,
2027 derivs, lod_bias, explicit_lod,
2028 mip_filter, lod,
2029 &lod_ipart, lod_fpart, lod_pos_or_zero);
2030 if (is_lodq) {
2031 LLVMValueRef last_level;
2032 last_level = bld->dynamic_state->last_level(bld->dynamic_state,
2033 bld->gallivm,
2034 bld->context_ptr,
2035 texture_index);
2036 first_level = bld->dynamic_state->first_level(bld->dynamic_state,
2037 bld->gallivm,
2038 bld->context_ptr,
2039 texture_index);
2040 last_level = lp_build_sub(&bld->int_bld, last_level, first_level);
2041 last_level = lp_build_int_to_float(&bld->float_bld, last_level);
2042 last_level = lp_build_broadcast_scalar(&bld->lodf_bld, last_level);
2043
2044 switch (mip_filter) {
2045 case PIPE_TEX_MIPFILTER_NONE:
2046 *lod_fpart = bld->lodf_bld.zero;
2047 break;
2048 case PIPE_TEX_MIPFILTER_NEAREST:
2049 *lod_fpart = lp_build_round(&bld->lodf_bld, *lod_fpart);
2050 /* fallthrough */
2051 case PIPE_TEX_MIPFILTER_LINEAR:
2052 *lod_fpart = lp_build_clamp(&bld->lodf_bld, *lod_fpart,
2053 bld->lodf_bld.zero, last_level);
2054 break;
2055 }
2056 return;
2057 }
2058
2059 } else {
2060 lod_ipart = bld->lodi_bld.zero;
2061 *lod_pos_or_zero = bld->lodi_bld.zero;
2062 }
2063
2064 if (bld->num_lods != bld->num_mips) {
2065 /* only makes sense if there's just a single mip level */
2066 assert(bld->num_mips == 1);
2067 lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
2068 }
2069
2070 /*
2071 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
2072 */
2073 switch (mip_filter) {
2074 default:
2075 assert(0 && "bad mip_filter value in lp_build_sample_soa()");
2076 /* fall-through */
2077 case PIPE_TEX_MIPFILTER_NONE:
2078 /* always use mip level 0 */
2079 first_level = bld->dynamic_state->first_level(bld->dynamic_state,
2080 bld->gallivm, bld->context_ptr,
2081 texture_index);
2082 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
2083 *ilevel0 = first_level;
2084 break;
2085 case PIPE_TEX_MIPFILTER_NEAREST:
2086 assert(lod_ipart);
2087 lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
2088 break;
2089 case PIPE_TEX_MIPFILTER_LINEAR:
2090 assert(lod_ipart);
2091 assert(*lod_fpart);
2092 lp_build_linear_mip_levels(bld, texture_index,
2093 lod_ipart, lod_fpart,
2094 ilevel0, ilevel1);
2095 break;
2096 }
2097 }
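
#if 0
/*
 * Illustrative scalar sketch of the ilevel0/ilevel1 selection above
 * (hypothetical helper; the real code also clamps to the level range
 * and supports the brilinear optimization):
 */
static void
example_mip_levels(unsigned mip_filter, float lod, int first_level,
                   int *ilevel0, int *ilevel1, float *lod_fpart)
{
   switch (mip_filter) {
   case PIPE_TEX_MIPFILTER_NONE:
      *ilevel0 = first_level;
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      *ilevel0 = first_level + (int)floorf(lod + 0.5f);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      *ilevel0 = first_level + (int)floorf(lod);
      *ilevel1 = *ilevel0 + 1;
      *lod_fpart = lod - floorf(lod);
      break;
   }
}
#endif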
2098
2099 static void
2100 lp_build_clamp_border_color(struct lp_build_sample_context *bld,
2101 unsigned sampler_unit)
2102 {
2103 struct gallivm_state *gallivm = bld->gallivm;
2104 LLVMBuilderRef builder = gallivm->builder;
2105 LLVMValueRef border_color_ptr =
2106 bld->dynamic_state->border_color(bld->dynamic_state, gallivm,
2107 bld->context_ptr, sampler_unit);
2108 LLVMValueRef border_color;
2109 const struct util_format_description *format_desc = bld->format_desc;
2110 struct lp_type vec4_type = bld->texel_type;
2111 struct lp_build_context vec4_bld;
2112 LLVMValueRef min_clamp = NULL;
2113 LLVMValueRef max_clamp = NULL;
2114
2115 /*
2116     * For normalized formats we need to clamp the border color (technically
2117     * we probably should also quantize the data). Really sucks doing this
2118     * here, but it can't be avoided, at least for now, since this is part of
2119     * sampler state while the texture format is part of sampler_view state.
2120     * GL also expects clamping for uint/sint formats, so do that as well
2121     * (d3d10 can't end up here with uint/sint since it only supports them
2122     * with ld).
2123 */
2124 vec4_type.length = 4;
2125 lp_build_context_init(&vec4_bld, gallivm, vec4_type);
2126
2127 /*
2128 * Vectorized clamping of border color. Loading is a bit of a hack since
2129 * we just cast the pointer to float array to pointer to vec4
2130 * (int or float).
2131 */
2132 border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
2133 lp_build_const_int32(gallivm, 0));
2134 border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
2135 LLVMPointerType(vec4_bld.vec_type, 0), "");
2136 border_color = LLVMBuildLoad(builder, border_color_ptr, "");
2137 /* we don't have aligned type in the dynamic state unfortunately */
2138 LLVMSetAlignment(border_color, 4);
2139
2140 /*
2141 * Instead of having some incredibly complex logic which will try to figure out
2142 * clamping necessary for each channel, simply use the first channel, and treat
2143 * mixed signed/unsigned normalized formats specially.
2144 * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
2145 * good reason.)
2146 */
2147 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
2148 int chan;
2149 /* d/s needs special handling because both present means just sampling depth */
2150 if (util_format_is_depth_and_stencil(format_desc->format)) {
2151 chan = format_desc->swizzle[0];
2152 }
2153 else {
2154 chan = util_format_get_first_non_void_channel(format_desc->format);
2155 }
2156 if (chan >= 0 && chan <= PIPE_SWIZZLE_W) {
2157 unsigned chan_type = format_desc->channel[chan].type;
2158 unsigned chan_norm = format_desc->channel[chan].normalized;
2159 unsigned chan_pure = format_desc->channel[chan].pure_integer;
2160 if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
2161 if (chan_norm) {
2162 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2163 max_clamp = vec4_bld.one;
2164 }
2165 else if (chan_pure) {
2166 /*
2167                 * Border color was stored as int, hence we need min/max clamps
2168                 * only if the channel has fewer than 32 bits.
2169 */
2170 unsigned chan_size = format_desc->channel[chan].size;
2171 if (chan_size < 32) {
2172 min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2173 0 - (1 << (chan_size - 1)));
2174 max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2175 (1 << (chan_size - 1)) - 1);
2176 }
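                /* (e.g. a pure S16 channel gets clamped to [-32768, 32767]) */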
2177 }
2178 /* TODO: no idea about non-pure, non-normalized! */
2179 }
2180 else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
2181 if (chan_norm) {
2182 min_clamp = vec4_bld.zero;
2183 max_clamp = vec4_bld.one;
2184 }
2185 /*
2186              * Need an ugly hack here: because we don't have Z32_FLOAT_X8X24,
2187              * we use Z32_FLOAT_S8X24 to imply sampling the depth component
2188              * and ignoring stencil, which would blow up here if we tried to
2189              * do a uint clamp in a float texel build...
2190              * And even if we had that format, mesa st also thinks using z24s8
2191              * means depth sampling ignoring stencil.
2192 */
2193 else if (chan_pure) {
2194 /*
2195                 * Border color was stored as uint, hence we never need a min
2196                 * clamp, and only need a max clamp if the channel has fewer than 32 bits.
2197 */
2198 unsigned chan_size = format_desc->channel[chan].size;
2199 if (chan_size < 32) {
2200 max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2201 (1 << chan_size) - 1);
2202 }
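                /* (e.g. a pure U8 channel gets a max clamp of 255; no min clamp needed) */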
2203 /* TODO: no idea about non-pure, non-normalized! */
2204 }
2205 }
2206 else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
2207 /* TODO: I have no idea what clamp this would need if any! */
2208 }
2209 }
2210 /* mixed plain formats (or different pure size) */
2211 switch (format_desc->format) {
2212 case PIPE_FORMAT_B10G10R10A2_UINT:
2213 case PIPE_FORMAT_R10G10B10A2_UINT:
2214 {
2215 unsigned max10 = (1 << 10) - 1;
2216 max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
2217 max10, (1 << 2) - 1, NULL);
2218 }
2219 break;
2220 case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
2221 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
2222 -1.0F, 0.0F, NULL);
2223 max_clamp = vec4_bld.one;
2224 break;
2225 case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
2226 case PIPE_FORMAT_R5SG5SB6U_NORM:
2227 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
2228 0.0F, 0.0F, NULL);
2229 max_clamp = vec4_bld.one;
2230 break;
2231 default:
2232 break;
2233 }
2234 }
2235 else {
2236 /* cannot figure this out from format description */
2237 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
2238 /* s3tc formats are always unorm */
2239 min_clamp = vec4_bld.zero;
2240 max_clamp = vec4_bld.one;
2241 }
2242 else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
2243 format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
2244 switch (format_desc->format) {
2245 case PIPE_FORMAT_RGTC1_UNORM:
2246 case PIPE_FORMAT_RGTC2_UNORM:
2247 case PIPE_FORMAT_LATC1_UNORM:
2248 case PIPE_FORMAT_LATC2_UNORM:
2249 case PIPE_FORMAT_ETC1_RGB8:
2250 min_clamp = vec4_bld.zero;
2251 max_clamp = vec4_bld.one;
2252 break;
2253 case PIPE_FORMAT_RGTC1_SNORM:
2254 case PIPE_FORMAT_RGTC2_SNORM:
2255 case PIPE_FORMAT_LATC1_SNORM:
2256 case PIPE_FORMAT_LATC2_SNORM:
2257 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2258 max_clamp = vec4_bld.one;
2259 break;
2260 default:
2261 assert(0);
2262 break;
2263 }
2264 }
2265 /*
2266 * all others from subsampled/other group, though we don't care
2267 * about yuv (and should not have any from zs here)
2268 */
2269 else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
2270 switch (format_desc->format) {
2271 case PIPE_FORMAT_R8G8_B8G8_UNORM:
2272 case PIPE_FORMAT_G8R8_G8B8_UNORM:
2273 case PIPE_FORMAT_G8R8_B8R8_UNORM:
2274 case PIPE_FORMAT_R8G8_R8B8_UNORM:
2275 case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
2276 min_clamp = vec4_bld.zero;
2277 max_clamp = vec4_bld.one;
2278 break;
2279 case PIPE_FORMAT_R8G8Bx_SNORM:
2280 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2281 max_clamp = vec4_bld.one;
2282 break;
2283 /*
2284 * Note smallfloat formats usually don't need clamping
2285 * (they still have infinite range) however this is not
2286 * true for r11g11b10 and r9g9b9e5, which can't represent
2287 * negative numbers (and additionally r9g9b9e5 can't represent
2288 * very large numbers). d3d10 seems happy without clamping in
2289 * this case, but gl spec is pretty clear: "for floating
2290 * point and integer formats, border values are clamped to
2291 * the representable range of the format" so do that here.
2292 */
2293 case PIPE_FORMAT_R11G11B10_FLOAT:
2294 min_clamp = vec4_bld.zero;
2295 break;
2296 case PIPE_FORMAT_R9G9B9E5_FLOAT:
2297 min_clamp = vec4_bld.zero;
2298 max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
2299 break;
2300 default:
2301 assert(0);
2302 break;
2303 }
2304 }
2305 }
2306
2307 if (min_clamp) {
2308 border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
2309 }
2310 if (max_clamp) {
2311 border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
2312 }
2313
2314 bld->border_color_clamped = border_color;
2315 }
2316
2317
2318 /**
2319 * General texture sampling codegen.
2320 * This function handles texture sampling for all texture targets (1D,
2321 * 2D, 3D, cube) and all filtering modes.
2322 */
2323 static void
2324 lp_build_sample_general(struct lp_build_sample_context *bld,
2325 unsigned sampler_unit,
2326 boolean is_gather,
2327 LLVMValueRef *coords,
2328 const LLVMValueRef *offsets,
2329 LLVMValueRef lod_positive,
2330 LLVMValueRef lod_fpart,
2331 LLVMValueRef ilevel0,
2332 LLVMValueRef ilevel1,
2333 LLVMValueRef *colors_out)
2334 {
2335 LLVMBuilderRef builder = bld->gallivm->builder;
2336 const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
2337 const unsigned mip_filter = sampler_state->min_mip_filter;
2338 const unsigned min_filter = sampler_state->min_img_filter;
2339 const unsigned mag_filter = sampler_state->mag_img_filter;
2340 LLVMValueRef texels[4];
2341 unsigned chan;
2342
2343 /* if we need border color, (potentially) clamp it now */
2344 if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
2345 min_filter,
2346 mag_filter) ||
2347 (bld->dims > 1 &&
2348 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
2349 min_filter,
2350 mag_filter)) ||
2351 (bld->dims > 2 &&
2352 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
2353 min_filter,
2354 mag_filter))) {
2355 lp_build_clamp_border_color(bld, sampler_unit);
2356 }
2357
2358
2359 /*
2360 * Get/interpolate texture colors.
2361 */
2362
2363 for (chan = 0; chan < 4; ++chan) {
2364 texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
2365 lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
2366 }
2367
2368 if (min_filter == mag_filter) {
2369 /* no need to distinguish between minification and magnification */
2370 lp_build_sample_mipmap(bld, min_filter, mip_filter,
2371 is_gather,
2372 coords, offsets,
2373 ilevel0, ilevel1, lod_fpart,
2374 texels);
2375 }
2376 else {
2377 /*
2378 * Could also get rid of the if-logic and always use mipmap_both, both
2379 * for the single lod and multi-lod case if nothing really uses this.
2380 */
2381 if (bld->num_lods == 1) {
2382 /* Emit conditional to choose min image filter or mag image filter
2383 * depending on the lod being > 0 or <= 0, respectively.
2384 */
2385 struct lp_build_if_state if_ctx;
2386
2387 lod_positive = LLVMBuildTrunc(builder, lod_positive,
2388 LLVMInt1TypeInContext(bld->gallivm->context), "");
2389
2390 lp_build_if(&if_ctx, bld->gallivm, lod_positive);
2391 {
2392 /* Use the minification filter */
2393 lp_build_sample_mipmap(bld, min_filter, mip_filter, FALSE,
2394 coords, offsets,
2395 ilevel0, ilevel1, lod_fpart,
2396 texels);
2397 }
2398 lp_build_else(&if_ctx);
2399 {
2400 /* Use the magnification filter */
2401 lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
2402 FALSE,
2403 coords, offsets,
2404 ilevel0, NULL, NULL,
2405 texels);
2406 }
2407 lp_build_endif(&if_ctx);
2408 }
2409 else {
2410 LLVMValueRef need_linear, linear_mask;
2411 unsigned mip_filter_for_nearest;
2412 struct lp_build_if_state if_ctx;
2413
2414 if (min_filter == PIPE_TEX_FILTER_LINEAR) {
2415 linear_mask = lod_positive;
2416 mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
2417 }
2418 else {
2419 linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
2420 mip_filter_for_nearest = mip_filter;
2421 }
2422 need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
2423 linear_mask);
2424
2425 if (bld->num_lods != bld->coord_type.length) {
2426 linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
2427 bld->lodi_type,
2428 bld->int_coord_type,
2429 linear_mask);
2430 }
2431
2432 lp_build_if(&if_ctx, bld->gallivm, need_linear);
2433 {
2434 /*
2435 * Do sampling with both filters simultaneously. This means using
2436 * a linear filter and doing some tricks (with weights) for the pixels
2437 * which need nearest filter.
2438            * Note that it's probably rare that some pixels need nearest and
2439            * some linear filtering, but the fixups required for the nearest
2440            * pixels aren't all that complicated, so just always run the
2441            * combined path if at least some pixels require linear.
2442 */
2443 lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
2444 coords, offsets,
2445 ilevel0, ilevel1,
2446 lod_fpart, lod_positive,
2447 texels);
2448 }
2449 lp_build_else(&if_ctx);
2450 {
2451 /*
2452 * All pixels require just nearest filtering, which is way
2453 * cheaper than linear, hence do a separate path for that.
2454 */
2455 lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
2456 mip_filter_for_nearest, FALSE,
2457 coords, offsets,
2458 ilevel0, ilevel1, lod_fpart,
2459 texels);
2460 }
2461 lp_build_endif(&if_ctx);
2462 }
2463 }
2464
2465 for (chan = 0; chan < 4; ++chan) {
2466 colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
2467 lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
2468 }
2469 }
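
/*
 * Illustrative decision logic of lp_build_sample_general() above, in scalar
 * form (a sketch only, not part of gallivm):
 *
 *    filter = (lod > 0) ? min_filter : mag_filter;
 *    color  = sample_mipmap(filter,
 *                           (lod > 0) ? mip_filter : PIPE_TEX_MIPFILTER_NONE);
 *
 * With per-lane lods both filters may be needed within one vector, hence
 * the combined lp_build_sample_mipmap_both() path.
 */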
2470
2471
2472 /**
2473 * Texel fetch function.
2474  * In contrast to general sampling there is no filtering and no coord
2475  * minification: lod (if any) is always an explicit uint, and coords are uints
2476  * (in texel units) applied directly to the selected mip level (after adding texel offsets).
2477 * This function handles texel fetch for all targets where texel fetch is supported
2478 * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
2479 */
2480 static void
2481 lp_build_fetch_texel(struct lp_build_sample_context *bld,
2482 unsigned texture_unit,
2483 const LLVMValueRef *coords,
2484 LLVMValueRef explicit_lod,
2485 const LLVMValueRef *offsets,
2486 LLVMValueRef *colors_out)
2487 {
2488 struct lp_build_context *perquadi_bld = &bld->lodi_bld;
2489 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
2490 unsigned dims = bld->dims, chan;
2491 unsigned target = bld->static_texture_state->target;
2492 boolean out_of_bound_ret_zero = TRUE;
2493 LLVMValueRef size, ilevel;
2494 LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
2495 LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
2496 LLVMValueRef width, height, depth, i, j;
2497 LLVMValueRef offset, out_of_bounds, out1;
2498
2499 out_of_bounds = int_coord_bld->zero;
2500
2501 if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
2502 if (bld->num_mips != int_coord_bld->type.length) {
2503 ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
2504 perquadi_bld->type, explicit_lod, 0);
2505 }
2506 else {
2507 ilevel = explicit_lod;
2508 }
2509 lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
2510 out_of_bound_ret_zero ? &out_of_bounds : NULL);
2511 }
2512 else {
2513 assert(bld->num_mips == 1);
2514 if (bld->static_texture_state->target != PIPE_BUFFER) {
2515 ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
2516 bld->context_ptr, texture_unit);
2517 }
2518 else {
2519 ilevel = lp_build_const_int32(bld->gallivm, 0);
2520 }
2521 }
2522 lp_build_mipmap_level_sizes(bld, ilevel,
2523 &size,
2524 &row_stride_vec, &img_stride_vec);
2525 lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
2526 size, &width, &height, &depth);
2527
2528 if (target == PIPE_TEXTURE_1D_ARRAY ||
2529 target == PIPE_TEXTURE_2D_ARRAY) {
2530 if (out_of_bound_ret_zero) {
2531 z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1);
2532 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2533 }
2534 else {
2535 z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL);
2536 }
2537 }
2538
2539 /* This is a lot like border sampling */
2540 if (offsets[0]) {
2541 /*
2542 * coords are really unsigned, offsets are signed, but I don't think
2543 * exceeding 31 bits is possible
2544 */
2545 x = lp_build_add(int_coord_bld, x, offsets[0]);
2546 }
2547 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
2548 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2549 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
2550 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2551
2552 if (dims >= 2) {
2553 if (offsets[1]) {
2554 y = lp_build_add(int_coord_bld, y, offsets[1]);
2555 }
2556 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
2557 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2558 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
2559 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2560
2561 if (dims >= 3) {
2562 if (offsets[2]) {
2563 z = lp_build_add(int_coord_bld, z, offsets[2]);
2564 }
2565 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
2566 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2567 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
2568 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2569 }
2570 }
2571
2572 lp_build_sample_offset(int_coord_bld,
2573 bld->format_desc,
2574 x, y, z, row_stride_vec, img_stride_vec,
2575 &offset, &i, &j);
2576
2577 if (bld->static_texture_state->target != PIPE_BUFFER) {
2578 offset = lp_build_add(int_coord_bld, offset,
2579 lp_build_get_mip_offsets(bld, ilevel));
2580 }
2581
2582 offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
2583
2584 lp_build_fetch_rgba_soa(bld->gallivm,
2585 bld->format_desc,
2586 bld->texel_type, TRUE,
2587 bld->base_ptr, offset,
2588 i, j,
2589 bld->cache,
2590 colors_out);
2591
2592 if (out_of_bound_ret_zero) {
2593 /*
2594 * Only needed for ARB_robust_buffer_access_behavior and d3d10.
2595 * Could use min/max above instead of out-of-bounds comparisons
2596 * if we don't care about the result returned for out-of-bounds.
2597 */
2598 for (chan = 0; chan < 4; chan++) {
2599 colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
2600 bld->texel_bld.zero, colors_out[chan]);
2601 }
2602 }
2603 }
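
#if 0
/*
 * Illustrative scalar sketch of the robust out-of-bounds handling above
 * (hypothetical helper, not part of gallivm):
 */
static float
example_robust_fetch(int x, int width, boolean oob_ret_zero,
                     const float *texels)
{
   boolean oob = x < 0 || x >= width;
   int offset = oob ? 0 : x;          /* andnot: force a safe address */
   float texel = texels[offset];
   if (oob_ret_zero && oob)
      texel = 0.0f;                   /* d3d10 / robust access: return zero */
   return texel;
}
#endif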
2604
2605
2606 /**
2607 * Just set texels to white instead of actually sampling the texture.
2608 * For debugging.
2609 */
2610 void
2611 lp_build_sample_nop(struct gallivm_state *gallivm,
2612 struct lp_type type,
2613 const LLVMValueRef *coords,
2614 LLVMValueRef texel_out[4])
2615 {
2616 LLVMValueRef one = lp_build_one(gallivm, type);
2617 unsigned chan;
2618
2619 for (chan = 0; chan < 4; chan++) {
2620 texel_out[chan] = one;
2621 }
2622 }
2623
2624
2625 /**
2626 * Build the actual texture sampling code.
2627 * 'texel' will return a vector of four LLVMValueRefs corresponding to
2628 * R, G, B, A.
2629 * \param type vector float type to use for coords, etc.
2630 * \param sample_key
2631 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y
2632 */
2633 static void
2634 lp_build_sample_soa_code(struct gallivm_state *gallivm,
2635 const struct lp_static_texture_state *static_texture_state,
2636 const struct lp_static_sampler_state *static_sampler_state,
2637 struct lp_sampler_dynamic_state *dynamic_state,
2638 struct lp_type type,
2639 unsigned sample_key,
2640 unsigned texture_index,
2641 unsigned sampler_index,
2642 LLVMValueRef context_ptr,
2643 LLVMValueRef thread_data_ptr,
2644 const LLVMValueRef *coords,
2645 const LLVMValueRef *offsets,
2646 const struct lp_derivatives *derivs, /* optional */
2647 LLVMValueRef lod, /* optional */
2648 LLVMValueRef texel_out[4])
2649 {
2650 unsigned target = static_texture_state->target;
2651 unsigned dims = texture_dims(target);
2652 unsigned num_quads = type.length / 4;
2653 unsigned mip_filter, min_img_filter, mag_img_filter, i;
2654 struct lp_build_sample_context bld;
2655 struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
2656 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
2657 LLVMBuilderRef builder = gallivm->builder;
2658 LLVMValueRef tex_width, newcoords[5];
2659 enum lp_sampler_lod_property lod_property;
2660 enum lp_sampler_lod_control lod_control;
2661 enum lp_sampler_op_type op_type;
2662 LLVMValueRef lod_bias = NULL;
2663 LLVMValueRef explicit_lod = NULL;
2664 boolean op_is_tex, op_is_lodq, op_is_gather;
2665
2666 if (0) {
2667 enum pipe_format fmt = static_texture_state->format;
2668 debug_printf("Sample from %s\n", util_format_name(fmt));
2669 }
2670
2671 lod_property = (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
2672 LP_SAMPLER_LOD_PROPERTY_SHIFT;
2673 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
2674 LP_SAMPLER_LOD_CONTROL_SHIFT;
2675 op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
2676 LP_SAMPLER_OP_TYPE_SHIFT;
2677
2678 op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
2679 op_is_lodq = op_type == LP_SAMPLER_OP_LODQ;
2680 op_is_gather = op_type == LP_SAMPLER_OP_GATHER;
2681
2682 if (lod_control == LP_SAMPLER_LOD_BIAS) {
2683 lod_bias = lod;
2684 assert(lod);
2685 assert(derivs == NULL);
2686 }
2687 else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
2688 explicit_lod = lod;
2689 assert(lod);
2690 assert(derivs == NULL);
2691 }
2692 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
2693 assert(derivs);
2694 assert(lod == NULL);
2695 }
2696 else {
2697 assert(derivs == NULL);
2698 assert(lod == NULL);
2699 }
2700
2701 if (static_texture_state->format == PIPE_FORMAT_NONE) {
2702 /*
2703 * If there's nothing bound, format is NONE, and we must return
2704 * all zero as mandated by d3d10 in this case.
2705 */
2706 unsigned chan;
2707 LLVMValueRef zero = lp_build_zero(gallivm, type);
2708 for (chan = 0; chan < 4; chan++) {
2709 texel_out[chan] = zero;
2710 }
2711 return;
2712 }
2713
2714 assert(type.floating);
2715
2716 /* Setup our build context */
2717 memset(&bld, 0, sizeof bld);
2718 bld.gallivm = gallivm;
2719 bld.context_ptr = context_ptr;
2720 bld.static_sampler_state = &derived_sampler_state;
2721 bld.static_texture_state = static_texture_state;
2722 bld.dynamic_state = dynamic_state;
2723 bld.format_desc = util_format_description(static_texture_state->format);
2724 bld.dims = dims;
2725
2726 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD || op_is_lodq) {
2727 bld.no_quad_lod = TRUE;
2728 }
2729 if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX || op_is_lodq) {
2730 bld.no_rho_approx = TRUE;
2731 }
2732 if (gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR || op_is_lodq) {
2733 bld.no_brilinear = TRUE;
2734 }
2735
2736 bld.vector_width = lp_type_width(type);
2737
2738 bld.float_type = lp_type_float(32);
2739 bld.int_type = lp_type_int(32);
2740 bld.coord_type = type;
2741 bld.int_coord_type = lp_int_type(type);
2742 bld.float_size_in_type = lp_type_float(32);
2743 bld.float_size_in_type.length = dims > 1 ? 4 : 1;
2744 bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
2745 bld.texel_type = type;
2746
2747    /* Always using the first channel should hopefully be safe;
2748     * if not, things WILL break in other places anyway.
2749 */
2750 if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
2751 bld.format_desc->channel[0].pure_integer) {
2752 if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
2753 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2754 }
2755 else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2756 bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
2757 }
2758 }
2759 else if (util_format_has_stencil(bld.format_desc) &&
2760 !util_format_has_depth(bld.format_desc)) {
2761 /* for stencil only formats, sample stencil (uint) */
2762       bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
2763 }
2764
2765 if (!static_texture_state->level_zero_only ||
2766 !static_sampler_state->max_lod_pos || op_is_lodq) {
2767 derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
2768 } else {
2769 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2770 }
2771 if (op_is_gather) {
2772 /*
2773        * gather4 is exactly like GL_LINEAR filtering except that the actual
2774        * filtering is skipped at the end. It mostly uses the same paths, so
2775        * cube face selection, coord wrapping etc. all naturally use the same code.
2776 */
2777 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2778 derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
2779 derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
2780 }
2781 mip_filter = derived_sampler_state.min_mip_filter;
2782
2783 if (0) {
2784 debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
2785 }
2786
2787 if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2788 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2789 {
2790 /*
2791 * Seamless filtering ignores wrap modes.
2792         * Setting to CLAMP_TO_EDGE is correct for nearest filtering; for
2793         * bilinear it's not correct, but way better than using for instance repeat.
2794         * Note we even set this for non-seamless. Technically GL allows any wrap
2795         * mode, which made sense when supporting true borders (can get a seamless
2796         * effect with border and CLAMP_TO_BORDER), but gallium doesn't support
2797         * borders, d3d9 requires wrap modes to be ignored, and it's a pain to fix
2798         * up the sampler state (as it makes it texture dependent).
2799 */
2800 derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2801 derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2802 }
2803 /*
2804 * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
2805 * so AoS path could be used. Not sure it's worth the trouble...
2806 */
2807
2808 min_img_filter = derived_sampler_state.min_img_filter;
2809 mag_img_filter = derived_sampler_state.mag_img_filter;
2810
2811
2812 /*
2813     * This is all a bit complicated; different paths are chosen for
2814     * performance reasons.
2815     * Essentially, there can be 1 lod per element, 1 lod per quad, or 1 lod
2816     * for everything (the last two options are equivalent in the 4-wide case).
2817     * If there's per-quad lod but we split to 4-wide so we can use AoS, the
2818     * per-quad lod is calculated and the lod value extracted afterwards, which
2819     * makes this case, as far as lod handling in the subsequent sample/filter
2820     * code is concerned, basically the same as the "1 lod for everything" case.
2821     * Different lod handling mostly shows up when building mipmap sizes
2822     * (lp_build_mipmap_level_sizes() and friends) and also in filtering
2823     * (getting the fractional part of the lod to the right texels).
2824 */
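   /*
    * For instance (illustrative): with type.length == 8, per-element lod
    * gives num_lods == 8, per-quad lod gives num_lods == 2 (8 / 4), and a
    * single lod for everything gives num_lods == 1.
    */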
2825
2826 /*
2827 * There are other situations where at least the multiple int lods could be
2828 * avoided like min and max lod being equal.
2829 */
2830 bld.num_mips = bld.num_lods = 1;
2831
2832 if (bld.no_quad_lod && bld.no_rho_approx &&
2833 ((mip_filter != PIPE_TEX_MIPFILTER_NONE && op_is_tex &&
2834 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2835 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)) ||
2836 op_is_lodq)) {
2837 /*
2838 * special case for using per-pixel lod even for implicit lod,
2839 * which is generally never required (ok by APIs) except to please
2840 * some (somewhat broken imho) tests (because per-pixel face selection
2841 * can cause derivatives to be different for pixels outside the primitive
2842        * due to the major axis division even if pre-projection derivatives
2843        * look normal).
2844        * For lodq, we do it simply to avoid scalar pack / unpack (albeit for
2845        * cube maps we do indeed get per-pixel lod values).
2846 */
2847 bld.num_mips = type.length;
2848 bld.num_lods = type.length;
2849 }
2850    else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
2851 (explicit_lod || lod_bias || derivs)) {
2852 if ((!op_is_tex && target != PIPE_BUFFER) ||
2853 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2854 bld.num_mips = type.length;
2855 bld.num_lods = type.length;
2856 }
2857 else if (op_is_tex && min_img_filter != mag_img_filter) {
2858 bld.num_mips = 1;
2859 bld.num_lods = type.length;
2860 }
2861 }
2862    /* TODO: for true scalar_lod we should only use 1 lod value */
2863 else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) ||
2864 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2865 bld.num_mips = num_quads;
2866 bld.num_lods = num_quads;
2867 }
2868 else if (op_is_tex && min_img_filter != mag_img_filter) {
2869 bld.num_mips = 1;
2870 bld.num_lods = num_quads;
2871 }
2872
2873
2874 bld.lodf_type = type;
2875 /* we want native vector size to be able to use our intrinsics */
2876 if (bld.num_lods != type.length) {
2877 /* TODO: this currently always has to be per-quad or per-element */
2878 bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
2879 }
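   /*
    * (Illustrative: for type.length == 8 with per-quad lods this pads
    * lodf_type.length from the 2 actual lods up to 4, presumably the native
    * 4-wide vector width the intrinsics expect.)
    */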
2880 bld.lodi_type = lp_int_type(bld.lodf_type);
2881 bld.levelf_type = bld.lodf_type;
2882 if (bld.num_mips == 1) {
2883 bld.levelf_type.length = 1;
2884 }
2885 bld.leveli_type = lp_int_type(bld.levelf_type);
2886 bld.float_size_type = bld.float_size_in_type;
2887 /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
2888     * with per-element lod that is w0/h0/d0/_/w1/h1/d1/_/... so up to 8x4f32 */
2889 if (bld.num_mips > 1) {
2890 bld.float_size_type.length = bld.num_mips == type.length ?
2891 bld.num_mips * bld.float_size_in_type.length :
2892 type.length;
2893 }
2894 bld.int_size_type = lp_int_type(bld.float_size_type);
2895
2896 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
2897 lp_build_context_init(&bld.float_vec_bld, gallivm, type);
2898 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
2899 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
2900 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
2901 lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
2902 lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
2903 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
2904 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
2905 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
2906 lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
2907 lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
2908 lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
2909 lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
2910
2911 /* Get the dynamic state */
2912 tex_width = dynamic_state->width(dynamic_state, gallivm,
2913 context_ptr, texture_index);
2914 bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm,
2915 context_ptr, texture_index);
2916 bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm,
2917 context_ptr, texture_index);
2918 bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
2919 context_ptr, texture_index);
2920 bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm,
2921 context_ptr, texture_index);
2922 /* Note that mip_offsets is an array[level] of offsets to texture images */
2923
2924 if (dynamic_state->cache_ptr && thread_data_ptr) {
2925 bld.cache = dynamic_state->cache_ptr(dynamic_state, gallivm,
2926 thread_data_ptr, texture_index);
2927 }
2928
2929 /* width, height, depth as single int vector */
2930 if (dims <= 1) {
2931 bld.int_size = tex_width;
2932 }
2933 else {
2934 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
2935 tex_width,
2936 LLVMConstInt(i32t, 0, 0), "");
2937 if (dims >= 2) {
2938 LLVMValueRef tex_height =
2939 dynamic_state->height(dynamic_state, gallivm,
2940 context_ptr, texture_index);
2941 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2942 tex_height,
2943 LLVMConstInt(i32t, 1, 0), "");
2944 if (dims >= 3) {
2945 LLVMValueRef tex_depth =
2946 dynamic_state->depth(dynamic_state, gallivm, context_ptr,
2947 texture_index);
2948 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2949 tex_depth,
2950 LLVMConstInt(i32t, 2, 0), "");
2951 }
2952 }
2953 }
2954
2955 for (i = 0; i < 5; i++) {
2956 newcoords[i] = coords[i];
2957 }
2958
2959 if (util_format_is_pure_integer(static_texture_state->format) &&
2960 !util_format_has_depth(bld.format_desc) && op_is_tex &&
2961 (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
2962 static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
2963 static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
2964 /*
2965        * Bail if impossible filtering is specified (the awkward additional
2966        * depth check is because it is legal in gallium to have things like S8Z24
2967        * here, which would claim to be pure int even though such formats should
2968        * sample the depth component).
2969        * In GL such filters make the texture incomplete; this makes it robust
2970        * against state trackers which set this up regardless (we'd crash in the
2971        * lerp later otherwise).
2972        * At least in some APIs it may be legal to use such filters with lod
2973        * queries and/or gather (at least for gather, d3d10 says only the wrap
2974        * bits are really used, hence the filter bits are likely simply ignored).
2975        * For fetch, we don't get valid samplers either way here.
2976 */
2977 unsigned chan;
2978 LLVMValueRef zero = lp_build_zero(gallivm, type);
2979 for (chan = 0; chan < 4; chan++) {
2980 texel_out[chan] = zero;
2981 }
2982 return;
2983 }
2984
2985 if (0) {
2986 /* For debug: no-op texture sampling */
2987 lp_build_sample_nop(gallivm,
2988 bld.texel_type,
2989 newcoords,
2990 texel_out);
2991 }
2992
2993 else if (op_type == LP_SAMPLER_OP_FETCH) {
2994 lp_build_fetch_texel(&bld, texture_index, newcoords,
2995 lod, offsets,
2996 texel_out);
2997 }
2998
2999 else {
3000 LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
3001 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL, lod = NULL;
3002 boolean use_aos;
3003
3004 use_aos = util_format_fits_8unorm(bld.format_desc) &&
3005 op_is_tex &&
3006 /* not sure this is strictly needed or simply impossible */
3007 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
3008 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
3009
3010 use_aos &= bld.num_lods <= num_quads ||
3011 derived_sampler_state.min_img_filter ==
3012 derived_sampler_state.mag_img_filter;
3013 if (dims > 1) {
3014 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
3015 if (dims > 2) {
3016 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
3017 }
3018 }
3019 if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
3020 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
3021 derived_sampler_state.seamless_cube_map &&
3022 (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
3023 derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
3024 /* theoretically possible with AoS filtering but not implemented (complex!) */
3025 use_aos = 0;
3026 }
3027
3028 if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
3029 !use_aos && util_format_fits_8unorm(bld.format_desc)) {
3030 debug_printf("%s: using floating point linear filtering for %s\n",
3031 __FUNCTION__, bld.format_desc->short_name);
3032 debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d"
3033 " wraps %d wrapt %d wrapr %d\n",
3034 derived_sampler_state.min_img_filter,
3035 derived_sampler_state.mag_img_filter,
3036 derived_sampler_state.min_mip_filter,
3037 static_texture_state->target,
3038 derived_sampler_state.seamless_cube_map,
3039 derived_sampler_state.wrap_s,
3040 derived_sampler_state.wrap_t,
3041 derived_sampler_state.wrap_r);
3042 }
3043
3044 lp_build_sample_common(&bld, op_is_lodq, texture_index, sampler_index,
3045 newcoords,
3046 derivs, lod_bias, explicit_lod,
3047 &lod_positive, &lod, &lod_fpart,
3048 &ilevel0, &ilevel1);
3049
3050 if (op_is_lodq) {
3051 texel_out[0] = lod_fpart;
3052 texel_out[1] = lod;
3053 texel_out[2] = texel_out[3] = bld.coord_bld.zero;
3054 return;
3055 }
3056
3057 if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
3058 /* The aos path doesn't do seamless filtering so simply add cube layer
3059 * to face now.
3060 */
3061 newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
3062 }
3063
3064 /*
3065 * we only try 8-wide sampling with soa, or with aos if we have AVX2,
3066 * as it appears to be a loss with just AVX
3067 */
3068 if (num_quads == 1 || !use_aos ||
3069 (util_cpu_caps.has_avx2 &&
3070 (bld.num_lods == 1 ||
3071 derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
3072 if (use_aos) {
3073 /* do sampling/filtering with fixed pt arithmetic */
3074 lp_build_sample_aos(&bld, sampler_index,
3075 newcoords[0], newcoords[1],
3076 newcoords[2],
3077 offsets, lod_positive, lod_fpart,
3078 ilevel0, ilevel1,
3079 texel_out);
3080 }
3081
3082 else {
3083 lp_build_sample_general(&bld, sampler_index,
3084 op_type == LP_SAMPLER_OP_GATHER,
3085 newcoords, offsets,
3086 lod_positive, lod_fpart,
3087 ilevel0, ilevel1,
3088 texel_out);
3089 }
3090 }
3091 else {
3092 unsigned j;
3093 struct lp_build_sample_context bld4;
3094 struct lp_type type4 = type;
3095 unsigned i;
3096 LLVMValueRef texelout4[4];
3097 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
3098
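/*
 * Wide (8+ lane) sampling with aos: split the coords into 4-wide (quad)
 * chunks, sample each chunk with a 4-wide build context, then concatenate
 * the partial results below.
 */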
3099 type4.length = 4;
3100
3101 /* Set up our build context */
3102 memset(&bld4, 0, sizeof bld4);
3103 bld4.no_quad_lod = bld.no_quad_lod;
3104 bld4.no_rho_approx = bld.no_rho_approx;
3105 bld4.no_brilinear = bld.no_brilinear;
3106 bld4.gallivm = bld.gallivm;
3107 bld4.context_ptr = bld.context_ptr;
3108 bld4.static_texture_state = bld.static_texture_state;
3109 bld4.static_sampler_state = bld.static_sampler_state;
3110 bld4.dynamic_state = bld.dynamic_state;
3111 bld4.format_desc = bld.format_desc;
3112 bld4.dims = bld.dims;
3113 bld4.row_stride_array = bld.row_stride_array;
3114 bld4.img_stride_array = bld.img_stride_array;
3115 bld4.base_ptr = bld.base_ptr;
3116 bld4.mip_offsets = bld.mip_offsets;
3117 bld4.int_size = bld.int_size;
3118 bld4.cache = bld.cache;
3119
3120 bld4.vector_width = lp_type_width(type4);
3121
3122 bld4.float_type = lp_type_float(32);
3123 bld4.int_type = lp_type_int(32);
3124 bld4.coord_type = type4;
3125 bld4.int_coord_type = lp_int_type(type4);
3126 bld4.float_size_in_type = lp_type_float(32);
3127 bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
3128 bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
3129 bld4.texel_type = bld.texel_type;
3130 bld4.texel_type.length = 4;
3131
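/*
 * num_mips/num_lods == type4.length means per-element mip level and lod
 * respectively; 1 means one shared value for the whole (4-wide) vector.
 */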
3132 bld4.num_mips = bld4.num_lods = 1;
3133 if (bld4.no_quad_lod && bld4.no_rho_approx &&
3134 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
3135 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
3136 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3137 bld4.num_mips = type4.length;
3138 bld4.num_lods = type4.length;
3139 }
3140 if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
3141 (explicit_lod || lod_bias || derivs)) {
3142 if ((!op_is_tex && target != PIPE_BUFFER) ||
3143 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
3144 bld4.num_mips = type4.length;
3145 bld4.num_lods = type4.length;
3146 }
3147 else if (op_is_tex && min_img_filter != mag_img_filter) {
3148 bld4.num_mips = 1;
3149 bld4.num_lods = type4.length;
3150 }
3151 }
3152
3153 /* we want native vector size to be able to use our intrinsics */
3154 bld4.lodf_type = type4;
3155 if (bld4.num_lods != type4.length) {
3156 bld4.lodf_type.length = 1;
3157 }
3158 bld4.lodi_type = lp_int_type(bld4.lodf_type);
3159 bld4.levelf_type = type4;
3160 if (bld4.num_mips != type4.length) {
3161 bld4.levelf_type.length = 1;
3162 }
3163 bld4.leveli_type = lp_int_type(bld4.levelf_type);
3164 bld4.float_size_type = bld4.float_size_in_type;
3165 if (bld4.num_mips > 1) {
3166 bld4.float_size_type.length = bld4.num_mips == type4.length ?
3167 bld4.num_mips * bld4.float_size_in_type.length :
3168 type4.length;
3169 }
3170 bld4.int_size_type = lp_int_type(bld4.float_size_type);
3171
3172 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
3173 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
3174 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
3175 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
3176 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
3177 lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
3178 lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
3179 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
3180 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
3181 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
3182 lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
3183 lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
3184 lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
3185 lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
3186
3187 for (i = 0; i < num_quads; i++) {
3188 LLVMValueRef s4, t4, r4;
3189 LLVMValueRef lod_positive4, lod_fpart4 = NULL;
3190 LLVMValueRef ilevel04, ilevel14 = NULL;
3191 LLVMValueRef offsets4[4] = { NULL };
3192 unsigned num_lods = bld4.num_lods;
3193
3194 s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
3195 t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
3196 r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
3197
3198 if (offsets[0]) {
3199 offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
3200 if (dims > 1) {
3201 offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
3202 if (dims > 2) {
3203 offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
3204 }
3205 }
3206 }
3207 lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
3208 ilevel04 = bld.num_mips == 1 ? ilevel0 :
3209 lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
3210 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
3211 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
3212 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
3213 }
3214
3215 if (use_aos) {
3216 /* do sampling/filtering with fixed pt arithmetic */
3217 lp_build_sample_aos(&bld4, sampler_index,
3218 s4, t4, r4, offsets4,
3219 lod_positive4, lod_fpart4,
3220 ilevel04, ilevel14,
3221 texelout4);
3222 }
3223
3224 else {
3225 /* this path is currently unreachable and hence might break easily... */
3226 LLVMValueRef newcoords4[5];
3227 newcoords4[0] = s4;
3228 newcoords4[1] = t4;
3229 newcoords4[2] = r4;
3230 newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
3231 newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
3232
3233 lp_build_sample_general(&bld4, sampler_index,
3234 op_type == LP_SAMPLER_OP_GATHER,
3235 newcoords4, offsets4,
3236 lod_positive4, lod_fpart4,
3237 ilevel04, ilevel14,
3238 texelout4);
3239 }
3240 for (j = 0; j < 4; j++) {
3241 texelouttmp[j][i] = texelout4[j];
3242 }
3243 }
3244
3245 for (j = 0; j < 4; j++) {
3246 texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
3247 }
3248 }
3249 }
3250
3251 if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) {
3252 apply_sampler_swizzle(&bld, texel_out);
3253 }
3254
3255 /*
3256 * texel type can be a (32bit) int/uint (for pure int formats only),
3257 * however we are expected to always return floats (storage is untyped).
3258 */
3259 if (!bld.texel_type.floating) {
3260 unsigned chan;
3261 for (chan = 0; chan < 4; chan++) {
3262 texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
3263 lp_build_vec_type(gallivm, type), "");
3264 }
3265 }
3266 }
3267
3268
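/*
 * Emit sampling through a dedicated (cached, fastcc) function rather than
 * always inlining the sampling code; see lp_build_sample_soa() below for
 * when the call is actually used.
 */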
3269 #define USE_TEX_FUNC_CALL 1
3270
3271 #define LP_MAX_TEX_FUNC_ARGS 32
3272
3273 static inline void
3274 get_target_info(enum pipe_texture_target target,
3275 unsigned *num_coords, unsigned *num_derivs,
3276 unsigned *num_offsets, unsigned *layer)
3277 {
3278 unsigned dims = texture_dims(target);
3279 *num_coords = dims;
3280 *num_offsets = dims;
3281 *num_derivs = (target == PIPE_TEXTURE_CUBE ||
3282 target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
3283 *layer = has_layer_coord(target) ? 2 : 0;
3284 if (target == PIPE_TEXTURE_CUBE_ARRAY) {
3285 /*
3286 * dims doesn't include the r coord for cubes - this is handled
3287 * by layer instead, but we need to fix it up for cube arrays...
3288 */
3289 *layer = 3;
3290 *num_coords = 3;
3291 }
3292 }
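/*
 * For example, per the logic above, PIPE_TEXTURE_2D_ARRAY yields
 * num_coords = 2, num_offsets = 2, num_derivs = 2, layer = 2, while
 * PIPE_TEXTURE_CUBE_ARRAY yields num_coords = 3, num_offsets = 2,
 * num_derivs = 3, layer = 3.
 */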
3293
3294
3295 /**
3296 * Generate the function body for a texture sampling function.
3297 */
3298 static void
3299 lp_build_sample_gen_func(struct gallivm_state *gallivm,
3300 const struct lp_static_texture_state *static_texture_state,
3301 const struct lp_static_sampler_state *static_sampler_state,
3302 struct lp_sampler_dynamic_state *dynamic_state,
3303 struct lp_type type,
3304 unsigned texture_index,
3305 unsigned sampler_index,
3306 LLVMValueRef function,
3307 unsigned num_args,
3308 unsigned sample_key)
3309 {
3310 LLVMBuilderRef old_builder;
3311 LLVMBasicBlockRef block;
3312 LLVMValueRef coords[5];
3313 LLVMValueRef offsets[3] = { NULL };
3314 LLVMValueRef lod = NULL;
3315 LLVMValueRef context_ptr;
3316 LLVMValueRef thread_data_ptr = NULL;
3317 LLVMValueRef texel_out[4];
3318 struct lp_derivatives derivs;
3319 struct lp_derivatives *deriv_ptr = NULL;
3320 unsigned num_param = 0;
3321 unsigned i, num_coords, num_derivs, num_offsets, layer;
3322 enum lp_sampler_lod_control lod_control;
3323 boolean need_cache = FALSE;
3324
3325 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
3326 LP_SAMPLER_LOD_CONTROL_SHIFT;
3327
3328 get_target_info(static_texture_state->target,
3329 &num_coords, &num_derivs, &num_offsets, &layer);
3330
3331 if (dynamic_state->cache_ptr) {
3332 const struct util_format_description *format_desc;
3333 format_desc = util_format_description(static_texture_state->format);
3334 if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
3335 need_cache = TRUE;
3336 }
3337 }
3338
3339 /* "unpack" arguments */
3340 context_ptr = LLVMGetParam(function, num_param++);
3341 if (need_cache) {
3342 thread_data_ptr = LLVMGetParam(function, num_param++);
3343 }
3344 for (i = 0; i < num_coords; i++) {
3345 coords[i] = LLVMGetParam(function, num_param++);
3346 }
3347 for (i = num_coords; i < 5; i++) {
3348 /* This is rather unfortunate... */
3349 coords[i] = lp_build_undef(gallivm, type);
3350 }
3351 if (layer) {
3352 coords[layer] = LLVMGetParam(function, num_param++);
3353 }
3354 if (sample_key & LP_SAMPLER_SHADOW) {
3355 coords[4] = LLVMGetParam(function, num_param++);
3356 }
3357 if (sample_key & LP_SAMPLER_OFFSETS) {
3358 for (i = 0; i < num_offsets; i++) {
3359 offsets[i] = LLVMGetParam(function, num_param++);
3360 }
3361 }
3362 if (lod_control == LP_SAMPLER_LOD_BIAS ||
3363 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3364 lod = LLVMGetParam(function, num_param++);
3365 }
3366 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3367 for (i = 0; i < num_derivs; i++) {
3368 derivs.ddx[i] = LLVMGetParam(function, num_param++);
3369 derivs.ddy[i] = LLVMGetParam(function, num_param++);
3370 }
3371 deriv_ptr = &derivs;
3372 }
3373
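/* The parameter order here must match the argument packing in
 * lp_build_sample_soa_func().
 */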
3374 assert(num_args == num_param);
3375
3376 /*
3377 * Function body
3378 */
3379
3380 old_builder = gallivm->builder;
3381 block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
3382 gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
3383 LLVMPositionBuilderAtEnd(gallivm->builder, block);
3384
3385 lp_build_sample_soa_code(gallivm,
3386 static_texture_state,
3387 static_sampler_state,
3388 dynamic_state,
3389 type,
3390 sample_key,
3391 texture_index,
3392 sampler_index,
3393 context_ptr,
3394 thread_data_ptr,
3395 coords,
3396 offsets,
3397 deriv_ptr,
3398 lod,
3399 texel_out);
3400
3401 LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
3402
3403 LLVMDisposeBuilder(gallivm->builder);
3404 gallivm->builder = old_builder;
3405
3406 gallivm_verify_function(gallivm, function);
3407 }
3408
3409
3410 /**
3411 * Call the matching function for texture sampling.
3412 * If there's no match, generate a new one.
3413 */
3414 static void
3415 lp_build_sample_soa_func(struct gallivm_state *gallivm,
3416 const struct lp_static_texture_state *static_texture_state,
3417 const struct lp_static_sampler_state *static_sampler_state,
3418 struct lp_sampler_dynamic_state *dynamic_state,
3419 const struct lp_sampler_params *params)
3420 {
3421 LLVMBuilderRef builder = gallivm->builder;
3422 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
3423 LLVMGetInsertBlock(builder)));
3424 LLVMValueRef function, inst;
3425 LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
3426 LLVMBasicBlockRef bb;
3427 LLVMValueRef tex_ret;
3428 unsigned num_args = 0;
3429 char func_name[64];
3430 unsigned i, num_coords, num_derivs, num_offsets, layer;
3431 unsigned texture_index = params->texture_index;
3432 unsigned sampler_index = params->sampler_index;
3433 unsigned sample_key = params->sample_key;
3434 const LLVMValueRef *coords = params->coords;
3435 const LLVMValueRef *offsets = params->offsets;
3436 const struct lp_derivatives *derivs = params->derivs;
3437 enum lp_sampler_lod_control lod_control;
3438 boolean need_cache = FALSE;
3439
3440 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
3441 LP_SAMPLER_LOD_CONTROL_SHIFT;
3442
3443 get_target_info(static_texture_state->target,
3444 &num_coords, &num_derivs, &num_offsets, &layer);
3445
3446 if (dynamic_state->cache_ptr) {
3447 const struct util_format_description *format_desc;
3448 format_desc = util_format_description(static_texture_state->format);
3449 if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
3450 /*
3451 * This is not 100% correct: if we have a cache but
3452 * util_format_s3tc_prefer is true, the cache won't get used
3453 * regardless (could hook up the block decode there...) */
3454 need_cache = TRUE;
3455 }
3456 }
3457 /*
3458 * texture function matches are found by name.
3459 * Thus the name has to include both the texture and sampler unit
3460 * (which covers all static state) plus the actual texture function
3461 * (including things like offsets, shadow coord, lod control).
3462 * Additionally, lod_property has to be included.
3463 */
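/*
 * E.g. texture unit 2, sampler unit 1 and a sample_key of 0x40 would give
 * "texfunc_res_2_sam_1_40" (the sample_key value is purely illustrative).
 */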
3464
3465 util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
3466 texture_index, sampler_index, sample_key);
3467
3468 function = LLVMGetNamedFunction(module, func_name);
3469
3470 if (!function) {
3471 LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
3472 LLVMTypeRef ret_type;
3473 LLVMTypeRef function_type;
3474 LLVMTypeRef val_type[4];
3475 unsigned num_param = 0;
3476
3477 /*
3478 * Generate the function prototype.
3479 */
3480
3481 arg_types[num_param++] = LLVMTypeOf(params->context_ptr);
3482 if (need_cache) {
3483 arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
3484 }
3485 for (i = 0; i < num_coords; i++) {
3486 arg_types[num_param++] = LLVMTypeOf(coords[0]);
3487 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
3488 }
3489 if (layer) {
3490 arg_types[num_param++] = LLVMTypeOf(coords[layer]);
3491 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
3492 }
3493 if (sample_key & LP_SAMPLER_SHADOW) {
3494 arg_types[num_param++] = LLVMTypeOf(coords[0]);
3495 }
3496 if (sample_key & LP_SAMPLER_OFFSETS) {
3497 for (i = 0; i < num_offsets; i++) {
3498 arg_types[num_param++] = LLVMTypeOf(offsets[0]);
3499 assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
3500 }
3501 }
3502 if (lod_control == LP_SAMPLER_LOD_BIAS ||
3503 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3504 arg_types[num_param++] = LLVMTypeOf(params->lod);
3505 }
3506 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3507 for (i = 0; i < num_derivs; i++) {
3508 arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
3509 arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
3510 assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
3511 assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
3512 }
3513 }
3514
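/* The function returns the four texel channel vectors as a single struct. */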
3515 val_type[0] = val_type[1] = val_type[2] = val_type[3] =
3516 lp_build_vec_type(gallivm, params->type);
3517 ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
3518 function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
3519 function = LLVMAddFunction(module, func_name, function_type);
3520
3521 for (i = 0; i < num_param; ++i) {
3522 if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
3523 lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
3525 }
3526 }
3527
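/* Not visible outside the module, and called with the fast convention
 * from lp_build_sample_soa_func().
 */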
3528 LLVMSetFunctionCallConv(function, LLVMFastCallConv);
3529 LLVMSetLinkage(function, LLVMInternalLinkage);
3530
3531 lp_build_sample_gen_func(gallivm,
3532 static_texture_state,
3533 static_sampler_state,
3534 dynamic_state,
3535 params->type,
3536 texture_index,
3537 sampler_index,
3538 function,
3539 num_param,
3540 sample_key);
3541 }
3542
3543 num_args = 0;
3544 args[num_args++] = params->context_ptr;
3545 if (need_cache) {
3546 args[num_args++] = params->thread_data_ptr;
3547 }
3548 for (i = 0; i < num_coords; i++) {
3549 args[num_args++] = coords[i];
3550 }
3551 if (layer) {
3552 args[num_args++] = coords[layer];
3553 }
3554 if (sample_key & LP_SAMPLER_SHADOW) {
3555 args[num_args++] = coords[4];
3556 }
3557 if (sample_key & LP_SAMPLER_OFFSETS) {
3558 for (i = 0; i < num_offsets; i++) {
3559 args[num_args++] = offsets[i];
3560 }
3561 }
3562 if (lod_control == LP_SAMPLER_LOD_BIAS ||
3563 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3564 args[num_args++] = params->lod;
3565 }
3566 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3567 for (i = 0; i < num_derivs; i++) {
3568 args[num_args++] = derivs->ddx[i];
3569 args[num_args++] = derivs->ddy[i];
3570 }
3571 }
3572
3573 assert(num_args <= LP_MAX_TEX_FUNC_ARGS);
3574
3575 tex_ret = LLVMBuildCall(builder, function, args, num_args, "");
3576 bb = LLVMGetInsertBlock(builder);
3577 inst = LLVMGetLastInstruction(bb);
3578 LLVMSetInstructionCallConv(inst, LLVMFastCallConv);
3579
3580 for (i = 0; i < 4; i++) {
3581 params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
3582 }
3583 }
3584
3585
3586 /**
3587 * Build texture sampling code.
3588 * Either via a function call or inline it directly.
3589 */
3590 void
3591 lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
3592 const struct lp_static_sampler_state *static_sampler_state,
3593 struct lp_sampler_dynamic_state *dynamic_state,
3594 struct gallivm_state *gallivm,
3595 const struct lp_sampler_params *params)
3596 {
3597 boolean use_tex_func = FALSE;
3598
3599 /*
3600 * Do not use a function call if the sampling is "simple enough".
3601 * We define this by
3602 * a) format
3603 * b) no mips (either one level only or no mip filter)
3604 * No mips will definitely make the code smaller, though
3605 * the format requirement is a bit iffy - there are some (SoA) formats
3606 * which definitely generate less code. This does happen to catch
3607 * some important cases, though, which are hurt quite a bit by using
3608 * a call (not really because of the call overhead, but because
3609 * they reuse the same texture unit with some of the same
3610 * parameters).
3611 * Ideally we'd let llvm recognize this stuff by doing IPO passes.
3612 */
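/*
 * E.g. a PIPE_FORMAT_B8G8R8A8_UNORM texture with a single level and equal
 * min/mag filters gets inlined, whereas e.g. an S3TC texture or one with
 * a real mip filter goes through the generated function.
 */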
3613
3614 if (USE_TEX_FUNC_CALL) {
3615 const struct util_format_description *format_desc;
3616 boolean simple_format;
3617 boolean simple_tex;
3618 enum lp_sampler_op_type op_type;
3619 format_desc = util_format_description(static_texture_state->format);
3620 simple_format = !format_desc ||
3621 (util_format_is_rgba8_variant(format_desc) &&
3622 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
3623
3624 op_type = (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
3625 LP_SAMPLER_OP_TYPE_SHIFT;
3626 simple_tex =
3627 op_type != LP_SAMPLER_OP_TEXTURE ||
3628 ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE ||
3629 static_texture_state->level_zero_only) &&
3630 static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);
3631
3632 use_tex_func = format_desc && !(simple_format && simple_tex);
3633 }
3634
3635 if (use_tex_func) {
3636 lp_build_sample_soa_func(gallivm,
3637 static_texture_state,
3638 static_sampler_state,
3639 dynamic_state,
3640 params);
3641 }
3642 else {
3643 lp_build_sample_soa_code(gallivm,
3644 static_texture_state,
3645 static_sampler_state,
3646 dynamic_state,
3647 params->type,
3648 params->sample_key,
3649 params->texture_index,
3650 params->sampler_index,
3651 params->context_ptr,
3652 params->thread_data_ptr,
3653 params->coords,
3654 params->offsets,
3655 params->derivs,
3656 params->lod,
3657 params->texel);
3658 }
3659 }
3660
3661
3662 void
3663 lp_build_size_query_soa(struct gallivm_state *gallivm,
3664 const struct lp_static_texture_state *static_state,
3665 struct lp_sampler_dynamic_state *dynamic_state,
3666 const struct lp_sampler_size_query_params *params)
3667 {
3668 LLVMValueRef lod, level = NULL, size;
3669 LLVMValueRef first_level = NULL;
3670 int dims, i;
3671 boolean has_array;
3672 unsigned num_lods = 1;
3673 struct lp_build_context bld_int_vec4;
3674 LLVMValueRef context_ptr = params->context_ptr;
3675 unsigned texture_unit = params->texture_unit;
3676 unsigned target = params->target;
3677
3678 if (static_state->format == PIPE_FORMAT_NONE) {
3679 /*
3680 * If there's nothing bound, the format is NONE, and we must return
3681 * all zeros, as mandated by d3d10 in this case.
3682 */
3683 unsigned chan;
3684 LLVMValueRef zero = lp_build_const_vec(gallivm, params->int_type, 0.0F);
3685 for (chan = 0; chan < 4; chan++) {
3686 params->sizes_out[chan] = zero;
3687 }
3688 return;
3689 }
3690
3691 /*
3692 * Do some sanity verification about bound texture and shader dcl target.
3693 * Not entirely sure what's possible, but assume array/non-array are
3694 * always compatible (probably not ok for OpenGL, but d3d10 has no
3695 * distinction of arrays at the resource level).
3696 * Everything else looks bogus (though not entirely sure about rect/2d).
3697 * Currently disabled because it causes assertion failures if there's
3698 * nothing bound (or rather a dummy texture, not that this case would
3699 * return the right values).
3700 */
3701 if (0 && static_state->target != target) {
3702 if (static_state->target == PIPE_TEXTURE_1D)
3703 assert(target == PIPE_TEXTURE_1D_ARRAY);
3704 else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
3705 assert(target == PIPE_TEXTURE_1D);
3706 else if (static_state->target == PIPE_TEXTURE_2D)
3707 assert(target == PIPE_TEXTURE_2D_ARRAY);
3708 else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
3709 assert(target == PIPE_TEXTURE_2D);
3710 else if (static_state->target == PIPE_TEXTURE_CUBE)
3711 assert(target == PIPE_TEXTURE_CUBE_ARRAY);
3712 else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
3713 assert(target == PIPE_TEXTURE_CUBE);
3714 else
3715 assert(0);
3716 }
3717
3718 dims = texture_dims(target);
3719
3720 switch (target) {
3721 case PIPE_TEXTURE_1D_ARRAY:
3722 case PIPE_TEXTURE_2D_ARRAY:
3723 case PIPE_TEXTURE_CUBE_ARRAY:
3724 has_array = TRUE;
3725 break;
3726 default:
3727 has_array = FALSE;
3728 break;
3729 }
3730
3731 assert(!params->int_type.floating);
3732
3733 lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
3734
3735 if (params->explicit_lod) {
3736 /* FIXME: this needs to honor per-element lod */
3737 lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod,
3738 lp_build_const_int32(gallivm, 0), "");
3739 first_level = dynamic_state->first_level(dynamic_state, gallivm,
3740 context_ptr, texture_unit);
3741 level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
3742 lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
3743 } else {
3744 lod = bld_int_vec4.zero;
3745 }
3746
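/* Build an int4 size vector: x = width, y = height, z = depth. */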
3747 size = bld_int_vec4.undef;
3748
3749 size = LLVMBuildInsertElement(gallivm->builder, size,
3750 dynamic_state->width(dynamic_state, gallivm,
3751 context_ptr, texture_unit),
3752 lp_build_const_int32(gallivm, 0), "");
3753
3754 if (dims >= 2) {
3755 size = LLVMBuildInsertElement(gallivm->builder, size,
3756 dynamic_state->height(dynamic_state, gallivm,
3757 context_ptr, texture_unit),
3758 lp_build_const_int32(gallivm, 1), "");
3759 }
3760
3761 if (dims >= 3) {
3762 size = LLVMBuildInsertElement(gallivm->builder, size,
3763 dynamic_state->depth(dynamic_state, gallivm,
3764 context_ptr, texture_unit),
3765 lp_build_const_int32(gallivm, 2), "");
3766 }
3767
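/* lp_build_minify computes max(size >> lod, 1) per component. */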
3768 size = lp_build_minify(&bld_int_vec4, size, lod, TRUE);
3769
3770 if (has_array) {
3771 LLVMValueRef layers = dynamic_state->depth(dynamic_state, gallivm,
3772 context_ptr, texture_unit);
3773 if (target == PIPE_TEXTURE_CUBE_ARRAY) {
3774 /*
3775 * It looks like GL wants the number of cubes; d3d10.1 leaves it
3776 * undefined. Could avoid this by passing in the number of cubes
3777 * instead of the total number of layers (easier elsewhere too).
3778 */
3779 LLVMValueRef six = lp_build_const_int32(gallivm, 6);
3780 layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
3781 }
3782 size = LLVMBuildInsertElement(gallivm->builder, size, layers,
3783 lp_build_const_int32(gallivm, dims), "");
3784 }
3785
3786 /*
3787 * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
3788 * if level is out of bounds (note this can't cover unbound texture
3789 * here, which also requires returning zero).
3790 */
3791 if (params->explicit_lod && params->is_sviewinfo) {
3792 LLVMValueRef last_level, out, out1;
3793 struct lp_build_context leveli_bld;
3794
3795 /* everything is scalar for now */
3796 lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
3797 last_level = dynamic_state->last_level(dynamic_state, gallivm,
3798 context_ptr, texture_unit);
3799
3800 out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
3801 out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
3802 out = lp_build_or(&leveli_bld, out, out1);
3803 if (num_lods == 1) {
3804 out = lp_build_broadcast_scalar(&bld_int_vec4, out);
3805 }
3806 else {
3807 /* TODO */
3808 assert(0);
3809 }
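/* out is ~0 where the level is out of bounds; size & ~out zeroes those
 * channels.
 */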
3810 size = lp_build_andnot(&bld_int_vec4, size, out);
3811 }
3812 for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
3813 params->sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, params->int_type,
3814 size,
3815 lp_build_const_int32(gallivm, i));
3816 }
3817 if (params->is_sviewinfo) {
3818 for (; i < 4; i++) {
3819 params->sizes_out[i] = lp_build_const_vec(gallivm, params->int_type, 0.0);
3820 }
3821 }
3822
3823 /*
3824 * if there's no explicit_lod (buffers, rects), queries requiring the
3825 * number of mips would be illegal.
3826 */
3827 if (params->is_sviewinfo && params->explicit_lod) {
3828 struct lp_build_context bld_int_scalar;
3829 LLVMValueRef num_levels;
3830 lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
3831
3832 if (static_state->level_zero_only) {
3833 num_levels = bld_int_scalar.one;
3834 }
3835 else {
3836 LLVMValueRef last_level;
3837
3838 last_level = dynamic_state->last_level(dynamic_state, gallivm,
3839 context_ptr, texture_unit);
3840 num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
3841 num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
3842 }
3843 params->sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, params->int_type),
3844 num_levels);
3845 }
3846 }