gallivm,draw,llvmpipe: Remove support for versions of LLVM prior to 3.1.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/u_debug.h"
40 #include "util/u_dump.h"
41 #include "util/u_memory.h"
42 #include "util/u_math.h"
43 #include "util/u_format.h"
44 #include "util/u_cpu_detect.h"
45 #include "util/u_format_rgb9e5.h"
46 #include "lp_bld_debug.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_const.h"
49 #include "lp_bld_conv.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_bitarit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_printf.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_format.h"
58 #include "lp_bld_sample.h"
59 #include "lp_bld_sample_aos.h"
60 #include "lp_bld_struct.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_pack.h"
63
64
65 /**
66 * Generate code to fetch a texel from a texture at int coords (x, y, z).
67 * The computation depends on whether the texture is 1D, 2D or 3D.
68 * The result, texel, will be float vectors:
69 * texel[0] = red values
70 * texel[1] = green values
71 * texel[2] = blue values
72 * texel[3] = alpha values
73 */
74 static void
75 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
76 LLVMValueRef width,
77 LLVMValueRef height,
78 LLVMValueRef depth,
79 LLVMValueRef x,
80 LLVMValueRef y,
81 LLVMValueRef z,
82 LLVMValueRef y_stride,
83 LLVMValueRef z_stride,
84 LLVMValueRef data_ptr,
85 LLVMValueRef mipoffsets,
86 LLVMValueRef texel_out[4])
87 {
88 const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
89 const unsigned dims = bld->dims;
90 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
91 LLVMBuilderRef builder = bld->gallivm->builder;
92 LLVMValueRef offset;
93 LLVMValueRef i, j;
94 LLVMValueRef use_border = NULL;
95
96 /* use_border = x < 0 || x >= width || y < 0 || y >= height */
97 if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
98 static_state->min_img_filter,
99 static_state->mag_img_filter)) {
100 LLVMValueRef b1, b2;
101 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
102 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
103 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
104 }
105
106 if (dims >= 2 &&
107 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
108 static_state->min_img_filter,
109 static_state->mag_img_filter)) {
110 LLVMValueRef b1, b2;
111 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
112 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
113 if (use_border) {
114 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
115 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
116 }
117 else {
118 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
119 }
120 }
121
122 if (dims == 3 &&
123 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
124 static_state->min_img_filter,
125 static_state->mag_img_filter)) {
126 LLVMValueRef b1, b2;
127 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
128 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
129 if (use_border) {
130 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
131 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
132 }
133 else {
134 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
135 }
136 }
137
138 /* convert x,y,z coords to linear offset from start of texture, in bytes */
139 lp_build_sample_offset(&bld->int_coord_bld,
140 bld->format_desc,
141 x, y, z, y_stride, z_stride,
142 &offset, &i, &j);
143 if (mipoffsets) {
144 offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
145 }
146
147 if (use_border) {
148 /* If we can sample the border color, it means that texcoords may
149 * lie outside the bounds of the texture image. We need to do
150 * something to prevent reading out of bounds and causing a segfault.
151 *
152 * Simply AND the texture coords with !use_border. This will cause
153 * coords which are out of bounds to become zero. Zero's guaranteed
154 * to be inside the texture image.
155 */
156 offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
157 }
158
159 lp_build_fetch_rgba_soa(bld->gallivm,
160 bld->format_desc,
161 bld->texel_type,
162 data_ptr, offset,
163 i, j,
164 texel_out);
165
166 /*
167 * Note: if we find an app which frequently samples the texture border
168 * we might want to implement a true conditional here to avoid sampling
169 * the texture whenever possible (since that's quite a bit of code).
170 * Ex:
171 * if (use_border) {
172 * texel = border_color;
173 * }
174 * else {
175 * texel = sample_texture(coord);
176 * }
177 * As it is now, we always sample the texture, then selectively replace
178 * the texel color results with the border color.
179 */
180
181 if (use_border) {
182 /* select texel color or border color depending on use_border. */
183 const struct util_format_description *format_desc = bld->format_desc;
184 int chan;
185 struct lp_type border_type = bld->texel_type;
186 border_type.length = 4;
187 /*
188 * Only replace channels which are actually present. The others should
189 * get optimized away eventually by sampler_view swizzle anyway but it's
190 * easier too.
191 */
192 for (chan = 0; chan < 4; chan++) {
193 unsigned chan_s;
194 /* reverse-map channel... */
195 for (chan_s = 0; chan_s < 4; chan_s++) {
196 if (chan_s == format_desc->swizzle[chan]) {
197 break;
198 }
199 }
200 if (chan_s <= 3) {
201 /* use the already clamped color */
202 LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
203 LLVMValueRef border_chan;
204
205 border_chan = lp_build_extract_broadcast(bld->gallivm,
206 border_type,
207 bld->texel_type,
208 bld->border_color_clamped,
209 idx);
210 texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
211 border_chan, texel_out[chan]);
212 }
213 }
214 }
215 }
216
217
218 /**
219 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
220 */
221 static LLVMValueRef
222 lp_build_coord_mirror(struct lp_build_sample_context *bld,
223 LLVMValueRef coord)
224 {
225 struct lp_build_context *coord_bld = &bld->coord_bld;
226 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
227 LLVMValueRef fract, flr, isOdd;
228
229 lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
230
231 /* isOdd = flr & 1 */
232 isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
233
234 /* make coord positive or negative depending on isOdd */
235 coord = lp_build_set_sign(coord_bld, fract, isOdd);
236
237 /* convert isOdd to float */
238 isOdd = lp_build_int_to_float(coord_bld, isOdd);
239
240 /* add isOdd to coord */
241 coord = lp_build_add(coord_bld, coord, isOdd);
242
243 return coord;
244 }
245
246
247 /**
248 * Helper to compute the first coord and the weight for
249 * linear wrap repeat npot textures
250 */
251 void
252 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
253 LLVMValueRef coord_f,
254 LLVMValueRef length_i,
255 LLVMValueRef length_f,
256 LLVMValueRef *coord0_i,
257 LLVMValueRef *weight_f)
258 {
259 struct lp_build_context *coord_bld = &bld->coord_bld;
260 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
261 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
262 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
263 int_coord_bld->one);
264 LLVMValueRef mask;
265 /* wrap with normalized floats is just fract */
266 coord_f = lp_build_fract(coord_bld, coord_f);
267 /* mul by size and subtract 0.5 */
268 coord_f = lp_build_mul(coord_bld, coord_f, length_f);
269 coord_f = lp_build_sub(coord_bld, coord_f, half);
270 /*
271 * we avoided the 0.5/length division before the repeat wrap,
272 * now need to fix up edge cases with selects
273 */
274 /* convert to int, compute lerp weight */
275 lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
276 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
277 PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
278 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
279 }
280
281
282 /**
283 * Build LLVM code for texture wrap mode for linear filtering.
284 * \param x0_out returns first integer texcoord
285 * \param x1_out returns second integer texcoord
286 * \param weight_out returns linear interpolation weight
287 */
288 static void
289 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
290 LLVMValueRef coord,
291 LLVMValueRef length,
292 LLVMValueRef length_f,
293 LLVMValueRef offset,
294 boolean is_pot,
295 unsigned wrap_mode,
296 LLVMValueRef *x0_out,
297 LLVMValueRef *x1_out,
298 LLVMValueRef *weight_out)
299 {
300 struct lp_build_context *coord_bld = &bld->coord_bld;
301 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
302 LLVMBuilderRef builder = bld->gallivm->builder;
303 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
304 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
305 LLVMValueRef coord0, coord1, weight;
306
307 switch(wrap_mode) {
308 case PIPE_TEX_WRAP_REPEAT:
309 if (is_pot) {
310 /* mul by size and subtract 0.5 */
311 coord = lp_build_mul(coord_bld, coord, length_f);
312 coord = lp_build_sub(coord_bld, coord, half);
313 if (offset) {
314 offset = lp_build_int_to_float(coord_bld, offset);
315 coord = lp_build_add(coord_bld, coord, offset);
316 }
317 /* convert to int, compute lerp weight */
318 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
319 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
320 /* repeat wrap */
321 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
322 coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
323 }
324 else {
325 LLVMValueRef mask;
326 if (offset) {
327 offset = lp_build_int_to_float(coord_bld, offset);
328 offset = lp_build_div(coord_bld, offset, length_f);
329 coord = lp_build_add(coord_bld, coord, offset);
330 }
331 lp_build_coord_repeat_npot_linear(bld, coord,
332 length, length_f,
333 &coord0, &weight);
334 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
335 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
336 coord1 = LLVMBuildAnd(builder,
337 lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
338 mask, "");
339 }
340 break;
341
342 case PIPE_TEX_WRAP_CLAMP:
343 if (bld->static_sampler_state->normalized_coords) {
344 /* scale coord to length */
345 coord = lp_build_mul(coord_bld, coord, length_f);
346 }
347 if (offset) {
348 offset = lp_build_int_to_float(coord_bld, offset);
349 coord = lp_build_add(coord_bld, coord, offset);
350 }
351
352 /* clamp to [0, length] */
353 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
354
355 coord = lp_build_sub(coord_bld, coord, half);
356
357 /* convert to int, compute lerp weight */
358 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
359 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
360 break;
361
362 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
363 {
364 struct lp_build_context abs_coord_bld = bld->coord_bld;
365 abs_coord_bld.type.sign = FALSE;
366
367 if (bld->static_sampler_state->normalized_coords) {
368 /* mul by tex size */
369 coord = lp_build_mul(coord_bld, coord, length_f);
370 }
371 if (offset) {
372 offset = lp_build_int_to_float(coord_bld, offset);
373 coord = lp_build_add(coord_bld, coord, offset);
374 }
375
376 /* clamp to length max */
377 coord = lp_build_min(coord_bld, coord, length_f);
378 /* subtract 0.5 */
379 coord = lp_build_sub(coord_bld, coord, half);
380 /* clamp to [0, length - 0.5] */
381 coord = lp_build_max(coord_bld, coord, coord_bld->zero);
382 /* convert to int, compute lerp weight */
383 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
384 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
385 /* coord1 = min(coord1, length-1) */
386 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
387 break;
388 }
389
390 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
391 if (bld->static_sampler_state->normalized_coords) {
392 /* scale coord to length */
393 coord = lp_build_mul(coord_bld, coord, length_f);
394 }
395 if (offset) {
396 offset = lp_build_int_to_float(coord_bld, offset);
397 coord = lp_build_add(coord_bld, coord, offset);
398 }
399 /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
400 /* can skip clamp (though might not work for very large coord values */
401 coord = lp_build_sub(coord_bld, coord, half);
402 /* convert to int, compute lerp weight */
403 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
404 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
405 break;
406
407 case PIPE_TEX_WRAP_MIRROR_REPEAT:
408 /* compute mirror function */
409 coord = lp_build_coord_mirror(bld, coord);
410
411 /* scale coord to length */
412 coord = lp_build_mul(coord_bld, coord, length_f);
413 coord = lp_build_sub(coord_bld, coord, half);
414 if (offset) {
415 offset = lp_build_int_to_float(coord_bld, offset);
416 coord = lp_build_add(coord_bld, coord, offset);
417 }
418
419 /* convert to int, compute lerp weight */
420 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
421 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
422
423 /* coord0 = max(coord0, 0) */
424 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
425 /* coord1 = min(coord1, length-1) */
426 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
427 break;
428
429 case PIPE_TEX_WRAP_MIRROR_CLAMP:
430 if (bld->static_sampler_state->normalized_coords) {
431 /* scale coord to length */
432 coord = lp_build_mul(coord_bld, coord, length_f);
433 }
434 if (offset) {
435 offset = lp_build_int_to_float(coord_bld, offset);
436 coord = lp_build_add(coord_bld, coord, offset);
437 }
438 coord = lp_build_abs(coord_bld, coord);
439
440 /* clamp to [0, length] */
441 coord = lp_build_min(coord_bld, coord, length_f);
442
443 coord = lp_build_sub(coord_bld, coord, half);
444
445 /* convert to int, compute lerp weight */
446 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
447 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
448 break;
449
450 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
451 {
452 struct lp_build_context abs_coord_bld = bld->coord_bld;
453 abs_coord_bld.type.sign = FALSE;
454
455 if (bld->static_sampler_state->normalized_coords) {
456 /* scale coord to length */
457 coord = lp_build_mul(coord_bld, coord, length_f);
458 }
459 if (offset) {
460 offset = lp_build_int_to_float(coord_bld, offset);
461 coord = lp_build_add(coord_bld, coord, offset);
462 }
463 coord = lp_build_abs(coord_bld, coord);
464
465 /* clamp to length max */
466 coord = lp_build_min(coord_bld, coord, length_f);
467 /* subtract 0.5 */
468 coord = lp_build_sub(coord_bld, coord, half);
469 /* clamp to [0, length - 0.5] */
470 coord = lp_build_max(coord_bld, coord, coord_bld->zero);
471
472 /* convert to int, compute lerp weight */
473 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
474 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
475 /* coord1 = min(coord1, length-1) */
476 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
477 }
478 break;
479
480 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
481 {
482 if (bld->static_sampler_state->normalized_coords) {
483 /* scale coord to length */
484 coord = lp_build_mul(coord_bld, coord, length_f);
485 }
486 if (offset) {
487 offset = lp_build_int_to_float(coord_bld, offset);
488 coord = lp_build_add(coord_bld, coord, offset);
489 }
490 coord = lp_build_abs(coord_bld, coord);
491
492 /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
493 /* skip clamp - always positive, and other side
494 only potentially matters for very large coords */
495 coord = lp_build_sub(coord_bld, coord, half);
496
497 /* convert to int, compute lerp weight */
498 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
499 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
500 }
501 break;
502
503 default:
504 assert(0);
505 coord0 = NULL;
506 coord1 = NULL;
507 weight = NULL;
508 }
509
510 *x0_out = coord0;
511 *x1_out = coord1;
512 *weight_out = weight;
513 }
514
515
516 /**
517 * Build LLVM code for texture wrap mode for nearest filtering.
518 * \param coord the incoming texcoord (nominally in [0,1])
519 * \param length the texture size along one dimension, as int vector
520 * \param length_f the texture size along one dimension, as float vector
521 * \param offset texel offset along one dimension (as int vector)
522 * \param is_pot if TRUE, length is a power of two
523 * \param wrap_mode one of PIPE_TEX_WRAP_x
524 */
525 static LLVMValueRef
526 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
527 LLVMValueRef coord,
528 LLVMValueRef length,
529 LLVMValueRef length_f,
530 LLVMValueRef offset,
531 boolean is_pot,
532 unsigned wrap_mode)
533 {
534 struct lp_build_context *coord_bld = &bld->coord_bld;
535 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
536 LLVMBuilderRef builder = bld->gallivm->builder;
537 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
538 LLVMValueRef icoord;
539
540 switch(wrap_mode) {
541 case PIPE_TEX_WRAP_REPEAT:
542 if (is_pot) {
543 coord = lp_build_mul(coord_bld, coord, length_f);
544 icoord = lp_build_ifloor(coord_bld, coord);
545 if (offset) {
546 icoord = lp_build_add(int_coord_bld, icoord, offset);
547 }
548 icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
549 }
550 else {
551 if (offset) {
552 offset = lp_build_int_to_float(coord_bld, offset);
553 offset = lp_build_div(coord_bld, offset, length_f);
554 coord = lp_build_add(coord_bld, coord, offset);
555 }
556 /* take fraction, unnormalize */
557 coord = lp_build_fract_safe(coord_bld, coord);
558 coord = lp_build_mul(coord_bld, coord, length_f);
559 icoord = lp_build_itrunc(coord_bld, coord);
560 }
561 break;
562
563 case PIPE_TEX_WRAP_CLAMP:
564 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
565 if (bld->static_sampler_state->normalized_coords) {
566 /* scale coord to length */
567 coord = lp_build_mul(coord_bld, coord, length_f);
568 }
569
570 /* floor */
571 /* use itrunc instead since we clamp to 0 anyway */
572 icoord = lp_build_itrunc(coord_bld, coord);
573 if (offset) {
574 icoord = lp_build_add(int_coord_bld, icoord, offset);
575 }
576
577 /* clamp to [0, length - 1]. */
578 icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
579 length_minus_one);
580 break;
581
582 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
583 if (bld->static_sampler_state->normalized_coords) {
584 /* scale coord to length */
585 coord = lp_build_mul(coord_bld, coord, length_f);
586 }
587 /* no clamp necessary, border masking will handle this */
588 icoord = lp_build_ifloor(coord_bld, coord);
589 if (offset) {
590 icoord = lp_build_add(int_coord_bld, icoord, offset);
591 }
592 break;
593
594 case PIPE_TEX_WRAP_MIRROR_REPEAT:
595 if (offset) {
596 offset = lp_build_int_to_float(coord_bld, offset);
597 offset = lp_build_div(coord_bld, offset, length_f);
598 coord = lp_build_add(coord_bld, coord, offset);
599 }
600 /* compute mirror function */
601 coord = lp_build_coord_mirror(bld, coord);
602
603 /* scale coord to length */
604 assert(bld->static_sampler_state->normalized_coords);
605 coord = lp_build_mul(coord_bld, coord, length_f);
606
607 /* itrunc == ifloor here */
608 icoord = lp_build_itrunc(coord_bld, coord);
609
610 /* clamp to [0, length - 1] */
611 icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
612 break;
613
614 case PIPE_TEX_WRAP_MIRROR_CLAMP:
615 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
616 if (bld->static_sampler_state->normalized_coords) {
617 /* scale coord to length */
618 coord = lp_build_mul(coord_bld, coord, length_f);
619 }
620 if (offset) {
621 offset = lp_build_int_to_float(coord_bld, offset);
622 coord = lp_build_add(coord_bld, coord, offset);
623 }
624 coord = lp_build_abs(coord_bld, coord);
625
626 /* itrunc == ifloor here */
627 icoord = lp_build_itrunc(coord_bld, coord);
628
629 /* clamp to [0, length - 1] */
630 icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
631 break;
632
633 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
634 if (bld->static_sampler_state->normalized_coords) {
635 /* scale coord to length */
636 coord = lp_build_mul(coord_bld, coord, length_f);
637 }
638 if (offset) {
639 offset = lp_build_int_to_float(coord_bld, offset);
640 coord = lp_build_add(coord_bld, coord, offset);
641 }
642 coord = lp_build_abs(coord_bld, coord);
643
644 /* itrunc == ifloor here */
645 icoord = lp_build_itrunc(coord_bld, coord);
646 break;
647
648 default:
649 assert(0);
650 icoord = NULL;
651 }
652
653 return icoord;
654 }
655
656
657 /**
658 * Do shadow test/comparison.
659 * \param p shadow ref value
660 * \param texel the texel to compare against
661 */
662 static LLVMValueRef
663 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
664 LLVMValueRef p,
665 LLVMValueRef texel)
666 {
667 struct lp_build_context *texel_bld = &bld->texel_bld;
668 LLVMValueRef res;
669
670 if (0) {
671 //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
672 lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
673 }
674
675 /* result = (p FUNC texel) ? 1 : 0 */
676 /*
677 * honor d3d10 floating point rules here, which state that comparisons
678 * are ordered except NOT_EQUAL which is unordered.
679 */
680 if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
681 res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
682 p, texel);
683 }
684 else {
685 res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
686 p, texel);
687 }
688 return res;
689 }
690
691
692 /**
693 * Generate code to sample a mipmap level with nearest filtering.
694 * If sampling a cube texture, r = cube face in [0,5].
695 */
696 static void
697 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
698 LLVMValueRef size,
699 LLVMValueRef row_stride_vec,
700 LLVMValueRef img_stride_vec,
701 LLVMValueRef data_ptr,
702 LLVMValueRef mipoffsets,
703 LLVMValueRef *coords,
704 const LLVMValueRef *offsets,
705 LLVMValueRef colors_out[4])
706 {
707 const unsigned dims = bld->dims;
708 LLVMValueRef width_vec;
709 LLVMValueRef height_vec;
710 LLVMValueRef depth_vec;
711 LLVMValueRef flt_size;
712 LLVMValueRef flt_width_vec;
713 LLVMValueRef flt_height_vec;
714 LLVMValueRef flt_depth_vec;
715 LLVMValueRef x, y = NULL, z = NULL;
716
717 lp_build_extract_image_sizes(bld,
718 &bld->int_size_bld,
719 bld->int_coord_type,
720 size,
721 &width_vec, &height_vec, &depth_vec);
722
723 flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
724
725 lp_build_extract_image_sizes(bld,
726 &bld->float_size_bld,
727 bld->coord_type,
728 flt_size,
729 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
730
731 /*
732 * Compute integer texcoords.
733 */
734 x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
735 flt_width_vec, offsets[0],
736 bld->static_texture_state->pot_width,
737 bld->static_sampler_state->wrap_s);
738 lp_build_name(x, "tex.x.wrapped");
739
740 if (dims >= 2) {
741 y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
742 flt_height_vec, offsets[1],
743 bld->static_texture_state->pot_height,
744 bld->static_sampler_state->wrap_t);
745 lp_build_name(y, "tex.y.wrapped");
746
747 if (dims == 3) {
748 z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
749 flt_depth_vec, offsets[2],
750 bld->static_texture_state->pot_depth,
751 bld->static_sampler_state->wrap_r);
752 lp_build_name(z, "tex.z.wrapped");
753 }
754 }
755 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
756 bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
757 bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
758 z = coords[2];
759 lp_build_name(z, "tex.z.layer");
760 }
761
762 /*
763 * Get texture colors.
764 */
765 lp_build_sample_texel_soa(bld,
766 width_vec, height_vec, depth_vec,
767 x, y, z,
768 row_stride_vec, img_stride_vec,
769 data_ptr, mipoffsets, colors_out);
770
771 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
772 LLVMValueRef cmpval;
773 cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
774 /* this is really just a AND 1.0, cmpval but llvm is clever enough */
775 colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
776 bld->texel_bld.one, bld->texel_bld.zero);
777 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
778 }
779
780 }
781
782
783 /**
784 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
785 */
786 static LLVMValueRef
787 lp_build_masklerp(struct lp_build_context *bld,
788 LLVMValueRef weight,
789 LLVMValueRef mask0,
790 LLVMValueRef mask1)
791 {
792 struct gallivm_state *gallivm = bld->gallivm;
793 LLVMBuilderRef builder = gallivm->builder;
794 LLVMValueRef weight2;
795
796 weight2 = lp_build_sub(bld, bld->one, weight);
797 weight = LLVMBuildBitCast(builder, weight,
798 lp_build_int_vec_type(gallivm, bld->type), "");
799 weight2 = LLVMBuildBitCast(builder, weight2,
800 lp_build_int_vec_type(gallivm, bld->type), "");
801 weight = LLVMBuildAnd(builder, weight, mask1, "");
802 weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
803 weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
804 weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
805 return lp_build_add(bld, weight, weight2);
806 }
807
808 /**
809 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
810 */
811 static LLVMValueRef
812 lp_build_masklerp2d(struct lp_build_context *bld,
813 LLVMValueRef weight0,
814 LLVMValueRef weight1,
815 LLVMValueRef mask00,
816 LLVMValueRef mask01,
817 LLVMValueRef mask10,
818 LLVMValueRef mask11)
819 {
820 LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
821 LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
822 return lp_build_lerp(bld, weight1, val0, val1, 0);
823 }
824
/*
 * Compile-time switch for accurate cube corner filtering; it only takes
 * effect together with seamless cube map filtering.
 * This is a rather excessive amount of code for something OpenGL merely
 * recommends but does not require.
 */
#define ACCURATE_CUBE_CORNERS 1
830
831 /**
832 * Generate code to sample a mipmap level with linear filtering.
833 * If sampling a cube texture, r = cube face in [0,5].
834 * If linear_mask is present, only pixels having their mask set
835 * will receive linear filtering, the rest will use nearest.
836 */
837 static void
838 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
839 LLVMValueRef size,
840 LLVMValueRef linear_mask,
841 LLVMValueRef row_stride_vec,
842 LLVMValueRef img_stride_vec,
843 LLVMValueRef data_ptr,
844 LLVMValueRef mipoffsets,
845 LLVMValueRef *coords,
846 const LLVMValueRef *offsets,
847 LLVMValueRef colors_out[4])
848 {
849 LLVMBuilderRef builder = bld->gallivm->builder;
850 struct lp_build_context *ivec_bld = &bld->int_coord_bld;
851 struct lp_build_context *coord_bld = &bld->coord_bld;
852 const unsigned dims = bld->dims;
853 LLVMValueRef width_vec;
854 LLVMValueRef height_vec;
855 LLVMValueRef depth_vec;
856 LLVMValueRef flt_size;
857 LLVMValueRef flt_width_vec;
858 LLVMValueRef flt_height_vec;
859 LLVMValueRef flt_depth_vec;
860 LLVMValueRef fall_off[4], have_corners;
861 LLVMValueRef z1 = NULL;
862 LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
863 LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
864 LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
865 LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
866 LLVMValueRef xs[4], ys[4], zs[4];
867 LLVMValueRef neighbors[2][2][4];
868 int chan, texel_index;
869 boolean seamless_cube_filter, accurate_cube_corners;
870
871 seamless_cube_filter = bld->static_texture_state->target == PIPE_TEXTURE_CUBE &&
872 bld->static_sampler_state->seamless_cube_map;
873 accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter;
874
875 lp_build_extract_image_sizes(bld,
876 &bld->int_size_bld,
877 bld->int_coord_type,
878 size,
879 &width_vec, &height_vec, &depth_vec);
880
881 flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
882
883 lp_build_extract_image_sizes(bld,
884 &bld->float_size_bld,
885 bld->coord_type,
886 flt_size,
887 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
888
889 /*
890 * Compute integer texcoords.
891 */
892
893 if (!seamless_cube_filter) {
894 lp_build_sample_wrap_linear(bld, coords[0], width_vec,
895 flt_width_vec, offsets[0],
896 bld->static_texture_state->pot_width,
897 bld->static_sampler_state->wrap_s,
898 &x00, &x01, &s_fpart);
899 lp_build_name(x00, "tex.x0.wrapped");
900 lp_build_name(x01, "tex.x1.wrapped");
901 x10 = x00;
902 x11 = x01;
903
904 if (dims >= 2) {
905 lp_build_sample_wrap_linear(bld, coords[1], height_vec,
906 flt_height_vec, offsets[1],
907 bld->static_texture_state->pot_height,
908 bld->static_sampler_state->wrap_t,
909 &y00, &y10, &t_fpart);
910 lp_build_name(y00, "tex.y0.wrapped");
911 lp_build_name(y10, "tex.y1.wrapped");
912 y01 = y00;
913 y11 = y10;
914
915 if (dims == 3) {
916 lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
917 flt_depth_vec, offsets[2],
918 bld->static_texture_state->pot_depth,
919 bld->static_sampler_state->wrap_r,
920 &z00, &z1, &r_fpart);
921 z01 = z10 = z11 = z00;
922 lp_build_name(z00, "tex.z0.wrapped");
923 lp_build_name(z1, "tex.z1.wrapped");
924 }
925 }
926 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
927 bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
928 bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
929 z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */
930 lp_build_name(z00, "tex.z0.layer");
931 lp_build_name(z1, "tex.z1.layer");
932 }
933 }
934 else {
935 struct lp_build_if_state edge_if;
936 LLVMTypeRef int1t;
937 LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
938 LLVMValueRef coord, have_edge, have_corner;
939 LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
940 LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
941 LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
942 LLVMValueRef face = coords[2];
943 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
944 LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
945 /* XXX drop height calcs. Could (should) do this without seamless filtering too */
946 height_vec = width_vec;
947 flt_height_vec = flt_width_vec;
948
949 /* XXX the overflow logic is actually sort of duplicated with trilinear,
950 * since an overflow in one mip should also have a corresponding overflow
951 * in another.
952 */
953 /* should always have normalized coords, and offsets are undefined */
954 assert(bld->static_sampler_state->normalized_coords);
955 coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
956 /* instead of clamp, build mask if overflowed */
957 coord = lp_build_sub(coord_bld, coord, half);
958 /* convert to int, compute lerp weight */
959 /* not ideal with AVX (and no AVX2) */
960 lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart);
961 x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
962 coord = lp_build_mul(coord_bld, coords[1], flt_height_vec);
963 coord = lp_build_sub(coord_bld, coord, half);
964 lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart);
965 y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
966
967 fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
968 fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
969 fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
970 fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
971
972 fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
973 fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
974 have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
975 have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
976
977 /* needed for accurate corner filtering branch later, rely on 0 init */
978 int1t = LLVMInt1TypeInContext(bld->gallivm->context);
979 have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");
980
981 for (texel_index = 0; texel_index < 4; texel_index++) {
982 xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
983 ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
984 zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
985 }
986
987 lp_build_if(&edge_if, bld->gallivm, have_edge);
988
989 have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
990 have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
991 LLVMBuildStore(builder, have_corner, have_corners);
992
993 /*
994 * Need to feed clamped values here for cheap corner handling,
995 * but only for y coord (as when falling off both edges we only
996 * fall off the x one) - this should be sufficient.
997 */
998 y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
999 y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);
1000
1001 /*
1002 * Get all possible new coords.
1003 */
1004 lp_build_cube_new_coords(ivec_bld, face,
1005 x0, x1, y0_clamped, y1_clamped,
1006 length_minus_one,
1007 new_faces, new_xcoords, new_ycoords);
1008
1009 /* handle fall off x-, x+ direction */
1010 /* determine new coords, face (not both fall_off vars can be true at same time) */
1011 x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
1012 y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
1013 x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
1014 y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
1015 x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
1016 y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
1017 x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
1018 y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);
1019
1020 z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
1021 z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);
1022
1023 /* handle fall off y-, y+ direction */
1024 /*
1025 * Cheap corner logic: just hack up things so a texel doesn't fall
1026 * off both sides (which means filter weights will be wrong but we'll only
1027 * use valid texels in the filter).
1028 * This means however (y) coords must additionally be clamped (see above).
1029 * This corner handling should be fully OpenGL (but not d3d10) compliant.
1030 */
1031 fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
1032 fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
1033 fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
1034 fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);
1035
1036 x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
1037 y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
1038 x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
1039 y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
1040 x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
1041 y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
1042 x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
1043 y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);
1044
1045 z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
1046 z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
1047 z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
1048 z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);
1049
1050 LLVMBuildStore(builder, x00, xs[0]);
1051 LLVMBuildStore(builder, x01, xs[1]);
1052 LLVMBuildStore(builder, x10, xs[2]);
1053 LLVMBuildStore(builder, x11, xs[3]);
1054 LLVMBuildStore(builder, y00, ys[0]);
1055 LLVMBuildStore(builder, y01, ys[1]);
1056 LLVMBuildStore(builder, y10, ys[2]);
1057 LLVMBuildStore(builder, y11, ys[3]);
1058 LLVMBuildStore(builder, z00, zs[0]);
1059 LLVMBuildStore(builder, z01, zs[1]);
1060 LLVMBuildStore(builder, z10, zs[2]);
1061 LLVMBuildStore(builder, z11, zs[3]);
1062
1063 lp_build_else(&edge_if);
1064
1065 LLVMBuildStore(builder, x0, xs[0]);
1066 LLVMBuildStore(builder, x1, xs[1]);
1067 LLVMBuildStore(builder, x0, xs[2]);
1068 LLVMBuildStore(builder, x1, xs[3]);
1069 LLVMBuildStore(builder, y0, ys[0]);
1070 LLVMBuildStore(builder, y0, ys[1]);
1071 LLVMBuildStore(builder, y1, ys[2]);
1072 LLVMBuildStore(builder, y1, ys[3]);
1073 LLVMBuildStore(builder, face, zs[0]);
1074 LLVMBuildStore(builder, face, zs[1]);
1075 LLVMBuildStore(builder, face, zs[2]);
1076 LLVMBuildStore(builder, face, zs[3]);
1077
1078 lp_build_endif(&edge_if);
1079
1080 x00 = LLVMBuildLoad(builder, xs[0], "");
1081 x01 = LLVMBuildLoad(builder, xs[1], "");
1082 x10 = LLVMBuildLoad(builder, xs[2], "");
1083 x11 = LLVMBuildLoad(builder, xs[3], "");
1084 y00 = LLVMBuildLoad(builder, ys[0], "");
1085 y01 = LLVMBuildLoad(builder, ys[1], "");
1086 y10 = LLVMBuildLoad(builder, ys[2], "");
1087 y11 = LLVMBuildLoad(builder, ys[3], "");
1088 z00 = LLVMBuildLoad(builder, zs[0], "");
1089 z01 = LLVMBuildLoad(builder, zs[1], "");
1090 z10 = LLVMBuildLoad(builder, zs[2], "");
1091 z11 = LLVMBuildLoad(builder, zs[3], "");
1092 }
1093
1094 if (linear_mask) {
1095 /*
1096 * Whack filter weights into place. Whatever texel had more weight is
1097 * the one which should have been selected by nearest filtering hence
1098 * just use 100% weight for it.
1099 */
1100 struct lp_build_context *c_bld = &bld->coord_bld;
1101 LLVMValueRef w1_mask, w1_weight;
1102 LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
1103
1104 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
1105 /* this select is really just a "and" */
1106 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1107 s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
1108 if (dims >= 2) {
1109 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
1110 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1111 t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
1112 if (dims == 3) {
1113 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
1114 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1115 r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
1116 }
1117 }
1118 }
1119
1120 /*
1121 * Get texture colors.
1122 */
1123 /* get x0/x1 texels */
1124 lp_build_sample_texel_soa(bld,
1125 width_vec, height_vec, depth_vec,
1126 x00, y00, z00,
1127 row_stride_vec, img_stride_vec,
1128 data_ptr, mipoffsets, neighbors[0][0]);
1129 lp_build_sample_texel_soa(bld,
1130 width_vec, height_vec, depth_vec,
1131 x01, y01, z01,
1132 row_stride_vec, img_stride_vec,
1133 data_ptr, mipoffsets, neighbors[0][1]);
1134
1135 if (dims == 1) {
1136 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1137 /* Interpolate two samples from 1D image to produce one color */
1138 for (chan = 0; chan < 4; chan++) {
1139 colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
1140 neighbors[0][0][chan],
1141 neighbors[0][1][chan],
1142 0);
1143 }
1144 }
1145 else {
1146 LLVMValueRef cmpval0, cmpval1;
1147 cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1148 cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1149 /* simplified lerp, AND mask with weight and add */
1150 colors_out[0] = lp_build_masklerp(&bld->texel_bld, s_fpart,
1151 cmpval0, cmpval1);
1152 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1153 }
1154 }
1155 else {
1156 /* 2D/3D texture */
1157 struct lp_build_if_state corner_if;
1158 LLVMValueRef colors0[4], colorss[4];
1159
1160 /* get x0/x1 texels at y1 */
1161 lp_build_sample_texel_soa(bld,
1162 width_vec, height_vec, depth_vec,
1163 x10, y10, z10,
1164 row_stride_vec, img_stride_vec,
1165 data_ptr, mipoffsets, neighbors[1][0]);
1166 lp_build_sample_texel_soa(bld,
1167 width_vec, height_vec, depth_vec,
1168 x11, y11, z11,
1169 row_stride_vec, img_stride_vec,
1170 data_ptr, mipoffsets, neighbors[1][1]);
1171
1172 /*
1173 * To avoid having to duplicate linear_mask / fetch code use
1174 * another branch (with corner condition though edge would work
1175 * as well) here.
1176 */
1177 if (accurate_cube_corners) {
1178 LLVMValueRef w00, w01, w10, w11, wx0, wy0;
1179 LLVMValueRef c_weight, c00, c01, c10, c11;
1180 LLVMValueRef have_corner, one_third, tmp;
1181
1182 colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1183 colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1184 colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1185 colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1186
1187 have_corner = LLVMBuildLoad(builder, have_corners, "");
1188
1189 lp_build_if(&corner_if, bld->gallivm, have_corner);
1190
1191 /*
1192 * we can't use standard 2d lerp as we need per-element weight
1193 * in case of corners, so just calculate bilinear result as
1194 * w00*s00 + w01*s01 + w10*s10 + w11*s11.
1195 * (This is actually less work than using 2d lerp, 7 vs. 9 instructions,
1196 * however calculating the weights needs another 6, so actually probably
1197 * not slower than 2d lerp only for 4 channels as weights only need
1198 * to be calculated once - of course fixing the weights has additional cost.)
1199 */
1200 wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
1201 wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
1202 w00 = lp_build_mul(coord_bld, wx0, wy0);
1203 w01 = lp_build_mul(coord_bld, s_fpart, wy0);
1204 w10 = lp_build_mul(coord_bld, wx0, t_fpart);
1205 w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
1206
1207 /* find corner weight */
1208 c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
1209 c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
1210 c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
1211 c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
1212 c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
1213 c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
1214 c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
1215 c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
1216
1217 /*
1218 * add 1/3 of the corner weight to each of the 3 other samples
1219 * and null out corner weight
1220 */
1221 one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f);
1222 c_weight = lp_build_mul(coord_bld, c_weight, one_third);
1223 w00 = lp_build_add(coord_bld, w00, c_weight);
1224 c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
1225 w00 = lp_build_andnot(coord_bld, w00, c00);
1226 w01 = lp_build_add(coord_bld, w01, c_weight);
1227 c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
1228 w01 = lp_build_andnot(coord_bld, w01, c01);
1229 w10 = lp_build_add(coord_bld, w10, c_weight);
1230 c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
1231 w10 = lp_build_andnot(coord_bld, w10, c10);
1232 w11 = lp_build_add(coord_bld, w11, c_weight);
1233 c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
1234 w11 = lp_build_andnot(coord_bld, w11, c11);
1235
1236 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1237 for (chan = 0; chan < 4; chan++) {
1238 colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
1239 tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
1240 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1241 tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
1242 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1243 tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
1244 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1245 }
1246 }
1247 else {
1248 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1249 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1250 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1251 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1252 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1253 /* inputs to interpolation are just masks so just add masked weights together */
1254 cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
1255 cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
1256 cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
1257 cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
1258 colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
1259 tmp = lp_build_and(coord_bld, w01, cmpval01);
1260 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1261 tmp = lp_build_and(coord_bld, w10, cmpval10);
1262 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1263 tmp = lp_build_and(coord_bld, w11, cmpval11);
1264 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1265 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1266 }
1267
1268 LLVMBuildStore(builder, colors0[0], colorss[0]);
1269 LLVMBuildStore(builder, colors0[1], colorss[1]);
1270 LLVMBuildStore(builder, colors0[2], colorss[2]);
1271 LLVMBuildStore(builder, colors0[3], colorss[3]);
1272
1273 lp_build_else(&corner_if);
1274 }
1275
1276 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1277 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1278 for (chan = 0; chan < 4; chan++) {
1279 colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
1280 s_fpart, t_fpart,
1281 neighbors[0][0][chan],
1282 neighbors[0][1][chan],
1283 neighbors[1][0][chan],
1284 neighbors[1][1][chan],
1285 0);
1286 }
1287 }
1288 else {
1289 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1290 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1291 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1292 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1293 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1294 colors0[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
1295 cmpval00, cmpval01, cmpval10, cmpval11);
1296 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1297 }
1298
1299 if (accurate_cube_corners) {
1300 LLVMBuildStore(builder, colors0[0], colorss[0]);
1301 LLVMBuildStore(builder, colors0[1], colorss[1]);
1302 LLVMBuildStore(builder, colors0[2], colorss[2]);
1303 LLVMBuildStore(builder, colors0[3], colorss[3]);
1304
1305 lp_build_endif(&corner_if);
1306
1307 colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
1308 colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
1309 colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
1310 colors0[3] = LLVMBuildLoad(builder, colorss[3], "");
1311 }
1312
1313 if (dims == 3) {
1314 LLVMValueRef neighbors1[2][2][4];
1315 LLVMValueRef colors1[4];
1316
1317 /* get x0/x1/y0/y1 texels at z1 */
1318 lp_build_sample_texel_soa(bld,
1319 width_vec, height_vec, depth_vec,
1320 x00, y00, z1,
1321 row_stride_vec, img_stride_vec,
1322 data_ptr, mipoffsets, neighbors1[0][0]);
1323 lp_build_sample_texel_soa(bld,
1324 width_vec, height_vec, depth_vec,
1325 x01, y01, z1,
1326 row_stride_vec, img_stride_vec,
1327 data_ptr, mipoffsets, neighbors1[0][1]);
1328 lp_build_sample_texel_soa(bld,
1329 width_vec, height_vec, depth_vec,
1330 x10, y10, z1,
1331 row_stride_vec, img_stride_vec,
1332 data_ptr, mipoffsets, neighbors1[1][0]);
1333 lp_build_sample_texel_soa(bld,
1334 width_vec, height_vec, depth_vec,
1335 x11, y11, z1,
1336 row_stride_vec, img_stride_vec,
1337 data_ptr, mipoffsets, neighbors1[1][1]);
1338
1339 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1340 /* Bilinear interpolate the four samples from the second Z slice */
1341 for (chan = 0; chan < 4; chan++) {
1342 colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1343 s_fpart, t_fpart,
1344 neighbors1[0][0][chan],
1345 neighbors1[0][1][chan],
1346 neighbors1[1][0][chan],
1347 neighbors1[1][1][chan],
1348 0);
1349 }
1350 /* Linearly interpolate the two samples from the two 3D slices */
1351 for (chan = 0; chan < 4; chan++) {
1352 colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1353 r_fpart,
1354 colors0[chan], colors1[chan],
1355 0);
1356 }
1357 }
1358 else {
1359 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1360 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1361 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1362 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1363 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1364 colors1[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
1365 cmpval00, cmpval01, cmpval10, cmpval11);
1366 /* Linearly interpolate the two samples from the two 3D slices */
1367 colors_out[0] = lp_build_lerp(&bld->texel_bld,
1368 r_fpart,
1369 colors0[0], colors1[0],
1370 0);
1371 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1372 }
1373 }
1374 else {
1375 /* 2D tex */
1376 for (chan = 0; chan < 4; chan++) {
1377 colors_out[chan] = colors0[chan];
1378 }
1379 }
1380 }
1381 }
1382
1383
1384 /**
1385 * Sample the texture/mipmap using given image filter and mip filter.
1386 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1387 * from (vectors or scalars).
1388 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1389 */
1390 static void
1391 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1392 unsigned img_filter,
1393 unsigned mip_filter,
1394 LLVMValueRef *coords,
1395 const LLVMValueRef *offsets,
1396 LLVMValueRef ilevel0,
1397 LLVMValueRef ilevel1,
1398 LLVMValueRef lod_fpart,
1399 LLVMValueRef *colors_out)
1400 {
1401 LLVMBuilderRef builder = bld->gallivm->builder;
1402 LLVMValueRef size0 = NULL;
1403 LLVMValueRef size1 = NULL;
1404 LLVMValueRef row_stride0_vec = NULL;
1405 LLVMValueRef row_stride1_vec = NULL;
1406 LLVMValueRef img_stride0_vec = NULL;
1407 LLVMValueRef img_stride1_vec = NULL;
1408 LLVMValueRef data_ptr0 = NULL;
1409 LLVMValueRef data_ptr1 = NULL;
1410 LLVMValueRef mipoff0 = NULL;
1411 LLVMValueRef mipoff1 = NULL;
1412 LLVMValueRef colors0[4], colors1[4];
1413 unsigned chan;
1414
1415 /* sample the first mipmap level */
1416 lp_build_mipmap_level_sizes(bld, ilevel0,
1417 &size0,
1418 &row_stride0_vec, &img_stride0_vec);
1419 if (bld->num_mips == 1) {
1420 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1421 }
1422 else {
1423 /* This path should work for num_lods 1 too but slightly less efficient */
1424 data_ptr0 = bld->base_ptr;
1425 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1426 }
1427 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1428 lp_build_sample_image_nearest(bld, size0,
1429 row_stride0_vec, img_stride0_vec,
1430 data_ptr0, mipoff0, coords, offsets,
1431 colors0);
1432 }
1433 else {
1434 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1435 lp_build_sample_image_linear(bld, size0, NULL,
1436 row_stride0_vec, img_stride0_vec,
1437 data_ptr0, mipoff0, coords, offsets,
1438 colors0);
1439 }
1440
1441 /* Store the first level's colors in the output variables */
1442 for (chan = 0; chan < 4; chan++) {
1443 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1444 }
1445
1446 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1447 struct lp_build_if_state if_ctx;
1448 LLVMValueRef need_lerp;
1449
1450 /* need_lerp = lod_fpart > 0 */
1451 if (bld->num_lods == 1) {
1452 need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1453 lod_fpart, bld->lodf_bld.zero,
1454 "need_lerp");
1455 }
1456 else {
1457 /*
1458 * We'll do mip filtering if any of the quads (or individual
1459 * pixel in case of per-pixel lod) need it.
1460 * It might be better to split the vectors here and only fetch/filter
1461 * quads which need it (if there's one lod per quad).
1462 */
1463 need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
1464 PIPE_FUNC_GREATER,
1465 lod_fpart, bld->lodf_bld.zero);
1466 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
1467 }
1468
1469 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1470 {
1471 /*
1472 * We unfortunately need to clamp lod_fpart here since we can get
1473 * negative values which would screw up filtering if not all
1474 * lod_fpart values have same sign.
1475 */
1476 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1477 bld->lodf_bld.zero);
1478 /* sample the second mipmap level */
1479 lp_build_mipmap_level_sizes(bld, ilevel1,
1480 &size1,
1481 &row_stride1_vec, &img_stride1_vec);
1482 if (bld->num_mips == 1) {
1483 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1484 }
1485 else {
1486 data_ptr1 = bld->base_ptr;
1487 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1488 }
1489 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1490 lp_build_sample_image_nearest(bld, size1,
1491 row_stride1_vec, img_stride1_vec,
1492 data_ptr1, mipoff1, coords, offsets,
1493 colors1);
1494 }
1495 else {
1496 lp_build_sample_image_linear(bld, size1, NULL,
1497 row_stride1_vec, img_stride1_vec,
1498 data_ptr1, mipoff1, coords, offsets,
1499 colors1);
1500 }
1501
1502 /* interpolate samples from the two mipmap levels */
1503
1504 if (bld->num_lods != bld->coord_type.length)
1505 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1506 bld->lodf_bld.type,
1507 bld->texel_bld.type,
1508 lod_fpart);
1509
1510 for (chan = 0; chan < 4; chan++) {
1511 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1512 colors0[chan], colors1[chan],
1513 0);
1514 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1515 }
1516 }
1517 lp_build_endif(&if_ctx);
1518 }
1519 }
1520
1521
1522 /**
1523 * Sample the texture/mipmap using given mip filter, and using
1524 * both nearest and linear filtering at the same time depending
1525 * on linear_mask.
1526 * lod can be per quad but linear_mask is always per pixel.
1527 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1528 * from (vectors or scalars).
1529 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1530 */
1531 static void
1532 lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
1533 LLVMValueRef linear_mask,
1534 unsigned mip_filter,
1535 LLVMValueRef *coords,
1536 const LLVMValueRef *offsets,
1537 LLVMValueRef ilevel0,
1538 LLVMValueRef ilevel1,
1539 LLVMValueRef lod_fpart,
1540 LLVMValueRef lod_positive,
1541 LLVMValueRef *colors_out)
1542 {
1543 LLVMBuilderRef builder = bld->gallivm->builder;
1544 LLVMValueRef size0 = NULL;
1545 LLVMValueRef size1 = NULL;
1546 LLVMValueRef row_stride0_vec = NULL;
1547 LLVMValueRef row_stride1_vec = NULL;
1548 LLVMValueRef img_stride0_vec = NULL;
1549 LLVMValueRef img_stride1_vec = NULL;
1550 LLVMValueRef data_ptr0 = NULL;
1551 LLVMValueRef data_ptr1 = NULL;
1552 LLVMValueRef mipoff0 = NULL;
1553 LLVMValueRef mipoff1 = NULL;
1554 LLVMValueRef colors0[4], colors1[4];
1555 unsigned chan;
1556
1557 /* sample the first mipmap level */
1558 lp_build_mipmap_level_sizes(bld, ilevel0,
1559 &size0,
1560 &row_stride0_vec, &img_stride0_vec);
1561 if (bld->num_mips == 1) {
1562 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1563 }
1564 else {
1565 /* This path should work for num_lods 1 too but slightly less efficient */
1566 data_ptr0 = bld->base_ptr;
1567 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1568 }
1569
1570 lp_build_sample_image_linear(bld, size0, linear_mask,
1571 row_stride0_vec, img_stride0_vec,
1572 data_ptr0, mipoff0, coords, offsets,
1573 colors0);
1574
1575 /* Store the first level's colors in the output variables */
1576 for (chan = 0; chan < 4; chan++) {
1577 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1578 }
1579
1580 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1581 struct lp_build_if_state if_ctx;
1582 LLVMValueRef need_lerp;
1583
1584 /*
1585 * We'll do mip filtering if any of the quads (or individual
1586 * pixel in case of per-pixel lod) need it.
1587 * Note using lod_positive here not lod_fpart since it may be the same
1588 * condition as that used in the outer "if" in the caller hence llvm
1589 * should be able to merge the branches in this case.
1590 */
1591 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
1592
1593 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1594 {
1595 /*
1596 * We unfortunately need to clamp lod_fpart here since we can get
1597 * negative values which would screw up filtering if not all
1598 * lod_fpart values have same sign.
1599 */
1600 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1601 bld->lodf_bld.zero);
1602 /* sample the second mipmap level */
1603 lp_build_mipmap_level_sizes(bld, ilevel1,
1604 &size1,
1605 &row_stride1_vec, &img_stride1_vec);
1606 if (bld->num_mips == 1) {
1607 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1608 }
1609 else {
1610 data_ptr1 = bld->base_ptr;
1611 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1612 }
1613
1614 lp_build_sample_image_linear(bld, size1, linear_mask,
1615 row_stride1_vec, img_stride1_vec,
1616 data_ptr1, mipoff1, coords, offsets,
1617 colors1);
1618
1619 /* interpolate samples from the two mipmap levels */
1620
1621 if (bld->num_lods != bld->coord_type.length)
1622 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1623 bld->lodf_bld.type,
1624 bld->texel_bld.type,
1625 lod_fpart);
1626
1627 for (chan = 0; chan < 4; chan++) {
1628 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1629 colors0[chan], colors1[chan],
1630 0);
1631 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1632 }
1633 }
1634 lp_build_endif(&if_ctx);
1635 }
1636 }
1637
1638
1639 /**
1640 * Build (per-coord) layer value.
1641 * Either clamp layer to valid values or fill in optional out_of_bounds
1642 * value and just return value unclamped.
1643 */
1644 static LLVMValueRef
1645 lp_build_layer_coord(struct lp_build_sample_context *bld,
1646 unsigned texture_unit,
1647 LLVMValueRef layer,
1648 LLVMValueRef *out_of_bounds)
1649 {
1650 LLVMValueRef num_layers;
1651 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1652
1653 num_layers = bld->dynamic_state->depth(bld->dynamic_state,
1654 bld->gallivm, texture_unit);
1655
1656 if (out_of_bounds) {
1657 LLVMValueRef out1, out;
1658 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
1659 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
1660 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
1661 *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
1662 return layer;
1663 }
1664 else {
1665 LLVMValueRef maxlayer;
1666 maxlayer = lp_build_sub(&bld->int_bld, num_layers, bld->int_bld.one);
1667 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
1668 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
1669 }
1670 }
1671
1672
1673 /**
1674 * Calculate cube face, lod, mip levels.
1675 */
1676 static void
1677 lp_build_sample_common(struct lp_build_sample_context *bld,
1678 unsigned texture_index,
1679 unsigned sampler_index,
1680 LLVMValueRef *coords,
1681 const struct lp_derivatives *derivs, /* optional */
1682 LLVMValueRef lod_bias, /* optional */
1683 LLVMValueRef explicit_lod, /* optional */
1684 LLVMValueRef *lod_pos_or_zero,
1685 LLVMValueRef *lod_fpart,
1686 LLVMValueRef *ilevel0,
1687 LLVMValueRef *ilevel1)
1688 {
1689 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1690 const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1691 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1692 const unsigned target = bld->static_texture_state->target;
1693 LLVMValueRef first_level, cube_rho = NULL;
1694 LLVMValueRef lod_ipart = NULL;
1695 struct lp_derivatives cube_derivs;
1696
1697 /*
1698 printf("%s mip %d min %d mag %d\n", __FUNCTION__,
1699 mip_filter, min_filter, mag_filter);
1700 */
1701
1702 /*
1703 * Choose cube face, recompute texcoords for the chosen face and
1704 * compute rho here too (as it requires transform of derivatives).
1705 */
1706 if (target == PIPE_TEXTURE_CUBE) {
1707 boolean need_derivs;
1708 need_derivs = ((min_filter != mag_filter ||
1709 mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
1710 !bld->static_sampler_state->min_max_lod_equal &&
1711 !explicit_lod);
1712 lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
1713 derivs = &cube_derivs;
1714 }
1715 else if (target == PIPE_TEXTURE_1D_ARRAY ||
1716 target == PIPE_TEXTURE_2D_ARRAY) {
1717 coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
1718 coords[2] = lp_build_layer_coord(bld, texture_index, coords[2], NULL);
1719 }
1720
1721 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
1722 /*
1723 * Clamp p coords to [0,1] for fixed function depth texture format here.
1724 * Technically this is not entirely correct for unorm depth as the ref value
1725 * should be converted to the depth format (quantization!) and comparison
1726 * then done in texture format. This would actually help performance (since
1727 * only need to do it once and could save the per-sample conversion of texels
1728 * to floats instead), but it would need more messy code (would need to push
1729 * at least some bits down to actual fetch so conversion could be skipped,
1730 * and would have ugly interaction with border color, would need to convert
1731 * border color to that format too or do some other tricks to make it work).
1732 */
1733 const struct util_format_description *format_desc = bld->format_desc;
1734 unsigned chan_type;
1735 /* not entirely sure we couldn't end up with non-valid swizzle here */
1736 chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
1737 format_desc->channel[format_desc->swizzle[0]].type :
1738 UTIL_FORMAT_TYPE_FLOAT;
1739 if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
1740 coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
1741 bld->coord_bld.zero, bld->coord_bld.one);
1742 }
1743 }
1744
1745 /*
1746 * Compute the level of detail (float).
1747 */
1748 if (min_filter != mag_filter ||
1749 mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1750 /* Need to compute lod either to choose mipmap levels or to
1751 * distinguish between minification/magnification with one mipmap level.
1752 */
1753 lp_build_lod_selector(bld, texture_index, sampler_index,
1754 coords[0], coords[1], coords[2], cube_rho,
1755 derivs, lod_bias, explicit_lod,
1756 mip_filter,
1757 &lod_ipart, lod_fpart, lod_pos_or_zero);
1758 } else {
1759 lod_ipart = bld->lodi_bld.zero;
1760 *lod_pos_or_zero = bld->lodi_bld.zero;
1761 }
1762
1763 if (bld->num_lods != bld->num_mips) {
1764 /* only makes sense if there's just a single mip level */
1765 assert(bld->num_mips == 1);
1766 lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
1767 }
1768
1769 /*
1770 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
1771 */
1772 switch (mip_filter) {
1773 default:
1774 assert(0 && "bad mip_filter value in lp_build_sample_soa()");
1775 /* fall-through */
1776 case PIPE_TEX_MIPFILTER_NONE:
1777 /* always use mip level 0 */
1778 first_level = bld->dynamic_state->first_level(bld->dynamic_state,
1779 bld->gallivm, texture_index);
1780 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
1781 *ilevel0 = first_level;
1782 break;
1783 case PIPE_TEX_MIPFILTER_NEAREST:
1784 assert(lod_ipart);
1785 lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
1786 break;
1787 case PIPE_TEX_MIPFILTER_LINEAR:
1788 assert(lod_ipart);
1789 assert(*lod_fpart);
1790 lp_build_linear_mip_levels(bld, texture_index,
1791 lod_ipart, lod_fpart,
1792 ilevel0, ilevel1);
1793 break;
1794 }
1795 }
1796
1797 static void
1798 lp_build_clamp_border_color(struct lp_build_sample_context *bld,
1799 unsigned sampler_unit)
1800 {
1801 struct gallivm_state *gallivm = bld->gallivm;
1802 LLVMBuilderRef builder = gallivm->builder;
1803 LLVMValueRef border_color_ptr =
1804 bld->dynamic_state->border_color(bld->dynamic_state,
1805 gallivm, sampler_unit);
1806 LLVMValueRef border_color;
1807 const struct util_format_description *format_desc = bld->format_desc;
1808 struct lp_type vec4_type = bld->texel_type;
1809 struct lp_build_context vec4_bld;
1810 LLVMValueRef min_clamp = NULL;
1811 LLVMValueRef max_clamp = NULL;
1812
1813 /*
1814 * For normalized format need to clamp border color (technically
1815 * probably should also quantize the data). Really sucks doing this
1816 * here but can't avoid at least for now since this is part of
1817 * sampler state and texture format is part of sampler_view state.
1818 * GL expects also expects clamping for uint/sint formats too so
1819 * do that as well (d3d10 can't end up here with uint/sint since it
1820 * only supports them with ld).
1821 */
1822 vec4_type.length = 4;
1823 lp_build_context_init(&vec4_bld, gallivm, vec4_type);
1824
1825 /*
1826 * Vectorized clamping of border color. Loading is a bit of a hack since
1827 * we just cast the pointer to float array to pointer to vec4
1828 * (int or float).
1829 */
1830 border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
1831 lp_build_const_int32(gallivm, 0));
1832 border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
1833 LLVMPointerType(vec4_bld.vec_type, 0), "");
1834 border_color = LLVMBuildLoad(builder, border_color_ptr, "");
1835 /* we don't have aligned type in the dynamic state unfortunately */
1836 lp_set_load_alignment(border_color, 4);
1837
1838 /*
1839 * Instead of having some incredibly complex logic which will try to figure out
1840 * clamping necessary for each channel, simply use the first channel, and treat
1841 * mixed signed/unsigned normalized formats specially.
1842 * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
1843 * good reason.)
1844 */
1845 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
1846 int chan;
1847 /* d/s needs special handling because both present means just sampling depth */
1848 if (util_format_is_depth_and_stencil(format_desc->format)) {
1849 chan = format_desc->swizzle[0];
1850 }
1851 else {
1852 chan = util_format_get_first_non_void_channel(format_desc->format);
1853 }
1854 if (chan >= 0 && chan <= UTIL_FORMAT_SWIZZLE_W) {
1855 unsigned chan_type = format_desc->channel[chan].type;
1856 unsigned chan_norm = format_desc->channel[chan].normalized;
1857 unsigned chan_pure = format_desc->channel[chan].pure_integer;
1858 if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
1859 if (chan_norm) {
1860 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1861 max_clamp = vec4_bld.one;
1862 }
1863 else if (chan_pure) {
1864 /*
1865 * Border color was stored as int, hence need min/max clamp
1866 * only if chan has less than 32 bits..
1867 */
1868 unsigned chan_size = format_desc->channel[chan].size;
1869 if (chan_size < 32) {
1870 min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1871 0 - (1 << (chan_size - 1)));
1872 max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1873 (1 << (chan_size - 1)) - 1);
1874 }
1875 }
1876 /* TODO: no idea about non-pure, non-normalized! */
1877 }
1878 else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
1879 if (chan_norm) {
1880 min_clamp = vec4_bld.zero;
1881 max_clamp = vec4_bld.one;
1882 }
1883 /*
1884 * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
1885 * we use Z32_FLOAT_S8X24 to imply sampling depth component
1886 * and ignoring stencil, which will blow up here if we try to
1887 * do a uint clamp in a float texel build...
1888 * And even if we had that format, mesa st also thinks using z24s8
1889 * means depth sampling ignoring stencil.
1890 */
1891 else if (chan_pure) {
1892 /*
1893 * Border color was stored as uint, hence never need min
1894 * clamp, and only need max clamp if chan has less than 32 bits.
1895 */
1896 unsigned chan_size = format_desc->channel[chan].size;
1897 if (chan_size < 32) {
1898 max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1899 (1 << chan_size) - 1);
1900 }
1901 /* TODO: no idea about non-pure, non-normalized! */
1902 }
1903 }
1904 else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
1905 /* TODO: I have no idea what clamp this would need if any! */
1906 }
1907 }
1908 /* mixed plain formats (or different pure size) */
1909 switch (format_desc->format) {
1910 case PIPE_FORMAT_B10G10R10A2_UINT:
1911 case PIPE_FORMAT_R10G10B10A2_UINT:
1912 {
1913 unsigned max10 = (1 << 10) - 1;
1914 max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
1915 max10, (1 << 2) - 1, NULL);
1916 }
1917 break;
1918 case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
1919 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
1920 -1.0F, 0.0F, NULL);
1921 max_clamp = vec4_bld.one;
1922 break;
1923 case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
1924 case PIPE_FORMAT_R5SG5SB6U_NORM:
1925 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
1926 0.0F, 0.0F, NULL);
1927 max_clamp = vec4_bld.one;
1928 break;
1929 default:
1930 break;
1931 }
1932 }
1933 else {
1934 /* cannot figure this out from format description */
1935 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
1936 /* s3tc formats are always unorm */
1937 min_clamp = vec4_bld.zero;
1938 max_clamp = vec4_bld.one;
1939 }
1940 else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
1941 format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
1942 switch (format_desc->format) {
1943 case PIPE_FORMAT_RGTC1_UNORM:
1944 case PIPE_FORMAT_RGTC2_UNORM:
1945 case PIPE_FORMAT_LATC1_UNORM:
1946 case PIPE_FORMAT_LATC2_UNORM:
1947 case PIPE_FORMAT_ETC1_RGB8:
1948 min_clamp = vec4_bld.zero;
1949 max_clamp = vec4_bld.one;
1950 break;
1951 case PIPE_FORMAT_RGTC1_SNORM:
1952 case PIPE_FORMAT_RGTC2_SNORM:
1953 case PIPE_FORMAT_LATC1_SNORM:
1954 case PIPE_FORMAT_LATC2_SNORM:
1955 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1956 max_clamp = vec4_bld.one;
1957 break;
1958 default:
1959 assert(0);
1960 break;
1961 }
1962 }
1963 /*
1964 * all others from subsampled/other group, though we don't care
1965 * about yuv (and should not have any from zs here)
1966 */
1967 else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
1968 switch (format_desc->format) {
1969 case PIPE_FORMAT_R8G8_B8G8_UNORM:
1970 case PIPE_FORMAT_G8R8_G8B8_UNORM:
1971 case PIPE_FORMAT_G8R8_B8R8_UNORM:
1972 case PIPE_FORMAT_R8G8_R8B8_UNORM:
1973 case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
1974 min_clamp = vec4_bld.zero;
1975 max_clamp = vec4_bld.one;
1976 break;
1977 case PIPE_FORMAT_R8G8Bx_SNORM:
1978 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1979 max_clamp = vec4_bld.one;
1980 break;
1981 /*
1982 * Note smallfloat formats usually don't need clamping
1983 * (they still have infinite range) however this is not
1984 * true for r11g11b10 and r9g9b9e5, which can't represent
1985 * negative numbers (and additionally r9g9b9e5 can't represent
1986 * very large numbers). d3d10 seems happy without clamping in
1987 * this case, but gl spec is pretty clear: "for floating
1988 * point and integer formats, border values are clamped to
1989 * the representable range of the format" so do that here.
1990 */
1991 case PIPE_FORMAT_R11G11B10_FLOAT:
1992 min_clamp = vec4_bld.zero;
1993 break;
1994 case PIPE_FORMAT_R9G9B9E5_FLOAT:
1995 min_clamp = vec4_bld.zero;
1996 max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
1997 break;
1998 default:
1999 assert(0);
2000 break;
2001 }
2002 }
2003 }
2004
2005 if (min_clamp) {
2006 border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
2007 }
2008 if (max_clamp) {
2009 border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
2010 }
2011
2012 bld->border_color_clamped = border_color;
2013 }
2014
2015
2016 /**
2017 * General texture sampling codegen.
2018 * This function handles texture sampling for all texture targets (1D,
2019 * 2D, 3D, cube) and all filtering modes.
2020 */
2021 static void
2022 lp_build_sample_general(struct lp_build_sample_context *bld,
2023 unsigned sampler_unit,
2024 LLVMValueRef *coords,
2025 const LLVMValueRef *offsets,
2026 LLVMValueRef lod_positive,
2027 LLVMValueRef lod_fpart,
2028 LLVMValueRef ilevel0,
2029 LLVMValueRef ilevel1,
2030 LLVMValueRef *colors_out)
2031 {
2032 LLVMBuilderRef builder = bld->gallivm->builder;
2033 const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
2034 const unsigned mip_filter = sampler_state->min_mip_filter;
2035 const unsigned min_filter = sampler_state->min_img_filter;
2036 const unsigned mag_filter = sampler_state->mag_img_filter;
2037 LLVMValueRef texels[4];
2038 unsigned chan;
2039
2040 /* if we need border color, (potentially) clamp it now */
2041 if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
2042 min_filter,
2043 mag_filter) ||
2044 (bld->dims > 1 &&
2045 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
2046 min_filter,
2047 mag_filter)) ||
2048 (bld->dims > 2 &&
2049 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
2050 min_filter,
2051 mag_filter))) {
2052 lp_build_clamp_border_color(bld, sampler_unit);
2053 }
2054
2055
2056 /*
2057 * Get/interpolate texture colors.
2058 */
2059
2060 for (chan = 0; chan < 4; ++chan) {
2061 texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
2062 lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
2063 }
2064
2065 if (min_filter == mag_filter) {
2066 /* no need to distinguish between minification and magnification */
2067 lp_build_sample_mipmap(bld, min_filter, mip_filter,
2068 coords, offsets,
2069 ilevel0, ilevel1, lod_fpart,
2070 texels);
2071 }
2072 else {
2073 /*
2074 * Could also get rid of the if-logic and always use mipmap_both, both
2075 * for the single lod and multi-lod case if nothing really uses this.
2076 */
2077 if (bld->num_lods == 1) {
2078 /* Emit conditional to choose min image filter or mag image filter
2079 * depending on the lod being > 0 or <= 0, respectively.
2080 */
2081 struct lp_build_if_state if_ctx;
2082
2083 lod_positive = LLVMBuildTrunc(builder, lod_positive,
2084 LLVMInt1TypeInContext(bld->gallivm->context), "");
2085
2086 lp_build_if(&if_ctx, bld->gallivm, lod_positive);
2087 {
2088 /* Use the minification filter */
2089 lp_build_sample_mipmap(bld, min_filter, mip_filter,
2090 coords, offsets,
2091 ilevel0, ilevel1, lod_fpart,
2092 texels);
2093 }
2094 lp_build_else(&if_ctx);
2095 {
2096 /* Use the magnification filter */
2097 lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
2098 coords, offsets,
2099 ilevel0, NULL, NULL,
2100 texels);
2101 }
2102 lp_build_endif(&if_ctx);
2103 }
2104 else {
2105 LLVMValueRef need_linear, linear_mask;
2106 unsigned mip_filter_for_nearest;
2107 struct lp_build_if_state if_ctx;
2108
2109 if (min_filter == PIPE_TEX_FILTER_LINEAR) {
2110 linear_mask = lod_positive;
2111 mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
2112 }
2113 else {
2114 linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
2115 mip_filter_for_nearest = mip_filter;
2116 }
2117 need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
2118 linear_mask);
2119
2120 if (bld->num_lods != bld->coord_type.length) {
2121 linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
2122 bld->lodi_type,
2123 bld->int_coord_type,
2124 linear_mask);
2125 }
2126
2127 lp_build_if(&if_ctx, bld->gallivm, need_linear);
2128 {
2129 /*
2130 * Do sampling with both filters simultaneously. This means using
2131 * a linear filter and doing some tricks (with weights) for the pixels
2132 * which need nearest filter.
2133 * Note that it's probably rare some pixels need nearest and some
2134 * linear filter but the fixups required for the nearest pixels
2135 * aren't all that complicated so just always run a combined path
2136 * if at least some pixels require linear.
2137 */
2138 lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
2139 coords, offsets,
2140 ilevel0, ilevel1,
2141 lod_fpart, lod_positive,
2142 texels);
2143 }
2144 lp_build_else(&if_ctx);
2145 {
2146 /*
2147 * All pixels require just nearest filtering, which is way
2148 * cheaper than linear, hence do a separate path for that.
2149 */
2150 lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
2151 mip_filter_for_nearest,
2152 coords, offsets,
2153 ilevel0, ilevel1, lod_fpart,
2154 texels);
2155 }
2156 lp_build_endif(&if_ctx);
2157 }
2158 }
2159
2160 for (chan = 0; chan < 4; ++chan) {
2161 colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
2162 lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
2163 }
2164 }
2165
2166
2167 /**
2168 * Texel fetch function.
2169 * In contrast to general sampling there is no filtering, no coord minification,
2170 * lod (if any) is always explicit uint, coords are uints (in terms of texel units)
2171 * directly to be applied to the selected mip level (after adding texel offsets).
2172 * This function handles texel fetch for all targets where texel fetch is supported
2173 * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
2174 */
2175 static void
2176 lp_build_fetch_texel(struct lp_build_sample_context *bld,
2177 unsigned texture_unit,
2178 const LLVMValueRef *coords,
2179 LLVMValueRef explicit_lod,
2180 const LLVMValueRef *offsets,
2181 LLVMValueRef *colors_out)
2182 {
2183 struct lp_build_context *perquadi_bld = &bld->lodi_bld;
2184 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
2185 unsigned dims = bld->dims, chan;
2186 unsigned target = bld->static_texture_state->target;
2187 boolean out_of_bound_ret_zero = TRUE;
2188 LLVMValueRef size, ilevel;
2189 LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
2190 LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
2191 LLVMValueRef width, height, depth, i, j;
2192 LLVMValueRef offset, out_of_bounds, out1;
2193
2194 out_of_bounds = int_coord_bld->zero;
2195
2196 if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
2197 if (bld->num_mips != int_coord_bld->type.length) {
2198 ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
2199 perquadi_bld->type, explicit_lod, 0);
2200 }
2201 else {
2202 ilevel = explicit_lod;
2203 }
2204 lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
2205 out_of_bound_ret_zero ? &out_of_bounds : NULL);
2206 }
2207 else {
2208 assert(bld->num_mips == 1);
2209 if (bld->static_texture_state->target != PIPE_BUFFER) {
2210 ilevel = bld->dynamic_state->first_level(bld->dynamic_state,
2211 bld->gallivm, texture_unit);
2212 }
2213 else {
2214 ilevel = lp_build_const_int32(bld->gallivm, 0);
2215 }
2216 }
2217 lp_build_mipmap_level_sizes(bld, ilevel,
2218 &size,
2219 &row_stride_vec, &img_stride_vec);
2220 lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
2221 size, &width, &height, &depth);
2222
2223 if (target == PIPE_TEXTURE_1D_ARRAY ||
2224 target == PIPE_TEXTURE_2D_ARRAY) {
2225 if (out_of_bound_ret_zero) {
2226 z = lp_build_layer_coord(bld, texture_unit, z, &out1);
2227 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2228 }
2229 else {
2230 z = lp_build_layer_coord(bld, texture_unit, z, NULL);
2231 }
2232 }
2233
2234 /* This is a lot like border sampling */
2235 if (offsets[0]) {
2236 /*
2237 * coords are really unsigned, offsets are signed, but I don't think
2238 * exceeding 31 bits is possible
2239 */
2240 x = lp_build_add(int_coord_bld, x, offsets[0]);
2241 }
2242 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
2243 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2244 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
2245 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2246
2247 if (dims >= 2) {
2248 if (offsets[1]) {
2249 y = lp_build_add(int_coord_bld, y, offsets[1]);
2250 }
2251 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
2252 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2253 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
2254 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2255
2256 if (dims >= 3) {
2257 if (offsets[2]) {
2258 z = lp_build_add(int_coord_bld, z, offsets[2]);
2259 }
2260 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
2261 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2262 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
2263 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2264 }
2265 }
2266
2267 lp_build_sample_offset(int_coord_bld,
2268 bld->format_desc,
2269 x, y, z, row_stride_vec, img_stride_vec,
2270 &offset, &i, &j);
2271
2272 if (bld->static_texture_state->target != PIPE_BUFFER) {
2273 offset = lp_build_add(int_coord_bld, offset,
2274 lp_build_get_mip_offsets(bld, ilevel));
2275 }
2276
2277 offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
2278
2279 lp_build_fetch_rgba_soa(bld->gallivm,
2280 bld->format_desc,
2281 bld->texel_type,
2282 bld->base_ptr, offset,
2283 i, j,
2284 colors_out);
2285
2286 if (out_of_bound_ret_zero) {
2287 /*
2288 * Only needed for ARB_robust_buffer_access_behavior and d3d10.
2289 * Could use min/max above instead of out-of-bounds comparisons
2290 * if we don't care about the result returned for out-of-bounds.
2291 */
2292 for (chan = 0; chan < 4; chan++) {
2293 colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
2294 bld->texel_bld.zero, colors_out[chan]);
2295 }
2296 }
2297 }
2298
2299
2300 /**
2301 * Just set texels to white instead of actually sampling the texture.
2302 * For debugging.
2303 */
2304 void
2305 lp_build_sample_nop(struct gallivm_state *gallivm,
2306 struct lp_type type,
2307 const LLVMValueRef *coords,
2308 LLVMValueRef texel_out[4])
2309 {
2310 LLVMValueRef one = lp_build_one(gallivm, type);
2311 unsigned chan;
2312
2313 for (chan = 0; chan < 4; chan++) {
2314 texel_out[chan] = one;
2315 }
2316 }
2317
2318
2319 /**
2320 * Build texture sampling code.
2321 * 'texel' will return a vector of four LLVMValueRefs corresponding to
2322 * R, G, B, A.
2323 * \param type vector float type to use for coords, etc.
2324 * \param is_fetch if this is a texel fetch instruction.
2325 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y
2326 */
2327 void
2328 lp_build_sample_soa(struct gallivm_state *gallivm,
2329 const struct lp_static_texture_state *static_texture_state,
2330 const struct lp_static_sampler_state *static_sampler_state,
2331 struct lp_sampler_dynamic_state *dynamic_state,
2332 struct lp_type type,
2333 boolean is_fetch,
2334 unsigned texture_index,
2335 unsigned sampler_index,
2336 const LLVMValueRef *coords,
2337 const LLVMValueRef *offsets,
2338 const struct lp_derivatives *derivs, /* optional */
2339 LLVMValueRef lod_bias, /* optional */
2340 LLVMValueRef explicit_lod, /* optional */
2341 enum lp_sampler_lod_property lod_property,
2342 LLVMValueRef texel_out[4])
2343 {
2344 unsigned target = static_texture_state->target;
2345 unsigned dims = texture_dims(target);
2346 unsigned num_quads = type.length / 4;
2347 unsigned mip_filter, min_img_filter, mag_img_filter, i;
2348 struct lp_build_sample_context bld;
2349 struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
2350 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
2351 LLVMBuilderRef builder = gallivm->builder;
2352 LLVMValueRef tex_width, newcoords[5];
2353
2354 if (0) {
2355 enum pipe_format fmt = static_texture_state->format;
2356 debug_printf("Sample from %s\n", util_format_name(fmt));
2357 }
2358
2359 if (static_texture_state->format == PIPE_FORMAT_NONE) {
2360 /*
2361 * If there's nothing bound, format is NONE, and we must return
2362 * all zero as mandated by d3d10 in this case.
2363 */
2364 unsigned chan;
2365 LLVMValueRef zero = lp_build_const_vec(gallivm, type, 0.0F);
2366 for (chan = 0; chan < 4; chan++) {
2367 texel_out[chan] = zero;
2368 }
2369 return;
2370 }
2371
2372 assert(type.floating);
2373
2374 /* Setup our build context */
2375 memset(&bld, 0, sizeof bld);
2376 bld.gallivm = gallivm;
2377 bld.static_sampler_state = &derived_sampler_state;
2378 bld.static_texture_state = static_texture_state;
2379 bld.dynamic_state = dynamic_state;
2380 bld.format_desc = util_format_description(static_texture_state->format);
2381 bld.dims = dims;
2382
2383 bld.vector_width = lp_type_width(type);
2384
2385 bld.float_type = lp_type_float(32);
2386 bld.int_type = lp_type_int(32);
2387 bld.coord_type = type;
2388 bld.int_coord_type = lp_int_type(type);
2389 bld.float_size_in_type = lp_type_float(32);
2390 bld.float_size_in_type.length = dims > 1 ? 4 : 1;
2391 bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
2392 bld.texel_type = type;
2393
2394 /* always using the first channel hopefully should be safe,
2395 * if not things WILL break in other places anyway.
2396 */
2397 if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
2398 bld.format_desc->channel[0].pure_integer) {
2399 if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
2400 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2401 }
2402 else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2403 bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
2404 }
2405 }
2406 else if (util_format_has_stencil(bld.format_desc) &&
2407 !util_format_has_depth(bld.format_desc)) {
2408 /* for stencil only formats, sample stencil (uint) */
2409 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2410 }
2411
2412 if (!static_texture_state->level_zero_only) {
2413 derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
2414 } else {
2415 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2416 }
2417 mip_filter = derived_sampler_state.min_mip_filter;
2418
2419 if (0) {
2420 debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
2421 }
2422
2423 if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2424 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2425 {
2426 /*
2427 * Seamless filtering ignores wrap modes.
2428 * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
2429 * bilinear it's not correct but way better than using for instance repeat.
2430 * Note we even set this for non-seamless. Technically GL allows any wrap
2431 * mode, which made sense when supporting true borders (can get seamless
2432 * effect with border and CLAMP_TO_BORDER), but gallium doesn't support
2433 * borders and d3d9 requires wrap modes to be ignored and it's a pain to fix
2434 * up the sampler state (as it makes it texture dependent).
2435 */
2436 derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2437 derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2438 }
2439
2440 min_img_filter = derived_sampler_state.min_img_filter;
2441 mag_img_filter = derived_sampler_state.mag_img_filter;
2442
2443
2444 /*
2445 * This is all a bit complicated different paths are chosen for performance
2446 * reasons.
2447 * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
2448 * everything (the last two options are equivalent for 4-wide case).
2449 * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
2450 * lod is calculated then the lod value extracted afterwards so making this
2451 * case basically the same as far as lod handling is concerned for the
2452 * further sample/filter code as the 1 lod for everything case.
2453 * Different lod handling mostly shows up when building mipmap sizes
2454 * (lp_build_mipmap_level_sizes() and friends) and also in filtering
2455 * (getting the fractional part of the lod to the right texels).
2456 */
2457
2458 /*
2459 * There are other situations where at least the multiple int lods could be
2460 * avoided like min and max lod being equal.
2461 */
2462 bld.num_mips = bld.num_lods = 1;
2463
2464 if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
2465 (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
2466 (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
2467 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2468 /*
2469 * special case for using per-pixel lod even for implicit lod,
2470 * which is generally never required (ok by APIs) except to please
2471 * some (somewhat broken imho) tests (because per-pixel face selection
2472 * can cause derivatives to be different for pixels outside the primitive
2473 * due to the major axis division even if pre-project derivatives are
2474 * looking normal).
2475 */
2476 bld.num_mips = type.length;
2477 bld.num_lods = type.length;
2478 }
2479 else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
2480 (explicit_lod || lod_bias || derivs)) {
2481 if ((is_fetch && target != PIPE_BUFFER) ||
2482 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2483 bld.num_mips = type.length;
2484 bld.num_lods = type.length;
2485 }
2486 else if (!is_fetch && min_img_filter != mag_img_filter) {
2487 bld.num_mips = 1;
2488 bld.num_lods = type.length;
2489 }
2490 }
2491 /* TODO: for true scalar_lod should only use 1 lod value */
2492 else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) ||
2493 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2494 bld.num_mips = num_quads;
2495 bld.num_lods = num_quads;
2496 }
2497 else if (!is_fetch && min_img_filter != mag_img_filter) {
2498 bld.num_mips = 1;
2499 bld.num_lods = num_quads;
2500 }
2501
2502
2503 bld.lodf_type = type;
2504 /* we want native vector size to be able to use our intrinsics */
2505 if (bld.num_lods != type.length) {
2506 /* TODO: this currently always has to be per-quad or per-element */
2507 bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
2508 }
2509 bld.lodi_type = lp_int_type(bld.lodf_type);
2510 bld.levelf_type = bld.lodf_type;
2511 if (bld.num_mips == 1) {
2512 bld.levelf_type.length = 1;
2513 }
2514 bld.leveli_type = lp_int_type(bld.levelf_type);
2515 bld.float_size_type = bld.float_size_in_type;
2516 /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
2517 * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
2518 if (bld.num_mips > 1) {
2519 bld.float_size_type.length = bld.num_mips == type.length ?
2520 bld.num_mips * bld.float_size_in_type.length :
2521 type.length;
2522 }
2523 bld.int_size_type = lp_int_type(bld.float_size_type);
2524
2525 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
2526 lp_build_context_init(&bld.float_vec_bld, gallivm, type);
2527 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
2528 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
2529 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
2530 lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
2531 lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
2532 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
2533 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
2534 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
2535 lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
2536 lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
2537 lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
2538 lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
2539
2540 /* Get the dynamic state */
2541 tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
2542 bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm, texture_index);
2543 bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm, texture_index);
2544 bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm, texture_index);
2545 bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm, texture_index);
2546 /* Note that mip_offsets is an array[level] of offsets to texture images */
2547
2548 /* width, height, depth as single int vector */
2549 if (dims <= 1) {
2550 bld.int_size = tex_width;
2551 }
2552 else {
2553 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
2554 tex_width, LLVMConstInt(i32t, 0, 0), "");
2555 if (dims >= 2) {
2556 LLVMValueRef tex_height =
2557 dynamic_state->height(dynamic_state, gallivm, texture_index);
2558 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2559 tex_height, LLVMConstInt(i32t, 1, 0), "");
2560 if (dims >= 3) {
2561 LLVMValueRef tex_depth =
2562 dynamic_state->depth(dynamic_state, gallivm, texture_index);
2563 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2564 tex_depth, LLVMConstInt(i32t, 2, 0), "");
2565 }
2566 }
2567 }
2568
2569 for (i = 0; i < 5; i++) {
2570 newcoords[i] = coords[i];
2571 }
2572
2573 if (0) {
2574 /* For debug: no-op texture sampling */
2575 lp_build_sample_nop(gallivm,
2576 bld.texel_type,
2577 newcoords,
2578 texel_out);
2579 }
2580
2581 else if (is_fetch) {
2582 lp_build_fetch_texel(&bld, texture_index, newcoords,
2583 explicit_lod, offsets,
2584 texel_out);
2585 }
2586
2587 else {
2588 LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
2589 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
2590 boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
2591 /* not sure this is strictly needed or simply impossible */
2592 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
2593 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
2594
2595 use_aos &= bld.num_lods <= num_quads ||
2596 derived_sampler_state.min_img_filter ==
2597 derived_sampler_state.mag_img_filter;
2598 if (dims > 1) {
2599 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
2600 if (dims > 2) {
2601 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
2602 }
2603 }
2604 if (static_texture_state->target == PIPE_TEXTURE_CUBE &&
2605 derived_sampler_state.seamless_cube_map &&
2606 (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
2607 derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
2608 /* theoretically possible with AoS filtering but not implemented (complex!) */
2609 use_aos = 0;
2610 }
2611
2612 if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
2613 !use_aos && util_format_fits_8unorm(bld.format_desc)) {
2614 debug_printf("%s: using floating point linear filtering for %s\n",
2615 __FUNCTION__, bld.format_desc->short_name);
2616 debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d"
2617 " wraps %d wrapt %d wrapr %d\n",
2618 derived_sampler_state.min_img_filter,
2619 derived_sampler_state.mag_img_filter,
2620 derived_sampler_state.min_mip_filter,
2621 static_texture_state->target,
2622 derived_sampler_state.seamless_cube_map,
2623 derived_sampler_state.wrap_s,
2624 derived_sampler_state.wrap_t,
2625 derived_sampler_state.wrap_r);
2626 }
2627
2628 lp_build_sample_common(&bld, texture_index, sampler_index,
2629 newcoords,
2630 derivs, lod_bias, explicit_lod,
2631 &lod_positive, &lod_fpart,
2632 &ilevel0, &ilevel1);
2633
2634 /*
2635 * we only try 8-wide sampling with soa as it appears to
2636 * be a loss with aos with AVX (but it should work, except
2637 * for conformance if min_filter != mag_filter if num_lods > 1).
2638 * (It should be faster if we'd support avx2)
2639 */
2640 if (num_quads == 1 || !use_aos) {
2641 if (use_aos) {
2642 /* do sampling/filtering with fixed pt arithmetic */
2643 lp_build_sample_aos(&bld, sampler_index,
2644 newcoords[0], newcoords[1],
2645 newcoords[2],
2646 offsets, lod_positive, lod_fpart,
2647 ilevel0, ilevel1,
2648 texel_out);
2649 }
2650
2651 else {
2652 lp_build_sample_general(&bld, sampler_index,
2653 newcoords, offsets,
2654 lod_positive, lod_fpart,
2655 ilevel0, ilevel1,
2656 texel_out);
2657 }
2658 }
2659 else {
2660 unsigned j;
2661 struct lp_build_sample_context bld4;
2662 struct lp_type type4 = type;
2663 unsigned i;
2664 LLVMValueRef texelout4[4];
2665 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
2666
2667 type4.length = 4;
2668
2669 /* Setup our build context */
2670 memset(&bld4, 0, sizeof bld4);
2671 bld4.gallivm = bld.gallivm;
2672 bld4.static_texture_state = bld.static_texture_state;
2673 bld4.static_sampler_state = bld.static_sampler_state;
2674 bld4.dynamic_state = bld.dynamic_state;
2675 bld4.format_desc = bld.format_desc;
2676 bld4.dims = bld.dims;
2677 bld4.row_stride_array = bld.row_stride_array;
2678 bld4.img_stride_array = bld.img_stride_array;
2679 bld4.base_ptr = bld.base_ptr;
2680 bld4.mip_offsets = bld.mip_offsets;
2681 bld4.int_size = bld.int_size;
2682
2683 bld4.vector_width = lp_type_width(type4);
2684
2685 bld4.float_type = lp_type_float(32);
2686 bld4.int_type = lp_type_int(32);
2687 bld4.coord_type = type4;
2688 bld4.int_coord_type = lp_int_type(type4);
2689 bld4.float_size_in_type = lp_type_float(32);
2690 bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
2691 bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
2692 bld4.texel_type = bld.texel_type;
2693 bld4.texel_type.length = 4;
2694
2695 bld4.num_mips = bld4.num_lods = 1;
2696 if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
2697 (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
2698 (static_texture_state->target == PIPE_TEXTURE_CUBE) &&
2699 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2700 bld4.num_mips = type4.length;
2701 bld4.num_lods = type4.length;
2702 }
2703 if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
2704 (explicit_lod || lod_bias || derivs)) {
2705 if ((is_fetch && target != PIPE_BUFFER) ||
2706 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2707 bld4.num_mips = type4.length;
2708 bld4.num_lods = type4.length;
2709 }
2710 else if (!is_fetch && min_img_filter != mag_img_filter) {
2711 bld4.num_mips = 1;
2712 bld4.num_lods = type4.length;
2713 }
2714 }
2715
2716 /* we want native vector size to be able to use our intrinsics */
2717 bld4.lodf_type = type4;
2718 if (bld4.num_lods != type4.length) {
2719 bld4.lodf_type.length = 1;
2720 }
2721 bld4.lodi_type = lp_int_type(bld4.lodf_type);
2722 bld4.levelf_type = type4;
2723 if (bld4.num_mips != type4.length) {
2724 bld4.levelf_type.length = 1;
2725 }
2726 bld4.leveli_type = lp_int_type(bld4.levelf_type);
2727 bld4.float_size_type = bld4.float_size_in_type;
2728 if (bld4.num_mips > 1) {
2729 bld4.float_size_type.length = bld4.num_mips == type4.length ?
2730 bld4.num_mips * bld4.float_size_in_type.length :
2731 type4.length;
2732 }
2733 bld4.int_size_type = lp_int_type(bld4.float_size_type);
2734
2735 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
2736 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
2737 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
2738 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
2739 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
2740 lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
2741 lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
2742 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
2743 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
2744 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
2745 lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
2746 lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
2747 lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
2748 lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
2749
2750 for (i = 0; i < num_quads; i++) {
2751 LLVMValueRef s4, t4, r4;
2752 LLVMValueRef lod_positive4, lod_fpart4 = NULL;
2753 LLVMValueRef ilevel04, ilevel14 = NULL;
2754 LLVMValueRef offsets4[4] = { NULL };
2755 unsigned num_lods = bld4.num_lods;
2756
2757 s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
2758 t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
2759 r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
2760
2761 if (offsets[0]) {
2762 offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
2763 if (dims > 1) {
2764 offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
2765 if (dims > 2) {
2766 offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
2767 }
2768 }
2769 }
2770 lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
2771 ilevel04 = bld.num_mips == 1 ? ilevel0 :
2772 lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
2773 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
2774 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
2775 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
2776 }
2777
2778 if (use_aos) {
2779 /* do sampling/filtering with fixed pt arithmetic */
2780 lp_build_sample_aos(&bld4, sampler_index,
2781 s4, t4, r4, offsets4,
2782 lod_positive4, lod_fpart4,
2783 ilevel04, ilevel14,
2784 texelout4);
2785 }
2786
2787 else {
2788 /* this path is currently unreachable and hence might break easily... */
2789 LLVMValueRef newcoords4[5];
2790 newcoords4[0] = s4;
2791 newcoords4[1] = t4;
2792 newcoords4[2] = r4;
2793 newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
2794 newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
2795
2796 lp_build_sample_general(&bld4, sampler_index,
2797 newcoords4, offsets4,
2798 lod_positive4, lod_fpart4,
2799 ilevel04, ilevel14,
2800 texelout4);
2801 }
2802 for (j = 0; j < 4; j++) {
2803 texelouttmp[j][i] = texelout4[j];
2804 }
2805 }
2806
2807 for (j = 0; j < 4; j++) {
2808 texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
2809 }
2810 }
2811 }
2812
2813 if (target != PIPE_BUFFER) {
2814 apply_sampler_swizzle(&bld, texel_out);
2815 }
2816
2817 /*
2818 * texel type can be a (32bit) int/uint (for pure int formats only),
2819 * however we are expected to always return floats (storage is untyped).
2820 */
2821 if (!bld.texel_type.floating) {
2822 unsigned chan;
2823 for (chan = 0; chan < 4; chan++) {
2824 texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
2825 lp_build_vec_type(gallivm, type), "");
2826 }
2827 }
2828 }
2829
2830 void
2831 lp_build_size_query_soa(struct gallivm_state *gallivm,
2832 const struct lp_static_texture_state *static_state,
2833 struct lp_sampler_dynamic_state *dynamic_state,
2834 struct lp_type int_type,
2835 unsigned texture_unit,
2836 unsigned target,
2837 boolean is_sviewinfo,
2838 enum lp_sampler_lod_property lod_property,
2839 LLVMValueRef explicit_lod,
2840 LLVMValueRef *sizes_out)
2841 {
2842 LLVMValueRef lod, level, size;
2843 LLVMValueRef first_level = NULL;
2844 int dims, i;
2845 boolean has_array;
2846 unsigned num_lods = 1;
2847 struct lp_build_context bld_int_vec4;
2848
2849 if (static_state->format == PIPE_FORMAT_NONE) {
2850 /*
2851 * If there's nothing bound, format is NONE, and we must return
2852 * all zero as mandated by d3d10 in this case.
2853 */
2854 unsigned chan;
2855 LLVMValueRef zero = lp_build_const_vec(gallivm, int_type, 0.0F);
2856 for (chan = 0; chan < 4; chan++) {
2857 sizes_out[chan] = zero;
2858 }
2859 return;
2860 }
2861
2862 /*
2863 * Do some sanity verification about bound texture and shader dcl target.
2864 * Not entirely sure what's possible but assume array/non-array
2865 * always compatible (probably not ok for OpenGL but d3d10 has no
2866 * distinction of arrays at the resource level).
2867 * Everything else looks bogus (though not entirely sure about rect/2d).
2868 * Currently disabled because it causes assertion failures if there's
2869 * nothing bound (or rather a dummy texture, not that this case would
2870 * return the right values).
2871 */
2872 if (0 && static_state->target != target) {
2873 if (static_state->target == PIPE_TEXTURE_1D)
2874 assert(target == PIPE_TEXTURE_1D_ARRAY);
2875 else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
2876 assert(target == PIPE_TEXTURE_1D);
2877 else if (static_state->target == PIPE_TEXTURE_2D)
2878 assert(target == PIPE_TEXTURE_2D_ARRAY);
2879 else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
2880 assert(target == PIPE_TEXTURE_2D);
2881 else if (static_state->target == PIPE_TEXTURE_CUBE)
2882 assert(target == PIPE_TEXTURE_CUBE_ARRAY);
2883 else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2884 assert(target == PIPE_TEXTURE_CUBE);
2885 else
2886 assert(0);
2887 }
2888
2889 dims = texture_dims(target);
2890
2891 switch (target) {
2892 case PIPE_TEXTURE_1D_ARRAY:
2893 case PIPE_TEXTURE_2D_ARRAY:
2894 has_array = TRUE;
2895 break;
2896 default:
2897 has_array = FALSE;
2898 break;
2899 }
2900
2901 assert(!int_type.floating);
2902
2903 lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
2904
2905 if (explicit_lod) {
2906 /* FIXME: this needs to honor per-element lod */
2907 lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, lp_build_const_int32(gallivm, 0), "");
2908 first_level = dynamic_state->first_level(dynamic_state, gallivm, texture_unit);
2909 level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
2910 lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
2911 } else {
2912 lod = bld_int_vec4.zero;
2913 }
2914
2915 size = bld_int_vec4.undef;
2916
2917 size = LLVMBuildInsertElement(gallivm->builder, size,
2918 dynamic_state->width(dynamic_state, gallivm, texture_unit),
2919 lp_build_const_int32(gallivm, 0), "");
2920
2921 if (dims >= 2) {
2922 size = LLVMBuildInsertElement(gallivm->builder, size,
2923 dynamic_state->height(dynamic_state, gallivm, texture_unit),
2924 lp_build_const_int32(gallivm, 1), "");
2925 }
2926
2927 if (dims >= 3) {
2928 size = LLVMBuildInsertElement(gallivm->builder, size,
2929 dynamic_state->depth(dynamic_state, gallivm, texture_unit),
2930 lp_build_const_int32(gallivm, 2), "");
2931 }
2932
2933 size = lp_build_minify(&bld_int_vec4, size, lod, TRUE);
2934
2935 if (has_array)
2936 size = LLVMBuildInsertElement(gallivm->builder, size,
2937 dynamic_state->depth(dynamic_state, gallivm, texture_unit),
2938 lp_build_const_int32(gallivm, dims), "");
2939
2940 /*
2941 * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
2942 * if level is out of bounds (note this can't cover unbound texture
2943 * here, which also requires returning zero).
2944 */
2945 if (explicit_lod && is_sviewinfo) {
2946 LLVMValueRef last_level, out, out1;
2947 struct lp_build_context leveli_bld;
2948
2949 /* everything is scalar for now */
2950 lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
2951 last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
2952
2953 out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
2954 out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
2955 out = lp_build_or(&leveli_bld, out, out1);
2956 if (num_lods == 1) {
2957 out = lp_build_broadcast_scalar(&bld_int_vec4, out);
2958 }
2959 else {
2960 /* TODO */
2961 assert(0);
2962 }
2963 size = lp_build_andnot(&bld_int_vec4, size, out);
2964 }
2965 for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
2966 sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, int_type,
2967 size,
2968 lp_build_const_int32(gallivm, i));
2969 }
2970 if (is_sviewinfo) {
2971 for (; i < 4; i++) {
2972 sizes_out[i] = lp_build_const_vec(gallivm, int_type, 0.0);
2973 }
2974 }
2975
2976 /*
2977 * if there's no explicit_lod (buffers, rects) queries requiring nr of
2978 * mips would be illegal.
2979 */
2980 if (is_sviewinfo && explicit_lod) {
2981 struct lp_build_context bld_int_scalar;
2982 LLVMValueRef num_levels;
2983 lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
2984
2985 if (static_state->level_zero_only) {
2986 num_levels = bld_int_scalar.one;
2987 }
2988 else {
2989 LLVMValueRef last_level;
2990
2991 last_level = dynamic_state->last_level(dynamic_state, gallivm, texture_unit);
2992 num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
2993 num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
2994 }
2995 sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, int_type),
2996 num_levels);
2997 }
2998 }