gallivm: simplify sampler interface
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/u_debug.h"
40 #include "util/u_dump.h"
41 #include "util/u_memory.h"
42 #include "util/u_math.h"
43 #include "util/u_format.h"
44 #include "util/u_cpu_detect.h"
45 #include "util/u_format_rgb9e5.h"
46 #include "lp_bld_debug.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_const.h"
49 #include "lp_bld_conv.h"
50 #include "lp_bld_arit.h"
51 #include "lp_bld_bitarit.h"
52 #include "lp_bld_logic.h"
53 #include "lp_bld_printf.h"
54 #include "lp_bld_swizzle.h"
55 #include "lp_bld_flow.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_format.h"
58 #include "lp_bld_sample.h"
59 #include "lp_bld_sample_aos.h"
60 #include "lp_bld_struct.h"
61 #include "lp_bld_quad.h"
62 #include "lp_bld_pack.h"
63
64
/**
 * Generate code to fetch a texel from a texture at int coords (x, y, z).
 * The computation depends on whether the texture is 1D, 2D or 3D.
 * The result, texel, will be float vectors:
 *   texel[0] = red values
 *   texel[1] = green values
 *   texel[2] = blue values
 *   texel[3] = alpha values
 *
 * \param width,height,depth  mip level size, as int vectors
 * \param x,y,z        integer texel coordinates, as int vectors
 * \param y_stride     row stride in bytes (int vector)
 * \param z_stride     image/slice stride in bytes (int vector)
 * \param data_ptr     base pointer of the texel data
 * \param mipoffsets   optional per-element byte offset of the selected mip
 *                     level (NULL if data_ptr already points at the level)
 * \param texel_out    returns the four texel channels
 */
static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                          LLVMValueRef width,
                          LLVMValueRef height,
                          LLVMValueRef depth,
                          LLVMValueRef x,
                          LLVMValueRef y,
                          LLVMValueRef z,
                          LLVMValueRef y_stride,
                          LLVMValueRef z_stride,
                          LLVMValueRef data_ptr,
                          LLVMValueRef mipoffsets,
                          LLVMValueRef texel_out[4])
{
   const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
   const unsigned dims = bld->dims;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef i, j;
   LLVMValueRef use_border = NULL;

   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
   /* Only build the out-of-bounds mask for axes whose wrap mode can
    * actually produce coords outside [0, size-1] (border modes). */
   if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
      use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
   }

   if (dims >= 2 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   if (dims == 3 &&
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
                                              static_state->min_img_filter,
                                              static_state->mag_img_filter)) {
      LLVMValueRef b1, b2;
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      if (use_border) {
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
      }
      else {
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
      }
   }

   /* convert x,y,z coords to linear offset from start of texture, in bytes */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x, y, z, y_stride, z_stride,
                          &offset, &i, &j);
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   if (use_border) {
      /* If we can sample the border color, it means that texcoords may
       * lie outside the bounds of the texture image. We need to do
       * something to prevent reading out of bounds and causing a segfault.
       *
       * Simply AND the texture coords with !use_border.  This will cause
       * coords which are out of bounds to become zero.  Zero's guaranteed
       * to be inside the texture image.
       */
      offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
   }

   lp_build_fetch_rgba_soa(bld->gallivm,
                           bld->format_desc,
                           bld->texel_type,
                           data_ptr, offset,
                           i, j,
                           texel_out);

   /*
    * Note: if we find an app which frequently samples the texture border
    * we might want to implement a true conditional here to avoid sampling
    * the texture whenever possible (since that's quite a bit of code).
    * Ex:
    *   if (use_border) {
    *      texel = border_color;
    *   }
    *   else {
    *      texel = sample_texture(coord);
    *   }
    * As it is now, we always sample the texture, then selectively replace
    * the texel color results with the border color.
    */

   if (use_border) {
      /* select texel color or border color depending on use_border. */
      const struct util_format_description *format_desc = bld->format_desc;
      int chan;
      struct lp_type border_type = bld->texel_type;
      border_type.length = 4;
      /*
       * Only replace channels which are actually present. The others should
       * get optimized away eventually by sampler_view swizzle anyway but it's
       * easier too.
       */
      for (chan = 0; chan < 4; chan++) {
         unsigned chan_s;
         /* reverse-map channel... */
         for (chan_s = 0; chan_s < 4; chan_s++) {
            if (chan_s == format_desc->swizzle[chan]) {
               break;
            }
         }
         /* chan_s <= 3 means the output channel maps to a real resource
          * channel (swizzle X/Y/Z/W); chan_s == 4 means no match was found
          * (constant 0/1 swizzle) and no border replacement is needed. */
         if (chan_s <= 3) {
            /* use the already clamped color */
            LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
            LLVMValueRef border_chan;

            border_chan = lp_build_extract_broadcast(bld->gallivm,
                                                     border_type,
                                                     bld->texel_type,
                                                     bld->border_color_clamped,
                                                     idx);
            texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
                                              border_chan, texel_out[chan]);
         }
      }
   }
}
216
217
218 /**
219 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
220 */
221 static LLVMValueRef
222 lp_build_coord_mirror(struct lp_build_sample_context *bld,
223 LLVMValueRef coord)
224 {
225 struct lp_build_context *coord_bld = &bld->coord_bld;
226 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
227 LLVMValueRef fract, flr, isOdd;
228
229 lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
230
231 /* isOdd = flr & 1 */
232 isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
233
234 /* make coord positive or negative depending on isOdd */
235 coord = lp_build_set_sign(coord_bld, fract, isOdd);
236
237 /* convert isOdd to float */
238 isOdd = lp_build_int_to_float(coord_bld, isOdd);
239
240 /* add isOdd to coord */
241 coord = lp_build_add(coord_bld, coord, isOdd);
242
243 return coord;
244 }
245
246
247 /**
248 * Helper to compute the first coord and the weight for
249 * linear wrap repeat npot textures
250 */
251 void
252 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
253 LLVMValueRef coord_f,
254 LLVMValueRef length_i,
255 LLVMValueRef length_f,
256 LLVMValueRef *coord0_i,
257 LLVMValueRef *weight_f)
258 {
259 struct lp_build_context *coord_bld = &bld->coord_bld;
260 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
261 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
262 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
263 int_coord_bld->one);
264 LLVMValueRef mask;
265 /* wrap with normalized floats is just fract */
266 coord_f = lp_build_fract(coord_bld, coord_f);
267 /* mul by size and subtract 0.5 */
268 coord_f = lp_build_mul(coord_bld, coord_f, length_f);
269 coord_f = lp_build_sub(coord_bld, coord_f, half);
270 /*
271 * we avoided the 0.5/length division before the repeat wrap,
272 * now need to fix up edge cases with selects
273 */
274 /* convert to int, compute lerp weight */
275 lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
276 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
277 PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
278 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
279 }
280
281
/**
 * Build LLVM code for texture wrap mode for linear filtering.
 * \param coord the incoming texcoord (nominally in [0,1])
 * \param length the texture size along one dimension, as int vector
 * \param length_f the texture size along one dimension, as float vector
 * \param offset texel offset along one dimension (as int vector)
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param x0_out returns first integer texcoord
 * \param x1_out returns second integer texcoord
 * \param weight_out returns linear interpolation weight
 */
static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
                            LLVMValueRef coord,
                            LLVMValueRef length,
                            LLVMValueRef length_f,
                            LLVMValueRef offset,
                            boolean is_pot,
                            unsigned wrap_mode,
                            LLVMValueRef *x0_out,
                            LLVMValueRef *x1_out,
                            LLVMValueRef *weight_out)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef coord0, coord1, weight;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_sub(coord_bld, coord, half);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* repeat wrap */
         /* power-of-two size: wrap is a cheap bitwise AND with size-1 */
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
         coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
      }
      else {
         LLVMValueRef mask;
         /* non-pot: offset must be applied in normalized space
          * (before the fract() inside the npot helper) */
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         lp_build_coord_repeat_npot_linear(bld, coord,
                                           length, length_f,
                                           &coord0, &weight);
         /* mask is all-ones unless coord0 == length-1; ANDing coord1
          * with it makes the second texel wrap around to 0 there */
         mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                                 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
         coord1 = LLVMBuildAnd(builder,
                               lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
                               mask, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }

      /* clamp to [0, length] */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      {
         /* coord is non-negative after the max() below, so use an
          * unsigned build context for a cheaper ifloor */
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* mul by tex size */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }

         /* clamp to length max */
         coord = lp_build_min(coord_bld, coord, length_f);
         /* subtract 0.5 */
         coord = lp_build_sub(coord_bld, coord, half);
         /* clamp to [0, length - 0.5] */
         coord = lp_build_max(coord_bld, coord, coord_bld->zero);
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
         break;
      }

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
      /* can skip clamp (though might not work for very large coord values */
      /* out-of-range coords are handled later by border masking */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord);

      /* scale coord to length */
      coord = lp_build_mul(coord_bld, coord, length_f);
      coord = lp_build_sub(coord_bld, coord, half);
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

      /* coord0 = max(coord0, 0) */
      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
      /* coord1 = min(coord1, length-1) */
      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror: reflect negative coords across zero */
      coord = lp_build_abs(coord_bld, coord);

      /* clamp to [0, length] */
      coord = lp_build_min(coord_bld, coord, length_f);

      coord = lp_build_sub(coord_bld, coord, half);

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      {
         /* coord is known non-negative here (abs + max below) */
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;

         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         coord = lp_build_abs(coord_bld, coord);

         /* clamp to length max */
         coord = lp_build_min(coord_bld, coord, length_f);
         /* subtract 0.5 */
         coord = lp_build_sub(coord_bld, coord, half);
         /* clamp to [0, length - 0.5] */
         coord = lp_build_max(coord_bld, coord, coord_bld->zero);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         /* coord1 = min(coord1, length-1) */
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      {
         if (bld->static_sampler_state->normalized_coords) {
            /* scale coord to length */
            coord = lp_build_mul(coord_bld, coord, length_f);
         }
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         coord = lp_build_abs(coord_bld, coord);

         /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
         /* skip clamp - always positive, and other side
            only potentially matters for very large coords */
         coord = lp_build_sub(coord_bld, coord, half);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
      }
      break;

   default:
      assert(0);
      coord0 = NULL;
      coord1 = NULL;
      weight = NULL;
   }

   *x0_out = coord0;
   *x1_out = coord1;
   *weight_out = weight;
}
514
515
/**
 * Build LLVM code for texture wrap mode for nearest filtering.
 * \param coord the incoming texcoord (nominally in [0,1])
 * \param length the texture size along one dimension, as int vector
 * \param length_f the texture size along one dimension, as float vector
 * \param offset texel offset along one dimension (as int vector)
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \return integer texel coordinate vector (for the border wrap modes it
 *         may lie outside [0, length-1]; border masking handles that
 *         later in lp_build_sample_texel_soa)
 */
static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
                             LLVMValueRef coord,
                             LLVMValueRef length,
                             LLVMValueRef length_f,
                             LLVMValueRef offset,
                             boolean is_pot,
                             unsigned wrap_mode)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef icoord;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_ifloor(coord_bld, coord);
         if (offset) {
            icoord = lp_build_add(int_coord_bld, icoord, offset);
         }
         /* power-of-two size: repeat wrap is a bitwise AND with size-1 */
         icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
      }
      else {
         /* non-pot: apply offset in normalized space, before the fract */
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* take fraction, unnormalize */
         coord = lp_build_fract_safe(coord_bld, coord);
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

      /* floor */
      /* use itrunc instead since we clamp to 0 anyway */
      icoord = lp_build_itrunc(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }

      /* clamp to [0, length - 1]. */
      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
                              length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      /* no clamp necessary, border masking will handle this */
      icoord = lp_build_ifloor(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      /* offset must go in before mirroring, in normalized space */
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord);

      /* scale coord to length */
      assert(bld->static_sampler_state->normalized_coords);
      coord = lp_build_mul(coord_bld, coord, length_f);

      /* itrunc == ifloor here */
      /* (mirrored coord is non-negative, so truncation == floor) */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror: reflect negative coords across zero */
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      /* no upper clamp: border masking handles overflow later */
      icoord = lp_build_itrunc(coord_bld, coord);
      break;

   default:
      assert(0);
      icoord = NULL;
   }

   return icoord;
}
655
656
657 /**
658 * Do shadow test/comparison.
659 * \param p shadow ref value
660 * \param texel the texel to compare against
661 */
662 static LLVMValueRef
663 lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
664 LLVMValueRef p,
665 LLVMValueRef texel)
666 {
667 struct lp_build_context *texel_bld = &bld->texel_bld;
668 LLVMValueRef res;
669
670 if (0) {
671 //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
672 lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
673 }
674
675 /* result = (p FUNC texel) ? 1 : 0 */
676 /*
677 * honor d3d10 floating point rules here, which state that comparisons
678 * are ordered except NOT_EQUAL which is unordered.
679 */
680 if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
681 res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
682 p, texel);
683 }
684 else {
685 res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
686 p, texel);
687 }
688 return res;
689 }
690
691
/**
 * Generate code to sample a mipmap level with nearest filtering.
 * If sampling a cube texture, r = cube face in [0,5].
 *
 * \param size            int vector holding the mip level's width/height/depth
 * \param row_stride_vec  row stride in bytes (int vector)
 * \param img_stride_vec  image/layer stride in bytes (int vector)
 * \param data_ptr        pointer to the level's texel data
 * \param mipoffsets      optional per-element mip byte offset (may be NULL)
 * \param coords          texcoords: [0]=s, [1]=t, [2]=r / face / layer,
 *                        [3]=cube array layer, [4]=shadow reference value
 * \param offsets         texel offsets per dimension (elements may be NULL)
 * \param colors_out      returns the sampled r,g,b,a channels
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef *coords,
                              const LLVMValueRef *offsets,
                              LLVMValueRef colors_out[4])
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec;
   LLVMValueRef height_vec;
   LLVMValueRef depth_vec;
   LLVMValueRef flt_size;
   LLVMValueRef flt_width_vec;
   LLVMValueRef flt_height_vec;
   LLVMValueRef flt_depth_vec;
   LLVMValueRef x, y = NULL, z = NULL;

   /* extract the per-dimension sizes as int vectors... */
   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                size,
                                &width_vec, &height_vec, &depth_vec);

   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

   /* ...and as float vectors (needed for coord scaling) */
   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);

   /*
    * Compute integer texcoords.
    */
   x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
                                    flt_width_vec, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s);
   lp_build_name(x, "tex.x.wrapped");

   if (dims >= 2) {
      y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
                                       flt_height_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t);
      lp_build_name(y, "tex.y.wrapped");

      if (dims == 3) {
         z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
                                          flt_depth_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r);
         lp_build_name(z, "tex.z.wrapped");
      }
   }
   /* for array/cube targets z selects the layer/face, no wrapping applied */
   if (has_layer_coord(bld->static_texture_state->target)) {
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* add cube layer to face */
         z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
      }
      else {
         z = coords[2];
      }
      lp_build_name(z, "tex.z.layer");
   }

   /*
    * Get texture colors.
    */
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x, y, z,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, colors_out);

   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
      /* shadow sampling: replace the color with the 0.0/1.0 outcome of
       * comparing the shadow ref (coords[4]) against the red channel,
       * broadcast to all four output channels */
      LLVMValueRef cmpval;
      cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
      /* this is really just a AND 1.0, cmpval but llvm is clever enough */
      colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
                                      bld->texel_bld.one, bld->texel_bld.zero);
      colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
   }

}
785
786
787 /**
788 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
789 */
790 static LLVMValueRef
791 lp_build_masklerp(struct lp_build_context *bld,
792 LLVMValueRef weight,
793 LLVMValueRef mask0,
794 LLVMValueRef mask1)
795 {
796 struct gallivm_state *gallivm = bld->gallivm;
797 LLVMBuilderRef builder = gallivm->builder;
798 LLVMValueRef weight2;
799
800 weight2 = lp_build_sub(bld, bld->one, weight);
801 weight = LLVMBuildBitCast(builder, weight,
802 lp_build_int_vec_type(gallivm, bld->type), "");
803 weight2 = LLVMBuildBitCast(builder, weight2,
804 lp_build_int_vec_type(gallivm, bld->type), "");
805 weight = LLVMBuildAnd(builder, weight, mask1, "");
806 weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
807 weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
808 weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
809 return lp_build_add(bld, weight, weight2);
810 }
811
812 /**
813 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
814 */
815 static LLVMValueRef
816 lp_build_masklerp2d(struct lp_build_context *bld,
817 LLVMValueRef weight0,
818 LLVMValueRef weight1,
819 LLVMValueRef mask00,
820 LLVMValueRef mask01,
821 LLVMValueRef mask10,
822 LLVMValueRef mask11)
823 {
824 LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
825 LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
826 return lp_build_lerp(bld, weight1, val0, val1, 0);
827 }
828
829 /*
830 * this is a bit excessive code for something OpenGL just recommends
831 * but does not require.
832 */
833 #define ACCURATE_CUBE_CORNERS 1
834
835 /**
836 * Generate code to sample a mipmap level with linear filtering.
837 * If sampling a cube texture, r = cube face in [0,5].
838 * If linear_mask is present, only pixels having their mask set
839 * will receive linear filtering, the rest will use nearest.
840 */
841 static void
842 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
843 LLVMValueRef size,
844 LLVMValueRef linear_mask,
845 LLVMValueRef row_stride_vec,
846 LLVMValueRef img_stride_vec,
847 LLVMValueRef data_ptr,
848 LLVMValueRef mipoffsets,
849 LLVMValueRef *coords,
850 const LLVMValueRef *offsets,
851 LLVMValueRef colors_out[4])
852 {
853 LLVMBuilderRef builder = bld->gallivm->builder;
854 struct lp_build_context *ivec_bld = &bld->int_coord_bld;
855 struct lp_build_context *coord_bld = &bld->coord_bld;
856 const unsigned dims = bld->dims;
857 LLVMValueRef width_vec;
858 LLVMValueRef height_vec;
859 LLVMValueRef depth_vec;
860 LLVMValueRef flt_size;
861 LLVMValueRef flt_width_vec;
862 LLVMValueRef flt_height_vec;
863 LLVMValueRef flt_depth_vec;
864 LLVMValueRef fall_off[4], have_corners;
865 LLVMValueRef z1 = NULL;
866 LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
867 LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
868 LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
869 LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
870 LLVMValueRef xs[4], ys[4], zs[4];
871 LLVMValueRef neighbors[2][2][4];
872 int chan, texel_index;
873 boolean seamless_cube_filter, accurate_cube_corners;
874
875 seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
876 bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
877 bld->static_sampler_state->seamless_cube_map;
878 accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter;
879
880 lp_build_extract_image_sizes(bld,
881 &bld->int_size_bld,
882 bld->int_coord_type,
883 size,
884 &width_vec, &height_vec, &depth_vec);
885
886 flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
887
888 lp_build_extract_image_sizes(bld,
889 &bld->float_size_bld,
890 bld->coord_type,
891 flt_size,
892 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
893
894 /*
895 * Compute integer texcoords.
896 */
897
898 if (!seamless_cube_filter) {
899 lp_build_sample_wrap_linear(bld, coords[0], width_vec,
900 flt_width_vec, offsets[0],
901 bld->static_texture_state->pot_width,
902 bld->static_sampler_state->wrap_s,
903 &x00, &x01, &s_fpart);
904 lp_build_name(x00, "tex.x0.wrapped");
905 lp_build_name(x01, "tex.x1.wrapped");
906 x10 = x00;
907 x11 = x01;
908
909 if (dims >= 2) {
910 lp_build_sample_wrap_linear(bld, coords[1], height_vec,
911 flt_height_vec, offsets[1],
912 bld->static_texture_state->pot_height,
913 bld->static_sampler_state->wrap_t,
914 &y00, &y10, &t_fpart);
915 lp_build_name(y00, "tex.y0.wrapped");
916 lp_build_name(y10, "tex.y1.wrapped");
917 y01 = y00;
918 y11 = y10;
919
920 if (dims == 3) {
921 lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
922 flt_depth_vec, offsets[2],
923 bld->static_texture_state->pot_depth,
924 bld->static_sampler_state->wrap_r,
925 &z00, &z1, &r_fpart);
926 z01 = z10 = z11 = z00;
927 lp_build_name(z00, "tex.z0.wrapped");
928 lp_build_name(z1, "tex.z1.wrapped");
929 }
930 }
931 if (has_layer_coord(bld->static_texture_state->target)) {
932 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
933 /* add cube layer to face */
934 z00 = z01 = z10 = z11 = z1 =
935 lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
936 }
937 else {
938 z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */
939 }
940 lp_build_name(z00, "tex.z0.layer");
941 lp_build_name(z1, "tex.z1.layer");
942 }
943 }
944 else {
945 struct lp_build_if_state edge_if;
946 LLVMTypeRef int1t;
947 LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
948 LLVMValueRef coord, have_edge, have_corner;
949 LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
950 LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
951 LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
952 LLVMValueRef face = coords[2];
953 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
954 LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
955 /* XXX drop height calcs. Could (should) do this without seamless filtering too */
956 height_vec = width_vec;
957 flt_height_vec = flt_width_vec;
958
959 /* XXX the overflow logic is actually sort of duplicated with trilinear,
960 * since an overflow in one mip should also have a corresponding overflow
961 * in another.
962 */
963 /* should always have normalized coords, and offsets are undefined */
964 assert(bld->static_sampler_state->normalized_coords);
965 coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
966 /* instead of clamp, build mask if overflowed */
967 coord = lp_build_sub(coord_bld, coord, half);
968 /* convert to int, compute lerp weight */
969 /* not ideal with AVX (and no AVX2) */
970 lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart);
971 x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
972 coord = lp_build_mul(coord_bld, coords[1], flt_height_vec);
973 coord = lp_build_sub(coord_bld, coord, half);
974 lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart);
975 y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
976
977 fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
978 fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
979 fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
980 fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
981
982 fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
983 fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
984 have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
985 have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
986
987 /* needed for accurate corner filtering branch later, rely on 0 init */
988 int1t = LLVMInt1TypeInContext(bld->gallivm->context);
989 have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");
990
991 for (texel_index = 0; texel_index < 4; texel_index++) {
992 xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
993 ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
994 zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
995 }
996
997 lp_build_if(&edge_if, bld->gallivm, have_edge);
998
999 have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
1000 have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
1001 LLVMBuildStore(builder, have_corner, have_corners);
1002
1003 /*
1004 * Need to feed clamped values here for cheap corner handling,
1005 * but only for y coord (as when falling off both edges we only
1006 * fall off the x one) - this should be sufficient.
1007 */
1008 y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
1009 y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);
1010
1011 /*
1012 * Get all possible new coords.
1013 */
1014 lp_build_cube_new_coords(ivec_bld, face,
1015 x0, x1, y0_clamped, y1_clamped,
1016 length_minus_one,
1017 new_faces, new_xcoords, new_ycoords);
1018
1019 /* handle fall off x-, x+ direction */
1020 /* determine new coords, face (not both fall_off vars can be true at same time) */
1021 x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
1022 y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
1023 x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
1024 y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
1025 x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
1026 y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
1027 x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
1028 y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);
1029
1030 z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
1031 z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);
1032
1033 /* handle fall off y-, y+ direction */
1034 /*
1035 * Cheap corner logic: just hack up things so a texel doesn't fall
1036 * off both sides (which means filter weights will be wrong but we'll only
1037 * use valid texels in the filter).
1038 * This means however (y) coords must additionally be clamped (see above).
1039 * This corner handling should be fully OpenGL (but not d3d10) compliant.
1040 */
1041 fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
1042 fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
1043 fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
1044 fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);
1045
1046 x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
1047 y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
1048 x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
1049 y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
1050 x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
1051 y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
1052 x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
1053 y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);
1054
1055 z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
1056 z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
1057 z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
1058 z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);
1059
1060 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1061 /* now can add cube layer to face (per sample) */
1062 z00 = lp_build_add(ivec_bld, z00, coords[3]);
1063 z01 = lp_build_add(ivec_bld, z01, coords[3]);
1064 z10 = lp_build_add(ivec_bld, z10, coords[3]);
1065 z11 = lp_build_add(ivec_bld, z11, coords[3]);
1066 }
1067
1068 LLVMBuildStore(builder, x00, xs[0]);
1069 LLVMBuildStore(builder, x01, xs[1]);
1070 LLVMBuildStore(builder, x10, xs[2]);
1071 LLVMBuildStore(builder, x11, xs[3]);
1072 LLVMBuildStore(builder, y00, ys[0]);
1073 LLVMBuildStore(builder, y01, ys[1]);
1074 LLVMBuildStore(builder, y10, ys[2]);
1075 LLVMBuildStore(builder, y11, ys[3]);
1076 LLVMBuildStore(builder, z00, zs[0]);
1077 LLVMBuildStore(builder, z01, zs[1]);
1078 LLVMBuildStore(builder, z10, zs[2]);
1079 LLVMBuildStore(builder, z11, zs[3]);
1080
1081 lp_build_else(&edge_if);
1082
1083 LLVMBuildStore(builder, x0, xs[0]);
1084 LLVMBuildStore(builder, x1, xs[1]);
1085 LLVMBuildStore(builder, x0, xs[2]);
1086 LLVMBuildStore(builder, x1, xs[3]);
1087 LLVMBuildStore(builder, y0, ys[0]);
1088 LLVMBuildStore(builder, y0, ys[1]);
1089 LLVMBuildStore(builder, y1, ys[2]);
1090 LLVMBuildStore(builder, y1, ys[3]);
1091 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1092 LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
1093 LLVMBuildStore(builder, cube_layer, zs[0]);
1094 LLVMBuildStore(builder, cube_layer, zs[1]);
1095 LLVMBuildStore(builder, cube_layer, zs[2]);
1096 LLVMBuildStore(builder, cube_layer, zs[3]);
1097 }
1098 else {
1099 LLVMBuildStore(builder, face, zs[0]);
1100 LLVMBuildStore(builder, face, zs[1]);
1101 LLVMBuildStore(builder, face, zs[2]);
1102 LLVMBuildStore(builder, face, zs[3]);
1103 }
1104
1105 lp_build_endif(&edge_if);
1106
1107 x00 = LLVMBuildLoad(builder, xs[0], "");
1108 x01 = LLVMBuildLoad(builder, xs[1], "");
1109 x10 = LLVMBuildLoad(builder, xs[2], "");
1110 x11 = LLVMBuildLoad(builder, xs[3], "");
1111 y00 = LLVMBuildLoad(builder, ys[0], "");
1112 y01 = LLVMBuildLoad(builder, ys[1], "");
1113 y10 = LLVMBuildLoad(builder, ys[2], "");
1114 y11 = LLVMBuildLoad(builder, ys[3], "");
1115 z00 = LLVMBuildLoad(builder, zs[0], "");
1116 z01 = LLVMBuildLoad(builder, zs[1], "");
1117 z10 = LLVMBuildLoad(builder, zs[2], "");
1118 z11 = LLVMBuildLoad(builder, zs[3], "");
1119 }
1120
1121 if (linear_mask) {
1122 /*
1123 * Whack filter weights into place. Whatever texel had more weight is
1124 * the one which should have been selected by nearest filtering hence
1125 * just use 100% weight for it.
1126 */
1127 struct lp_build_context *c_bld = &bld->coord_bld;
1128 LLVMValueRef w1_mask, w1_weight;
1129 LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
1130
1131 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
1132 /* this select is really just a "and" */
1133 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1134 s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
1135 if (dims >= 2) {
1136 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
1137 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1138 t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
1139 if (dims == 3) {
1140 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
1141 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1142 r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
1143 }
1144 }
1145 }
1146
1147 /*
1148 * Get texture colors.
1149 */
1150 /* get x0/x1 texels */
1151 lp_build_sample_texel_soa(bld,
1152 width_vec, height_vec, depth_vec,
1153 x00, y00, z00,
1154 row_stride_vec, img_stride_vec,
1155 data_ptr, mipoffsets, neighbors[0][0]);
1156 lp_build_sample_texel_soa(bld,
1157 width_vec, height_vec, depth_vec,
1158 x01, y01, z01,
1159 row_stride_vec, img_stride_vec,
1160 data_ptr, mipoffsets, neighbors[0][1]);
1161
1162 if (dims == 1) {
1163 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1164 /* Interpolate two samples from 1D image to produce one color */
1165 for (chan = 0; chan < 4; chan++) {
1166 colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
1167 neighbors[0][0][chan],
1168 neighbors[0][1][chan],
1169 0);
1170 }
1171 }
1172 else {
1173 LLVMValueRef cmpval0, cmpval1;
1174 cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1175 cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1176 /* simplified lerp, AND mask with weight and add */
1177 colors_out[0] = lp_build_masklerp(&bld->texel_bld, s_fpart,
1178 cmpval0, cmpval1);
1179 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1180 }
1181 }
1182 else {
1183 /* 2D/3D texture */
1184 struct lp_build_if_state corner_if;
1185 LLVMValueRef colors0[4], colorss[4];
1186
1187 /* get x0/x1 texels at y1 */
1188 lp_build_sample_texel_soa(bld,
1189 width_vec, height_vec, depth_vec,
1190 x10, y10, z10,
1191 row_stride_vec, img_stride_vec,
1192 data_ptr, mipoffsets, neighbors[1][0]);
1193 lp_build_sample_texel_soa(bld,
1194 width_vec, height_vec, depth_vec,
1195 x11, y11, z11,
1196 row_stride_vec, img_stride_vec,
1197 data_ptr, mipoffsets, neighbors[1][1]);
1198
1199 /*
1200 * To avoid having to duplicate linear_mask / fetch code use
1201 * another branch (with corner condition though edge would work
1202 * as well) here.
1203 */
1204 if (accurate_cube_corners) {
1205 LLVMValueRef w00, w01, w10, w11, wx0, wy0;
1206 LLVMValueRef c_weight, c00, c01, c10, c11;
1207 LLVMValueRef have_corner, one_third, tmp;
1208
1209 colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1210 colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1211 colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1212 colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1213
1214 have_corner = LLVMBuildLoad(builder, have_corners, "");
1215
1216 lp_build_if(&corner_if, bld->gallivm, have_corner);
1217
1218 /*
1219 * we can't use standard 2d lerp as we need per-element weight
1220 * in case of corners, so just calculate bilinear result as
1221 * w00*s00 + w01*s01 + w10*s10 + w11*s11.
1222 * (This is actually less work than using 2d lerp, 7 vs. 9 instructions,
1223 * however calculating the weights needs another 6, so actually probably
1224 * not slower than 2d lerp only for 4 channels as weights only need
1225 * to be calculated once - of course fixing the weights has additional cost.)
1226 */
1227 wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
1228 wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
1229 w00 = lp_build_mul(coord_bld, wx0, wy0);
1230 w01 = lp_build_mul(coord_bld, s_fpart, wy0);
1231 w10 = lp_build_mul(coord_bld, wx0, t_fpart);
1232 w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
1233
1234 /* find corner weight */
1235 c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
1236 c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
1237 c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
1238 c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
1239 c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
1240 c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
1241 c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
1242 c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
1243
1244 /*
1245 * add 1/3 of the corner weight to each of the 3 other samples
1246 * and null out corner weight
1247 */
1248 one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f);
1249 c_weight = lp_build_mul(coord_bld, c_weight, one_third);
1250 w00 = lp_build_add(coord_bld, w00, c_weight);
1251 c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
1252 w00 = lp_build_andnot(coord_bld, w00, c00);
1253 w01 = lp_build_add(coord_bld, w01, c_weight);
1254 c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
1255 w01 = lp_build_andnot(coord_bld, w01, c01);
1256 w10 = lp_build_add(coord_bld, w10, c_weight);
1257 c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
1258 w10 = lp_build_andnot(coord_bld, w10, c10);
1259 w11 = lp_build_add(coord_bld, w11, c_weight);
1260 c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
1261 w11 = lp_build_andnot(coord_bld, w11, c11);
1262
1263 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1264 for (chan = 0; chan < 4; chan++) {
1265 colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
1266 tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
1267 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1268 tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
1269 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1270 tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
1271 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1272 }
1273 }
1274 else {
1275 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1276 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1277 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1278 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1279 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1280 /* inputs to interpolation are just masks so just add masked weights together */
1281 cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
1282 cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
1283 cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
1284 cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
1285 colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
1286 tmp = lp_build_and(coord_bld, w01, cmpval01);
1287 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1288 tmp = lp_build_and(coord_bld, w10, cmpval10);
1289 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1290 tmp = lp_build_and(coord_bld, w11, cmpval11);
1291 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1292 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1293 }
1294
1295 LLVMBuildStore(builder, colors0[0], colorss[0]);
1296 LLVMBuildStore(builder, colors0[1], colorss[1]);
1297 LLVMBuildStore(builder, colors0[2], colorss[2]);
1298 LLVMBuildStore(builder, colors0[3], colorss[3]);
1299
1300 lp_build_else(&corner_if);
1301 }
1302
1303 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1304 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1305 for (chan = 0; chan < 4; chan++) {
1306 colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
1307 s_fpart, t_fpart,
1308 neighbors[0][0][chan],
1309 neighbors[0][1][chan],
1310 neighbors[1][0][chan],
1311 neighbors[1][1][chan],
1312 0);
1313 }
1314 }
1315 else {
1316 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1317 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1318 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1319 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1320 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1321 colors0[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
1322 cmpval00, cmpval01, cmpval10, cmpval11);
1323 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1324 }
1325
1326 if (accurate_cube_corners) {
1327 LLVMBuildStore(builder, colors0[0], colorss[0]);
1328 LLVMBuildStore(builder, colors0[1], colorss[1]);
1329 LLVMBuildStore(builder, colors0[2], colorss[2]);
1330 LLVMBuildStore(builder, colors0[3], colorss[3]);
1331
1332 lp_build_endif(&corner_if);
1333
1334 colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
1335 colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
1336 colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
1337 colors0[3] = LLVMBuildLoad(builder, colorss[3], "");
1338 }
1339
1340 if (dims == 3) {
1341 LLVMValueRef neighbors1[2][2][4];
1342 LLVMValueRef colors1[4];
1343
1344 /* get x0/x1/y0/y1 texels at z1 */
1345 lp_build_sample_texel_soa(bld,
1346 width_vec, height_vec, depth_vec,
1347 x00, y00, z1,
1348 row_stride_vec, img_stride_vec,
1349 data_ptr, mipoffsets, neighbors1[0][0]);
1350 lp_build_sample_texel_soa(bld,
1351 width_vec, height_vec, depth_vec,
1352 x01, y01, z1,
1353 row_stride_vec, img_stride_vec,
1354 data_ptr, mipoffsets, neighbors1[0][1]);
1355 lp_build_sample_texel_soa(bld,
1356 width_vec, height_vec, depth_vec,
1357 x10, y10, z1,
1358 row_stride_vec, img_stride_vec,
1359 data_ptr, mipoffsets, neighbors1[1][0]);
1360 lp_build_sample_texel_soa(bld,
1361 width_vec, height_vec, depth_vec,
1362 x11, y11, z1,
1363 row_stride_vec, img_stride_vec,
1364 data_ptr, mipoffsets, neighbors1[1][1]);
1365
1366 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1367 /* Bilinear interpolate the four samples from the second Z slice */
1368 for (chan = 0; chan < 4; chan++) {
1369 colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1370 s_fpart, t_fpart,
1371 neighbors1[0][0][chan],
1372 neighbors1[0][1][chan],
1373 neighbors1[1][0][chan],
1374 neighbors1[1][1][chan],
1375 0);
1376 }
1377 /* Linearly interpolate the two samples from the two 3D slices */
1378 for (chan = 0; chan < 4; chan++) {
1379 colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1380 r_fpart,
1381 colors0[chan], colors1[chan],
1382 0);
1383 }
1384 }
1385 else {
1386 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1387 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1388 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1389 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1390 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1391 colors1[0] = lp_build_masklerp2d(&bld->texel_bld, s_fpart, t_fpart,
1392 cmpval00, cmpval01, cmpval10, cmpval11);
1393 /* Linearly interpolate the two samples from the two 3D slices */
1394 colors_out[0] = lp_build_lerp(&bld->texel_bld,
1395 r_fpart,
1396 colors0[0], colors1[0],
1397 0);
1398 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1399 }
1400 }
1401 else {
1402 /* 2D tex */
1403 for (chan = 0; chan < 4; chan++) {
1404 colors_out[chan] = colors0[chan];
1405 }
1406 }
1407 }
1408 }
1409
1410
1411 /**
1412 * Sample the texture/mipmap using given image filter and mip filter.
1413 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1414 * from (vectors or scalars).
1415 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1416 */
1417 static void
1418 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1419 unsigned img_filter,
1420 unsigned mip_filter,
1421 LLVMValueRef *coords,
1422 const LLVMValueRef *offsets,
1423 LLVMValueRef ilevel0,
1424 LLVMValueRef ilevel1,
1425 LLVMValueRef lod_fpart,
1426 LLVMValueRef *colors_out)
1427 {
1428 LLVMBuilderRef builder = bld->gallivm->builder;
1429 LLVMValueRef size0 = NULL;
1430 LLVMValueRef size1 = NULL;
1431 LLVMValueRef row_stride0_vec = NULL;
1432 LLVMValueRef row_stride1_vec = NULL;
1433 LLVMValueRef img_stride0_vec = NULL;
1434 LLVMValueRef img_stride1_vec = NULL;
1435 LLVMValueRef data_ptr0 = NULL;
1436 LLVMValueRef data_ptr1 = NULL;
1437 LLVMValueRef mipoff0 = NULL;
1438 LLVMValueRef mipoff1 = NULL;
1439 LLVMValueRef colors0[4], colors1[4];
1440 unsigned chan;
1441
1442 /* sample the first mipmap level */
1443 lp_build_mipmap_level_sizes(bld, ilevel0,
1444 &size0,
1445 &row_stride0_vec, &img_stride0_vec);
1446 if (bld->num_mips == 1) {
1447 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1448 }
1449 else {
1450 /* This path should work for num_lods 1 too but slightly less efficient */
1451 data_ptr0 = bld->base_ptr;
1452 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1453 }
1454 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1455 lp_build_sample_image_nearest(bld, size0,
1456 row_stride0_vec, img_stride0_vec,
1457 data_ptr0, mipoff0, coords, offsets,
1458 colors0);
1459 }
1460 else {
1461 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1462 lp_build_sample_image_linear(bld, size0, NULL,
1463 row_stride0_vec, img_stride0_vec,
1464 data_ptr0, mipoff0, coords, offsets,
1465 colors0);
1466 }
1467
1468 /* Store the first level's colors in the output variables */
1469 for (chan = 0; chan < 4; chan++) {
1470 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1471 }
1472
1473 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1474 struct lp_build_if_state if_ctx;
1475 LLVMValueRef need_lerp;
1476
1477 /* need_lerp = lod_fpart > 0 */
1478 if (bld->num_lods == 1) {
1479 need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1480 lod_fpart, bld->lodf_bld.zero,
1481 "need_lerp");
1482 }
1483 else {
1484 /*
1485 * We'll do mip filtering if any of the quads (or individual
1486 * pixel in case of per-pixel lod) need it.
1487 * It might be better to split the vectors here and only fetch/filter
1488 * quads which need it (if there's one lod per quad).
1489 */
1490 need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
1491 PIPE_FUNC_GREATER,
1492 lod_fpart, bld->lodf_bld.zero);
1493 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
1494 }
1495
1496 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1497 {
1498 /*
1499 * We unfortunately need to clamp lod_fpart here since we can get
1500 * negative values which would screw up filtering if not all
1501 * lod_fpart values have same sign.
1502 */
1503 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1504 bld->lodf_bld.zero);
1505 /* sample the second mipmap level */
1506 lp_build_mipmap_level_sizes(bld, ilevel1,
1507 &size1,
1508 &row_stride1_vec, &img_stride1_vec);
1509 if (bld->num_mips == 1) {
1510 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1511 }
1512 else {
1513 data_ptr1 = bld->base_ptr;
1514 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1515 }
1516 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1517 lp_build_sample_image_nearest(bld, size1,
1518 row_stride1_vec, img_stride1_vec,
1519 data_ptr1, mipoff1, coords, offsets,
1520 colors1);
1521 }
1522 else {
1523 lp_build_sample_image_linear(bld, size1, NULL,
1524 row_stride1_vec, img_stride1_vec,
1525 data_ptr1, mipoff1, coords, offsets,
1526 colors1);
1527 }
1528
1529 /* interpolate samples from the two mipmap levels */
1530
1531 if (bld->num_lods != bld->coord_type.length)
1532 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1533 bld->lodf_bld.type,
1534 bld->texel_bld.type,
1535 lod_fpart);
1536
1537 for (chan = 0; chan < 4; chan++) {
1538 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1539 colors0[chan], colors1[chan],
1540 0);
1541 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1542 }
1543 }
1544 lp_build_endif(&if_ctx);
1545 }
1546 }
1547
1548
1549 /**
1550 * Sample the texture/mipmap using given mip filter, and using
1551 * both nearest and linear filtering at the same time depending
1552 * on linear_mask.
1553 * lod can be per quad but linear_mask is always per pixel.
1554 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1555 * from (vectors or scalars).
1556 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1557 */
1558 static void
1559 lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
1560 LLVMValueRef linear_mask,
1561 unsigned mip_filter,
1562 LLVMValueRef *coords,
1563 const LLVMValueRef *offsets,
1564 LLVMValueRef ilevel0,
1565 LLVMValueRef ilevel1,
1566 LLVMValueRef lod_fpart,
1567 LLVMValueRef lod_positive,
1568 LLVMValueRef *colors_out)
1569 {
1570 LLVMBuilderRef builder = bld->gallivm->builder;
1571 LLVMValueRef size0 = NULL;
1572 LLVMValueRef size1 = NULL;
1573 LLVMValueRef row_stride0_vec = NULL;
1574 LLVMValueRef row_stride1_vec = NULL;
1575 LLVMValueRef img_stride0_vec = NULL;
1576 LLVMValueRef img_stride1_vec = NULL;
1577 LLVMValueRef data_ptr0 = NULL;
1578 LLVMValueRef data_ptr1 = NULL;
1579 LLVMValueRef mipoff0 = NULL;
1580 LLVMValueRef mipoff1 = NULL;
1581 LLVMValueRef colors0[4], colors1[4];
1582 unsigned chan;
1583
1584 /* sample the first mipmap level */
1585 lp_build_mipmap_level_sizes(bld, ilevel0,
1586 &size0,
1587 &row_stride0_vec, &img_stride0_vec);
1588 if (bld->num_mips == 1) {
1589 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1590 }
1591 else {
1592 /* This path should work for num_lods 1 too but slightly less efficient */
1593 data_ptr0 = bld->base_ptr;
1594 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1595 }
1596
1597 lp_build_sample_image_linear(bld, size0, linear_mask,
1598 row_stride0_vec, img_stride0_vec,
1599 data_ptr0, mipoff0, coords, offsets,
1600 colors0);
1601
1602 /* Store the first level's colors in the output variables */
1603 for (chan = 0; chan < 4; chan++) {
1604 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1605 }
1606
1607 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1608 struct lp_build_if_state if_ctx;
1609 LLVMValueRef need_lerp;
1610
1611 /*
1612 * We'll do mip filtering if any of the quads (or individual
1613 * pixel in case of per-pixel lod) need it.
1614 * Note using lod_positive here not lod_fpart since it may be the same
1615 * condition as that used in the outer "if" in the caller hence llvm
1616 * should be able to merge the branches in this case.
1617 */
1618 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
1619
1620 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1621 {
1622 /*
1623 * We unfortunately need to clamp lod_fpart here since we can get
1624 * negative values which would screw up filtering if not all
1625 * lod_fpart values have same sign.
1626 */
1627 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1628 bld->lodf_bld.zero);
1629 /* sample the second mipmap level */
1630 lp_build_mipmap_level_sizes(bld, ilevel1,
1631 &size1,
1632 &row_stride1_vec, &img_stride1_vec);
1633 if (bld->num_mips == 1) {
1634 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1635 }
1636 else {
1637 data_ptr1 = bld->base_ptr;
1638 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1639 }
1640
1641 lp_build_sample_image_linear(bld, size1, linear_mask,
1642 row_stride1_vec, img_stride1_vec,
1643 data_ptr1, mipoff1, coords, offsets,
1644 colors1);
1645
1646 /* interpolate samples from the two mipmap levels */
1647
1648 if (bld->num_lods != bld->coord_type.length)
1649 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1650 bld->lodf_bld.type,
1651 bld->texel_bld.type,
1652 lod_fpart);
1653
1654 for (chan = 0; chan < 4; chan++) {
1655 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1656 colors0[chan], colors1[chan],
1657 0);
1658 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1659 }
1660 }
1661 lp_build_endif(&if_ctx);
1662 }
1663 }
1664
1665
1666 /**
1667 * Build (per-coord) layer value.
1668 * Either clamp layer to valid values or fill in optional out_of_bounds
1669 * value and just return value unclamped.
1670 */
1671 static LLVMValueRef
1672 lp_build_layer_coord(struct lp_build_sample_context *bld,
1673 unsigned texture_unit,
1674 boolean is_cube_array,
1675 LLVMValueRef layer,
1676 LLVMValueRef *out_of_bounds)
1677 {
1678 LLVMValueRef num_layers;
1679 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1680
1681 num_layers = bld->dynamic_state->depth(bld->dynamic_state, bld->gallivm,
1682 bld->context_ptr, texture_unit);
1683
1684 if (out_of_bounds) {
1685 LLVMValueRef out1, out;
1686 assert(!is_cube_array);
1687 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
1688 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
1689 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
1690 *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
1691 return layer;
1692 }
1693 else {
1694 LLVMValueRef maxlayer;
1695 LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
1696 bld->int_bld.one;
1697 maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
1698 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
1699 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
1700 }
1701 }
1702
1703
/**
 * Common preamble shared by the sampling paths: select the cube face (for
 * cube maps), round and clamp array layer coordinates, clamp the shadow
 * reference value for fixed-point depth formats, compute the float level
 * of detail and derive from it the integer mipmap level(s) to fetch from.
 *
 * \param coords           texcoords, rewritten in place (face selection,
 *                         layer rounding/clamping, shadow ref clamping)
 * \param derivs           optional explicit partial derivatives
 * \param lod_bias         optional lod bias (optional)
 * \param explicit_lod     optional explicit lod (optional)
 * \param lod_pos_or_zero  out: whether lod is positive (minification)
 * \param lod_fpart        out: fractional lod (for linear mip filtering)
 * \param ilevel0          out: first integer mip level
 * \param ilevel1          out: second integer mip level (linear mip only)
 */
static void
lp_build_sample_common(struct lp_build_sample_context *bld,
                       unsigned texture_index,
                       unsigned sampler_index,
                       LLVMValueRef *coords,
                       const struct lp_derivatives *derivs, /* optional */
                       LLVMValueRef lod_bias, /* optional */
                       LLVMValueRef explicit_lod, /* optional */
                       LLVMValueRef *lod_pos_or_zero,
                       LLVMValueRef *lod_fpart,
                       LLVMValueRef *ilevel0,
                       LLVMValueRef *ilevel1)
{
   const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
   const unsigned min_filter = bld->static_sampler_state->min_img_filter;
   const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
   const unsigned target = bld->static_texture_state->target;
   LLVMValueRef first_level, cube_rho = NULL;
   LLVMValueRef lod_ipart = NULL;
   struct lp_derivatives cube_derivs;

   /*
   printf("%s mip %d min %d mag %d\n", __FUNCTION__,
          mip_filter, min_filter, mag_filter);
   */

   /*
    * Choose cube face, recompute texcoords for the chosen face and
    * compute rho here too (as it requires transform of derivatives).
    */
   if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
      boolean need_derivs;
      /* derivatives only needed when an implicit lod must be computed */
      need_derivs = ((min_filter != mag_filter ||
                      mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
                      !bld->static_sampler_state->min_max_lod_equal &&
                      !explicit_lod);
      lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
      /* use the face-projected derivatives from here on */
      derivs = &cube_derivs;
      if (target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* calculate cube layer coord now */
         LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
         LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
         /* each cube in the array occupies 6 consecutive layers */
         layer = lp_build_mul(&bld->int_coord_bld, layer, six);
         coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL);
         /* because of seamless filtering can't add it to face (coords[2]) here. */
      }
   }
   else if (target == PIPE_TEXTURE_1D_ARRAY ||
            target == PIPE_TEXTURE_2D_ARRAY) {
      /* round the layer coord and clamp it to the valid layer range */
      coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
      coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL);
   }

   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
      /*
       * Clamp p coords to [0,1] for fixed function depth texture format here.
       * Technically this is not entirely correct for unorm depth as the ref value
       * should be converted to the depth format (quantization!) and comparison
       * then done in texture format. This would actually help performance (since
       * only need to do it once and could save the per-sample conversion of texels
       * to floats instead), but it would need more messy code (would need to push
       * at least some bits down to actual fetch so conversion could be skipped,
       * and would have ugly interaction with border color, would need to convert
       * border color to that format too or do some other tricks to make it work).
       */
      const struct util_format_description *format_desc = bld->format_desc;
      unsigned chan_type;
      /* not entirely sure we couldn't end up with non-valid swizzle here */
      chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
                     format_desc->channel[format_desc->swizzle[0]].type :
                     UTIL_FORMAT_TYPE_FLOAT;
      if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
         /* coords[4] is the shadow comparison reference value */
         coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
                                    bld->coord_bld.zero, bld->coord_bld.one);
      }
   }

   /*
    * Compute the level of detail (float).
    */
   if (min_filter != mag_filter ||
       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
       */
      lp_build_lod_selector(bld, texture_index, sampler_index,
                            coords[0], coords[1], coords[2], cube_rho,
                            derivs, lod_bias, explicit_lod,
                            mip_filter,
                            &lod_ipart, lod_fpart, lod_pos_or_zero);
   } else {
      /* no lod needed: level 0, treated as magnification */
      lod_ipart = bld->lodi_bld.zero;
      *lod_pos_or_zero = bld->lodi_bld.zero;
   }

   if (bld->num_lods != bld->num_mips) {
      /* only makes sense if there's just a single mip level */
      assert(bld->num_mips == 1);
      lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
   }

   /*
    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
    */
   switch (mip_filter) {
   default:
      assert(0 && "bad mip_filter value in lp_build_sample_soa()");
      /* fall-through */
   case PIPE_TEX_MIPFILTER_NONE:
      /* always use mip level 0 */
      first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                    bld->gallivm, bld->context_ptr,
                                                    texture_index);
      first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
      *ilevel0 = first_level;
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      assert(lod_ipart);
      lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      assert(lod_ipart);
      assert(*lod_fpart);
      lp_build_linear_mip_levels(bld, texture_index,
                                 lod_ipart, lod_fpart,
                                 ilevel0, ilevel1);
      break;
   }
}
1836
/**
 * Load the border color for the given sampler unit and clamp it to the
 * representable range of the texture format, storing the result in
 * bld->border_color_clamped.
 *
 * Clamping is driven by the format description: normalized formats clamp
 * to [0,1] or [-1,1], pure-integer formats clamp to the channel's integer
 * range (only needed for channels narrower than 32 bits), with special
 * cases for mixed-channel, compressed and small-float formats.
 */
static void
lp_build_clamp_border_color(struct lp_build_sample_context *bld,
                            unsigned sampler_unit)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef border_color_ptr =
      bld->dynamic_state->border_color(bld->dynamic_state, gallivm,
                                       bld->context_ptr, sampler_unit);
   LLVMValueRef border_color;
   const struct util_format_description *format_desc = bld->format_desc;
   struct lp_type vec4_type = bld->texel_type;
   struct lp_build_context vec4_bld;
   LLVMValueRef min_clamp = NULL;
   LLVMValueRef max_clamp = NULL;

   /*
    * For normalized format need to clamp border color (technically
    * probably should also quantize the data). Really sucks doing this
    * here but can't avoid at least for now since this is part of
    * sampler state and texture format is part of sampler_view state.
    * GL expects also expects clamping for uint/sint formats too so
    * do that as well (d3d10 can't end up here with uint/sint since it
    * only supports them with ld).
    */
   vec4_type.length = 4;
   lp_build_context_init(&vec4_bld, gallivm, vec4_type);

   /*
    * Vectorized clamping of border color. Loading is a bit of a hack since
    * we just cast the pointer to float array to pointer to vec4
    * (int or float).
    */
   border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
                                             lp_build_const_int32(gallivm, 0));
   border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
                                       LLVMPointerType(vec4_bld.vec_type, 0), "");
   border_color = LLVMBuildLoad(builder, border_color_ptr, "");
   /* we don't have aligned type in the dynamic state unfortunately */
   lp_set_load_alignment(border_color, 4);

   /*
    * Instead of having some incredibly complex logic which will try to figure out
    * clamping necessary for each channel, simply use the first channel, and treat
    * mixed signed/unsigned normalized formats specially.
    * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
    * good reason.)
    */
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
      int chan;
      /* d/s needs special handling because both present means just sampling depth */
      if (util_format_is_depth_and_stencil(format_desc->format)) {
         chan = format_desc->swizzle[0];
      }
      else {
         chan = util_format_get_first_non_void_channel(format_desc->format);
      }
      if (chan >= 0 && chan <= UTIL_FORMAT_SWIZZLE_W) {
         unsigned chan_type = format_desc->channel[chan].type;
         unsigned chan_norm = format_desc->channel[chan].normalized;
         unsigned chan_pure = format_desc->channel[chan].pure_integer;
         if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
            if (chan_norm) {
               /* snorm: clamp to [-1, 1] */
               min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
               max_clamp = vec4_bld.one;
            }
            else if (chan_pure) {
               /*
                * Border color was stored as int, hence need min/max clamp
                * only if chan has less than 32 bits..
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     0 - (1 << (chan_size - 1)));
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << (chan_size - 1)) - 1);
               }
            }
            /* TODO: no idea about non-pure, non-normalized! */
         }
         else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
            if (chan_norm) {
               /* unorm: clamp to [0, 1] */
               min_clamp = vec4_bld.zero;
               max_clamp = vec4_bld.one;
            }
            /*
             * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
             * we use Z32_FLOAT_S8X24 to imply sampling depth component
             * and ignoring stencil, which will blow up here if we try to
             * do a uint clamp in a float texel build...
             * And even if we had that format, mesa st also thinks using z24s8
             * means depth sampling ignoring stencil.
             */
            else if (chan_pure) {
               /*
                * Border color was stored as uint, hence never need min
                * clamp, and only need max clamp if chan has less than 32 bits.
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << chan_size) - 1);
               }
               /* TODO: no idea about non-pure, non-normalized! */
            }
         }
         else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
            /* TODO: I have no idea what clamp this would need if any! */
         }
      }
      /* mixed plain formats (or different pure size) */
      switch (format_desc->format) {
      case PIPE_FORMAT_B10G10R10A2_UINT:
      case PIPE_FORMAT_R10G10B10A2_UINT:
         {
            /* 10-bit rgb channels, 2-bit alpha */
            unsigned max10 = (1 << 10) - 1;
            max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
                                           max10, (1 << 2) - 1, NULL);
         }
         break;
      case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        -1.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
      case PIPE_FORMAT_R5SG5SB6U_NORM:
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        0.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      default:
         break;
      }
   }
   else {
      /* cannot figure this out from format description */
      if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         /* s3tc formats are always unorm */
         min_clamp = vec4_bld.zero;
         max_clamp = vec4_bld.one;
      }
      else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
               format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
         switch (format_desc->format) {
         case PIPE_FORMAT_RGTC1_UNORM:
         case PIPE_FORMAT_RGTC2_UNORM:
         case PIPE_FORMAT_LATC1_UNORM:
         case PIPE_FORMAT_LATC2_UNORM:
         case PIPE_FORMAT_ETC1_RGB8:
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_RGTC1_SNORM:
         case PIPE_FORMAT_RGTC2_SNORM:
         case PIPE_FORMAT_LATC1_SNORM:
         case PIPE_FORMAT_LATC2_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         default:
            assert(0);
            break;
         }
      }
      /*
       * all others from subsampled/other group, though we don't care
       * about yuv (and should not have any from zs here)
       */
      else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
         switch (format_desc->format) {
         case PIPE_FORMAT_R8G8_B8G8_UNORM:
         case PIPE_FORMAT_G8R8_G8B8_UNORM:
         case PIPE_FORMAT_G8R8_B8R8_UNORM:
         case PIPE_FORMAT_R8G8_R8B8_UNORM:
         case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_R8G8Bx_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         /*
          * Note smallfloat formats usually don't need clamping
          * (they still have infinite range) however this is not
          * true for r11g11b10 and r9g9b9e5, which can't represent
          * negative numbers (and additionally r9g9b9e5 can't represent
          * very large numbers). d3d10 seems happy without clamping in
          * this case, but gl spec is pretty clear: "for floating
          * point and integer formats, border values are clamped to
          * the representable range of the format" so do that here.
          */
         case PIPE_FORMAT_R11G11B10_FLOAT:
            min_clamp = vec4_bld.zero;
            break;
         case PIPE_FORMAT_R9G9B9E5_FLOAT:
            min_clamp = vec4_bld.zero;
            max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
            break;
         default:
            assert(0);
            break;
         }
      }
   }

   /* apply whichever clamps were determined above */
   if (min_clamp) {
      border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
   }
   if (max_clamp) {
      border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
   }

   bld->border_color_clamped = border_color;
}
2054
2055
/**
 * General texture sampling codegen.
 * This function handles texture sampling for all texture targets (1D,
 * 2D, 3D, cube) and all filtering modes.
 *
 * \param sampler_unit  sampler slot, used for border color lookup and for
 *                      naming the generated values
 * \param coords        texcoords (already adjusted by the common setup code)
 * \param offsets       optional texel offsets
 * \param lod_positive  whether lod is positive, i.e. minification is needed
 *                      (per-lod-value vector, or scalar when num_lods == 1)
 * \param lod_fpart     fractional lod, for linear mip filtering
 * \param ilevel0       first integer mip level
 * \param ilevel1       second integer mip level (linear mip filter only)
 * \param colors_out    returns the four texel color vectors (x,y,z,w)
 */
static void
lp_build_sample_general(struct lp_build_sample_context *bld,
                        unsigned sampler_unit,
                        LLVMValueRef *coords,
                        const LLVMValueRef *offsets,
                        LLVMValueRef lod_positive,
                        LLVMValueRef lod_fpart,
                        LLVMValueRef ilevel0,
                        LLVMValueRef ilevel1,
                        LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
   const unsigned mip_filter = sampler_state->min_mip_filter;
   const unsigned min_filter = sampler_state->min_img_filter;
   const unsigned mag_filter = sampler_state->mag_img_filter;
   LLVMValueRef texels[4];
   unsigned chan;

   /* if we need border color, (potentially) clamp it now */
   if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
                                              min_filter,
                                              mag_filter) ||
       (bld->dims > 1 &&
        lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
                                               min_filter,
                                               mag_filter)) ||
       (bld->dims > 2 &&
        lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
                                               min_filter,
                                               mag_filter))) {
      lp_build_clamp_border_color(bld, sampler_unit);
   }


   /*
    * Get/interpolate texture colors.
    */

   /* alloca'd result variables so every filter branch can store into them */
   for (chan = 0; chan < 4; ++chan) {
      texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
      lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
   }

   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld, min_filter, mip_filter,
                             coords, offsets,
                             ilevel0, ilevel1, lod_fpart,
                             texels);
   }
   else {
      /*
       * Could also get rid of the if-logic and always use mipmap_both, both
       * for the single lod and multi-lod case if nothing really uses this.
       */
      if (bld->num_lods == 1) {
         /* Emit conditional to choose min image filter or mag image filter
          * depending on the lod being > 0 or <= 0, respectively.
          */
         struct lp_build_if_state if_ctx;

         /* reduce the scalar lod_positive value to an i1 condition */
         lod_positive = LLVMBuildTrunc(builder, lod_positive,
                                       LLVMInt1TypeInContext(bld->gallivm->context), "");

         lp_build_if(&if_ctx, bld->gallivm, lod_positive);
         {
            /* Use the minification filter */
            lp_build_sample_mipmap(bld, min_filter, mip_filter,
                                   coords, offsets,
                                   ilevel0, ilevel1, lod_fpart,
                                   texels);
         }
         lp_build_else(&if_ctx);
         {
            /* Use the magnification filter */
            lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                   coords, offsets,
                                   ilevel0, NULL, NULL,
                                   texels);
         }
         lp_build_endif(&if_ctx);
      }
      else {
         LLVMValueRef need_linear, linear_mask;
         unsigned mip_filter_for_nearest;
         struct lp_build_if_state if_ctx;

         /* per-lod mask of which elements need the linear img filter */
         if (min_filter == PIPE_TEX_FILTER_LINEAR) {
            linear_mask = lod_positive;
            mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
         }
         else {
            linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
            mip_filter_for_nearest = mip_filter;
         }
         need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
                                               linear_mask);

         if (bld->num_lods != bld->coord_type.length) {
            /* expand the per-lod mask to per-element width */
            linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                                bld->lodi_type,
                                                                bld->int_coord_type,
                                                                linear_mask);
         }

         lp_build_if(&if_ctx, bld->gallivm, need_linear);
         {
            /*
             * Do sampling with both filters simultaneously. This means using
             * a linear filter and doing some tricks (with weights) for the pixels
             * which need nearest filter.
             * Note that it's probably rare some pixels need nearest and some
             * linear filter but the fixups required for the nearest pixels
             * aren't all that complicated so just always run a combined path
             * if at least some pixels require linear.
             */
            lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
                                        coords, offsets,
                                        ilevel0, ilevel1,
                                        lod_fpart, lod_positive,
                                        texels);
         }
         lp_build_else(&if_ctx);
         {
            /*
             * All pixels require just nearest filtering, which is way
             * cheaper than linear, hence do a separate path for that.
             */
            lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
                                   mip_filter_for_nearest,
                                   coords, offsets,
                                   ilevel0, ilevel1, lod_fpart,
                                   texels);
         }
         lp_build_endif(&if_ctx);
      }
   }

   /* load the final values out of the result variables */
   for (chan = 0; chan < 4; ++chan) {
      colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
      lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
   }
}
2205
2206
/**
 * Texel fetch function.
 * In contrast to general sampling there is no filtering, no coord minification,
 * lod (if any) is always explicit uint, coords are uints (in terms of texel units)
 * directly to be applied to the selected mip level (after adding texel offsets).
 * This function handles texel fetch for all targets where texel fetch is supported
 * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
 *
 * Out-of-bounds coords (and layer/level) return zero, as required by
 * d3d10 / ARB_robust_buffer_access_behavior.
 *
 * \param texture_unit  texture slot to fetch from
 * \param coords        integer texel coordinates
 * \param explicit_lod  explicit integer lod, or NULL (optional)
 * \param offsets       optional texel offsets
 * \param colors_out    returns the four texel color vectors
 */
static void
lp_build_fetch_texel(struct lp_build_sample_context *bld,
                     unsigned texture_unit,
                     const LLVMValueRef *coords,
                     LLVMValueRef explicit_lod,
                     const LLVMValueRef *offsets,
                     LLVMValueRef *colors_out)
{
   struct lp_build_context *perquadi_bld = &bld->lodi_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   unsigned dims = bld->dims, chan;
   unsigned target = bld->static_texture_state->target;
   boolean out_of_bound_ret_zero = TRUE;
   LLVMValueRef size, ilevel;
   LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
   LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
   LLVMValueRef width, height, depth, i, j;
   LLVMValueRef offset, out_of_bounds, out1;

   /* accumulated per-element out-of-bounds mask */
   out_of_bounds = int_coord_bld->zero;

   if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
      if (bld->num_mips != int_coord_bld->type.length) {
         /* pack per-element lods down to the per-quad/per-lod vector width */
         ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
                                            perquadi_bld->type, explicit_lod, 0);
      }
      else {
         ilevel = explicit_lod;
      }
      lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
                                 out_of_bound_ret_zero ? &out_of_bounds : NULL);
   }
   else {
      assert(bld->num_mips == 1);
      if (bld->static_texture_state->target != PIPE_BUFFER) {
         ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
                                                  bld->context_ptr, texture_unit);
      }
      else {
         /* buffers have no mip levels */
         ilevel = lp_build_const_int32(bld->gallivm, 0);
      }
   }
   lp_build_mipmap_level_sizes(bld, ilevel,
                               &size,
                               &row_stride_vec, &img_stride_vec);
   lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
                                size, &width, &height, &depth);

   if (target == PIPE_TEXTURE_1D_ARRAY ||
       target == PIPE_TEXTURE_2D_ARRAY) {
      /* clamp the layer coord, tracking out-of-range layers if needed */
      if (out_of_bound_ret_zero) {
         z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1);
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
      }
      else {
         z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL);
      }
   }

   /* This is a lot like border sampling */
   if (offsets[0]) {
      /*
       * coords are really unsigned, offsets are signed, but I don't think
       * exceeding 31 bits is possible
       */
      x = lp_build_add(int_coord_bld, x, offsets[0]);
   }
   out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
   out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
   out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
   out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);

   if (dims >= 2) {
      if (offsets[1]) {
         y = lp_build_add(int_coord_bld, y, offsets[1]);
      }
      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);

      if (dims >= 3) {
         if (offsets[2]) {
            z = lp_build_add(int_coord_bld, z, offsets[2]);
         }
         out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
         out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
      }
   }

   /* compute the byte offset of each texel within the mip image */
   lp_build_sample_offset(int_coord_bld,
                          bld->format_desc,
                          x, y, z, row_stride_vec, img_stride_vec,
                          &offset, &i, &j);

   if (bld->static_texture_state->target != PIPE_BUFFER) {
      offset = lp_build_add(int_coord_bld, offset,
                            lp_build_get_mip_offsets(bld, ilevel));
   }

   /* force out-of-bounds lanes to offset 0 so the fetch stays in memory */
   offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);

   lp_build_fetch_rgba_soa(bld->gallivm,
                           bld->format_desc,
                           bld->texel_type,
                           bld->base_ptr, offset,
                           i, j,
                           colors_out);

   if (out_of_bound_ret_zero) {
      /*
       * Only needed for ARB_robust_buffer_access_behavior and d3d10.
       * Could use min/max above instead of out-of-bounds comparisons
       * if we don't care about the result returned for out-of-bounds.
       */
      for (chan = 0; chan < 4; chan++) {
         colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
                                            bld->texel_bld.zero, colors_out[chan]);
      }
   }
}
2338
2339
2340 /**
2341 * Just set texels to white instead of actually sampling the texture.
2342 * For debugging.
2343 */
2344 void
2345 lp_build_sample_nop(struct gallivm_state *gallivm,
2346 struct lp_type type,
2347 const LLVMValueRef *coords,
2348 LLVMValueRef texel_out[4])
2349 {
2350 LLVMValueRef one = lp_build_one(gallivm, type);
2351 unsigned chan;
2352
2353 for (chan = 0; chan < 4; chan++) {
2354 texel_out[chan] = one;
2355 }
2356 }
2357
2358
2359 /**
2360 * Build the actual texture sampling code.
2361 * 'texel' will return a vector of four LLVMValueRefs corresponding to
2362 * R, G, B, A.
2363 * \param type vector float type to use for coords, etc.
2364 * \param sample_key
2365 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y
2366 */
2367 static void
2368 lp_build_sample_soa_code(struct gallivm_state *gallivm,
2369 const struct lp_static_texture_state *static_texture_state,
2370 const struct lp_static_sampler_state *static_sampler_state,
2371 struct lp_sampler_dynamic_state *dynamic_state,
2372 struct lp_type type,
2373 unsigned sample_key,
2374 unsigned texture_index,
2375 unsigned sampler_index,
2376 LLVMValueRef context_ptr,
2377 const LLVMValueRef *coords,
2378 const LLVMValueRef *offsets,
2379 const struct lp_derivatives *derivs, /* optional */
2380 LLVMValueRef lod, /* optional */
2381 LLVMValueRef texel_out[4])
2382 {
2383 unsigned target = static_texture_state->target;
2384 unsigned dims = texture_dims(target);
2385 unsigned num_quads = type.length / 4;
2386 unsigned mip_filter, min_img_filter, mag_img_filter, i;
2387 struct lp_build_sample_context bld;
2388 struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
2389 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
2390 LLVMBuilderRef builder = gallivm->builder;
2391 LLVMValueRef tex_width, newcoords[5];
2392 enum lp_sampler_lod_property lod_property;
2393 enum lp_sampler_lod_control lod_control;
2394 LLVMValueRef lod_bias = NULL;
2395 LLVMValueRef explicit_lod = NULL;
2396 boolean is_fetch = !!(sample_key & LP_SAMPLER_FETCH);
2397
2398 if (0) {
2399 enum pipe_format fmt = static_texture_state->format;
2400 debug_printf("Sample from %s\n", util_format_name(fmt));
2401 }
2402
2403 lod_property = (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
2404 LP_SAMPLER_LOD_PROPERTY_SHIFT;
2405 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
2406 LP_SAMPLER_LOD_CONTROL_SHIFT;
2407
2408 if (lod_control == LP_SAMPLER_LOD_BIAS) {
2409 lod_bias = lod;
2410 assert(lod);
2411 assert(derivs == NULL);
2412 }
2413 else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
2414 explicit_lod = lod;
2415 assert(lod);
2416 assert(derivs == NULL);
2417 }
2418 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
2419 assert(derivs);
2420 assert(lod == NULL);
2421 }
2422 else {
2423 assert(derivs == NULL);
2424 assert(lod == NULL);
2425 }
2426
2427 if (static_texture_state->format == PIPE_FORMAT_NONE) {
2428 /*
2429 * If there's nothing bound, format is NONE, and we must return
2430 * all zero as mandated by d3d10 in this case.
2431 */
2432 unsigned chan;
2433 LLVMValueRef zero = lp_build_const_vec(gallivm, type, 0.0F);
2434 for (chan = 0; chan < 4; chan++) {
2435 texel_out[chan] = zero;
2436 }
2437 return;
2438 }
2439
2440 assert(type.floating);
2441
2442 /* Setup our build context */
2443 memset(&bld, 0, sizeof bld);
2444 bld.gallivm = gallivm;
2445 bld.context_ptr = context_ptr;
2446 bld.static_sampler_state = &derived_sampler_state;
2447 bld.static_texture_state = static_texture_state;
2448 bld.dynamic_state = dynamic_state;
2449 bld.format_desc = util_format_description(static_texture_state->format);
2450 bld.dims = dims;
2451
2452 bld.vector_width = lp_type_width(type);
2453
2454 bld.float_type = lp_type_float(32);
2455 bld.int_type = lp_type_int(32);
2456 bld.coord_type = type;
2457 bld.int_coord_type = lp_int_type(type);
2458 bld.float_size_in_type = lp_type_float(32);
2459 bld.float_size_in_type.length = dims > 1 ? 4 : 1;
2460 bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
2461 bld.texel_type = type;
2462
2463 /* always using the first channel hopefully should be safe,
2464 * if not things WILL break in other places anyway.
2465 */
2466 if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
2467 bld.format_desc->channel[0].pure_integer) {
2468 if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
2469 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2470 }
2471 else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2472 bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
2473 }
2474 }
2475 else if (util_format_has_stencil(bld.format_desc) &&
2476 !util_format_has_depth(bld.format_desc)) {
2477 /* for stencil only formats, sample stencil (uint) */
2478 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2479 }
2480
2481 if (!static_texture_state->level_zero_only) {
2482 derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
2483 } else {
2484 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2485 }
2486 mip_filter = derived_sampler_state.min_mip_filter;
2487
2488 if (0) {
2489 debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
2490 }
2491
2492 if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2493 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2494 {
2495 /*
2496 * Seamless filtering ignores wrap modes.
2497 * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
2498 * bilinear it's not correct but way better than using for instance repeat.
2499 * Note we even set this for non-seamless. Technically GL allows any wrap
2500 * mode, which made sense when supporting true borders (can get seamless
2501 * effect with border and CLAMP_TO_BORDER), but gallium doesn't support
2502 * borders and d3d9 requires wrap modes to be ignored and it's a pain to fix
2503 * up the sampler state (as it makes it texture dependent).
2504 */
2505 derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2506 derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2507 }
2508
2509 min_img_filter = derived_sampler_state.min_img_filter;
2510 mag_img_filter = derived_sampler_state.mag_img_filter;
2511
2512
2513 /*
2514 * This is all a bit complicated different paths are chosen for performance
2515 * reasons.
2516 * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
2517 * everything (the last two options are equivalent for 4-wide case).
2518 * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
2519 * lod is calculated then the lod value extracted afterwards so making this
2520 * case basically the same as far as lod handling is concerned for the
2521 * further sample/filter code as the 1 lod for everything case.
2522 * Different lod handling mostly shows up when building mipmap sizes
2523 * (lp_build_mipmap_level_sizes() and friends) and also in filtering
2524 * (getting the fractional part of the lod to the right texels).
2525 */
2526
2527 /*
2528 * There are other situations where at least the multiple int lods could be
2529 * avoided like min and max lod being equal.
2530 */
2531 bld.num_mips = bld.num_lods = 1;
2532
2533 if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
2534 (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
2535 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2536 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
2537 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2538 /*
2539 * special case for using per-pixel lod even for implicit lod,
2540 * which is generally never required (ok by APIs) except to please
2541 * some (somewhat broken imho) tests (because per-pixel face selection
2542 * can cause derivatives to be different for pixels outside the primitive
2543 * due to the major axis division even if pre-project derivatives are
2544 * looking normal).
2545 */
2546 bld.num_mips = type.length;
2547 bld.num_lods = type.length;
2548 }
2549 else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
2550 (explicit_lod || lod_bias || derivs)) {
2551 if ((is_fetch && target != PIPE_BUFFER) ||
2552 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2553 bld.num_mips = type.length;
2554 bld.num_lods = type.length;
2555 }
2556 else if (!is_fetch && min_img_filter != mag_img_filter) {
2557 bld.num_mips = 1;
2558 bld.num_lods = type.length;
2559 }
2560 }
2561 /* TODO: for true scalar_lod should only use 1 lod value */
2562 else if ((is_fetch && explicit_lod && target != PIPE_BUFFER) ||
2563 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2564 bld.num_mips = num_quads;
2565 bld.num_lods = num_quads;
2566 }
2567 else if (!is_fetch && min_img_filter != mag_img_filter) {
2568 bld.num_mips = 1;
2569 bld.num_lods = num_quads;
2570 }
2571
2572
2573 bld.lodf_type = type;
2574 /* we want native vector size to be able to use our intrinsics */
2575 if (bld.num_lods != type.length) {
2576 /* TODO: this currently always has to be per-quad or per-element */
2577 bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
2578 }
2579 bld.lodi_type = lp_int_type(bld.lodf_type);
2580 bld.levelf_type = bld.lodf_type;
2581 if (bld.num_mips == 1) {
2582 bld.levelf_type.length = 1;
2583 }
2584 bld.leveli_type = lp_int_type(bld.levelf_type);
2585 bld.float_size_type = bld.float_size_in_type;
2586 /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
2587 * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
2588 if (bld.num_mips > 1) {
2589 bld.float_size_type.length = bld.num_mips == type.length ?
2590 bld.num_mips * bld.float_size_in_type.length :
2591 type.length;
2592 }
2593 bld.int_size_type = lp_int_type(bld.float_size_type);
2594
2595 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
2596 lp_build_context_init(&bld.float_vec_bld, gallivm, type);
2597 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
2598 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
2599 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
2600 lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
2601 lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
2602 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
2603 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
2604 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
2605 lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
2606 lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
2607 lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
2608 lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
2609
2610 /* Get the dynamic state */
2611 tex_width = dynamic_state->width(dynamic_state, gallivm,
2612 context_ptr, texture_index);
2613 bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm,
2614 context_ptr, texture_index);
2615 bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm,
2616 context_ptr, texture_index);
2617 bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
2618 context_ptr, texture_index);
2619 bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm,
2620 context_ptr, texture_index);
2621 /* Note that mip_offsets is an array[level] of offsets to texture images */
2622
2623 /* width, height, depth as single int vector */
2624 if (dims <= 1) {
2625 bld.int_size = tex_width;
2626 }
2627 else {
2628 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
2629 tex_width,
2630 LLVMConstInt(i32t, 0, 0), "");
2631 if (dims >= 2) {
2632 LLVMValueRef tex_height =
2633 dynamic_state->height(dynamic_state, gallivm,
2634 context_ptr, texture_index);
2635 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2636 tex_height,
2637 LLVMConstInt(i32t, 1, 0), "");
2638 if (dims >= 3) {
2639 LLVMValueRef tex_depth =
2640 dynamic_state->depth(dynamic_state, gallivm, context_ptr,
2641 texture_index);
2642 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2643 tex_depth,
2644 LLVMConstInt(i32t, 2, 0), "");
2645 }
2646 }
2647 }
2648
2649 for (i = 0; i < 5; i++) {
2650 newcoords[i] = coords[i];
2651 }
2652
2653 if (0) {
2654 /* For debug: no-op texture sampling */
2655 lp_build_sample_nop(gallivm,
2656 bld.texel_type,
2657 newcoords,
2658 texel_out);
2659 }
2660
2661 else if (is_fetch) {
2662 lp_build_fetch_texel(&bld, texture_index, newcoords,
2663 lod, offsets,
2664 texel_out);
2665 }
2666
2667 else {
2668 LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
2669 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
2670 boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
2671 /* not sure this is strictly needed or simply impossible */
2672 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
2673 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
2674
2675 use_aos &= bld.num_lods <= num_quads ||
2676 derived_sampler_state.min_img_filter ==
2677 derived_sampler_state.mag_img_filter;
2678 if (dims > 1) {
2679 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
2680 if (dims > 2) {
2681 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
2682 }
2683 }
2684 if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
2685 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
2686 derived_sampler_state.seamless_cube_map &&
2687 (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
2688 derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
2689 /* theoretically possible with AoS filtering but not implemented (complex!) */
2690 use_aos = 0;
2691 }
2692
2693 if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
2694 !use_aos && util_format_fits_8unorm(bld.format_desc)) {
2695 debug_printf("%s: using floating point linear filtering for %s\n",
2696 __FUNCTION__, bld.format_desc->short_name);
2697 debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d"
2698 " wraps %d wrapt %d wrapr %d\n",
2699 derived_sampler_state.min_img_filter,
2700 derived_sampler_state.mag_img_filter,
2701 derived_sampler_state.min_mip_filter,
2702 static_texture_state->target,
2703 derived_sampler_state.seamless_cube_map,
2704 derived_sampler_state.wrap_s,
2705 derived_sampler_state.wrap_t,
2706 derived_sampler_state.wrap_r);
2707 }
2708
2709 lp_build_sample_common(&bld, texture_index, sampler_index,
2710 newcoords,
2711 derivs, lod_bias, explicit_lod,
2712 &lod_positive, &lod_fpart,
2713 &ilevel0, &ilevel1);
2714
2715 if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
2716 /* The aos path doesn't do seamless filtering so simply add cube layer
2717 * to face now.
2718 */
2719 newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
2720 }
2721
2722 /*
2723 * we only try 8-wide sampling with soa as it appears to
2724 * be a loss with aos with AVX (but it should work, except
2725 * for conformance if min_filter != mag_filter if num_lods > 1).
2726 * (It should be faster if we'd support avx2)
2727 */
2728 if (num_quads == 1 || !use_aos) {
2729 if (use_aos) {
2730 /* do sampling/filtering with fixed pt arithmetic */
2731 lp_build_sample_aos(&bld, sampler_index,
2732 newcoords[0], newcoords[1],
2733 newcoords[2],
2734 offsets, lod_positive, lod_fpart,
2735 ilevel0, ilevel1,
2736 texel_out);
2737 }
2738
2739 else {
2740 lp_build_sample_general(&bld, sampler_index,
2741 newcoords, offsets,
2742 lod_positive, lod_fpart,
2743 ilevel0, ilevel1,
2744 texel_out);
2745 }
2746 }
2747 else {
2748 unsigned j;
2749 struct lp_build_sample_context bld4;
2750 struct lp_type type4 = type;
2751 unsigned i;
2752 LLVMValueRef texelout4[4];
2753 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
2754
2755 type4.length = 4;
2756
2757 /* Setup our build context */
2758 memset(&bld4, 0, sizeof bld4);
2759 bld4.gallivm = bld.gallivm;
2760 bld4.context_ptr = bld.context_ptr;
2761 bld4.static_texture_state = bld.static_texture_state;
2762 bld4.static_sampler_state = bld.static_sampler_state;
2763 bld4.dynamic_state = bld.dynamic_state;
2764 bld4.format_desc = bld.format_desc;
2765 bld4.dims = bld.dims;
2766 bld4.row_stride_array = bld.row_stride_array;
2767 bld4.img_stride_array = bld.img_stride_array;
2768 bld4.base_ptr = bld.base_ptr;
2769 bld4.mip_offsets = bld.mip_offsets;
2770 bld4.int_size = bld.int_size;
2771
2772 bld4.vector_width = lp_type_width(type4);
2773
2774 bld4.float_type = lp_type_float(32);
2775 bld4.int_type = lp_type_int(32);
2776 bld4.coord_type = type4;
2777 bld4.int_coord_type = lp_int_type(type4);
2778 bld4.float_size_in_type = lp_type_float(32);
2779 bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
2780 bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
2781 bld4.texel_type = bld.texel_type;
2782 bld4.texel_type.length = 4;
2783
2784 bld4.num_mips = bld4.num_lods = 1;
2785 if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
2786 (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
2787 (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2788 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
2789 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2790 bld4.num_mips = type4.length;
2791 bld4.num_lods = type4.length;
2792 }
2793 if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
2794 (explicit_lod || lod_bias || derivs)) {
2795 if ((is_fetch && target != PIPE_BUFFER) ||
2796 (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2797 bld4.num_mips = type4.length;
2798 bld4.num_lods = type4.length;
2799 }
2800 else if (!is_fetch && min_img_filter != mag_img_filter) {
2801 bld4.num_mips = 1;
2802 bld4.num_lods = type4.length;
2803 }
2804 }
2805
2806 /* we want native vector size to be able to use our intrinsics */
2807 bld4.lodf_type = type4;
2808 if (bld4.num_lods != type4.length) {
2809 bld4.lodf_type.length = 1;
2810 }
2811 bld4.lodi_type = lp_int_type(bld4.lodf_type);
2812 bld4.levelf_type = type4;
2813 if (bld4.num_mips != type4.length) {
2814 bld4.levelf_type.length = 1;
2815 }
2816 bld4.leveli_type = lp_int_type(bld4.levelf_type);
2817 bld4.float_size_type = bld4.float_size_in_type;
2818 if (bld4.num_mips > 1) {
2819 bld4.float_size_type.length = bld4.num_mips == type4.length ?
2820 bld4.num_mips * bld4.float_size_in_type.length :
2821 type4.length;
2822 }
2823 bld4.int_size_type = lp_int_type(bld4.float_size_type);
2824
2825 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
2826 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
2827 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
2828 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
2829 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
2830 lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
2831 lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
2832 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
2833 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
2834 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
2835 lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
2836 lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
2837 lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
2838 lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
2839
2840 for (i = 0; i < num_quads; i++) {
2841 LLVMValueRef s4, t4, r4;
2842 LLVMValueRef lod_positive4, lod_fpart4 = NULL;
2843 LLVMValueRef ilevel04, ilevel14 = NULL;
2844 LLVMValueRef offsets4[4] = { NULL };
2845 unsigned num_lods = bld4.num_lods;
2846
2847 s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
2848 t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
2849 r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
2850
2851 if (offsets[0]) {
2852 offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
2853 if (dims > 1) {
2854 offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
2855 if (dims > 2) {
2856 offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
2857 }
2858 }
2859 }
2860 lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
2861 ilevel04 = bld.num_mips == 1 ? ilevel0 :
2862 lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
2863 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
2864 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
2865 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
2866 }
2867
2868 if (use_aos) {
2869 /* do sampling/filtering with fixed pt arithmetic */
2870 lp_build_sample_aos(&bld4, sampler_index,
2871 s4, t4, r4, offsets4,
2872 lod_positive4, lod_fpart4,
2873 ilevel04, ilevel14,
2874 texelout4);
2875 }
2876
2877 else {
2878 /* this path is currently unreachable and hence might break easily... */
2879 LLVMValueRef newcoords4[5];
2880 newcoords4[0] = s4;
2881 newcoords4[1] = t4;
2882 newcoords4[2] = r4;
2883 newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
2884 newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
2885
2886 lp_build_sample_general(&bld4, sampler_index,
2887 newcoords4, offsets4,
2888 lod_positive4, lod_fpart4,
2889 ilevel04, ilevel14,
2890 texelout4);
2891 }
2892 for (j = 0; j < 4; j++) {
2893 texelouttmp[j][i] = texelout4[j];
2894 }
2895 }
2896
2897 for (j = 0; j < 4; j++) {
2898 texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
2899 }
2900 }
2901 }
2902
2903 if (target != PIPE_BUFFER) {
2904 apply_sampler_swizzle(&bld, texel_out);
2905 }
2906
2907 /*
2908 * texel type can be a (32bit) int/uint (for pure int formats only),
2909 * however we are expected to always return floats (storage is untyped).
2910 */
2911 if (!bld.texel_type.floating) {
2912 unsigned chan;
2913 for (chan = 0; chan < 4; chan++) {
2914 texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
2915 lp_build_vec_type(gallivm, type), "");
2916 }
2917 }
2918 }
2919
2920
2921 #define USE_TEX_FUNC_CALL 1
2922
2923 #define LP_MAX_TEX_FUNC_ARGS 32
2924
2925 static inline void
2926 get_target_info(enum pipe_texture_target target,
2927 unsigned *num_coords, unsigned *num_derivs,
2928 unsigned *num_offsets, unsigned *layer)
2929 {
2930 unsigned dims = texture_dims(target);
2931 *num_coords = dims;
2932 *num_offsets = dims;
2933 *num_derivs = (target == PIPE_TEXTURE_CUBE ||
2934 target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
2935 *layer = has_layer_coord(target) ? 2: 0;
2936 if (target == PIPE_TEXTURE_CUBE_ARRAY) {
2937 /*
2938 * dims doesn't include r coord for cubes - this is handled
2939 * by layer instead, but need to fix up for cube arrays...
2940 */
2941 *layer = 3;
2942 *num_coords = 3;
2943 }
2944 }
2945
2946
/**
 * Generate the function body for a texture sampling function.
 *
 * The incoming parameters are "unpacked" here in exactly the same order
 * in which lp_build_sample_soa_func() packs them into the call argument
 * list (and builds the prototype); the two sequences must stay in sync.
 */
static void
lp_build_sample_gen_func(struct gallivm_state *gallivm,
                         const struct lp_static_texture_state *static_texture_state,
                         const struct lp_static_sampler_state *static_sampler_state,
                         struct lp_sampler_dynamic_state *dynamic_state,
                         struct lp_type type,
                         unsigned texture_index,
                         unsigned sampler_index,
                         LLVMValueRef function,
                         unsigned num_args,
                         unsigned sample_key)
{
   LLVMBuilderRef old_builder;
   LLVMBasicBlockRef block;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef lod = NULL;
   LLVMValueRef context_ptr;
   LLVMValueRef texel_out[4];
   struct lp_derivatives derivs;
   struct lp_derivatives *deriv_ptr = NULL;
   unsigned num_param = 0;
   unsigned i, num_coords, num_derivs, num_offsets, layer;
   enum lp_sampler_lod_control lod_control;

   /* extract the lod mode (none/bias/explicit/derivatives) from the key */
   lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
                    LP_SAMPLER_LOD_CONTROL_SHIFT;

   get_target_info(static_texture_state->target,
                   &num_coords, &num_derivs, &num_offsets, &layer);

   /* "unpack" arguments */
   context_ptr = LLVMGetParam(function, num_param++);
   for (i = 0; i < num_coords; i++) {
      coords[i] = LLVMGetParam(function, num_param++);
   }
   for (i = num_coords; i < 5; i++) {
      /* This is rather unfortunate... */
      coords[i] = lp_build_undef(gallivm, type);
   }
   if (layer) {
      coords[layer] = LLVMGetParam(function, num_param++);
   }
   if (sample_key & LP_SAMPLER_SHADOW) {
      /* shadow comparison value always travels in coord slot 4 */
      coords[4] = LLVMGetParam(function, num_param++);
   }
   if (sample_key & LP_SAMPLER_OFFSETS) {
      for (i = 0; i < num_offsets; i++) {
         offsets[i] = LLVMGetParam(function, num_param++);
      }
   }
   if (lod_control == LP_SAMPLER_LOD_BIAS ||
       lod_control == LP_SAMPLER_LOD_EXPLICIT) {
      lod = LLVMGetParam(function, num_param++);
   }
   else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
      /* ddx/ddy pairs are interleaved per coordinate */
      for (i = 0; i < num_derivs; i++) {
         derivs.ddx[i] = LLVMGetParam(function, num_param++);
         derivs.ddy[i] = LLVMGetParam(function, num_param++);
      }
      deriv_ptr = &derivs;
   }

   /* must have consumed exactly the number of args the caller packed */
   assert(num_args == num_param);

   /*
    * Function body
    */

   /* emit into a fresh builder positioned at the new function's entry
    * block; the caller's builder is saved and restored afterwards.
    */
   old_builder = gallivm->builder;
   block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
   gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
   LLVMPositionBuilderAtEnd(gallivm->builder, block);

   lp_build_sample_soa_code(gallivm,
                            static_texture_state,
                            static_sampler_state,
                            dynamic_state,
                            type,
                            sample_key,
                            texture_index,
                            sampler_index,
                            context_ptr,
                            coords,
                            offsets,
                            deriv_ptr,
                            lod,
                            texel_out);

   /* return the four texel channels as a single aggregate (struct) value */
   LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);

   LLVMDisposeBuilder(gallivm->builder);
   gallivm->builder = old_builder;

   gallivm_verify_function(gallivm, function);
}
3046
3047
/**
 * Call the matching function for texture sampling.
 * If there's no match, generate a new one.
 *
 * Functions are cached in the module by name; the name encodes texture
 * unit, sampler unit and the sample_key, which together cover all the
 * static state a generated function depends on.  The argument packing
 * order below must mirror the unpacking in lp_build_sample_gen_func().
 */
static void
lp_build_sample_soa_func(struct gallivm_state *gallivm,
                         const struct lp_static_texture_state *static_texture_state,
                         const struct lp_static_sampler_state *static_sampler_state,
                         struct lp_sampler_dynamic_state *dynamic_state,
                         const struct lp_sampler_params *params)
{
   LLVMBuilderRef builder = gallivm->builder;
   /* module owning the basic block we're currently emitting into */
   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
                             LLVMGetInsertBlock(builder)));
   LLVMValueRef function, inst;
   LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
   LLVMBasicBlockRef bb;
   LLVMValueRef tex_ret;
   unsigned num_args = 0;
   char func_name[64];
   unsigned i, num_coords, num_derivs, num_offsets, layer;
   unsigned texture_index = params->texture_index;
   unsigned sampler_index = params->sampler_index;
   unsigned sample_key = params->sample_key;
   const LLVMValueRef *coords = params->coords;
   const LLVMValueRef *offsets = params->offsets;
   const struct lp_derivatives *derivs = params->derivs;
   enum lp_sampler_lod_control lod_control;

   lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
                    LP_SAMPLER_LOD_CONTROL_SHIFT;

   get_target_info(static_texture_state->target,
                   &num_coords, &num_derivs, &num_offsets, &layer);

   /*
    * texture function matches are found by name.
    * Thus the name has to include both the texture and sampler unit
    * (which covers all static state) plus the actual texture function
    * (including things like offsets, shadow coord, lod control).
    * Additionally lod_property has to be included too.
    */

   util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
                 texture_index, sampler_index, sample_key);

   function = LLVMGetNamedFunction(module, func_name);

   if(!function) {
      LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
      LLVMTypeRef ret_type;
      LLVMTypeRef function_type;
      LLVMTypeRef val_type[4];
      unsigned num_param = 0;

      /*
       * Generate the function prototype.
       */

      arg_types[num_param++] = LLVMTypeOf(params->context_ptr);
      for (i = 0; i < num_coords; i++) {
         arg_types[num_param++] = LLVMTypeOf(coords[0]);
         /* all coords must share one vector type */
         assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
      }
      if (layer) {
         arg_types[num_param++] = LLVMTypeOf(coords[layer]);
         assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
      }
      if (sample_key & LP_SAMPLER_SHADOW) {
         arg_types[num_param++] = LLVMTypeOf(coords[0]);
      }
      if (sample_key & LP_SAMPLER_OFFSETS) {
         for (i = 0; i < num_offsets; i++) {
            arg_types[num_param++] = LLVMTypeOf(offsets[0]);
            assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
         }
      }
      if (lod_control == LP_SAMPLER_LOD_BIAS ||
          lod_control == LP_SAMPLER_LOD_EXPLICIT) {
         arg_types[num_param++] = LLVMTypeOf(params->lod);
      }
      else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
         for (i = 0; i < num_derivs; i++) {
            arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
            arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
            assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
            assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
         }
      }

      /* return type: struct of 4 texel channel vectors */
      val_type[0] = val_type[1] = val_type[2] = val_type[3] =
         lp_build_vec_type(gallivm, params->type);
      ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
      function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
      function = LLVMAddFunction(module, func_name, function_type);

      /* pointer args (context ptr) can't alias anything else */
      for (i = 0; i < num_param; ++i) {
         if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
            LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
         }
      }

      LLVMSetFunctionCallConv(function, LLVMFastCallConv);
      LLVMSetLinkage(function, LLVMPrivateLinkage);

      lp_build_sample_gen_func(gallivm,
                               static_texture_state,
                               static_sampler_state,
                               dynamic_state,
                               params->type,
                               texture_index,
                               sampler_index,
                               function,
                               num_param,
                               sample_key);
   }

   /* pack the call arguments - must match the prototype built above */
   num_args = 0;
   args[num_args++] = params->context_ptr;
   for (i = 0; i < num_coords; i++) {
      args[num_args++] = coords[i];
   }
   if (layer) {
      args[num_args++] = coords[layer];
   }
   if (sample_key & LP_SAMPLER_SHADOW) {
      args[num_args++] = coords[4];
   }
   if (sample_key & LP_SAMPLER_OFFSETS) {
      for (i = 0; i < num_offsets; i++) {
         args[num_args++] = offsets[i];
      }
   }
   if (lod_control == LP_SAMPLER_LOD_BIAS ||
       lod_control == LP_SAMPLER_LOD_EXPLICIT) {
      args[num_args++] = params->lod;
   }
   else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
      for (i = 0; i < num_derivs; i++) {
         args[num_args++] = derivs->ddx[i];
         args[num_args++] = derivs->ddy[i];
      }
   }

   assert(num_args <= LP_MAX_TEX_FUNC_ARGS);

   tex_ret = LLVMBuildCall(builder, function, args, num_args, "");
   bb = LLVMGetInsertBlock(builder);
   inst = LLVMGetLastInstruction(bb);
   /* call site conv must match the function's fastcc set above */
   LLVMSetInstructionCallConv(inst, LLVMFastCallConv);

   /* unpack the 4-channel aggregate return into the caller's texel array */
   for (i = 0; i < 4; i++) {
      params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
   }
}
3203
3204
3205 /**
3206 * Build texture sampling code.
3207 * Either via a function call or inline it directly.
3208 */
3209 void
3210 lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
3211 const struct lp_static_sampler_state *static_sampler_state,
3212 struct lp_sampler_dynamic_state *dynamic_state,
3213 struct gallivm_state *gallivm,
3214 const struct lp_sampler_params *params)
3215 {
3216 if (USE_TEX_FUNC_CALL) {
3217 lp_build_sample_soa_func(gallivm,
3218 static_texture_state,
3219 static_sampler_state,
3220 dynamic_state,
3221 params);
3222 }
3223 else {
3224 lp_build_sample_soa_code(gallivm,
3225 static_texture_state,
3226 static_sampler_state,
3227 dynamic_state,
3228 params->type,
3229 params->sample_key,
3230 params->texture_index,
3231 params->sampler_index,
3232 params->context_ptr,
3233 params->coords,
3234 params->offsets,
3235 params->derivs,
3236 params->lod,
3237 params->texel);
3238 }
3239 }
3240
3241
/**
 * Generate code for a texture size / resource info query (TXQ / SVIEWINFO).
 *
 * Returns in sizes_out[] the (possibly minified) width/height/depth for the
 * requested mip level, the array layer count where applicable, and - for
 * sviewinfo queries with an explicit lod - the number of mip levels in the
 * w channel.  Follows d3d10 rules: an unbound resource (format NONE) and
 * out-of-range levels return zero.
 */
void
lp_build_size_query_soa(struct gallivm_state *gallivm,
                        const struct lp_static_texture_state *static_state,
                        struct lp_sampler_dynamic_state *dynamic_state,
                        struct lp_type int_type,
                        unsigned texture_unit,
                        unsigned target,
                        LLVMValueRef context_ptr,
                        boolean is_sviewinfo,
                        enum lp_sampler_lod_property lod_property,
                        LLVMValueRef explicit_lod,
                        LLVMValueRef *sizes_out)
{
   LLVMValueRef lod, level, size;
   LLVMValueRef first_level = NULL;
   int dims, i;
   boolean has_array;
   unsigned num_lods = 1;
   struct lp_build_context bld_int_vec4;

   if (static_state->format == PIPE_FORMAT_NONE) {
      /*
       * If there's nothing bound, format is NONE, and we must return
       * all zero as mandated by d3d10 in this case.
       */
      unsigned chan;
      LLVMValueRef zero = lp_build_const_vec(gallivm, int_type, 0.0F);
      for (chan = 0; chan < 4; chan++) {
         sizes_out[chan] = zero;
      }
      return;
   }

   /*
    * Do some sanity verification about bound texture and shader dcl target.
    * Not entirely sure what's possible but assume array/non-array
    * always compatible (probably not ok for OpenGL but d3d10 has no
    * distinction of arrays at the resource level).
    * Everything else looks bogus (though not entirely sure about rect/2d).
    * Currently disabled because it causes assertion failures if there's
    * nothing bound (or rather a dummy texture, not that this case would
    * return the right values).
    */
   if (0 && static_state->target != target) {
      if (static_state->target == PIPE_TEXTURE_1D)
         assert(target == PIPE_TEXTURE_1D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
         assert(target == PIPE_TEXTURE_1D);
      else if (static_state->target == PIPE_TEXTURE_2D)
         assert(target == PIPE_TEXTURE_2D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
         assert(target == PIPE_TEXTURE_2D);
      else if (static_state->target == PIPE_TEXTURE_CUBE)
         assert(target == PIPE_TEXTURE_CUBE_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
         assert(target == PIPE_TEXTURE_CUBE);
      else
         assert(0);
   }

   dims = texture_dims(target);

   switch (target) {
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE_ARRAY:
      has_array = TRUE;
      break;
   default:
      has_array = FALSE;
      break;
   }

   assert(!int_type.floating);

   /* all size math is done in a scalar-ish 4 x i32 vector */
   lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));

   if (explicit_lod) {
      /* FIXME: this needs to honor per-element lod */
      lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod,
                                    lp_build_const_int32(gallivm, 0), "");
      first_level = dynamic_state->first_level(dynamic_state, gallivm,
                                               context_ptr, texture_unit);
      /* shader lod is relative to the view's first level */
      level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
      lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
   } else {
      lod = bld_int_vec4.zero;
   }

   size = bld_int_vec4.undef;

   /* assemble base-level w/h/d into the x/y/z lanes of the size vector */
   size = LLVMBuildInsertElement(gallivm->builder, size,
                                 dynamic_state->width(dynamic_state, gallivm,
                                                      context_ptr, texture_unit),
                                 lp_build_const_int32(gallivm, 0), "");

   if (dims >= 2) {
      size = LLVMBuildInsertElement(gallivm->builder, size,
                                    dynamic_state->height(dynamic_state, gallivm,
                                                          context_ptr, texture_unit),
                                    lp_build_const_int32(gallivm, 1), "");
   }

   if (dims >= 3) {
      size = LLVMBuildInsertElement(gallivm->builder, size,
                                    dynamic_state->depth(dynamic_state, gallivm,
                                                         context_ptr, texture_unit),
                                    lp_build_const_int32(gallivm, 2), "");
   }

   /* shift sizes down to the requested mip level (clamped at 1) */
   size = lp_build_minify(&bld_int_vec4, size, lod, TRUE);

   if (has_array) {
      /* layer count lives in the depth slot of the dynamic state and is
       * NOT minified; it goes into the lane right after the real dims.
       */
      LLVMValueRef layers = dynamic_state->depth(dynamic_state, gallivm,
                                                 context_ptr, texture_unit);
      if (target == PIPE_TEXTURE_CUBE_ARRAY) {
         /*
          * It looks like GL wants number of cubes, d3d10.1 has it undefined?
          * Could avoid this by passing in number of cubes instead of total
          * number of layers (might make things easier elsewhere too).
          */
         LLVMValueRef six = lp_build_const_int32(gallivm, 6);
         layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
      }
      size = LLVMBuildInsertElement(gallivm->builder, size, layers,
                                    lp_build_const_int32(gallivm, dims), "");
   }

   /*
    * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
    * if level is out of bounds (note this can't cover unbound texture
    * here, which also requires returning zero).
    */
   if (explicit_lod && is_sviewinfo) {
      LLVMValueRef last_level, out, out1;
      struct lp_build_context leveli_bld;

      /* everything is scalar for now */
      lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
      last_level = dynamic_state->last_level(dynamic_state, gallivm,
                                             context_ptr, texture_unit);

      /* out-of-range mask: level < first_level || level > last_level */
      out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
      out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
      out = lp_build_or(&leveli_bld, out, out1);
      if (num_lods == 1) {
         out = lp_build_broadcast_scalar(&bld_int_vec4, out);
      }
      else {
         /* TODO */
         assert(0);
      }
      /* zero the size lanes wherever the level was out of range */
      size = lp_build_andnot(&bld_int_vec4, size, out);
   }
   /* broadcast each scalar size lane out to the shader's vector width */
   for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
      sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, int_type,
                                                size,
                                                lp_build_const_int32(gallivm, i));
   }
   if (is_sviewinfo) {
      /* pad the unused channels with zero, per d3d10 */
      for (; i < 4; i++) {
         sizes_out[i] = lp_build_const_vec(gallivm, int_type, 0.0);
      }
   }

   /*
    * if there's no explicit_lod (buffers, rects) queries requiring nr of
    * mips would be illegal.
    */
   if (is_sviewinfo && explicit_lod) {
      struct lp_build_context bld_int_scalar;
      LLVMValueRef num_levels;
      lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));

      if (static_state->level_zero_only) {
         num_levels = bld_int_scalar.one;
      }
      else {
         LLVMValueRef last_level;

         last_level = dynamic_state->last_level(dynamic_state, gallivm,
                                                context_ptr, texture_unit);
         /* mip count = last_level - first_level + 1 */
         num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
         num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
      }
      sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, int_type),
                                        num_levels);
   }
}