1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Texture sampling -- SoA.
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/u_debug.h"
40 #include "util/u_dump.h"
41 #include "util/u_memory.h"
42 #include "util/u_math.h"
43 #include "util/u_format.h"
44 #include "util/u_cpu_detect.h"
45 #include "lp_bld_debug.h"
46 #include "lp_bld_type.h"
47 #include "lp_bld_const.h"
48 #include "lp_bld_conv.h"
49 #include "lp_bld_arit.h"
50 #include "lp_bld_bitarit.h"
51 #include "lp_bld_logic.h"
52 #include "lp_bld_printf.h"
53 #include "lp_bld_swizzle.h"
54 #include "lp_bld_flow.h"
55 #include "lp_bld_gather.h"
56 #include "lp_bld_format.h"
57 #include "lp_bld_sample.h"
58 #include "lp_bld_sample_aos.h"
59 #include "lp_bld_struct.h"
60 #include "lp_bld_quad.h"
61 #include "lp_bld_pack.h"
65 * Generate code to fetch a texel from a texture at int coords (x, y, z).
66 * The computation depends on whether the texture is 1D, 2D or 3D.
67 * The result, texel, will be float vectors:
68 * texel[0] = red values
69 * texel[1] = green values
70 * texel[2] = blue values
71 * texel[3] = alpha values
/*
 * lp_build_sample_texel_soa: emit LLVM IR that fetches a texel at integer
 * coordinates (x, y, z) and leaves four channel values in texel_out[0..3].
 *
 * Steps visible in this listing:
 *  - For each of wrap_s, wrap_t and wrap_r for which
 *    lp_sampler_wrap_mode_uses_border_color() holds, build
 *    (coord < 0) and (coord >= size) with lp_build_cmp() and OR the results
 *    into use_border (which starts out as NULL).
 *  - lp_build_sample_offset() converts x, y, z plus y_stride and z_stride
 *    into a byte offset from the start of the texture.
 *  - lp_build_andnot(offset, use_border) forces the offset to zero where
 *    use_border is set, so the fetch stays inside the image.
 *  - lp_build_fetch_rgba_soa() performs the fetch; afterwards each channel is
 *    passed through lp_build_select() so that the broadcast border color
 *    channel replaces the fetched value where use_border is set.
 *
 * NOTE(review): several original lines are absent from this listing (some
 * parameters, the declarations of b1, b2, offset and chan, the conditions
 * guarding the y and z sections and the border handling, and every
 * brace-only line). Confirm details against the complete file.
 */
74 lp_build_sample_texel_soa(struct lp_build_sample_context
*bld
,
82 LLVMValueRef y_stride
,
83 LLVMValueRef z_stride
,
84 LLVMValueRef data_ptr
,
85 LLVMValueRef texel_out
[4])
87 const struct lp_sampler_static_state
*static_state
= bld
->static_state
;
88 const unsigned dims
= bld
->dims
;
89 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
90 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
93 LLVMValueRef use_border
= NULL
;
95 /* use_border = x < 0 || x >= width || y < 0 || y >= height */
96 if (lp_sampler_wrap_mode_uses_border_color(static_state
->wrap_s
,
97 static_state
->min_img_filter
,
98 static_state
->mag_img_filter
)) {
100 b1
= lp_build_cmp(int_coord_bld
, PIPE_FUNC_LESS
, x
, int_coord_bld
->zero
);
101 b2
= lp_build_cmp(int_coord_bld
, PIPE_FUNC_GEQUAL
, x
, width
);
102 use_border
= LLVMBuildOr(builder
, b1
, b2
, "b1_or_b2");
106 lp_sampler_wrap_mode_uses_border_color(static_state
->wrap_t
,
107 static_state
->min_img_filter
,
108 static_state
->mag_img_filter
)) {
110 b1
= lp_build_cmp(int_coord_bld
, PIPE_FUNC_LESS
, y
, int_coord_bld
->zero
);
111 b2
= lp_build_cmp(int_coord_bld
, PIPE_FUNC_GEQUAL
, y
, height
);
113 use_border
= LLVMBuildOr(builder
, use_border
, b1
, "ub_or_b1");
114 use_border
= LLVMBuildOr(builder
, use_border
, b2
, "ub_or_b2");
117 use_border
= LLVMBuildOr(builder
, b1
, b2
, "b1_or_b2");
122 lp_sampler_wrap_mode_uses_border_color(static_state
->wrap_r
,
123 static_state
->min_img_filter
,
124 static_state
->mag_img_filter
)) {
126 b1
= lp_build_cmp(int_coord_bld
, PIPE_FUNC_LESS
, z
, int_coord_bld
->zero
);
127 b2
= lp_build_cmp(int_coord_bld
, PIPE_FUNC_GEQUAL
, z
, depth
);
129 use_border
= LLVMBuildOr(builder
, use_border
, b1
, "ub_or_b1");
130 use_border
= LLVMBuildOr(builder
, use_border
, b2
, "ub_or_b2");
133 use_border
= LLVMBuildOr(builder
, b1
, b2
, "b1_or_b2");
137 /* convert x,y,z coords to linear offset from start of texture, in bytes */
138 lp_build_sample_offset(&bld
->int_coord_bld
,
140 x
, y
, z
, y_stride
, z_stride
,
144 /* If we can sample the border color, it means that texcoords may
145 * lie outside the bounds of the texture image. We need to do
146 * something to prevent reading out of bounds and causing a segfault.
148 * Simply AND the texture coords with !use_border. This will cause
149 * coords which are out of bounds to become zero. Zero's guaranteed
150 * to be inside the texture image.
152 offset
= lp_build_andnot(&bld
->int_coord_bld
, offset
, use_border
);
155 lp_build_fetch_rgba_soa(bld
->gallivm
,
163 * Note: if we find an app which frequently samples the texture border
164 * we might want to implement a true conditional here to avoid sampling
165 * the texture whenever possible (since that's quite a bit of code).
168 * texel = border_color;
171 * texel = sample_texture(coord);
173 * As it is now, we always sample the texture, then selectively replace
174 * the texel color results with the border color.
178 /* select texel color or border color depending on use_border */
179 LLVMValueRef border_color_ptr
=
180 bld
->dynamic_state
->border_color(bld
->dynamic_state
,
183 for (chan
= 0; chan
< 4; chan
++) {
184 LLVMValueRef border_chan
=
185 lp_build_array_get(bld
->gallivm
, border_color_ptr
,
186 lp_build_const_int32(bld
->gallivm
, chan
));
187 LLVMValueRef border_chan_vec
=
188 lp_build_broadcast_scalar(&bld
->float_vec_bld
, border_chan
);
189 texel_out
[chan
] = lp_build_select(&bld
->texel_bld
, use_border
,
190 border_chan_vec
, texel_out
[chan
]);
197 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
/*
 * lp_build_coord_mirror: emit the mirror function used by the
 * PIPE_WRAP_MIRROR modes for a float coordinate vector.
 *
 * Visible computation:
 *   flr, fract = ifloor_fract(coord)
 *   isOdd      = flr & 1
 *   coord      = set_sign(fract, isOdd)
 *   coord      = coord + int_to_float(isOdd)
 * So lanes with an odd floor get the sign of fract changed by
 * lp_build_set_sign() and are then offset by one.
 *
 * NOTE(review): the coord parameter line and the return statement are not
 * visible in this listing; the caller at the MIRROR_REPEAT cases assigns the
 * result to coord, so presumably the final coord is returned -- confirm.
 */
200 lp_build_coord_mirror(struct lp_build_sample_context
*bld
,
203 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
204 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
205 LLVMValueRef fract
, flr
, isOdd
;
207 lp_build_ifloor_fract(coord_bld
, coord
, &flr
, &fract
);
209 /* isOdd = flr & 1 */
210 isOdd
= LLVMBuildAnd(bld
->gallivm
->builder
, flr
, int_coord_bld
->one
, "");
212 /* make coord positive or negative depending on isOdd */
213 coord
= lp_build_set_sign(coord_bld
, fract
, isOdd
);
215 /* convert isOdd to float */
216 isOdd
= lp_build_int_to_float(coord_bld
, isOdd
);
218 /* add isOdd to coord */
219 coord
= lp_build_add(coord_bld
, coord
, isOdd
);
226 * Helper to compute the first coord and the weight for
227 * linear wrap repeat npot textures
/*
 * lp_build_coord_repeat_npot_linear: compute the first integer coordinate
 * and the lerp weight for linear filtering with repeat wrapping on a
 * non-power-of-two texture.
 *
 * Parameters visible here:
 *   coord_f   float texcoord vector
 *   length_i  texture size as integer vector
 *   length_f  texture size as float vector
 *   coord0_i  out: first integer coordinate
 *   weight_f  out: interpolation weight
 *
 * Visible computation:
 *   coord_f = fract(coord_f) * length_f - 0.5
 *   (*coord0_i, *weight_f) = ifloor_fract(coord_f)
 *   where *coord0_i < 0, *coord0_i is replaced by length_minus_one
 *
 * NOTE(review): the declaration of mask and the second operand of the
 * length_minus_one subtraction are not visible in this listing.
 */
230 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context
*bld
,
231 LLVMValueRef coord_f
,
232 LLVMValueRef length_i
,
233 LLVMValueRef length_f
,
234 LLVMValueRef
*coord0_i
,
235 LLVMValueRef
*weight_f
)
237 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
238 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
239 LLVMValueRef half
= lp_build_const_vec(bld
->gallivm
, coord_bld
->type
, 0.5);
240 LLVMValueRef length_minus_one
= lp_build_sub(int_coord_bld
, length_i
,
243 /* wrap with normalized floats is just fract */
244 coord_f
= lp_build_fract(coord_bld
, coord_f
);
245 /* mul by size and subtract 0.5 */
246 coord_f
= lp_build_mul(coord_bld
, coord_f
, length_f
);
247 coord_f
= lp_build_sub(coord_bld
, coord_f
, half
);
249 * we avoided the 0.5/length division before the repeat wrap,
250 * now need to fix up edge cases with selects
252 /* convert to int, compute lerp weight */
253 lp_build_ifloor_fract(coord_bld
, coord_f
, coord0_i
, weight_f
);
254 mask
= lp_build_compare(int_coord_bld
->gallivm
, int_coord_bld
->type
,
255 PIPE_FUNC_LESS
, *coord0_i
, int_coord_bld
->zero
);
256 *coord0_i
= lp_build_select(int_coord_bld
, mask
, length_minus_one
, *coord0_i
);
261 * Build LLVM code for texture wrap mode for linear filtering.
262 * \param x0_out returns first integer texcoord
263 * \param x1_out returns second integer texcoord
264 * \param weight_out returns linear interpolation weight
/*
 * lp_build_sample_wrap_linear: emit the texture wrap-mode code used for
 * linear filtering along one dimension. Produces two neighbouring integer
 * coordinates (coord0, coord1) and an interpolation weight.
 *
 * Per wrap mode, as visible below:
 *   REPEAT                  two paths are visible: one ANDs coord0/coord1
 *                           with length_minus_one, the other calls
 *                           lp_build_coord_repeat_npot_linear() and masks
 *                           coord0 + 1. The condition choosing between them
 *                           is not visible (presumably is_pot -- confirm).
 *   CLAMP                   scale when normalized_coords, clamp to
 *                           [0, length], subtract 0.5.
 *   CLAMP_TO_EDGE           scale, min with length, subtract 0.5, max with 0,
 *                           floor via an unsigned-typed copy of coord_bld,
 *                           coord1 limited to length - 1.
 *   CLAMP_TO_BORDER         scale, subtract 0.5, clamp to [-1, length].
 *   MIRROR_REPEAT           lp_build_coord_mirror(), scale, subtract 0.5,
 *                           coord0 >= 0 and coord1 <= length - 1.
 *   MIRROR_CLAMP            abs, scale, min with length, subtract 0.5.
 *   MIRROR_CLAMP_TO_EDGE    abs, scale, clamp to [min, length - min],
 *                           subtract 0.5.
 *   MIRROR_CLAMP_TO_BORDER  abs, scale, subtract 0.5, min with length.
 * Every path finishes with lp_build_ifloor_fract() and coord1 = coord0 + 1.
 *
 * NOTE(review): the switch header, several parameters, the break statements,
 * some declarations (mask, min) and the stores to x0_out / x1_out are not
 * visible in this listing; only the store to weight_out is. Confirm against
 * the complete file.
 */
267 lp_build_sample_wrap_linear(struct lp_build_sample_context
*bld
,
270 LLVMValueRef length_f
,
273 LLVMValueRef
*x0_out
,
274 LLVMValueRef
*x1_out
,
275 LLVMValueRef
*weight_out
)
277 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
278 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
279 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
280 LLVMValueRef half
= lp_build_const_vec(bld
->gallivm
, coord_bld
->type
, 0.5);
281 LLVMValueRef length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
282 LLVMValueRef coord0
, coord1
, weight
;
285 case PIPE_TEX_WRAP_REPEAT
:
287 /* mul by size and subtract 0.5 */
288 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
289 coord
= lp_build_sub(coord_bld
, coord
, half
);
290 /* convert to int, compute lerp weight */
291 lp_build_ifloor_fract(coord_bld
, coord
, &coord0
, &weight
);
292 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
294 coord0
= LLVMBuildAnd(builder
, coord0
, length_minus_one
, "");
295 coord1
= LLVMBuildAnd(builder
, coord1
, length_minus_one
, "");
299 lp_build_coord_repeat_npot_linear(bld
, coord
,
302 mask
= lp_build_compare(int_coord_bld
->gallivm
, int_coord_bld
->type
,
303 PIPE_FUNC_NOTEQUAL
, coord0
, length_minus_one
);
304 coord1
= LLVMBuildAnd(builder
,
305 lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
),
310 case PIPE_TEX_WRAP_CLAMP
:
311 if (bld
->static_state
->normalized_coords
) {
312 /* scale coord to length */
313 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
316 /* clamp to [0, length] */
317 coord
= lp_build_clamp(coord_bld
, coord
, coord_bld
->zero
, length_f
);
319 coord
= lp_build_sub(coord_bld
, coord
, half
);
321 /* convert to int, compute lerp weight */
322 lp_build_ifloor_fract(coord_bld
, coord
, &coord0
, &weight
);
323 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
326 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
328 struct lp_build_context abs_coord_bld
= bld
->coord_bld
;
329 abs_coord_bld
.type
.sign
= FALSE
;
331 if (bld
->static_state
->normalized_coords
) {
332 /* mul by tex size */
333 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
335 /* clamp to length max */
336 coord
= lp_build_min(coord_bld
, coord
, length_f
);
338 coord
= lp_build_sub(coord_bld
, coord
, half
);
339 /* clamp to [0, length - 0.5] */
340 coord
= lp_build_max(coord_bld
, coord
, coord_bld
->zero
);
341 /* convert to int, compute lerp weight */
342 lp_build_ifloor_fract(&abs_coord_bld
, coord
, &coord0
, &weight
);
343 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
344 /* coord1 = min(coord1, length-1) */
345 coord1
= lp_build_min(int_coord_bld
, coord1
, length_minus_one
);
349 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
352 if (bld
->static_state
->normalized_coords
) {
353 /* scale coord to length */
354 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
356 /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
357 coord
= lp_build_sub(coord_bld
, coord
, half
);
358 min
= lp_build_const_vec(bld
->gallivm
, coord_bld
->type
, -1.0F
);
359 coord
= lp_build_clamp(coord_bld
, coord
, min
, length_f
);
360 /* convert to int, compute lerp weight */
361 lp_build_ifloor_fract(coord_bld
, coord
, &coord0
, &weight
);
362 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
366 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
367 /* compute mirror function */
368 coord
= lp_build_coord_mirror(bld
, coord
);
370 /* scale coord to length */
371 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
372 coord
= lp_build_sub(coord_bld
, coord
, half
);
374 /* convert to int, compute lerp weight */
375 lp_build_ifloor_fract(coord_bld
, coord
, &coord0
, &weight
);
376 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
378 /* coord0 = max(coord0, 0) */
379 coord0
= lp_build_max(int_coord_bld
, coord0
, int_coord_bld
->zero
);
380 /* coord1 = min(coord1, length-1) */
381 coord1
= lp_build_min(int_coord_bld
, coord1
, length_minus_one
);
384 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
385 coord
= lp_build_abs(coord_bld
, coord
);
387 if (bld
->static_state
->normalized_coords
) {
388 /* scale coord to length */
389 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
392 /* clamp to [0, length] */
393 coord
= lp_build_min(coord_bld
, coord
, length_f
);
395 coord
= lp_build_sub(coord_bld
, coord
, half
);
397 /* convert to int, compute lerp weight */
398 lp_build_ifloor_fract(coord_bld
, coord
, &coord0
, &weight
);
399 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
402 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
404 LLVMValueRef min
, max
;
405 struct lp_build_context abs_coord_bld
= bld
->coord_bld
;
406 abs_coord_bld
.type
.sign
= FALSE
;
407 coord
= lp_build_abs(coord_bld
, coord
);
409 if (bld
->static_state
->normalized_coords
) {
410 /* scale coord to length */
411 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
414 /* clamp to [0.5, length - 0.5] */
416 max
= lp_build_sub(coord_bld
, length_f
, min
);
417 coord
= lp_build_clamp(coord_bld
, coord
, min
, max
);
419 coord
= lp_build_sub(coord_bld
, coord
, half
);
421 /* convert to int, compute lerp weight */
422 lp_build_ifloor_fract(&abs_coord_bld
, coord
, &coord0
, &weight
);
423 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
427 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
429 coord
= lp_build_abs(coord_bld
, coord
);
431 if (bld
->static_state
->normalized_coords
) {
432 /* scale coord to length */
433 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
436 /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
437 /* skip -0.5 clamp (always positive), do sub first */
438 coord
= lp_build_sub(coord_bld
, coord
, half
);
439 coord
= lp_build_min(coord_bld
, coord
, length_f
);
441 /* convert to int, compute lerp weight */
442 lp_build_ifloor_fract(coord_bld
, coord
, &coord0
, &weight
);
443 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
456 *weight_out
= weight
;
461 * Build LLVM code for texture wrap mode for nearest filtering.
462 * \param coord the incoming texcoord (nominally in [0,1])
463 * \param length the texture size along one dimension, as int vector
464 * \param is_pot if TRUE, length is a power of two
465 * \param wrap_mode one of PIPE_TEX_WRAP_x
/*
 * lp_build_sample_wrap_nearest: emit the texture wrap-mode code used for
 * nearest filtering along one dimension, turning a float coordinate into a
 * single integer coordinate (icoord).
 *
 * Per wrap mode, as visible below:
 *   REPEAT                  two paths are visible: scale, ifloor and AND with
 *                           length_minus_one; or fract_safe, scale, itrunc.
 *                           The condition choosing between them is not
 *                           visible (presumably is_pot -- confirm).
 *   CLAMP / CLAMP_TO_EDGE   scale when normalized_coords, itrunc, clamp with
 *                           a lower bound of zero.
 *   CLAMP_TO_BORDER         scale, ifloor, clamp to [min, max], min = -1.
 *   MIRROR_REPEAT           lp_build_coord_mirror(), asserts
 *                           normalized_coords, scale, itrunc,
 *                           min with length - 1.
 *   MIRROR_CLAMP /
 *   MIRROR_CLAMP_TO_EDGE    abs, scale, itrunc, min with length - 1.
 *   MIRROR_CLAMP_TO_BORDER  abs, scale, itrunc, min with length.
 *
 * NOTE(review): the switch header, several parameters, the icoord
 * declaration, the assignment of max, the break statements and the return
 * are not visible in this listing. Callers assign the result to x, y and z,
 * so presumably icoord is returned -- confirm.
 */
468 lp_build_sample_wrap_nearest(struct lp_build_sample_context
*bld
,
471 LLVMValueRef length_f
,
475 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
476 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
477 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
478 LLVMValueRef length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
482 case PIPE_TEX_WRAP_REPEAT
:
484 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
485 icoord
= lp_build_ifloor(coord_bld
, coord
);
486 icoord
= LLVMBuildAnd(builder
, icoord
, length_minus_one
, "");
489 /* take fraction, unnormalize */
490 coord
= lp_build_fract_safe(coord_bld
, coord
);
491 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
492 icoord
= lp_build_itrunc(coord_bld
, coord
);
496 case PIPE_TEX_WRAP_CLAMP
:
497 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
498 if (bld
->static_state
->normalized_coords
) {
499 /* scale coord to length */
500 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
504 /* use itrunc instead since we clamp to 0 anyway */
505 icoord
= lp_build_itrunc(coord_bld
, coord
);
507 /* clamp to [0, length - 1]. */
508 icoord
= lp_build_clamp(int_coord_bld
, icoord
, int_coord_bld
->zero
,
512 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
513 /* Note: this is the same as CLAMP_TO_EDGE, except min = -1 */
515 LLVMValueRef min
, max
;
517 if (bld
->static_state
->normalized_coords
) {
518 /* scale coord to length */
519 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
522 icoord
= lp_build_ifloor(coord_bld
, coord
);
524 /* clamp to [-1, length] */
525 min
= lp_build_negate(int_coord_bld
, int_coord_bld
->one
);
527 icoord
= lp_build_clamp(int_coord_bld
, icoord
, min
, max
);
531 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
532 /* compute mirror function */
533 coord
= lp_build_coord_mirror(bld
, coord
);
535 /* scale coord to length */
536 assert(bld
->static_state
->normalized_coords
);
537 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
539 /* itrunc == ifloor here */
540 icoord
= lp_build_itrunc(coord_bld
, coord
);
542 /* clamp to [0, length - 1] */
543 icoord
= lp_build_min(int_coord_bld
, icoord
, length_minus_one
);
546 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
547 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
548 coord
= lp_build_abs(coord_bld
, coord
);
550 if (bld
->static_state
->normalized_coords
) {
551 /* scale coord to length */
552 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
555 /* itrunc == ifloor here */
556 icoord
= lp_build_itrunc(coord_bld
, coord
);
558 /* clamp to [0, length - 1] */
559 icoord
= lp_build_min(int_coord_bld
, icoord
, length_minus_one
);
562 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
563 coord
= lp_build_abs(coord_bld
, coord
);
565 if (bld
->static_state
->normalized_coords
) {
566 /* scale coord to length */
567 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
570 /* itrunc == ifloor here */
571 icoord
= lp_build_itrunc(coord_bld
, coord
);
573 /* clamp to [0, length] */
574 icoord
= lp_build_min(int_coord_bld
, icoord
, length
);
587 * Generate code to sample a mipmap level with nearest filtering.
588 * If sampling a cube texture, r = cube face in [0,5].
/*
 * lp_build_sample_image_nearest: emit code that samples one mipmap level
 * with nearest filtering and writes the result to colors_out[0..3].
 *
 * Steps visible in this listing:
 *  - lp_build_extract_image_sizes() is called twice: once filling
 *    width_vec / height_vec / depth_vec, and once (with float_size_type,
 *    after flt_size = int_to_float(size)) filling the flt_* vectors.
 *  - s, t and r are wrapped with lp_build_sample_wrap_nearest() using
 *    pot_width / pot_height / pot_depth and wrap_s / wrap_t / wrap_r from
 *    the static state; the results x, y, z get debug names.
 *  - There is a separate branch for PIPE_TEXTURE_CUBE targets whose body is
 *    not visible here.
 *  - lp_build_sample_texel_soa() fetches the texel into colors_out.
 *
 * NOTE(review): several parameters (including unit, size, s, t, r), the
 * conditions guarding the y and z sections (presumably on dims -- confirm),
 * and some call arguments are not visible in this listing.
 */
591 lp_build_sample_image_nearest(struct lp_build_sample_context
*bld
,
594 LLVMValueRef row_stride_vec
,
595 LLVMValueRef img_stride_vec
,
596 LLVMValueRef data_ptr
,
600 LLVMValueRef colors_out
[4])
602 const unsigned dims
= bld
->dims
;
603 LLVMValueRef width_vec
;
604 LLVMValueRef height_vec
;
605 LLVMValueRef depth_vec
;
606 LLVMValueRef flt_size
;
607 LLVMValueRef flt_width_vec
;
608 LLVMValueRef flt_height_vec
;
609 LLVMValueRef flt_depth_vec
;
610 LLVMValueRef x
, y
, z
;
612 lp_build_extract_image_sizes(bld
,
616 &width_vec
, &height_vec
, &depth_vec
);
618 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, size
);
620 lp_build_extract_image_sizes(bld
,
621 bld
->float_size_type
,
624 &flt_width_vec
, &flt_height_vec
, &flt_depth_vec
);
627 * Compute integer texcoords.
629 x
= lp_build_sample_wrap_nearest(bld
, s
, width_vec
, flt_width_vec
,
630 bld
->static_state
->pot_width
,
631 bld
->static_state
->wrap_s
);
632 lp_build_name(x
, "tex.x.wrapped");
635 y
= lp_build_sample_wrap_nearest(bld
, t
, height_vec
, flt_height_vec
,
636 bld
->static_state
->pot_height
,
637 bld
->static_state
->wrap_t
);
638 lp_build_name(y
, "tex.y.wrapped");
641 z
= lp_build_sample_wrap_nearest(bld
, r
, depth_vec
, flt_depth_vec
,
642 bld
->static_state
->pot_depth
,
643 bld
->static_state
->wrap_r
);
644 lp_build_name(z
, "tex.z.wrapped");
646 else if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
658 * Get texture colors.
660 lp_build_sample_texel_soa(bld
, unit
,
661 width_vec
, height_vec
, depth_vec
,
663 row_stride_vec
, img_stride_vec
,
664 data_ptr
, colors_out
);
669 * Generate code to sample a mipmap level with linear filtering.
670 * If sampling a cube texture, r = cube face in [0,5].
/*
 * lp_build_sample_image_linear: emit code that samples one mipmap level
 * with linear filtering and writes the result to colors_out[0..3].
 *
 * Steps visible in this listing:
 *  - Integer and float image sizes are extracted with
 *    lp_build_extract_image_sizes(), as in the nearest variant.
 *  - s, t and r are wrapped with lp_build_sample_wrap_linear(); the wrapped
 *    coordinates x0/x1, y0/y1, z0/z1 get debug names. For
 *    PIPE_TEXTURE_CUBE, z0 = z1 = r (the cube face). Another branch sets
 *    y0, y1, t_fpart and z0, z1, r_fpart to NULL.
 *  - Texels are fetched with lp_build_sample_texel_soa() into
 *    neighbors[0][0] and neighbors[0][1]; the 1D path combines them with
 *    lp_build_lerp() using s_fpart.
 *  - The 2D path additionally fetches neighbors[1][0] and neighbors[1][1]
 *    and combines the four with lp_build_lerp_2d() into colors0.
 *  - The 3D path fetches four more texels into neighbors1, builds colors1
 *    the same way, and lerps colors0 with colors1 into colors_out.
 *    Otherwise colors0 is copied to colors_out.
 *
 * NOTE(review): several parameters, the chan declaration, the conditions on
 * dims that select the 1D / 2D / 3D paths, and the coordinate and weight
 * arguments of most calls are not visible in this listing.
 */
673 lp_build_sample_image_linear(struct lp_build_sample_context
*bld
,
676 LLVMValueRef row_stride_vec
,
677 LLVMValueRef img_stride_vec
,
678 LLVMValueRef data_ptr
,
682 LLVMValueRef colors_out
[4])
684 const unsigned dims
= bld
->dims
;
685 LLVMValueRef width_vec
;
686 LLVMValueRef height_vec
;
687 LLVMValueRef depth_vec
;
688 LLVMValueRef flt_size
;
689 LLVMValueRef flt_width_vec
;
690 LLVMValueRef flt_height_vec
;
691 LLVMValueRef flt_depth_vec
;
692 LLVMValueRef x0
, y0
, z0
, x1
, y1
, z1
;
693 LLVMValueRef s_fpart
, t_fpart
, r_fpart
;
694 LLVMValueRef neighbors
[2][2][4];
697 lp_build_extract_image_sizes(bld
,
701 &width_vec
, &height_vec
, &depth_vec
);
703 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, size
);
705 lp_build_extract_image_sizes(bld
,
706 bld
->float_size_type
,
709 &flt_width_vec
, &flt_height_vec
, &flt_depth_vec
);
712 * Compute integer texcoords.
714 lp_build_sample_wrap_linear(bld
, s
, width_vec
, flt_width_vec
,
715 bld
->static_state
->pot_width
,
716 bld
->static_state
->wrap_s
,
718 lp_build_name(x0
, "tex.x0.wrapped");
719 lp_build_name(x1
, "tex.x1.wrapped");
722 lp_build_sample_wrap_linear(bld
, t
, height_vec
, flt_height_vec
,
723 bld
->static_state
->pot_height
,
724 bld
->static_state
->wrap_t
,
726 lp_build_name(y0
, "tex.y0.wrapped");
727 lp_build_name(y1
, "tex.y1.wrapped");
730 lp_build_sample_wrap_linear(bld
, r
, depth_vec
, flt_depth_vec
,
731 bld
->static_state
->pot_depth
,
732 bld
->static_state
->wrap_r
,
734 lp_build_name(z0
, "tex.z0.wrapped");
735 lp_build_name(z1
, "tex.z1.wrapped");
737 else if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
738 z0
= z1
= r
; /* cube face */
747 y0
= y1
= t_fpart
= NULL
;
748 z0
= z1
= r_fpart
= NULL
;
752 * Get texture colors.
754 /* get x0/x1 texels */
755 lp_build_sample_texel_soa(bld
, unit
,
756 width_vec
, height_vec
, depth_vec
,
758 row_stride_vec
, img_stride_vec
,
759 data_ptr
, neighbors
[0][0]);
760 lp_build_sample_texel_soa(bld
, unit
,
761 width_vec
, height_vec
, depth_vec
,
763 row_stride_vec
, img_stride_vec
,
764 data_ptr
, neighbors
[0][1]);
767 /* Interpolate two samples from 1D image to produce one color */
768 for (chan
= 0; chan
< 4; chan
++) {
769 colors_out
[chan
] = lp_build_lerp(&bld
->texel_bld
, s_fpart
,
770 neighbors
[0][0][chan
],
771 neighbors
[0][1][chan
]);
776 LLVMValueRef colors0
[4];
778 /* get x0/x1 texels at y1 */
779 lp_build_sample_texel_soa(bld
, unit
,
780 width_vec
, height_vec
, depth_vec
,
782 row_stride_vec
, img_stride_vec
,
783 data_ptr
, neighbors
[1][0]);
784 lp_build_sample_texel_soa(bld
, unit
,
785 width_vec
, height_vec
, depth_vec
,
787 row_stride_vec
, img_stride_vec
,
788 data_ptr
, neighbors
[1][1]);
790 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
791 for (chan
= 0; chan
< 4; chan
++) {
792 colors0
[chan
] = lp_build_lerp_2d(&bld
->texel_bld
,
794 neighbors
[0][0][chan
],
795 neighbors
[0][1][chan
],
796 neighbors
[1][0][chan
],
797 neighbors
[1][1][chan
]);
801 LLVMValueRef neighbors1
[2][2][4];
802 LLVMValueRef colors1
[4];
804 /* get x0/x1/y0/y1 texels at z1 */
805 lp_build_sample_texel_soa(bld
, unit
,
806 width_vec
, height_vec
, depth_vec
,
808 row_stride_vec
, img_stride_vec
,
809 data_ptr
, neighbors1
[0][0]);
810 lp_build_sample_texel_soa(bld
, unit
,
811 width_vec
, height_vec
, depth_vec
,
813 row_stride_vec
, img_stride_vec
,
814 data_ptr
, neighbors1
[0][1]);
815 lp_build_sample_texel_soa(bld
, unit
,
816 width_vec
, height_vec
, depth_vec
,
818 row_stride_vec
, img_stride_vec
,
819 data_ptr
, neighbors1
[1][0]);
820 lp_build_sample_texel_soa(bld
, unit
,
821 width_vec
, height_vec
, depth_vec
,
823 row_stride_vec
, img_stride_vec
,
824 data_ptr
, neighbors1
[1][1]);
826 /* Bilinear interpolate the four samples from the second Z slice */
827 for (chan
= 0; chan
< 4; chan
++) {
828 colors1
[chan
] = lp_build_lerp_2d(&bld
->texel_bld
,
830 neighbors1
[0][0][chan
],
831 neighbors1
[0][1][chan
],
832 neighbors1
[1][0][chan
],
833 neighbors1
[1][1][chan
]);
836 /* Linearly interpolate the two samples from the two 3D slices */
837 for (chan
= 0; chan
< 4; chan
++) {
838 colors_out
[chan
] = lp_build_lerp(&bld
->texel_bld
,
840 colors0
[chan
], colors1
[chan
]);
845 for (chan
= 0; chan
< 4; chan
++) {
846 colors_out
[chan
] = colors0
[chan
];
854 * Sample the texture/mipmap using given image filter and mip filter.
855 * data0_ptr and data1_ptr point to the two mipmap levels to sample
856 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
857 * If we're using nearest miplevel sampling the '1' values will be null/unused.
/*
 * lp_build_sample_mipmap: emit code that samples one or two mipmap levels
 * with the given image filter and mip filter, storing the result through
 * the pointers in colors_out[0..3] with LLVMBuildStore().
 *
 * Steps visible in this listing:
 *  - Level 0: lp_build_mipmap_level_sizes() and lp_build_get_mipmap_level()
 *    for ilevel0, then lp_build_sample_image_nearest() or
 *    lp_build_sample_image_linear() depending on img_filter (asserted to be
 *    PIPE_TEX_FILTER_LINEAR in the second case). colors0 is stored to
 *    colors_out.
 *  - Only when mip_filter == PIPE_TEX_MIPFILTER_LINEAR: need_lerp is built
 *    from lod_fpart. With one quad it is a single LLVMRealUGT compare
 *    against zero; otherwise lod_fpart is first clamped to >= 0, compared,
 *    and reduced with lp_build_any_true_range(). Inside an lp_build_if on
 *    need_lerp the second level (ilevel1) is sampled, lod_fpart is expanded
 *    with lp_build_unpack_broadcast_aos_scalars(), and colors0 is lerped
 *    with colors1 and stored again.
 *
 * NOTE(review): several parameters (unit, img_filter, mip_filter, the
 * coordinates), the chan declaration and many call arguments are not
 * visible in this listing.
 */
860 lp_build_sample_mipmap(struct lp_build_sample_context
*bld
,
867 LLVMValueRef ilevel0
,
868 LLVMValueRef ilevel1
,
869 LLVMValueRef lod_fpart
,
870 LLVMValueRef
*colors_out
)
872 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
873 LLVMValueRef size0
= NULL
;
874 LLVMValueRef size1
= NULL
;
875 LLVMValueRef row_stride0_vec
= NULL
;
876 LLVMValueRef row_stride1_vec
= NULL
;
877 LLVMValueRef img_stride0_vec
= NULL
;
878 LLVMValueRef img_stride1_vec
= NULL
;
879 LLVMValueRef data_ptr0
= NULL
;
880 LLVMValueRef data_ptr1
= NULL
;
881 LLVMValueRef colors0
[4], colors1
[4];
884 /* sample the first mipmap level */
885 lp_build_mipmap_level_sizes(bld
, ilevel0
,
887 &row_stride0_vec
, &img_stride0_vec
);
888 data_ptr0
= lp_build_get_mipmap_level(bld
, ilevel0
);
889 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
890 lp_build_sample_image_nearest(bld
, unit
,
892 row_stride0_vec
, img_stride0_vec
,
897 assert(img_filter
== PIPE_TEX_FILTER_LINEAR
);
898 lp_build_sample_image_linear(bld
, unit
,
900 row_stride0_vec
, img_stride0_vec
,
905 /* Store the first level's colors in the output variables */
906 for (chan
= 0; chan
< 4; chan
++) {
907 LLVMBuildStore(builder
, colors0
[chan
], colors_out
[chan
]);
910 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
911 struct lp_build_if_state if_ctx
;
912 LLVMValueRef need_lerp
;
913 unsigned num_quads
= bld
->coord_bld
.type
.length
/ 4;
915 /* need_lerp = lod_fpart > 0 */
916 if (num_quads
== 1) {
917 need_lerp
= LLVMBuildFCmp(builder
, LLVMRealUGT
,
918 lod_fpart
, bld
->perquadf_bld
.zero
,
923 * We'll do mip filtering if any of the quads need it.
924 * It might be better to split the vectors here and only fetch/filter
925 * quads which need it.
928 * We unfortunately need to clamp lod_fpart here since we can get
929 * negative values which would screw up filtering if not all
930 * lod_fpart values have same sign.
932 lod_fpart
= lp_build_max(&bld
->perquadf_bld
, lod_fpart
,
933 bld
->perquadf_bld
.zero
);
934 need_lerp
= lp_build_compare(bld
->gallivm
, bld
->perquadf_bld
.type
,
936 lod_fpart
, bld
->perquadf_bld
.zero
);
937 need_lerp
= lp_build_any_true_range(&bld
->perquadi_bld
, num_quads
, need_lerp
);
940 lp_build_if(&if_ctx
, bld
->gallivm
, need_lerp
);
942 /* sample the second mipmap level */
943 lp_build_mipmap_level_sizes(bld
, ilevel1
,
945 &row_stride1_vec
, &img_stride1_vec
);
946 data_ptr1
= lp_build_get_mipmap_level(bld
, ilevel1
);
947 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
948 lp_build_sample_image_nearest(bld
, unit
,
950 row_stride1_vec
, img_stride1_vec
,
955 lp_build_sample_image_linear(bld
, unit
,
957 row_stride1_vec
, img_stride1_vec
,
962 /* interpolate samples from the two mipmap levels */
964 lod_fpart
= lp_build_unpack_broadcast_aos_scalars(bld
->gallivm
,
965 bld
->perquadf_bld
.type
,
969 for (chan
= 0; chan
< 4; chan
++) {
970 colors0
[chan
] = lp_build_lerp(&bld
->texel_bld
, lod_fpart
,
971 colors0
[chan
], colors1
[chan
]);
972 LLVMBuildStore(builder
, colors0
[chan
], colors_out
[chan
]);
975 lp_build_endif(&if_ctx
);
980 * Calculate cube face, lod, mip levels.
/*
 * lp_build_sample_common: emit the setup shared by the sampling paths --
 * cube face selection, level-of-detail, and the mipmap level(s) to read.
 *
 * Steps visible in this listing:
 *  - For PIPE_TEXTURE_CUBE targets, lp_build_cube_lookup() picks the face;
 *    *s and *t are replaced by the face coordinates, *r by the face index,
 *    and derivs is redirected to a local face_derivs whose ddx_ddy[0] is
 *    recomputed with lp_build_packed_ddx_ddy_twocoord() from the new
 *    (s, t) and whose ddx_ddy[1] is NULL.
 *  - When min_filter != mag_filter or mip_filter is not
 *    PIPE_TEX_MIPFILTER_NONE, lp_build_lod_selector() fills lod_ipart and
 *    lod_fpart. Another path sets *lod_ipart to the per-quad integer zero.
 *  - The switch on mip_filter picks the levels: NONE uses
 *    lp_build_nearest_mip_level() on the cube path (commented as an LLVM 2.7
 *    work-around) and a broadcast first_level on the other; NEAREST uses
 *    lp_build_nearest_mip_level(); LINEAR uses lp_build_linear_mip_levels().
 *
 * lod_bias and explicit_lod are marked optional by their inline comments.
 *
 * NOTE(review): several parameters (unit, s, t, r), the guard around the
 * debugging printf, the else branches, the break statements and some call
 * arguments are not visible in this listing.
 */
983 lp_build_sample_common(struct lp_build_sample_context
*bld
,
988 const struct lp_derivatives
*derivs
,
989 LLVMValueRef lod_bias
, /* optional */
990 LLVMValueRef explicit_lod
, /* optional */
991 LLVMValueRef
*lod_ipart
,
992 LLVMValueRef
*lod_fpart
,
993 LLVMValueRef
*ilevel0
,
994 LLVMValueRef
*ilevel1
)
996 const unsigned mip_filter
= bld
->static_state
->min_mip_filter
;
997 const unsigned min_filter
= bld
->static_state
->min_img_filter
;
998 const unsigned mag_filter
= bld
->static_state
->mag_img_filter
;
999 LLVMValueRef first_level
;
1000 struct lp_derivatives face_derivs
;
1003 printf("%s mip %d min %d mag %d\n", __FUNCTION__,
1004 mip_filter, min_filter, mag_filter);
1008 * Choose cube face, recompute texcoords and derivatives for the chosen face.
1010 if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
1011 LLVMValueRef face
, face_s
, face_t
;
1012 lp_build_cube_lookup(bld
, *s
, *t
, *r
, &face
, &face_s
, &face_t
);
1013 *s
= face_s
; /* vec */
1014 *t
= face_t
; /* vec */
1015 /* use 'r' to indicate cube face */
1016 *r
= face
; /* vec */
1018 /* recompute ddx, ddy using the new (s,t) face texcoords */
1019 face_derivs
.ddx_ddy
[0] = lp_build_packed_ddx_ddy_twocoord(&bld
->coord_bld
, *s
, *t
);
1020 face_derivs
.ddx_ddy
[1] = NULL
;
1021 derivs
= &face_derivs
;
1025 * Compute the level of detail (float).
1027 if (min_filter
!= mag_filter
||
1028 mip_filter
!= PIPE_TEX_MIPFILTER_NONE
) {
1029 /* Need to compute lod either to choose mipmap levels or to
1030 * distinguish between minification/magnification with one mipmap level.
1032 lp_build_lod_selector(bld
, unit
, derivs
,
1033 lod_bias
, explicit_lod
,
1035 lod_ipart
, lod_fpart
);
1037 *lod_ipart
= bld
->perquadi_bld
.zero
;
1041 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
1043 switch (mip_filter
) {
1045 assert(0 && "bad mip_filter value in lp_build_sample_soa()");
1047 case PIPE_TEX_MIPFILTER_NONE
:
1048 /* always use mip level 0 */
1049 if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
1050 /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1051 * We should be able to set ilevel0 = const(0) but that causes
1052 * bad x86 code to be emitted.
1053 * XXX should probably disable that on other llvm versions.
1056 lp_build_nearest_mip_level(bld
, unit
, *lod_ipart
, ilevel0
);
1059 first_level
= bld
->dynamic_state
->first_level(bld
->dynamic_state
,
1060 bld
->gallivm
, unit
);
1061 first_level
= lp_build_broadcast_scalar(&bld
->perquadi_bld
, first_level
);
1062 *ilevel0
= first_level
;
1065 case PIPE_TEX_MIPFILTER_NEAREST
:
1067 lp_build_nearest_mip_level(bld
, unit
, *lod_ipart
, ilevel0
);
1069 case PIPE_TEX_MIPFILTER_LINEAR
:
1072 lp_build_linear_mip_levels(bld
, unit
,
1073 *lod_ipart
, lod_fpart
,
1080 * General texture sampling codegen.
1081 * This function handles texture sampling for all texture targets (1D,
1082 * 2D, 3D, cube) and all filtering modes.
/*
 * lp_build_sample_general: emit the general texture sampling code and
 * return the four channel values in colors_out[0..3].
 *
 * Steps visible in this listing:
 *  - Four stack slots (texels[0..3]) are created with lp_build_alloca() and
 *    named per sampler unit and channel.
 *  - If min_filter == mag_filter, lp_build_sample_mipmap() is called once
 *    with min_filter and mip_filter.
 *  - Otherwise minify = (lod_ipart >= 0) is built as a signed integer
 *    compare, and an lp_build_if / lp_build_else pair calls
 *    lp_build_sample_mipmap() with min_filter and mip_filter on the true
 *    side, and with mag_filter, PIPE_TEX_MIPFILTER_NONE and NULL for the
 *    second level and lod fraction on the false side.
 *  - Finally each texels[chan] slot is loaded into colors_out[chan] and
 *    named.
 *
 * NOTE(review): several parameters (unit and the coordinates), the chan
 * declaration and the trailing arguments of the lp_build_sample_mipmap()
 * calls are not visible in this listing; presumably texels is passed as the
 * output array -- confirm.
 */
1085 lp_build_sample_general(struct lp_build_sample_context
*bld
,
1090 LLVMValueRef lod_ipart
,
1091 LLVMValueRef lod_fpart
,
1092 LLVMValueRef ilevel0
,
1093 LLVMValueRef ilevel1
,
1094 LLVMValueRef
*colors_out
)
1096 struct lp_build_context
*int_bld
= &bld
->int_bld
;
1097 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
1098 const unsigned mip_filter
= bld
->static_state
->min_mip_filter
;
1099 const unsigned min_filter
= bld
->static_state
->min_img_filter
;
1100 const unsigned mag_filter
= bld
->static_state
->mag_img_filter
;
1101 LLVMValueRef texels
[4];
1105 * Get/interpolate texture colors.
1108 for (chan
= 0; chan
< 4; ++chan
) {
1109 texels
[chan
] = lp_build_alloca(bld
->gallivm
, bld
->texel_bld
.vec_type
, "");
1110 lp_build_name(texels
[chan
], "sampler%u_texel_%c_var", unit
, "xyzw"[chan
]);
1113 if (min_filter
== mag_filter
) {
1114 /* no need to distinguish between minification and magnification */
1115 lp_build_sample_mipmap(bld
, unit
,
1116 min_filter
, mip_filter
,
1118 ilevel0
, ilevel1
, lod_fpart
,
1122 /* Emit conditional to choose min image filter or mag image filter
1123 * depending on the lod being > 0 or <= 0, respectively.
1125 struct lp_build_if_state if_ctx
;
1126 LLVMValueRef minify
;
1128 /* minify = lod >= 0.0 */
1129 minify
= LLVMBuildICmp(builder
, LLVMIntSGE
,
1130 lod_ipart
, int_bld
->zero
, "");
1132 lp_build_if(&if_ctx
, bld
->gallivm
, minify
);
1134 /* Use the minification filter */
1135 lp_build_sample_mipmap(bld
, unit
,
1136 min_filter
, mip_filter
,
1138 ilevel0
, ilevel1
, lod_fpart
,
1141 lp_build_else(&if_ctx
);
1143 /* Use the magnification filter */
1144 lp_build_sample_mipmap(bld
, unit
,
1145 mag_filter
, PIPE_TEX_MIPFILTER_NONE
,
1147 ilevel0
, NULL
, NULL
,
1150 lp_build_endif(&if_ctx
);
1153 for (chan
= 0; chan
< 4; ++chan
) {
1154 colors_out
[chan
] = LLVMBuildLoad(builder
, texels
[chan
], "");
1155 lp_build_name(colors_out
[chan
], "sampler%u_texel_%c", unit
, "xyzw"[chan
]);
1161 * Do shadow test/comparison.
1162 * \param p the texcoord Z (aka R, aka P) component
1163 * \param texel the texel to compare against (use the X channel)
1166 lp_build_sample_compare(struct lp_build_sample_context
*bld
,
1168 LLVMValueRef texel
[4])
1170 struct lp_build_context
*texel_bld
= &bld
->texel_bld
;
1171 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
1173 const unsigned chan
= 0;
1175 if (bld
->static_state
->compare_mode
== PIPE_TEX_COMPARE_NONE
)
1180 LLVMValueRef indx
= lp_build_const_int32(bld
->gallivm
, 0);
1181 LLVMValueRef coord
= LLVMBuildExtractElement(builder
, p
, indx
, "");
1182 LLVMValueRef tex
= LLVMBuildExtractElement(builder
, texel
[chan
], indx
, "");
1183 lp_build_printf(bld
->gallivm
, "shadow compare coord %f to texture %f\n",
1187 /* Clamp p coords to [0,1] */
1188 p
= lp_build_clamp(&bld
->coord_bld
, p
,
1189 bld
->coord_bld
.zero
,
1190 bld
->coord_bld
.one
);
1192 /* result = (p FUNC texel) ? 1 : 0 */
1193 res
= lp_build_cmp(texel_bld
, bld
->static_state
->compare_func
,
1195 res
= lp_build_select(texel_bld
, res
, texel_bld
->one
, texel_bld
->zero
);
1197 /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1201 texel
[3] = texel_bld
->one
;
1206 * Just set texels to white instead of actually sampling the texture.
1210 lp_build_sample_nop(struct gallivm_state
*gallivm
,
1211 struct lp_type type
,
1212 unsigned num_coords
,
1213 const LLVMValueRef
*coords
,
1214 LLVMValueRef texel_out
[4])
1216 LLVMValueRef one
= lp_build_one(gallivm
, type
);
1219 for (chan
= 0; chan
< 4; chan
++) {
1220 texel_out
[chan
] = one
;
1226 * Build texture sampling code.
1227 * 'texel' will return a vector of four LLVMValueRefs corresponding to
1229 * \param type vector float type to use for coords, etc.
1230 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y
1233 lp_build_sample_soa(struct gallivm_state
*gallivm
,
1234 const struct lp_sampler_static_state
*static_state
,
1235 struct lp_sampler_dynamic_state
*dynamic_state
,
1236 struct lp_type type
,
1238 unsigned num_coords
,
1239 const LLVMValueRef
*coords
,
1240 const struct lp_derivatives
*derivs
,
1241 LLVMValueRef lod_bias
, /* optional */
1242 LLVMValueRef explicit_lod
, /* optional */
1243 LLVMValueRef texel_out
[4])
1245 unsigned dims
= texture_dims(static_state
->target
);
1246 struct lp_build_sample_context bld
;
1247 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
1248 LLVMBuilderRef builder
= gallivm
->builder
;
1249 LLVMValueRef tex_width
, tex_height
, tex_depth
;
1255 enum pipe_format fmt
= static_state
->format
;
1256 debug_printf("Sample from %s\n", util_format_name(fmt
));
1259 assert(type
.floating
);
1261 /* Setup our build context */
1262 memset(&bld
, 0, sizeof bld
);
1263 bld
.gallivm
= gallivm
;
1264 bld
.static_state
= static_state
;
1265 bld
.dynamic_state
= dynamic_state
;
1266 bld
.format_desc
= util_format_description(static_state
->format
);
1269 bld
.vector_width
= lp_type_width(type
);
1271 bld
.float_type
= lp_type_float(32);
1272 bld
.int_type
= lp_type_int(32);
1273 bld
.coord_type
= type
;
1274 bld
.int_coord_type
= lp_int_type(type
);
1275 bld
.float_size_type
= lp_type_float(32);
1276 bld
.float_size_type
.length
= dims
> 1 ? 4 : 1;
1277 bld
.int_size_type
= lp_int_type(bld
.float_size_type
);
1278 bld
.texel_type
= type
;
1279 bld
.perquadf_type
= type
;
1280 /* we want native vector size to be able to use our intrinsics */
1281 bld
.perquadf_type
.length
= type
.length
> 4 ? ((type
.length
+ 15) / 16) * 4 : 1;
1282 bld
.perquadi_type
= lp_int_type(bld
.perquadf_type
);
1284 lp_build_context_init(&bld
.float_bld
, gallivm
, bld
.float_type
);
1285 lp_build_context_init(&bld
.float_vec_bld
, gallivm
, type
);
1286 lp_build_context_init(&bld
.int_bld
, gallivm
, bld
.int_type
);
1287 lp_build_context_init(&bld
.coord_bld
, gallivm
, bld
.coord_type
);
1288 lp_build_context_init(&bld
.int_coord_bld
, gallivm
, bld
.int_coord_type
);
1289 lp_build_context_init(&bld
.int_size_bld
, gallivm
, bld
.int_size_type
);
1290 lp_build_context_init(&bld
.float_size_bld
, gallivm
, bld
.float_size_type
);
1291 lp_build_context_init(&bld
.texel_bld
, gallivm
, bld
.texel_type
);
1292 lp_build_context_init(&bld
.perquadf_bld
, gallivm
, bld
.perquadf_type
);
1293 lp_build_context_init(&bld
.perquadi_bld
, gallivm
, bld
.perquadi_type
);
1295 /* Get the dynamic state */
1296 tex_width
= dynamic_state
->width(dynamic_state
, gallivm
, unit
);
1297 tex_height
= dynamic_state
->height(dynamic_state
, gallivm
, unit
);
1298 tex_depth
= dynamic_state
->depth(dynamic_state
, gallivm
, unit
);
1299 bld
.row_stride_array
= dynamic_state
->row_stride(dynamic_state
, gallivm
, unit
);
1300 bld
.img_stride_array
= dynamic_state
->img_stride(dynamic_state
, gallivm
, unit
);
1301 bld
.data_array
= dynamic_state
->data_ptr(dynamic_state
, gallivm
, unit
);
1302 /* Note that data_array is an array[level] of pointers to texture images */
1308 /* width, height, depth as single int vector */
1310 bld
.int_size
= tex_width
;
1313 bld
.int_size
= LLVMBuildInsertElement(builder
, bld
.int_size_bld
.undef
,
1314 tex_width
, LLVMConstInt(i32t
, 0, 0), "");
1316 bld
.int_size
= LLVMBuildInsertElement(builder
, bld
.int_size
,
1317 tex_height
, LLVMConstInt(i32t
, 1, 0), "");
1319 bld
.int_size
= LLVMBuildInsertElement(builder
, bld
.int_size
,
1320 tex_depth
, LLVMConstInt(i32t
, 2, 0), "");
1326 /* For debug: no-op texture sampling */
1327 lp_build_sample_nop(gallivm
,
1334 LLVMValueRef lod_ipart
= NULL
, lod_fpart
= NULL
;
1335 LLVMValueRef ilevel0
= NULL
, ilevel1
= NULL
;
1336 unsigned num_quads
= type
.length
/ 4;
1337 const unsigned mip_filter
= bld
.static_state
->min_mip_filter
;
1338 boolean use_aos
= util_format_fits_8unorm(bld
.format_desc
) &&
1339 lp_is_simple_wrap_mode(static_state
->wrap_s
) &&
1340 lp_is_simple_wrap_mode(static_state
->wrap_t
);
1342 if ((gallivm_debug
& GALLIVM_DEBUG_PERF
) &&
1343 !use_aos
&& util_format_fits_8unorm(bld
.format_desc
)) {
1344 debug_printf("%s: using floating point linear filtering for %s\n",
1345 __FUNCTION__
, bld
.format_desc
->short_name
);
1346 debug_printf(" min_img %d mag_img %d mip %d wraps %d wrapt %d\n",
1347 static_state
->min_img_filter
,
1348 static_state
->mag_img_filter
,
1349 static_state
->min_mip_filter
,
1350 static_state
->wrap_s
,
1351 static_state
->wrap_t
);
1354 lp_build_sample_common(&bld
, unit
,
1356 derivs
, lod_bias
, explicit_lod
,
1357 &lod_ipart
, &lod_fpart
,
1358 &ilevel0
, &ilevel1
);
1361 * we only try 8-wide sampling with soa as it appears to
1362 * be a loss with aos with AVX.
1364 if (num_quads
== 1 || (mip_filter
== PIPE_TEX_MIPFILTER_NONE
&&
1367 if (num_quads
> 1) {
1368 LLVMValueRef index0
= lp_build_const_int32(gallivm
, 0);
1369 /* These parameters are the same for all quads */
1370 lod_ipart
= LLVMBuildExtractElement(builder
, lod_ipart
, index0
, "");
1371 ilevel0
= LLVMBuildExtractElement(builder
, ilevel0
, index0
, "");
1374 /* do sampling/filtering with fixed pt arithmetic */
1375 lp_build_sample_aos(&bld
, unit
,
1377 lod_ipart
, lod_fpart
,
1383 lp_build_sample_general(&bld
, unit
,
1385 lod_ipart
, lod_fpart
,
1391 struct lp_build_if_state if_ctx
;
1392 LLVMValueRef notsame_levels
, notsame
;
1393 LLVMValueRef index0
= lp_build_const_int32(gallivm
, 0);
1394 LLVMValueRef texels
[4];
1395 LLVMValueRef texelout
[4];
1398 texels
[0] = lp_build_alloca(gallivm
, bld
.texel_bld
.vec_type
, "texr");
1399 texels
[1] = lp_build_alloca(gallivm
, bld
.texel_bld
.vec_type
, "texg");
1400 texels
[2] = lp_build_alloca(gallivm
, bld
.texel_bld
.vec_type
, "texb");
1401 texels
[3] = lp_build_alloca(gallivm
, bld
.texel_bld
.vec_type
, "texa");
1403 /* only build the if if we MAY split, otherwise always split */
1405 notsame
= lp_build_extract_broadcast(gallivm
,
1406 bld
.perquadi_bld
.type
,
1407 bld
.perquadi_bld
.type
,
1409 notsame
= lp_build_sub(&bld
.perquadi_bld
, ilevel0
, notsame
);
1410 notsame_levels
= lp_build_any_true_range(&bld
.perquadi_bld
, num_quads
,
1412 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
1413 notsame
= lp_build_extract_broadcast(gallivm
,
1414 bld
.perquadi_bld
.type
,
1415 bld
.perquadi_bld
.type
,
1417 notsame
= lp_build_sub(&bld
.perquadi_bld
, ilevel1
, notsame
);
1418 notsame
= lp_build_any_true_range(&bld
.perquadi_bld
, num_quads
, notsame
);
1419 notsame_levels
= LLVMBuildOr(builder
, notsame_levels
, notsame
, "");
1421 lp_build_if(&if_ctx
, gallivm
, notsame_levels
);
1425 struct lp_build_sample_context bld4
;
1426 struct lp_type type4
= type
;
1428 LLVMValueRef texelout4
[4];
1429 LLVMValueRef texelouttmp
[4][LP_MAX_VECTOR_LENGTH
/16];
1433 /* Setup our build context */
1434 memset(&bld4
, 0, sizeof bld4
);
1435 bld4
.gallivm
= bld
.gallivm
;
1436 bld4
.static_state
= bld
.static_state
;
1437 bld4
.dynamic_state
= bld
.dynamic_state
;
1438 bld4
.format_desc
= bld
.format_desc
;
1439 bld4
.dims
= bld
.dims
;
1440 bld4
.row_stride_array
= bld
.row_stride_array
;
1441 bld4
.img_stride_array
= bld
.img_stride_array
;
1442 bld4
.data_array
= bld
.data_array
;
1443 bld4
.int_size
= bld
.int_size
;
1445 bld4
.vector_width
= lp_type_width(type4
);
1447 bld4
.float_type
= lp_type_float(32);
1448 bld4
.int_type
= lp_type_int(32);
1449 bld4
.coord_type
= type4
;
1450 bld4
.int_coord_type
= lp_int_type(type4
);
1451 bld4
.float_size_type
= lp_type_float(32);
1452 bld4
.float_size_type
.length
= dims
> 1 ? 4 : 1;
1453 bld4
.int_size_type
= lp_int_type(bld4
.float_size_type
);
1454 bld4
.texel_type
= type4
;
1455 bld4
.perquadf_type
= type4
;
1456 /* we want native vector size to be able to use our intrinsics */
1457 bld4
.perquadf_type
.length
= 1;
1458 bld4
.perquadi_type
= lp_int_type(bld4
.perquadf_type
);
1460 lp_build_context_init(&bld4
.float_bld
, gallivm
, bld4
.float_type
);
1461 lp_build_context_init(&bld4
.float_vec_bld
, gallivm
, type4
);
1462 lp_build_context_init(&bld4
.int_bld
, gallivm
, bld4
.int_type
);
1463 lp_build_context_init(&bld4
.coord_bld
, gallivm
, bld4
.coord_type
);
1464 lp_build_context_init(&bld4
.int_coord_bld
, gallivm
, bld4
.int_coord_type
);
1465 lp_build_context_init(&bld4
.int_size_bld
, gallivm
, bld4
.int_size_type
);
1466 lp_build_context_init(&bld4
.float_size_bld
, gallivm
, bld4
.float_size_type
);
1467 lp_build_context_init(&bld4
.texel_bld
, gallivm
, bld4
.texel_type
);
1468 lp_build_context_init(&bld4
.perquadf_bld
, gallivm
, bld4
.perquadf_type
);
1469 lp_build_context_init(&bld4
.perquadi_bld
, gallivm
, bld4
.perquadi_type
);
1471 for (i
= 0; i
< num_quads
; i
++) {
1472 LLVMValueRef s4
, t4
, r4
;
1473 LLVMValueRef lod_iparts
, lod_fparts
= NULL
;
1474 LLVMValueRef ilevel0s
, ilevel1s
= NULL
;
1475 LLVMValueRef indexi
= lp_build_const_int32(gallivm
, i
);
1477 s4
= lp_build_extract_range(gallivm
, s
, 4*i
, 4);
1478 t4
= lp_build_extract_range(gallivm
, t
, 4*i
, 4);
1479 r4
= lp_build_extract_range(gallivm
, r
, 4*i
, 4);
1480 lod_iparts
= LLVMBuildExtractElement(builder
, lod_ipart
, indexi
, "");
1481 ilevel0s
= LLVMBuildExtractElement(builder
, ilevel0
, indexi
, "");
1482 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
1483 ilevel1s
= LLVMBuildExtractElement(builder
, ilevel1
, indexi
, "");
1484 lod_fparts
= LLVMBuildExtractElement(builder
, lod_fpart
, indexi
, "");
1488 /* do sampling/filtering with fixed pt arithmetic */
1489 lp_build_sample_aos(&bld4
, unit
,
1491 lod_iparts
, lod_fparts
,
1497 lp_build_sample_general(&bld4
, unit
,
1499 lod_iparts
, lod_fparts
,
1503 for (j
= 0; j
< 4; j
++) {
1504 texelouttmp
[j
][i
] = texelout4
[j
];
1507 for (j
= 0; j
< 4; j
++) {
1508 texelout
[j
] = lp_build_concat(gallivm
, texelouttmp
[j
], type4
, num_quads
);
1509 LLVMBuildStore(builder
, texelout
[j
], texels
[j
]);
1513 LLVMValueRef ilevel0s
, lod_iparts
, ilevel1s
= NULL
;
1515 lp_build_else(&if_ctx
);
1517 /* These parameters are the same for all quads */
1518 lod_iparts
= LLVMBuildExtractElement(builder
, lod_ipart
, index0
, "");
1519 ilevel0s
= LLVMBuildExtractElement(builder
, ilevel0
, index0
, "");
1520 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
1521 ilevel1s
= LLVMBuildExtractElement(builder
, ilevel1
, index0
, "");
1525 /* do sampling/filtering with fixed pt arithmetic */
1526 lp_build_sample_aos(&bld
, unit
,
1528 lod_iparts
, lod_fpart
,
1534 lp_build_sample_general(&bld
, unit
,
1536 lod_iparts
, lod_fpart
,
1540 for (j
= 0; j
< 4; j
++) {
1541 LLVMBuildStore(builder
, texelout
[j
], texels
[j
]);
1544 lp_build_endif(&if_ctx
);
1547 for (j
= 0; j
< 4; j
++) {
1548 texel_out
[j
] = LLVMBuildLoad(builder
, texels
[j
], "");
1553 lp_build_sample_compare(&bld
, r
, texel_out
);
1555 apply_sampler_swizzle(&bld
, texel_out
);
1559 lp_build_size_query_soa(struct gallivm_state
*gallivm
,
1560 const struct lp_sampler_static_state
*static_state
,
1561 struct lp_sampler_dynamic_state
*dynamic_state
,
1562 struct lp_type int_type
,
1564 LLVMValueRef explicit_lod
,
1565 LLVMValueRef
*sizes_out
)
1570 struct lp_build_context bld_int_vec
;
1572 switch (static_state
->target
) {
1573 case PIPE_TEXTURE_1D
:
1577 case PIPE_TEXTURE_2D
:
1578 case PIPE_TEXTURE_CUBE
:
1579 case PIPE_TEXTURE_RECT
:
1582 case PIPE_TEXTURE_3D
:
1591 assert(!int_type
.floating
);
1593 lp_build_context_init(&bld_int_vec
, gallivm
, lp_type_int_vec(32, 128));
1596 LLVMValueRef first_level
;
1597 lod
= LLVMBuildExtractElement(gallivm
->builder
, explicit_lod
, lp_build_const_int32(gallivm
, 0), "");
1598 first_level
= dynamic_state
->first_level(dynamic_state
, gallivm
, unit
);
1599 lod
= lp_build_broadcast_scalar(&bld_int_vec
,
1600 LLVMBuildAdd(gallivm
->builder
, lod
, first_level
, "lod"));
1603 lod
= bld_int_vec
.zero
;
1606 size
= bld_int_vec
.undef
;
1608 size
= LLVMBuildInsertElement(gallivm
->builder
, size
,
1609 dynamic_state
->width(dynamic_state
, gallivm
, unit
),
1610 lp_build_const_int32(gallivm
, 0), "");
1613 size
= LLVMBuildInsertElement(gallivm
->builder
, size
,
1614 dynamic_state
->height(dynamic_state
, gallivm
, unit
),
1615 lp_build_const_int32(gallivm
, 1), "");
1619 size
= LLVMBuildInsertElement(gallivm
->builder
, size
,
1620 dynamic_state
->depth(dynamic_state
, gallivm
, unit
),
1621 lp_build_const_int32(gallivm
, 2), "");
1624 size
= lp_build_minify(&bld_int_vec
, size
, lod
);
1626 for (i
=0; i
< dims
; i
++) {
1627 sizes_out
[i
] = lp_build_extract_broadcast(gallivm
, bld_int_vec
.type
, int_type
,
1629 lp_build_const_int32(gallivm
, i
));