1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Texture sampling -- AoS.
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "util/u_cpu_detect.h"
44 #include "lp_bld_debug.h"
45 #include "lp_bld_type.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_arit.h"
49 #include "lp_bld_bitarit.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_swizzle.h"
52 #include "lp_bld_pack.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_gather.h"
55 #include "lp_bld_format.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_sample.h"
58 #include "lp_bld_sample_aos.h"
59 #include "lp_bld_quad.h"
63 * Build LLVM code for texture coord wrapping, for nearest filtering,
64 * for scaled integer texcoords.
65 * \param block_length is the length of the pixel block along the
67 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
68 * \param length the texture size along one dimension
69 * \param stride pixel stride along the coordinate axis (in bytes)
70 * \param is_pot if TRUE, length is a power of two
71 * \param wrap_mode one of PIPE_TEX_WRAP_x
72 * \param out_offset byte offset for the wrapped coordinate
73 * \param out_i resulting sub-block pixel coordinate for coord0
76 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context
*bld
,
77 unsigned block_length
,
84 LLVMValueRef
*out_offset
,
87 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
88 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
89 LLVMValueRef length_minus_one
;
91 length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
94 case PIPE_TEX_WRAP_REPEAT
:
96 coord
= LLVMBuildAnd(builder
, coord
, length_minus_one
, "");
98 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
99 LLVMValueRef length_f
= lp_build_int_to_float(coord_bld
, length
);
100 coord
= lp_build_fract_safe(coord_bld
, coord_f
);
101 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
102 coord
= lp_build_itrunc(coord_bld
, coord
);
106 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
107 coord
= lp_build_max(int_coord_bld
, coord
, int_coord_bld
->zero
);
108 coord
= lp_build_min(int_coord_bld
, coord
, length_minus_one
);
111 case PIPE_TEX_WRAP_CLAMP
:
112 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
113 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
114 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
115 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
116 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
121 lp_build_sample_partial_offset(int_coord_bld
, block_length
, coord
, stride
,
127 * Build LLVM code for texture coord wrapping, for nearest filtering,
128 * for float texcoords.
129 * \param coord the incoming texcoord (s,t,r or q)
130 * \param length the texture size along one dimension
131 * \param is_pot if TRUE, length is a power of two
132 * \param wrap_mode one of PIPE_TEX_WRAP_x
133 * \param icoord the texcoord after wrapping, as int
136 lp_build_sample_wrap_nearest_float(struct lp_build_sample_context
*bld
,
141 LLVMValueRef
*icoord
)
143 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
144 LLVMValueRef length_minus_one
;
147 case PIPE_TEX_WRAP_REPEAT
:
148 /* take fraction, unnormalize */
149 coord
= lp_build_fract_safe(coord_bld
, coord
);
150 coord
= lp_build_mul(coord_bld
, coord
, length
);
151 *icoord
= lp_build_itrunc(coord_bld
, coord
);
153 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
154 length_minus_one
= lp_build_sub(coord_bld
, length
, coord_bld
->one
);
155 if (bld
->static_state
->normalized_coords
) {
156 /* scale coord to length */
157 coord
= lp_build_mul(coord_bld
, coord
, length
);
159 coord
= lp_build_clamp(coord_bld
, coord
, coord_bld
->zero
,
161 *icoord
= lp_build_itrunc(coord_bld
, coord
);
164 case PIPE_TEX_WRAP_CLAMP
:
165 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
166 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
167 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
168 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
169 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
177 * Build LLVM code for texture coord wrapping, for linear filtering,
178 * for scaled integer texcoords.
179 * \param block_length is the length of the pixel block along the
181 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
182 * \param length the texture size along one dimension
183 * \param stride pixel stride along the coordinate axis (in bytes)
184 * \param is_pot if TRUE, length is a power of two
185 * \param wrap_mode one of PIPE_TEX_WRAP_x
186 * \param offset0 resulting relative offset for coord0
187 * \param offset1 resulting relative offset for coord0 + 1
188 * \param i0 resulting sub-block pixel coordinate for coord0
189 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
192 lp_build_sample_wrap_linear_int(struct lp_build_sample_context
*bld
,
193 unsigned block_length
,
195 LLVMValueRef
*weight_i
,
196 LLVMValueRef coord_f
,
201 LLVMValueRef
*offset0
,
202 LLVMValueRef
*offset1
,
206 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
207 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
208 LLVMValueRef length_minus_one
;
209 LLVMValueRef lmask
, umask
, mask
;
212 * If the pixel block covers more than one pixel then there is no easy
213 * way to calculate offset1 relative to offset0. Instead, compute them
214 * independently. Otherwise, try to compute offset0 and offset1 with
215 * a single stride multiplication.
218 length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
220 if (block_length
!= 1) {
223 case PIPE_TEX_WRAP_REPEAT
:
225 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
226 coord0
= LLVMBuildAnd(builder
, coord0
, length_minus_one
, "");
227 coord1
= LLVMBuildAnd(builder
, coord1
, length_minus_one
, "");
232 LLVMValueRef length_f
= lp_build_int_to_float(&bld
->coord_bld
, length
);
233 lp_build_coord_repeat_npot_linear(bld
, coord_f
,
236 mask
= lp_build_compare(bld
->gallivm
, int_coord_bld
->type
,
237 PIPE_FUNC_NOTEQUAL
, coord0
, length_minus_one
);
238 coord1
= LLVMBuildAnd(builder
,
239 lp_build_add(int_coord_bld
, coord0
,
242 weight
= lp_build_mul_imm(&bld
->coord_bld
, weight
, 256);
243 *weight_i
= lp_build_itrunc(&bld
->coord_bld
, weight
);
247 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
248 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
249 coord0
= lp_build_clamp(int_coord_bld
, coord0
, int_coord_bld
->zero
,
251 coord1
= lp_build_clamp(int_coord_bld
, coord1
, int_coord_bld
->zero
,
255 case PIPE_TEX_WRAP_CLAMP
:
256 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
257 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
258 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
259 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
260 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
263 coord0
= int_coord_bld
->zero
;
264 coord1
= int_coord_bld
->zero
;
267 lp_build_sample_partial_offset(int_coord_bld
, block_length
, coord0
, stride
,
269 lp_build_sample_partial_offset(int_coord_bld
, block_length
, coord1
, stride
,
274 *i0
= int_coord_bld
->zero
;
275 *i1
= int_coord_bld
->zero
;
278 case PIPE_TEX_WRAP_REPEAT
:
280 coord0
= LLVMBuildAnd(builder
, coord0
, length_minus_one
, "");
284 LLVMValueRef length_f
= lp_build_int_to_float(&bld
->coord_bld
, length
);
285 lp_build_coord_repeat_npot_linear(bld
, coord_f
,
288 weight
= lp_build_mul_imm(&bld
->coord_bld
, weight
, 256);
289 *weight_i
= lp_build_itrunc(&bld
->coord_bld
, weight
);
292 mask
= lp_build_compare(bld
->gallivm
, int_coord_bld
->type
,
293 PIPE_FUNC_NOTEQUAL
, coord0
, length_minus_one
);
295 *offset0
= lp_build_mul(int_coord_bld
, coord0
, stride
);
296 *offset1
= LLVMBuildAnd(builder
,
297 lp_build_add(int_coord_bld
, *offset0
, stride
),
301 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
302 /* XXX this might be slower than the separate path
303 * on some newer cpus. With sse41 this is 8 instructions vs. 7
304 * - at least on SNB this is almost certainly slower since
305 * min/max are cheaper than selects, and the muls aren't bad.
307 lmask
= lp_build_compare(int_coord_bld
->gallivm
, int_coord_bld
->type
,
308 PIPE_FUNC_GEQUAL
, coord0
, int_coord_bld
->zero
);
309 umask
= lp_build_compare(int_coord_bld
->gallivm
, int_coord_bld
->type
,
310 PIPE_FUNC_LESS
, coord0
, length_minus_one
);
312 coord0
= lp_build_select(int_coord_bld
, lmask
, coord0
, int_coord_bld
->zero
);
313 coord0
= lp_build_select(int_coord_bld
, umask
, coord0
, length_minus_one
);
315 mask
= LLVMBuildAnd(builder
, lmask
, umask
, "");
317 *offset0
= lp_build_mul(int_coord_bld
, coord0
, stride
);
318 *offset1
= lp_build_add(int_coord_bld
,
320 LLVMBuildAnd(builder
, stride
, mask
, ""));
323 case PIPE_TEX_WRAP_CLAMP
:
324 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
325 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
326 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
327 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
328 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
331 *offset0
= int_coord_bld
->zero
;
332 *offset1
= int_coord_bld
->zero
;
339 * Build LLVM code for texture coord wrapping, for linear filtering,
340 * for float texcoords.
341 * \param block_length is the length of the pixel block along the
343 * \param coord the incoming texcoord (s,t,r or q)
344 * \param length the texture size along one dimension
345 * \param is_pot if TRUE, length is a power of two
346 * \param wrap_mode one of PIPE_TEX_WRAP_x
347 * \param coord0 the first texcoord after wrapping, as int
348 * \param coord1 the second texcoord after wrapping, as int
349 * \param weight the filter weight as int (0-255)
350 * \param force_nearest if this coord actually uses nearest filtering
353 lp_build_sample_wrap_linear_float(struct lp_build_sample_context
*bld
,
354 unsigned block_length
,
359 LLVMValueRef
*coord0
,
360 LLVMValueRef
*coord1
,
361 LLVMValueRef
*weight
,
362 unsigned force_nearest
)
364 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
365 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
366 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
367 LLVMValueRef half
= lp_build_const_vec(bld
->gallivm
, coord_bld
->type
, 0.5);
368 LLVMValueRef length_minus_one
= lp_build_sub(coord_bld
, length
, coord_bld
->one
);
371 case PIPE_TEX_WRAP_REPEAT
:
373 /* mul by size and subtract 0.5 */
374 coord
= lp_build_mul(coord_bld
, coord
, length
);
376 coord
= lp_build_sub(coord_bld
, coord
, half
);
377 *coord1
= lp_build_add(coord_bld
, coord
, coord_bld
->one
);
378 /* convert to int, compute lerp weight */
379 lp_build_ifloor_fract(coord_bld
, coord
, coord0
, weight
);
380 *coord1
= lp_build_ifloor(coord_bld
, *coord1
);
382 length_minus_one
= lp_build_itrunc(coord_bld
, length_minus_one
);
383 *coord0
= LLVMBuildAnd(builder
, *coord0
, length_minus_one
, "");
384 *coord1
= LLVMBuildAnd(builder
, *coord1
, length_minus_one
, "");
388 /* wrap with normalized floats is just fract */
389 coord
= lp_build_fract(coord_bld
, coord
);
391 coord
= lp_build_mul(coord_bld
, coord
, length
);
393 * we avoided the 0.5/length division, have to fix up wrong
394 * edge cases with selects
396 *coord1
= lp_build_add(coord_bld
, coord
, half
);
397 coord
= lp_build_sub(coord_bld
, coord
, half
);
398 *weight
= lp_build_fract(coord_bld
, coord
);
399 mask
= lp_build_compare(coord_bld
->gallivm
, coord_bld
->type
,
400 PIPE_FUNC_LESS
, coord
, coord_bld
->zero
);
401 *coord0
= lp_build_select(coord_bld
, mask
, length_minus_one
, coord
);
402 *coord0
= lp_build_itrunc(coord_bld
, *coord0
);
403 mask
= lp_build_compare(coord_bld
->gallivm
, coord_bld
->type
,
404 PIPE_FUNC_LESS
, *coord1
, length
);
405 *coord1
= lp_build_select(coord_bld
, mask
, *coord1
, coord_bld
->zero
);
406 *coord1
= lp_build_itrunc(coord_bld
, *coord1
);
409 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
410 if (bld
->static_state
->normalized_coords
) {
411 /* mul by tex size */
412 coord
= lp_build_mul(coord_bld
, coord
, length
);
415 if (!force_nearest
) {
416 coord
= lp_build_sub(coord_bld
, coord
, half
);
418 /* clamp to [0, length - 1] */
419 coord
= lp_build_min(coord_bld
, coord
, length_minus_one
);
420 coord
= lp_build_max(coord_bld
, coord
, coord_bld
->zero
);
421 *coord1
= lp_build_add(coord_bld
, coord
, coord_bld
->one
);
422 /* convert to int, compute lerp weight */
423 lp_build_ifloor_fract(coord_bld
, coord
, coord0
, weight
);
424 /* coord1 = min(coord1, length-1) */
425 *coord1
= lp_build_min(coord_bld
, *coord1
, length_minus_one
);
426 *coord1
= lp_build_itrunc(coord_bld
, *coord1
);
430 *coord0
= int_coord_bld
->zero
;
431 *coord1
= int_coord_bld
->zero
;
432 *weight
= coord_bld
->zero
;
435 *weight
= lp_build_mul_imm(coord_bld
, *weight
, 256);
436 *weight
= lp_build_itrunc(coord_bld
, *weight
);
442 * Fetch texels for image with nearest sampling.
443 * Return filtered color as two vectors of 16-bit fixed point values.
446 lp_build_sample_fetch_image_nearest(struct lp_build_sample_context
*bld
,
447 LLVMValueRef data_ptr
,
449 LLVMValueRef x_subcoord
,
450 LLVMValueRef y_subcoord
,
451 LLVMValueRef
*colors_lo
,
452 LLVMValueRef
*colors_hi
)
455 * Fetch the pixels as 4 x 32bit (rgba order might differ):
457 * rgba0 rgba1 rgba2 rgba3
459 * bit cast them into 16 x u8
461 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
463 * unpack them into two 8 x i16:
465 * r0 g0 b0 a0 r1 g1 b1 a1
466 * r2 g2 b2 a2 r3 g3 b3 a3
468 * The higher 8 bits of the resulting elements will be zero.
470 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
472 struct lp_build_context h16
, u8n
;
473 LLVMTypeRef u8n_vec_type
;
475 lp_build_context_init(&h16
, bld
->gallivm
, lp_type_ufixed(16, bld
->vector_width
));
476 lp_build_context_init(&u8n
, bld
->gallivm
, lp_type_unorm(8, bld
->vector_width
));
477 u8n_vec_type
= lp_build_vec_type(bld
->gallivm
, u8n
.type
);
479 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
481 * Given the format is a rgba8, just read the pixels as is,
482 * without any swizzling. Swizzling will be done later.
484 rgba8
= lp_build_gather(bld
->gallivm
,
485 bld
->texel_type
.length
,
486 bld
->format_desc
->block
.bits
,
487 bld
->texel_type
.width
,
490 rgba8
= LLVMBuildBitCast(builder
, rgba8
, u8n_vec_type
, "");
493 rgba8
= lp_build_fetch_rgba_aos(bld
->gallivm
,
501 /* Expand one 4*rgba8 to two 2*rgba16 */
502 lp_build_unpack2(bld
->gallivm
, u8n
.type
, h16
.type
,
504 colors_lo
, colors_hi
);
509 * Sample a single texture image with nearest sampling.
510 * If sampling a cube texture, r = cube face in [0,5].
511 * Return filtered color as two vectors of 16-bit fixed point values.
514 lp_build_sample_image_nearest(struct lp_build_sample_context
*bld
,
515 LLVMValueRef int_size
,
516 LLVMValueRef row_stride_vec
,
517 LLVMValueRef img_stride_vec
,
518 LLVMValueRef data_ptr
,
522 LLVMValueRef
*colors_lo
,
523 LLVMValueRef
*colors_hi
)
525 const unsigned dims
= bld
->dims
;
526 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
527 struct lp_build_context i32
;
528 LLVMTypeRef i32_vec_type
;
530 LLVMValueRef width_vec
, height_vec
, depth_vec
;
531 LLVMValueRef s_ipart
, t_ipart
= NULL
, r_ipart
= NULL
;
532 LLVMValueRef s_float
, t_float
= NULL
, r_float
= NULL
;
533 LLVMValueRef x_stride
;
534 LLVMValueRef x_offset
, offset
;
535 LLVMValueRef x_subcoord
, y_subcoord
, z_subcoord
;
537 lp_build_context_init(&i32
, bld
->gallivm
, lp_type_int_vec(32, bld
->vector_width
));
539 i32_vec_type
= lp_build_vec_type(bld
->gallivm
, i32
.type
);
541 lp_build_extract_image_sizes(bld
,
549 s_float
= s
; t_float
= t
; r_float
= r
;
551 if (bld
->static_state
->normalized_coords
) {
552 LLVMValueRef scaled_size
;
553 LLVMValueRef flt_size
;
555 /* scale size by 256 (8 fractional bits) */
556 scaled_size
= lp_build_shl_imm(&bld
->int_size_bld
, int_size
, 8);
558 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, scaled_size
);
560 lp_build_unnormalized_coords(bld
, flt_size
, &s
, &t
, &r
);
563 /* scale coords by 256 (8 fractional bits) */
564 s
= lp_build_mul_imm(&bld
->coord_bld
, s
, 256);
566 t
= lp_build_mul_imm(&bld
->coord_bld
, t
, 256);
568 r
= lp_build_mul_imm(&bld
->coord_bld
, r
, 256);
571 /* convert float to int */
572 s
= LLVMBuildFPToSI(builder
, s
, i32_vec_type
, "");
574 t
= LLVMBuildFPToSI(builder
, t
, i32_vec_type
, "");
576 r
= LLVMBuildFPToSI(builder
, r
, i32_vec_type
, "");
578 /* compute floor (shift right 8) */
579 i32_c8
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, 8);
580 s_ipart
= LLVMBuildAShr(builder
, s
, i32_c8
, "");
582 t_ipart
= LLVMBuildAShr(builder
, t
, i32_c8
, "");
584 r_ipart
= LLVMBuildAShr(builder
, r
, i32_c8
, "");
586 /* get pixel, row, image strides */
587 x_stride
= lp_build_const_vec(bld
->gallivm
,
588 bld
->int_coord_bld
.type
,
589 bld
->format_desc
->block
.bits
/8);
591 /* Do texcoord wrapping, compute texel offset */
592 lp_build_sample_wrap_nearest_int(bld
,
593 bld
->format_desc
->block
.width
,
596 bld
->static_state
->pot_width
,
597 bld
->static_state
->wrap_s
,
598 &x_offset
, &x_subcoord
);
601 LLVMValueRef y_offset
;
602 lp_build_sample_wrap_nearest_int(bld
,
603 bld
->format_desc
->block
.height
,
605 height_vec
, row_stride_vec
,
606 bld
->static_state
->pot_height
,
607 bld
->static_state
->wrap_t
,
608 &y_offset
, &y_subcoord
);
609 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, y_offset
);
611 LLVMValueRef z_offset
;
612 lp_build_sample_wrap_nearest_int(bld
,
613 1, /* block length (depth) */
615 depth_vec
, img_stride_vec
,
616 bld
->static_state
->pot_depth
,
617 bld
->static_state
->wrap_r
,
618 &z_offset
, &z_subcoord
);
619 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, z_offset
);
621 else if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
622 LLVMValueRef z_offset
;
623 /* The r coord is the cube face in [0,5] */
624 z_offset
= lp_build_mul(&bld
->int_coord_bld
, r
, img_stride_vec
);
625 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, z_offset
);
629 lp_build_sample_fetch_image_nearest(bld
, data_ptr
, offset
,
630 x_subcoord
, y_subcoord
,
631 colors_lo
, colors_hi
);
636 * Sample a single texture image with nearest sampling.
637 * If sampling a cube texture, r = cube face in [0,5].
638 * Return filtered color as two vectors of 16-bit fixed point values.
639 * Does address calcs (except offsets) with floats.
640 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
643 lp_build_sample_image_nearest_afloat(struct lp_build_sample_context
*bld
,
644 LLVMValueRef int_size
,
645 LLVMValueRef row_stride_vec
,
646 LLVMValueRef img_stride_vec
,
647 LLVMValueRef data_ptr
,
651 LLVMValueRef
*colors_lo
,
652 LLVMValueRef
*colors_hi
)
654 const unsigned dims
= bld
->dims
;
655 LLVMValueRef width_vec
, height_vec
, depth_vec
;
657 LLVMValueRef x_subcoord
, y_subcoord
;
658 LLVMValueRef x_icoord
, y_icoord
, z_icoord
;
659 LLVMValueRef flt_size
;
661 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, int_size
);
663 lp_build_extract_image_sizes(bld
,
664 bld
->float_size_type
,
671 /* Do texcoord wrapping */
672 lp_build_sample_wrap_nearest_float(bld
,
674 bld
->static_state
->pot_width
,
675 bld
->static_state
->wrap_s
,
679 lp_build_sample_wrap_nearest_float(bld
,
681 bld
->static_state
->pot_height
,
682 bld
->static_state
->wrap_t
,
686 lp_build_sample_wrap_nearest_float(bld
,
688 bld
->static_state
->pot_depth
,
689 bld
->static_state
->wrap_r
,
692 else if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
698 * From here on we deal with ints, and we should split up the 256bit
699 * vectors manually for better generated code.
703 * compute texel offsets -
704 * cannot do offset calc with floats, difficult for block-based formats,
705 * and not enough precision anyway.
707 lp_build_sample_offset(&bld
->int_coord_bld
,
711 row_stride_vec
, img_stride_vec
,
713 &x_subcoord
, &y_subcoord
);
715 lp_build_sample_fetch_image_nearest(bld
, data_ptr
, offset
,
716 x_subcoord
, y_subcoord
,
717 colors_lo
, colors_hi
);
722 * Fetch texels for image with linear sampling.
723 * Return filtered color as two vectors of 16-bit fixed point values.
726 lp_build_sample_fetch_image_linear(struct lp_build_sample_context
*bld
,
727 LLVMValueRef data_ptr
,
728 LLVMValueRef offset
[2][2][2],
729 LLVMValueRef x_subcoord
[2],
730 LLVMValueRef y_subcoord
[2],
731 LLVMValueRef s_fpart
,
732 LLVMValueRef t_fpart
,
733 LLVMValueRef r_fpart
,
734 LLVMValueRef
*colors_lo
,
735 LLVMValueRef
*colors_hi
)
737 const unsigned dims
= bld
->dims
;
738 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
739 struct lp_build_context h16
, u8n
;
740 LLVMTypeRef h16_vec_type
, u8n_vec_type
;
741 LLVMTypeRef elem_type
= LLVMInt32TypeInContext(bld
->gallivm
->context
);
742 LLVMValueRef shuffles_lo
[LP_MAX_VECTOR_LENGTH
];
743 LLVMValueRef shuffles_hi
[LP_MAX_VECTOR_LENGTH
];
744 LLVMValueRef shuffle_lo
, shuffle_hi
;
745 LLVMValueRef s_fpart_lo
, s_fpart_hi
;
746 LLVMValueRef t_fpart_lo
= NULL
, t_fpart_hi
= NULL
;
747 LLVMValueRef r_fpart_lo
= NULL
, r_fpart_hi
= NULL
;
748 LLVMValueRef neighbors_lo
[2][2][2]; /* [z][y][x] */
749 LLVMValueRef neighbors_hi
[2][2][2]; /* [z][y][x] */
750 LLVMValueRef packed_lo
, packed_hi
;
754 lp_build_context_init(&h16
, bld
->gallivm
, lp_type_ufixed(16, bld
->vector_width
));
755 lp_build_context_init(&u8n
, bld
->gallivm
, lp_type_unorm(8, bld
->vector_width
));
756 h16_vec_type
= lp_build_vec_type(bld
->gallivm
, h16
.type
);
757 u8n_vec_type
= lp_build_vec_type(bld
->gallivm
, u8n
.type
);
760 * Transform 4 x i32 in
762 * s_fpart = {s0, s1, s2, s3}
766 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
770 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
771 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
773 * and likewise for t_fpart. There is no risk of loosing precision here
774 * since the fractional parts only use the lower 8bits.
776 s_fpart
= LLVMBuildBitCast(builder
, s_fpart
, h16_vec_type
, "");
778 t_fpart
= LLVMBuildBitCast(builder
, t_fpart
, h16_vec_type
, "");
780 r_fpart
= LLVMBuildBitCast(builder
, r_fpart
, h16_vec_type
, "");
782 for (j
= 0; j
< h16
.type
.length
; j
+= 4) {
783 #ifdef PIPE_ARCH_LITTLE_ENDIAN
784 unsigned subindex
= 0;
786 unsigned subindex
= 1;
790 index
= LLVMConstInt(elem_type
, j
/2 + subindex
, 0);
791 for (i
= 0; i
< 4; ++i
)
792 shuffles_lo
[j
+ i
] = index
;
794 index
= LLVMConstInt(elem_type
, h16
.type
.length
/2 + j
/2 + subindex
, 0);
795 for (i
= 0; i
< 4; ++i
)
796 shuffles_hi
[j
+ i
] = index
;
799 shuffle_lo
= LLVMConstVector(shuffles_lo
, h16
.type
.length
);
800 shuffle_hi
= LLVMConstVector(shuffles_hi
, h16
.type
.length
);
802 s_fpart_lo
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
,
804 s_fpart_hi
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
,
807 t_fpart_lo
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
,
809 t_fpart_hi
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
,
813 r_fpart_lo
= LLVMBuildShuffleVector(builder
, r_fpart
, h16
.undef
,
815 r_fpart_hi
= LLVMBuildShuffleVector(builder
, r_fpart
, h16
.undef
,
820 * Fetch the pixels as 4 x 32bit (rgba order might differ):
822 * rgba0 rgba1 rgba2 rgba3
824 * bit cast them into 16 x u8
826 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
828 * unpack them into two 8 x i16:
830 * r0 g0 b0 a0 r1 g1 b1 a1
831 * r2 g2 b2 a2 r3 g3 b3 a3
833 * The higher 8 bits of the resulting elements will be zero.
835 numj
= 1 + (dims
>= 2);
836 numk
= 1 + (dims
>= 3);
838 for (k
= 0; k
< numk
; k
++) {
839 for (j
= 0; j
< numj
; j
++) {
840 for (i
= 0; i
< 2; i
++) {
843 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
845 * Given the format is a rgba8, just read the pixels as is,
846 * without any swizzling. Swizzling will be done later.
848 rgba8
= lp_build_gather(bld
->gallivm
,
849 bld
->texel_type
.length
,
850 bld
->format_desc
->block
.bits
,
851 bld
->texel_type
.width
,
852 data_ptr
, offset
[k
][j
][i
]);
854 rgba8
= LLVMBuildBitCast(builder
, rgba8
, u8n_vec_type
, "");
857 rgba8
= lp_build_fetch_rgba_aos(bld
->gallivm
,
860 data_ptr
, offset
[k
][j
][i
],
865 /* Expand one 4*rgba8 to two 2*rgba16 */
866 lp_build_unpack2(bld
->gallivm
, u8n
.type
, h16
.type
,
868 &neighbors_lo
[k
][j
][i
], &neighbors_hi
[k
][j
][i
]);
874 * Linear interpolation with 8.8 fixed point.
876 if (bld
->static_state
->force_nearest_s
) {
877 /* special case 1-D lerp */
878 packed_lo
= lp_build_lerp(&h16
,
880 neighbors_lo
[0][0][0],
881 neighbors_lo
[0][0][1]);
883 packed_hi
= lp_build_lerp(&h16
,
885 neighbors_hi
[0][1][0],
886 neighbors_hi
[0][1][0]);
888 else if (bld
->static_state
->force_nearest_t
) {
889 /* special case 1-D lerp */
890 packed_lo
= lp_build_lerp(&h16
,
892 neighbors_lo
[0][0][0],
893 neighbors_lo
[0][0][1]);
895 packed_hi
= lp_build_lerp(&h16
,
897 neighbors_hi
[0][0][0],
898 neighbors_hi
[0][0][1]);
901 /* general 1/2/3-D lerping */
903 packed_lo
= lp_build_lerp(&h16
,
905 neighbors_lo
[0][0][0],
906 neighbors_lo
[0][0][1]);
908 packed_hi
= lp_build_lerp(&h16
,
910 neighbors_hi
[0][0][0],
911 neighbors_hi
[0][0][1]);
915 packed_lo
= lp_build_lerp_2d(&h16
,
916 s_fpart_lo
, t_fpart_lo
,
917 neighbors_lo
[0][0][0],
918 neighbors_lo
[0][0][1],
919 neighbors_lo
[0][1][0],
920 neighbors_lo
[0][1][1]);
922 packed_hi
= lp_build_lerp_2d(&h16
,
923 s_fpart_hi
, t_fpart_hi
,
924 neighbors_hi
[0][0][0],
925 neighbors_hi
[0][0][1],
926 neighbors_hi
[0][1][0],
927 neighbors_hi
[0][1][1]);
930 LLVMValueRef packed_lo2
, packed_hi2
;
932 /* lerp in the second z slice */
933 packed_lo2
= lp_build_lerp_2d(&h16
,
934 s_fpart_lo
, t_fpart_lo
,
935 neighbors_lo
[1][0][0],
936 neighbors_lo
[1][0][1],
937 neighbors_lo
[1][1][0],
938 neighbors_lo
[1][1][1]);
940 packed_hi2
= lp_build_lerp_2d(&h16
,
941 s_fpart_hi
, t_fpart_hi
,
942 neighbors_hi
[1][0][0],
943 neighbors_hi
[1][0][1],
944 neighbors_hi
[1][1][0],
945 neighbors_hi
[1][1][1]);
946 /* interp between two z slices */
947 packed_lo
= lp_build_lerp(&h16
, r_fpart_lo
,
948 packed_lo
, packed_lo2
);
949 packed_hi
= lp_build_lerp(&h16
, r_fpart_hi
,
950 packed_hi
, packed_hi2
);
955 *colors_lo
= packed_lo
;
956 *colors_hi
= packed_hi
;
960 * Sample a single texture image with (bi-)(tri-)linear sampling.
961 * Return filtered color as two vectors of 16-bit fixed point values.
964 lp_build_sample_image_linear(struct lp_build_sample_context
*bld
,
965 LLVMValueRef int_size
,
966 LLVMValueRef row_stride_vec
,
967 LLVMValueRef img_stride_vec
,
968 LLVMValueRef data_ptr
,
972 LLVMValueRef
*colors_lo
,
973 LLVMValueRef
*colors_hi
)
975 const unsigned dims
= bld
->dims
;
976 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
977 struct lp_build_context i32
;
978 LLVMTypeRef i32_vec_type
;
979 LLVMValueRef i32_c8
, i32_c128
, i32_c255
;
980 LLVMValueRef width_vec
, height_vec
, depth_vec
;
981 LLVMValueRef s_ipart
, s_fpart
, s_float
;
982 LLVMValueRef t_ipart
= NULL
, t_fpart
= NULL
, t_float
= NULL
;
983 LLVMValueRef r_ipart
= NULL
, r_fpart
= NULL
, r_float
= NULL
;
984 LLVMValueRef x_stride
, y_stride
, z_stride
;
985 LLVMValueRef x_offset0
, x_offset1
;
986 LLVMValueRef y_offset0
, y_offset1
;
987 LLVMValueRef z_offset0
, z_offset1
;
988 LLVMValueRef offset
[2][2][2]; /* [z][y][x] */
989 LLVMValueRef x_subcoord
[2], y_subcoord
[2], z_subcoord
[2];
992 lp_build_context_init(&i32
, bld
->gallivm
, lp_type_int_vec(32, bld
->vector_width
));
994 i32_vec_type
= lp_build_vec_type(bld
->gallivm
, i32
.type
);
996 lp_build_extract_image_sizes(bld
,
1004 s_float
= s
; t_float
= t
; r_float
= r
;
1006 if (bld
->static_state
->normalized_coords
) {
1007 LLVMValueRef scaled_size
;
1008 LLVMValueRef flt_size
;
1010 /* scale size by 256 (8 fractional bits) */
1011 scaled_size
= lp_build_shl_imm(&bld
->int_size_bld
, int_size
, 8);
1013 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, scaled_size
);
1015 lp_build_unnormalized_coords(bld
, flt_size
, &s
, &t
, &r
);
1018 /* scale coords by 256 (8 fractional bits) */
1019 s
= lp_build_mul_imm(&bld
->coord_bld
, s
, 256);
1021 t
= lp_build_mul_imm(&bld
->coord_bld
, t
, 256);
1023 r
= lp_build_mul_imm(&bld
->coord_bld
, r
, 256);
1026 /* convert float to int */
1027 s
= LLVMBuildFPToSI(builder
, s
, i32_vec_type
, "");
1029 t
= LLVMBuildFPToSI(builder
, t
, i32_vec_type
, "");
1031 r
= LLVMBuildFPToSI(builder
, r
, i32_vec_type
, "");
1033 /* subtract 0.5 (add -128) */
1034 i32_c128
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, -128);
1035 if (!bld
->static_state
->force_nearest_s
) {
1036 s
= LLVMBuildAdd(builder
, s
, i32_c128
, "");
1038 if (dims
>= 2 && !bld
->static_state
->force_nearest_t
) {
1039 t
= LLVMBuildAdd(builder
, t
, i32_c128
, "");
1042 r
= LLVMBuildAdd(builder
, r
, i32_c128
, "");
1045 /* compute floor (shift right 8) */
1046 i32_c8
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, 8);
1047 s_ipart
= LLVMBuildAShr(builder
, s
, i32_c8
, "");
1049 t_ipart
= LLVMBuildAShr(builder
, t
, i32_c8
, "");
1051 r_ipart
= LLVMBuildAShr(builder
, r
, i32_c8
, "");
1053 /* compute fractional part (AND with 0xff) */
1054 i32_c255
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, 255);
1055 s_fpart
= LLVMBuildAnd(builder
, s
, i32_c255
, "");
1057 t_fpart
= LLVMBuildAnd(builder
, t
, i32_c255
, "");
1059 r_fpart
= LLVMBuildAnd(builder
, r
, i32_c255
, "");
1061 /* get pixel, row and image strides */
1062 x_stride
= lp_build_const_vec(bld
->gallivm
, bld
->int_coord_bld
.type
,
1063 bld
->format_desc
->block
.bits
/8);
1064 y_stride
= row_stride_vec
;
1065 z_stride
= img_stride_vec
;
1067 /* do texcoord wrapping and compute texel offsets */
1068 lp_build_sample_wrap_linear_int(bld
,
1069 bld
->format_desc
->block
.width
,
1070 s_ipart
, &s_fpart
, s_float
,
1071 width_vec
, x_stride
,
1072 bld
->static_state
->pot_width
,
1073 bld
->static_state
->wrap_s
,
1074 &x_offset0
, &x_offset1
,
1075 &x_subcoord
[0], &x_subcoord
[1]);
1076 for (z
= 0; z
< 2; z
++) {
1077 for (y
= 0; y
< 2; y
++) {
1078 offset
[z
][y
][0] = x_offset0
;
1079 offset
[z
][y
][1] = x_offset1
;
1084 lp_build_sample_wrap_linear_int(bld
,
1085 bld
->format_desc
->block
.height
,
1086 t_ipart
, &t_fpart
, t_float
,
1087 height_vec
, y_stride
,
1088 bld
->static_state
->pot_height
,
1089 bld
->static_state
->wrap_t
,
1090 &y_offset0
, &y_offset1
,
1091 &y_subcoord
[0], &y_subcoord
[1]);
1093 for (z
= 0; z
< 2; z
++) {
1094 for (x
= 0; x
< 2; x
++) {
1095 offset
[z
][0][x
] = lp_build_add(&bld
->int_coord_bld
,
1096 offset
[z
][0][x
], y_offset0
);
1097 offset
[z
][1][x
] = lp_build_add(&bld
->int_coord_bld
,
1098 offset
[z
][1][x
], y_offset1
);
1104 lp_build_sample_wrap_linear_int(bld
,
1105 bld
->format_desc
->block
.height
,
1106 r_ipart
, &r_fpart
, r_float
,
1107 depth_vec
, z_stride
,
1108 bld
->static_state
->pot_depth
,
1109 bld
->static_state
->wrap_r
,
1110 &z_offset0
, &z_offset1
,
1111 &z_subcoord
[0], &z_subcoord
[1]);
1112 for (y
= 0; y
< 2; y
++) {
1113 for (x
= 0; x
< 2; x
++) {
1114 offset
[0][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1115 offset
[0][y
][x
], z_offset0
);
1116 offset
[1][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1117 offset
[1][y
][x
], z_offset1
);
1121 else if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
1122 LLVMValueRef z_offset
;
1123 z_offset
= lp_build_mul(&bld
->int_coord_bld
, r
, img_stride_vec
);
1124 for (y
= 0; y
< 2; y
++) {
1125 for (x
= 0; x
< 2; x
++) {
1126 /* The r coord is the cube face in [0,5] */
1127 offset
[0][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1128 offset
[0][y
][x
], z_offset
);
1133 lp_build_sample_fetch_image_linear(bld
, data_ptr
, offset
,
1134 x_subcoord
, y_subcoord
,
1135 s_fpart
, t_fpart
, r_fpart
,
1136 colors_lo
, colors_hi
);
/*
 * NOTE(review): this region is a damaged extraction of the original file --
 * statements are fragmented across physical lines and the original file's
 * line numbers are embedded in the text.  Several original lines are missing
 * entirely (gaps in the embedded numbering, e.g. the s/t/r coordinate
 * parameters and the closing brace).  Code tokens below are byte-identical
 * to the extracted text; only comments have been added.
 */
 1141 * Sample a single texture image with (bi-)(tri-)linear sampling.
 1142 * Return filtered color as two vectors of 16-bit fixed point values.
 1143 * Does address calcs (except offsets) with floats.
 1144 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
/*
 * Parameters visible here: int_size (packed integer mip level size),
 * row_stride_vec / img_stride_vec (byte strides), data_ptr (texel base),
 * colors_lo/colors_hi (output: filtered colors as two vectors).
 * NOTE(review): additional coordinate parameters (presumably s/t/r) occupy
 * the missing original lines 1152-1154 -- confirm against upstream source.
 */
 1147 lp_build_sample_image_linear_afloat(struct lp_build_sample_context
*bld
,
 1148 LLVMValueRef int_size
,
 1149 LLVMValueRef row_stride_vec
,
 1150 LLVMValueRef img_stride_vec
,
 1151 LLVMValueRef data_ptr
,
 1155 LLVMValueRef
*colors_lo
,
 1156 LLVMValueRef
*colors_hi
)
 1158 const unsigned dims
= bld
->dims
;
 1159 LLVMValueRef width_vec
, height_vec
, depth_vec
;
 1160 LLVMValueRef s_fpart
;
/* t_fpart/r_fpart default to NULL for 1D (and 1D/2D) textures where the
 * extra axes are never filtered. */
 1161 LLVMValueRef t_fpart
= NULL
;
 1162 LLVMValueRef r_fpart
= NULL
;
 1163 LLVMValueRef x_stride
, y_stride
, z_stride
;
 1164 LLVMValueRef x_offset0
, x_offset1
;
 1165 LLVMValueRef y_offset0
, y_offset1
;
 1166 LLVMValueRef z_offset0
, z_offset1
;
 1167 LLVMValueRef offset
[2][2][2]; /* [z][y][x] */
 1168 LLVMValueRef x_subcoord
[2], y_subcoord
[2];
 1169 LLVMValueRef flt_size
;
/* icoord0/icoord1 hold the two integer texel coordinates bracketing each
 * axis for the bilinear/trilinear footprint. */
 1170 LLVMValueRef x_icoord0
, x_icoord1
;
 1171 LLVMValueRef y_icoord0
, y_icoord1
;
 1172 LLVMValueRef z_icoord0
, z_icoord1
;
/* Convert the integer mip-level size to floats so the coordinate wrap
 * math below can run in the float domain (cheaper on AVX -- see header). */
 1175 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, int_size
);
 1177 lp_build_extract_image_sizes(bld
,
 1178 bld
->float_size_type
,
 1185 /* do texcoord wrapping and compute texel offsets */
 1186 lp_build_sample_wrap_linear_float(bld
,
 1187 bld
->format_desc
->block
.width
,
 1189 bld
->static_state
->pot_width
,
 1190 bld
->static_state
->wrap_s
,
 1191 &x_icoord0
, &x_icoord1
,
 1193 bld
->static_state
->force_nearest_s
);
/* Same wrap for the t axis (only reached for 2D+; the guarding 'if' is in
 * the missing original lines -- TODO confirm). */
 1196 lp_build_sample_wrap_linear_float(bld
,
 1197 bld
->format_desc
->block
.height
,
 1199 bld
->static_state
->pot_height
,
 1200 bld
->static_state
->wrap_t
,
 1201 &y_icoord0
, &y_icoord1
,
 1203 bld
->static_state
->force_nearest_t
);
/* Wrap for the r (depth) axis.  NOTE(review): block.height is passed here
 * rather than a block depth -- presumably because compressed blocks are
 * two-dimensional; verify against upstream. */
 1206 lp_build_sample_wrap_linear_float(bld
,
 1207 bld
->format_desc
->block
.height
,
 1209 bld
->static_state
->pot_depth
,
 1210 bld
->static_state
->wrap_r
,
 1211 &z_icoord0
, &z_icoord1
,
 1217 * From here on we deal with ints, and we should split up the 256bit
 1218 * vectors manually for better generated code.
 1221 /* get pixel, row and image strides */
/* x stride is bytes per block: format bits / 8. */
 1222 x_stride
= lp_build_const_vec(bld
->gallivm
,
 1223 bld
->int_coord_bld
.type
,
 1224 bld
->format_desc
->block
.bits
/8);
 1225 y_stride
= row_stride_vec
;
 1226 z_stride
= img_stride_vec
;
 1229 * compute texel offset -
 1230 * cannot do offset calc with floats, difficult for block-based formats,
 1231 * and not enough precision anyway.
/* Split each integer coordinate into a byte offset plus a sub-block
 * coordinate; done once per bracketing coordinate on each axis. */
 1233 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
 1234 bld
->format_desc
->block
.width
,
 1235 x_icoord0
, x_stride
,
 1236 &x_offset0
, &x_subcoord
[0]);
 1237 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
 1238 bld
->format_desc
->block
.width
,
 1239 x_icoord1
, x_stride
,
 1240 &x_offset1
, &x_subcoord
[1]);
/* Seed all 8 corner offsets with the two x offsets; the y and z
 * contributions are accumulated into them below. */
 1241 for (z
= 0; z
< 2; z
++) {
 1242 for (y
= 0; y
< 2; y
++) {
 1243 offset
[z
][y
][0] = x_offset0
;
 1244 offset
[z
][y
][1] = x_offset1
;
 1249 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
 1250 bld
->format_desc
->block
.height
,
 1251 y_icoord0
, y_stride
,
 1252 &y_offset0
, &y_subcoord
[0]);
 1253 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
 1254 bld
->format_desc
->block
.height
,
 1255 y_icoord1
, y_stride
,
 1256 &y_offset1
, &y_subcoord
[1]);
/* Add the row (y) offsets into every corner offset. */
 1257 for (z
= 0; z
< 2; z
++) {
 1258 for (x
= 0; x
< 2; x
++) {
 1259 offset
[z
][0][x
] = lp_build_add(&bld
->int_coord_bld
,
 1260 offset
[z
][0][x
], y_offset0
);
 1261 offset
[z
][1][x
] = lp_build_add(&bld
->int_coord_bld
,
 1262 offset
[z
][1][x
], y_offset1
);
/* 3D texture path: compute and accumulate the two image (z) offsets.
 * (The guarding condition is in missing original lines -- TODO confirm.) */
 1268 LLVMValueRef z_subcoord
[2];
 1269 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
 1271 z_icoord0
, z_stride
,
 1272 &z_offset0
, &z_subcoord
[0]);
 1273 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
 1275 z_icoord1
, z_stride
,
 1276 &z_offset1
, &z_subcoord
[1]);
 1277 for (y
= 0; y
< 2; y
++) {
 1278 for (x
= 0; x
< 2; x
++) {
 1279 offset
[0][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
 1280 offset
[0][y
][x
], z_offset0
);
 1281 offset
[1][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
 1282 offset
[1][y
][x
], z_offset1
);
/* Cube map path: a single face offset (face index r times image stride)
 * is added to the z==0 plane of corner offsets. */
 1286 else if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
 1287 LLVMValueRef z_offset
;
 1288 z_offset
= lp_build_mul(&bld
->int_coord_bld
, r
, img_stride_vec
);
 1289 for (y
= 0; y
< 2; y
++) {
 1290 for (x
= 0; x
< 2; x
++) {
 1291 /* The r coord is the cube face in [0,5] */
 1292 offset
[0][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
 1293 offset
[0][y
][x
], z_offset
);
/* Fetch the (up to 8) texels and blend them with the fractional weights,
 * producing the packed lo/hi 16-bit fixed-point color vectors. */
 1298 lp_build_sample_fetch_image_linear(bld
, data_ptr
, offset
,
 1299 x_subcoord
, y_subcoord
,
 1300 s_fpart
, t_fpart
, r_fpart
,
 1301 colors_lo
, colors_hi
);
/*
 * NOTE(review): mangled extraction -- statements are fragmented across
 * physical lines with original line numbers embedded, and some original
 * lines (several call arguments, the closing brace) are missing.  Code
 * tokens are byte-identical to the extracted text; only comments added.
 */
 1306 * Sample the texture/mipmap using given image filter and mip filter.
 1307 * data0_ptr and data1_ptr point to the two mipmap levels to sample
 1308 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
 1309 * If we're using nearest miplevel sampling the '1' values will be null/unused.
 1312 lp_build_sample_mipmap(struct lp_build_sample_context
*bld
,
 1313 unsigned img_filter
,
 1314 unsigned mip_filter
,
 1318 LLVMValueRef ilevel0
,
 1319 LLVMValueRef ilevel1
,
 1320 LLVMValueRef lod_fpart
,
/* colors_lo_var/colors_hi_var are alloca'd variables the results are
 * stored into (stored unconditionally for level 0, conditionally
 * overwritten after the mip lerp). */
 1321 LLVMValueRef colors_lo_var
,
 1322 LLVMValueRef colors_hi_var
)
 1324 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
 1327 LLVMValueRef row_stride0_vec
= NULL
;
 1328 LLVMValueRef row_stride1_vec
= NULL
;
 1329 LLVMValueRef img_stride0_vec
= NULL
;
 1330 LLVMValueRef img_stride1_vec
= NULL
;
 1331 LLVMValueRef data_ptr0
;
 1332 LLVMValueRef data_ptr1
;
 1333 LLVMValueRef colors0_lo
, colors0_hi
;
 1334 LLVMValueRef colors1_lo
, colors1_hi
;
 1336 /* sample the first mipmap level */
 1337 lp_build_mipmap_level_sizes(bld
, ilevel0
,
 1339 &row_stride0_vec
, &img_stride0_vec
);
 1340 data_ptr0
= lp_build_get_mipmap_level(bld
, ilevel0
);
/* On AVX with wide (>4 lane) coordinate vectors, use the float-address
 * ("afloat") sampling variants; otherwise the integer-address ones. */
 1341 if (util_cpu_caps
.has_avx
&& bld
->coord_type
.length
> 4) {
 1342 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
 1343 lp_build_sample_image_nearest_afloat(bld
,
 1345 row_stride0_vec
, img_stride0_vec
,
 1347 &colors0_lo
, &colors0_hi
);
 1350 assert(img_filter
== PIPE_TEX_FILTER_LINEAR
);
 1351 lp_build_sample_image_linear_afloat(bld
,
 1353 row_stride0_vec
, img_stride0_vec
,
 1355 &colors0_lo
, &colors0_hi
);
 1359 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
 1360 lp_build_sample_image_nearest(bld
,
 1362 row_stride0_vec
, img_stride0_vec
,
 1364 &colors0_lo
, &colors0_hi
);
 1367 assert(img_filter
== PIPE_TEX_FILTER_LINEAR
);
 1368 lp_build_sample_image_linear(bld
,
 1370 row_stride0_vec
, img_stride0_vec
,
 1372 &colors0_lo
, &colors0_hi
);
 1376 /* Store the first level's colors in the output variables */
 1377 LLVMBuildStore(builder
, colors0_lo
, colors_lo_var
);
 1378 LLVMBuildStore(builder
, colors0_hi
, colors_hi_var
);
/* Linear mip filter: convert the float lod fraction to fixed point
 * (scaled by 256.0 then FP->SI, i.e. 8 fractional bits for the 16-bit
 * ufixed lerp below), and only sample/lerp the second level when any
 * quad actually needs it. */
 1380 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
 1381 LLVMValueRef h16vec_scale
= lp_build_const_vec(bld
->gallivm
,
 1382 bld
->perquadf_bld
.type
, 256.0);
 1383 LLVMTypeRef i32vec_type
= lp_build_vec_type(bld
->gallivm
, bld
->perquadi_bld
.type
);
 1384 struct lp_build_if_state if_ctx
;
 1385 LLVMValueRef need_lerp
;
/* One lod value per quad (4 pixels). */
 1386 unsigned num_quads
= bld
->coord_bld
.type
.length
/ 4;
 1389 lod_fpart
= LLVMBuildFMul(builder
, lod_fpart
, h16vec_scale
, "");
 1390 lod_fpart
= LLVMBuildFPToSI(builder
, lod_fpart
, i32vec_type
, "lod_fpart.fixed16");
 1392 /* need_lerp = lod_fpart > 0 */
 1393 if (num_quads
== 1) {
 1394 need_lerp
= LLVMBuildICmp(builder
, LLVMIntSGT
,
 1395 lod_fpart
, bld
->perquadi_bld
.zero
,
 1400 * We'll do mip filtering if any of the quads need it.
 1401 * It might be better to split the vectors here and only fetch/filter
 1402 * quads which need it.
 1405 * We need to clamp lod_fpart here since we can get negative
 1406 * values which would screw up filtering if not all
 1407 * lod_fpart values have same sign.
 1408 * We can however then skip the greater than comparison.
 1410 lod_fpart
= lp_build_max(&bld
->perquadi_bld
, lod_fpart
,
 1411 bld
->perquadi_bld
.zero
);
 1412 need_lerp
= lp_build_any_true_range(&bld
->perquadi_bld
, num_quads
, lod_fpart
);
/* Runtime branch: everything below only executes when need_lerp. */
 1415 lp_build_if(&if_ctx
, bld
->gallivm
, need_lerp
);
 1417 struct lp_build_context h16_bld
;
/* 16-bit unsigned fixed-point context used for the color lerps. */
 1419 lp_build_context_init(&h16_bld
, bld
->gallivm
, lp_type_ufixed(16, bld
->vector_width
));
 1421 /* sample the second mipmap level */
 1422 lp_build_mipmap_level_sizes(bld
, ilevel1
,
 1424 &row_stride1_vec
, &img_stride1_vec
);
 1425 data_ptr1
= lp_build_get_mipmap_level(bld
, ilevel1
);
/* Same filter/ISA dispatch as for level 0, writing colors1_lo/hi. */
 1427 if (util_cpu_caps
.has_avx
&& bld
->coord_type
.length
> 4) {
 1428 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
 1429 lp_build_sample_image_nearest_afloat(bld
,
 1431 row_stride1_vec
, img_stride1_vec
,
 1433 &colors1_lo
, &colors1_hi
);
 1436 lp_build_sample_image_linear_afloat(bld
,
 1438 row_stride1_vec
, img_stride1_vec
,
 1440 &colors1_lo
, &colors1_hi
);
 1444 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
 1445 lp_build_sample_image_nearest(bld
,
 1447 row_stride1_vec
, img_stride1_vec
,
 1449 &colors1_lo
, &colors1_hi
);
 1452 lp_build_sample_image_linear(bld
,
 1454 row_stride1_vec
, img_stride1_vec
,
 1456 &colors1_lo
, &colors1_hi
);
 1460 /* interpolate samples from the two mipmap levels */
/* Single quad: broadcast the scalar lod fraction to a 16-bit lane vector. */
 1462 if (num_quads
== 1) {
 1463 lod_fpart
= LLVMBuildTrunc(builder
, lod_fpart
, h16_bld
.elem_type
, "");
 1464 lod_fpart
= lp_build_broadcast_scalar(&h16_bld
, lod_fpart
);
 1466 #if HAVE_LLVM == 0x208
 1467 /* This is a work-around for a bug in LLVM 2.8.
 1468 * Evidently, something goes wrong in the construction of the
 1469 * lod_fpart short[8] vector. Adding this no-effect shuffle seems
 1470 * to force the vector to be properly constructed.
 1471 * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
 1474 LLVMValueRef shuffles
[8], shuffle
;
 1475 assert(h16_bld
.type
.length
<= Elements(shuffles
));
 1476 for (i
= 0; i
< h16_bld
.type
.length
; i
++)
 1477 shuffles
[i
] = lp_build_const_int32(bld
->gallivm
, 2 * (i
& 1));
 1478 shuffle
= LLVMConstVector(shuffles
, h16_bld
.type
.length
);
 1479 lod_fpart
= LLVMBuildShuffleVector(builder
,
 1480 lod_fpart
, lod_fpart
,
/* lerp = colors0 + lod_fpart * (colors1 - colors0), in 16-bit fixed pt. */
 1485 colors0_lo
= lp_build_lerp(&h16_bld
, lod_fpart
,
 1486 colors0_lo
, colors1_lo
);
 1487 colors0_hi
= lp_build_lerp(&h16_bld
, lod_fpart
,
 1488 colors0_hi
, colors1_hi
);
/* Multi-quad: bitcast the per-quad i32 lod vector to 16-bit lanes and
 * broadcast each quad's value for its own lerp. */
 1491 LLVMValueRef lod_parts
[LP_MAX_VECTOR_LENGTH
/16];
 1492 struct lp_type perquadi16_type
= bld
->perquadi_bld
.type
;
 1493 perquadi16_type
.width
/= 2;
 1494 perquadi16_type
.length
*= 2;
 1495 lod_fpart
= LLVMBuildBitCast(builder
, lod_fpart
,
 1496 lp_build_vec_type(bld
->gallivm
,
 1497 perquadi16_type
), "");
 1498 /* XXX this only works for exactly 2 quads. More quads need shuffle */
 1499 assert(num_quads
== 2);
 1500 for (i
= 0; i
< num_quads
; i
++) {
/* i*2 selects the low 16-bit half of quad i's 32-bit fixed lod value. */
 1501 LLVMValueRef indexi2
= lp_build_const_int32(bld
->gallivm
, i
*2);
 1502 lod_parts
[i
] = lp_build_extract_broadcast(bld
->gallivm
,
 1508 colors0_lo
= lp_build_lerp(&h16_bld
, lod_parts
[0],
 1509 colors0_lo
, colors1_lo
);
 1510 colors0_hi
= lp_build_lerp(&h16_bld
, lod_parts
[1],
 1511 colors0_hi
, colors1_hi
);
/* Overwrite the level-0 colors with the mip-lerped result (only inside
 * the need_lerp branch). */
 1514 LLVMBuildStore(builder
, colors0_lo
, colors_lo_var
);
 1515 LLVMBuildStore(builder
, colors0_hi
, colors_hi_var
);
 1517 lp_build_endif(&if_ctx
);
1524 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
1525 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
1526 * but only limited texture coord wrap modes.
1529 lp_build_sample_aos(struct lp_build_sample_context
*bld
,
1534 LLVMValueRef lod_ipart
,
1535 LLVMValueRef lod_fpart
,
1536 LLVMValueRef ilevel0
,
1537 LLVMValueRef ilevel1
,
1538 LLVMValueRef texel_out
[4])
1540 struct lp_build_context
*int_bld
= &bld
->int_bld
;
1541 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
1542 const unsigned mip_filter
= bld
->static_state
->min_mip_filter
;
1543 const unsigned min_filter
= bld
->static_state
->min_img_filter
;
1544 const unsigned mag_filter
= bld
->static_state
->mag_img_filter
;
1545 const unsigned dims
= bld
->dims
;
1546 LLVMValueRef packed
, packed_lo
, packed_hi
;
1547 LLVMValueRef unswizzled
[4];
1548 struct lp_build_context h16_bld
;
1550 /* we only support the common/simple wrap modes at this time */
1551 assert(lp_is_simple_wrap_mode(bld
->static_state
->wrap_s
));
1553 assert(lp_is_simple_wrap_mode(bld
->static_state
->wrap_t
));
1555 assert(lp_is_simple_wrap_mode(bld
->static_state
->wrap_r
));
1558 /* make 16-bit fixed-pt builder context */
1559 lp_build_context_init(&h16_bld
, bld
->gallivm
, lp_type_ufixed(16, bld
->vector_width
));
1562 * Get/interpolate texture colors.
1565 packed_lo
= lp_build_alloca(bld
->gallivm
, h16_bld
.vec_type
, "packed_lo");
1566 packed_hi
= lp_build_alloca(bld
->gallivm
, h16_bld
.vec_type
, "packed_hi");
1568 if (min_filter
== mag_filter
) {
1569 /* no need to distinguish between minification and magnification */
1570 lp_build_sample_mipmap(bld
,
1571 min_filter
, mip_filter
,
1573 ilevel0
, ilevel1
, lod_fpart
,
1574 packed_lo
, packed_hi
);
1577 /* Emit conditional to choose min image filter or mag image filter
1578 * depending on the lod being > 0 or <= 0, respectively.
1580 struct lp_build_if_state if_ctx
;
1581 LLVMValueRef minify
;
1583 /* minify = lod >= 0.0 */
1584 minify
= LLVMBuildICmp(builder
, LLVMIntSGE
,
1585 lod_ipart
, int_bld
->zero
, "");
1587 lp_build_if(&if_ctx
, bld
->gallivm
, minify
);
1589 /* Use the minification filter */
1590 lp_build_sample_mipmap(bld
,
1591 min_filter
, mip_filter
,
1593 ilevel0
, ilevel1
, lod_fpart
,
1594 packed_lo
, packed_hi
);
1596 lp_build_else(&if_ctx
);
1598 /* Use the magnification filter */
1599 lp_build_sample_mipmap(bld
,
1600 mag_filter
, PIPE_TEX_MIPFILTER_NONE
,
1602 ilevel0
, NULL
, NULL
,
1603 packed_lo
, packed_hi
);
1605 lp_build_endif(&if_ctx
);
1609 * combine the values stored in 'packed_lo' and 'packed_hi' variables
1612 packed
= lp_build_pack2(bld
->gallivm
,
1613 h16_bld
.type
, lp_type_unorm(8, bld
->vector_width
),
1614 LLVMBuildLoad(builder
, packed_lo
, ""),
1615 LLVMBuildLoad(builder
, packed_hi
, ""));
1618 * Convert to SoA and swizzle.
1620 lp_build_rgba8_to_f32_soa(bld
->gallivm
,
1622 packed
, unswizzled
);
1624 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
1625 lp_build_format_swizzle_soa(bld
->format_desc
,
1627 unswizzled
, texel_out
);
1630 texel_out
[0] = unswizzled
[0];
1631 texel_out
[1] = unswizzled
[1];
1632 texel_out
[2] = unswizzled
[2];
1633 texel_out
[3] = unswizzled
[3];