1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Texture sampling -- AoS.
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "util/u_cpu_detect.h"
44 #include "lp_bld_debug.h"
45 #include "lp_bld_type.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_arit.h"
49 #include "lp_bld_bitarit.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_swizzle.h"
52 #include "lp_bld_pack.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_gather.h"
55 #include "lp_bld_format.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_sample.h"
58 #include "lp_bld_sample_aos.h"
59 #include "lp_bld_quad.h"
63 * Build LLVM code for texture coord wrapping, for nearest filtering,
64 * for scaled integer texcoords.
65 * \param block_length is the length of the pixel block along the
66 * coordinate axis
67 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
68 * \param length the texture size along one dimension
69 * \param stride pixel stride along the coordinate axis (in bytes)
70 * \param is_pot if TRUE, length is a power of two
71 * \param wrap_mode one of PIPE_TEX_WRAP_x
72 * \param out_offset byte offset for the wrapped coordinate
73 * \param out_i resulting sub-block pixel coordinate for coord0
76 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context
*bld
,
77 unsigned block_length
,
84 LLVMValueRef
*out_offset
,
87 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
88 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
89 LLVMValueRef length_minus_one
;
91 length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
94 case PIPE_TEX_WRAP_REPEAT
:
96 coord
= LLVMBuildAnd(builder
, coord
, length_minus_one
, "");
98 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
99 LLVMValueRef length_f
= lp_build_int_to_float(coord_bld
, length
);
100 coord
= lp_build_fract_safe(coord_bld
, coord_f
);
101 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
102 coord
= lp_build_itrunc(coord_bld
, coord
);
106 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
107 coord
= lp_build_max(int_coord_bld
, coord
, int_coord_bld
->zero
);
108 coord
= lp_build_min(int_coord_bld
, coord
, length_minus_one
);
111 case PIPE_TEX_WRAP_CLAMP
:
112 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
113 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
114 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
115 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
116 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
121 lp_build_sample_partial_offset(int_coord_bld
, block_length
, coord
, stride
,
127 * Build LLVM code for texture coord wrapping, for nearest filtering,
128 * for float texcoords.
129 * \param coord the incoming texcoord (s,t,r or q)
130 * \param length the texture size along one dimension
131 * \param is_pot if TRUE, length is a power of two
132 * \param wrap_mode one of PIPE_TEX_WRAP_x
133 * \param icoord the texcoord after wrapping, as int
136 lp_build_sample_wrap_nearest_float(struct lp_build_sample_context
*bld
,
141 LLVMValueRef
*icoord
)
143 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
144 LLVMValueRef length_minus_one
;
147 case PIPE_TEX_WRAP_REPEAT
:
148 /* take fraction, unnormalize */
149 coord
= lp_build_fract_safe(coord_bld
, coord
);
150 coord
= lp_build_mul(coord_bld
, coord
, length
);
151 *icoord
= lp_build_itrunc(coord_bld
, coord
);
153 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
154 length_minus_one
= lp_build_sub(coord_bld
, length
, coord_bld
->one
);
155 if (bld
->static_state
->normalized_coords
) {
156 /* scale coord to length */
157 coord
= lp_build_mul(coord_bld
, coord
, length
);
159 coord
= lp_build_clamp(coord_bld
, coord
, coord_bld
->zero
,
161 *icoord
= lp_build_itrunc(coord_bld
, coord
);
164 case PIPE_TEX_WRAP_CLAMP
:
165 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
166 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
167 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
168 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
169 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
177 * Build LLVM code for texture coord wrapping, for linear filtering,
178 * for scaled integer texcoords.
179 * \param block_length is the length of the pixel block along the
180 * coordinate axis
181 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
182 * \param length the texture size along one dimension
183 * \param stride pixel stride along the coordinate axis (in bytes)
184 * \param is_pot if TRUE, length is a power of two
185 * \param wrap_mode one of PIPE_TEX_WRAP_x
186 * \param offset0 resulting relative offset for coord0
187 * \param offset1 resulting relative offset for coord0 + 1
188 * \param i0 resulting sub-block pixel coordinate for coord0
189 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
192 lp_build_sample_wrap_linear_int(struct lp_build_sample_context
*bld
,
193 unsigned block_length
,
195 LLVMValueRef
*weight_i
,
196 LLVMValueRef coord_f
,
201 LLVMValueRef
*offset0
,
202 LLVMValueRef
*offset1
,
206 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
207 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
208 LLVMValueRef length_minus_one
;
209 LLVMValueRef lmask
, umask
, mask
;
212 * If the pixel block covers more than one pixel then there is no easy
213 * way to calculate offset1 relative to offset0. Instead, compute them
214 * independently. Otherwise, try to compute offset0 and offset1 with
215 * a single stride multiplication.
218 length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
220 if (block_length
!= 1) {
223 case PIPE_TEX_WRAP_REPEAT
:
225 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
226 coord0
= LLVMBuildAnd(builder
, coord0
, length_minus_one
, "");
227 coord1
= LLVMBuildAnd(builder
, coord1
, length_minus_one
, "");
232 LLVMValueRef length_f
= lp_build_int_to_float(&bld
->coord_bld
, length
);
233 lp_build_coord_repeat_npot_linear(bld
, coord_f
,
236 mask
= lp_build_compare(bld
->gallivm
, int_coord_bld
->type
,
237 PIPE_FUNC_NOTEQUAL
, coord0
, length_minus_one
);
238 coord1
= LLVMBuildAnd(builder
,
239 lp_build_add(int_coord_bld
, coord0
,
242 weight
= lp_build_mul_imm(&bld
->coord_bld
, weight
, 256);
243 *weight_i
= lp_build_itrunc(&bld
->coord_bld
, weight
);
247 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
248 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
249 coord0
= lp_build_clamp(int_coord_bld
, coord0
, int_coord_bld
->zero
,
251 coord1
= lp_build_clamp(int_coord_bld
, coord1
, int_coord_bld
->zero
,
255 case PIPE_TEX_WRAP_CLAMP
:
256 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
257 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
258 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
259 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
260 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
263 coord0
= int_coord_bld
->zero
;
264 coord1
= int_coord_bld
->zero
;
267 lp_build_sample_partial_offset(int_coord_bld
, block_length
, coord0
, stride
,
269 lp_build_sample_partial_offset(int_coord_bld
, block_length
, coord1
, stride
,
274 *i0
= int_coord_bld
->zero
;
275 *i1
= int_coord_bld
->zero
;
278 case PIPE_TEX_WRAP_REPEAT
:
280 coord0
= LLVMBuildAnd(builder
, coord0
, length_minus_one
, "");
284 LLVMValueRef length_f
= lp_build_int_to_float(&bld
->coord_bld
, length
);
285 lp_build_coord_repeat_npot_linear(bld
, coord_f
,
288 weight
= lp_build_mul_imm(&bld
->coord_bld
, weight
, 256);
289 *weight_i
= lp_build_itrunc(&bld
->coord_bld
, weight
);
292 mask
= lp_build_compare(bld
->gallivm
, int_coord_bld
->type
,
293 PIPE_FUNC_NOTEQUAL
, coord0
, length_minus_one
);
295 *offset0
= lp_build_mul(int_coord_bld
, coord0
, stride
);
296 *offset1
= LLVMBuildAnd(builder
,
297 lp_build_add(int_coord_bld
, *offset0
, stride
),
301 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
302 /* XXX this might be slower than the separate path
303 * on some newer cpus. With sse41 this is 8 instructions vs. 7
304 * - at least on SNB this is almost certainly slower since
305 * min/max are cheaper than selects, and the muls aren't bad.
307 lmask
= lp_build_compare(int_coord_bld
->gallivm
, int_coord_bld
->type
,
308 PIPE_FUNC_GEQUAL
, coord0
, int_coord_bld
->zero
);
309 umask
= lp_build_compare(int_coord_bld
->gallivm
, int_coord_bld
->type
,
310 PIPE_FUNC_LESS
, coord0
, length_minus_one
);
312 coord0
= lp_build_select(int_coord_bld
, lmask
, coord0
, int_coord_bld
->zero
);
313 coord0
= lp_build_select(int_coord_bld
, umask
, coord0
, length_minus_one
);
315 mask
= LLVMBuildAnd(builder
, lmask
, umask
, "");
317 *offset0
= lp_build_mul(int_coord_bld
, coord0
, stride
);
318 *offset1
= lp_build_add(int_coord_bld
,
320 LLVMBuildAnd(builder
, stride
, mask
, ""));
323 case PIPE_TEX_WRAP_CLAMP
:
324 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
325 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
326 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
327 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
328 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
331 *offset0
= int_coord_bld
->zero
;
332 *offset1
= int_coord_bld
->zero
;
339 * Build LLVM code for texture coord wrapping, for linear filtering,
340 * for float texcoords.
341 * \param block_length is the length of the pixel block along the
342 * coordinate axis
343 * \param coord the incoming texcoord (s,t,r or q)
344 * \param length the texture size along one dimension
345 * \param is_pot if TRUE, length is a power of two
346 * \param wrap_mode one of PIPE_TEX_WRAP_x
347 * \param coord0 the first texcoord after wrapping, as int
348 * \param coord1 the second texcoord after wrapping, as int
349 * \param weight the filter weight as int (0-255)
350 * \param force_nearest if this coord actually uses nearest filtering
353 lp_build_sample_wrap_linear_float(struct lp_build_sample_context
*bld
,
354 unsigned block_length
,
359 LLVMValueRef
*coord0
,
360 LLVMValueRef
*coord1
,
361 LLVMValueRef
*weight
,
362 unsigned force_nearest
)
364 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
365 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
366 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
367 LLVMValueRef half
= lp_build_const_vec(bld
->gallivm
, coord_bld
->type
, 0.5);
368 LLVMValueRef length_minus_one
= lp_build_sub(coord_bld
, length
, coord_bld
->one
);
371 case PIPE_TEX_WRAP_REPEAT
:
373 /* mul by size and subtract 0.5 */
374 coord
= lp_build_mul(coord_bld
, coord
, length
);
376 coord
= lp_build_sub(coord_bld
, coord
, half
);
377 *coord1
= lp_build_add(coord_bld
, coord
, coord_bld
->one
);
378 /* convert to int, compute lerp weight */
379 lp_build_ifloor_fract(coord_bld
, coord
, coord0
, weight
);
380 *coord1
= lp_build_ifloor(coord_bld
, *coord1
);
382 length_minus_one
= lp_build_itrunc(coord_bld
, length_minus_one
);
383 *coord0
= LLVMBuildAnd(builder
, *coord0
, length_minus_one
, "");
384 *coord1
= LLVMBuildAnd(builder
, *coord1
, length_minus_one
, "");
388 /* wrap with normalized floats is just fract */
389 coord
= lp_build_fract(coord_bld
, coord
);
391 coord
= lp_build_mul(coord_bld
, coord
, length
);
393 * we avoided the 0.5/length division, have to fix up wrong
394 * edge cases with selects
396 *coord1
= lp_build_add(coord_bld
, coord
, half
);
397 coord
= lp_build_sub(coord_bld
, coord
, half
);
398 *weight
= lp_build_fract(coord_bld
, coord
);
399 mask
= lp_build_compare(coord_bld
->gallivm
, coord_bld
->type
,
400 PIPE_FUNC_LESS
, coord
, coord_bld
->zero
);
401 *coord0
= lp_build_select(coord_bld
, mask
, length_minus_one
, coord
);
402 *coord0
= lp_build_itrunc(coord_bld
, *coord0
);
403 mask
= lp_build_compare(coord_bld
->gallivm
, coord_bld
->type
,
404 PIPE_FUNC_LESS
, *coord1
, length
);
405 *coord1
= lp_build_select(coord_bld
, mask
, *coord1
, coord_bld
->zero
);
406 *coord1
= lp_build_itrunc(coord_bld
, *coord1
);
409 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
410 if (bld
->static_state
->normalized_coords
) {
411 /* mul by tex size */
412 coord
= lp_build_mul(coord_bld
, coord
, length
);
415 if (!force_nearest
) {
416 coord
= lp_build_sub(coord_bld
, coord
, half
);
418 /* clamp to [0, length - 1] */
419 coord
= lp_build_min(coord_bld
, coord
, length_minus_one
);
420 coord
= lp_build_max(coord_bld
, coord
, coord_bld
->zero
);
421 *coord1
= lp_build_add(coord_bld
, coord
, coord_bld
->one
);
422 /* convert to int, compute lerp weight */
423 lp_build_ifloor_fract(coord_bld
, coord
, coord0
, weight
);
424 /* coord1 = min(coord1, length-1) */
425 *coord1
= lp_build_min(coord_bld
, *coord1
, length_minus_one
);
426 *coord1
= lp_build_itrunc(coord_bld
, *coord1
);
430 *coord0
= int_coord_bld
->zero
;
431 *coord1
= int_coord_bld
->zero
;
432 *weight
= coord_bld
->zero
;
435 *weight
= lp_build_mul_imm(coord_bld
, *weight
, 256);
436 *weight
= lp_build_itrunc(coord_bld
, *weight
);
442 * Fetch texels for image with nearest sampling.
443 * Return filtered color as two vectors of 16-bit fixed point values.
446 lp_build_sample_fetch_image_nearest(struct lp_build_sample_context
*bld
,
447 LLVMValueRef data_ptr
,
449 LLVMValueRef x_subcoord
,
450 LLVMValueRef y_subcoord
,
451 LLVMValueRef
*colors_lo
,
452 LLVMValueRef
*colors_hi
)
455 * Fetch the pixels as 4 x 32bit (rgba order might differ):
457 * rgba0 rgba1 rgba2 rgba3
459 * bit cast them into 16 x u8
461 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
463 * unpack them into two 8 x i16:
465 * r0 g0 b0 a0 r1 g1 b1 a1
466 * r2 g2 b2 a2 r3 g3 b3 a3
468 * The higher 8 bits of the resulting elements will be zero.
470 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
472 struct lp_build_context h16
, u8n
;
473 LLVMTypeRef u8n_vec_type
;
475 lp_build_context_init(&h16
, bld
->gallivm
, lp_type_ufixed(16, bld
->vector_width
));
476 lp_build_context_init(&u8n
, bld
->gallivm
, lp_type_unorm(8, bld
->vector_width
));
477 u8n_vec_type
= lp_build_vec_type(bld
->gallivm
, u8n
.type
);
479 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
481 * Given the format is a rgba8, just read the pixels as is,
482 * without any swizzling. Swizzling will be done later.
484 rgba8
= lp_build_gather(bld
->gallivm
,
485 bld
->texel_type
.length
,
486 bld
->format_desc
->block
.bits
,
487 bld
->texel_type
.width
,
490 rgba8
= LLVMBuildBitCast(builder
, rgba8
, u8n_vec_type
, "");
493 rgba8
= lp_build_fetch_rgba_aos(bld
->gallivm
,
501 /* Expand one 4*rgba8 to two 2*rgba16 */
502 lp_build_unpack2(bld
->gallivm
, u8n
.type
, h16
.type
,
504 colors_lo
, colors_hi
);
509 * Sample a single texture image with nearest sampling.
510 * If sampling a cube texture, r = cube face in [0,5].
511 * Return filtered color as two vectors of 16-bit fixed point values.
514 lp_build_sample_image_nearest(struct lp_build_sample_context
*bld
,
515 LLVMValueRef int_size
,
516 LLVMValueRef row_stride_vec
,
517 LLVMValueRef img_stride_vec
,
518 LLVMValueRef data_ptr
,
519 LLVMValueRef mipoffsets
,
523 LLVMValueRef
*colors_lo
,
524 LLVMValueRef
*colors_hi
)
526 const unsigned dims
= bld
->dims
;
527 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
528 struct lp_build_context i32
;
529 LLVMTypeRef i32_vec_type
;
531 LLVMValueRef width_vec
, height_vec
, depth_vec
;
532 LLVMValueRef s_ipart
, t_ipart
= NULL
, r_ipart
= NULL
;
533 LLVMValueRef s_float
, t_float
= NULL
, r_float
= NULL
;
534 LLVMValueRef x_stride
;
535 LLVMValueRef x_offset
, offset
;
536 LLVMValueRef x_subcoord
, y_subcoord
, z_subcoord
;
538 lp_build_context_init(&i32
, bld
->gallivm
, lp_type_int_vec(32, bld
->vector_width
));
540 i32_vec_type
= lp_build_vec_type(bld
->gallivm
, i32
.type
);
542 lp_build_extract_image_sizes(bld
,
550 s_float
= s
; t_float
= t
; r_float
= r
;
552 if (bld
->static_state
->normalized_coords
) {
553 LLVMValueRef scaled_size
;
554 LLVMValueRef flt_size
;
556 /* scale size by 256 (8 fractional bits) */
557 scaled_size
= lp_build_shl_imm(&bld
->int_size_bld
, int_size
, 8);
559 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, scaled_size
);
561 lp_build_unnormalized_coords(bld
, flt_size
, &s
, &t
, &r
);
564 /* scale coords by 256 (8 fractional bits) */
565 s
= lp_build_mul_imm(&bld
->coord_bld
, s
, 256);
567 t
= lp_build_mul_imm(&bld
->coord_bld
, t
, 256);
569 r
= lp_build_mul_imm(&bld
->coord_bld
, r
, 256);
572 /* convert float to int */
573 s
= LLVMBuildFPToSI(builder
, s
, i32_vec_type
, "");
575 t
= LLVMBuildFPToSI(builder
, t
, i32_vec_type
, "");
577 r
= LLVMBuildFPToSI(builder
, r
, i32_vec_type
, "");
579 /* compute floor (shift right 8) */
580 i32_c8
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, 8);
581 s_ipart
= LLVMBuildAShr(builder
, s
, i32_c8
, "");
583 t_ipart
= LLVMBuildAShr(builder
, t
, i32_c8
, "");
585 r_ipart
= LLVMBuildAShr(builder
, r
, i32_c8
, "");
587 /* get pixel, row, image strides */
588 x_stride
= lp_build_const_vec(bld
->gallivm
,
589 bld
->int_coord_bld
.type
,
590 bld
->format_desc
->block
.bits
/8);
592 /* Do texcoord wrapping, compute texel offset */
593 lp_build_sample_wrap_nearest_int(bld
,
594 bld
->format_desc
->block
.width
,
597 bld
->static_state
->pot_width
,
598 bld
->static_state
->wrap_s
,
599 &x_offset
, &x_subcoord
);
602 LLVMValueRef y_offset
;
603 lp_build_sample_wrap_nearest_int(bld
,
604 bld
->format_desc
->block
.height
,
606 height_vec
, row_stride_vec
,
607 bld
->static_state
->pot_height
,
608 bld
->static_state
->wrap_t
,
609 &y_offset
, &y_subcoord
);
610 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, y_offset
);
612 LLVMValueRef z_offset
;
613 lp_build_sample_wrap_nearest_int(bld
,
614 1, /* block length (depth) */
616 depth_vec
, img_stride_vec
,
617 bld
->static_state
->pot_depth
,
618 bld
->static_state
->wrap_r
,
619 &z_offset
, &z_subcoord
);
620 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, z_offset
);
623 if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
||
624 bld
->static_state
->target
== PIPE_TEXTURE_1D_ARRAY
||
625 bld
->static_state
->target
== PIPE_TEXTURE_2D_ARRAY
) {
626 LLVMValueRef z_offset
;
627 /* The r coord is the cube face in [0,5] or array layer */
628 z_offset
= lp_build_mul(&bld
->int_coord_bld
, r
, img_stride_vec
);
629 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, z_offset
);
632 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, mipoffsets
);
635 lp_build_sample_fetch_image_nearest(bld
, data_ptr
, offset
,
636 x_subcoord
, y_subcoord
,
637 colors_lo
, colors_hi
);
642 * Sample a single texture image with nearest sampling.
643 * If sampling a cube texture, r = cube face in [0,5].
644 * Return filtered color as two vectors of 16-bit fixed point values.
645 * Does address calcs (except offsets) with floats.
646 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
649 lp_build_sample_image_nearest_afloat(struct lp_build_sample_context
*bld
,
650 LLVMValueRef int_size
,
651 LLVMValueRef row_stride_vec
,
652 LLVMValueRef img_stride_vec
,
653 LLVMValueRef data_ptr
,
654 LLVMValueRef mipoffsets
,
658 LLVMValueRef
*colors_lo
,
659 LLVMValueRef
*colors_hi
)
661 const unsigned dims
= bld
->dims
;
662 LLVMValueRef width_vec
, height_vec
, depth_vec
;
664 LLVMValueRef x_subcoord
, y_subcoord
;
665 LLVMValueRef x_icoord
= NULL
, y_icoord
= NULL
, z_icoord
= NULL
;
666 LLVMValueRef flt_size
;
668 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, int_size
);
670 lp_build_extract_image_sizes(bld
,
671 &bld
->float_size_bld
,
678 /* Do texcoord wrapping */
679 lp_build_sample_wrap_nearest_float(bld
,
681 bld
->static_state
->pot_width
,
682 bld
->static_state
->wrap_s
,
686 lp_build_sample_wrap_nearest_float(bld
,
688 bld
->static_state
->pot_height
,
689 bld
->static_state
->wrap_t
,
693 lp_build_sample_wrap_nearest_float(bld
,
695 bld
->static_state
->pot_depth
,
696 bld
->static_state
->wrap_r
,
700 if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
||
701 bld
->static_state
->target
== PIPE_TEXTURE_1D_ARRAY
||
702 bld
->static_state
->target
== PIPE_TEXTURE_2D_ARRAY
) {
707 * From here on we deal with ints, and we should split up the 256bit
708 * vectors manually for better generated code.
712 * compute texel offsets -
713 * cannot do offset calc with floats, difficult for block-based formats,
714 * and not enough precision anyway.
716 lp_build_sample_offset(&bld
->int_coord_bld
,
720 row_stride_vec
, img_stride_vec
,
722 &x_subcoord
, &y_subcoord
);
724 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, mipoffsets
);
727 lp_build_sample_fetch_image_nearest(bld
, data_ptr
, offset
,
728 x_subcoord
, y_subcoord
,
729 colors_lo
, colors_hi
);
734 * Fetch texels for image with linear sampling.
735 * Return filtered color as two vectors of 16-bit fixed point values.
738 lp_build_sample_fetch_image_linear(struct lp_build_sample_context
*bld
,
739 LLVMValueRef data_ptr
,
740 LLVMValueRef offset
[2][2][2],
741 LLVMValueRef x_subcoord
[2],
742 LLVMValueRef y_subcoord
[2],
743 LLVMValueRef s_fpart
,
744 LLVMValueRef t_fpart
,
745 LLVMValueRef r_fpart
,
746 LLVMValueRef
*colors_lo
,
747 LLVMValueRef
*colors_hi
)
749 const unsigned dims
= bld
->dims
;
750 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
751 struct lp_build_context h16
, u8n
;
752 LLVMTypeRef h16_vec_type
, u8n_vec_type
;
753 LLVMTypeRef elem_type
= LLVMInt32TypeInContext(bld
->gallivm
->context
);
754 LLVMValueRef shuffles_lo
[LP_MAX_VECTOR_LENGTH
];
755 LLVMValueRef shuffles_hi
[LP_MAX_VECTOR_LENGTH
];
756 LLVMValueRef shuffle_lo
, shuffle_hi
;
757 LLVMValueRef s_fpart_lo
, s_fpart_hi
;
758 LLVMValueRef t_fpart_lo
= NULL
, t_fpart_hi
= NULL
;
759 LLVMValueRef r_fpart_lo
= NULL
, r_fpart_hi
= NULL
;
760 LLVMValueRef neighbors_lo
[2][2][2]; /* [z][y][x] */
761 LLVMValueRef neighbors_hi
[2][2][2]; /* [z][y][x] */
762 LLVMValueRef packed_lo
, packed_hi
;
766 lp_build_context_init(&h16
, bld
->gallivm
, lp_type_ufixed(16, bld
->vector_width
));
767 lp_build_context_init(&u8n
, bld
->gallivm
, lp_type_unorm(8, bld
->vector_width
));
768 h16_vec_type
= lp_build_vec_type(bld
->gallivm
, h16
.type
);
769 u8n_vec_type
= lp_build_vec_type(bld
->gallivm
, u8n
.type
);
772 * Transform 4 x i32 in
774 * s_fpart = {s0, s1, s2, s3}
778 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
782 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
783 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
785 * and likewise for t_fpart. There is no risk of loosing precision here
786 * since the fractional parts only use the lower 8bits.
788 s_fpart
= LLVMBuildBitCast(builder
, s_fpart
, h16_vec_type
, "");
790 t_fpart
= LLVMBuildBitCast(builder
, t_fpart
, h16_vec_type
, "");
792 r_fpart
= LLVMBuildBitCast(builder
, r_fpart
, h16_vec_type
, "");
794 for (j
= 0; j
< h16
.type
.length
; j
+= 4) {
795 #ifdef PIPE_ARCH_LITTLE_ENDIAN
796 unsigned subindex
= 0;
798 unsigned subindex
= 1;
802 index
= LLVMConstInt(elem_type
, j
/2 + subindex
, 0);
803 for (i
= 0; i
< 4; ++i
)
804 shuffles_lo
[j
+ i
] = index
;
806 index
= LLVMConstInt(elem_type
, h16
.type
.length
/2 + j
/2 + subindex
, 0);
807 for (i
= 0; i
< 4; ++i
)
808 shuffles_hi
[j
+ i
] = index
;
811 shuffle_lo
= LLVMConstVector(shuffles_lo
, h16
.type
.length
);
812 shuffle_hi
= LLVMConstVector(shuffles_hi
, h16
.type
.length
);
814 s_fpart_lo
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
,
816 s_fpart_hi
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
,
819 t_fpart_lo
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
,
821 t_fpart_hi
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
,
825 r_fpart_lo
= LLVMBuildShuffleVector(builder
, r_fpart
, h16
.undef
,
827 r_fpart_hi
= LLVMBuildShuffleVector(builder
, r_fpart
, h16
.undef
,
832 * Fetch the pixels as 4 x 32bit (rgba order might differ):
834 * rgba0 rgba1 rgba2 rgba3
836 * bit cast them into 16 x u8
838 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
840 * unpack them into two 8 x i16:
842 * r0 g0 b0 a0 r1 g1 b1 a1
843 * r2 g2 b2 a2 r3 g3 b3 a3
845 * The higher 8 bits of the resulting elements will be zero.
847 numj
= 1 + (dims
>= 2);
848 numk
= 1 + (dims
>= 3);
850 for (k
= 0; k
< numk
; k
++) {
851 for (j
= 0; j
< numj
; j
++) {
852 for (i
= 0; i
< 2; i
++) {
855 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
857 * Given the format is a rgba8, just read the pixels as is,
858 * without any swizzling. Swizzling will be done later.
860 rgba8
= lp_build_gather(bld
->gallivm
,
861 bld
->texel_type
.length
,
862 bld
->format_desc
->block
.bits
,
863 bld
->texel_type
.width
,
864 data_ptr
, offset
[k
][j
][i
]);
866 rgba8
= LLVMBuildBitCast(builder
, rgba8
, u8n_vec_type
, "");
869 rgba8
= lp_build_fetch_rgba_aos(bld
->gallivm
,
872 data_ptr
, offset
[k
][j
][i
],
877 /* Expand one 4*rgba8 to two 2*rgba16 */
878 lp_build_unpack2(bld
->gallivm
, u8n
.type
, h16
.type
,
880 &neighbors_lo
[k
][j
][i
], &neighbors_hi
[k
][j
][i
]);
886 * Linear interpolation with 8.8 fixed point.
888 if (bld
->static_state
->force_nearest_s
) {
889 /* special case 1-D lerp */
890 packed_lo
= lp_build_lerp(&h16
,
892 neighbors_lo
[0][0][0],
893 neighbors_lo
[0][0][1]);
895 packed_hi
= lp_build_lerp(&h16
,
897 neighbors_hi
[0][1][0],
898 neighbors_hi
[0][1][0]);
900 else if (bld
->static_state
->force_nearest_t
) {
901 /* special case 1-D lerp */
902 packed_lo
= lp_build_lerp(&h16
,
904 neighbors_lo
[0][0][0],
905 neighbors_lo
[0][0][1]);
907 packed_hi
= lp_build_lerp(&h16
,
909 neighbors_hi
[0][0][0],
910 neighbors_hi
[0][0][1]);
913 /* general 1/2/3-D lerping */
915 packed_lo
= lp_build_lerp(&h16
,
917 neighbors_lo
[0][0][0],
918 neighbors_lo
[0][0][1]);
920 packed_hi
= lp_build_lerp(&h16
,
922 neighbors_hi
[0][0][0],
923 neighbors_hi
[0][0][1]);
927 packed_lo
= lp_build_lerp_2d(&h16
,
928 s_fpart_lo
, t_fpart_lo
,
929 neighbors_lo
[0][0][0],
930 neighbors_lo
[0][0][1],
931 neighbors_lo
[0][1][0],
932 neighbors_lo
[0][1][1]);
934 packed_hi
= lp_build_lerp_2d(&h16
,
935 s_fpart_hi
, t_fpart_hi
,
936 neighbors_hi
[0][0][0],
937 neighbors_hi
[0][0][1],
938 neighbors_hi
[0][1][0],
939 neighbors_hi
[0][1][1]);
942 LLVMValueRef packed_lo2
, packed_hi2
;
944 /* lerp in the second z slice */
945 packed_lo2
= lp_build_lerp_2d(&h16
,
946 s_fpart_lo
, t_fpart_lo
,
947 neighbors_lo
[1][0][0],
948 neighbors_lo
[1][0][1],
949 neighbors_lo
[1][1][0],
950 neighbors_lo
[1][1][1]);
952 packed_hi2
= lp_build_lerp_2d(&h16
,
953 s_fpart_hi
, t_fpart_hi
,
954 neighbors_hi
[1][0][0],
955 neighbors_hi
[1][0][1],
956 neighbors_hi
[1][1][0],
957 neighbors_hi
[1][1][1]);
958 /* interp between two z slices */
959 packed_lo
= lp_build_lerp(&h16
, r_fpart_lo
,
960 packed_lo
, packed_lo2
);
961 packed_hi
= lp_build_lerp(&h16
, r_fpart_hi
,
962 packed_hi
, packed_hi2
);
967 *colors_lo
= packed_lo
;
968 *colors_hi
= packed_hi
;
972 * Sample a single texture image with (bi-)(tri-)linear sampling.
973 * Return filtered color as two vectors of 16-bit fixed point values.
976 lp_build_sample_image_linear(struct lp_build_sample_context
*bld
,
977 LLVMValueRef int_size
,
978 LLVMValueRef row_stride_vec
,
979 LLVMValueRef img_stride_vec
,
980 LLVMValueRef data_ptr
,
981 LLVMValueRef mipoffsets
,
985 LLVMValueRef
*colors_lo
,
986 LLVMValueRef
*colors_hi
)
988 const unsigned dims
= bld
->dims
;
989 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
990 struct lp_build_context i32
;
991 LLVMTypeRef i32_vec_type
;
992 LLVMValueRef i32_c8
, i32_c128
, i32_c255
;
993 LLVMValueRef width_vec
, height_vec
, depth_vec
;
994 LLVMValueRef s_ipart
, s_fpart
, s_float
;
995 LLVMValueRef t_ipart
= NULL
, t_fpart
= NULL
, t_float
= NULL
;
996 LLVMValueRef r_ipart
= NULL
, r_fpart
= NULL
, r_float
= NULL
;
997 LLVMValueRef x_stride
, y_stride
, z_stride
;
998 LLVMValueRef x_offset0
, x_offset1
;
999 LLVMValueRef y_offset0
, y_offset1
;
1000 LLVMValueRef z_offset0
, z_offset1
;
1001 LLVMValueRef offset
[2][2][2]; /* [z][y][x] */
1002 LLVMValueRef x_subcoord
[2], y_subcoord
[2], z_subcoord
[2];
1005 lp_build_context_init(&i32
, bld
->gallivm
, lp_type_int_vec(32, bld
->vector_width
));
1007 i32_vec_type
= lp_build_vec_type(bld
->gallivm
, i32
.type
);
1009 lp_build_extract_image_sizes(bld
,
1011 bld
->int_coord_type
,
1017 s_float
= s
; t_float
= t
; r_float
= r
;
1019 if (bld
->static_state
->normalized_coords
) {
1020 LLVMValueRef scaled_size
;
1021 LLVMValueRef flt_size
;
1023 /* scale size by 256 (8 fractional bits) */
1024 scaled_size
= lp_build_shl_imm(&bld
->int_size_bld
, int_size
, 8);
1026 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, scaled_size
);
1028 lp_build_unnormalized_coords(bld
, flt_size
, &s
, &t
, &r
);
1031 /* scale coords by 256 (8 fractional bits) */
1032 s
= lp_build_mul_imm(&bld
->coord_bld
, s
, 256);
1034 t
= lp_build_mul_imm(&bld
->coord_bld
, t
, 256);
1036 r
= lp_build_mul_imm(&bld
->coord_bld
, r
, 256);
1039 /* convert float to int */
1040 s
= LLVMBuildFPToSI(builder
, s
, i32_vec_type
, "");
1042 t
= LLVMBuildFPToSI(builder
, t
, i32_vec_type
, "");
1044 r
= LLVMBuildFPToSI(builder
, r
, i32_vec_type
, "");
1046 /* subtract 0.5 (add -128) */
1047 i32_c128
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, -128);
1048 if (!bld
->static_state
->force_nearest_s
) {
1049 s
= LLVMBuildAdd(builder
, s
, i32_c128
, "");
1051 if (dims
>= 2 && !bld
->static_state
->force_nearest_t
) {
1052 t
= LLVMBuildAdd(builder
, t
, i32_c128
, "");
1055 r
= LLVMBuildAdd(builder
, r
, i32_c128
, "");
1058 /* compute floor (shift right 8) */
1059 i32_c8
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, 8);
1060 s_ipart
= LLVMBuildAShr(builder
, s
, i32_c8
, "");
1062 t_ipart
= LLVMBuildAShr(builder
, t
, i32_c8
, "");
1064 r_ipart
= LLVMBuildAShr(builder
, r
, i32_c8
, "");
1066 /* compute fractional part (AND with 0xff) */
1067 i32_c255
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, 255);
1068 s_fpart
= LLVMBuildAnd(builder
, s
, i32_c255
, "");
1070 t_fpart
= LLVMBuildAnd(builder
, t
, i32_c255
, "");
1072 r_fpart
= LLVMBuildAnd(builder
, r
, i32_c255
, "");
1074 /* get pixel, row and image strides */
1075 x_stride
= lp_build_const_vec(bld
->gallivm
, bld
->int_coord_bld
.type
,
1076 bld
->format_desc
->block
.bits
/8);
1077 y_stride
= row_stride_vec
;
1078 z_stride
= img_stride_vec
;
1080 /* do texcoord wrapping and compute texel offsets */
1081 lp_build_sample_wrap_linear_int(bld
,
1082 bld
->format_desc
->block
.width
,
1083 s_ipart
, &s_fpart
, s_float
,
1084 width_vec
, x_stride
,
1085 bld
->static_state
->pot_width
,
1086 bld
->static_state
->wrap_s
,
1087 &x_offset0
, &x_offset1
,
1088 &x_subcoord
[0], &x_subcoord
[1]);
1090 /* add potential cube/array/mip offsets now as they are constant per pixel */
1091 if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
||
1092 bld
->static_state
->target
== PIPE_TEXTURE_1D_ARRAY
||
1093 bld
->static_state
->target
== PIPE_TEXTURE_2D_ARRAY
) {
1094 LLVMValueRef z_offset
;
1095 z_offset
= lp_build_mul(&bld
->int_coord_bld
, r
, img_stride_vec
);
1096 /* The r coord is the cube face in [0,5] or array layer */
1097 x_offset0
= lp_build_add(&bld
->int_coord_bld
, x_offset0
, z_offset
);
1098 x_offset1
= lp_build_add(&bld
->int_coord_bld
, x_offset1
, z_offset
);
1101 x_offset0
= lp_build_add(&bld
->int_coord_bld
, x_offset0
, mipoffsets
);
1102 x_offset1
= lp_build_add(&bld
->int_coord_bld
, x_offset1
, mipoffsets
);
1105 for (z
= 0; z
< 2; z
++) {
1106 for (y
= 0; y
< 2; y
++) {
1107 offset
[z
][y
][0] = x_offset0
;
1108 offset
[z
][y
][1] = x_offset1
;
1113 lp_build_sample_wrap_linear_int(bld
,
1114 bld
->format_desc
->block
.height
,
1115 t_ipart
, &t_fpart
, t_float
,
1116 height_vec
, y_stride
,
1117 bld
->static_state
->pot_height
,
1118 bld
->static_state
->wrap_t
,
1119 &y_offset0
, &y_offset1
,
1120 &y_subcoord
[0], &y_subcoord
[1]);
1122 for (z
= 0; z
< 2; z
++) {
1123 for (x
= 0; x
< 2; x
++) {
1124 offset
[z
][0][x
] = lp_build_add(&bld
->int_coord_bld
,
1125 offset
[z
][0][x
], y_offset0
);
1126 offset
[z
][1][x
] = lp_build_add(&bld
->int_coord_bld
,
1127 offset
[z
][1][x
], y_offset1
);
1133 lp_build_sample_wrap_linear_int(bld
,
1134 bld
->format_desc
->block
.height
,
1135 r_ipart
, &r_fpart
, r_float
,
1136 depth_vec
, z_stride
,
1137 bld
->static_state
->pot_depth
,
1138 bld
->static_state
->wrap_r
,
1139 &z_offset0
, &z_offset1
,
1140 &z_subcoord
[0], &z_subcoord
[1]);
1141 for (y
= 0; y
< 2; y
++) {
1142 for (x
= 0; x
< 2; x
++) {
1143 offset
[0][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1144 offset
[0][y
][x
], z_offset0
);
1145 offset
[1][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1146 offset
[1][y
][x
], z_offset1
);
1151 lp_build_sample_fetch_image_linear(bld
, data_ptr
, offset
,
1152 x_subcoord
, y_subcoord
,
1153 s_fpart
, t_fpart
, r_fpart
,
1154 colors_lo
, colors_hi
);
1159 * Sample a single texture image with (bi-)(tri-)linear sampling.
1160 * Return filtered color as two vectors of 16-bit fixed point values.
1161 * Does address calcs (except offsets) with floats.
1162 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
1165 lp_build_sample_image_linear_afloat(struct lp_build_sample_context
*bld
,
1166 LLVMValueRef int_size
,
1167 LLVMValueRef row_stride_vec
,
1168 LLVMValueRef img_stride_vec
,
1169 LLVMValueRef data_ptr
,
1170 LLVMValueRef mipoffsets
,
1174 LLVMValueRef
*colors_lo
,
1175 LLVMValueRef
*colors_hi
)
1177 const unsigned dims
= bld
->dims
;
1178 LLVMValueRef width_vec
, height_vec
, depth_vec
;
1179 LLVMValueRef s_fpart
;
1180 LLVMValueRef t_fpart
= NULL
;
1181 LLVMValueRef r_fpart
= NULL
;
1182 LLVMValueRef x_stride
, y_stride
, z_stride
;
1183 LLVMValueRef x_offset0
, x_offset1
;
1184 LLVMValueRef y_offset0
, y_offset1
;
1185 LLVMValueRef z_offset0
, z_offset1
;
1186 LLVMValueRef offset
[2][2][2]; /* [z][y][x] */
1187 LLVMValueRef x_subcoord
[2], y_subcoord
[2];
1188 LLVMValueRef flt_size
;
1189 LLVMValueRef x_icoord0
, x_icoord1
;
1190 LLVMValueRef y_icoord0
, y_icoord1
;
1191 LLVMValueRef z_icoord0
, z_icoord1
;
1194 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, int_size
);
1196 lp_build_extract_image_sizes(bld
,
1197 &bld
->float_size_bld
,
1204 /* do texcoord wrapping and compute texel offsets */
1205 lp_build_sample_wrap_linear_float(bld
,
1206 bld
->format_desc
->block
.width
,
1208 bld
->static_state
->pot_width
,
1209 bld
->static_state
->wrap_s
,
1210 &x_icoord0
, &x_icoord1
,
1212 bld
->static_state
->force_nearest_s
);
1215 lp_build_sample_wrap_linear_float(bld
,
1216 bld
->format_desc
->block
.height
,
1218 bld
->static_state
->pot_height
,
1219 bld
->static_state
->wrap_t
,
1220 &y_icoord0
, &y_icoord1
,
1222 bld
->static_state
->force_nearest_t
);
1225 lp_build_sample_wrap_linear_float(bld
,
1226 bld
->format_desc
->block
.height
,
1228 bld
->static_state
->pot_depth
,
1229 bld
->static_state
->wrap_r
,
1230 &z_icoord0
, &z_icoord1
,
1236 * From here on we deal with ints, and we should split up the 256bit
1237 * vectors manually for better generated code.
1240 /* get pixel, row and image strides */
1241 x_stride
= lp_build_const_vec(bld
->gallivm
,
1242 bld
->int_coord_bld
.type
,
1243 bld
->format_desc
->block
.bits
/8);
1244 y_stride
= row_stride_vec
;
1245 z_stride
= img_stride_vec
;
1248 * compute texel offset -
1249 * cannot do offset calc with floats, difficult for block-based formats,
1250 * and not enough precision anyway.
1252 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1253 bld
->format_desc
->block
.width
,
1254 x_icoord0
, x_stride
,
1255 &x_offset0
, &x_subcoord
[0]);
1256 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1257 bld
->format_desc
->block
.width
,
1258 x_icoord1
, x_stride
,
1259 &x_offset1
, &x_subcoord
[1]);
1261 /* add potential cube/array/mip offsets now as they are constant per pixel */
1262 if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
||
1263 bld
->static_state
->target
== PIPE_TEXTURE_1D_ARRAY
||
1264 bld
->static_state
->target
== PIPE_TEXTURE_2D_ARRAY
) {
1265 LLVMValueRef z_offset
;
1266 z_offset
= lp_build_mul(&bld
->int_coord_bld
, r
, img_stride_vec
);
1267 /* The r coord is the cube face in [0,5] or array layer */
1268 x_offset0
= lp_build_add(&bld
->int_coord_bld
, x_offset0
, z_offset
);
1269 x_offset1
= lp_build_add(&bld
->int_coord_bld
, x_offset1
, z_offset
);
1272 x_offset0
= lp_build_add(&bld
->int_coord_bld
, x_offset0
, mipoffsets
);
1273 x_offset1
= lp_build_add(&bld
->int_coord_bld
, x_offset1
, mipoffsets
);
1276 for (z
= 0; z
< 2; z
++) {
1277 for (y
= 0; y
< 2; y
++) {
1278 offset
[z
][y
][0] = x_offset0
;
1279 offset
[z
][y
][1] = x_offset1
;
1284 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1285 bld
->format_desc
->block
.height
,
1286 y_icoord0
, y_stride
,
1287 &y_offset0
, &y_subcoord
[0]);
1288 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1289 bld
->format_desc
->block
.height
,
1290 y_icoord1
, y_stride
,
1291 &y_offset1
, &y_subcoord
[1]);
1292 for (z
= 0; z
< 2; z
++) {
1293 for (x
= 0; x
< 2; x
++) {
1294 offset
[z
][0][x
] = lp_build_add(&bld
->int_coord_bld
,
1295 offset
[z
][0][x
], y_offset0
);
1296 offset
[z
][1][x
] = lp_build_add(&bld
->int_coord_bld
,
1297 offset
[z
][1][x
], y_offset1
);
1303 LLVMValueRef z_subcoord
[2];
1304 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1306 z_icoord0
, z_stride
,
1307 &z_offset0
, &z_subcoord
[0]);
1308 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1310 z_icoord1
, z_stride
,
1311 &z_offset1
, &z_subcoord
[1]);
1312 for (y
= 0; y
< 2; y
++) {
1313 for (x
= 0; x
< 2; x
++) {
1314 offset
[0][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1315 offset
[0][y
][x
], z_offset0
);
1316 offset
[1][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1317 offset
[1][y
][x
], z_offset1
);
1322 lp_build_sample_fetch_image_linear(bld
, data_ptr
, offset
,
1323 x_subcoord
, y_subcoord
,
1324 s_fpart
, t_fpart
, r_fpart
,
1325 colors_lo
, colors_hi
);
1330 * Sample the texture/mipmap using given image filter and mip filter.
1331 * data0_ptr and data1_ptr point to the two mipmap levels to sample
1332 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1333 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1336 lp_build_sample_mipmap(struct lp_build_sample_context
*bld
,
1337 unsigned img_filter
,
1338 unsigned mip_filter
,
1342 LLVMValueRef ilevel0
,
1343 LLVMValueRef ilevel1
,
1344 LLVMValueRef lod_fpart
,
1345 LLVMValueRef colors_lo_var
,
1346 LLVMValueRef colors_hi_var
)
1348 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
1351 LLVMValueRef row_stride0_vec
= NULL
;
1352 LLVMValueRef row_stride1_vec
= NULL
;
1353 LLVMValueRef img_stride0_vec
= NULL
;
1354 LLVMValueRef img_stride1_vec
= NULL
;
1355 LLVMValueRef data_ptr0
;
1356 LLVMValueRef data_ptr1
;
1357 LLVMValueRef mipoff0
= NULL
;
1358 LLVMValueRef mipoff1
= NULL
;
1359 LLVMValueRef colors0_lo
, colors0_hi
;
1360 LLVMValueRef colors1_lo
, colors1_hi
;
1362 /* sample the first mipmap level */
1363 lp_build_mipmap_level_sizes(bld
, ilevel0
,
1365 &row_stride0_vec
, &img_stride0_vec
);
1366 if (bld
->num_lods
== 1) {
1367 data_ptr0
= lp_build_get_mipmap_level(bld
, ilevel0
);
1370 /* This path should work for num_lods 1 too but slightly less efficient */
1371 data_ptr0
= bld
->base_ptr
;
1372 mipoff0
= lp_build_get_mip_offsets(bld
, ilevel0
);
1375 if (util_cpu_caps
.has_avx
&& bld
->coord_type
.length
> 4) {
1376 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
1377 lp_build_sample_image_nearest_afloat(bld
,
1379 row_stride0_vec
, img_stride0_vec
,
1380 data_ptr0
, mipoff0
, s
, t
, r
,
1381 &colors0_lo
, &colors0_hi
);
1384 assert(img_filter
== PIPE_TEX_FILTER_LINEAR
);
1385 lp_build_sample_image_linear_afloat(bld
,
1387 row_stride0_vec
, img_stride0_vec
,
1388 data_ptr0
, mipoff0
, s
, t
, r
,
1389 &colors0_lo
, &colors0_hi
);
1393 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
1394 lp_build_sample_image_nearest(bld
,
1396 row_stride0_vec
, img_stride0_vec
,
1397 data_ptr0
, mipoff0
, s
, t
, r
,
1398 &colors0_lo
, &colors0_hi
);
1401 assert(img_filter
== PIPE_TEX_FILTER_LINEAR
);
1402 lp_build_sample_image_linear(bld
,
1404 row_stride0_vec
, img_stride0_vec
,
1405 data_ptr0
, mipoff0
, s
, t
, r
,
1406 &colors0_lo
, &colors0_hi
);
1410 /* Store the first level's colors in the output variables */
1411 LLVMBuildStore(builder
, colors0_lo
, colors_lo_var
);
1412 LLVMBuildStore(builder
, colors0_hi
, colors_hi_var
);
1414 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
1415 LLVMValueRef h16vec_scale
= lp_build_const_vec(bld
->gallivm
,
1416 bld
->perquadf_bld
.type
, 256.0);
1417 LLVMTypeRef i32vec_type
= lp_build_vec_type(bld
->gallivm
, bld
->perquadi_bld
.type
);
1418 struct lp_build_if_state if_ctx
;
1419 LLVMValueRef need_lerp
;
1420 unsigned num_quads
= bld
->coord_bld
.type
.length
/ 4;
1423 lod_fpart
= LLVMBuildFMul(builder
, lod_fpart
, h16vec_scale
, "");
1424 lod_fpart
= LLVMBuildFPToSI(builder
, lod_fpart
, i32vec_type
, "lod_fpart.fixed16");
1426 /* need_lerp = lod_fpart > 0 */
1427 if (num_quads
== 1) {
1428 need_lerp
= LLVMBuildICmp(builder
, LLVMIntSGT
,
1429 lod_fpart
, bld
->perquadi_bld
.zero
,
1434 * We'll do mip filtering if any of the quads need it.
1435 * It might be better to split the vectors here and only fetch/filter
1436 * quads which need it.
1439 * We need to clamp lod_fpart here since we can get negative
1440 * values which would screw up filtering if not all
1441 * lod_fpart values have same sign.
1442 * We can however then skip the greater than comparison.
1444 lod_fpart
= lp_build_max(&bld
->perquadi_bld
, lod_fpart
,
1445 bld
->perquadi_bld
.zero
);
1446 need_lerp
= lp_build_any_true_range(&bld
->perquadi_bld
, num_quads
, lod_fpart
);
1449 lp_build_if(&if_ctx
, bld
->gallivm
, need_lerp
);
1451 struct lp_build_context h16_bld
;
1453 lp_build_context_init(&h16_bld
, bld
->gallivm
, lp_type_ufixed(16, bld
->vector_width
));
1455 /* sample the second mipmap level */
1456 lp_build_mipmap_level_sizes(bld
, ilevel1
,
1458 &row_stride1_vec
, &img_stride1_vec
);
1459 lp_build_mipmap_level_sizes(bld
, ilevel1
,
1461 &row_stride1_vec
, &img_stride1_vec
);
1462 if (bld
->num_lods
== 1) {
1463 data_ptr1
= lp_build_get_mipmap_level(bld
, ilevel1
);
1466 data_ptr1
= bld
->base_ptr
;
1467 mipoff1
= lp_build_get_mip_offsets(bld
, ilevel1
);
1470 if (util_cpu_caps
.has_avx
&& bld
->coord_type
.length
> 4) {
1471 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
1472 lp_build_sample_image_nearest_afloat(bld
,
1474 row_stride1_vec
, img_stride1_vec
,
1475 data_ptr1
, mipoff1
, s
, t
, r
,
1476 &colors1_lo
, &colors1_hi
);
1479 lp_build_sample_image_linear_afloat(bld
,
1481 row_stride1_vec
, img_stride1_vec
,
1482 data_ptr1
, mipoff1
, s
, t
, r
,
1483 &colors1_lo
, &colors1_hi
);
1487 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
1488 lp_build_sample_image_nearest(bld
,
1490 row_stride1_vec
, img_stride1_vec
,
1491 data_ptr1
, mipoff1
, s
, t
, r
,
1492 &colors1_lo
, &colors1_hi
);
1495 lp_build_sample_image_linear(bld
,
1497 row_stride1_vec
, img_stride1_vec
,
1498 data_ptr1
, mipoff1
, s
, t
, r
,
1499 &colors1_lo
, &colors1_hi
);
1503 /* interpolate samples from the two mipmap levels */
1505 if (num_quads
== 1) {
1506 lod_fpart
= LLVMBuildTrunc(builder
, lod_fpart
, h16_bld
.elem_type
, "");
1507 lod_fpart
= lp_build_broadcast_scalar(&h16_bld
, lod_fpart
);
1509 #if HAVE_LLVM == 0x208
1510 /* This is a work-around for a bug in LLVM 2.8.
1511 * Evidently, something goes wrong in the construction of the
1512 * lod_fpart short[8] vector. Adding this no-effect shuffle seems
1513 * to force the vector to be properly constructed.
1514 * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
1517 LLVMValueRef shuffles
[8], shuffle
;
1518 assert(h16_bld
.type
.length
<= Elements(shuffles
));
1519 for (i
= 0; i
< h16_bld
.type
.length
; i
++)
1520 shuffles
[i
] = lp_build_const_int32(bld
->gallivm
, 2 * (i
& 1));
1521 shuffle
= LLVMConstVector(shuffles
, h16_bld
.type
.length
);
1522 lod_fpart
= LLVMBuildShuffleVector(builder
,
1523 lod_fpart
, lod_fpart
,
1528 colors0_lo
= lp_build_lerp(&h16_bld
, lod_fpart
,
1529 colors0_lo
, colors1_lo
);
1530 colors0_hi
= lp_build_lerp(&h16_bld
, lod_fpart
,
1531 colors0_hi
, colors1_hi
);
1534 LLVMValueRef lod_parts
[LP_MAX_VECTOR_LENGTH
/16];
1535 struct lp_type perquadi16_type
= bld
->perquadi_bld
.type
;
1536 perquadi16_type
.width
/= 2;
1537 perquadi16_type
.length
*= 2;
1538 lod_fpart
= LLVMBuildBitCast(builder
, lod_fpart
,
1539 lp_build_vec_type(bld
->gallivm
,
1540 perquadi16_type
), "");
1541 /* XXX this only works for exactly 2 quads. More quads need shuffle */
1542 assert(num_quads
== 2);
1543 for (i
= 0; i
< num_quads
; i
++) {
1544 LLVMValueRef indexi2
= lp_build_const_int32(bld
->gallivm
, i
*2);
1545 lod_parts
[i
] = lp_build_extract_broadcast(bld
->gallivm
,
1551 colors0_lo
= lp_build_lerp(&h16_bld
, lod_parts
[0],
1552 colors0_lo
, colors1_lo
);
1553 colors0_hi
= lp_build_lerp(&h16_bld
, lod_parts
[1],
1554 colors0_hi
, colors1_hi
);
1557 LLVMBuildStore(builder
, colors0_lo
, colors_lo_var
);
1558 LLVMBuildStore(builder
, colors0_hi
, colors_hi_var
);
1560 lp_build_endif(&if_ctx
);
1567 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
1568 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
1569 * but only limited texture coord wrap modes.
1572 lp_build_sample_aos(struct lp_build_sample_context
*bld
,
1577 LLVMValueRef lod_ipart
,
1578 LLVMValueRef lod_fpart
,
1579 LLVMValueRef ilevel0
,
1580 LLVMValueRef ilevel1
,
1581 LLVMValueRef texel_out
[4])
1583 struct lp_build_context
*int_bld
= &bld
->int_bld
;
1584 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
1585 const unsigned mip_filter
= bld
->static_state
->min_mip_filter
;
1586 const unsigned min_filter
= bld
->static_state
->min_img_filter
;
1587 const unsigned mag_filter
= bld
->static_state
->mag_img_filter
;
1588 const unsigned dims
= bld
->dims
;
1589 LLVMValueRef packed
, packed_lo
, packed_hi
;
1590 LLVMValueRef unswizzled
[4];
1591 struct lp_build_context h16_bld
;
1593 /* we only support the common/simple wrap modes at this time */
1594 assert(lp_is_simple_wrap_mode(bld
->static_state
->wrap_s
));
1596 assert(lp_is_simple_wrap_mode(bld
->static_state
->wrap_t
));
1598 assert(lp_is_simple_wrap_mode(bld
->static_state
->wrap_r
));
1601 /* make 16-bit fixed-pt builder context */
1602 lp_build_context_init(&h16_bld
, bld
->gallivm
, lp_type_ufixed(16, bld
->vector_width
));
1605 * Get/interpolate texture colors.
1608 packed_lo
= lp_build_alloca(bld
->gallivm
, h16_bld
.vec_type
, "packed_lo");
1609 packed_hi
= lp_build_alloca(bld
->gallivm
, h16_bld
.vec_type
, "packed_hi");
1611 if (min_filter
== mag_filter
) {
1612 /* no need to distinguish between minification and magnification */
1613 lp_build_sample_mipmap(bld
,
1614 min_filter
, mip_filter
,
1616 ilevel0
, ilevel1
, lod_fpart
,
1617 packed_lo
, packed_hi
);
1620 /* Emit conditional to choose min image filter or mag image filter
1621 * depending on the lod being > 0 or <= 0, respectively.
1623 struct lp_build_if_state if_ctx
;
1624 LLVMValueRef minify
;
1627 * XXX this should to all lods into account, if some are min
1628 * some max probably could hack up the coords/weights in the linear
1629 * path with selects to work for nearest.
1630 * If that's just two quads sitting next to each other it seems
1631 * quite ok to do the same filtering method on both though, at
1632 * least unless we have explicit lod (and who uses different
1633 * min/mag filter with that?)
1635 if (bld
->num_lods
> 1)
1636 lod_ipart
= LLVMBuildExtractElement(builder
, lod_ipart
,
1637 lp_build_const_int32(bld
->gallivm
, 0), "");
1639 /* minify = lod >= 0.0 */
1640 minify
= LLVMBuildICmp(builder
, LLVMIntSGE
,
1641 lod_ipart
, int_bld
->zero
, "");
1643 lp_build_if(&if_ctx
, bld
->gallivm
, minify
);
1645 /* Use the minification filter */
1646 lp_build_sample_mipmap(bld
,
1647 min_filter
, mip_filter
,
1649 ilevel0
, ilevel1
, lod_fpart
,
1650 packed_lo
, packed_hi
);
1652 lp_build_else(&if_ctx
);
1654 /* Use the magnification filter */
1655 lp_build_sample_mipmap(bld
,
1656 mag_filter
, PIPE_TEX_MIPFILTER_NONE
,
1658 ilevel0
, NULL
, NULL
,
1659 packed_lo
, packed_hi
);
1661 lp_build_endif(&if_ctx
);
1665 * combine the values stored in 'packed_lo' and 'packed_hi' variables
1668 packed
= lp_build_pack2(bld
->gallivm
,
1669 h16_bld
.type
, lp_type_unorm(8, bld
->vector_width
),
1670 LLVMBuildLoad(builder
, packed_lo
, ""),
1671 LLVMBuildLoad(builder
, packed_hi
, ""));
1674 * Convert to SoA and swizzle.
1676 lp_build_rgba8_to_f32_soa(bld
->gallivm
,
1678 packed
, unswizzled
);
1680 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
1681 lp_build_format_swizzle_soa(bld
->format_desc
,
1683 unswizzled
, texel_out
);
1686 texel_out
[0] = unswizzled
[0];
1687 texel_out
[1] = unswizzled
[1];
1688 texel_out
[2] = unswizzled
[2];
1689 texel_out
[3] = unswizzled
[3];