1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Texture sampling -- AoS.
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "util/u_cpu_detect.h"
44 #include "lp_bld_debug.h"
45 #include "lp_bld_type.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_arit.h"
49 #include "lp_bld_bitarit.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_swizzle.h"
52 #include "lp_bld_pack.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_gather.h"
55 #include "lp_bld_format.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_sample.h"
58 #include "lp_bld_sample_aos.h"
59 #include "lp_bld_quad.h"
63 * Build LLVM code for texture coord wrapping, for nearest filtering,
64 * for scaled integer texcoords.
65 * \param block_length is the length of the pixel block along the
67 * \param coord the incoming texcoord (s,t or r) scaled to the texture size
68 * \param coord_f the incoming texcoord (s,t or r) as float vec
69 * \param length the texture size along one dimension
70 * \param stride pixel stride along the coordinate axis (in bytes)
71 * \param offset the texel offset along the coord axis
72 * \param is_pot if TRUE, length is a power of two
73 * \param wrap_mode one of PIPE_TEX_WRAP_x
74 * \param out_offset byte offset for the wrapped coordinate
75 * \param out_i resulting sub-block pixel coordinate for coord0
78 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context
*bld
,
79 unsigned block_length
,
87 LLVMValueRef
*out_offset
,
90 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
91 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
92 LLVMValueRef length_minus_one
;
94 length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
97 case PIPE_TEX_WRAP_REPEAT
:
99 coord
= LLVMBuildAnd(builder
, coord
, length_minus_one
, "");
101 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
102 LLVMValueRef length_f
= lp_build_int_to_float(coord_bld
, length
);
104 offset
= lp_build_int_to_float(coord_bld
, offset
);
105 offset
= lp_build_div(coord_bld
, offset
, length_f
);
106 coord_f
= lp_build_add(coord_bld
, coord_f
, offset
);
108 coord
= lp_build_fract_safe(coord_bld
, coord_f
);
109 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
110 coord
= lp_build_itrunc(coord_bld
, coord
);
114 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
115 coord
= lp_build_max(int_coord_bld
, coord
, int_coord_bld
->zero
);
116 coord
= lp_build_min(int_coord_bld
, coord
, length_minus_one
);
119 case PIPE_TEX_WRAP_CLAMP
:
120 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
121 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
122 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
123 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
124 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
129 lp_build_sample_partial_offset(int_coord_bld
, block_length
, coord
, stride
,
135 * Build LLVM code for texture coord wrapping, for nearest filtering,
136 * for float texcoords.
137 * \param coord the incoming texcoord (s,t or r)
138 * \param length the texture size along one dimension
139 * \param offset the texel offset along the coord axis
140 * \param is_pot if TRUE, length is a power of two
141 * \param wrap_mode one of PIPE_TEX_WRAP_x
142 * \param icoord the texcoord after wrapping, as int
/* Wrap a float texcoord for nearest filtering, producing the integer texel
 * coordinate in *icoord.
 * NOTE(review): extraction incomplete — parameter list, switch header and
 * several if/break lines are missing from this view. */
lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld,
                                   /* [gap: coord/length/offset/is_pot/wrap_mode
                                      parameters missing in this view] */
                                   LLVMValueRef *icoord)
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMValueRef length_minus_one;

   /* [gap: switch header missing in this view] */
   case PIPE_TEX_WRAP_REPEAT:
      /* this is definitely not ideal for POT case */
      /* apply texel offset as a normalized-coordinate shift */
      offset = lp_build_int_to_float(coord_bld, offset);
      offset = lp_build_div(coord_bld, offset, length);
      coord = lp_build_add(coord_bld, coord, offset);
      /* take fraction, unnormalize */
      coord = lp_build_fract_safe(coord_bld, coord);
      coord = lp_build_mul(coord_bld, coord, length);
      *icoord = lp_build_itrunc(coord_bld, coord);

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length);
      /* texel offset is applied in texel units here */
      offset = lp_build_int_to_float(coord_bld, offset);
      coord = lp_build_add(coord_bld, coord, offset);
      /* clamp to [0, length - 1] then truncate to int */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero,
      *icoord = lp_build_itrunc(coord_bld, coord);

   /* remaining wrap modes (handler not visible in this extraction) */
   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
197 * Build LLVM code for texture coord wrapping, for linear filtering,
198 * for scaled integer texcoords.
199 * \param block_length is the length of the pixel block along the
201 * \param coord0 the incoming texcoord (s,t or r) scaled to the texture size
202 * \param coord_f the incoming texcoord (s,t or r) as float vec
203 * \param length the texture size along one dimension
204 * \param stride pixel stride along the coordinate axis (in bytes)
205 * \param offset the texel offset along the coord axis
206 * \param is_pot if TRUE, length is a power of two
207 * \param wrap_mode one of PIPE_TEX_WRAP_x
208 * \param offset0 resulting relative offset for coord0
209 * \param offset1 resulting relative offset for coord0 + 1
210 * \param i0 resulting sub-block pixel coordinate for coord0
211 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
/* Wrap a scaled-integer texcoord for linear filtering: produce byte offsets
 * (offset0/offset1) for the two adjacent texels, their sub-block pixel
 * indices, and (for NPOT repeat) a recomputed 8-bit lerp weight.
 * NOTE(review): extraction incomplete — parts of the parameter list, switch
 * headers, if/else/break lines and some call arguments are missing from this
 * view; comments annotate visible code only. */
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                /* [gap: coord0 parameter missing in this view] */
                                LLVMValueRef *weight_i,
                                LLVMValueRef coord_f,
                                /* [gap: length/stride/offset/is_pot/wrap_mode
                                   parameters missing in this view] */
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                /* [gap: i0/i1 parameters missing in this view] */
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   /*
    * If the pixel block covers more than one pixel then there is no easy
    * way to calculate offset1 relative to offset0. Instead, compute them
    * independently. Otherwise, try to compute offset0 and offset1 with
    * a single stride multiplication.
    */

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   /* --- multi-pixel block path: compute both offsets independently --- */
   if (block_length != 1) {

      case PIPE_TEX_WRAP_REPEAT:
         /* POT: coord1 = coord0 + 1, both masked against length - 1 */
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
         coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
         /* NPOT: redo wrap in float space, recomputing the lerp weight */
         LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
         offset = lp_build_int_to_float(&bld->coord_bld, offset);
         offset = lp_build_div(&bld->coord_bld, offset, length_f);
         coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
         lp_build_coord_repeat_npot_linear(bld, coord_f,
         /* [gap: remaining arguments missing in this view] */
         /* coord1 wraps to 0 exactly when coord0 == length - 1 */
         mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                                 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
         coord1 = LLVMBuildAnd(builder,
                               lp_build_add(int_coord_bld, coord0,
         /* rescale weight to 8.8 fixed point for the u8 lerp */
         weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
         *weight_i = lp_build_itrunc(&bld->coord_bld, weight);

      case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
         coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,

      /* remaining wrap modes: zero both coords (fallback handling) */
      case PIPE_TEX_WRAP_CLAMP:
      case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      case PIPE_TEX_WRAP_MIRROR_REPEAT:
      case PIPE_TEX_WRAP_MIRROR_CLAMP:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
         coord0 = int_coord_bld->zero;
         coord1 = int_coord_bld->zero;

      /* turn each wrapped coord into byte offset + sub-block index */
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,

   /* --- single-pixel block path: derive offset1 from offset0 --- */
      /* sub-block indices are trivially zero for 1-wide blocks */
      *i0 = int_coord_bld->zero;
      *i1 = int_coord_bld->zero;

      case PIPE_TEX_WRAP_REPEAT:
         /* POT: mask coord0; offset1 handled below via the NOTEQUAL mask */
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
         /* NPOT: redo wrap in float space, recomputing the lerp weight */
         LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
         offset = lp_build_int_to_float(&bld->coord_bld, offset);
         offset = lp_build_div(&bld->coord_bld, offset, length_f);
         coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
         lp_build_coord_repeat_npot_linear(bld, coord_f,
         /* [gap: remaining arguments missing in this view] */
         weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
         *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
         /* offset1 = offset0 + stride, ANDed to 0 when coord0 is the last texel */
         mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                                 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
         *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
         *offset1 = LLVMBuildAnd(builder,
                                 lp_build_add(int_coord_bld, *offset0, stride),

      case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
         /* XXX this might be slower than the separate path
          * on some newer cpus. With sse41 this is 8 instructions vs. 7
          * - at least on SNB this is almost certainly slower since
          * min/max are cheaper than selects, and the muls aren't bad.
          */
         /* clamp via compare + select so the masks can also gate offset1 */
         lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                                  PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
         umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                                  PIPE_FUNC_LESS, coord0, length_minus_one);
         coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
         coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
         /* in-range on both sides => second texel advances by one stride */
         mask = LLVMBuildAnd(builder, lmask, umask, "");
         *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
         *offset1 = lp_build_add(int_coord_bld,
                                 LLVMBuildAnd(builder, stride, mask, ""));

      /* remaining wrap modes: zero both offsets (fallback handling) */
      case PIPE_TEX_WRAP_CLAMP:
      case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      case PIPE_TEX_WRAP_MIRROR_REPEAT:
      case PIPE_TEX_WRAP_MIRROR_CLAMP:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
         *offset0 = int_coord_bld->zero;
         *offset1 = int_coord_bld->zero;
372 * Build LLVM code for texture coord wrapping, for linear filtering,
373 * for float texcoords.
374 * \param block_length is the length of the pixel block along the
376 * \param coord the incoming texcoord (s,t or r)
377 * \param length the texture size along one dimension
378 * \param offset the texel offset along the coord axis
379 * \param is_pot if TRUE, length is a power of two
380 * \param wrap_mode one of PIPE_TEX_WRAP_x
381 * \param coord0 the first texcoord after wrapping, as int
382 * \param coord1 the second texcoord after wrapping, as int
383 * \param weight the filter weight as int (0-255)
384 * \param force_nearest if this coord actually uses nearest filtering
/* Wrap a float texcoord for linear filtering: produce the two adjacent
 * integer texel coords and the 8-bit fixed-point lerp weight. When
 * force_nearest is set the 0.5 shift is skipped for this axis.
 * NOTE(review): extraction incomplete — parts of the parameter list, switch
 * headers and several if/else/break lines are missing from this view. */
lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
                                  unsigned block_length,
                                  /* [gap: coord/length/offset/is_pot/wrap_mode
                                     parameters missing in this view] */
                                  LLVMValueRef *coord0,
                                  LLVMValueRef *coord1,
                                  LLVMValueRef *weight,
                                  unsigned force_nearest)
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);

   /* [gap: switch header missing in this view] */
   case PIPE_TEX_WRAP_REPEAT:
      /* POT path */
      /* mul by size and subtract 0.5 */
      coord = lp_build_mul(coord_bld, coord, length);
      /* texel offset applied in texel units */
      offset = lp_build_int_to_float(coord_bld, offset);
      coord = lp_build_add(coord_bld, coord, offset);
      /* center on texel: lower sample is at coord - 0.5 */
      coord = lp_build_sub(coord_bld, coord, half);
      *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
      *coord1 = lp_build_ifloor(coord_bld, *coord1);
      /* POT wrap of both int coords via AND with length - 1 */
      length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
      *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
      *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
      /* NPOT path: offset folded in as a normalized-coordinate shift */
      offset = lp_build_int_to_float(coord_bld, offset);
      offset = lp_build_div(coord_bld, offset, length);
      coord = lp_build_add(coord_bld, coord, offset);
      /* wrap with normalized floats is just fract */
      coord = lp_build_fract(coord_bld, coord);
      coord = lp_build_mul(coord_bld, coord, length);
      /*
       * we avoided the 0.5/length division, have to fix up wrong
       * edge cases with selects
       */
      *coord1 = lp_build_add(coord_bld, coord, half);
      coord = lp_build_sub(coord_bld, coord, half);
      *weight = lp_build_fract(coord_bld, coord);
      /* coord < 0 wraps to the last texel */
      mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                              PIPE_FUNC_LESS, coord, coord_bld->zero);
      *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
      *coord0 = lp_build_itrunc(coord_bld, *coord0);
      /* coord1 >= length wraps to texel 0 */
      mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                              PIPE_FUNC_LESS, *coord1, length);
      *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
      *coord1 = lp_build_itrunc(coord_bld, *coord1);

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* mul by tex size */
         coord = lp_build_mul(coord_bld, coord, length);
      /* texel offset applied in texel units */
      offset = lp_build_int_to_float(coord_bld, offset);
      coord = lp_build_add(coord_bld, coord, offset);
      /* skip the half-texel shift when this axis is forced to nearest */
      if (!force_nearest) {
         coord = lp_build_sub(coord_bld, coord, half);
      /* clamp to [0, length - 1] */
      coord = lp_build_min(coord_bld, coord, length_minus_one);
      coord = lp_build_max(coord_bld, coord, coord_bld->zero);
      *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
      /* coord1 = min(coord1, length-1) */
      *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
      *coord1 = lp_build_itrunc(coord_bld, *coord1);

      /* fallback: zero coords and weight for unhandled wrap modes */
      *coord0 = int_coord_bld->zero;
      *coord1 = int_coord_bld->zero;
      *weight = coord_bld->zero;

   /* scale weight to 8.8 fixed point for the u8 lerp */
   *weight = lp_build_mul_imm(coord_bld, *weight, 256);
   *weight = lp_build_itrunc(coord_bld, *weight);
490 * Fetch texels for image with nearest sampling.
491 * Return filtered color as two vectors of 16-bit fixed point values.
/* Fetch texels for nearest sampling and return them packed as unorm8
 * vectors in *colors.
 * NOTE(review): extraction incomplete — the offset parameter line, some
 * locals, and the tail of the function (the fetch_rgba_aos arguments and
 * the store into *colors) are missing from this view. */
lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
                                    LLVMValueRef data_ptr,
                                    /* [gap: offset parameter missing in this view] */
                                    LLVMValueRef x_subcoord,
                                    LLVMValueRef y_subcoord,
                                    LLVMValueRef *colors)
   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context u8n;
   LLVMTypeRef u8n_vec_type;

   /* build context for unorm8 values at the sampler's vector width */
   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      /*
       * Given the format is a rgba8, just read the pixels as is,
       * without any swizzling. Swizzling will be done later.
       */
      rgba8 = lp_build_gather(bld->gallivm,
                              bld->texel_type.length,
                              bld->format_desc->block.bits,
                              bld->texel_type.width,
                              data_ptr, offset, TRUE);
      rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
      /* non-rgba8 formats go through the generic AoS fetch path */
      rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
      /* [gap: remaining arguments and function tail missing in this view] */
552 * Sample a single texture image with nearest sampling.
553 * If sampling a cube texture, r = cube face in [0,5].
554 * Return filtered color as two vectors of 16-bit fixed point values.
/* Sample a single texture image (one mip level) with nearest filtering,
 * using 8.8 fixed-point integer coordinate math throughout.
 * NOTE(review): extraction incomplete — the s/t/r coord parameters, several
 * dims-conditional if lines and some call-argument lines are missing from
 * this view; comments annotate visible code only. */
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef int_size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              /* [gap: s/t/r coordinate parameters missing in this view] */
                              const LLVMValueRef *offsets,
                              LLVMValueRef *colors)
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
   LLVMValueRef s_float, t_float = NULL, r_float = NULL;
   LLVMValueRef x_stride;
   LLVMValueRef x_offset, offset;
   LLVMValueRef x_subcoord, y_subcoord, z_subcoord;

   /* 32-bit int context at the sampler's vector width */
   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
   /* [gap: extract_image_sizes arguments missing in this view] */

   /* keep unmodified float coords for the NPOT wrap paths */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);

      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      r = lp_build_mul_imm(&bld->coord_bld, r, 256);

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* add texel offsets */
   s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
   t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
   r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);

   /* get pixel, row, image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);

   /* Do texcoord wrapping, compute texel offset */
   lp_build_sample_wrap_nearest_int(bld,
                                    bld->format_desc->block.width,
                                    /* [gap: coord arguments missing in this view] */
                                    width_vec, x_stride, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s,
                                    &x_offset, &x_subcoord);
      /* y axis (dims >= 2) */
      LLVMValueRef y_offset;
      lp_build_sample_wrap_nearest_int(bld,
                                       bld->format_desc->block.height,
                                       /* [gap: coord arguments missing in this view] */
                                       height_vec, row_stride_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t,
                                       &y_offset, &y_subcoord);
      offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
      /* z axis (dims == 3) */
      LLVMValueRef z_offset;
      lp_build_sample_wrap_nearest_int(bld,
                                       1, /* block length (depth) */
                                       depth_vec, img_stride_vec, offsets[2],
                                       bld->static_texture_state->pot_depth,
                                       bld->static_sampler_state->wrap_r,
                                       &z_offset, &z_subcoord);
      offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);

   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      /* The r coord is the cube face in [0,5] or array layer */
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);

      /* fold the mip-level base offset into the texel offset */
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);

   /* fetch the texels at the computed offsets */
   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
696 * Sample a single texture image with nearest sampling.
697 * If sampling a cube texture, r = cube face in [0,5].
698 * Return filtered color as two vectors of 16-bit fixed point values.
699 * Does address calcs (except offsets) with floats.
700 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
/* Sample a single image with nearest filtering, doing address calcs
 * (except the final byte offsets) in float — useful for AVX, which has
 * 8x32 float but not 8x32 int support.
 * NOTE(review): extraction incomplete — the s/t/r parameters, several
 * dims-conditional lines and call-argument lines are missing from this view. */
lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
                                     LLVMValueRef int_size,
                                     LLVMValueRef row_stride_vec,
                                     LLVMValueRef img_stride_vec,
                                     LLVMValueRef data_ptr,
                                     LLVMValueRef mipoffsets,
                                     /* [gap: s/t/r coordinate parameters missing
                                        in this view] */
                                     const LLVMValueRef *offsets,
                                     LLVMValueRef *colors)
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef x_subcoord, y_subcoord;
   LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
   LLVMValueRef flt_size;

   /* image sizes as floats for the float wrap path */
   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
   /* [gap: remaining arguments missing in this view] */

   /* Do texcoord wrapping */
   lp_build_sample_wrap_nearest_float(bld,
                                      s, width_vec, offsets[0],
                                      bld->static_texture_state->pot_width,
                                      bld->static_sampler_state->wrap_s,
      /* y axis (dims >= 2) */
      lp_build_sample_wrap_nearest_float(bld,
                                         t, height_vec, offsets[1],
                                         bld->static_texture_state->pot_height,
                                         bld->static_sampler_state->wrap_t,
      /* z axis (dims == 3) */
      lp_build_sample_wrap_nearest_float(bld,
                                         r, depth_vec, offsets[2],
                                         bld->static_texture_state->pot_depth,
                                         bld->static_sampler_state->wrap_r,

   /* cube faces / array layers use r as the slice index */
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /*
    * compute texel offsets -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_offset(&bld->int_coord_bld,
   /* [gap: format/coord arguments missing in this view] */
                          row_stride_vec, img_stride_vec,
                          &x_subcoord, &y_subcoord);
      /* fold mip-level base offset into the texel offset */
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);

   /* fetch the texels at the computed offsets */
   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
788 * Fetch texels for image with linear sampling.
789 * Return filtered color as two vectors of 16-bit fixed point values.
/* Fetch the 2/4/8 neighboring texels for linear filtering and blend them
 * with 8.8 fixed-point lerps, honoring per-axis force_nearest flags.
 * NOTE(review): extraction incomplete — some locals, loop/if lines,
 * preprocessor else/endif lines and lerp arguments are missing from this
 * view; comments annotate visible code only. */
lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                   LLVMValueRef data_ptr,
                                   LLVMValueRef offset[2][2][2],
                                   LLVMValueRef x_subcoord[2],
                                   LLVMValueRef y_subcoord[2],
                                   LLVMValueRef s_fpart,
                                   LLVMValueRef t_fpart,
                                   LLVMValueRef r_fpart,
                                   LLVMValueRef *colors)
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context u8n;
   LLVMTypeRef u8n_vec_type;
   LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef shuffle;
   LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */

   /* unorm8 context at the sampler's vector width */
   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * where each value is between 0 and 0xff,
    *
    *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart. There is no risk of loosing precision here
    * since the fractional parts only use the lower 8bits.
    */
   s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
   t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
   r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");

   /* build a shuffle that replicates each weight byte across its 4 lanes */
   for (j = 0; j < u8n.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      unsigned subindex = 0;
      /* big endian: low byte sits at the other end of the 32-bit lane */
      unsigned subindex = 3;
      index = LLVMConstInt(elem_type, j + subindex, 0);
      for (i = 0; i < 4; ++i)
         shuffles[j + i] = index;

   shuffle = LLVMConstVector(shuffles, u8n.type.length);

   s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
   t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
   r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   numj = 1 + (dims >= 2);
   numk = 1 + (dims >= 3);

   /* fetch all 2 x numj x numk neighbor texels */
   for (k = 0; k < numk; k++) {
      for (j = 0; j < numj; j++) {
         for (i = 0; i < 2; i++) {

            if (util_format_is_rgba8_variant(bld->format_desc)) {
               /*
                * Given the format is a rgba8, just read the pixels as is,
                * without any swizzling. Swizzling will be done later.
                */
               rgba8 = lp_build_gather(bld->gallivm,
                                       bld->texel_type.length,
                                       bld->format_desc->block.bits,
                                       bld->texel_type.width,
                                       data_ptr, offset[k][j][i], TRUE);
               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
               /* generic AoS fetch for non-rgba8 formats */
               rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                               data_ptr, offset[k][j][i],

            neighbors[k][j][i] = rgba8;

   /*
    * Linear interpolation with 8.8 fixed point.
    */
   if (bld->static_sampler_state->force_nearest_s) {
      /* special case 1-D lerp */
      packed = lp_build_lerp(&u8n,
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   else if (bld->static_sampler_state->force_nearest_t) {
      /* special case 1-D lerp */
      packed = lp_build_lerp(&u8n,
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
      /* general 1/2/3-D lerping */
      packed = lp_build_lerp(&u8n,
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   } else if (dims == 2) {
      packed = lp_build_lerp_2d(&u8n,
                                LP_BLD_LERP_PRESCALED_WEIGHTS);
      /* dims == 3: full trilinear blend */
      packed = lp_build_lerp_3d(&u8n,
                                s_fpart, t_fpart, r_fpart,
                                LP_BLD_LERP_PRESCALED_WEIGHTS);
971 * Sample a single texture image with (bi-)(tri-)linear sampling.
972 * Return filtered color as two vectors of 16-bit fixed point values.
/* Sample a single texture image (one mip level) with (bi-/tri-)linear
 * filtering, using 8.8 fixed-point integer coordinate math.
 * NOTE(review): extraction incomplete — the s/t/r parameters, several
 * dims-conditional if lines and loop-closing lines are missing from this
 * view; comments annotate visible code only. */
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef int_size,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef mipoffsets,
                             /* [gap: s/t/r coordinate parameters missing in this view] */
                             const LLVMValueRef *offsets,
                             LLVMValueRef *colors)
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, s_fpart, s_float;
   LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
   LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];

   /* 32-bit int context at the sampler's vector width */
   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
                                bld->int_coord_type,
   /* [gap: remaining arguments missing in this view] */

   /* keep unmodified float coords for the NPOT wrap paths */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);

      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      r = lp_build_mul_imm(&bld->coord_bld, r, 256);

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* subtract 0.5 (add -128) */
   i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
   if (!bld->static_sampler_state->force_nearest_s) {
      s = LLVMBuildAdd(builder, s, i32_c128, "");
   if (dims >= 2 && !bld->static_sampler_state->force_nearest_t) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
      r = LLVMBuildAdd(builder, r, i32_c128, "");

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* add texel offsets */
   s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
   t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
   r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, &s_fpart, s_float,
                                   width_vec, x_stride, offsets[0],
                                   bld->static_texture_state->pot_width,
                                   bld->static_sampler_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);

      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);

   /* seed the 2x2x2 offset table with the two x offsets */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;

   /* y axis (dims >= 2): wrap t, then fold y offsets into the table */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.height,
                                   t_ipart, &t_fpart, t_float,
                                   height_vec, y_stride, offsets[1],
                                   bld->static_texture_state->pot_height,
                                   bld->static_sampler_state->wrap_t,
                                   &y_offset0, &y_offset1,
                                   &y_subcoord[0], &y_subcoord[1]);

   for (z = 0; z < 2; z++) {
      for (x = 0; x < 2; x++) {
         offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                        offset[z][0][x], y_offset0);
         offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                        offset[z][1][x], y_offset1);

   /* z axis (dims == 3): wrap r, then fold z offsets into the table */
   lp_build_sample_wrap_linear_int(bld,
                                   1, /* block length (depth) */
                                   r_ipart, &r_fpart, r_float,
                                   depth_vec, z_stride, offsets[2],
                                   bld->static_texture_state->pot_depth,
                                   bld->static_sampler_state->wrap_r,
                                   &z_offset0, &z_offset1,
                                   &z_subcoord[0], &z_subcoord[1]);
   for (y = 0; y < 2; y++) {
      for (x = 0; x < 2; x++) {
         offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                        offset[0][y][x], z_offset0);
         offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                        offset[1][y][x], z_offset1);

   /* fetch the neighbor texels and blend with the 8.8 weights */
   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
1169 * Sample a single texture image with (bi-)(tri-)linear sampling.
1170 * Return filtered color as two vectors of 16-bit fixed point values.
1171 * Does address calcs (except offsets) with floats.
1172 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
1175 lp_build_sample_image_linear_afloat(struct lp_build_sample_context
*bld
,
1176 LLVMValueRef int_size
,
1177 LLVMValueRef row_stride_vec
,
1178 LLVMValueRef img_stride_vec
,
1179 LLVMValueRef data_ptr
,
1180 LLVMValueRef mipoffsets
,
1184 const LLVMValueRef
*offsets
,
1185 LLVMValueRef
*colors
)
1187 const unsigned dims
= bld
->dims
;
1188 LLVMValueRef width_vec
, height_vec
, depth_vec
;
1189 LLVMValueRef s_fpart
;
1190 LLVMValueRef t_fpart
= NULL
;
1191 LLVMValueRef r_fpart
= NULL
;
1192 LLVMValueRef x_stride
, y_stride
, z_stride
;
1193 LLVMValueRef x_offset0
, x_offset1
;
1194 LLVMValueRef y_offset0
, y_offset1
;
1195 LLVMValueRef z_offset0
, z_offset1
;
1196 LLVMValueRef offset
[2][2][2]; /* [z][y][x] */
1197 LLVMValueRef x_subcoord
[2], y_subcoord
[2];
1198 LLVMValueRef flt_size
;
1199 LLVMValueRef x_icoord0
, x_icoord1
;
1200 LLVMValueRef y_icoord0
, y_icoord1
;
1201 LLVMValueRef z_icoord0
, z_icoord1
;
1204 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, int_size
);
1206 lp_build_extract_image_sizes(bld
,
1207 &bld
->float_size_bld
,
1214 /* do texcoord wrapping and compute texel offsets */
1215 lp_build_sample_wrap_linear_float(bld
,
1216 bld
->format_desc
->block
.width
,
1217 s
, width_vec
, offsets
[0],
1218 bld
->static_texture_state
->pot_width
,
1219 bld
->static_sampler_state
->wrap_s
,
1220 &x_icoord0
, &x_icoord1
,
1222 bld
->static_sampler_state
->force_nearest_s
);
1225 lp_build_sample_wrap_linear_float(bld
,
1226 bld
->format_desc
->block
.height
,
1227 t
, height_vec
, offsets
[1],
1228 bld
->static_texture_state
->pot_height
,
1229 bld
->static_sampler_state
->wrap_t
,
1230 &y_icoord0
, &y_icoord1
,
1232 bld
->static_sampler_state
->force_nearest_t
);
1235 lp_build_sample_wrap_linear_float(bld
,
1236 1, /* block length (depth) */
1237 r
, depth_vec
, offsets
[2],
1238 bld
->static_texture_state
->pot_depth
,
1239 bld
->static_sampler_state
->wrap_r
,
1240 &z_icoord0
, &z_icoord1
,
1246 * From here on we deal with ints, and we should split up the 256bit
1247 * vectors manually for better generated code.
1250 /* get pixel, row and image strides */
1251 x_stride
= lp_build_const_vec(bld
->gallivm
,
1252 bld
->int_coord_bld
.type
,
1253 bld
->format_desc
->block
.bits
/8);
1254 y_stride
= row_stride_vec
;
1255 z_stride
= img_stride_vec
;
1258 * compute texel offset -
1259 * cannot do offset calc with floats, difficult for block-based formats,
1260 * and not enough precision anyway.
1262 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1263 bld
->format_desc
->block
.width
,
1264 x_icoord0
, x_stride
,
1265 &x_offset0
, &x_subcoord
[0]);
1266 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1267 bld
->format_desc
->block
.width
,
1268 x_icoord1
, x_stride
,
1269 &x_offset1
, &x_subcoord
[1]);
1271 /* add potential cube/array/mip offsets now as they are constant per pixel */
1272 if (bld
->static_texture_state
->target
== PIPE_TEXTURE_CUBE
||
1273 bld
->static_texture_state
->target
== PIPE_TEXTURE_1D_ARRAY
||
1274 bld
->static_texture_state
->target
== PIPE_TEXTURE_2D_ARRAY
) {
1275 LLVMValueRef z_offset
;
1276 z_offset
= lp_build_mul(&bld
->int_coord_bld
, r
, img_stride_vec
);
1277 /* The r coord is the cube face in [0,5] or array layer */
1278 x_offset0
= lp_build_add(&bld
->int_coord_bld
, x_offset0
, z_offset
);
1279 x_offset1
= lp_build_add(&bld
->int_coord_bld
, x_offset1
, z_offset
);
1282 x_offset0
= lp_build_add(&bld
->int_coord_bld
, x_offset0
, mipoffsets
);
1283 x_offset1
= lp_build_add(&bld
->int_coord_bld
, x_offset1
, mipoffsets
);
1286 for (z
= 0; z
< 2; z
++) {
1287 for (y
= 0; y
< 2; y
++) {
1288 offset
[z
][y
][0] = x_offset0
;
1289 offset
[z
][y
][1] = x_offset1
;
1294 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1295 bld
->format_desc
->block
.height
,
1296 y_icoord0
, y_stride
,
1297 &y_offset0
, &y_subcoord
[0]);
1298 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1299 bld
->format_desc
->block
.height
,
1300 y_icoord1
, y_stride
,
1301 &y_offset1
, &y_subcoord
[1]);
1302 for (z
= 0; z
< 2; z
++) {
1303 for (x
= 0; x
< 2; x
++) {
1304 offset
[z
][0][x
] = lp_build_add(&bld
->int_coord_bld
,
1305 offset
[z
][0][x
], y_offset0
);
1306 offset
[z
][1][x
] = lp_build_add(&bld
->int_coord_bld
,
1307 offset
[z
][1][x
], y_offset1
);
1313 LLVMValueRef z_subcoord
[2];
1314 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1316 z_icoord0
, z_stride
,
1317 &z_offset0
, &z_subcoord
[0]);
1318 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1320 z_icoord1
, z_stride
,
1321 &z_offset1
, &z_subcoord
[1]);
1322 for (y
= 0; y
< 2; y
++) {
1323 for (x
= 0; x
< 2; x
++) {
1324 offset
[0][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1325 offset
[0][y
][x
], z_offset0
);
1326 offset
[1][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1327 offset
[1][y
][x
], z_offset1
);
1332 lp_build_sample_fetch_image_linear(bld
, data_ptr
, offset
,
1333 x_subcoord
, y_subcoord
,
1334 s_fpart
, t_fpart
, r_fpart
,
1340 * Sample the texture/mipmap using given image filter and mip filter.
1341 * data0_ptr and data1_ptr point to the two mipmap levels to sample
1342 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1343 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1346 lp_build_sample_mipmap(struct lp_build_sample_context
*bld
,
1347 unsigned img_filter
,
1348 unsigned mip_filter
,
1352 const LLVMValueRef
*offsets
,
1353 LLVMValueRef ilevel0
,
1354 LLVMValueRef ilevel1
,
1355 LLVMValueRef lod_fpart
,
1356 LLVMValueRef colors_var
)
1358 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
1361 LLVMValueRef row_stride0_vec
= NULL
;
1362 LLVMValueRef row_stride1_vec
= NULL
;
1363 LLVMValueRef img_stride0_vec
= NULL
;
1364 LLVMValueRef img_stride1_vec
= NULL
;
1365 LLVMValueRef data_ptr0
;
1366 LLVMValueRef data_ptr1
;
1367 LLVMValueRef mipoff0
= NULL
;
1368 LLVMValueRef mipoff1
= NULL
;
1369 LLVMValueRef colors0
;
1370 LLVMValueRef colors1
;
1372 /* sample the first mipmap level */
1373 lp_build_mipmap_level_sizes(bld
, ilevel0
,
1375 &row_stride0_vec
, &img_stride0_vec
);
1376 if (bld
->num_lods
== 1) {
1377 data_ptr0
= lp_build_get_mipmap_level(bld
, ilevel0
);
1380 /* This path should work for num_lods 1 too but slightly less efficient */
1381 data_ptr0
= bld
->base_ptr
;
1382 mipoff0
= lp_build_get_mip_offsets(bld
, ilevel0
);
1385 if (util_cpu_caps
.has_avx
&& bld
->coord_type
.length
> 4) {
1386 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
1387 lp_build_sample_image_nearest_afloat(bld
,
1389 row_stride0_vec
, img_stride0_vec
,
1390 data_ptr0
, mipoff0
, s
, t
, r
, offsets
,
1394 assert(img_filter
== PIPE_TEX_FILTER_LINEAR
);
1395 lp_build_sample_image_linear_afloat(bld
,
1397 row_stride0_vec
, img_stride0_vec
,
1398 data_ptr0
, mipoff0
, s
, t
, r
, offsets
,
1403 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
1404 lp_build_sample_image_nearest(bld
,
1406 row_stride0_vec
, img_stride0_vec
,
1407 data_ptr0
, mipoff0
, s
, t
, r
, offsets
,
1411 assert(img_filter
== PIPE_TEX_FILTER_LINEAR
);
1412 lp_build_sample_image_linear(bld
,
1414 row_stride0_vec
, img_stride0_vec
,
1415 data_ptr0
, mipoff0
, s
, t
, r
, offsets
,
1420 /* Store the first level's colors in the output variables */
1421 LLVMBuildStore(builder
, colors0
, colors_var
);
1423 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
1424 LLVMValueRef h16vec_scale
= lp_build_const_vec(bld
->gallivm
,
1425 bld
->levelf_bld
.type
, 256.0);
1426 LLVMTypeRef i32vec_type
= bld
->leveli_bld
.vec_type
;
1427 struct lp_build_if_state if_ctx
;
1428 LLVMValueRef need_lerp
;
1429 unsigned num_quads
= bld
->coord_bld
.type
.length
/ 4;
1432 lod_fpart
= LLVMBuildFMul(builder
, lod_fpart
, h16vec_scale
, "");
1433 lod_fpart
= LLVMBuildFPToSI(builder
, lod_fpart
, i32vec_type
, "lod_fpart.fixed16");
1435 /* need_lerp = lod_fpart > 0 */
1436 if (bld
->num_lods
== 1) {
1437 need_lerp
= LLVMBuildICmp(builder
, LLVMIntSGT
,
1438 lod_fpart
, bld
->leveli_bld
.zero
,
1443 * We'll do mip filtering if any of the quads need it.
1444 * It might be better to split the vectors here and only fetch/filter
1445 * quads which need it.
1448 * We need to clamp lod_fpart here since we can get negative
1449 * values which would screw up filtering if not all
1450 * lod_fpart values have same sign.
1451 * We can however then skip the greater than comparison.
1453 lod_fpart
= lp_build_max(&bld
->leveli_bld
, lod_fpart
,
1454 bld
->leveli_bld
.zero
);
1455 need_lerp
= lp_build_any_true_range(&bld
->leveli_bld
, bld
->num_lods
, lod_fpart
);
1458 lp_build_if(&if_ctx
, bld
->gallivm
, need_lerp
);
1460 struct lp_build_context u8n_bld
;
1462 lp_build_context_init(&u8n_bld
, bld
->gallivm
, lp_type_unorm(8, bld
->vector_width
));
1464 /* sample the second mipmap level */
1465 lp_build_mipmap_level_sizes(bld
, ilevel1
,
1467 &row_stride1_vec
, &img_stride1_vec
);
1468 if (bld
->num_lods
== 1) {
1469 data_ptr1
= lp_build_get_mipmap_level(bld
, ilevel1
);
1472 data_ptr1
= bld
->base_ptr
;
1473 mipoff1
= lp_build_get_mip_offsets(bld
, ilevel1
);
1476 if (util_cpu_caps
.has_avx
&& bld
->coord_type
.length
> 4) {
1477 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
1478 lp_build_sample_image_nearest_afloat(bld
,
1480 row_stride1_vec
, img_stride1_vec
,
1481 data_ptr1
, mipoff1
, s
, t
, r
, offsets
,
1485 lp_build_sample_image_linear_afloat(bld
,
1487 row_stride1_vec
, img_stride1_vec
,
1488 data_ptr1
, mipoff1
, s
, t
, r
, offsets
,
1493 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
1494 lp_build_sample_image_nearest(bld
,
1496 row_stride1_vec
, img_stride1_vec
,
1497 data_ptr1
, mipoff1
, s
, t
, r
, offsets
,
1501 lp_build_sample_image_linear(bld
,
1503 row_stride1_vec
, img_stride1_vec
,
1504 data_ptr1
, mipoff1
, s
, t
, r
, offsets
,
1509 /* interpolate samples from the two mipmap levels */
1511 if (num_quads
== 1 && bld
->num_lods
== 1) {
1512 lod_fpart
= LLVMBuildTrunc(builder
, lod_fpart
, u8n_bld
.elem_type
, "");
1513 lod_fpart
= lp_build_broadcast_scalar(&u8n_bld
, lod_fpart
);
1515 #if HAVE_LLVM == 0x208
1516 /* This was a work-around for a bug in LLVM 2.8.
1517 * Evidently, something goes wrong in the construction of the
1518 * lod_fpart short[8] vector. Adding this no-effect shuffle seems
1519 * to force the vector to be properly constructed.
1520 * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
1526 unsigned num_chans_per_lod
= 4 * bld
->coord_type
.length
/ bld
->num_lods
;
1527 LLVMTypeRef tmp_vec_type
= LLVMVectorType(u8n_bld
.elem_type
, bld
->leveli_bld
.type
.length
);
1528 LLVMValueRef shuffle
[LP_MAX_VECTOR_LENGTH
];
1530 /* Take the LSB of lod_fpart */
1531 lod_fpart
= LLVMBuildTrunc(builder
, lod_fpart
, tmp_vec_type
, "");
1533 /* Broadcast each lod weight into their respective channels */
1534 for (i
= 0; i
< u8n_bld
.type
.length
; ++i
) {
1535 shuffle
[i
] = lp_build_const_int32(bld
->gallivm
, i
/ num_chans_per_lod
);
1537 lod_fpart
= LLVMBuildShuffleVector(builder
, lod_fpart
, LLVMGetUndef(tmp_vec_type
),
1538 LLVMConstVector(shuffle
, u8n_bld
.type
.length
), "");
1541 colors0
= lp_build_lerp(&u8n_bld
, lod_fpart
,
1543 LP_BLD_LERP_PRESCALED_WEIGHTS
);
1545 LLVMBuildStore(builder
, colors0
, colors_var
);
1547 lp_build_endif(&if_ctx
);
1554 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
1555 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
1556 * but only limited texture coord wrap modes.
1559 lp_build_sample_aos(struct lp_build_sample_context
*bld
,
1560 unsigned sampler_unit
,
1564 const LLVMValueRef
*offsets
,
1565 LLVMValueRef lod_ipart
,
1566 LLVMValueRef lod_fpart
,
1567 LLVMValueRef ilevel0
,
1568 LLVMValueRef ilevel1
,
1569 LLVMValueRef texel_out
[4])
1571 struct lp_build_context
*int_bld
= &bld
->int_bld
;
1572 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
1573 const unsigned mip_filter
= bld
->static_sampler_state
->min_mip_filter
;
1574 const unsigned min_filter
= bld
->static_sampler_state
->min_img_filter
;
1575 const unsigned mag_filter
= bld
->static_sampler_state
->mag_img_filter
;
1576 const unsigned dims
= bld
->dims
;
1577 LLVMValueRef packed_var
, packed
;
1578 LLVMValueRef unswizzled
[4];
1579 struct lp_build_context u8n_bld
;
1581 /* we only support the common/simple wrap modes at this time */
1582 assert(lp_is_simple_wrap_mode(bld
->static_sampler_state
->wrap_s
));
1584 assert(lp_is_simple_wrap_mode(bld
->static_sampler_state
->wrap_t
));
1586 assert(lp_is_simple_wrap_mode(bld
->static_sampler_state
->wrap_r
));
1589 /* make 8-bit unorm builder context */
1590 lp_build_context_init(&u8n_bld
, bld
->gallivm
, lp_type_unorm(8, bld
->vector_width
));
1593 * Get/interpolate texture colors.
1596 packed_var
= lp_build_alloca(bld
->gallivm
, u8n_bld
.vec_type
, "packed_var");
1598 if (min_filter
== mag_filter
) {
1599 /* no need to distinguish between minification and magnification */
1600 lp_build_sample_mipmap(bld
,
1601 min_filter
, mip_filter
,
1603 ilevel0
, ilevel1
, lod_fpart
,
1607 /* Emit conditional to choose min image filter or mag image filter
1608 * depending on the lod being > 0 or <= 0, respectively.
1610 struct lp_build_if_state if_ctx
;
1611 LLVMValueRef minify
;
1614 * XXX this should take all lods into account, if some are min
1615 * some max probably could hack up the coords/weights in the linear
1616 * path with selects to work for nearest.
1617 * If that's just two quads sitting next to each other it seems
1618 * quite ok to do the same filtering method on both though, at
1619 * least unless we have explicit lod (and who uses different
1620 * min/mag filter with that?)
1622 if (bld
->num_lods
> 1)
1623 lod_ipart
= LLVMBuildExtractElement(builder
, lod_ipart
,
1624 lp_build_const_int32(bld
->gallivm
, 0), "");
1626 /* minify = lod >= 0.0 */
1627 minify
= LLVMBuildICmp(builder
, LLVMIntSGE
,
1628 lod_ipart
, int_bld
->zero
, "");
1630 lp_build_if(&if_ctx
, bld
->gallivm
, minify
);
1632 /* Use the minification filter */
1633 lp_build_sample_mipmap(bld
,
1634 min_filter
, mip_filter
,
1636 ilevel0
, ilevel1
, lod_fpart
,
1639 lp_build_else(&if_ctx
);
1641 /* Use the magnification filter */
1642 lp_build_sample_mipmap(bld
,
1643 mag_filter
, PIPE_TEX_MIPFILTER_NONE
,
1645 ilevel0
, NULL
, NULL
,
1648 lp_build_endif(&if_ctx
);
1651 packed
= LLVMBuildLoad(builder
, packed_var
, "");
1654 * Convert to SoA and swizzle.
1656 lp_build_rgba8_to_fi32_soa(bld
->gallivm
,
1658 packed
, unswizzled
);
1660 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
1661 lp_build_format_swizzle_soa(bld
->format_desc
,
1663 unswizzled
, texel_out
);
1666 texel_out
[0] = unswizzled
[0];
1667 texel_out
[1] = unswizzled
[1];
1668 texel_out
[2] = unswizzled
[2];
1669 texel_out
[3] = unswizzled
[3];