1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Texture sampling -- AoS.
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "util/u_cpu_detect.h"
44 #include "lp_bld_debug.h"
45 #include "lp_bld_type.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_arit.h"
49 #include "lp_bld_bitarit.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_swizzle.h"
52 #include "lp_bld_pack.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_gather.h"
55 #include "lp_bld_format.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_sample.h"
58 #include "lp_bld_sample_aos.h"
59 #include "lp_bld_quad.h"
63 * Build LLVM code for texture coord wrapping, for nearest filtering,
64 * for scaled integer texcoords.
65 * \param block_length is the length of the pixel block along the
67 * \param coord the incoming texcoord (s,t or r) scaled to the texture size
68 * \param coord_f the incoming texcoord (s,t or r) as float vec
69 * \param length the texture size along one dimension
70 * \param stride pixel stride along the coordinate axis (in bytes)
71 * \param offset the texel offset along the coord axis
72 * \param is_pot if TRUE, length is a power of two
73 * \param wrap_mode one of PIPE_TEX_WRAP_x
74 * \param out_offset byte offset for the wrapped coordinate
75 * \param out_i resulting sub-block pixel coordinate for coord0
78 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context
*bld
,
79 unsigned block_length
,
87 LLVMValueRef
*out_offset
,
90 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
91 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
92 LLVMValueRef length_minus_one
;
94 length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
97 case PIPE_TEX_WRAP_REPEAT
:
99 coord
= LLVMBuildAnd(builder
, coord
, length_minus_one
, "");
101 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
102 LLVMValueRef length_f
= lp_build_int_to_float(coord_bld
, length
);
104 offset
= lp_build_int_to_float(coord_bld
, offset
);
105 offset
= lp_build_div(coord_bld
, offset
, length_f
);
106 coord_f
= lp_build_add(coord_bld
, coord_f
, offset
);
108 coord
= lp_build_fract_safe(coord_bld
, coord_f
);
109 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
110 coord
= lp_build_itrunc(coord_bld
, coord
);
114 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
115 coord
= lp_build_max(int_coord_bld
, coord
, int_coord_bld
->zero
);
116 coord
= lp_build_min(int_coord_bld
, coord
, length_minus_one
);
119 case PIPE_TEX_WRAP_CLAMP
:
120 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
121 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
122 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
123 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
124 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
129 lp_build_sample_partial_offset(int_coord_bld
, block_length
, coord
, stride
,
135 * Build LLVM code for texture coord wrapping, for nearest filtering,
136 * for float texcoords.
137 * \param coord the incoming texcoord (s,t or r)
138 * \param length the texture size along one dimension
139 * \param offset the texel offset along the coord axis
140 * \param is_pot if TRUE, length is a power of two
141 * \param wrap_mode one of PIPE_TEX_WRAP_x
142 * \param icoord the texcoord after wrapping, as int
145 lp_build_sample_wrap_nearest_float(struct lp_build_sample_context
*bld
,
151 LLVMValueRef
*icoord
)
153 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
154 LLVMValueRef length_minus_one
;
157 case PIPE_TEX_WRAP_REPEAT
:
159 /* this is definitely not ideal for POT case */
160 offset
= lp_build_int_to_float(coord_bld
, offset
);
161 offset
= lp_build_div(coord_bld
, offset
, length
);
162 coord
= lp_build_add(coord_bld
, coord
, offset
);
164 /* take fraction, unnormalize */
165 coord
= lp_build_fract_safe(coord_bld
, coord
);
166 coord
= lp_build_mul(coord_bld
, coord
, length
);
167 *icoord
= lp_build_itrunc(coord_bld
, coord
);
169 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
170 length_minus_one
= lp_build_sub(coord_bld
, length
, coord_bld
->one
);
171 if (bld
->static_sampler_state
->normalized_coords
) {
172 /* scale coord to length */
173 coord
= lp_build_mul(coord_bld
, coord
, length
);
176 offset
= lp_build_int_to_float(coord_bld
, offset
);
177 coord
= lp_build_add(coord_bld
, coord
, offset
);
179 coord
= lp_build_clamp(coord_bld
, coord
, coord_bld
->zero
,
181 *icoord
= lp_build_itrunc(coord_bld
, coord
);
184 case PIPE_TEX_WRAP_CLAMP
:
185 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
186 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
187 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
188 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
189 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
197 * Helper to compute the first coord and the weight for
198 * linear wrap repeat npot textures
201 lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context
*bld
,
202 LLVMValueRef coord_f
,
203 LLVMValueRef length_i
,
204 LLVMValueRef length_f
,
205 LLVMValueRef
*coord0_i
,
206 LLVMValueRef
*weight_i
)
208 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
209 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
210 struct lp_build_context abs_coord_bld
;
211 struct lp_type abs_type
;
212 LLVMValueRef length_minus_one
= lp_build_sub(int_coord_bld
, length_i
,
214 LLVMValueRef mask
, i32_c8
, i32_c128
, i32_c255
;
216 /* wrap with normalized floats is just fract */
217 coord_f
= lp_build_fract(coord_bld
, coord_f
);
219 coord_f
= lp_build_mul(coord_bld
, coord_f
, length_f
);
220 /* convert to int, compute lerp weight */
221 coord_f
= lp_build_mul_imm(&bld
->coord_bld
, coord_f
, 256);
223 /* At this point we don't have any negative numbers so use non-signed
224 * build context which might help on some archs.
226 abs_type
= coord_bld
->type
;
228 lp_build_context_init(&abs_coord_bld
, bld
->gallivm
, abs_type
);
229 *coord0_i
= lp_build_iround(&abs_coord_bld
, coord_f
);
231 /* subtract 0.5 (add -128) */
232 i32_c128
= lp_build_const_int_vec(bld
->gallivm
, bld
->int_coord_type
, -128);
233 *coord0_i
= LLVMBuildAdd(bld
->gallivm
->builder
, *coord0_i
, i32_c128
, "");
235 /* compute fractional part (AND with 0xff) */
236 i32_c255
= lp_build_const_int_vec(bld
->gallivm
, bld
->int_coord_type
, 255);
237 *weight_i
= LLVMBuildAnd(bld
->gallivm
->builder
, *coord0_i
, i32_c255
, "");
239 /* compute floor (shift right 8) */
240 i32_c8
= lp_build_const_int_vec(bld
->gallivm
, bld
->int_coord_type
, 8);
241 *coord0_i
= LLVMBuildAShr(bld
->gallivm
->builder
, *coord0_i
, i32_c8
, "");
243 * we avoided the 0.5/length division before the repeat wrap,
244 * now need to fix up edge cases with selects
246 mask
= lp_build_compare(int_coord_bld
->gallivm
, int_coord_bld
->type
,
247 PIPE_FUNC_LESS
, *coord0_i
, int_coord_bld
->zero
);
248 *coord0_i
= lp_build_select(int_coord_bld
, mask
, length_minus_one
, *coord0_i
);
250 * We should never get values too large - except if coord was nan or inf,
251 * in which case things go terribly wrong...
252 * Alternatively, could use fract_safe above...
254 *coord0_i
= lp_build_min(int_coord_bld
, *coord0_i
, length_minus_one
);
259 * Build LLVM code for texture coord wrapping, for linear filtering,
260 * for scaled integer texcoords.
261 * \param block_length is the length of the pixel block along the
263 * \param coord0 the incoming texcoord (s,t or r) scaled to the texture size
264 * \param coord_f the incoming texcoord (s,t or r) as float vec
265 * \param length the texture size along one dimension
266 * \param stride pixel stride along the coordinate axis (in bytes)
267 * \param offset the texel offset along the coord axis
268 * \param is_pot if TRUE, length is a power of two
269 * \param wrap_mode one of PIPE_TEX_WRAP_x
270 * \param offset0 resulting relative offset for coord0
271 * \param offset1 resulting relative offset for coord0 + 1
272 * \param i0 resulting sub-block pixel coordinate for coord0
273 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
276 lp_build_sample_wrap_linear_int(struct lp_build_sample_context
*bld
,
277 unsigned block_length
,
279 LLVMValueRef
*weight_i
,
280 LLVMValueRef coord_f
,
286 LLVMValueRef
*offset0
,
287 LLVMValueRef
*offset1
,
291 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
292 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
293 LLVMValueRef length_minus_one
;
294 LLVMValueRef lmask
, umask
, mask
;
297 * If the pixel block covers more than one pixel then there is no easy
298 * way to calculate offset1 relative to offset0. Instead, compute them
299 * independently. Otherwise, try to compute offset0 and offset1 with
300 * a single stride multiplication.
303 length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
305 if (block_length
!= 1) {
308 case PIPE_TEX_WRAP_REPEAT
:
310 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
311 coord0
= LLVMBuildAnd(builder
, coord0
, length_minus_one
, "");
312 coord1
= LLVMBuildAnd(builder
, coord1
, length_minus_one
, "");
316 LLVMValueRef length_f
= lp_build_int_to_float(&bld
->coord_bld
, length
);
318 offset
= lp_build_int_to_float(&bld
->coord_bld
, offset
);
319 offset
= lp_build_div(&bld
->coord_bld
, offset
, length_f
);
320 coord_f
= lp_build_add(&bld
->coord_bld
, coord_f
, offset
);
322 lp_build_coord_repeat_npot_linear_int(bld
, coord_f
,
325 mask
= lp_build_compare(bld
->gallivm
, int_coord_bld
->type
,
326 PIPE_FUNC_NOTEQUAL
, coord0
, length_minus_one
);
327 coord1
= LLVMBuildAnd(builder
,
328 lp_build_add(int_coord_bld
, coord0
,
334 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
335 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
336 coord0
= lp_build_clamp(int_coord_bld
, coord0
, int_coord_bld
->zero
,
338 coord1
= lp_build_clamp(int_coord_bld
, coord1
, int_coord_bld
->zero
,
342 case PIPE_TEX_WRAP_CLAMP
:
343 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
344 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
345 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
346 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
347 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
350 coord0
= int_coord_bld
->zero
;
351 coord1
= int_coord_bld
->zero
;
354 lp_build_sample_partial_offset(int_coord_bld
, block_length
, coord0
, stride
,
356 lp_build_sample_partial_offset(int_coord_bld
, block_length
, coord1
, stride
,
361 *i0
= int_coord_bld
->zero
;
362 *i1
= int_coord_bld
->zero
;
365 case PIPE_TEX_WRAP_REPEAT
:
367 coord0
= LLVMBuildAnd(builder
, coord0
, length_minus_one
, "");
370 LLVMValueRef length_f
= lp_build_int_to_float(&bld
->coord_bld
, length
);
372 offset
= lp_build_int_to_float(&bld
->coord_bld
, offset
);
373 offset
= lp_build_div(&bld
->coord_bld
, offset
, length_f
);
374 coord_f
= lp_build_add(&bld
->coord_bld
, coord_f
, offset
);
376 lp_build_coord_repeat_npot_linear_int(bld
, coord_f
,
381 mask
= lp_build_compare(bld
->gallivm
, int_coord_bld
->type
,
382 PIPE_FUNC_NOTEQUAL
, coord0
, length_minus_one
);
384 *offset0
= lp_build_mul(int_coord_bld
, coord0
, stride
);
385 *offset1
= LLVMBuildAnd(builder
,
386 lp_build_add(int_coord_bld
, *offset0
, stride
),
390 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
391 /* XXX this might be slower than the separate path
392 * on some newer cpus. With sse41 this is 8 instructions vs. 7
393 * - at least on SNB this is almost certainly slower since
394 * min/max are cheaper than selects, and the muls aren't bad.
396 lmask
= lp_build_compare(int_coord_bld
->gallivm
, int_coord_bld
->type
,
397 PIPE_FUNC_GEQUAL
, coord0
, int_coord_bld
->zero
);
398 umask
= lp_build_compare(int_coord_bld
->gallivm
, int_coord_bld
->type
,
399 PIPE_FUNC_LESS
, coord0
, length_minus_one
);
401 coord0
= lp_build_select(int_coord_bld
, lmask
, coord0
, int_coord_bld
->zero
);
402 coord0
= lp_build_select(int_coord_bld
, umask
, coord0
, length_minus_one
);
404 mask
= LLVMBuildAnd(builder
, lmask
, umask
, "");
406 *offset0
= lp_build_mul(int_coord_bld
, coord0
, stride
);
407 *offset1
= lp_build_add(int_coord_bld
,
409 LLVMBuildAnd(builder
, stride
, mask
, ""));
412 case PIPE_TEX_WRAP_CLAMP
:
413 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
414 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
415 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
416 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
417 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
420 *offset0
= int_coord_bld
->zero
;
421 *offset1
= int_coord_bld
->zero
;
428 * Build LLVM code for texture coord wrapping, for linear filtering,
429 * for float texcoords.
430 * \param block_length is the length of the pixel block along the
432 * \param coord the incoming texcoord (s,t or r)
433 * \param length the texture size along one dimension
434 * \param offset the texel offset along the coord axis
435 * \param is_pot if TRUE, length is a power of two
436 * \param wrap_mode one of PIPE_TEX_WRAP_x
437 * \param coord0 the first texcoord after wrapping, as int
438 * \param coord1 the second texcoord after wrapping, as int
439 * \param weight the filter weight as int (0-255)
440 * \param force_nearest if this coord actually uses nearest filtering
443 lp_build_sample_wrap_linear_float(struct lp_build_sample_context
*bld
,
444 unsigned block_length
,
450 LLVMValueRef
*coord0
,
451 LLVMValueRef
*coord1
,
452 LLVMValueRef
*weight
,
453 unsigned force_nearest
)
455 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
456 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
457 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
458 LLVMValueRef half
= lp_build_const_vec(bld
->gallivm
, coord_bld
->type
, 0.5);
459 LLVMValueRef length_minus_one
= lp_build_sub(coord_bld
, length
, coord_bld
->one
);
462 case PIPE_TEX_WRAP_REPEAT
:
464 /* mul by size and subtract 0.5 */
465 coord
= lp_build_mul(coord_bld
, coord
, length
);
467 offset
= lp_build_int_to_float(coord_bld
, offset
);
468 coord
= lp_build_add(coord_bld
, coord
, offset
);
471 coord
= lp_build_sub(coord_bld
, coord
, half
);
472 *coord1
= lp_build_add(coord_bld
, coord
, coord_bld
->one
);
473 /* convert to int, compute lerp weight */
474 lp_build_ifloor_fract(coord_bld
, coord
, coord0
, weight
);
475 *coord1
= lp_build_ifloor(coord_bld
, *coord1
);
477 length_minus_one
= lp_build_itrunc(coord_bld
, length_minus_one
);
478 *coord0
= LLVMBuildAnd(builder
, *coord0
, length_minus_one
, "");
479 *coord1
= LLVMBuildAnd(builder
, *coord1
, length_minus_one
, "");
484 offset
= lp_build_int_to_float(coord_bld
, offset
);
485 offset
= lp_build_div(coord_bld
, offset
, length
);
486 coord
= lp_build_add(coord_bld
, coord
, offset
);
488 /* wrap with normalized floats is just fract */
489 coord
= lp_build_fract(coord_bld
, coord
);
491 coord
= lp_build_mul(coord_bld
, coord
, length
);
493 * we avoided the 0.5/length division, have to fix up wrong
494 * edge cases with selects
496 *coord1
= lp_build_add(coord_bld
, coord
, half
);
497 coord
= lp_build_sub(coord_bld
, coord
, half
);
498 *weight
= lp_build_fract(coord_bld
, coord
);
500 * It is important for this comparison to be unordered
501 * (or need fract_safe above).
503 mask
= lp_build_compare(coord_bld
->gallivm
, coord_bld
->type
,
504 PIPE_FUNC_LESS
, coord
, coord_bld
->zero
);
505 *coord0
= lp_build_select(coord_bld
, mask
, length_minus_one
, coord
);
506 *coord0
= lp_build_itrunc(coord_bld
, *coord0
);
507 mask
= lp_build_compare(coord_bld
->gallivm
, coord_bld
->type
,
508 PIPE_FUNC_LESS
, *coord1
, length
);
509 *coord1
= lp_build_select(coord_bld
, mask
, *coord1
, coord_bld
->zero
);
510 *coord1
= lp_build_itrunc(coord_bld
, *coord1
);
513 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
514 if (bld
->static_sampler_state
->normalized_coords
) {
515 /* mul by tex size */
516 coord
= lp_build_mul(coord_bld
, coord
, length
);
519 offset
= lp_build_int_to_float(coord_bld
, offset
);
520 coord
= lp_build_add(coord_bld
, coord
, offset
);
523 if (!force_nearest
) {
524 coord
= lp_build_sub(coord_bld
, coord
, half
);
526 /* clamp to [0, length - 1] */
527 coord
= lp_build_min_ext(coord_bld
, coord
, length_minus_one
,
528 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN
);
529 coord
= lp_build_max(coord_bld
, coord
, coord_bld
->zero
);
530 *coord1
= lp_build_add(coord_bld
, coord
, coord_bld
->one
);
531 /* convert to int, compute lerp weight */
532 lp_build_ifloor_fract(coord_bld
, coord
, coord0
, weight
);
533 /* coord1 = min(coord1, length-1) */
534 *coord1
= lp_build_min(coord_bld
, *coord1
, length_minus_one
);
535 *coord1
= lp_build_itrunc(coord_bld
, *coord1
);
539 *coord0
= int_coord_bld
->zero
;
540 *coord1
= int_coord_bld
->zero
;
541 *weight
= coord_bld
->zero
;
544 *weight
= lp_build_mul_imm(coord_bld
, *weight
, 256);
545 *weight
= lp_build_itrunc(coord_bld
, *weight
);
551 * Fetch texels for image with nearest sampling.
552 * Return filtered color as two vectors of 16-bit fixed point values.
555 lp_build_sample_fetch_image_nearest(struct lp_build_sample_context
*bld
,
556 LLVMValueRef data_ptr
,
558 LLVMValueRef x_subcoord
,
559 LLVMValueRef y_subcoord
,
560 LLVMValueRef
*colors
)
563 * Fetch the pixels as 4 x 32bit (rgba order might differ):
565 * rgba0 rgba1 rgba2 rgba3
567 * bit cast them into 16 x u8
569 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
571 * unpack them into two 8 x i16:
573 * r0 g0 b0 a0 r1 g1 b1 a1
574 * r2 g2 b2 a2 r3 g3 b3 a3
576 * The higher 8 bits of the resulting elements will be zero.
578 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
580 struct lp_build_context u8n
;
581 LLVMTypeRef u8n_vec_type
;
582 struct lp_type fetch_type
;
584 lp_build_context_init(&u8n
, bld
->gallivm
, lp_type_unorm(8, bld
->vector_width
));
585 u8n_vec_type
= lp_build_vec_type(bld
->gallivm
, u8n
.type
);
587 fetch_type
= lp_type_uint(bld
->texel_type
.width
);
588 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
590 * Given the format is a rgba8, just read the pixels as is,
591 * without any swizzling. Swizzling will be done later.
593 rgba8
= lp_build_gather(bld
->gallivm
,
594 bld
->texel_type
.length
,
595 bld
->format_desc
->block
.bits
,
598 data_ptr
, offset
, TRUE
);
600 rgba8
= LLVMBuildBitCast(builder
, rgba8
, u8n_vec_type
, "");
603 rgba8
= lp_build_fetch_rgba_aos(bld
->gallivm
,
618 * Sample a single texture image with nearest sampling.
619 * If sampling a cube texture, r = cube face in [0,5].
620 * Return filtered color as two vectors of 16-bit fixed point values.
623 lp_build_sample_image_nearest(struct lp_build_sample_context
*bld
,
624 LLVMValueRef int_size
,
625 LLVMValueRef row_stride_vec
,
626 LLVMValueRef img_stride_vec
,
627 LLVMValueRef data_ptr
,
628 LLVMValueRef mipoffsets
,
632 const LLVMValueRef
*offsets
,
633 LLVMValueRef
*colors
)
635 const unsigned dims
= bld
->dims
;
636 struct lp_build_context i32
;
637 LLVMValueRef width_vec
, height_vec
, depth_vec
;
638 LLVMValueRef s_ipart
, t_ipart
= NULL
, r_ipart
= NULL
;
639 LLVMValueRef s_float
, t_float
= NULL
, r_float
= NULL
;
640 LLVMValueRef x_stride
;
641 LLVMValueRef x_offset
, offset
;
642 LLVMValueRef x_subcoord
, y_subcoord
, z_subcoord
;
644 lp_build_context_init(&i32
, bld
->gallivm
, lp_type_int_vec(32, bld
->vector_width
));
646 lp_build_extract_image_sizes(bld
,
654 s_float
= s
; t_float
= t
; r_float
= r
;
656 if (bld
->static_sampler_state
->normalized_coords
) {
657 LLVMValueRef flt_size
;
659 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, int_size
);
661 lp_build_unnormalized_coords(bld
, flt_size
, &s
, &t
, &r
);
664 /* convert float to int */
665 /* For correct rounding, need floor, not truncation here.
666 * Note that in some cases (clamp to edge, no texel offsets) we
667 * could use a non-signed build context which would help archs
668 * greatly which don't have arch rounding.
670 s_ipart
= lp_build_ifloor(&bld
->coord_bld
, s
);
672 t_ipart
= lp_build_ifloor(&bld
->coord_bld
, t
);
674 r_ipart
= lp_build_ifloor(&bld
->coord_bld
, r
);
676 /* add texel offsets */
678 s_ipart
= lp_build_add(&i32
, s_ipart
, offsets
[0]);
680 t_ipart
= lp_build_add(&i32
, t_ipart
, offsets
[1]);
682 r_ipart
= lp_build_add(&i32
, r_ipart
, offsets
[2]);
687 /* get pixel, row, image strides */
688 x_stride
= lp_build_const_vec(bld
->gallivm
,
689 bld
->int_coord_bld
.type
,
690 bld
->format_desc
->block
.bits
/8);
692 /* Do texcoord wrapping, compute texel offset */
693 lp_build_sample_wrap_nearest_int(bld
,
694 bld
->format_desc
->block
.width
,
696 width_vec
, x_stride
, offsets
[0],
697 bld
->static_texture_state
->pot_width
,
698 bld
->static_sampler_state
->wrap_s
,
699 &x_offset
, &x_subcoord
);
702 LLVMValueRef y_offset
;
703 lp_build_sample_wrap_nearest_int(bld
,
704 bld
->format_desc
->block
.height
,
706 height_vec
, row_stride_vec
, offsets
[1],
707 bld
->static_texture_state
->pot_height
,
708 bld
->static_sampler_state
->wrap_t
,
709 &y_offset
, &y_subcoord
);
710 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, y_offset
);
712 LLVMValueRef z_offset
;
713 lp_build_sample_wrap_nearest_int(bld
,
714 1, /* block length (depth) */
716 depth_vec
, img_stride_vec
, offsets
[2],
717 bld
->static_texture_state
->pot_depth
,
718 bld
->static_sampler_state
->wrap_r
,
719 &z_offset
, &z_subcoord
);
720 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, z_offset
);
723 if (has_layer_coord(bld
->static_texture_state
->target
)) {
724 LLVMValueRef z_offset
;
725 /* The r coord is the cube face in [0,5] or array layer */
726 z_offset
= lp_build_mul(&bld
->int_coord_bld
, r
, img_stride_vec
);
727 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, z_offset
);
730 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, mipoffsets
);
733 lp_build_sample_fetch_image_nearest(bld
, data_ptr
, offset
,
734 x_subcoord
, y_subcoord
,
740 * Sample a single texture image with nearest sampling.
741 * If sampling a cube texture, r = cube face in [0,5].
742 * Return filtered color as two vectors of 16-bit fixed point values.
743 * Does address calcs (except offsets) with floats.
744 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
747 lp_build_sample_image_nearest_afloat(struct lp_build_sample_context
*bld
,
748 LLVMValueRef int_size
,
749 LLVMValueRef row_stride_vec
,
750 LLVMValueRef img_stride_vec
,
751 LLVMValueRef data_ptr
,
752 LLVMValueRef mipoffsets
,
756 const LLVMValueRef
*offsets
,
757 LLVMValueRef
*colors
)
759 const unsigned dims
= bld
->dims
;
760 LLVMValueRef width_vec
, height_vec
, depth_vec
;
762 LLVMValueRef x_subcoord
, y_subcoord
;
763 LLVMValueRef x_icoord
= NULL
, y_icoord
= NULL
, z_icoord
= NULL
;
764 LLVMValueRef flt_size
;
766 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, int_size
);
768 lp_build_extract_image_sizes(bld
,
769 &bld
->float_size_bld
,
776 /* Do texcoord wrapping */
777 lp_build_sample_wrap_nearest_float(bld
,
778 s
, width_vec
, offsets
[0],
779 bld
->static_texture_state
->pot_width
,
780 bld
->static_sampler_state
->wrap_s
,
784 lp_build_sample_wrap_nearest_float(bld
,
785 t
, height_vec
, offsets
[1],
786 bld
->static_texture_state
->pot_height
,
787 bld
->static_sampler_state
->wrap_t
,
791 lp_build_sample_wrap_nearest_float(bld
,
792 r
, depth_vec
, offsets
[2],
793 bld
->static_texture_state
->pot_depth
,
794 bld
->static_sampler_state
->wrap_r
,
798 if (has_layer_coord(bld
->static_texture_state
->target
)) {
803 * From here on we deal with ints, and we should split up the 256bit
804 * vectors manually for better generated code.
808 * compute texel offsets -
809 * cannot do offset calc with floats, difficult for block-based formats,
810 * and not enough precision anyway.
812 lp_build_sample_offset(&bld
->int_coord_bld
,
816 row_stride_vec
, img_stride_vec
,
818 &x_subcoord
, &y_subcoord
);
820 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, mipoffsets
);
823 lp_build_sample_fetch_image_nearest(bld
, data_ptr
, offset
,
824 x_subcoord
, y_subcoord
,
830 * Fetch texels for image with linear sampling.
831 * Return filtered color as two vectors of 16-bit fixed point values.
834 lp_build_sample_fetch_image_linear(struct lp_build_sample_context
*bld
,
835 LLVMValueRef data_ptr
,
836 LLVMValueRef offset
[2][2][2],
837 LLVMValueRef x_subcoord
[2],
838 LLVMValueRef y_subcoord
[2],
839 LLVMValueRef s_fpart
,
840 LLVMValueRef t_fpart
,
841 LLVMValueRef r_fpart
,
842 LLVMValueRef
*colors
)
844 const unsigned dims
= bld
->dims
;
845 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
846 struct lp_build_context u8n
;
847 LLVMTypeRef u8n_vec_type
;
848 LLVMTypeRef elem_type
= LLVMInt32TypeInContext(bld
->gallivm
->context
);
849 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
850 LLVMValueRef shuffle
;
851 LLVMValueRef neighbors
[2][2][2]; /* [z][y][x] */
856 lp_build_context_init(&u8n
, bld
->gallivm
, lp_type_unorm(8, bld
->vector_width
));
857 u8n_vec_type
= lp_build_vec_type(bld
->gallivm
, u8n
.type
);
860 * Transform 4 x i32 in
862 * s_fpart = {s0, s1, s2, s3}
864 * where each value is between 0 and 0xff,
868 * s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
870 * and likewise for t_fpart. There is no risk of loosing precision here
871 * since the fractional parts only use the lower 8bits.
873 s_fpart
= LLVMBuildBitCast(builder
, s_fpart
, u8n_vec_type
, "");
875 t_fpart
= LLVMBuildBitCast(builder
, t_fpart
, u8n_vec_type
, "");
877 r_fpart
= LLVMBuildBitCast(builder
, r_fpart
, u8n_vec_type
, "");
879 for (j
= 0; j
< u8n
.type
.length
; j
+= 4) {
880 #ifdef PIPE_ARCH_LITTLE_ENDIAN
881 unsigned subindex
= 0;
883 unsigned subindex
= 3;
887 index
= LLVMConstInt(elem_type
, j
+ subindex
, 0);
888 for (i
= 0; i
< 4; ++i
)
889 shuffles
[j
+ i
] = index
;
892 shuffle
= LLVMConstVector(shuffles
, u8n
.type
.length
);
894 s_fpart
= LLVMBuildShuffleVector(builder
, s_fpart
, u8n
.undef
,
897 t_fpart
= LLVMBuildShuffleVector(builder
, t_fpart
, u8n
.undef
,
901 r_fpart
= LLVMBuildShuffleVector(builder
, r_fpart
, u8n
.undef
,
906 * Fetch the pixels as 4 x 32bit (rgba order might differ):
908 * rgba0 rgba1 rgba2 rgba3
910 * bit cast them into 16 x u8
912 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
914 * unpack them into two 8 x i16:
916 * r0 g0 b0 a0 r1 g1 b1 a1
917 * r2 g2 b2 a2 r3 g3 b3 a3
919 * The higher 8 bits of the resulting elements will be zero.
921 numj
= 1 + (dims
>= 2);
922 numk
= 1 + (dims
>= 3);
924 for (k
= 0; k
< numk
; k
++) {
925 for (j
= 0; j
< numj
; j
++) {
926 for (i
= 0; i
< 2; i
++) {
929 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
930 struct lp_type fetch_type
;
932 * Given the format is a rgba8, just read the pixels as is,
933 * without any swizzling. Swizzling will be done later.
935 fetch_type
= lp_type_uint(bld
->texel_type
.width
);
936 rgba8
= lp_build_gather(bld
->gallivm
,
937 bld
->texel_type
.length
,
938 bld
->format_desc
->block
.bits
,
941 data_ptr
, offset
[k
][j
][i
], TRUE
);
943 rgba8
= LLVMBuildBitCast(builder
, rgba8
, u8n_vec_type
, "");
946 rgba8
= lp_build_fetch_rgba_aos(bld
->gallivm
,
950 data_ptr
, offset
[k
][j
][i
],
956 neighbors
[k
][j
][i
] = rgba8
;
962 * Linear interpolation with 8.8 fixed point.
964 if (bld
->static_sampler_state
->force_nearest_s
) {
965 /* special case 1-D lerp */
966 packed
= lp_build_lerp(&u8n
,
970 LP_BLD_LERP_PRESCALED_WEIGHTS
);
972 else if (bld
->static_sampler_state
->force_nearest_t
) {
973 /* special case 1-D lerp */
974 packed
= lp_build_lerp(&u8n
,
978 LP_BLD_LERP_PRESCALED_WEIGHTS
);
981 /* general 1/2/3-D lerping */
983 packed
= lp_build_lerp(&u8n
,
987 LP_BLD_LERP_PRESCALED_WEIGHTS
);
988 } else if (dims
== 2) {
990 packed
= lp_build_lerp_2d(&u8n
,
996 LP_BLD_LERP_PRESCALED_WEIGHTS
);
1000 packed
= lp_build_lerp_3d(&u8n
,
1001 s_fpart
, t_fpart
, r_fpart
,
1010 LP_BLD_LERP_PRESCALED_WEIGHTS
);
1018 * Sample a single texture image with (bi-)(tri-)linear sampling.
1019 * Return filtered color as two vectors of 16-bit fixed point values.
1022 lp_build_sample_image_linear(struct lp_build_sample_context
*bld
,
1023 LLVMValueRef int_size
,
1024 LLVMValueRef row_stride_vec
,
1025 LLVMValueRef img_stride_vec
,
1026 LLVMValueRef data_ptr
,
1027 LLVMValueRef mipoffsets
,
1031 const LLVMValueRef
*offsets
,
1032 LLVMValueRef
*colors
)
1034 const unsigned dims
= bld
->dims
;
1035 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
1036 struct lp_build_context i32
;
1037 LLVMValueRef i32_c8
, i32_c128
, i32_c255
;
1038 LLVMValueRef width_vec
, height_vec
, depth_vec
;
1039 LLVMValueRef s_ipart
, s_fpart
, s_float
;
1040 LLVMValueRef t_ipart
= NULL
, t_fpart
= NULL
, t_float
= NULL
;
1041 LLVMValueRef r_ipart
= NULL
, r_fpart
= NULL
, r_float
= NULL
;
1042 LLVMValueRef x_stride
, y_stride
, z_stride
;
1043 LLVMValueRef x_offset0
, x_offset1
;
1044 LLVMValueRef y_offset0
, y_offset1
;
1045 LLVMValueRef z_offset0
, z_offset1
;
1046 LLVMValueRef offset
[2][2][2]; /* [z][y][x] */
1047 LLVMValueRef x_subcoord
[2], y_subcoord
[2], z_subcoord
[2];
1050 lp_build_context_init(&i32
, bld
->gallivm
, lp_type_int_vec(32, bld
->vector_width
));
1052 lp_build_extract_image_sizes(bld
,
1054 bld
->int_coord_type
,
1060 s_float
= s
; t_float
= t
; r_float
= r
;
1062 if (bld
->static_sampler_state
->normalized_coords
) {
1063 LLVMValueRef scaled_size
;
1064 LLVMValueRef flt_size
;
1066 /* scale size by 256 (8 fractional bits) */
1067 scaled_size
= lp_build_shl_imm(&bld
->int_size_bld
, int_size
, 8);
1069 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, scaled_size
);
1071 lp_build_unnormalized_coords(bld
, flt_size
, &s
, &t
, &r
);
1074 /* scale coords by 256 (8 fractional bits) */
1075 s
= lp_build_mul_imm(&bld
->coord_bld
, s
, 256);
1077 t
= lp_build_mul_imm(&bld
->coord_bld
, t
, 256);
1079 r
= lp_build_mul_imm(&bld
->coord_bld
, r
, 256);
1082 /* convert float to int */
1083 /* For correct rounding, need round to nearest, not truncation here.
1084 * Note that in some cases (clamp to edge, no texel offsets) we
1085 * could use a non-signed build context which would help archs which
1086 * don't have fptosi intrinsic with nearest rounding implemented.
1088 s
= lp_build_iround(&bld
->coord_bld
, s
);
1090 t
= lp_build_iround(&bld
->coord_bld
, t
);
1092 r
= lp_build_iround(&bld
->coord_bld
, r
);
1094 /* subtract 0.5 (add -128) */
1095 i32_c128
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, -128);
1096 if (!bld
->static_sampler_state
->force_nearest_s
) {
1097 s
= LLVMBuildAdd(builder
, s
, i32_c128
, "");
1099 if (dims
>= 2 && !bld
->static_sampler_state
->force_nearest_t
) {
1100 t
= LLVMBuildAdd(builder
, t
, i32_c128
, "");
1103 r
= LLVMBuildAdd(builder
, r
, i32_c128
, "");
1106 /* compute floor (shift right 8) */
1107 i32_c8
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, 8);
1108 s_ipart
= LLVMBuildAShr(builder
, s
, i32_c8
, "");
1110 t_ipart
= LLVMBuildAShr(builder
, t
, i32_c8
, "");
1112 r_ipart
= LLVMBuildAShr(builder
, r
, i32_c8
, "");
1114 /* add texel offsets */
1116 s_ipart
= lp_build_add(&i32
, s_ipart
, offsets
[0]);
1118 t_ipart
= lp_build_add(&i32
, t_ipart
, offsets
[1]);
1120 r_ipart
= lp_build_add(&i32
, r_ipart
, offsets
[2]);
1125 /* compute fractional part (AND with 0xff) */
1126 i32_c255
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, 255);
1127 s_fpart
= LLVMBuildAnd(builder
, s
, i32_c255
, "");
1129 t_fpart
= LLVMBuildAnd(builder
, t
, i32_c255
, "");
1131 r_fpart
= LLVMBuildAnd(builder
, r
, i32_c255
, "");
1133 /* get pixel, row and image strides */
1134 x_stride
= lp_build_const_vec(bld
->gallivm
, bld
->int_coord_bld
.type
,
1135 bld
->format_desc
->block
.bits
/8);
1136 y_stride
= row_stride_vec
;
1137 z_stride
= img_stride_vec
;
1139 /* do texcoord wrapping and compute texel offsets */
1140 lp_build_sample_wrap_linear_int(bld
,
1141 bld
->format_desc
->block
.width
,
1142 s_ipart
, &s_fpart
, s_float
,
1143 width_vec
, x_stride
, offsets
[0],
1144 bld
->static_texture_state
->pot_width
,
1145 bld
->static_sampler_state
->wrap_s
,
1146 &x_offset0
, &x_offset1
,
1147 &x_subcoord
[0], &x_subcoord
[1]);
1149 /* add potential cube/array/mip offsets now as they are constant per pixel */
1150 if (has_layer_coord(bld
->static_texture_state
->target
)) {
1151 LLVMValueRef z_offset
;
1152 z_offset
= lp_build_mul(&bld
->int_coord_bld
, r
, img_stride_vec
);
1153 /* The r coord is the cube face in [0,5] or array layer */
1154 x_offset0
= lp_build_add(&bld
->int_coord_bld
, x_offset0
, z_offset
);
1155 x_offset1
= lp_build_add(&bld
->int_coord_bld
, x_offset1
, z_offset
);
1158 x_offset0
= lp_build_add(&bld
->int_coord_bld
, x_offset0
, mipoffsets
);
1159 x_offset1
= lp_build_add(&bld
->int_coord_bld
, x_offset1
, mipoffsets
);
1162 for (z
= 0; z
< 2; z
++) {
1163 for (y
= 0; y
< 2; y
++) {
1164 offset
[z
][y
][0] = x_offset0
;
1165 offset
[z
][y
][1] = x_offset1
;
1170 lp_build_sample_wrap_linear_int(bld
,
1171 bld
->format_desc
->block
.height
,
1172 t_ipart
, &t_fpart
, t_float
,
1173 height_vec
, y_stride
, offsets
[1],
1174 bld
->static_texture_state
->pot_height
,
1175 bld
->static_sampler_state
->wrap_t
,
1176 &y_offset0
, &y_offset1
,
1177 &y_subcoord
[0], &y_subcoord
[1]);
1179 for (z
= 0; z
< 2; z
++) {
1180 for (x
= 0; x
< 2; x
++) {
1181 offset
[z
][0][x
] = lp_build_add(&bld
->int_coord_bld
,
1182 offset
[z
][0][x
], y_offset0
);
1183 offset
[z
][1][x
] = lp_build_add(&bld
->int_coord_bld
,
1184 offset
[z
][1][x
], y_offset1
);
1190 lp_build_sample_wrap_linear_int(bld
,
1191 1, /* block length (depth) */
1192 r_ipart
, &r_fpart
, r_float
,
1193 depth_vec
, z_stride
, offsets
[2],
1194 bld
->static_texture_state
->pot_depth
,
1195 bld
->static_sampler_state
->wrap_r
,
1196 &z_offset0
, &z_offset1
,
1197 &z_subcoord
[0], &z_subcoord
[1]);
1198 for (y
= 0; y
< 2; y
++) {
1199 for (x
= 0; x
< 2; x
++) {
1200 offset
[0][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1201 offset
[0][y
][x
], z_offset0
);
1202 offset
[1][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1203 offset
[1][y
][x
], z_offset1
);
1208 lp_build_sample_fetch_image_linear(bld
, data_ptr
, offset
,
1209 x_subcoord
, y_subcoord
,
1210 s_fpart
, t_fpart
, r_fpart
,
/*
 * NOTE(review): this chunk is a garbled extraction -- each original source
 * line is split across several physical lines and some original lines are
 * elided entirely (e.g. the s/t/r coordinate parameters around original
 * lines 1228-1230, assignments to s_fpart/t_fpart/r_fpart, and various
 * closing braces).  Comments added below annotate only what is visible;
 * confirm details against the upstream file.
 */
1216 * Sample a single texture image with (bi-)(tri-)linear sampling.
1217 * Return filtered color as two vectors of 16-bit fixed point values.
1218 * Does address calcs (except offsets) with floats.
1219 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
1222 lp_build_sample_image_linear_afloat(struct lp_build_sample_context
*bld
,
1223 LLVMValueRef int_size
,
1224 LLVMValueRef row_stride_vec
,
1225 LLVMValueRef img_stride_vec
,
1226 LLVMValueRef data_ptr
,
1227 LLVMValueRef mipoffsets
,
1231 const LLVMValueRef
*offsets
,
1232 LLVMValueRef
*colors
)
1234 const unsigned dims
= bld
->dims
;
1235 LLVMValueRef width_vec
, height_vec
, depth_vec
;
1236 LLVMValueRef s_fpart
;
1237 LLVMValueRef t_fpart
= NULL
;
1238 LLVMValueRef r_fpart
= NULL
;
1239 LLVMValueRef x_stride
, y_stride
, z_stride
;
1240 LLVMValueRef x_offset0
, x_offset1
;
1241 LLVMValueRef y_offset0
, y_offset1
;
1242 LLVMValueRef z_offset0
, z_offset1
;
1243 LLVMValueRef offset
[2][2][2]; /* [z][y][x] */
1244 LLVMValueRef x_subcoord
[2], y_subcoord
[2];
1245 LLVMValueRef flt_size
;
1246 LLVMValueRef x_icoord0
, x_icoord1
;
1247 LLVMValueRef y_icoord0
, y_icoord1
;
1248 LLVMValueRef z_icoord0
, z_icoord1
;
/* Mip level size converted to float so coord wrapping can stay in floats. */
1251 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, int_size
);
1253 lp_build_extract_image_sizes(bld
,
1254 &bld
->float_size_bld
,
1261 /* do texcoord wrapping and compute texel offsets */
/*
 * Each wrap call produces the two neighboring integer coords (icoord0/1)
 * for one axis; fractional weights are presumably produced via elided
 * output arguments -- TODO confirm against upstream signature.
 */
1262 lp_build_sample_wrap_linear_float(bld
,
1263 bld
->format_desc
->block
.width
,
1264 s
, width_vec
, offsets
[0],
1265 bld
->static_texture_state
->pot_width
,
1266 bld
->static_sampler_state
->wrap_s
,
1267 &x_icoord0
, &x_icoord1
,
1269 bld
->static_sampler_state
->force_nearest_s
);
1272 lp_build_sample_wrap_linear_float(bld
,
1273 bld
->format_desc
->block
.height
,
1274 t
, height_vec
, offsets
[1],
1275 bld
->static_texture_state
->pot_height
,
1276 bld
->static_sampler_state
->wrap_t
,
1277 &y_icoord0
, &y_icoord1
,
1279 bld
->static_sampler_state
->force_nearest_t
);
1282 lp_build_sample_wrap_linear_float(bld
,
1283 1, /* block length (depth) */
1284 r
, depth_vec
, offsets
[2],
1285 bld
->static_texture_state
->pot_depth
,
1286 bld
->static_sampler_state
->wrap_r
,
1287 &z_icoord0
, &z_icoord1
,
1293 * From here on we deal with ints, and we should split up the 256bit
1294 * vectors manually for better generated code.
1297 /* get pixel, row and image strides */
1298 x_stride
= lp_build_const_vec(bld
->gallivm
,
1299 bld
->int_coord_bld
.type
,
1300 bld
->format_desc
->block
.bits
/8);
1301 y_stride
= row_stride_vec
;
1302 z_stride
= img_stride_vec
;
1305 * compute texel offset -
1306 * cannot do offset calc with floats, difficult for block-based formats,
1307 * and not enough precision anyway.
1309 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1310 bld
->format_desc
->block
.width
,
1311 x_icoord0
, x_stride
,
1312 &x_offset0
, &x_subcoord
[0]);
1313 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1314 bld
->format_desc
->block
.width
,
1315 x_icoord1
, x_stride
,
1316 &x_offset1
, &x_subcoord
[1]);
1318 /* add potential cube/array/mip offsets now as they are constant per pixel */
1319 if (has_layer_coord(bld
->static_texture_state
->target
)) {
1320 LLVMValueRef z_offset
;
1321 z_offset
= lp_build_mul(&bld
->int_coord_bld
, r
, img_stride_vec
);
1322 /* The r coord is the cube face in [0,5] or array layer */
1323 x_offset0
= lp_build_add(&bld
->int_coord_bld
, x_offset0
, z_offset
);
1324 x_offset1
= lp_build_add(&bld
->int_coord_bld
, x_offset1
, z_offset
);
/* Presumably guarded by an elided "if (mipoffsets)" -- TODO confirm. */
1327 x_offset0
= lp_build_add(&bld
->int_coord_bld
, x_offset0
, mipoffsets
);
1328 x_offset1
= lp_build_add(&bld
->int_coord_bld
, x_offset1
, mipoffsets
);
/*
 * Seed all eight [z][y][x] corner offsets with the two x offsets;
 * the y and z contributions are accumulated into them below.
 */
1331 for (z
= 0; z
< 2; z
++) {
1332 for (y
= 0; y
< 2; y
++) {
1333 offset
[z
][y
][0] = x_offset0
;
1334 offset
[z
][y
][1] = x_offset1
;
/* Two y offsets/subcoords, folded into all corners sharing that y. */
1339 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1340 bld
->format_desc
->block
.height
,
1341 y_icoord0
, y_stride
,
1342 &y_offset0
, &y_subcoord
[0]);
1343 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1344 bld
->format_desc
->block
.height
,
1345 y_icoord1
, y_stride
,
1346 &y_offset1
, &y_subcoord
[1]);
1347 for (z
= 0; z
< 2; z
++) {
1348 for (x
= 0; x
< 2; x
++) {
1349 offset
[z
][0][x
] = lp_build_add(&bld
->int_coord_bld
,
1350 offset
[z
][0][x
], y_offset0
);
1351 offset
[z
][1][x
] = lp_build_add(&bld
->int_coord_bld
,
1352 offset
[z
][1][x
], y_offset1
);
/*
 * z axis handling -- presumably inside an elided "if (dims >= 3)" style
 * guard (z_subcoord is declared in an inner scope here); TODO confirm.
 */
1358 LLVMValueRef z_subcoord
[2];
1359 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1361 z_icoord0
, z_stride
,
1362 &z_offset0
, &z_subcoord
[0]);
1363 lp_build_sample_partial_offset(&bld
->int_coord_bld
,
1365 z_icoord1
, z_stride
,
1366 &z_offset1
, &z_subcoord
[1]);
1367 for (y
= 0; y
< 2; y
++) {
1368 for (x
= 0; x
< 2; x
++) {
1369 offset
[0][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1370 offset
[0][y
][x
], z_offset0
);
1371 offset
[1][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
1372 offset
[1][y
][x
], z_offset1
);
/*
 * Fetch the corner texels at the 8 computed offsets and blend them with
 * the fractional weights (s/t/r_fpart are assigned in elided lines).
 */
1377 lp_build_sample_fetch_image_linear(bld
, data_ptr
, offset
,
1378 x_subcoord
, y_subcoord
,
1379 s_fpart
, t_fpart
, r_fpart
,
/*
 * NOTE(review): garbled extraction -- statements are split across physical
 * lines and some original lines are elided (e.g. the s/t/r parameters,
 * several closing braces, and the else branches' brace lines).  Comments
 * added below annotate only the visible logic.
 */
1385 * Sample the texture/mipmap using given image filter and mip filter.
1386 * data0_ptr and data1_ptr point to the two mipmap levels to sample
1387 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1388 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1391 lp_build_sample_mipmap(struct lp_build_sample_context
*bld
,
1392 unsigned img_filter
,
1393 unsigned mip_filter
,
1397 const LLVMValueRef
*offsets
,
1398 LLVMValueRef ilevel0
,
1399 LLVMValueRef ilevel1
,
1400 LLVMValueRef lod_fpart
,
1401 LLVMValueRef colors_var
)
1403 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
1406 LLVMValueRef row_stride0_vec
= NULL
;
1407 LLVMValueRef row_stride1_vec
= NULL
;
1408 LLVMValueRef img_stride0_vec
= NULL
;
1409 LLVMValueRef img_stride1_vec
= NULL
;
1410 LLVMValueRef data_ptr0
;
1411 LLVMValueRef data_ptr1
;
1412 LLVMValueRef mipoff0
= NULL
;
1413 LLVMValueRef mipoff1
= NULL
;
1414 LLVMValueRef colors0
;
1415 LLVMValueRef colors1
;
/*
 * Prefer the float-coord (_afloat) sampling paths when running on AVX
 * without AVX2 with >4-wide vectors: per the _afloat helpers' own docs,
 * AVX supports 8x32 floats but not 8x32 ints.
 */
1416 boolean use_floats
= util_cpu_caps
.has_avx
&&
1417 !util_cpu_caps
.has_avx2
&&
1418 bld
->coord_type
.length
> 4;
1420 /* sample the first mipmap level */
1421 lp_build_mipmap_level_sizes(bld
, ilevel0
,
1423 &row_stride0_vec
, &img_stride0_vec
);
1424 if (bld
->num_mips
== 1) {
1425 data_ptr0
= lp_build_get_mipmap_level(bld
, ilevel0
);
1428 /* This path should work for num_lods 1 too but slightly less efficient */
1429 data_ptr0
= bld
->base_ptr
;
1430 mipoff0
= lp_build_get_mip_offsets(bld
, ilevel0
);
/* Dispatch level-0 sampling: nearest vs. linear, float vs. int path. */
1434 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
1435 lp_build_sample_image_nearest_afloat(bld
,
1437 row_stride0_vec
, img_stride0_vec
,
1438 data_ptr0
, mipoff0
, s
, t
, r
, offsets
,
1442 assert(img_filter
== PIPE_TEX_FILTER_LINEAR
);
1443 lp_build_sample_image_linear_afloat(bld
,
1445 row_stride0_vec
, img_stride0_vec
,
1446 data_ptr0
, mipoff0
, s
, t
, r
, offsets
,
1451 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
1452 lp_build_sample_image_nearest(bld
,
1454 row_stride0_vec
, img_stride0_vec
,
1455 data_ptr0
, mipoff0
, s
, t
, r
, offsets
,
1459 assert(img_filter
== PIPE_TEX_FILTER_LINEAR
);
1460 lp_build_sample_image_linear(bld
,
1462 row_stride0_vec
, img_stride0_vec
,
1463 data_ptr0
, mipoff0
, s
, t
, r
, offsets
,
1468 /* Store the first level's colors in the output variables */
1469 LLVMBuildStore(builder
, colors0
, colors_var
);
/* Only fetch and blend the second level for linear mip filtering. */
1471 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
1472 LLVMValueRef h16vec_scale
= lp_build_const_vec(bld
->gallivm
,
1473 bld
->lodf_bld
.type
, 256.0);
1474 LLVMTypeRef i32vec_type
= bld
->lodi_bld
.vec_type
;
1475 struct lp_build_if_state if_ctx
;
1476 LLVMValueRef need_lerp
;
1477 unsigned num_quads
= bld
->coord_bld
.type
.length
/ 4;
/* Convert lod_fpart to fixed point: scale by 256 and truncate to int. */
1480 lod_fpart
= LLVMBuildFMul(builder
, lod_fpart
, h16vec_scale
, "");
1481 lod_fpart
= LLVMBuildFPToSI(builder
, lod_fpart
, i32vec_type
, "lod_fpart.fixed16");
1483 /* need_lerp = lod_fpart > 0 */
1484 if (bld
->num_lods
== 1) {
1485 need_lerp
= LLVMBuildICmp(builder
, LLVMIntSGT
,
1486 lod_fpart
, bld
->lodi_bld
.zero
,
1491 * We'll do mip filtering if any of the quads need it.
1492 * It might be better to split the vectors here and only fetch/filter
1493 * quads which need it.
1496 * We need to clamp lod_fpart here since we can get negative
1497 * values which would screw up filtering if not all
1498 * lod_fpart values have same sign.
1499 * We can however then skip the greater than comparison.
1501 lod_fpart
= lp_build_max(&bld
->lodi_bld
, lod_fpart
,
1502 bld
->lodi_bld
.zero
);
1503 need_lerp
= lp_build_any_true_range(&bld
->lodi_bld
, bld
->num_lods
, lod_fpart
);
/* Runtime branch: skip the second-level fetch when no pixel needs lerp. */
1506 lp_build_if(&if_ctx
, bld
->gallivm
, need_lerp
);
1508 struct lp_build_context u8n_bld
;
/* 8-bit unorm context used for the final fixed-point lerp. */
1510 lp_build_context_init(&u8n_bld
, bld
->gallivm
, lp_type_unorm(8, bld
->vector_width
));
1512 /* sample the second mipmap level */
1513 lp_build_mipmap_level_sizes(bld
, ilevel1
,
1515 &row_stride1_vec
, &img_stride1_vec
);
1516 if (bld
->num_mips
== 1) {
1517 data_ptr1
= lp_build_get_mipmap_level(bld
, ilevel1
);
1520 data_ptr1
= bld
->base_ptr
;
1521 mipoff1
= lp_build_get_mip_offsets(bld
, ilevel1
);
/* Same nearest/linear, float/int dispatch as level 0, for level 1. */
1525 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
1526 lp_build_sample_image_nearest_afloat(bld
,
1528 row_stride1_vec
, img_stride1_vec
,
1529 data_ptr1
, mipoff1
, s
, t
, r
, offsets
,
1533 lp_build_sample_image_linear_afloat(bld
,
1535 row_stride1_vec
, img_stride1_vec
,
1536 data_ptr1
, mipoff1
, s
, t
, r
, offsets
,
1541 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
1542 lp_build_sample_image_nearest(bld
,
1544 row_stride1_vec
, img_stride1_vec
,
1545 data_ptr1
, mipoff1
, s
, t
, r
, offsets
,
1549 lp_build_sample_image_linear(bld
,
1551 row_stride1_vec
, img_stride1_vec
,
1552 data_ptr1
, mipoff1
, s
, t
, r
, offsets
,
1557 /* interpolate samples from the two mipmap levels */
/*
 * Build the per-channel 8-bit lerp weights from lod_fpart: scalar
 * broadcast in the single-quad/single-lod case, otherwise a truncate
 * plus shuffle that replicates each lod's weight across its channels.
 */
1559 if (num_quads
== 1 && bld
->num_lods
== 1) {
1560 lod_fpart
= LLVMBuildTrunc(builder
, lod_fpart
, u8n_bld
.elem_type
, "");
1561 lod_fpart
= lp_build_broadcast_scalar(&u8n_bld
, lod_fpart
);
1564 unsigned num_chans_per_lod
= 4 * bld
->coord_type
.length
/ bld
->num_lods
;
1565 LLVMTypeRef tmp_vec_type
= LLVMVectorType(u8n_bld
.elem_type
, bld
->lodi_bld
.type
.length
);
1566 LLVMValueRef shuffle
[LP_MAX_VECTOR_LENGTH
];
1568 /* Take the LSB of lod_fpart */
1569 lod_fpart
= LLVMBuildTrunc(builder
, lod_fpart
, tmp_vec_type
, "");
1571 /* Broadcast each lod weight into their respective channels */
1572 for (i
= 0; i
< u8n_bld
.type
.length
; ++i
) {
1573 shuffle
[i
] = lp_build_const_int32(bld
->gallivm
, i
/ num_chans_per_lod
);
1575 lod_fpart
= LLVMBuildShuffleVector(builder
, lod_fpart
, LLVMGetUndef(tmp_vec_type
),
1576 LLVMConstVector(shuffle
, u8n_bld
.type
.length
), "");
/* Lerp level-0/level-1 colors with prescaled 8-bit weights and store. */
1579 colors0
= lp_build_lerp(&u8n_bld
, lod_fpart
,
1581 LP_BLD_LERP_PRESCALED_WEIGHTS
);
1583 LLVMBuildStore(builder
, colors0
, colors_var
);
1585 lp_build_endif(&if_ctx
);
1592 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
1593 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
1594 * but only limited texture coord wrap modes.
1597 lp_build_sample_aos(struct lp_build_sample_context
*bld
,
1598 unsigned sampler_unit
,
1602 const LLVMValueRef
*offsets
,
1603 LLVMValueRef lod_positive
,
1604 LLVMValueRef lod_fpart
,
1605 LLVMValueRef ilevel0
,
1606 LLVMValueRef ilevel1
,
1607 LLVMValueRef texel_out
[4])
1609 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
1610 const unsigned mip_filter
= bld
->static_sampler_state
->min_mip_filter
;
1611 const unsigned min_filter
= bld
->static_sampler_state
->min_img_filter
;
1612 const unsigned mag_filter
= bld
->static_sampler_state
->mag_img_filter
;
1613 const unsigned dims
= bld
->dims
;
1614 LLVMValueRef packed_var
, packed
;
1615 LLVMValueRef unswizzled
[4];
1616 struct lp_build_context u8n_bld
;
1618 /* we only support the common/simple wrap modes at this time */
1619 assert(lp_is_simple_wrap_mode(bld
->static_sampler_state
->wrap_s
));
1621 assert(lp_is_simple_wrap_mode(bld
->static_sampler_state
->wrap_t
));
1623 assert(lp_is_simple_wrap_mode(bld
->static_sampler_state
->wrap_r
));
1626 /* make 8-bit unorm builder context */
1627 lp_build_context_init(&u8n_bld
, bld
->gallivm
, lp_type_unorm(8, bld
->vector_width
));
1630 * Get/interpolate texture colors.
1633 packed_var
= lp_build_alloca(bld
->gallivm
, u8n_bld
.vec_type
, "packed_var");
1635 if (min_filter
== mag_filter
) {
1636 /* no need to distinguish between minification and magnification */
1637 lp_build_sample_mipmap(bld
,
1638 min_filter
, mip_filter
,
1640 ilevel0
, ilevel1
, lod_fpart
,
1644 /* Emit conditional to choose min image filter or mag image filter
1645 * depending on the lod being > 0 or <= 0, respectively.
1647 struct lp_build_if_state if_ctx
;
1650 * FIXME this should take all lods into account, if some are min
1651 * some max probably could hack up the weights in the linear
1652 * path with selects to work for nearest.
1654 if (bld
->num_lods
> 1)
1655 lod_positive
= LLVMBuildExtractElement(builder
, lod_positive
,
1656 lp_build_const_int32(bld
->gallivm
, 0), "");
1658 lod_positive
= LLVMBuildTrunc(builder
, lod_positive
,
1659 LLVMInt1TypeInContext(bld
->gallivm
->context
), "");
1661 lp_build_if(&if_ctx
, bld
->gallivm
, lod_positive
);
1663 /* Use the minification filter */
1664 lp_build_sample_mipmap(bld
,
1665 min_filter
, mip_filter
,
1667 ilevel0
, ilevel1
, lod_fpart
,
1670 lp_build_else(&if_ctx
);
1672 /* Use the magnification filter */
1673 lp_build_sample_mipmap(bld
,
1674 mag_filter
, PIPE_TEX_MIPFILTER_NONE
,
1676 ilevel0
, NULL
, NULL
,
1679 lp_build_endif(&if_ctx
);
1682 packed
= LLVMBuildLoad(builder
, packed_var
, "");
1685 * Convert to SoA and swizzle.
1687 lp_build_rgba8_to_fi32_soa(bld
->gallivm
,
1689 packed
, unswizzled
);
1691 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
1692 lp_build_format_swizzle_soa(bld
->format_desc
,
1694 unswizzled
, texel_out
);
1697 texel_out
[0] = unswizzled
[0];
1698 texel_out
[1] = unswizzled
[1];
1699 texel_out
[2] = unswizzled
[2];
1700 texel_out
[3] = unswizzled
[3];