1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Texture sampling -- AoS.
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "util/u_cpu_detect.h"
44 #include "lp_bld_debug.h"
45 #include "lp_bld_type.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_arit.h"
49 #include "lp_bld_logic.h"
50 #include "lp_bld_swizzle.h"
51 #include "lp_bld_pack.h"
52 #include "lp_bld_flow.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_format.h"
55 #include "lp_bld_sample.h"
56 #include "lp_bld_sample_aos.h"
57 #include "lp_bld_quad.h"
61 * Build LLVM code for texture coord wrapping, for nearest filtering,
62 * for scaled integer texcoords.
63 * \param block_length is the length of the pixel block along the
65 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
66 * \param length the texture size along one dimension
67 * \param stride pixel stride along the coordinate axis (in bytes)
68 * \param is_pot if TRUE, length is a power of two
69 * \param wrap_mode one of PIPE_TEX_WRAP_x
70 * \param out_offset byte offset for the wrapped coordinate
71 * \param out_i resulting sub-block pixel coordinate for coord0
74 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context
*bld
,
75 unsigned block_length
,
81 LLVMValueRef
*out_offset
,
84 struct lp_build_context
*uint_coord_bld
= &bld
->uint_coord_bld
;
85 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
86 LLVMValueRef length_minus_one
;
88 length_minus_one
= lp_build_sub(uint_coord_bld
, length
, uint_coord_bld
->one
);
91 case PIPE_TEX_WRAP_REPEAT
:
93 coord
= LLVMBuildAnd(bld
->builder
, coord
, length_minus_one
, "");
95 /* Signed remainder won't give the right results for negative
96 * dividends but unsigned remainder does.*/
97 coord
= LLVMBuildURem(bld
->builder
, coord
, length
, "");
100 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
101 coord
= lp_build_max(int_coord_bld
, coord
, int_coord_bld
->zero
);
102 coord
= lp_build_min(int_coord_bld
, coord
, length_minus_one
);
105 case PIPE_TEX_WRAP_CLAMP
:
106 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
107 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
108 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
109 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
110 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
115 lp_build_sample_partial_offset(uint_coord_bld
, block_length
, coord
, stride
,
121 * Build LLVM code for texture coord wrapping, for linear filtering,
122 * for scaled integer texcoords.
123 * \param block_length is the length of the pixel block along the
125 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
126 * \param length the texture size along one dimension
127 * \param stride pixel stride along the coordinate axis (in bytes)
128 * \param is_pot if TRUE, length is a power of two
129 * \param wrap_mode one of PIPE_TEX_WRAP_x
130 * \param offset0 resulting relative offset for coord0
131 * \param offset1 resulting relative offset for coord0 + 1
132 * \param i0 resulting sub-block pixel coordinate for coord0
133 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
136 lp_build_sample_wrap_linear_int(struct lp_build_sample_context
*bld
,
137 unsigned block_length
,
143 LLVMValueRef
*offset0
,
144 LLVMValueRef
*offset1
,
148 struct lp_build_context
*uint_coord_bld
= &bld
->uint_coord_bld
;
149 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
150 LLVMValueRef length_minus_one
;
151 LLVMValueRef lmask
, umask
, mask
;
153 if (block_length
!= 1) {
155 * If the pixel block covers more than one pixel then there is no easy
156 * way to calculate offset1 relative to offset0. Instead, compute them
162 lp_build_sample_wrap_nearest_int(bld
,
171 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
173 lp_build_sample_wrap_nearest_int(bld
,
186 * Scalar pixels -- try to compute offset0 and offset1 with a single stride
190 *i0
= uint_coord_bld
->zero
;
191 *i1
= uint_coord_bld
->zero
;
193 length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
196 case PIPE_TEX_WRAP_REPEAT
:
198 coord0
= LLVMBuildAnd(bld
->builder
, coord0
, length_minus_one
, "");
201 /* Signed remainder won't give the right results for negative
202 * dividends but unsigned remainder does.*/
203 coord0
= LLVMBuildURem(bld
->builder
, coord0
, length
, "");
206 mask
= lp_build_compare(bld
->builder
, int_coord_bld
->type
,
207 PIPE_FUNC_NOTEQUAL
, coord0
, length_minus_one
);
209 *offset0
= lp_build_mul(uint_coord_bld
, coord0
, stride
);
210 *offset1
= LLVMBuildAnd(bld
->builder
,
211 lp_build_add(uint_coord_bld
, *offset0
, stride
),
215 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
216 lmask
= lp_build_compare(int_coord_bld
->builder
, int_coord_bld
->type
,
217 PIPE_FUNC_GEQUAL
, coord0
, int_coord_bld
->zero
);
218 umask
= lp_build_compare(int_coord_bld
->builder
, int_coord_bld
->type
,
219 PIPE_FUNC_LESS
, coord0
, length_minus_one
);
221 coord0
= lp_build_select(int_coord_bld
, lmask
, coord0
, int_coord_bld
->zero
);
222 coord0
= lp_build_select(int_coord_bld
, umask
, coord0
, length_minus_one
);
224 mask
= LLVMBuildAnd(bld
->builder
, lmask
, umask
, "");
226 *offset0
= lp_build_mul(uint_coord_bld
, coord0
, stride
);
227 *offset1
= lp_build_add(uint_coord_bld
,
229 LLVMBuildAnd(bld
->builder
, stride
, mask
, ""));
232 case PIPE_TEX_WRAP_CLAMP
:
233 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
234 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
235 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
236 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
237 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
240 *offset0
= uint_coord_bld
->zero
;
241 *offset1
= uint_coord_bld
->zero
;
248 * Sample a single texture image with nearest sampling.
249 * If sampling a cube texture, r = cube face in [0,5].
250 * Return filtered color as two vectors of 16-bit fixed point values.
253 lp_build_sample_image_nearest(struct lp_build_sample_context
*bld
,
254 LLVMValueRef width_vec
,
255 LLVMValueRef height_vec
,
256 LLVMValueRef depth_vec
,
257 LLVMValueRef row_stride_vec
,
258 LLVMValueRef img_stride_vec
,
259 LLVMValueRef data_ptr
,
263 LLVMValueRef
*colors_lo
,
264 LLVMValueRef
*colors_hi
)
266 const int dims
= texture_dims(bld
->static_state
->target
);
267 LLVMBuilderRef builder
= bld
->builder
;
268 struct lp_build_context i32
, h16
, u8n
;
269 LLVMTypeRef i32_vec_type
, h16_vec_type
, u8n_vec_type
;
271 LLVMValueRef s_ipart
, t_ipart
, r_ipart
;
272 LLVMValueRef x_stride
;
273 LLVMValueRef x_offset
, offset
;
274 LLVMValueRef x_subcoord
, y_subcoord
, z_subcoord
;
276 lp_build_context_init(&i32
, builder
, lp_type_int_vec(32));
277 lp_build_context_init(&h16
, builder
, lp_type_ufixed(16));
278 lp_build_context_init(&u8n
, builder
, lp_type_unorm(8));
280 i32_vec_type
= lp_build_vec_type(i32
.type
);
281 h16_vec_type
= lp_build_vec_type(h16
.type
);
282 u8n_vec_type
= lp_build_vec_type(u8n
.type
);
284 if (bld
->static_state
->normalized_coords
) {
285 /* s = s * width, t = t * height */
286 LLVMTypeRef coord_vec_type
= lp_build_vec_type(bld
->coord_type
);
287 LLVMValueRef fp_width
= LLVMBuildSIToFP(bld
->builder
, width_vec
,
289 s
= lp_build_mul(&bld
->coord_bld
, s
, fp_width
);
291 LLVMValueRef fp_height
= LLVMBuildSIToFP(bld
->builder
, height_vec
,
293 t
= lp_build_mul(&bld
->coord_bld
, t
, fp_height
);
295 LLVMValueRef fp_depth
= LLVMBuildSIToFP(bld
->builder
, depth_vec
,
297 r
= lp_build_mul(&bld
->coord_bld
, r
, fp_depth
);
302 /* scale coords by 256 (8 fractional bits) */
303 s
= lp_build_mul_imm(&bld
->coord_bld
, s
, 256);
305 t
= lp_build_mul_imm(&bld
->coord_bld
, t
, 256);
307 r
= lp_build_mul_imm(&bld
->coord_bld
, r
, 256);
309 /* convert float to int */
310 s
= LLVMBuildFPToSI(builder
, s
, i32_vec_type
, "");
312 t
= LLVMBuildFPToSI(builder
, t
, i32_vec_type
, "");
314 r
= LLVMBuildFPToSI(builder
, r
, i32_vec_type
, "");
316 /* compute floor (shift right 8) */
317 i32_c8
= lp_build_const_int_vec(i32
.type
, 8);
318 s_ipart
= LLVMBuildAShr(builder
, s
, i32_c8
, "");
320 t_ipart
= LLVMBuildAShr(builder
, t
, i32_c8
, "");
322 r_ipart
= LLVMBuildAShr(builder
, r
, i32_c8
, "");
324 /* get pixel, row, image strides */
325 x_stride
= lp_build_const_vec(bld
->uint_coord_bld
.type
,
326 bld
->format_desc
->block
.bits
/8);
328 /* Do texcoord wrapping, compute texel offset */
329 lp_build_sample_wrap_nearest_int(bld
,
330 bld
->format_desc
->block
.width
,
331 s_ipart
, width_vec
, x_stride
,
332 bld
->static_state
->pot_width
,
333 bld
->static_state
->wrap_s
,
334 &x_offset
, &x_subcoord
);
337 LLVMValueRef y_offset
;
338 lp_build_sample_wrap_nearest_int(bld
,
339 bld
->format_desc
->block
.height
,
340 t_ipart
, height_vec
, row_stride_vec
,
341 bld
->static_state
->pot_height
,
342 bld
->static_state
->wrap_t
,
343 &y_offset
, &y_subcoord
);
344 offset
= lp_build_add(&bld
->uint_coord_bld
, offset
, y_offset
);
346 LLVMValueRef z_offset
;
347 lp_build_sample_wrap_nearest_int(bld
,
348 1, /* block length (depth) */
349 r_ipart
, depth_vec
, img_stride_vec
,
350 bld
->static_state
->pot_height
,
351 bld
->static_state
->wrap_r
,
352 &z_offset
, &z_subcoord
);
353 offset
= lp_build_add(&bld
->uint_coord_bld
, offset
, z_offset
);
355 else if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
356 LLVMValueRef z_offset
;
357 /* The r coord is the cube face in [0,5] */
358 z_offset
= lp_build_mul(&bld
->uint_coord_bld
, r
, img_stride_vec
);
359 offset
= lp_build_add(&bld
->uint_coord_bld
, offset
, z_offset
);
364 * Fetch the pixels as 4 x 32bit (rgba order might differ):
366 * rgba0 rgba1 rgba2 rgba3
368 * bit cast them into 16 x u8
370 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
372 * unpack them into two 8 x i16:
374 * r0 g0 b0 a0 r1 g1 b1 a1
375 * r2 g2 b2 a2 r3 g3 b3 a3
377 * The higher 8 bits of the resulting elements will be zero.
382 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
384 * Given the format is a rgba8, just read the pixels as is,
385 * without any swizzling. Swizzling will be done later.
387 rgba8
= lp_build_gather(bld
->builder
,
388 bld
->texel_type
.length
,
389 bld
->format_desc
->block
.bits
,
390 bld
->texel_type
.width
,
393 rgba8
= LLVMBuildBitCast(builder
, rgba8
, u8n_vec_type
, "");
396 rgba8
= lp_build_fetch_rgba_aos(bld
->builder
,
404 /* Expand one 4*rgba8 to two 2*rgba16 */
405 lp_build_unpack2(builder
, u8n
.type
, h16
.type
,
407 colors_lo
, colors_hi
);
413 * Sample a single texture image with (bi-)(tri-)linear sampling.
414 * Return filtered color as two vectors of 16-bit fixed point values.
417 lp_build_sample_image_linear(struct lp_build_sample_context
*bld
,
418 LLVMValueRef width_vec
,
419 LLVMValueRef height_vec
,
420 LLVMValueRef depth_vec
,
421 LLVMValueRef row_stride_vec
,
422 LLVMValueRef img_stride_vec
,
423 LLVMValueRef data_ptr
,
427 LLVMValueRef
*colors_lo
,
428 LLVMValueRef
*colors_hi
)
430 const int dims
= texture_dims(bld
->static_state
->target
);
431 LLVMBuilderRef builder
= bld
->builder
;
432 struct lp_build_context i32
, h16
, u8n
;
433 LLVMTypeRef i32_vec_type
, h16_vec_type
, u8n_vec_type
;
434 LLVMValueRef i32_c8
, i32_c128
, i32_c255
;
435 LLVMValueRef s_ipart
, s_fpart
, s_fpart_lo
, s_fpart_hi
;
436 LLVMValueRef t_ipart
, t_fpart
, t_fpart_lo
, t_fpart_hi
;
437 LLVMValueRef r_ipart
, r_fpart
, r_fpart_lo
, r_fpart_hi
;
438 LLVMValueRef x_stride
, y_stride
, z_stride
;
439 LLVMValueRef x_offset0
, x_offset1
;
440 LLVMValueRef y_offset0
, y_offset1
;
441 LLVMValueRef z_offset0
, z_offset1
;
442 LLVMValueRef offset
[2][2][2]; /* [z][y][x] */
443 LLVMValueRef x_subcoord
[2], y_subcoord
[2], z_subcoord
[2];
444 LLVMValueRef neighbors_lo
[2][2][2]; /* [z][y][x] */
445 LLVMValueRef neighbors_hi
[2][2][2]; /* [z][y][x] */
446 LLVMValueRef packed_lo
, packed_hi
;
451 lp_build_context_init(&i32
, builder
, lp_type_int_vec(32));
452 lp_build_context_init(&h16
, builder
, lp_type_ufixed(16));
453 lp_build_context_init(&u8n
, builder
, lp_type_unorm(8));
455 i32_vec_type
= lp_build_vec_type(i32
.type
);
456 h16_vec_type
= lp_build_vec_type(h16
.type
);
457 u8n_vec_type
= lp_build_vec_type(u8n
.type
);
459 if (bld
->static_state
->normalized_coords
) {
460 /* s = s * width, t = t * height */
461 LLVMTypeRef coord_vec_type
= lp_build_vec_type(bld
->coord_type
);
462 LLVMValueRef fp_width
= LLVMBuildSIToFP(bld
->builder
, width_vec
,
464 s
= lp_build_mul(&bld
->coord_bld
, s
, fp_width
);
466 LLVMValueRef fp_height
= LLVMBuildSIToFP(bld
->builder
, height_vec
,
468 t
= lp_build_mul(&bld
->coord_bld
, t
, fp_height
);
471 LLVMValueRef fp_depth
= LLVMBuildSIToFP(bld
->builder
, depth_vec
,
473 r
= lp_build_mul(&bld
->coord_bld
, r
, fp_depth
);
477 /* scale coords by 256 (8 fractional bits) */
478 s
= lp_build_mul_imm(&bld
->coord_bld
, s
, 256);
480 t
= lp_build_mul_imm(&bld
->coord_bld
, t
, 256);
482 r
= lp_build_mul_imm(&bld
->coord_bld
, r
, 256);
484 /* convert float to int */
485 s
= LLVMBuildFPToSI(builder
, s
, i32_vec_type
, "");
487 t
= LLVMBuildFPToSI(builder
, t
, i32_vec_type
, "");
489 r
= LLVMBuildFPToSI(builder
, r
, i32_vec_type
, "");
491 /* subtract 0.5 (add -128) */
492 i32_c128
= lp_build_const_int_vec(i32
.type
, -128);
493 if (!bld
->static_state
->force_nearest_s
) {
494 s
= LLVMBuildAdd(builder
, s
, i32_c128
, "");
496 if (dims
>= 2 && !bld
->static_state
->force_nearest_t
) {
497 t
= LLVMBuildAdd(builder
, t
, i32_c128
, "");
500 r
= LLVMBuildAdd(builder
, r
, i32_c128
, "");
503 /* compute floor (shift right 8) */
504 i32_c8
= lp_build_const_int_vec(i32
.type
, 8);
505 s_ipart
= LLVMBuildAShr(builder
, s
, i32_c8
, "");
507 t_ipart
= LLVMBuildAShr(builder
, t
, i32_c8
, "");
509 r_ipart
= LLVMBuildAShr(builder
, r
, i32_c8
, "");
511 /* compute fractional part (AND with 0xff) */
512 i32_c255
= lp_build_const_int_vec(i32
.type
, 255);
513 s_fpart
= LLVMBuildAnd(builder
, s
, i32_c255
, "");
515 t_fpart
= LLVMBuildAnd(builder
, t
, i32_c255
, "");
517 r_fpart
= LLVMBuildAnd(builder
, r
, i32_c255
, "");
519 /* get pixel, row and image strides */
520 x_stride
= lp_build_const_vec(bld
->uint_coord_bld
.type
,
521 bld
->format_desc
->block
.bits
/8);
522 y_stride
= row_stride_vec
;
523 z_stride
= img_stride_vec
;
525 /* do texcoord wrapping and compute texel offsets */
526 lp_build_sample_wrap_linear_int(bld
,
527 bld
->format_desc
->block
.width
,
528 s_ipart
, width_vec
, x_stride
,
529 bld
->static_state
->pot_width
,
530 bld
->static_state
->wrap_s
,
531 &x_offset0
, &x_offset1
,
532 &x_subcoord
[0], &x_subcoord
[1]);
533 for (z
= 0; z
< 2; z
++) {
534 for (y
= 0; y
< 2; y
++) {
535 offset
[z
][y
][0] = x_offset0
;
536 offset
[z
][y
][1] = x_offset1
;
541 lp_build_sample_wrap_linear_int(bld
,
542 bld
->format_desc
->block
.height
,
543 t_ipart
, height_vec
, y_stride
,
544 bld
->static_state
->pot_height
,
545 bld
->static_state
->wrap_t
,
546 &y_offset0
, &y_offset1
,
547 &y_subcoord
[0], &y_subcoord
[1]);
549 for (z
= 0; z
< 2; z
++) {
550 for (x
= 0; x
< 2; x
++) {
551 offset
[z
][0][x
] = lp_build_add(&bld
->uint_coord_bld
,
552 offset
[z
][0][x
], y_offset0
);
553 offset
[z
][1][x
] = lp_build_add(&bld
->uint_coord_bld
,
554 offset
[z
][1][x
], y_offset1
);
560 lp_build_sample_wrap_linear_int(bld
,
561 bld
->format_desc
->block
.height
,
562 r_ipart
, depth_vec
, z_stride
,
563 bld
->static_state
->pot_depth
,
564 bld
->static_state
->wrap_r
,
565 &z_offset0
, &z_offset1
,
566 &z_subcoord
[0], &z_subcoord
[1]);
567 for (y
= 0; y
< 2; y
++) {
568 for (x
= 0; x
< 2; x
++) {
569 offset
[0][y
][x
] = lp_build_add(&bld
->uint_coord_bld
,
570 offset
[0][y
][x
], z_offset0
);
571 offset
[1][y
][x
] = lp_build_add(&bld
->uint_coord_bld
,
572 offset
[1][y
][x
], z_offset1
);
576 else if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
577 LLVMValueRef z_offset
;
578 z_offset
= lp_build_mul(&bld
->uint_coord_bld
, r
, img_stride_vec
);
579 for (y
= 0; y
< 2; y
++) {
580 for (x
= 0; x
< 2; x
++) {
581 /* The r coord is the cube face in [0,5] */
582 offset
[0][y
][x
] = lp_build_add(&bld
->uint_coord_bld
,
583 offset
[0][y
][x
], z_offset
);
589 * Transform 4 x i32 in
591 * s_fpart = {s0, s1, s2, s3}
595 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
599 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
600 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
602 * and likewise for t_fpart. There is no risk of loosing precision here
603 * since the fractional parts only use the lower 8bits.
605 s_fpart
= LLVMBuildBitCast(builder
, s_fpart
, h16_vec_type
, "");
607 t_fpart
= LLVMBuildBitCast(builder
, t_fpart
, h16_vec_type
, "");
609 r_fpart
= LLVMBuildBitCast(builder
, r_fpart
, h16_vec_type
, "");
612 LLVMTypeRef elem_type
= LLVMInt32Type();
613 LLVMValueRef shuffles_lo
[LP_MAX_VECTOR_LENGTH
];
614 LLVMValueRef shuffles_hi
[LP_MAX_VECTOR_LENGTH
];
615 LLVMValueRef shuffle_lo
;
616 LLVMValueRef shuffle_hi
;
618 for (j
= 0; j
< h16
.type
.length
; j
+= 4) {
619 #ifdef PIPE_ARCH_LITTLE_ENDIAN
620 unsigned subindex
= 0;
622 unsigned subindex
= 1;
626 index
= LLVMConstInt(elem_type
, j
/2 + subindex
, 0);
627 for (i
= 0; i
< 4; ++i
)
628 shuffles_lo
[j
+ i
] = index
;
630 index
= LLVMConstInt(elem_type
, h16
.type
.length
/2 + j
/2 + subindex
, 0);
631 for (i
= 0; i
< 4; ++i
)
632 shuffles_hi
[j
+ i
] = index
;
635 shuffle_lo
= LLVMConstVector(shuffles_lo
, h16
.type
.length
);
636 shuffle_hi
= LLVMConstVector(shuffles_hi
, h16
.type
.length
);
638 s_fpart_lo
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
,
640 s_fpart_hi
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
,
643 t_fpart_lo
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
,
645 t_fpart_hi
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
,
649 r_fpart_lo
= LLVMBuildShuffleVector(builder
, r_fpart
, h16
.undef
,
651 r_fpart_hi
= LLVMBuildShuffleVector(builder
, r_fpart
, h16
.undef
,
657 * Fetch the pixels as 4 x 32bit (rgba order might differ):
659 * rgba0 rgba1 rgba2 rgba3
661 * bit cast them into 16 x u8
663 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
665 * unpack them into two 8 x i16:
667 * r0 g0 b0 a0 r1 g1 b1 a1
668 * r2 g2 b2 a2 r3 g3 b3 a3
670 * The higher 8 bits of the resulting elements will be zero.
672 numj
= 1 + (dims
>= 2);
673 numk
= 1 + (dims
>= 3);
675 for (k
= 0; k
< numk
; k
++) {
676 for (j
= 0; j
< numj
; j
++) {
677 for (i
= 0; i
< 2; i
++) {
680 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
682 * Given the format is a rgba8, just read the pixels as is,
683 * without any swizzling. Swizzling will be done later.
685 rgba8
= lp_build_gather(bld
->builder
,
686 bld
->texel_type
.length
,
687 bld
->format_desc
->block
.bits
,
688 bld
->texel_type
.width
,
689 data_ptr
, offset
[k
][j
][i
]);
691 rgba8
= LLVMBuildBitCast(builder
, rgba8
, u8n_vec_type
, "");
694 rgba8
= lp_build_fetch_rgba_aos(bld
->builder
,
697 data_ptr
, offset
[k
][j
][i
],
702 /* Expand one 4*rgba8 to two 2*rgba16 */
703 lp_build_unpack2(builder
, u8n
.type
, h16
.type
,
705 &neighbors_lo
[k
][j
][i
], &neighbors_hi
[k
][j
][i
]);
711 * Linear interpolation with 8.8 fixed point.
713 if (bld
->static_state
->force_nearest_s
) {
714 /* special case 1-D lerp */
715 packed_lo
= lp_build_lerp(&h16
,
717 neighbors_lo
[0][0][0],
718 neighbors_lo
[0][0][1]);
720 packed_hi
= lp_build_lerp(&h16
,
722 neighbors_hi
[0][1][0],
723 neighbors_hi
[0][1][0]);
725 else if (bld
->static_state
->force_nearest_t
) {
726 /* special case 1-D lerp */
727 packed_lo
= lp_build_lerp(&h16
,
729 neighbors_lo
[0][0][0],
730 neighbors_lo
[0][0][1]);
732 packed_hi
= lp_build_lerp(&h16
,
734 neighbors_hi
[0][0][0],
735 neighbors_hi
[0][0][1]);
738 /* general 1/2/3-D lerping */
740 packed_lo
= lp_build_lerp(&h16
,
742 neighbors_lo
[0][0][0],
743 neighbors_lo
[0][0][1]);
745 packed_hi
= lp_build_lerp(&h16
,
747 neighbors_hi
[0][0][0],
748 neighbors_hi
[0][0][1]);
752 packed_lo
= lp_build_lerp_2d(&h16
,
753 s_fpart_lo
, t_fpart_lo
,
754 neighbors_lo
[0][0][0],
755 neighbors_lo
[0][0][1],
756 neighbors_lo
[0][1][0],
757 neighbors_lo
[0][1][1]);
759 packed_hi
= lp_build_lerp_2d(&h16
,
760 s_fpart_hi
, t_fpart_hi
,
761 neighbors_hi
[0][0][0],
762 neighbors_hi
[0][0][1],
763 neighbors_hi
[0][1][0],
764 neighbors_hi
[0][1][1]);
767 LLVMValueRef packed_lo2
, packed_hi2
;
769 /* lerp in the second z slice */
770 packed_lo2
= lp_build_lerp_2d(&h16
,
771 s_fpart_lo
, t_fpart_lo
,
772 neighbors_lo
[1][0][0],
773 neighbors_lo
[1][0][1],
774 neighbors_lo
[1][1][0],
775 neighbors_lo
[1][1][1]);
777 packed_hi2
= lp_build_lerp_2d(&h16
,
778 s_fpart_hi
, t_fpart_hi
,
779 neighbors_hi
[1][0][0],
780 neighbors_hi
[1][0][1],
781 neighbors_hi
[1][1][0],
782 neighbors_hi
[1][1][1]);
783 /* interp between two z slices */
784 packed_lo
= lp_build_lerp(&h16
, r_fpart_lo
,
785 packed_lo
, packed_lo2
);
786 packed_hi
= lp_build_lerp(&h16
, r_fpart_hi
,
787 packed_hi
, packed_hi2
);
792 *colors_lo
= packed_lo
;
793 *colors_hi
= packed_hi
;
798 * Sample the texture/mipmap using given image filter and mip filter.
799 * data0_ptr and data1_ptr point to the two mipmap levels to sample
800 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
801 * If we're using nearest miplevel sampling the '1' values will be null/unused.
804 lp_build_sample_mipmap(struct lp_build_sample_context
*bld
,
810 LLVMValueRef lod_fpart
,
811 LLVMValueRef width0_vec
,
812 LLVMValueRef width1_vec
,
813 LLVMValueRef height0_vec
,
814 LLVMValueRef height1_vec
,
815 LLVMValueRef depth0_vec
,
816 LLVMValueRef depth1_vec
,
817 LLVMValueRef row_stride0_vec
,
818 LLVMValueRef row_stride1_vec
,
819 LLVMValueRef img_stride0_vec
,
820 LLVMValueRef img_stride1_vec
,
821 LLVMValueRef data_ptr0
,
822 LLVMValueRef data_ptr1
,
823 LLVMValueRef
*colors_lo
,
824 LLVMValueRef
*colors_hi
)
826 LLVMValueRef colors0_lo
, colors0_hi
;
827 LLVMValueRef colors1_lo
, colors1_hi
;
829 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
830 /* sample the first mipmap level */
831 lp_build_sample_image_nearest(bld
,
832 width0_vec
, height0_vec
, depth0_vec
,
833 row_stride0_vec
, img_stride0_vec
,
835 &colors0_lo
, &colors0_hi
);
837 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
838 /* sample the second mipmap level */
839 lp_build_sample_image_nearest(bld
,
840 width1_vec
, height1_vec
, depth1_vec
,
841 row_stride1_vec
, img_stride1_vec
,
843 &colors1_lo
, &colors1_hi
);
847 assert(img_filter
== PIPE_TEX_FILTER_LINEAR
);
849 /* sample the first mipmap level */
850 lp_build_sample_image_linear(bld
,
851 width0_vec
, height0_vec
, depth0_vec
,
852 row_stride0_vec
, img_stride0_vec
,
854 &colors0_lo
, &colors0_hi
);
856 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
857 /* sample the second mipmap level */
858 lp_build_sample_image_linear(bld
,
859 width1_vec
, height1_vec
, depth1_vec
,
860 row_stride1_vec
, img_stride1_vec
,
862 &colors1_lo
, &colors1_hi
);
866 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
867 /* interpolate samples from the two mipmap levels */
868 struct lp_build_context h16
;
869 lp_build_context_init(&h16
, bld
->builder
, lp_type_ufixed(16));
871 *colors_lo
= lp_build_lerp(&h16
, lod_fpart
,
872 colors0_lo
, colors1_lo
);
873 *colors_hi
= lp_build_lerp(&h16
, lod_fpart
,
874 colors0_hi
, colors1_hi
);
877 /* use first/only level's colors */
878 *colors_lo
= colors0_lo
;
879 *colors_hi
= colors0_hi
;
886 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
887 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
888 * but only limited texture coord wrap modes.
891 lp_build_sample_aos(struct lp_build_sample_context
*bld
,
896 const LLVMValueRef
*ddx
,
897 const LLVMValueRef
*ddy
,
898 LLVMValueRef lod_bias
, /* optional */
899 LLVMValueRef explicit_lod
, /* optional */
903 LLVMValueRef width_vec
,
904 LLVMValueRef height_vec
,
905 LLVMValueRef depth_vec
,
906 LLVMValueRef row_stride_array
,
907 LLVMValueRef img_stride_array
,
908 LLVMValueRef data_array
,
909 LLVMValueRef texel_out
[4])
911 struct lp_build_context
*float_bld
= &bld
->float_bld
;
912 LLVMBuilderRef builder
= bld
->builder
;
913 const unsigned mip_filter
= bld
->static_state
->min_mip_filter
;
914 const unsigned min_filter
= bld
->static_state
->min_img_filter
;
915 const unsigned mag_filter
= bld
->static_state
->mag_img_filter
;
916 const int dims
= texture_dims(bld
->static_state
->target
);
917 LLVMValueRef lod
= NULL
, lod_fpart
= NULL
;
918 LLVMValueRef ilevel0
, ilevel1
= NULL
;
919 LLVMValueRef width0_vec
= NULL
, height0_vec
= NULL
, depth0_vec
= NULL
;
920 LLVMValueRef width1_vec
= NULL
, height1_vec
= NULL
, depth1_vec
= NULL
;
921 LLVMValueRef row_stride0_vec
= NULL
, row_stride1_vec
= NULL
;
922 LLVMValueRef img_stride0_vec
= NULL
, img_stride1_vec
= NULL
;
923 LLVMValueRef data_ptr0
, data_ptr1
= NULL
;
924 LLVMValueRef packed
, packed_lo
, packed_hi
;
925 LLVMValueRef unswizzled
[4];
926 LLVMValueRef face_ddx
[4], face_ddy
[4];
927 struct lp_build_context h16
;
928 LLVMTypeRef h16_vec_type
;
930 /* we only support the common/simple wrap modes at this time */
931 assert(lp_is_simple_wrap_mode(bld
->static_state
->wrap_s
));
933 assert(lp_is_simple_wrap_mode(bld
->static_state
->wrap_t
));
935 assert(lp_is_simple_wrap_mode(bld
->static_state
->wrap_r
));
938 /* make 16-bit fixed-pt builder context */
939 lp_build_context_init(&h16
, builder
, lp_type_ufixed(16));
940 h16_vec_type
= lp_build_vec_type(h16
.type
);
943 /* cube face selection, compute pre-face coords, etc. */
944 if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
945 LLVMValueRef face
, face_s
, face_t
;
946 lp_build_cube_lookup(bld
, s
, t
, r
, &face
, &face_s
, &face_t
);
947 s
= face_s
; /* vec */
948 t
= face_t
; /* vec */
949 /* use 'r' to indicate cube face */
950 r
= lp_build_broadcast_scalar(&bld
->int_coord_bld
, face
); /* vec */
952 /* recompute ddx, ddy using the new (s,t) face texcoords */
953 face_ddx
[0] = lp_build_ddx(&bld
->coord_bld
, s
);
954 face_ddx
[1] = lp_build_ddx(&bld
->coord_bld
, t
);
957 face_ddy
[0] = lp_build_ddy(&bld
->coord_bld
, s
);
958 face_ddy
[1] = lp_build_ddy(&bld
->coord_bld
, t
);
967 * Compute the level of detail (float).
969 if (min_filter
!= mag_filter
||
970 mip_filter
!= PIPE_TEX_MIPFILTER_NONE
) {
971 /* Need to compute lod either to choose mipmap levels or to
972 * distinguish between minification/magnification with one mipmap level.
974 lod
= lp_build_lod_selector(bld
, ddx
, ddy
,
975 lod_bias
, explicit_lod
,
976 width
, height
, depth
);
980 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
981 * If mipfilter=linear, also compute the weight between the two
982 * mipmap levels: lod_fpart
984 switch (mip_filter
) {
986 assert(0 && "bad mip_filter value in lp_build_sample_aos()");
988 case PIPE_TEX_MIPFILTER_NONE
:
989 /* always use mip level 0 */
990 if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
991 /* XXX this is a work-around for an apparent bug in LLVM 2.7.
992 * We should be able to set ilevel0 = const(0) but that causes
993 * bad x86 code to be emitted.
995 lod
= lp_build_const_elem(bld
->coord_bld
.type
, 0.0);
996 lp_build_nearest_mip_level(bld
, unit
, lod
, &ilevel0
);
999 ilevel0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
1002 case PIPE_TEX_MIPFILTER_NEAREST
:
1004 lp_build_nearest_mip_level(bld
, unit
, lod
, &ilevel0
);
1006 case PIPE_TEX_MIPFILTER_LINEAR
:
1008 LLVMValueRef f256
= LLVMConstReal(LLVMFloatType(), 256.0);
1009 LLVMValueRef i255
= lp_build_const_int32(255);
1010 LLVMTypeRef i16_type
= LLVMIntType(16);
1014 lp_build_linear_mip_levels(bld
, unit
, lod
, &ilevel0
, &ilevel1
,
1016 lod_fpart
= LLVMBuildFMul(builder
, lod_fpart
, f256
, "");
1017 lod_fpart
= lp_build_ifloor(&bld
->float_bld
, lod_fpart
);
1018 lod_fpart
= LLVMBuildAnd(builder
, lod_fpart
, i255
, "");
1019 lod_fpart
= LLVMBuildTrunc(builder
, lod_fpart
, i16_type
, "");
1020 lod_fpart
= lp_build_broadcast_scalar(&h16
, lod_fpart
);
1022 /* the lod_fpart values will be fixed pt values in [0,1) */
1027 /* compute image size(s) of source mipmap level(s) */
1028 lp_build_mipmap_level_sizes(bld
, dims
, width_vec
, height_vec
, depth_vec
,
1030 row_stride_array
, img_stride_array
,
1031 &width0_vec
, &width1_vec
,
1032 &height0_vec
, &height1_vec
,
1033 &depth0_vec
, &depth1_vec
,
1034 &row_stride0_vec
, &row_stride1_vec
,
1035 &img_stride0_vec
, &img_stride1_vec
);
1038 * Get pointer(s) to image data for mipmap level(s).
1040 data_ptr0
= lp_build_get_mipmap_level(bld
, data_array
, ilevel0
);
1041 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
1042 data_ptr1
= lp_build_get_mipmap_level(bld
, data_array
, ilevel1
);
1047 * Get/interpolate texture colors.
1049 if (min_filter
== mag_filter
) {
1050 /* no need to distinquish between minification and magnification */
1051 lp_build_sample_mipmap(bld
, min_filter
, mip_filter
,
1053 width0_vec
, width1_vec
,
1054 height0_vec
, height1_vec
,
1055 depth0_vec
, depth1_vec
,
1056 row_stride0_vec
, row_stride1_vec
,
1057 img_stride0_vec
, img_stride1_vec
,
1058 data_ptr0
, data_ptr1
,
1059 &packed_lo
, &packed_hi
);
1062 /* Emit conditional to choose min image filter or mag image filter
1063 * depending on the lod being > 0 or <= 0, respectively.
1065 struct lp_build_flow_context
*flow_ctx
;
1066 struct lp_build_if_state if_ctx
;
1067 LLVMValueRef minify
;
1069 flow_ctx
= lp_build_flow_create(builder
);
1070 lp_build_flow_scope_begin(flow_ctx
);
1072 packed_lo
= LLVMGetUndef(h16_vec_type
);
1073 packed_hi
= LLVMGetUndef(h16_vec_type
);
1075 lp_build_flow_scope_declare(flow_ctx
, &packed_lo
);
1076 lp_build_flow_scope_declare(flow_ctx
, &packed_hi
);
1078 /* minify = lod > 0.0 */
1079 minify
= LLVMBuildFCmp(builder
, LLVMRealUGE
,
1080 lod
, float_bld
->zero
, "");
1082 lp_build_if(&if_ctx
, flow_ctx
, builder
, minify
);
1084 /* Use the minification filter */
1085 lp_build_sample_mipmap(bld
, min_filter
, mip_filter
,
1087 width0_vec
, width1_vec
,
1088 height0_vec
, height1_vec
,
1089 depth0_vec
, depth1_vec
,
1090 row_stride0_vec
, row_stride1_vec
,
1091 img_stride0_vec
, img_stride1_vec
,
1092 data_ptr0
, data_ptr1
,
1093 &packed_lo
, &packed_hi
);
1095 lp_build_else(&if_ctx
);
1097 /* Use the magnification filter */
1098 lp_build_sample_mipmap(bld
, mag_filter
, mip_filter
,
1100 width0_vec
, width1_vec
,
1101 height0_vec
, height1_vec
,
1102 depth0_vec
, depth1_vec
,
1103 row_stride0_vec
, row_stride1_vec
,
1104 img_stride0_vec
, img_stride1_vec
,
1105 data_ptr0
, data_ptr1
,
1106 &packed_lo
, &packed_hi
);
1108 lp_build_endif(&if_ctx
);
1110 lp_build_flow_scope_end(flow_ctx
);
1111 lp_build_flow_destroy(flow_ctx
);
1114 /* combine 'packed_lo', 'packed_hi' into 'packed' */
1116 struct lp_build_context h16
, u8n
;
1118 lp_build_context_init(&h16
, builder
, lp_type_ufixed(16));
1119 lp_build_context_init(&u8n
, builder
, lp_type_unorm(8));
1121 packed
= lp_build_pack2(builder
, h16
.type
, u8n
.type
,
1122 packed_lo
, packed_hi
);
1126 * Convert to SoA and swizzle.
1128 lp_build_rgba8_to_f32_soa(builder
,
1130 packed
, unswizzled
);
1132 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
1133 lp_build_format_swizzle_soa(bld
->format_desc
,
1135 unswizzled
, texel_out
);
1138 texel_out
[0] = unswizzled
[0];
1139 texel_out
[1] = unswizzled
[1];
1140 texel_out
[2] = unswizzled
[2];
1141 texel_out
[3] = unswizzled
[3];
1144 apply_sampler_swizzle(bld
, texel_out
);