1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Texture sampling -- AoS.
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_bitarit.h"
49 #include "lp_bld_logic.h"
50 #include "lp_bld_swizzle.h"
51 #include "lp_bld_pack.h"
52 #include "lp_bld_flow.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_format.h"
55 #include "lp_bld_init.h"
56 #include "lp_bld_sample.h"
57 #include "lp_bld_sample_aos.h"
58 #include "lp_bld_quad.h"
62 * Build LLVM code for texture coord wrapping, for nearest filtering,
63 * for scaled integer texcoords.
64 * \param block_length is the length of the pixel block along the
66 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
67 * \param length the texture size along one dimension
68 * \param stride pixel stride along the coordinate axis (in bytes)
69 * \param is_pot if TRUE, length is a power of two
70 * \param wrap_mode one of PIPE_TEX_WRAP_x
71 * \param out_offset byte offset for the wrapped coordinate
72 * \param out_i resulting sub-block pixel coordinate for coord0
75 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context
*bld
,
76 unsigned block_length
,
82 LLVMValueRef
*out_offset
,
85 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
86 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
87 LLVMValueRef length_minus_one
;
89 length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
92 case PIPE_TEX_WRAP_REPEAT
:
94 coord
= LLVMBuildAnd(builder
, coord
, length_minus_one
, "");
96 /* Add a bias to the texcoord to handle negative coords */
97 LLVMValueRef bias
= lp_build_mul_imm(int_coord_bld
, length
, 1024);
98 coord
= LLVMBuildAdd(builder
, coord
, bias
, "");
99 coord
= LLVMBuildURem(builder
, coord
, length
, "");
103 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
104 coord
= lp_build_max(int_coord_bld
, coord
, int_coord_bld
->zero
);
105 coord
= lp_build_min(int_coord_bld
, coord
, length_minus_one
);
108 case PIPE_TEX_WRAP_CLAMP
:
109 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
110 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
111 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
112 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
113 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
118 lp_build_sample_partial_offset(int_coord_bld
, block_length
, coord
, stride
,
124 * Build LLVM code for texture coord wrapping, for linear filtering,
125 * for scaled integer texcoords.
126 * \param block_length is the length of the pixel block along the
128 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
129 * \param length the texture size along one dimension
130 * \param stride pixel stride along the coordinate axis (in bytes)
131 * \param is_pot if TRUE, length is a power of two
132 * \param wrap_mode one of PIPE_TEX_WRAP_x
133 * \param offset0 resulting relative offset for coord0
134 * \param offset1 resulting relative offset for coord0 + 1
135 * \param i0 resulting sub-block pixel coordinate for coord0
136 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
139 lp_build_sample_wrap_linear_int(struct lp_build_sample_context
*bld
,
140 unsigned block_length
,
146 LLVMValueRef
*offset0
,
147 LLVMValueRef
*offset1
,
151 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
152 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
153 LLVMValueRef length_minus_one
;
154 LLVMValueRef lmask
, umask
, mask
;
156 if (block_length
!= 1) {
158 * If the pixel block covers more than one pixel then there is no easy
159 * way to calculate offset1 relative to offset0. Instead, compute them
165 lp_build_sample_wrap_nearest_int(bld
,
174 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
176 lp_build_sample_wrap_nearest_int(bld
,
189 * Scalar pixels -- try to compute offset0 and offset1 with a single stride
193 *i0
= int_coord_bld
->zero
;
194 *i1
= int_coord_bld
->zero
;
196 length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
199 case PIPE_TEX_WRAP_REPEAT
:
201 coord0
= LLVMBuildAnd(builder
, coord0
, length_minus_one
, "");
204 /* Add a bias to the texcoord to handle negative coords */
205 LLVMValueRef bias
= lp_build_mul_imm(int_coord_bld
, length
, 1024);
206 coord0
= LLVMBuildAdd(builder
, coord0
, bias
, "");
207 coord0
= LLVMBuildURem(builder
, coord0
, length
, "");
210 mask
= lp_build_compare(bld
->gallivm
, int_coord_bld
->type
,
211 PIPE_FUNC_NOTEQUAL
, coord0
, length_minus_one
);
213 *offset0
= lp_build_mul(int_coord_bld
, coord0
, stride
);
214 *offset1
= LLVMBuildAnd(builder
,
215 lp_build_add(int_coord_bld
, *offset0
, stride
),
219 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
220 lmask
= lp_build_compare(int_coord_bld
->gallivm
, int_coord_bld
->type
,
221 PIPE_FUNC_GEQUAL
, coord0
, int_coord_bld
->zero
);
222 umask
= lp_build_compare(int_coord_bld
->gallivm
, int_coord_bld
->type
,
223 PIPE_FUNC_LESS
, coord0
, length_minus_one
);
225 coord0
= lp_build_select(int_coord_bld
, lmask
, coord0
, int_coord_bld
->zero
);
226 coord0
= lp_build_select(int_coord_bld
, umask
, coord0
, length_minus_one
);
228 mask
= LLVMBuildAnd(builder
, lmask
, umask
, "");
230 *offset0
= lp_build_mul(int_coord_bld
, coord0
, stride
);
231 *offset1
= lp_build_add(int_coord_bld
,
233 LLVMBuildAnd(builder
, stride
, mask
, ""));
236 case PIPE_TEX_WRAP_CLAMP
:
237 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
238 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
239 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
240 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
241 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
244 *offset0
= int_coord_bld
->zero
;
245 *offset1
= int_coord_bld
->zero
;
252 * Sample a single texture image with nearest sampling.
253 * If sampling a cube texture, r = cube face in [0,5].
254 * Return filtered color as two vectors of 16-bit fixed point values.
257 lp_build_sample_image_nearest(struct lp_build_sample_context
*bld
,
258 LLVMValueRef int_size
,
259 LLVMValueRef row_stride_vec
,
260 LLVMValueRef img_stride_vec
,
261 LLVMValueRef data_ptr
,
265 LLVMValueRef
*colors_lo
,
266 LLVMValueRef
*colors_hi
)
268 const unsigned dims
= bld
->dims
;
269 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
270 struct lp_build_context i32
, h16
, u8n
;
271 LLVMTypeRef i32_vec_type
, u8n_vec_type
;
273 LLVMValueRef width_vec
, height_vec
, depth_vec
;
274 LLVMValueRef s_ipart
, t_ipart
= NULL
, r_ipart
= NULL
;
275 LLVMValueRef x_stride
;
276 LLVMValueRef x_offset
, offset
;
277 LLVMValueRef x_subcoord
, y_subcoord
, z_subcoord
;
279 lp_build_context_init(&i32
, bld
->gallivm
, lp_type_int_vec(32));
280 lp_build_context_init(&h16
, bld
->gallivm
, lp_type_ufixed(16));
281 lp_build_context_init(&u8n
, bld
->gallivm
, lp_type_unorm(8));
283 i32_vec_type
= lp_build_vec_type(bld
->gallivm
, i32
.type
);
284 u8n_vec_type
= lp_build_vec_type(bld
->gallivm
, u8n
.type
);
286 lp_build_extract_image_sizes(bld
,
294 if (bld
->static_state
->normalized_coords
) {
295 LLVMValueRef scaled_size
;
296 LLVMValueRef flt_size
;
298 /* scale size by 256 (8 fractional bits) */
299 scaled_size
= lp_build_shl_imm(&bld
->int_size_bld
, int_size
, 8);
301 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, scaled_size
);
303 lp_build_unnormalized_coords(bld
, flt_size
, &s
, &t
, &r
);
306 /* scale coords by 256 (8 fractional bits) */
307 s
= lp_build_mul_imm(&bld
->coord_bld
, s
, 256);
309 t
= lp_build_mul_imm(&bld
->coord_bld
, t
, 256);
311 r
= lp_build_mul_imm(&bld
->coord_bld
, r
, 256);
314 /* convert float to int */
315 s
= LLVMBuildFPToSI(builder
, s
, i32_vec_type
, "");
317 t
= LLVMBuildFPToSI(builder
, t
, i32_vec_type
, "");
319 r
= LLVMBuildFPToSI(builder
, r
, i32_vec_type
, "");
321 /* compute floor (shift right 8) */
322 i32_c8
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, 8);
323 s_ipart
= LLVMBuildAShr(builder
, s
, i32_c8
, "");
325 t_ipart
= LLVMBuildAShr(builder
, t
, i32_c8
, "");
327 r_ipart
= LLVMBuildAShr(builder
, r
, i32_c8
, "");
329 /* get pixel, row, image strides */
330 x_stride
= lp_build_const_vec(bld
->gallivm
,
331 bld
->int_coord_bld
.type
,
332 bld
->format_desc
->block
.bits
/8);
334 /* Do texcoord wrapping, compute texel offset */
335 lp_build_sample_wrap_nearest_int(bld
,
336 bld
->format_desc
->block
.width
,
337 s_ipart
, width_vec
, x_stride
,
338 bld
->static_state
->pot_width
,
339 bld
->static_state
->wrap_s
,
340 &x_offset
, &x_subcoord
);
343 LLVMValueRef y_offset
;
344 lp_build_sample_wrap_nearest_int(bld
,
345 bld
->format_desc
->block
.height
,
346 t_ipart
, height_vec
, row_stride_vec
,
347 bld
->static_state
->pot_height
,
348 bld
->static_state
->wrap_t
,
349 &y_offset
, &y_subcoord
);
350 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, y_offset
);
352 LLVMValueRef z_offset
;
353 lp_build_sample_wrap_nearest_int(bld
,
354 1, /* block length (depth) */
355 r_ipart
, depth_vec
, img_stride_vec
,
356 bld
->static_state
->pot_depth
,
357 bld
->static_state
->wrap_r
,
358 &z_offset
, &z_subcoord
);
359 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, z_offset
);
361 else if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
362 LLVMValueRef z_offset
;
363 /* The r coord is the cube face in [0,5] */
364 z_offset
= lp_build_mul(&bld
->int_coord_bld
, r
, img_stride_vec
);
365 offset
= lp_build_add(&bld
->int_coord_bld
, offset
, z_offset
);
370 * Fetch the pixels as 4 x 32bit (rgba order might differ):
372 * rgba0 rgba1 rgba2 rgba3
374 * bit cast them into 16 x u8
376 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
378 * unpack them into two 8 x i16:
380 * r0 g0 b0 a0 r1 g1 b1 a1
381 * r2 g2 b2 a2 r3 g3 b3 a3
383 * The higher 8 bits of the resulting elements will be zero.
388 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
390 * Given the format is a rgba8, just read the pixels as is,
391 * without any swizzling. Swizzling will be done later.
393 rgba8
= lp_build_gather(bld
->gallivm
,
394 bld
->texel_type
.length
,
395 bld
->format_desc
->block
.bits
,
396 bld
->texel_type
.width
,
399 rgba8
= LLVMBuildBitCast(builder
, rgba8
, u8n_vec_type
, "");
402 rgba8
= lp_build_fetch_rgba_aos(bld
->gallivm
,
410 /* Expand one 4*rgba8 to two 2*rgba16 */
411 lp_build_unpack2(bld
->gallivm
, u8n
.type
, h16
.type
,
413 colors_lo
, colors_hi
);
419 * Sample a single texture image with (bi-)(tri-)linear sampling.
420 * Return filtered color as two vectors of 16-bit fixed point values.
423 lp_build_sample_image_linear(struct lp_build_sample_context
*bld
,
424 LLVMValueRef int_size
,
425 LLVMValueRef row_stride_vec
,
426 LLVMValueRef img_stride_vec
,
427 LLVMValueRef data_ptr
,
431 LLVMValueRef
*colors_lo
,
432 LLVMValueRef
*colors_hi
)
434 const unsigned dims
= bld
->dims
;
435 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
436 struct lp_build_context i32
, h16
, u8n
;
437 LLVMTypeRef i32_vec_type
, h16_vec_type
, u8n_vec_type
;
438 LLVMValueRef i32_c8
, i32_c128
, i32_c255
;
439 LLVMValueRef width_vec
, height_vec
, depth_vec
;
440 LLVMValueRef s_ipart
, s_fpart
, s_fpart_lo
, s_fpart_hi
;
441 LLVMValueRef t_ipart
= NULL
, t_fpart
= NULL
, t_fpart_lo
= NULL
, t_fpart_hi
= NULL
;
442 LLVMValueRef r_ipart
= NULL
, r_fpart
= NULL
, r_fpart_lo
= NULL
, r_fpart_hi
= NULL
;
443 LLVMValueRef x_stride
, y_stride
, z_stride
;
444 LLVMValueRef x_offset0
, x_offset1
;
445 LLVMValueRef y_offset0
, y_offset1
;
446 LLVMValueRef z_offset0
, z_offset1
;
447 LLVMValueRef offset
[2][2][2]; /* [z][y][x] */
448 LLVMValueRef x_subcoord
[2], y_subcoord
[2], z_subcoord
[2];
449 LLVMValueRef neighbors_lo
[2][2][2]; /* [z][y][x] */
450 LLVMValueRef neighbors_hi
[2][2][2]; /* [z][y][x] */
451 LLVMValueRef packed_lo
, packed_hi
;
456 lp_build_context_init(&i32
, bld
->gallivm
, lp_type_int_vec(32));
457 lp_build_context_init(&h16
, bld
->gallivm
, lp_type_ufixed(16));
458 lp_build_context_init(&u8n
, bld
->gallivm
, lp_type_unorm(8));
460 i32_vec_type
= lp_build_vec_type(bld
->gallivm
, i32
.type
);
461 h16_vec_type
= lp_build_vec_type(bld
->gallivm
, h16
.type
);
462 u8n_vec_type
= lp_build_vec_type(bld
->gallivm
, u8n
.type
);
464 lp_build_extract_image_sizes(bld
,
472 if (bld
->static_state
->normalized_coords
) {
473 LLVMValueRef scaled_size
;
474 LLVMValueRef flt_size
;
476 /* scale size by 256 (8 fractional bits) */
477 scaled_size
= lp_build_shl_imm(&bld
->int_size_bld
, int_size
, 8);
479 flt_size
= lp_build_int_to_float(&bld
->float_size_bld
, scaled_size
);
481 lp_build_unnormalized_coords(bld
, flt_size
, &s
, &t
, &r
);
484 /* scale coords by 256 (8 fractional bits) */
485 s
= lp_build_mul_imm(&bld
->coord_bld
, s
, 256);
487 t
= lp_build_mul_imm(&bld
->coord_bld
, t
, 256);
489 r
= lp_build_mul_imm(&bld
->coord_bld
, r
, 256);
492 /* convert float to int */
493 s
= LLVMBuildFPToSI(builder
, s
, i32_vec_type
, "");
495 t
= LLVMBuildFPToSI(builder
, t
, i32_vec_type
, "");
497 r
= LLVMBuildFPToSI(builder
, r
, i32_vec_type
, "");
499 /* subtract 0.5 (add -128) */
500 i32_c128
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, -128);
501 s
= LLVMBuildAdd(builder
, s
, i32_c128
, "");
503 t
= LLVMBuildAdd(builder
, t
, i32_c128
, "");
506 r
= LLVMBuildAdd(builder
, r
, i32_c128
, "");
509 /* compute floor (shift right 8) */
510 i32_c8
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, 8);
511 s_ipart
= LLVMBuildAShr(builder
, s
, i32_c8
, "");
513 t_ipart
= LLVMBuildAShr(builder
, t
, i32_c8
, "");
515 r_ipart
= LLVMBuildAShr(builder
, r
, i32_c8
, "");
517 /* compute fractional part (AND with 0xff) */
518 i32_c255
= lp_build_const_int_vec(bld
->gallivm
, i32
.type
, 255);
519 s_fpart
= LLVMBuildAnd(builder
, s
, i32_c255
, "");
521 t_fpart
= LLVMBuildAnd(builder
, t
, i32_c255
, "");
523 r_fpart
= LLVMBuildAnd(builder
, r
, i32_c255
, "");
525 /* get pixel, row and image strides */
526 x_stride
= lp_build_const_vec(bld
->gallivm
, bld
->int_coord_bld
.type
,
527 bld
->format_desc
->block
.bits
/8);
528 y_stride
= row_stride_vec
;
529 z_stride
= img_stride_vec
;
531 /* do texcoord wrapping and compute texel offsets */
532 lp_build_sample_wrap_linear_int(bld
,
533 bld
->format_desc
->block
.width
,
534 s_ipart
, width_vec
, x_stride
,
535 bld
->static_state
->pot_width
,
536 bld
->static_state
->wrap_s
,
537 &x_offset0
, &x_offset1
,
538 &x_subcoord
[0], &x_subcoord
[1]);
539 for (z
= 0; z
< 2; z
++) {
540 for (y
= 0; y
< 2; y
++) {
541 offset
[z
][y
][0] = x_offset0
;
542 offset
[z
][y
][1] = x_offset1
;
547 lp_build_sample_wrap_linear_int(bld
,
548 bld
->format_desc
->block
.height
,
549 t_ipart
, height_vec
, y_stride
,
550 bld
->static_state
->pot_height
,
551 bld
->static_state
->wrap_t
,
552 &y_offset0
, &y_offset1
,
553 &y_subcoord
[0], &y_subcoord
[1]);
555 for (z
= 0; z
< 2; z
++) {
556 for (x
= 0; x
< 2; x
++) {
557 offset
[z
][0][x
] = lp_build_add(&bld
->int_coord_bld
,
558 offset
[z
][0][x
], y_offset0
);
559 offset
[z
][1][x
] = lp_build_add(&bld
->int_coord_bld
,
560 offset
[z
][1][x
], y_offset1
);
566 lp_build_sample_wrap_linear_int(bld
,
567 bld
->format_desc
->block
.height
,
568 r_ipart
, depth_vec
, z_stride
,
569 bld
->static_state
->pot_depth
,
570 bld
->static_state
->wrap_r
,
571 &z_offset0
, &z_offset1
,
572 &z_subcoord
[0], &z_subcoord
[1]);
573 for (y
= 0; y
< 2; y
++) {
574 for (x
= 0; x
< 2; x
++) {
575 offset
[0][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
576 offset
[0][y
][x
], z_offset0
);
577 offset
[1][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
578 offset
[1][y
][x
], z_offset1
);
582 else if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
583 LLVMValueRef z_offset
;
584 z_offset
= lp_build_mul(&bld
->int_coord_bld
, r
, img_stride_vec
);
585 for (y
= 0; y
< 2; y
++) {
586 for (x
= 0; x
< 2; x
++) {
587 /* The r coord is the cube face in [0,5] */
588 offset
[0][y
][x
] = lp_build_add(&bld
->int_coord_bld
,
589 offset
[0][y
][x
], z_offset
);
595 * Transform 4 x i32 in
597 * s_fpart = {s0, s1, s2, s3}
601 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
605 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
606 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
608 * and likewise for t_fpart. There is no risk of losing precision here
609 * since the fractional parts only use the lower 8bits.
611 s_fpart
= LLVMBuildBitCast(builder
, s_fpart
, h16_vec_type
, "");
613 t_fpart
= LLVMBuildBitCast(builder
, t_fpart
, h16_vec_type
, "");
615 r_fpart
= LLVMBuildBitCast(builder
, r_fpart
, h16_vec_type
, "");
618 LLVMTypeRef elem_type
= LLVMInt32TypeInContext(bld
->gallivm
->context
);
619 LLVMValueRef shuffles_lo
[LP_MAX_VECTOR_LENGTH
];
620 LLVMValueRef shuffles_hi
[LP_MAX_VECTOR_LENGTH
];
621 LLVMValueRef shuffle_lo
;
622 LLVMValueRef shuffle_hi
;
624 for (j
= 0; j
< h16
.type
.length
; j
+= 4) {
625 #ifdef PIPE_ARCH_LITTLE_ENDIAN
626 unsigned subindex
= 0;
628 unsigned subindex
= 1;
632 index
= LLVMConstInt(elem_type
, j
/2 + subindex
, 0);
633 for (i
= 0; i
< 4; ++i
)
634 shuffles_lo
[j
+ i
] = index
;
636 index
= LLVMConstInt(elem_type
, h16
.type
.length
/2 + j
/2 + subindex
, 0);
637 for (i
= 0; i
< 4; ++i
)
638 shuffles_hi
[j
+ i
] = index
;
641 shuffle_lo
= LLVMConstVector(shuffles_lo
, h16
.type
.length
);
642 shuffle_hi
= LLVMConstVector(shuffles_hi
, h16
.type
.length
);
644 s_fpart_lo
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
,
646 s_fpart_hi
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
,
649 t_fpart_lo
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
,
651 t_fpart_hi
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
,
655 r_fpart_lo
= LLVMBuildShuffleVector(builder
, r_fpart
, h16
.undef
,
657 r_fpart_hi
= LLVMBuildShuffleVector(builder
, r_fpart
, h16
.undef
,
663 * Fetch the pixels as 4 x 32bit (rgba order might differ):
665 * rgba0 rgba1 rgba2 rgba3
667 * bit cast them into 16 x u8
669 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
671 * unpack them into two 8 x i16:
673 * r0 g0 b0 a0 r1 g1 b1 a1
674 * r2 g2 b2 a2 r3 g3 b3 a3
676 * The higher 8 bits of the resulting elements will be zero.
678 numj
= 1 + (dims
>= 2);
679 numk
= 1 + (dims
>= 3);
681 for (k
= 0; k
< numk
; k
++) {
682 for (j
= 0; j
< numj
; j
++) {
683 for (i
= 0; i
< 2; i
++) {
686 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
688 * Given the format is a rgba8, just read the pixels as is,
689 * without any swizzling. Swizzling will be done later.
691 rgba8
= lp_build_gather(bld
->gallivm
,
692 bld
->texel_type
.length
,
693 bld
->format_desc
->block
.bits
,
694 bld
->texel_type
.width
,
695 data_ptr
, offset
[k
][j
][i
]);
697 rgba8
= LLVMBuildBitCast(builder
, rgba8
, u8n_vec_type
, "");
700 rgba8
= lp_build_fetch_rgba_aos(bld
->gallivm
,
703 data_ptr
, offset
[k
][j
][i
],
708 /* Expand one 4*rgba8 to two 2*rgba16 */
709 lp_build_unpack2(bld
->gallivm
, u8n
.type
, h16
.type
,
711 &neighbors_lo
[k
][j
][i
], &neighbors_hi
[k
][j
][i
]);
717 * Linear interpolation with 8.8 fixed point.
721 packed_lo
= lp_build_lerp(&h16
,
723 neighbors_lo
[0][0][0],
724 neighbors_lo
[0][0][1]);
726 packed_hi
= lp_build_lerp(&h16
,
728 neighbors_hi
[0][0][0],
729 neighbors_hi
[0][0][1]);
733 packed_lo
= lp_build_lerp_2d(&h16
,
734 s_fpart_lo
, t_fpart_lo
,
735 neighbors_lo
[0][0][0],
736 neighbors_lo
[0][0][1],
737 neighbors_lo
[0][1][0],
738 neighbors_lo
[0][1][1]);
740 packed_hi
= lp_build_lerp_2d(&h16
,
741 s_fpart_hi
, t_fpart_hi
,
742 neighbors_hi
[0][0][0],
743 neighbors_hi
[0][0][1],
744 neighbors_hi
[0][1][0],
745 neighbors_hi
[0][1][1]);
748 LLVMValueRef packed_lo2
, packed_hi2
;
750 /* lerp in the second z slice */
751 packed_lo2
= lp_build_lerp_2d(&h16
,
752 s_fpart_lo
, t_fpart_lo
,
753 neighbors_lo
[1][0][0],
754 neighbors_lo
[1][0][1],
755 neighbors_lo
[1][1][0],
756 neighbors_lo
[1][1][1]);
758 packed_hi2
= lp_build_lerp_2d(&h16
,
759 s_fpart_hi
, t_fpart_hi
,
760 neighbors_hi
[1][0][0],
761 neighbors_hi
[1][0][1],
762 neighbors_hi
[1][1][0],
763 neighbors_hi
[1][1][1]);
764 /* interp between two z slices */
765 packed_lo
= lp_build_lerp(&h16
, r_fpart_lo
,
766 packed_lo
, packed_lo2
);
767 packed_hi
= lp_build_lerp(&h16
, r_fpart_hi
,
768 packed_hi
, packed_hi2
);
772 *colors_lo
= packed_lo
;
773 *colors_hi
= packed_hi
;
778 * Sample the texture/mipmap using given image filter and mip filter.
779 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
780 * from; their row/image strides and data pointers are looked up here.
781 * If we're using nearest miplevel sampling the '1' values will be null/unused.
784 lp_build_sample_mipmap(struct lp_build_sample_context
*bld
,
790 LLVMValueRef ilevel0
,
791 LLVMValueRef ilevel1
,
792 LLVMValueRef lod_fpart
,
793 LLVMValueRef colors_lo_var
,
794 LLVMValueRef colors_hi_var
)
796 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
799 LLVMValueRef row_stride0_vec
;
800 LLVMValueRef row_stride1_vec
;
801 LLVMValueRef img_stride0_vec
;
802 LLVMValueRef img_stride1_vec
;
803 LLVMValueRef data_ptr0
;
804 LLVMValueRef data_ptr1
;
805 LLVMValueRef colors0_lo
, colors0_hi
;
806 LLVMValueRef colors1_lo
, colors1_hi
;
808 /* sample the first mipmap level */
809 lp_build_mipmap_level_sizes(bld
, ilevel0
,
811 &row_stride0_vec
, &img_stride0_vec
);
812 data_ptr0
= lp_build_get_mipmap_level(bld
, ilevel0
);
813 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
814 lp_build_sample_image_nearest(bld
,
816 row_stride0_vec
, img_stride0_vec
,
818 &colors0_lo
, &colors0_hi
);
821 assert(img_filter
== PIPE_TEX_FILTER_LINEAR
);
822 lp_build_sample_image_linear(bld
,
824 row_stride0_vec
, img_stride0_vec
,
826 &colors0_lo
, &colors0_hi
);
829 /* Store the first level's colors in the output variables */
830 LLVMBuildStore(builder
, colors0_lo
, colors_lo_var
);
831 LLVMBuildStore(builder
, colors0_hi
, colors_hi_var
);
833 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
834 LLVMValueRef h16_scale
= lp_build_const_float(bld
->gallivm
, 256.0);
835 LLVMTypeRef i32_type
= LLVMIntTypeInContext(bld
->gallivm
->context
, 32);
836 struct lp_build_if_state if_ctx
;
837 LLVMValueRef need_lerp
;
839 lod_fpart
= LLVMBuildFMul(builder
, lod_fpart
, h16_scale
, "");
840 lod_fpart
= LLVMBuildFPToSI(builder
, lod_fpart
, i32_type
, "lod_fpart.fixed16");
842 /* need_lerp = lod_fpart > 0 */
843 need_lerp
= LLVMBuildICmp(builder
, LLVMIntSGT
,
844 lod_fpart
, LLVMConstNull(i32_type
),
847 lp_build_if(&if_ctx
, bld
->gallivm
, need_lerp
);
849 struct lp_build_context h16_bld
;
851 lp_build_context_init(&h16_bld
, bld
->gallivm
, lp_type_ufixed(16));
853 /* sample the second mipmap level */
854 lp_build_mipmap_level_sizes(bld
, ilevel1
,
856 &row_stride1_vec
, &img_stride1_vec
);
857 data_ptr1
= lp_build_get_mipmap_level(bld
, ilevel1
);
858 if (img_filter
== PIPE_TEX_FILTER_NEAREST
) {
859 lp_build_sample_image_nearest(bld
,
861 row_stride1_vec
, img_stride1_vec
,
863 &colors1_lo
, &colors1_hi
);
866 lp_build_sample_image_linear(bld
,
868 row_stride1_vec
, img_stride1_vec
,
870 &colors1_lo
, &colors1_hi
);
873 /* interpolate samples from the two mipmap levels */
875 lod_fpart
= LLVMBuildTrunc(builder
, lod_fpart
, h16_bld
.elem_type
, "");
876 lod_fpart
= lp_build_broadcast_scalar(&h16_bld
, lod_fpart
);
878 #if HAVE_LLVM == 0x208
879 /* This is a work-around for a bug in LLVM 2.8.
880 * Evidently, something goes wrong in the construction of the
881 * lod_fpart short[8] vector. Adding this no-effect shuffle seems
882 * to force the vector to be properly constructed.
883 * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
886 LLVMValueRef shuffles
[8], shuffle
;
888 assert(h16_bld
.type
.length
<= Elements(shuffles
));
889 for (i
= 0; i
< h16_bld
.type
.length
; i
++)
890 shuffles
[i
] = lp_build_const_int32(bld
->gallivm
, 2 * (i
& 1));
891 shuffle
= LLVMConstVector(shuffles
, h16_bld
.type
.length
);
892 lod_fpart
= LLVMBuildShuffleVector(builder
,
893 lod_fpart
, lod_fpart
,
898 colors0_lo
= lp_build_lerp(&h16_bld
, lod_fpart
,
899 colors0_lo
, colors1_lo
);
900 colors0_hi
= lp_build_lerp(&h16_bld
, lod_fpart
,
901 colors0_hi
, colors1_hi
);
903 LLVMBuildStore(builder
, colors0_lo
, colors_lo_var
);
904 LLVMBuildStore(builder
, colors0_hi
, colors_hi_var
);
906 lp_build_endif(&if_ctx
);
913 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
914 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
915 * but only limited texture coord wrap modes.
918 lp_build_sample_aos(struct lp_build_sample_context
*bld
,
923 const LLVMValueRef
*ddx
,
924 const LLVMValueRef
*ddy
,
925 LLVMValueRef lod_bias
, /* optional */
926 LLVMValueRef explicit_lod
, /* optional */
927 LLVMValueRef texel_out
[4])
929 struct lp_build_context
*int_bld
= &bld
->int_bld
;
930 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
931 const unsigned mip_filter
= bld
->static_state
->min_mip_filter
;
932 const unsigned min_filter
= bld
->static_state
->min_img_filter
;
933 const unsigned mag_filter
= bld
->static_state
->mag_img_filter
;
934 const unsigned dims
= bld
->dims
;
935 LLVMValueRef lod_ipart
= NULL
, lod_fpart
= NULL
;
936 LLVMValueRef ilevel0
, ilevel1
= NULL
;
937 LLVMValueRef packed
, packed_lo
, packed_hi
;
938 LLVMValueRef unswizzled
[4];
939 LLVMValueRef face_ddx
[4], face_ddy
[4];
940 struct lp_build_context h16_bld
;
941 LLVMValueRef first_level
;
942 LLVMValueRef i32t_zero
= lp_build_const_int32(bld
->gallivm
, 0);
944 /* we only support the common/simple wrap modes at this time */
945 assert(lp_is_simple_wrap_mode(bld
->static_state
->wrap_s
));
947 assert(lp_is_simple_wrap_mode(bld
->static_state
->wrap_t
));
949 assert(lp_is_simple_wrap_mode(bld
->static_state
->wrap_r
));
952 /* make 16-bit fixed-pt builder context */
953 lp_build_context_init(&h16_bld
, bld
->gallivm
, lp_type_ufixed(16));
955 /* cube face selection, compute per-face coords, etc. */
956 if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
957 LLVMValueRef face
, face_s
, face_t
;
958 lp_build_cube_lookup(bld
, s
, t
, r
, &face
, &face_s
, &face_t
);
959 s
= face_s
; /* vec */
960 t
= face_t
; /* vec */
961 /* use 'r' to indicate cube face */
962 r
= lp_build_broadcast_scalar(&bld
->int_coord_bld
, face
); /* vec */
964 /* recompute ddx, ddy using the new (s,t) face texcoords */
965 face_ddx
[0] = lp_build_scalar_ddx(&bld
->coord_bld
, s
);
966 face_ddx
[1] = lp_build_scalar_ddx(&bld
->coord_bld
, t
);
969 face_ddy
[0] = lp_build_scalar_ddy(&bld
->coord_bld
, s
);
970 face_ddy
[1] = lp_build_scalar_ddy(&bld
->coord_bld
, t
);
978 * Compute the level of detail (float).
980 if (min_filter
!= mag_filter
||
981 mip_filter
!= PIPE_TEX_MIPFILTER_NONE
) {
982 /* Need to compute lod either to choose mipmap levels or to
983 * distinguish between minification/magnification with one mipmap level.
985 lp_build_lod_selector(bld
, unit
, ddx
, ddy
,
986 lod_bias
, explicit_lod
,
988 &lod_ipart
, &lod_fpart
);
990 lod_ipart
= i32t_zero
;
994 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
996 switch (mip_filter
) {
998 assert(0 && "bad mip_filter value in lp_build_sample_aos()");
1000 case PIPE_TEX_MIPFILTER_NONE
:
1001 /* always use mip level 0 */
1002 if (bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
1003 /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1004 * We should be able to set ilevel0 = const(0) but that causes
1005 * bad x86 code to be emitted.
1008 lp_build_nearest_mip_level(bld
, unit
, lod_ipart
, &ilevel0
);
1011 first_level
= bld
->dynamic_state
->first_level(bld
->dynamic_state
,
1012 bld
->gallivm
, unit
);
1013 ilevel0
= first_level
;
1016 case PIPE_TEX_MIPFILTER_NEAREST
:
1018 lp_build_nearest_mip_level(bld
, unit
, lod_ipart
, &ilevel0
);
1020 case PIPE_TEX_MIPFILTER_LINEAR
:
1023 lp_build_linear_mip_levels(bld
, unit
,
1024 lod_ipart
, &lod_fpart
,
1025 &ilevel0
, &ilevel1
);
1030 * Get/interpolate texture colors.
1033 packed_lo
= lp_build_alloca(bld
->gallivm
, h16_bld
.vec_type
, "packed_lo");
1034 packed_hi
= lp_build_alloca(bld
->gallivm
, h16_bld
.vec_type
, "packed_hi");
1036 if (min_filter
== mag_filter
) {
1037 /* no need to distinguish between minification and magnification */
1038 lp_build_sample_mipmap(bld
,
1039 min_filter
, mip_filter
,
1041 ilevel0
, ilevel1
, lod_fpart
,
1042 packed_lo
, packed_hi
);
1045 /* Emit conditional to choose min image filter or mag image filter
1046 * depending on the integer lod being >= 0 or < 0, respectively.
1048 struct lp_build_if_state if_ctx
;
1049 LLVMValueRef minify
;
1051 /* minify = lod >= 0.0 */
1052 minify
= LLVMBuildICmp(builder
, LLVMIntSGE
,
1053 lod_ipart
, int_bld
->zero
, "");
1055 lp_build_if(&if_ctx
, bld
->gallivm
, minify
);
1057 /* Use the minification filter */
1058 lp_build_sample_mipmap(bld
,
1059 min_filter
, mip_filter
,
1061 ilevel0
, ilevel1
, lod_fpart
,
1062 packed_lo
, packed_hi
);
1064 lp_build_else(&if_ctx
);
1066 /* Use the magnification filter */
1067 lp_build_sample_mipmap(bld
,
1068 mag_filter
, PIPE_TEX_MIPFILTER_NONE
,
1070 ilevel0
, NULL
, NULL
,
1071 packed_lo
, packed_hi
);
1073 lp_build_endif(&if_ctx
);
1077 * combine the values stored in 'packed_lo' and 'packed_hi' variables
1080 packed
= lp_build_pack2(bld
->gallivm
,
1081 h16_bld
.type
, lp_type_unorm(8),
1082 LLVMBuildLoad(builder
, packed_lo
, ""),
1083 LLVMBuildLoad(builder
, packed_hi
, ""));
1086 * Convert to SoA and swizzle.
1088 lp_build_rgba8_to_f32_soa(bld
->gallivm
,
1090 packed
, unswizzled
);
1092 if (util_format_is_rgba8_variant(bld
->format_desc
)) {
1093 lp_build_format_swizzle_soa(bld
->format_desc
,
1095 unswizzled
, texel_out
);
1098 texel_out
[0] = unswizzled
[0];
1099 texel_out
[1] = unswizzled
[1];
1100 texel_out
[2] = unswizzled
[2];
1101 texel_out
[3] = unswizzled
[3];