1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Texture sampling -- SoA.
32 * @author Jose Fonseca <jfonseca@vmware.com>
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "util/u_debug.h"
38 #include "util/u_dump.h"
39 #include "util/u_memory.h"
40 #include "util/u_math.h"
41 #include "util/u_format.h"
42 #include "util/u_cpu_detect.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_swizzle.h"
50 #include "lp_bld_pack.h"
51 #include "lp_bld_format.h"
52 #include "lp_bld_sample.h"
56 * Keep all information for sampling code generation in a single place.
58 struct lp_build_sample_context
60 LLVMBuilderRef builder
;
62 const struct lp_sampler_static_state
*static_state
;
64 struct lp_sampler_dynamic_state
*dynamic_state
;
66 const struct util_format_description
*format_desc
;
68 /** Incoming coordinates type and build context */
69 struct lp_type coord_type
;
70 struct lp_build_context coord_bld
;
72 /** Unsigned integer coordinates */
73 struct lp_type uint_coord_type
;
74 struct lp_build_context uint_coord_bld
;
76 /** Signed integer coordinates */
77 struct lp_type int_coord_type
;
78 struct lp_build_context int_coord_bld
;
80 /** Output texels type and build context */
81 struct lp_type texel_type
;
82 struct lp_build_context texel_bld
;
87 * Does the given texture wrap mode allow sampling the texture border color?
88 * XXX maybe move this into gallium util code.
91 wrap_mode_uses_border_color(unsigned mode
)
94 case PIPE_TEX_WRAP_REPEAT
:
95 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
96 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
97 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
99 case PIPE_TEX_WRAP_CLAMP
:
100 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
101 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
102 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
105 assert(0 && "unexpected wrap mode");
113 * Gen code to fetch a texel from a texture at int coords (x, y).
114 * The result, texel, will be:
115 * texel[0] = red values
116 * texel[1] = green values
117 * texel[2] = blue values
118 * texel[3] = alpha values
121 lp_build_sample_texel_soa(struct lp_build_sample_context
*bld
,
126 LLVMValueRef y_stride
,
127 LLVMValueRef data_ptr
,
130 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
133 LLVMValueRef use_border
= NULL
;
135 /* use_border = x < 0 || x >= width || y < 0 || y >= height */
136 if (wrap_mode_uses_border_color(bld
->static_state
->wrap_s
)) {
138 b1
= lp_build_cmp(int_coord_bld
, PIPE_FUNC_LESS
, x
, int_coord_bld
->zero
);
139 b2
= lp_build_cmp(int_coord_bld
, PIPE_FUNC_GEQUAL
, x
, width
);
140 use_border
= LLVMBuildOr(bld
->builder
, b1
, b2
, "b1_or_b2");
143 if (wrap_mode_uses_border_color(bld
->static_state
->wrap_t
)) {
145 b1
= lp_build_cmp(int_coord_bld
, PIPE_FUNC_LESS
, y
, int_coord_bld
->zero
);
146 b2
= lp_build_cmp(int_coord_bld
, PIPE_FUNC_GEQUAL
, y
, height
);
148 use_border
= LLVMBuildOr(bld
->builder
, use_border
, b1
, "ub_or_b1");
149 use_border
= LLVMBuildOr(bld
->builder
, use_border
, b2
, "ub_or_b2");
152 use_border
= LLVMBuildOr(bld
->builder
, b1
, b2
, "b1_or_b2");
157 * Note: if we find an app which frequently samples the texture border
158 * we might want to implement a true conditional here to avoid sampling
159 * the texture whenever possible (since that's quite a bit of code).
162 * texel = border_color;
165 * texel = sample_texture(coord);
167 * As it is now, we always sample the texture, then selectively replace
168 * the texel color results with the border color.
171 /* convert x,y coords to linear offset from start of texture, in bytes */
172 offset
= lp_build_sample_offset(&bld
->uint_coord_bld
,
177 assert(bld
->format_desc
->block
.width
== 1);
178 assert(bld
->format_desc
->block
.height
== 1);
179 assert(bld
->format_desc
->block
.bits
<= bld
->texel_type
.width
);
181 /* gather the texels from the texture */
182 packed
= lp_build_gather(bld
->builder
,
183 bld
->texel_type
.length
,
184 bld
->format_desc
->block
.bits
,
185 bld
->texel_type
.width
,
188 /* convert texels to float rgba */
189 lp_build_unpack_rgba_soa(bld
->builder
,
195 /* select texel color or border color depending on use_border */
197 for (chan
= 0; chan
< 4; chan
++) {
198 LLVMValueRef border_chan
=
199 lp_build_const_scalar(bld
->texel_type
,
200 bld
->static_state
->border_color
[chan
]);
201 texel
[chan
] = lp_build_select(&bld
->texel_bld
, use_border
,
202 border_chan
, texel
[chan
]);
209 lp_build_sample_packed(struct lp_build_sample_context
*bld
,
212 LLVMValueRef y_stride
,
213 LLVMValueRef data_ptr
)
217 offset
= lp_build_sample_offset(&bld
->uint_coord_bld
,
222 assert(bld
->format_desc
->block
.width
== 1);
223 assert(bld
->format_desc
->block
.height
== 1);
224 assert(bld
->format_desc
->block
.bits
<= bld
->texel_type
.width
);
226 return lp_build_gather(bld
->builder
,
227 bld
->texel_type
.length
,
228 bld
->format_desc
->block
.bits
,
229 bld
->texel_type
.width
,
235 * Helper to compute the mirror function for the PIPE_TEX_WRAP_MIRROR_* modes.
238 lp_build_coord_mirror(struct lp_build_sample_context
*bld
,
241 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
242 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
243 LLVMValueRef fract
, flr
, isOdd
;
245 /* fract = coord - floor(coord) */
246 fract
= lp_build_sub(coord_bld
, coord
, lp_build_floor(coord_bld
, coord
));
248 /* flr = ifloor(coord); */
249 flr
= lp_build_ifloor(coord_bld
, coord
);
251 /* isOdd = flr & 1 */
252 isOdd
= LLVMBuildAnd(bld
->builder
, flr
, int_coord_bld
->one
, "");
254 /* make coord positive or negative depending on isOdd */
255 coord
= lp_build_set_sign(coord_bld
, fract
, isOdd
);
257 /* convert isOdd to float */
258 isOdd
= lp_build_int_to_float(coord_bld
, isOdd
);
260 /* add isOdd to coord */
261 coord
= lp_build_add(coord_bld
, coord
, isOdd
);
268 * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
269 * Return whether the given mode is supported by that function.
272 is_simple_wrap_mode(unsigned mode
)
275 case PIPE_TEX_WRAP_REPEAT
:
276 case PIPE_TEX_WRAP_CLAMP
:
277 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
279 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
287 * Build LLVM code for texture wrap mode, for scaled integer texcoords.
288 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
289 * \param length the texture size along one dimension
290 * \param is_pot if TRUE, length is a power of two
291 * \param wrap_mode one of PIPE_TEX_WRAP_x
294 lp_build_sample_wrap_int(struct lp_build_sample_context
*bld
,
300 struct lp_build_context
*uint_coord_bld
= &bld
->uint_coord_bld
;
301 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
302 LLVMValueRef length_minus_one
;
304 length_minus_one
= lp_build_sub(uint_coord_bld
, length
, uint_coord_bld
->one
);
307 case PIPE_TEX_WRAP_REPEAT
:
309 coord
= LLVMBuildAnd(bld
->builder
, coord
, length_minus_one
, "");
311 /* Signed remainder won't give the right results for negative
312 * dividends but unsigned remainder does.*/
313 coord
= LLVMBuildURem(bld
->builder
, coord
, length
, "");
316 case PIPE_TEX_WRAP_CLAMP
:
317 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
318 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
319 coord
= lp_build_max(int_coord_bld
, coord
, int_coord_bld
->zero
);
320 coord
= lp_build_min(int_coord_bld
, coord
, length_minus_one
);
323 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
324 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
325 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
326 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
328 _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
329 util_dump_tex_wrap(wrap_mode
, TRUE
));
330 coord
= lp_build_max(uint_coord_bld
, coord
, uint_coord_bld
->zero
);
331 coord
= lp_build_min(uint_coord_bld
, coord
, length_minus_one
);
343 * Build LLVM code for texture wrap mode for linear filtering.
344 * \param x0_out returns first integer texcoord
345 * \param x1_out returns second integer texcoord
346 * \param weight_out returns linear interpolation weight
349 lp_build_sample_wrap_linear(struct lp_build_sample_context
*bld
,
354 LLVMValueRef
*x0_out
,
355 LLVMValueRef
*x1_out
,
356 LLVMValueRef
*weight_out
)
358 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
359 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
360 struct lp_build_context
*uint_coord_bld
= &bld
->uint_coord_bld
;
361 LLVMValueRef two
= lp_build_const_scalar(coord_bld
->type
, 2.0);
362 LLVMValueRef half
= lp_build_const_scalar(coord_bld
->type
, 0.5);
363 LLVMValueRef length_f
= lp_build_int_to_float(coord_bld
, length
);
364 LLVMValueRef length_minus_one
;
365 LLVMValueRef length_f_minus_one
;
366 LLVMValueRef coord0
, coord1
, weight
;
368 /* XXX check for normalized vs. unnormalized coords */
370 length_minus_one
= lp_build_sub(uint_coord_bld
, length
, uint_coord_bld
->one
);
371 length_f_minus_one
= lp_build_sub(coord_bld
, length_f
, coord_bld
->one
);
374 case PIPE_TEX_WRAP_REPEAT
:
375 /* mul by size and subtract 0.5 */
376 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
377 coord
= lp_build_sub(coord_bld
, coord
, half
);
379 coord0
= lp_build_ifloor(coord_bld
, coord
);
380 coord1
= lp_build_add(uint_coord_bld
, coord0
, uint_coord_bld
->one
);
381 /* compute lerp weight */
382 weight
= lp_build_fract(coord_bld
, coord
);
385 coord0
= LLVMBuildAnd(bld
->builder
, coord0
, length_minus_one
, "");
386 coord1
= LLVMBuildAnd(bld
->builder
, coord1
, length_minus_one
, "");
389 /* Signed remainder won't give the right results for negative
390 * dividends but unsigned remainder does.*/
391 coord0
= LLVMBuildURem(bld
->builder
, coord0
, length
, "");
392 coord1
= LLVMBuildURem(bld
->builder
, coord1
, length
, "");
396 case PIPE_TEX_WRAP_CLAMP
:
397 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
398 weight
= lp_build_fract(coord_bld
, coord
);
399 coord0
= lp_build_clamp(coord_bld
, coord
, coord_bld
->zero
,
401 coord1
= lp_build_add(coord_bld
, coord
, coord_bld
->one
);
402 coord1
= lp_build_clamp(coord_bld
, coord1
, coord_bld
->zero
,
404 coord0
= lp_build_ifloor(coord_bld
, coord0
);
405 coord1
= lp_build_ifloor(coord_bld
, coord1
);
408 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
410 coord
= lp_build_clamp(coord_bld
, coord
, coord_bld
->zero
, coord_bld
->one
);
411 /* mul by tex size and subtract 0.5 */
412 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
413 coord
= lp_build_sub(coord_bld
, coord
, half
);
414 /* compute lerp weight */
415 weight
= lp_build_fract(coord_bld
, coord
);
416 /* coord0 = floor(coord); */
417 coord0
= lp_build_ifloor(coord_bld
, coord
);
418 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
419 /* coord0 = max(coord0, 0) */
420 coord0
= lp_build_max(int_coord_bld
, coord0
, int_coord_bld
->zero
);
421 /* coord1 = min(coord1, length-1) */
422 coord1
= lp_build_min(int_coord_bld
, coord1
, length_minus_one
);
425 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
427 LLVMValueRef min
, max
;
428 /* min = -1.0 / (2 * length) */
429 min
= lp_build_rcp(coord_bld
, lp_build_mul(coord_bld
, two
, length_f
));
430 min
= lp_build_negate(coord_bld
, min
);
431 /* max = 1.0 - min */
432 max
= lp_build_sub(coord_bld
, coord_bld
->one
, min
);
433 /* coord = clamp(coord, min, max) */
434 coord
= lp_build_clamp(coord_bld
, coord
, min
, max
);
435 /* scale coord to length (and sub 0.5?) */
436 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
437 coord
= lp_build_sub(coord_bld
, coord
, half
);
438 /* compute lerp weight */
439 weight
= lp_build_fract(coord_bld
, coord
);
441 coord0
= lp_build_ifloor(coord_bld
, coord
);
442 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
446 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
447 /* compute mirror function */
448 coord
= lp_build_coord_mirror(bld
, coord
);
450 /* scale coord to length */
451 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
452 coord
= lp_build_sub(coord_bld
, coord
, half
);
454 /* compute lerp weight */
455 weight
= lp_build_fract(coord_bld
, coord
);
457 /* convert to int coords */
458 coord0
= lp_build_ifloor(coord_bld
, coord
);
459 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
461 /* coord0 = max(coord0, 0) */
462 coord0
= lp_build_max(int_coord_bld
, coord0
, int_coord_bld
->zero
);
463 /* coord1 = min(coord1, length-1) */
464 coord1
= lp_build_min(int_coord_bld
, coord1
, length_minus_one
);
467 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
469 LLVMValueRef min
, max
;
470 /* min = 1.0 / (2 * length) */
471 min
= lp_build_rcp(coord_bld
, lp_build_mul(coord_bld
, two
, length_f
));
472 /* max = 1.0 - min */
473 max
= lp_build_sub(coord_bld
, coord_bld
->one
, min
);
475 coord
= lp_build_abs(coord_bld
, coord
);
476 coord
= lp_build_clamp(coord_bld
, coord
, min
, max
);
477 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
478 if(0)coord
= lp_build_sub(coord_bld
, coord
, half
);
479 weight
= lp_build_fract(coord_bld
, coord
);
480 coord0
= lp_build_ifloor(coord_bld
, coord
);
481 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
485 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
487 LLVMValueRef min
, max
;
488 /* min = 1.0 / (2 * length) */
489 min
= lp_build_rcp(coord_bld
, lp_build_mul(coord_bld
, two
, length_f
));
490 /* max = 1.0 - min */
491 max
= lp_build_sub(coord_bld
, coord_bld
->one
, min
);
493 coord
= lp_build_abs(coord_bld
, coord
);
494 coord
= lp_build_clamp(coord_bld
, coord
, min
, max
);
495 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
496 coord
= lp_build_sub(coord_bld
, coord
, half
);
497 weight
= lp_build_fract(coord_bld
, coord
);
498 coord0
= lp_build_ifloor(coord_bld
, coord
);
499 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
503 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
505 LLVMValueRef min
, max
;
506 /* min = -1.0 / (2 * length) */
507 min
= lp_build_rcp(coord_bld
, lp_build_mul(coord_bld
, two
, length_f
));
508 min
= lp_build_negate(coord_bld
, min
);
509 /* max = 1.0 - min */
510 max
= lp_build_sub(coord_bld
, coord_bld
->one
, min
);
512 coord
= lp_build_abs(coord_bld
, coord
);
513 coord
= lp_build_clamp(coord_bld
, coord
, min
, max
);
514 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
515 coord
= lp_build_sub(coord_bld
, coord
, half
);
516 weight
= lp_build_fract(coord_bld
, coord
);
517 coord0
= lp_build_ifloor(coord_bld
, coord
);
518 coord1
= lp_build_add(int_coord_bld
, coord0
, int_coord_bld
->one
);
528 *weight_out
= weight
;
533 * Build LLVM code for texture wrap mode for nearest filtering.
534 * \param coord the incoming texcoord (nominally in [0,1])
535 * \param length the texture size along one dimension, as int
536 * \param is_pot if TRUE, length is a power of two
537 * \param wrap_mode one of PIPE_TEX_WRAP_x
540 lp_build_sample_wrap_nearest(struct lp_build_sample_context
*bld
,
546 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
547 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
548 struct lp_build_context
*uint_coord_bld
= &bld
->uint_coord_bld
;
549 LLVMValueRef two
= lp_build_const_scalar(coord_bld
->type
, 2.0);
550 LLVMValueRef length_f
= lp_build_int_to_float(coord_bld
, length
);
551 LLVMValueRef length_minus_one
= lp_build_sub(uint_coord_bld
, length
, uint_coord_bld
->one
);
552 LLVMValueRef length_f_minus_one
= lp_build_sub(coord_bld
, length_f
, coord_bld
->one
);
555 /* XXX check for normalized vs. unnormalized coords */
558 case PIPE_TEX_WRAP_REPEAT
:
559 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
560 icoord
= lp_build_ifloor(coord_bld
, coord
);
562 icoord
= LLVMBuildAnd(bld
->builder
, icoord
, length_minus_one
, "");
564 /* Signed remainder won't give the right results for negative
565 * dividends but unsigned remainder does.*/
566 icoord
= LLVMBuildURem(bld
->builder
, icoord
, length
, "");
569 case PIPE_TEX_WRAP_CLAMP
:
571 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
573 icoord
= lp_build_ifloor(coord_bld
, coord
);
574 /* clamp to [0, size-1]. Note: int coord builder type */
575 icoord
= lp_build_clamp(int_coord_bld
, icoord
, int_coord_bld
->zero
,
579 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
581 LLVMValueRef min
, max
;
582 /* min = 1.0 / (2 * length) */
583 min
= lp_build_rcp(coord_bld
, lp_build_mul(coord_bld
, two
, length_f
));
584 /* max = length - min */
585 max
= lp_build_sub(coord_bld
, length_f
, min
);
586 /* scale coord to length */
587 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
588 /* coord = clamp(coord, min, max) */
589 coord
= lp_build_clamp(coord_bld
, coord
, min
, max
);
590 icoord
= lp_build_ifloor(coord_bld
, coord
);
594 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
595 /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
597 LLVMValueRef min
, max
;
598 /* min = -1.0 / (2 * length) */
599 min
= lp_build_rcp(coord_bld
, lp_build_mul(coord_bld
, two
, length_f
));
600 min
= lp_build_negate(coord_bld
, min
);
601 /* max = length - min */
602 max
= lp_build_sub(coord_bld
, length_f
, min
);
603 /* scale coord to length */
604 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
605 /* coord = clamp(coord, min, max) */
606 coord
= lp_build_clamp(coord_bld
, coord
, min
, max
);
607 icoord
= lp_build_ifloor(coord_bld
, coord
);
611 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
613 LLVMValueRef min
, max
;
614 /* min = 1.0 / (2 * length) */
615 min
= lp_build_rcp(coord_bld
, lp_build_mul(coord_bld
, two
, length_f
));
616 /* max = length - min */
617 max
= lp_build_sub(coord_bld
, length_f
, min
);
619 /* compute mirror function */
620 coord
= lp_build_coord_mirror(bld
, coord
);
622 /* scale coord to length */
623 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
625 /* coord = clamp(coord, min, max) */
626 coord
= lp_build_clamp(coord_bld
, coord
, min
, max
);
627 icoord
= lp_build_ifloor(coord_bld
, coord
);
631 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
632 coord
= lp_build_abs(coord_bld
, coord
);
633 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
634 coord
= lp_build_clamp(coord_bld
, coord
, coord_bld
->zero
, length_f_minus_one
);
635 icoord
= lp_build_ifloor(coord_bld
, coord
);
638 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
640 LLVMValueRef min
, max
;
641 /* min = 1.0 / (2 * length) */
642 min
= lp_build_rcp(coord_bld
, lp_build_mul(coord_bld
, two
, length_f
));
643 /* max = length - min */
644 max
= lp_build_sub(coord_bld
, length_f
, min
);
646 coord
= lp_build_abs(coord_bld
, coord
);
647 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
648 coord
= lp_build_clamp(coord_bld
, coord
, min
, max
);
649 icoord
= lp_build_ifloor(coord_bld
, coord
);
653 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
655 LLVMValueRef min
, max
;
656 /* min = -1.0 / (2 * length) */
657 min
= lp_build_rcp(coord_bld
, lp_build_mul(coord_bld
, two
, length_f
));
658 min
= lp_build_negate(coord_bld
, min
);
659 /* max = length - min */
660 max
= lp_build_sub(coord_bld
, length_f
, min
);
662 coord
= lp_build_abs(coord_bld
, coord
);
663 coord
= lp_build_mul(coord_bld
, coord
, length_f
);
664 coord
= lp_build_clamp(coord_bld
, coord
, min
, max
);
665 icoord
= lp_build_ifloor(coord_bld
, coord
);
678 * Sample 2D texture with nearest filtering.
681 lp_build_sample_2d_nearest_soa(struct lp_build_sample_context
*bld
,
687 LLVMValueRef data_ptr
,
692 x
= lp_build_sample_wrap_nearest(bld
, s
, width
,
693 bld
->static_state
->pot_width
,
694 bld
->static_state
->wrap_s
);
695 y
= lp_build_sample_wrap_nearest(bld
, t
, height
,
696 bld
->static_state
->pot_height
,
697 bld
->static_state
->wrap_t
);
699 lp_build_name(x
, "tex.x.wrapped");
700 lp_build_name(y
, "tex.y.wrapped");
702 lp_build_sample_texel_soa(bld
, width
, height
, x
, y
, stride
, data_ptr
, texel
);
707 * Sample 2D texture with bilinear filtering.
710 lp_build_sample_2d_linear_soa(struct lp_build_sample_context
*bld
,
716 LLVMValueRef data_ptr
,
719 LLVMValueRef s_fpart
;
720 LLVMValueRef t_fpart
;
723 LLVMValueRef neighbors
[2][2][4];
726 lp_build_sample_wrap_linear(bld
, s
, width
, bld
->static_state
->pot_width
,
727 bld
->static_state
->wrap_s
, &x0
, &x1
, &s_fpart
);
728 lp_build_sample_wrap_linear(bld
, t
, height
, bld
->static_state
->pot_height
,
729 bld
->static_state
->wrap_t
, &y0
, &y1
, &t_fpart
);
731 lp_build_sample_texel_soa(bld
, width
, height
, x0
, y0
, stride
, data_ptr
, neighbors
[0][0]);
732 lp_build_sample_texel_soa(bld
, width
, height
, x1
, y0
, stride
, data_ptr
, neighbors
[0][1]);
733 lp_build_sample_texel_soa(bld
, width
, height
, x0
, y1
, stride
, data_ptr
, neighbors
[1][0]);
734 lp_build_sample_texel_soa(bld
, width
, height
, x1
, y1
, stride
, data_ptr
, neighbors
[1][1]);
736 /* TODO: Don't interpolate missing channels */
737 for(chan
= 0; chan
< 4; ++chan
) {
738 texel
[chan
] = lp_build_lerp_2d(&bld
->texel_bld
,
740 neighbors
[0][0][chan
],
741 neighbors
[0][1][chan
],
742 neighbors
[1][0][chan
],
743 neighbors
[1][1][chan
]);
749 lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder
,
750 struct lp_type dst_type
,
754 LLVMValueRef mask
= lp_build_int_const_scalar(dst_type
, 0xff);
757 /* Decode the input vector components */
758 for (chan
= 0; chan
< 4; ++chan
) {
759 unsigned start
= chan
*8;
760 unsigned stop
= start
+ 8;
766 input
= LLVMBuildLShr(builder
, input
, lp_build_int_const_scalar(dst_type
, start
), "");
769 input
= LLVMBuildAnd(builder
, input
, mask
, "");
771 input
= lp_build_unsigned_norm_to_float(builder
, 8, dst_type
, input
);
779 lp_build_sample_2d_linear_aos(struct lp_build_sample_context
*bld
,
785 LLVMValueRef data_ptr
,
788 LLVMBuilderRef builder
= bld
->builder
;
789 struct lp_build_context i32
, h16
, u8n
;
790 LLVMTypeRef i32_vec_type
, h16_vec_type
, u8n_vec_type
;
791 LLVMValueRef i32_c8
, i32_c128
, i32_c255
;
792 LLVMValueRef s_ipart
, s_fpart
, s_fpart_lo
, s_fpart_hi
;
793 LLVMValueRef t_ipart
, t_fpart
, t_fpart_lo
, t_fpart_hi
;
796 LLVMValueRef neighbors
[2][2];
797 LLVMValueRef neighbors_lo
[2][2];
798 LLVMValueRef neighbors_hi
[2][2];
799 LLVMValueRef packed
, packed_lo
, packed_hi
;
800 LLVMValueRef unswizzled
[4];
802 lp_build_context_init(&i32
, builder
, lp_type_int(32));
803 lp_build_context_init(&h16
, builder
, lp_type_ufixed(16));
804 lp_build_context_init(&u8n
, builder
, lp_type_unorm(8));
806 i32_vec_type
= lp_build_vec_type(i32
.type
);
807 h16_vec_type
= lp_build_vec_type(h16
.type
);
808 u8n_vec_type
= lp_build_vec_type(u8n
.type
);
810 if (bld
->static_state
->normalized_coords
) {
811 LLVMTypeRef coord_vec_type
= lp_build_vec_type(bld
->coord_type
);
812 LLVMValueRef fp_width
= LLVMBuildSIToFP(bld
->builder
, width
, coord_vec_type
, "");
813 LLVMValueRef fp_height
= LLVMBuildSIToFP(bld
->builder
, height
, coord_vec_type
, "");
814 s
= lp_build_mul(&bld
->coord_bld
, s
, fp_width
);
815 t
= lp_build_mul(&bld
->coord_bld
, t
, fp_height
);
818 /* scale coords by 256 (8 fractional bits) */
819 s
= lp_build_mul_imm(&bld
->coord_bld
, s
, 256);
820 t
= lp_build_mul_imm(&bld
->coord_bld
, t
, 256);
822 /* convert float to int */
823 s
= LLVMBuildFPToSI(builder
, s
, i32_vec_type
, "");
824 t
= LLVMBuildFPToSI(builder
, t
, i32_vec_type
, "");
826 /* subtract 0.5 (add -128) */
827 i32_c128
= lp_build_int_const_scalar(i32
.type
, -128);
828 s
= LLVMBuildAdd(builder
, s
, i32_c128
, "");
829 t
= LLVMBuildAdd(builder
, t
, i32_c128
, "");
831 /* compute floor (shift right 8) */
832 i32_c8
= lp_build_int_const_scalar(i32
.type
, 8);
833 s_ipart
= LLVMBuildAShr(builder
, s
, i32_c8
, "");
834 t_ipart
= LLVMBuildAShr(builder
, t
, i32_c8
, "");
836 /* compute fractional part (AND with 0xff) */
837 i32_c255
= lp_build_int_const_scalar(i32
.type
, 255);
838 s_fpart
= LLVMBuildAnd(builder
, s
, i32_c255
, "");
839 t_fpart
= LLVMBuildAnd(builder
, t
, i32_c255
, "");
844 x1
= lp_build_add(&bld
->int_coord_bld
, x0
, bld
->int_coord_bld
.one
);
845 y1
= lp_build_add(&bld
->int_coord_bld
, y0
, bld
->int_coord_bld
.one
);
847 x0
= lp_build_sample_wrap_int(bld
, x0
, width
, bld
->static_state
->pot_width
,
848 bld
->static_state
->wrap_s
);
849 y0
= lp_build_sample_wrap_int(bld
, y0
, height
, bld
->static_state
->pot_height
,
850 bld
->static_state
->wrap_t
);
852 x1
= lp_build_sample_wrap_int(bld
, x1
, width
, bld
->static_state
->pot_width
,
853 bld
->static_state
->wrap_s
);
854 y1
= lp_build_sample_wrap_int(bld
, y1
, height
, bld
->static_state
->pot_height
,
855 bld
->static_state
->wrap_t
);
858 * Transform 4 x i32 in
860 * s_fpart = {s0, s1, s2, s3}
864 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
868 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
869 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
871 * and likewise for t_fpart. There is no risk of losing precision here
872 * since the fractional parts only use the lower 8 bits.
875 s_fpart
= LLVMBuildBitCast(builder
, s_fpart
, h16_vec_type
, "");
876 t_fpart
= LLVMBuildBitCast(builder
, t_fpart
, h16_vec_type
, "");
879 LLVMTypeRef elem_type
= LLVMInt32Type();
880 LLVMValueRef shuffles_lo
[LP_MAX_VECTOR_LENGTH
];
881 LLVMValueRef shuffles_hi
[LP_MAX_VECTOR_LENGTH
];
882 LLVMValueRef shuffle_lo
;
883 LLVMValueRef shuffle_hi
;
886 for(j
= 0; j
< h16
.type
.length
; j
+= 4) {
887 unsigned subindex
= util_cpu_caps
.little_endian
? 0 : 1;
890 index
= LLVMConstInt(elem_type
, j
/2 + subindex
, 0);
891 for(i
= 0; i
< 4; ++i
)
892 shuffles_lo
[j
+ i
] = index
;
894 index
= LLVMConstInt(elem_type
, h16
.type
.length
/2 + j
/2 + subindex
, 0);
895 for(i
= 0; i
< 4; ++i
)
896 shuffles_hi
[j
+ i
] = index
;
899 shuffle_lo
= LLVMConstVector(shuffles_lo
, h16
.type
.length
);
900 shuffle_hi
= LLVMConstVector(shuffles_hi
, h16
.type
.length
);
902 s_fpart_lo
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
, shuffle_lo
, "");
903 t_fpart_lo
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
, shuffle_lo
, "");
904 s_fpart_hi
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
, shuffle_hi
, "");
905 t_fpart_hi
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
, shuffle_hi
, "");
909 * Fetch the pixels as 4 x 32bit (rgba order might differ):
911 * rgba0 rgba1 rgba2 rgba3
913 * bit cast them into 16 x u8
915 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
917 * unpack them into two 8 x i16:
919 * r0 g0 b0 a0 r1 g1 b1 a1
920 * r2 g2 b2 a2 r3 g3 b3 a3
922 * The higher 8 bits of the resulting elements will be zero.
925 neighbors
[0][0] = lp_build_sample_packed(bld
, x0
, y0
, stride
, data_ptr
);
926 neighbors
[0][1] = lp_build_sample_packed(bld
, x1
, y0
, stride
, data_ptr
);
927 neighbors
[1][0] = lp_build_sample_packed(bld
, x0
, y1
, stride
, data_ptr
);
928 neighbors
[1][1] = lp_build_sample_packed(bld
, x1
, y1
, stride
, data_ptr
);
930 neighbors
[0][0] = LLVMBuildBitCast(builder
, neighbors
[0][0], u8n_vec_type
, "");
931 neighbors
[0][1] = LLVMBuildBitCast(builder
, neighbors
[0][1], u8n_vec_type
, "");
932 neighbors
[1][0] = LLVMBuildBitCast(builder
, neighbors
[1][0], u8n_vec_type
, "");
933 neighbors
[1][1] = LLVMBuildBitCast(builder
, neighbors
[1][1], u8n_vec_type
, "");
935 lp_build_unpack2(builder
, u8n
.type
, h16
.type
, neighbors
[0][0], &neighbors_lo
[0][0], &neighbors_hi
[0][0]);
936 lp_build_unpack2(builder
, u8n
.type
, h16
.type
, neighbors
[0][1], &neighbors_lo
[0][1], &neighbors_hi
[0][1]);
937 lp_build_unpack2(builder
, u8n
.type
, h16
.type
, neighbors
[1][0], &neighbors_lo
[1][0], &neighbors_hi
[1][0]);
938 lp_build_unpack2(builder
, u8n
.type
, h16
.type
, neighbors
[1][1], &neighbors_lo
[1][1], &neighbors_hi
[1][1]);
941 * Linear interpolate with 8.8 fixed point.
944 packed_lo
= lp_build_lerp_2d(&h16
,
945 s_fpart_lo
, t_fpart_lo
,
951 packed_hi
= lp_build_lerp_2d(&h16
,
952 s_fpart_hi
, t_fpart_hi
,
958 packed
= lp_build_pack2(builder
, h16
.type
, u8n
.type
, packed_lo
, packed_hi
);
961 * Convert to SoA and swizzle.
964 packed
= LLVMBuildBitCast(builder
, packed
, i32_vec_type
, "");
966 lp_build_rgba8_to_f32_soa(bld
->builder
,
970 lp_build_format_swizzle_soa(bld
->format_desc
,
971 bld
->texel_type
, unswizzled
,
977 lp_build_sample_compare(struct lp_build_sample_context
*bld
,
981 struct lp_build_context
*texel_bld
= &bld
->texel_bld
;
985 if(bld
->static_state
->compare_mode
== PIPE_TEX_COMPARE_NONE
)
988 /* TODO: Compare before swizzling, to avoid redundant computations */
990 for(chan
= 0; chan
< 4; ++chan
) {
992 cmp
= lp_build_cmp(texel_bld
, bld
->static_state
->compare_func
, p
, texel
[chan
]);
993 cmp
= lp_build_select(texel_bld
, cmp
, texel_bld
->one
, texel_bld
->zero
);
996 res
= lp_build_add(texel_bld
, res
, cmp
);
1002 res
= lp_build_mul(texel_bld
, res
, lp_build_const_scalar(texel_bld
->type
, 0.25));
1004 /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1005 for(chan
= 0; chan
< 3; ++chan
)
1007 texel
[3] = texel_bld
->one
;
1012 * Build texture sampling code.
1013 * 'texel' will return a vector of four LLVMValueRefs corresponding to
1017 lp_build_sample_soa(LLVMBuilderRef builder
,
1018 const struct lp_sampler_static_state
*static_state
,
1019 struct lp_sampler_dynamic_state
*dynamic_state
,
1020 struct lp_type type
,
1022 unsigned num_coords
,
1023 const LLVMValueRef
*coords
,
1024 LLVMValueRef lodbias
,
1025 LLVMValueRef
*texel
)
1027 struct lp_build_sample_context bld
;
1029 LLVMValueRef height
;
1030 LLVMValueRef stride
;
1031 LLVMValueRef data_ptr
;
1036 /* Setup our build context */
1037 memset(&bld
, 0, sizeof bld
);
1038 bld
.builder
= builder
;
1039 bld
.static_state
= static_state
;
1040 bld
.dynamic_state
= dynamic_state
;
1041 bld
.format_desc
= util_format_description(static_state
->format
);
1042 bld
.coord_type
= type
;
1043 bld
.uint_coord_type
= lp_uint_type(type
);
1044 bld
.int_coord_type
= lp_int_type(type
);
1045 bld
.texel_type
= type
;
1046 lp_build_context_init(&bld
.coord_bld
, builder
, bld
.coord_type
);
1047 lp_build_context_init(&bld
.uint_coord_bld
, builder
, bld
.uint_coord_type
);
1048 lp_build_context_init(&bld
.int_coord_bld
, builder
, bld
.int_coord_type
);
1049 lp_build_context_init(&bld
.texel_bld
, builder
, bld
.texel_type
);
1051 /* Get the dynamic state */
1052 width
= dynamic_state
->width(dynamic_state
, builder
, unit
);
1053 height
= dynamic_state
->height(dynamic_state
, builder
, unit
);
1054 stride
= dynamic_state
->stride(dynamic_state
, builder
, unit
);
1055 data_ptr
= dynamic_state
->data_ptr(dynamic_state
, builder
, unit
);
1061 width
= lp_build_broadcast_scalar(&bld
.uint_coord_bld
, width
);
1062 height
= lp_build_broadcast_scalar(&bld
.uint_coord_bld
, height
);
1063 stride
= lp_build_broadcast_scalar(&bld
.uint_coord_bld
, stride
);
1065 if(static_state
->target
== PIPE_TEXTURE_1D
)
1066 t
= bld
.coord_bld
.zero
;
1068 switch (static_state
->min_img_filter
) {
1069 case PIPE_TEX_FILTER_NEAREST
:
1070 lp_build_sample_2d_nearest_soa(&bld
, s
, t
, width
, height
,
1071 stride
, data_ptr
, texel
);
1073 case PIPE_TEX_FILTER_LINEAR
:
1074 if(lp_format_is_rgba8(bld
.format_desc
) &&
1075 is_simple_wrap_mode(static_state
->wrap_s
) &&
1076 is_simple_wrap_mode(static_state
->wrap_t
))
1077 lp_build_sample_2d_linear_aos(&bld
, s
, t
, width
, height
,
1078 stride
, data_ptr
, texel
);
1080 lp_build_sample_2d_linear_soa(&bld
, s
, t
, width
, height
,
1081 stride
, data_ptr
, texel
);
1087 /* FIXME: respect static_state->min_mip_filter */;
1088 /* FIXME: respect static_state->mag_img_filter */;
1090 lp_build_sample_compare(&bld
, p
, texel
);