/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Texture sampling -- common code.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */

35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "util/u_format.h"
38 #include "util/u_math.h"
39 #include "lp_bld_arit.h"
40 #include "lp_bld_const.h"
41 #include "lp_bld_debug.h"
42 #include "lp_bld_printf.h"
43 #include "lp_bld_flow.h"
44 #include "lp_bld_sample.h"
45 #include "lp_bld_swizzle.h"
46 #include "lp_bld_type.h"

/*
 * Bri-linear factor. Use zero or any other number less than one to force
 * tri-linear filtering.
 */
#define BRILINEAR_FACTOR 2

57 * Does the given texture wrap mode allow sampling the texture border color?
58 * XXX maybe move this into gallium util code.
61 lp_sampler_wrap_mode_uses_border_color(unsigned mode
,
62 unsigned min_img_filter
,
63 unsigned mag_img_filter
)
66 case PIPE_TEX_WRAP_REPEAT
:
67 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
68 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
69 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
71 case PIPE_TEX_WRAP_CLAMP
:
72 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
73 if (min_img_filter
== PIPE_TEX_FILTER_NEAREST
&&
74 mag_img_filter
== PIPE_TEX_FILTER_NEAREST
) {
79 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
80 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
83 assert(0 && "unexpected wrap mode");
90 * Initialize lp_sampler_static_state object with the gallium sampler
92 * The former is considered to be static and the later dynamic.
95 lp_sampler_static_state(struct lp_sampler_static_state
*state
,
96 const struct pipe_sampler_view
*view
,
97 const struct pipe_sampler_state
*sampler
)
99 const struct pipe_resource
*texture
= view
->texture
;
101 memset(state
, 0, sizeof *state
);
110 * We don't copy sampler state over unless it is actually enabled, to avoid
111 * spurious recompiles, as the sampler static state is part of the shader
114 * Ideally the state tracker or cso_cache module would make all state
115 * canonical, but until that happens it's better to be safe than sorry here.
117 * XXX: Actually there's much more than can be done here, especially
118 * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
121 state
->format
= view
->format
;
122 state
->swizzle_r
= view
->swizzle_r
;
123 state
->swizzle_g
= view
->swizzle_g
;
124 state
->swizzle_b
= view
->swizzle_b
;
125 state
->swizzle_a
= view
->swizzle_a
;
127 state
->target
= texture
->target
;
128 state
->pot_width
= util_is_power_of_two(texture
->width0
);
129 state
->pot_height
= util_is_power_of_two(texture
->height0
);
130 state
->pot_depth
= util_is_power_of_two(texture
->depth0
);
132 state
->wrap_s
= sampler
->wrap_s
;
133 state
->wrap_t
= sampler
->wrap_t
;
134 state
->wrap_r
= sampler
->wrap_r
;
135 state
->min_img_filter
= sampler
->min_img_filter
;
136 state
->mag_img_filter
= sampler
->mag_img_filter
;
138 if (view
->last_level
&& sampler
->max_lod
> 0.0f
) {
139 state
->min_mip_filter
= sampler
->min_mip_filter
;
141 state
->min_mip_filter
= PIPE_TEX_MIPFILTER_NONE
;
144 if (state
->min_mip_filter
!= PIPE_TEX_MIPFILTER_NONE
) {
145 if (sampler
->lod_bias
!= 0.0f
) {
146 state
->lod_bias_non_zero
= 1;
149 /* If min_lod == max_lod we can greatly simplify mipmap selection.
150 * This is a case that occurs during automatic mipmap generation.
152 if (sampler
->min_lod
== sampler
->max_lod
) {
153 state
->min_max_lod_equal
= 1;
155 if (sampler
->min_lod
> 0.0f
) {
156 state
->apply_min_lod
= 1;
159 if (sampler
->max_lod
< (float)view
->last_level
) {
160 state
->apply_max_lod
= 1;
165 state
->compare_mode
= sampler
->compare_mode
;
166 if (sampler
->compare_mode
!= PIPE_TEX_COMPARE_NONE
) {
167 state
->compare_func
= sampler
->compare_func
;
170 state
->normalized_coords
= sampler
->normalized_coords
;
173 * FIXME: Handle the remainder of pipe_sampler_view.
179 * Generate code to compute coordinate gradient (rho).
180 * \param ddx partial derivatives of (s, t, r, q) with respect to X
181 * \param ddy partial derivatives of (s, t, r, q) with respect to Y
183 * XXX: The resulting rho is scalar, so we ignore all but the first element of
184 * derivatives that are passed by the shader.
187 lp_build_rho(struct lp_build_sample_context
*bld
,
188 const LLVMValueRef ddx
[4],
189 const LLVMValueRef ddy
[4])
191 struct lp_build_context
*float_size_bld
= &bld
->float_size_bld
;
192 struct lp_build_context
*float_bld
= &bld
->float_bld
;
193 const unsigned dims
= bld
->dims
;
194 LLVMTypeRef i32t
= LLVMInt32Type();
195 LLVMValueRef index0
= LLVMConstInt(i32t
, 0, 0);
196 LLVMValueRef index1
= LLVMConstInt(i32t
, 1, 0);
197 LLVMValueRef index2
= LLVMConstInt(i32t
, 2, 0);
198 LLVMValueRef dsdx
, dsdy
, dtdx
, dtdy
, drdx
, drdy
;
199 LLVMValueRef rho_x
, rho_y
;
200 LLVMValueRef rho_vec
;
201 LLVMValueRef float_size
;
204 dsdx
= LLVMBuildExtractElement(bld
->builder
, ddx
[0], index0
, "dsdx");
205 dsdy
= LLVMBuildExtractElement(bld
->builder
, ddy
[0], index0
, "dsdy");
212 rho_x
= float_size_bld
->undef
;
213 rho_y
= float_size_bld
->undef
;
215 rho_x
= LLVMBuildInsertElement(bld
->builder
, rho_x
, dsdx
, index0
, "");
216 rho_y
= LLVMBuildInsertElement(bld
->builder
, rho_y
, dsdy
, index0
, "");
218 dtdx
= LLVMBuildExtractElement(bld
->builder
, ddx
[1], index0
, "dtdx");
219 dtdy
= LLVMBuildExtractElement(bld
->builder
, ddy
[1], index0
, "dtdy");
221 rho_x
= LLVMBuildInsertElement(bld
->builder
, rho_x
, dtdx
, index1
, "");
222 rho_y
= LLVMBuildInsertElement(bld
->builder
, rho_y
, dtdy
, index1
, "");
225 drdx
= LLVMBuildExtractElement(bld
->builder
, ddx
[2], index0
, "drdx");
226 drdy
= LLVMBuildExtractElement(bld
->builder
, ddy
[2], index0
, "drdy");
228 rho_x
= LLVMBuildInsertElement(bld
->builder
, rho_x
, drdx
, index2
, "");
229 rho_y
= LLVMBuildInsertElement(bld
->builder
, rho_y
, drdy
, index2
, "");
233 rho_x
= lp_build_abs(float_size_bld
, rho_x
);
234 rho_y
= lp_build_abs(float_size_bld
, rho_y
);
236 rho_vec
= lp_build_max(float_size_bld
, rho_x
, rho_y
);
238 float_size
= lp_build_int_to_float(float_size_bld
, bld
->int_size
);
240 rho_vec
= lp_build_mul(float_size_bld
, rho_vec
, float_size
);
247 LLVMValueRef rho_s
, rho_t
, rho_r
;
249 rho_s
= LLVMBuildExtractElement(bld
->builder
, rho_vec
, index0
, "");
250 rho_t
= LLVMBuildExtractElement(bld
->builder
, rho_vec
, index1
, "");
252 rho
= lp_build_max(float_bld
, rho_s
, rho_t
);
255 rho_r
= LLVMBuildExtractElement(bld
->builder
, rho_vec
, index0
, "");
256 rho
= lp_build_max(float_bld
, rho
, rho_r
);
266 * Bri-linear lod computation
268 * Use a piece-wise linear approximation of log2 such that:
269 * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
270 * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
271 * with the steepness specified in 'factor'
272 * - exact result for 0.5, 1.5, etc.
288 * This is a technique also commonly used in hardware:
289 * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
291 * TODO: For correctness, this should only be applied when texture is known to
292 * have regular mipmaps, i.e., mipmaps derived from the base level.
294 * TODO: This could be done in fixed point, where applicable.
297 lp_build_brilinear_lod(struct lp_build_sample_context
*bld
,
300 LLVMValueRef
*out_lod_ipart
,
301 LLVMValueRef
*out_lod_fpart
)
303 struct lp_build_context
*float_bld
= &bld
->float_bld
;
304 LLVMValueRef lod_fpart
;
305 float pre_offset
= (factor
- 0.5)/factor
- 0.5;
306 float post_offset
= 1 - factor
;
309 lp_build_printf(bld
->builder
, "lod = %f\n", lod
);
312 lod
= lp_build_add(float_bld
, lod
,
313 lp_build_const_vec(float_bld
->type
, pre_offset
));
315 lp_build_ifloor_fract(float_bld
, lod
, out_lod_ipart
, &lod_fpart
);
317 lod_fpart
= lp_build_mul(float_bld
, lod_fpart
,
318 lp_build_const_vec(float_bld
->type
, factor
));
320 lod_fpart
= lp_build_add(float_bld
, lod_fpart
,
321 lp_build_const_vec(float_bld
->type
, post_offset
));
324 * It's not necessary to clamp lod_fpart since:
325 * - the above expression will never produce numbers greater than one.
326 * - the mip filtering branch is only taken if lod_fpart is positive
329 *out_lod_fpart
= lod_fpart
;
332 lp_build_printf(bld
->builder
, "lod_ipart = %i\n", *out_lod_ipart
);
333 lp_build_printf(bld
->builder
, "lod_fpart = %f\n\n", *out_lod_fpart
);
339 * Generate code to compute texture level of detail (lambda).
340 * \param ddx partial derivatives of (s, t, r, q) with respect to X
341 * \param ddy partial derivatives of (s, t, r, q) with respect to Y
342 * \param lod_bias optional float vector with the shader lod bias
343 * \param explicit_lod optional float vector with the explicit lod
344 * \param width scalar int texture width
345 * \param height scalar int texture height
346 * \param depth scalar int texture depth
348 * XXX: The resulting lod is scalar, so ignore all but the first element of
349 * derivatives, lod_bias, etc that are passed by the shader.
352 lp_build_lod_selector(struct lp_build_sample_context
*bld
,
354 const LLVMValueRef ddx
[4],
355 const LLVMValueRef ddy
[4],
356 LLVMValueRef lod_bias
, /* optional */
357 LLVMValueRef explicit_lod
, /* optional */
359 LLVMValueRef
*out_lod_ipart
,
360 LLVMValueRef
*out_lod_fpart
)
363 struct lp_build_context
*float_bld
= &bld
->float_bld
;
366 *out_lod_ipart
= bld
->int_bld
.zero
;
367 *out_lod_fpart
= bld
->float_bld
.zero
;
369 if (bld
->static_state
->min_max_lod_equal
) {
370 /* User is forcing sampling from a particular mipmap level.
371 * This is hit during mipmap generation.
373 LLVMValueRef min_lod
=
374 bld
->dynamic_state
->min_lod(bld
->dynamic_state
, bld
->builder
, unit
);
379 LLVMValueRef sampler_lod_bias
=
380 bld
->dynamic_state
->lod_bias(bld
->dynamic_state
, bld
->builder
, unit
);
381 LLVMValueRef index0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
384 lod
= LLVMBuildExtractElement(bld
->builder
, explicit_lod
,
390 rho
= lp_build_rho(bld
, ddx
, ddy
);
392 /* compute lod = log2(rho) */
393 if ((mip_filter
== PIPE_TEX_MIPFILTER_NONE
||
394 mip_filter
== PIPE_TEX_MIPFILTER_NEAREST
) &&
396 !bld
->static_state
->lod_bias_non_zero
&&
397 !bld
->static_state
->apply_max_lod
&&
398 !bld
->static_state
->apply_min_lod
) {
399 *out_lod_ipart
= lp_build_ilog2(float_bld
, rho
);
400 *out_lod_fpart
= bld
->float_bld
.zero
;
405 lod
= lp_build_log2(float_bld
, rho
);
408 lod
= lp_build_fast_log2(float_bld
, rho
);
411 /* add shader lod bias */
413 lod_bias
= LLVMBuildExtractElement(bld
->builder
, lod_bias
,
415 lod
= LLVMBuildFAdd(bld
->builder
, lod
, lod_bias
, "shader_lod_bias");
419 /* add sampler lod bias */
420 if (bld
->static_state
->lod_bias_non_zero
)
421 lod
= LLVMBuildFAdd(bld
->builder
, lod
, sampler_lod_bias
, "sampler_lod_bias");
425 if (bld
->static_state
->apply_max_lod
) {
426 LLVMValueRef max_lod
=
427 bld
->dynamic_state
->max_lod(bld
->dynamic_state
, bld
->builder
, unit
);
429 lod
= lp_build_min(float_bld
, lod
, max_lod
);
431 if (bld
->static_state
->apply_min_lod
) {
432 LLVMValueRef min_lod
=
433 bld
->dynamic_state
->min_lod(bld
->dynamic_state
, bld
->builder
, unit
);
435 lod
= lp_build_max(float_bld
, lod
, min_lod
);
439 if (mip_filter
== PIPE_TEX_MIPFILTER_LINEAR
) {
440 if (BRILINEAR_FACTOR
> 1.0) {
441 lp_build_brilinear_lod(bld
, lod
, BRILINEAR_FACTOR
,
442 out_lod_ipart
, out_lod_fpart
);
445 lp_build_ifloor_fract(float_bld
, lod
, out_lod_ipart
, out_lod_fpart
);
448 lp_build_name(*out_lod_ipart
, "lod_ipart");
449 lp_build_name(*out_lod_fpart
, "lod_fpart");
452 *out_lod_ipart
= lp_build_iround(float_bld
, lod
);
460 * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
461 * mipmap level index.
462 * Note: this is all scalar code.
463 * \param lod scalar float texture level of detail
464 * \param level_out returns integer
467 lp_build_nearest_mip_level(struct lp_build_sample_context
*bld
,
469 LLVMValueRef lod_ipart
,
470 LLVMValueRef
*level_out
)
472 struct lp_build_context
*int_bld
= &bld
->int_bld
;
473 LLVMValueRef last_level
, level
;
475 LLVMValueRef zero
= LLVMConstInt(LLVMInt32Type(), 0, 0);
477 last_level
= bld
->dynamic_state
->last_level(bld
->dynamic_state
,
480 /* convert float lod to integer */
483 /* clamp level to legal range of levels */
484 *level_out
= lp_build_clamp(int_bld
, level
, zero
, last_level
);
489 * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
490 * two (adjacent) mipmap level indexes. Later, we'll sample from those
491 * two mipmap levels and interpolate between them.
494 lp_build_linear_mip_levels(struct lp_build_sample_context
*bld
,
496 LLVMValueRef lod_ipart
,
497 LLVMValueRef
*lod_fpart_inout
,
498 LLVMValueRef
*level0_out
,
499 LLVMValueRef
*level1_out
)
501 LLVMBuilderRef builder
= bld
->builder
;
502 struct lp_build_context
*int_bld
= &bld
->int_bld
;
503 struct lp_build_context
*float_bld
= &bld
->float_bld
;
504 LLVMValueRef last_level
;
505 LLVMValueRef clamp_min
;
506 LLVMValueRef clamp_max
;
508 *level0_out
= lod_ipart
;
509 *level1_out
= lp_build_add(int_bld
, lod_ipart
, int_bld
->one
);
511 last_level
= bld
->dynamic_state
->last_level(bld
->dynamic_state
,
515 * Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the
516 * minimum number of comparisons, and zeroing lod_fpart in the extreme
517 * ends in the process.
521 clamp_min
= LLVMBuildICmp(builder
, LLVMIntSLT
,
522 lod_ipart
, int_bld
->zero
,
523 "clamp_lod_to_zero");
525 *level0_out
= LLVMBuildSelect(builder
, clamp_min
,
526 int_bld
->zero
, *level0_out
, "");
528 *level1_out
= LLVMBuildSelect(builder
, clamp_min
,
529 int_bld
->zero
, *level1_out
, "");
531 *lod_fpart_inout
= LLVMBuildSelect(builder
, clamp_min
,
532 float_bld
->zero
, *lod_fpart_inout
, "");
534 /* lod_ipart >= last_level */
535 clamp_max
= LLVMBuildICmp(builder
, LLVMIntSGE
,
536 lod_ipart
, last_level
,
537 "clamp_lod_to_last");
539 *level0_out
= LLVMBuildSelect(builder
, clamp_max
,
540 last_level
, *level0_out
, "");
542 *level1_out
= LLVMBuildSelect(builder
, clamp_max
,
543 last_level
, *level1_out
, "");
545 *lod_fpart_inout
= LLVMBuildSelect(builder
, clamp_max
,
546 float_bld
->zero
, *lod_fpart_inout
, "");
548 lp_build_name(*level0_out
, "sampler%u_miplevel0", unit
);
549 lp_build_name(*level1_out
, "sampler%u_miplevel1", unit
);
550 lp_build_name(*lod_fpart_inout
, "sampler%u_mipweight", unit
);
555 * Return pointer to a single mipmap level.
556 * \param data_array array of pointers to mipmap levels
557 * \param level integer mipmap level
560 lp_build_get_mipmap_level(struct lp_build_sample_context
*bld
,
563 LLVMValueRef indexes
[2], data_ptr
;
564 indexes
[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
566 data_ptr
= LLVMBuildGEP(bld
->builder
, bld
->data_array
, indexes
, 2, "");
567 data_ptr
= LLVMBuildLoad(bld
->builder
, data_ptr
, "");
573 lp_build_get_const_mipmap_level(struct lp_build_sample_context
*bld
,
576 LLVMValueRef lvl
= LLVMConstInt(LLVMInt32Type(), level
, 0);
577 return lp_build_get_mipmap_level(bld
, lvl
);
582 * Codegen equivalent for u_minify().
583 * Return max(1, base_size >> level);
586 lp_build_minify(struct lp_build_context
*bld
,
587 LLVMValueRef base_size
,
590 assert(lp_check_value(bld
->type
, base_size
));
591 assert(lp_check_value(bld
->type
, level
));
593 if (level
== bld
->zero
) {
594 /* if we're using mipmap level zero, no minification is needed */
599 LLVMBuildLShr(bld
->builder
, base_size
, level
, "minify");
600 assert(bld
->type
.sign
);
601 size
= lp_build_max(bld
, size
, bld
->one
);
608 * Dereference stride_array[mipmap_level] array to get a stride.
609 * Return stride as a vector.
612 lp_build_get_level_stride_vec(struct lp_build_sample_context
*bld
,
613 LLVMValueRef stride_array
, LLVMValueRef level
)
615 LLVMValueRef indexes
[2], stride
;
616 indexes
[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
618 stride
= LLVMBuildGEP(bld
->builder
, stride_array
, indexes
, 2, "");
619 stride
= LLVMBuildLoad(bld
->builder
, stride
, "");
620 stride
= lp_build_broadcast_scalar(&bld
->int_coord_bld
, stride
);
626 * When sampling a mipmap, we need to compute the width, height, depth
627 * of the source levels from the level indexes. This helper function
631 lp_build_mipmap_level_sizes(struct lp_build_sample_context
*bld
,
633 LLVMValueRef
*out_width_vec
,
634 LLVMValueRef
*out_height_vec
,
635 LLVMValueRef
*out_depth_vec
,
636 LLVMValueRef
*row_stride_vec
,
637 LLVMValueRef
*img_stride_vec
)
639 const unsigned dims
= bld
->dims
;
640 LLVMValueRef ilevel_vec
;
641 LLVMValueRef size_vec
;
642 LLVMValueRef width
, height
, depth
;
643 LLVMTypeRef i32t
= LLVMInt32Type();
645 ilevel_vec
= lp_build_broadcast_scalar(&bld
->int_size_bld
, ilevel
);
648 * Compute width, height, depth at mipmap level 'ilevel'
650 size_vec
= lp_build_minify(&bld
->int_size_bld
, bld
->int_size
, ilevel_vec
);
656 width
= LLVMBuildExtractElement(bld
->builder
, size_vec
,
657 LLVMConstInt(i32t
, 0, 0), "");
659 *out_width_vec
= lp_build_broadcast_scalar(&bld
->int_coord_bld
, width
);
661 height
= LLVMBuildExtractElement(bld
->builder
, size_vec
,
662 LLVMConstInt(i32t
, 1, 0), "");
663 *out_height_vec
= lp_build_broadcast_scalar(&bld
->int_coord_bld
, height
);
664 *row_stride_vec
= lp_build_get_level_stride_vec(bld
,
665 bld
->row_stride_array
,
667 if (dims
== 3 || bld
->static_state
->target
== PIPE_TEXTURE_CUBE
) {
668 *img_stride_vec
= lp_build_get_level_stride_vec(bld
,
669 bld
->img_stride_array
,
672 depth
= LLVMBuildExtractElement(bld
->builder
, size_vec
,
673 LLVMConstInt(i32t
, 2, 0), "");
674 *out_depth_vec
= lp_build_broadcast_scalar(&bld
->int_coord_bld
, depth
);
682 /** Helper used by lp_build_cube_lookup() */
684 lp_build_cube_ima(struct lp_build_context
*coord_bld
, LLVMValueRef coord
)
686 /* ima = -0.5 / abs(coord); */
687 LLVMValueRef negHalf
= lp_build_const_vec(coord_bld
->type
, -0.5);
688 LLVMValueRef absCoord
= lp_build_abs(coord_bld
, coord
);
689 LLVMValueRef ima
= lp_build_div(coord_bld
, negHalf
, absCoord
);
695 * Helper used by lp_build_cube_lookup()
696 * \param sign scalar +1 or -1
697 * \param coord float vector
698 * \param ima float vector
701 lp_build_cube_coord(struct lp_build_context
*coord_bld
,
702 LLVMValueRef sign
, int negate_coord
,
703 LLVMValueRef coord
, LLVMValueRef ima
)
705 /* return negate(coord) * ima * sign + 0.5; */
706 LLVMValueRef half
= lp_build_const_vec(coord_bld
->type
, 0.5);
709 assert(negate_coord
== +1 || negate_coord
== -1);
711 if (negate_coord
== -1) {
712 coord
= lp_build_negate(coord_bld
, coord
);
715 res
= lp_build_mul(coord_bld
, coord
, ima
);
717 sign
= lp_build_broadcast_scalar(coord_bld
, sign
);
718 res
= lp_build_mul(coord_bld
, res
, sign
);
720 res
= lp_build_add(coord_bld
, res
, half
);
726 /** Helper used by lp_build_cube_lookup()
727 * Return (major_coord >= 0) ? pos_face : neg_face;
730 lp_build_cube_face(struct lp_build_sample_context
*bld
,
731 LLVMValueRef major_coord
,
732 unsigned pos_face
, unsigned neg_face
)
734 LLVMValueRef cmp
= LLVMBuildFCmp(bld
->builder
, LLVMRealUGE
,
736 bld
->float_bld
.zero
, "");
737 LLVMValueRef pos
= LLVMConstInt(LLVMInt32Type(), pos_face
, 0);
738 LLVMValueRef neg
= LLVMConstInt(LLVMInt32Type(), neg_face
, 0);
739 LLVMValueRef res
= LLVMBuildSelect(bld
->builder
, cmp
, pos
, neg
, "");
746 * Generate code to do cube face selection and compute per-face texcoords.
749 lp_build_cube_lookup(struct lp_build_sample_context
*bld
,
754 LLVMValueRef
*face_s
,
755 LLVMValueRef
*face_t
)
757 struct lp_build_context
*float_bld
= &bld
->float_bld
;
758 struct lp_build_context
*coord_bld
= &bld
->coord_bld
;
759 LLVMValueRef rx
, ry
, rz
;
760 LLVMValueRef arx
, ary
, arz
;
761 LLVMValueRef c25
= LLVMConstReal(LLVMFloatType(), 0.25);
762 LLVMValueRef arx_ge_ary
, arx_ge_arz
;
763 LLVMValueRef ary_ge_arx
, ary_ge_arz
;
764 LLVMValueRef arx_ge_ary_arz
, ary_ge_arx_arz
;
765 LLVMValueRef rx_pos
, ry_pos
, rz_pos
;
767 assert(bld
->coord_bld
.type
.length
== 4);
770 * Use the average of the four pixel's texcoords to choose the face.
772 rx
= lp_build_mul(float_bld
, c25
,
773 lp_build_sum_vector(&bld
->coord_bld
, s
));
774 ry
= lp_build_mul(float_bld
, c25
,
775 lp_build_sum_vector(&bld
->coord_bld
, t
));
776 rz
= lp_build_mul(float_bld
, c25
,
777 lp_build_sum_vector(&bld
->coord_bld
, r
));
779 arx
= lp_build_abs(float_bld
, rx
);
780 ary
= lp_build_abs(float_bld
, ry
);
781 arz
= lp_build_abs(float_bld
, rz
);
784 * Compare sign/magnitude of rx,ry,rz to determine face
786 arx_ge_ary
= LLVMBuildFCmp(bld
->builder
, LLVMRealUGE
, arx
, ary
, "");
787 arx_ge_arz
= LLVMBuildFCmp(bld
->builder
, LLVMRealUGE
, arx
, arz
, "");
788 ary_ge_arx
= LLVMBuildFCmp(bld
->builder
, LLVMRealUGE
, ary
, arx
, "");
789 ary_ge_arz
= LLVMBuildFCmp(bld
->builder
, LLVMRealUGE
, ary
, arz
, "");
791 arx_ge_ary_arz
= LLVMBuildAnd(bld
->builder
, arx_ge_ary
, arx_ge_arz
, "");
792 ary_ge_arx_arz
= LLVMBuildAnd(bld
->builder
, ary_ge_arx
, ary_ge_arz
, "");
794 rx_pos
= LLVMBuildFCmp(bld
->builder
, LLVMRealUGE
, rx
, float_bld
->zero
, "");
795 ry_pos
= LLVMBuildFCmp(bld
->builder
, LLVMRealUGE
, ry
, float_bld
->zero
, "");
796 rz_pos
= LLVMBuildFCmp(bld
->builder
, LLVMRealUGE
, rz
, float_bld
->zero
, "");
799 struct lp_build_flow_context
*flow_ctx
;
800 struct lp_build_if_state if_ctx
;
802 flow_ctx
= lp_build_flow_create(bld
->builder
);
803 lp_build_flow_scope_begin(flow_ctx
);
805 *face_s
= bld
->coord_bld
.undef
;
806 *face_t
= bld
->coord_bld
.undef
;
807 *face
= bld
->int_bld
.undef
;
809 lp_build_name(*face_s
, "face_s");
810 lp_build_name(*face_t
, "face_t");
811 lp_build_name(*face
, "face");
813 lp_build_flow_scope_declare(flow_ctx
, face_s
);
814 lp_build_flow_scope_declare(flow_ctx
, face_t
);
815 lp_build_flow_scope_declare(flow_ctx
, face
);
817 lp_build_if(&if_ctx
, flow_ctx
, bld
->builder
, arx_ge_ary_arz
);
820 LLVMValueRef sign
= lp_build_sgn(float_bld
, rx
);
821 LLVMValueRef ima
= lp_build_cube_ima(coord_bld
, s
);
822 *face_s
= lp_build_cube_coord(coord_bld
, sign
, +1, r
, ima
);
823 *face_t
= lp_build_cube_coord(coord_bld
, NULL
, +1, t
, ima
);
824 *face
= lp_build_cube_face(bld
, rx
,
826 PIPE_TEX_FACE_NEG_X
);
828 lp_build_else(&if_ctx
);
830 struct lp_build_flow_context
*flow_ctx2
;
831 struct lp_build_if_state if_ctx2
;
833 LLVMValueRef face_s2
= bld
->coord_bld
.undef
;
834 LLVMValueRef face_t2
= bld
->coord_bld
.undef
;
835 LLVMValueRef face2
= bld
->int_bld
.undef
;
837 flow_ctx2
= lp_build_flow_create(bld
->builder
);
838 lp_build_flow_scope_begin(flow_ctx2
);
839 lp_build_flow_scope_declare(flow_ctx2
, &face_s2
);
840 lp_build_flow_scope_declare(flow_ctx2
, &face_t2
);
841 lp_build_flow_scope_declare(flow_ctx2
, &face2
);
843 ary_ge_arx_arz
= LLVMBuildAnd(bld
->builder
, ary_ge_arx
, ary_ge_arz
, "");
845 lp_build_if(&if_ctx2
, flow_ctx2
, bld
->builder
, ary_ge_arx_arz
);
848 LLVMValueRef sign
= lp_build_sgn(float_bld
, ry
);
849 LLVMValueRef ima
= lp_build_cube_ima(coord_bld
, t
);
850 face_s2
= lp_build_cube_coord(coord_bld
, NULL
, -1, s
, ima
);
851 face_t2
= lp_build_cube_coord(coord_bld
, sign
, -1, r
, ima
);
852 face2
= lp_build_cube_face(bld
, ry
,
854 PIPE_TEX_FACE_NEG_Y
);
856 lp_build_else(&if_ctx2
);
859 LLVMValueRef sign
= lp_build_sgn(float_bld
, rz
);
860 LLVMValueRef ima
= lp_build_cube_ima(coord_bld
, r
);
861 face_s2
= lp_build_cube_coord(coord_bld
, sign
, -1, s
, ima
);
862 face_t2
= lp_build_cube_coord(coord_bld
, NULL
, +1, t
, ima
);
863 face2
= lp_build_cube_face(bld
, rz
,
865 PIPE_TEX_FACE_NEG_Z
);
867 lp_build_endif(&if_ctx2
);
868 lp_build_flow_scope_end(flow_ctx2
);
869 lp_build_flow_destroy(flow_ctx2
);
875 lp_build_endif(&if_ctx
);
876 lp_build_flow_scope_end(flow_ctx
);
877 lp_build_flow_destroy(flow_ctx
);
883 * Compute the partial offset of a pixel block along an arbitrary axis.
885 * @param coord coordinate in pixels
886 * @param stride number of bytes between rows of successive pixel blocks
887 * @param block_length number of pixels in a pixels block along the coordinate
889 * @param out_offset resulting relative offset of the pixel block in bytes
890 * @param out_subcoord resulting sub-block pixel coordinate
893 lp_build_sample_partial_offset(struct lp_build_context
*bld
,
894 unsigned block_length
,
897 LLVMValueRef
*out_offset
,
898 LLVMValueRef
*out_subcoord
)
901 LLVMValueRef subcoord
;
903 if (block_length
== 1) {
904 subcoord
= bld
->zero
;
908 * Pixel blocks have power of two dimensions. LLVM should convert the
909 * rem/div to bit arithmetic.
911 * It does indeed BUT it does transform it to scalar (and back) when doing so
912 * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
913 * The generated code looks seriously unfunny and is quite expensive.
916 LLVMValueRef block_width
= lp_build_const_int_vec(bld
->type
, block_length
);
917 subcoord
= LLVMBuildURem(bld
->builder
, coord
, block_width
, "");
918 coord
= LLVMBuildUDiv(bld
->builder
, coord
, block_width
, "");
920 unsigned logbase2
= util_unsigned_logbase2(block_length
);
921 LLVMValueRef block_shift
= lp_build_const_int_vec(bld
->type
, logbase2
);
922 LLVMValueRef block_mask
= lp_build_const_int_vec(bld
->type
, block_length
- 1);
923 subcoord
= LLVMBuildAnd(bld
->builder
, coord
, block_mask
, "");
924 coord
= LLVMBuildLShr(bld
->builder
, coord
, block_shift
, "");
928 offset
= lp_build_mul(bld
, coord
, stride
);
931 assert(out_subcoord
);
933 *out_offset
= offset
;
934 *out_subcoord
= subcoord
;
939 * Compute the offset of a pixel block.
941 * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
943 * Returns the relative offset and i,j sub-block coordinates
946 lp_build_sample_offset(struct lp_build_context
*bld
,
947 const struct util_format_description
*format_desc
,
951 LLVMValueRef y_stride
,
952 LLVMValueRef z_stride
,
953 LLVMValueRef
*out_offset
,
957 LLVMValueRef x_stride
;
960 x_stride
= lp_build_const_vec(bld
->type
, format_desc
->block
.bits
/8);
962 lp_build_sample_partial_offset(bld
,
963 format_desc
->block
.width
,
968 LLVMValueRef y_offset
;
969 lp_build_sample_partial_offset(bld
,
970 format_desc
->block
.height
,
973 offset
= lp_build_add(bld
, offset
, y_offset
);
980 LLVMValueRef z_offset
;
982 lp_build_sample_partial_offset(bld
,
983 1, /* pixel blocks are always 2D */
986 offset
= lp_build_add(bld
, offset
, z_offset
);
989 *out_offset
= offset
;