/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/**
 * @file
 * Texture sampling -- common code.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_math.h"
#include "lp_bld_arit.h"
#include "lp_bld_const.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_flow.h"
#include "lp_bld_sample.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_type.h"
#include "lp_bld_logic.h"
#include "lp_bld_pack.h"
#include "lp_bld_quad.h"
#include "lp_bld_bitarit.h"
/*
 * Bri-linear factor. Should be greater than one.
 */
#define BRILINEAR_FACTOR 2
/**
 * Does the given texture wrap mode allow sampling the texture border color?
 * XXX maybe move this into gallium util code.
 */
boolean
lp_sampler_wrap_mode_uses_border_color(unsigned mode,
                                       unsigned min_img_filter,
                                       unsigned mag_img_filter)
{
   switch (mode) {
   case PIPE_TEX_WRAP_REPEAT:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      return FALSE;
   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
      if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
          mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
         return FALSE;
      } else {
         return TRUE;
      }
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      return TRUE;
   default:
      assert(0 && "unexpected wrap mode");
      return FALSE;
   }
}
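/*
 * Illustrative note: PIPE_TEX_WRAP_CLAMP with a LINEAR filter can read the
 * border color because the filter footprint of a coordinate near the edge
 * straddles the last texel and the border, while with NEAREST filters the
 * coordinate snaps to the edge texel and the border is never fetched --
 * which is exactly the distinction the CLAMP/MIRROR_CLAMP cases above encode.
 */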
/**
 * Initialize lp_sampler_static_texture_state object with the gallium
 * texture/sampler_view state (this contains the parts which are
 * considered static).
 */
void
lp_sampler_static_texture_state(struct lp_static_texture_state *state,
                                const struct pipe_sampler_view *view)
{
   const struct pipe_resource *texture;

   memset(state, 0, sizeof *state);

   if (!view || !view->texture)
      return;

   texture = view->texture;

   state->format            = view->format;
   state->swizzle_r         = view->swizzle_r;
   state->swizzle_g         = view->swizzle_g;
   state->swizzle_b         = view->swizzle_b;
   state->swizzle_a         = view->swizzle_a;

   state->target            = texture->target;
   state->pot_width         = util_is_power_of_two(texture->width0);
   state->pot_height        = util_is_power_of_two(texture->height0);
   state->pot_depth         = util_is_power_of_two(texture->depth0);
   state->level_zero_only   = !view->u.tex.last_level;

   /*
    * the layer / element / level parameters are all either dynamic
    * state or handled transparently wrt execution.
    */
}
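/*
 * Note on the pot_width/height/depth flags above: when a dimension is a
 * power of two, the generated sampling code can wrap coordinates with cheap
 * bit masks and shifts instead of integer division/modulo, which is why the
 * power-of-two-ness is recorded per texture as part of the static state.
 */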
/**
 * Initialize lp_sampler_static_sampler_state object with the gallium sampler
 * state (this contains the parts which are considered static).
 */
void
lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
                                const struct pipe_sampler_state *sampler)
{
   memset(state, 0, sizeof *state);

   if (!sampler)
      return;

   /*
    * We don't copy sampler state over unless it is actually enabled, to avoid
    * spurious recompiles, as the sampler static state is part of the shader
    * key.
    *
    * Ideally the state tracker or cso_cache module would make all state
    * canonical, but until that happens it's better to be safe than sorry here.
    *
    * XXX: Actually there's much more than can be done here, especially
    * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
    */

   state->wrap_s            = sampler->wrap_s;
   state->wrap_t            = sampler->wrap_t;
   state->wrap_r            = sampler->wrap_r;
   state->min_img_filter    = sampler->min_img_filter;
   state->mag_img_filter    = sampler->mag_img_filter;

   if (sampler->max_lod > 0.0f) {
      state->min_mip_filter = sampler->min_mip_filter;
   } else {
      state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
   }

   if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      if (sampler->lod_bias != 0.0f) {
         state->lod_bias_non_zero = 1;
      }

      /* If min_lod == max_lod we can greatly simplify mipmap selection.
       * This is a case that occurs during automatic mipmap generation.
       */
      if (sampler->min_lod == sampler->max_lod) {
         state->min_max_lod_equal = 1;
      } else {
         if (sampler->min_lod > 0.0f) {
            state->apply_min_lod = 1;
         }

         /*
          * XXX this won't do anything with the mesa state tracker which always
          * sets max_lod to not more than actually present mip maps...
          */
         if (sampler->max_lod < (PIPE_MAX_TEXTURE_LEVELS - 1)) {
            state->apply_max_lod = 1;
         }
      }
   }

   state->compare_mode      = sampler->compare_mode;
   if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
      state->compare_func   = sampler->compare_func;
   }

   state->normalized_coords = sampler->normalized_coords;
}
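/*
 * The max_lod > 0.0f check above is what effectively turns mipmapping off
 * for single-level samplers: if the lod is clamped to at most 0, only the
 * base level can ever be sampled, so min_mip_filter is forced to NONE and
 * the generated code can skip mipmap selection entirely.
 */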
/**
 * Generate code to compute coordinate gradient (rho).
 * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
 *
 * The resulting rho has bld->levelf format (per quad or per element).
 */
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
             unsigned texture_unit,
             LLVMValueRef s,
             LLVMValueRef t,
             LLVMValueRef r,
             LLVMValueRef cube_rho,
             const struct lp_derivatives *derivs)
{
   struct gallivm_state *gallivm = bld->gallivm;
   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *levelf_bld = &bld->levelf_bld;
   const unsigned dims = bld->dims;
   LLVMValueRef ddx_ddy[2];
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
   LLVMValueRef rho_vec;
   LLVMValueRef int_size, float_size;
   LLVMValueRef rho;
   LLVMValueRef first_level, first_level_vec;
   unsigned length = coord_bld->type.length;
   unsigned num_quads = length / 4;
   boolean rho_per_quad = levelf_bld->type.length != length;
   unsigned i;
   LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
   LLVMValueRef rho_xvec, rho_yvec;

   /* Note that all simplified calculations will only work for isotropic filtering */

   /*
    * rho calcs are always per quad except for explicit derivs (excluding
    * the messy cube maps for now) when requested.
    */

   first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                 bld->gallivm, texture_unit);
   first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
   int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec);
   float_size = lp_build_int_to_float(float_size_bld, int_size);

   if (cube_rho) {
      LLVMValueRef cubesize;
      LLVMValueRef index0 = lp_build_const_int32(gallivm, 0);

      /*
       * Cube map code did already everything except size mul and per-quad extraction.
       * Luckily cube maps are always quadratic!
       */
      if (rho_per_quad) {
         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                         levelf_bld->type, cube_rho, 0);
      }
      else {
         rho = lp_build_swizzle_scalar_aos(coord_bld, cube_rho, 0, 4);
      }
      if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
         rho = lp_build_sqrt(levelf_bld, rho);
      }
      /* Could optimize this for single quad just skip the broadcast */
      cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                            levelf_bld->type, float_size, index0);
      rho = lp_build_mul(levelf_bld, cubesize, rho);
   }
   else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
      LLVMValueRef ddmax[3], ddx[3], ddy[3];
      for (i = 0; i < dims; i++) {
         LLVMValueRef floatdim;
         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);

         floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                               coord_bld->type, float_size, indexi);

         if ((gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) && (dims > 1)) {
            ddx[i] = lp_build_mul(coord_bld, floatdim, derivs->ddx[i]);
            ddy[i] = lp_build_mul(coord_bld, floatdim, derivs->ddy[i]);
            ddx[i] = lp_build_mul(coord_bld, ddx[i], ddx[i]);
            ddy[i] = lp_build_mul(coord_bld, ddy[i], ddy[i]);
         }
         else {
            LLVMValueRef tmpx, tmpy;
            tmpx = lp_build_abs(coord_bld, derivs->ddx[i]);
            tmpy = lp_build_abs(coord_bld, derivs->ddy[i]);
            ddmax[i] = lp_build_max(coord_bld, tmpx, tmpy);
            ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
         }
      }
      if ((gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) && (dims > 1)) {
         rho_xvec = lp_build_add(coord_bld, ddx[0], ddx[1]);
         rho_yvec = lp_build_add(coord_bld, ddy[0], ddy[1]);
         if (dims > 2) {
            rho_xvec = lp_build_add(coord_bld, rho_xvec, ddx[2]);
            rho_yvec = lp_build_add(coord_bld, rho_yvec, ddy[2]);
         }
         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
         if (rho_per_quad) {
            /*
             * note for this case without per-pixel lod could reduce math more
             * (at some shuffle cost), but for now only do sqrt after packing,
             * otherwise would also need different code to per-pixel lod case.
             */
            rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                            levelf_bld->type, rho, 0);
         }
         rho = lp_build_sqrt(levelf_bld, rho);
      }
      else {
         rho = ddmax[0];
         if (dims > 1) {
            rho = lp_build_max(coord_bld, rho, ddmax[1]);
            if (dims > 2) {
               rho = lp_build_max(coord_bld, rho, ddmax[2]);
            }
         }
         if (rho_per_quad) {
            /*
             * rho_vec contains per-pixel rho, convert to scalar per quad.
             */
            rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                            levelf_bld->type, rho, 0);
         }
      }
   }
   else {
      /*
       * This looks all a bit complex, but it's not that bad
       * (the shuffle code makes it look worse than it is).
       * Still, might not be ideal for all cases.
       */
      static const unsigned char swizzle0[] = { /* no-op swizzle */
         0, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };
      static const unsigned char swizzle1[] = {
         1, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };
      static const unsigned char swizzle2[] = {
         2, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };

      if (dims < 2) {
         ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
      }
      else if (dims >= 2) {
         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
         if (dims > 2) {
            ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
         }
      }

      if ((gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) && (dims > 1)) {
         static const unsigned char swizzle01[] = { /* no-op swizzle */
            0, 1,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         static const unsigned char swizzle23[] = {
            2, 3,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];

         for (i = 0; i < num_quads; i++) {
            shuffles[i*4+0] = shuffles[i*4+1] = index0;
            shuffles[i*4+2] = shuffles[i*4+3] = index1;
         }
         floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
                                           LLVMConstVector(shuffles, length), "");
         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], floatdim);
         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
         ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
         ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
         rho_vec = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);

         if (dims > 2) {
            static const unsigned char swizzle02[] = {
               0, 2,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                                  coord_bld->type, float_size, index2);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], floatdim);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
            ddx_ddy[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
            rho_vec = lp_build_add(coord_bld, rho_vec, ddx_ddy[1]);
         }

         rho_xvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
         rho_yvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         if (rho_per_quad) {
            rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                            levelf_bld->type, rho, 0);
         }
         else {
            /*
             * on some cpus with half-speed 8-wide sqrt (e.g. SNB but not IVB)
             * doing pack/sqrt/unpack/swizzle might be better for 8-wide case,
             * same is true for cpus having faster scalars than 4-wide vecs
             * for 4-wide case (where pack/unpack would be no-ops anyway).
             * (Same is true really for cube_rho case above.)
             */
            rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
         }
         rho = lp_build_sqrt(levelf_bld, rho);
      }
      else {
         ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
         if (dims > 2) {
            ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
         }

         if (dims < 2) {
            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle0);
            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
         }
         else if (dims == 2) {
            static const unsigned char swizzle02[] = {
               0, 2,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            static const unsigned char swizzle13[] = {
               1, 3,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle02);
            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle13);
         }
         else {
            LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
            LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
            assert(dims == 3);
            for (i = 0; i < num_quads; i++) {
               shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
               shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
               shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
               shuffles1[4*i + 3] = i32undef;
               shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
               shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
               shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2);
               shuffles2[4*i + 3] = i32undef;
            }
            rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                              LLVMConstVector(shuffles1, length), "");
            rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                              LLVMConstVector(shuffles2, length), "");
         }

         rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         if (bld->coord_type.length > 4) {
            /* expand size to each quad */
            if (dims > 1) {
               /* could use some broadcast_vector helper for this? */
               LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
               for (i = 0; i < num_quads; i++) {
                  src[i] = float_size;
               }
               float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads);
            }
            else {
               float_size = lp_build_broadcast_scalar(coord_bld, float_size);
            }
            rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);

            if (dims <= 1) {
               rho = rho_vec;
            }
            else {
               if (dims >= 2) {
                  LLVMValueRef rho_s, rho_t, rho_r;

                  rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
                  rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);

                  rho = lp_build_max(coord_bld, rho_s, rho_t);

                  if (dims >= 3) {
                     rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
                     rho = lp_build_max(coord_bld, rho, rho_r);
                  }
               }
            }
            if (rho_per_quad) {
               rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                               levelf_bld->type, rho, 0);
            }
            else {
               rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
            }
         }
         else {
            if (dims <= 1) {
               rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
            }
            rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);

            if (dims <= 1) {
               rho = rho_vec;
            }
            else {
               if (dims >= 2) {
                  LLVMValueRef rho_s, rho_t, rho_r;

                  rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
                  rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");

                  rho = lp_build_max(float_bld, rho_s, rho_t);

                  if (dims >= 3) {
                     rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
                     rho = lp_build_max(float_bld, rho, rho_r);
                  }
               }
            }
            rho = lp_build_broadcast_scalar(levelf_bld, rho);
         }
      }
   }

   return rho;
}
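/*
 * In scalar terms the value computed above is roughly, for a 2D texture of
 * size (w, h):
 *
 *    rho = max(|ds/dx|*w, |ds/dy|*w, |dt/dx|*h, |dt/dy|*h)
 *
 * and with GALLIVM_DEBUG_NO_RHO_APPROX the more accurate
 *
 *    rho = max(sqrt((ds/dx*w)^2 + (dt/dx*h)^2),
 *              sqrt((ds/dy*w)^2 + (dt/dy*h)^2))
 *
 * i.e. the maximum amount of texel space crossed per pixel step, of which
 * log2() is later taken to obtain the lod.
 */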
/**
 * Bri-linear lod computation
 *
 * Use a piece-wise linear approximation of log2 such that:
 * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
 * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
 *   with the steepness specified in 'factor'
 * - exact result for 0.5, 1.5, etc.
 *
 * This is a technique also commonly used in hardware:
 * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
 *
 * TODO: For correctness, this should only be applied when texture is known to
 * have regular mipmaps, i.e., mipmaps derived from the base level.
 *
 * TODO: This could be done in fixed point, where applicable.
 */
static void
lp_build_brilinear_lod(struct lp_build_context *bld,
                       LLVMValueRef lod,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   LLVMValueRef lod_fpart;
   double pre_offset = (factor - 0.5)/factor - 0.5;
   double post_offset = 1 - factor;

   if (0) {
      lp_build_printf(bld->gallivm, "lod = %f\n", lod);
   }

   lod = lp_build_add(bld, lod,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_offset));

   lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);

   lod_fpart = lp_build_mul(bld, lod_fpart,
                            lp_build_const_vec(bld->gallivm, bld->type, factor));

   lod_fpart = lp_build_add(bld, lod_fpart,
                            lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * It's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_fpart = lod_fpart;

   if (0) {
      lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
      lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
   }
}
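/*
 * Worked example: with BRILINEAR_FACTOR == 2 the constants above are
 * pre_offset = (2 - 0.5)/2 - 0.5 = 0.25 and post_offset = -1.  For
 * lod = 0.5 this yields ipart = 0 and fpart = 0.75*2 - 1 = 0.5 (exact),
 * while any lod within 0.25 of an integer yields a non-positive fpart and
 * therefore snaps to that single level; only the middle half of each
 * interval between two levels is actually blended.
 */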
/**
 * Combined log2 and brilinear lod computation.
 *
 * It's essentially identical to calling lp_build_fast_log2() and
 * lp_build_brilinear_lod() above, but by combining the two we can compute
 * the integer and fractional parts independently.
 */
static void
lp_build_brilinear_rho(struct lp_build_context *bld,
                       LLVMValueRef rho,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   LLVMValueRef lod_ipart;
   LLVMValueRef lod_fpart;

   const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
   const double post_offset = 1 - 2*factor;

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, rho));

   /*
    * The pre factor will make the intersections with the exact powers of two
    * happen precisely where we want them to be, which means that the integer
    * part will not need any post adjustments.
    */
   rho = lp_build_mul(bld, rho,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_factor));

   /* ipart = ifloor(log2(rho)) */
   lod_ipart = lp_build_extract_exponent(bld, rho, 0);

   /* fpart = rho / 2**ipart */
   lod_fpart = lp_build_extract_mantissa(bld, rho);

   lod_fpart = lp_build_mul(bld, lod_fpart,
                            lp_build_const_vec(bld->gallivm, bld->type, factor));

   lod_fpart = lp_build_add(bld, lod_fpart,
                            lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_ipart = lod_ipart;
   *out_lod_fpart = lod_fpart;
}
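/*
 * Here lp_build_extract_exponent/mantissa split rho (after the pre_factor
 * scaling) into ipart = floor(log2(rho)) and a mantissa in [1, 2); the
 * mantissa is then remapped linearly by 'factor' and 'post_offset'.  This is
 * the same piece-wise linear log2 approximation as lp_build_brilinear_lod,
 * just computed without ever forming lod = log2(rho) explicitly.
 */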
/**
 * Generate code to compute texture level of detail (lambda).
 * \param derivs  partial derivatives of (s, t, r, q) with respect to X and Y
 * \param lod_bias  optional float vector with the shader lod bias
 * \param explicit_lod  optional float vector with the explicit lod
 *
 * The resulting lod is scalar per quad, so only the first value per quad
 * passed in from lod_bias, explicit_lod is used.
 */
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
                      unsigned texture_unit,
                      unsigned sampler_unit,
                      LLVMValueRef s,
                      LLVMValueRef t,
                      LLVMValueRef r,
                      LLVMValueRef cube_rho,
                      const struct lp_derivatives *derivs,
                      LLVMValueRef lod_bias, /* optional */
                      LLVMValueRef explicit_lod, /* optional */
                      unsigned mip_filter,
                      LLVMValueRef *out_lod_ipart,
                      LLVMValueRef *out_lod_fpart)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *levelf_bld = &bld->levelf_bld;
   LLVMValueRef lod;

   *out_lod_ipart = bld->leveli_bld.zero;
   *out_lod_fpart = levelf_bld->zero;

   if (bld->static_sampler_state->min_max_lod_equal) {
      /* User is forcing sampling from a particular mipmap level.
       * This is hit during mipmap generation.
       */
      LLVMValueRef min_lod =
         bld->dynamic_state->min_lod(bld->dynamic_state,
                                     bld->gallivm, sampler_unit);

      lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
   }
   else {
      if (explicit_lod) {
         if (bld->num_lods != bld->coord_type.length)
            lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
                                            levelf_bld->type, explicit_lod, 0);
         else
            lod = explicit_lod;
      }
      else {
         LLVMValueRef rho;

         rho = lp_build_rho(bld, texture_unit, s, t, r, cube_rho, derivs);

         /*
          * Compute lod = log2(rho)
          */

         if (!lod_bias &&
             !bld->static_sampler_state->lod_bias_non_zero &&
             !bld->static_sampler_state->apply_max_lod &&
             !bld->static_sampler_state->apply_min_lod) {
            /*
             * Special case when there are no post-log2 adjustments, which
             * saves instructions by keeping the integer and fractional lod
             * computations separate from the start.
             */
            if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
                mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
               /*
                * FIXME: this is not entirely correct, as out_lod_ipart is used
                * both for mip level determination as well as mag/min switchover
                * point (if different min/mag filters are used). In particular,
                * lod values between [-0.5,0] (rho between [sqrt(2), 1.0]) will
                * incorrectly use min filter instead of mag (the non-optimized
                * calculation further down has exactly the same problem).
                */
               *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
               *out_lod_fpart = levelf_bld->zero;
               return;
            }
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
               lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
                                      out_lod_ipart, out_lod_fpart);
               return;
            }
         }

         if (0) {
            lod = lp_build_log2(levelf_bld, rho);
         }
         else {
            lod = lp_build_fast_log2(levelf_bld, rho);
         }

         /* add shader lod bias */
         if (lod_bias) {
            if (bld->num_lods != bld->coord_type.length)
               lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
                                                    levelf_bld->type, lod_bias, 0);
            lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
         }
      }

      /* add sampler lod bias */
      if (bld->static_sampler_state->lod_bias_non_zero) {
         LLVMValueRef sampler_lod_bias =
            bld->dynamic_state->lod_bias(bld->dynamic_state,
                                         bld->gallivm, sampler_unit);
         sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld,
                                                      sampler_lod_bias);
         lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
      }

      /* clamp lod */
      if (bld->static_sampler_state->apply_max_lod) {
         LLVMValueRef max_lod =
            bld->dynamic_state->max_lod(bld->dynamic_state,
                                        bld->gallivm, sampler_unit);
         max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod);

         lod = lp_build_min(levelf_bld, lod, max_lod);
      }
      if (bld->static_sampler_state->apply_min_lod) {
         LLVMValueRef min_lod =
            bld->dynamic_state->min_lod(bld->dynamic_state,
                                        bld->gallivm, sampler_unit);
         min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod);

         lod = lp_build_max(levelf_bld, lod, min_lod);
      }
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
         lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
                                out_lod_ipart, out_lod_fpart);
      }
      else {
         lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart, out_lod_fpart);
      }

      lp_build_name(*out_lod_fpart, "lod_fpart");
   }
   else {
      *out_lod_ipart = lp_build_iround(levelf_bld, lod);
   }

   lp_build_name(*out_lod_ipart, "lod_ipart");
}
/**
 * For PIPE_TEX_MIPFILTER_NEAREST, convert int part of lod
 * to actual mip level.
 * Note: this is all scalar per quad code.
 * \param lod_ipart  int texture level of detail
 * \param level_out  returns integer
 * \param out_of_bounds  returns per coord out_of_bounds mask if provided
 */
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                           unsigned texture_unit,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *level_out,
                           LLVMValueRef *out_of_bounds)
{
   struct lp_build_context *leveli_bld = &bld->leveli_bld;
   LLVMValueRef first_level, last_level, level;

   first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                 bld->gallivm, texture_unit);
   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
                                               bld->gallivm, texture_unit);
   first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
   last_level = lp_build_broadcast_scalar(leveli_bld, last_level);

   level = lp_build_add(leveli_bld, lod_ipart, first_level);

   if (out_of_bounds) {
      LLVMValueRef out, out1;
      out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
      out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
      out = lp_build_or(leveli_bld, out, out1);
      if (bld->num_lods == bld->coord_bld.type.length) {
         *out_of_bounds = out;
      }
      else if (bld->num_lods == 1) {
         *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
      }
      else {
         assert(bld->num_lods == bld->coord_bld.type.length / 4);
         *out_of_bounds = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                                leveli_bld->type,
                                                                bld->int_coord_bld.type,
                                                                out);
      }
      *level_out = level;
   }
   else {
      /* clamp level to legal range of levels */
      *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
   }
}
/**
 * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad int LOD(s) to two (per-quad)
 * (adjacent) mipmap level indexes, and fix up float lod part accordingly.
 * Later, we'll sample from those two mipmap levels and interpolate between them.
 */
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                           unsigned texture_unit,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *lod_fpart_inout,
                           LLVMValueRef *level0_out,
                           LLVMValueRef *level1_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *leveli_bld = &bld->leveli_bld;
   struct lp_build_context *levelf_bld = &bld->levelf_bld;
   LLVMValueRef first_level, last_level;
   LLVMValueRef clamp_min;
   LLVMValueRef clamp_max;

   first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                 bld->gallivm, texture_unit);
   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
                                               bld->gallivm, texture_unit);
   first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
   last_level = lp_build_broadcast_scalar(leveli_bld, last_level);

   *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
   *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);

   /*
    * Clamp both *level0_out and *level1_out to [first_level, last_level], with
    * the minimum number of comparisons, and zeroing lod_fpart in the extreme
    * ends in the process.
    */

   /*
    * This code (vector select in particular) only works with llvm 3.1
    * (if there's more than one quad, with x86 backend). Might consider
    * converting to our lp_bld_logic helpers.
    */
#if HAVE_LLVM < 0x0301
   assert(leveli_bld->type.length == 1);
#endif

   /* *level0_out < first_level */
   clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
                             *level0_out, first_level,
                             "clamp_lod_to_first");

   *level0_out = LLVMBuildSelect(builder, clamp_min,
                                 first_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_min,
                                 first_level, *level1_out, "");

   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
                                      levelf_bld->zero, *lod_fpart_inout, "");

   /* *level0_out >= last_level */
   clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
                             *level0_out, last_level,
                             "clamp_lod_to_last");

   *level0_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level0_out, "");

   *level1_out = LLVMBuildSelect(builder, clamp_max,
                                 last_level, *level1_out, "");

   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
                                      levelf_bld->zero, *lod_fpart_inout, "");

   lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
   lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
   lp_build_name(*lod_fpart_inout, "texture%u_mipweight", texture_unit);
}
/**
 * Return pointer to a single mipmap level.
 * \param level  integer mipmap level
 */
LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
                          LLVMValueRef level)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef indexes[2], data_ptr, mip_offset;

   indexes[0] = lp_build_const_int32(bld->gallivm, 0);
   indexes[1] = level;
   mip_offset = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
   mip_offset = LLVMBuildLoad(builder, mip_offset, "");
   data_ptr = LLVMBuildGEP(builder, bld->base_ptr, &mip_offset, 1, "");
   return data_ptr;
}
/**
 * Return (per-pixel) offsets to mip levels.
 * \param level  integer mipmap level
 */
LLVMValueRef
lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
                         LLVMValueRef level)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef indexes[2], offsets, offset1;

   indexes[0] = lp_build_const_int32(bld->gallivm, 0);
   if (bld->num_lods == 1) {
      indexes[1] = level;
      offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
      offset1 = LLVMBuildLoad(builder, offset1, "");
      offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
   }
   else if (bld->num_lods == bld->coord_bld.type.length / 4) {
      unsigned i;

      offsets = bld->int_coord_bld.undef;
      for (i = 0; i < bld->num_lods; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
         LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
         indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
         offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
         offset1 = LLVMBuildLoad(builder, offset1, "");
         offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, "");
      }
      offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0, 4);
   }
   else {
      unsigned i;

      assert (bld->num_lods == bld->coord_bld.type.length);

      offsets = bld->int_coord_bld.undef;
      for (i = 0; i < bld->num_lods; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
         indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
         offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
         offset1 = LLVMBuildLoad(builder, offset1, "");
         offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexi, "");
      }
   }
   return offsets;
}
/**
 * Codegen equivalent for u_minify().
 * Return max(1, base_size >> level);
 */
LLVMValueRef
lp_build_minify(struct lp_build_context *bld,
                LLVMValueRef base_size,
                LLVMValueRef level)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   assert(lp_check_value(bld->type, base_size));
   assert(lp_check_value(bld->type, level));

   if (level == bld->zero) {
      /* if we're using mipmap level zero, no minification is needed */
      return base_size;
   }
   else {
      LLVMValueRef size =
         LLVMBuildLShr(builder, base_size, level, "minify");
      assert(bld->type.sign);
      size = lp_build_max(bld, size, bld->one);
      return size;
   }
}
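/*
 * Worked example: a 13-texel-wide level 0 gives max(1, 13 >> 2) = 3 texels
 * at level 2, matching u_minify(); the max with one keeps degenerate
 * dimensions clamped to a single texel.
 */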
/**
 * Dereference stride_array[mipmap_level] array to get a stride.
 * Return stride as a vector.
 */
static LLVMValueRef
lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
                              LLVMValueRef stride_array, LLVMValueRef level)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef indexes[2], stride, stride1;
   indexes[0] = lp_build_const_int32(bld->gallivm, 0);
   if (bld->num_lods == 1) {
      indexes[1] = level;
      stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
      stride1 = LLVMBuildLoad(builder, stride1, "");
      stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
   }
   else if (bld->num_lods == bld->coord_bld.type.length / 4) {
      LLVMValueRef stride1;
      unsigned i;

      stride = bld->int_coord_bld.undef;
      for (i = 0; i < bld->num_lods; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
         LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
         indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
         stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
         stride1 = LLVMBuildLoad(builder, stride1, "");
         stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
      }
      stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
   }
   else {
      LLVMValueRef stride1;
      unsigned i;

      assert (bld->num_lods == bld->coord_bld.type.length);

      stride = bld->int_coord_bld.undef;
      for (i = 0; i < bld->coord_bld.type.length; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
         indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
         stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
         stride1 = LLVMBuildLoad(builder, stride1, "");
         stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
      }
   }
   return stride;
}
/**
 * When sampling a mipmap, we need to compute the width, height, depth
 * of the source levels from the level indexes.  This helper function
 * does that.
 */
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
                            LLVMValueRef ilevel,
                            LLVMValueRef *out_size,
                            LLVMValueRef *row_stride_vec,
                            LLVMValueRef *img_stride_vec)
{
   const unsigned dims = bld->dims;
   LLVMValueRef ilevel_vec;

   /*
    * Compute width, height, depth at mipmap level 'ilevel'
    */
   if (bld->num_lods == 1) {
      ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
      *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
   }
   else {
      LLVMValueRef int_size_vec;
      LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
      unsigned num_quads = bld->coord_bld.type.length / 4;
      unsigned i;

      if (bld->num_lods == num_quads) {
         /*
          * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
          * intel "forgot" the variable shift count instruction until avx2.
          * A harmless 8x32 shift gets translated into 32 instructions
          * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
          * unable to recognize if there are really just 2 different shift
          * count values. So do the shift 4-wide before expansion.
          */
         struct lp_build_context bld4;
         struct lp_type type4;

         type4 = bld->int_coord_bld.type;
         type4.length = 4;

         lp_build_context_init(&bld4, bld->gallivm, type4);

         if (bld->dims == 1) {
            assert(bld->int_size_in_bld.type.length == 1);
            int_size_vec = lp_build_broadcast_scalar(&bld4,
                                                     bld->int_size);
         }
         else {
            assert(bld->int_size_in_bld.type.length == 4);
            int_size_vec = bld->int_size;
         }

         for (i = 0; i < num_quads; i++) {
            LLVMValueRef ileveli;
            LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);

            ileveli = lp_build_extract_broadcast(bld->gallivm,
                                                 bld->leveli_bld.type,
                                                 bld4.type,
                                                 ilevel,
                                                 indexi);
            tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli);
         }
         /*
          * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1,
          * [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
          */
         *out_size = lp_build_concat(bld->gallivm,
                                     tmp,
                                     bld4.type,
                                     num_quads);
      }
      else {
         /* FIXME: this is terrible and results in _huge_ vector
          * (for the dims > 1 case).
          * Should refactor this (together with extract_image_sizes) and do
          * something more useful. Could for instance if we have width,height
          * with 4-wide vector pack all elements into a 8xi16 vector
          * (on which we can still do useful math) instead of using a 16xi32
          * vector.
          * FIXME: some callers can't handle this yet.
          * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
          * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
          */
         assert(bld->num_lods == bld->coord_bld.type.length);
         if (bld->dims == 1) {
            assert(bld->int_size_in_bld.type.length == 1);
            int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
                                                     bld->int_size);
            /* vector shift with variable shift count alert... */
            *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel);
         }
         else {
            LLVMValueRef ilevel1;
            for (i = 0; i < bld->num_lods; i++) {
               LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
               ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
                                                    bld->int_size_in_bld.type, ilevel, indexi);
               tmp[i] = bld->int_size;
               tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1);
            }
            *out_size = lp_build_concat(bld->gallivm, tmp,
                                        bld->int_size_in_bld.type,
                                        bld->num_lods);
         }
      }
   }

   if (dims >= 2) {
      *row_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->row_stride_array,
                                                      ilevel);
   }
   if (dims == 3 ||
       bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      *img_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->img_stride_array,
                                                      ilevel);
   }
}
/**
 * Extract and broadcast texture size.
 *
 * @param size_type   type of the texture size vector (either
 *                    bld->int_size_type or bld->float_size_type)
 * @param coord_type  type of the coordinate vector (either
 *                    bld->int_coord_type or bld->coord_type)
 * @param size        vector with the texture size (width, height, depth)
 */
void
lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
                             struct lp_build_context *size_bld,
                             struct lp_type coord_type,
                             LLVMValueRef size,
                             LLVMValueRef *out_width,
                             LLVMValueRef *out_height,
                             LLVMValueRef *out_depth)
{
   const unsigned dims = bld->dims;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   struct lp_type size_type = size_bld->type;

   if (bld->num_lods == 1) {
      *out_width = lp_build_extract_broadcast(bld->gallivm,
                                              size_type,
                                              coord_type,
                                              size,
                                              LLVMConstInt(i32t, 0, 0));
      if (dims >= 2) {
         *out_height = lp_build_extract_broadcast(bld->gallivm,
                                                  size_type,
                                                  coord_type,
                                                  size,
                                                  LLVMConstInt(i32t, 1, 0));
         if (dims == 3) {
            *out_depth = lp_build_extract_broadcast(bld->gallivm,
                                                    size_type,
                                                    coord_type,
                                                    size,
                                                    LLVMConstInt(i32t, 2, 0));
         }
      }
   }
   else {
      unsigned num_quads = bld->coord_bld.type.length / 4;

      if (dims == 1) {
         *out_width = size;
      }
      else if (bld->num_lods == num_quads) {
         *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
         if (dims >= 2) {
            *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
            if (dims == 3) {
               *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
            }
         }
      }
      else {
         assert(bld->num_lods == bld->coord_type.length);
         *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                coord_type, size, 0);
         if (dims >= 2) {
            *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                    coord_type, size, 1);
            if (dims == 3) {
               *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                      coord_type, size, 2);
            }
         }
      }
   }
}
/**
 * Unnormalize coords.
 *
 * @param flt_size  vector with the (float) texture size (width, height, depth)
 */
void
lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
                             LLVMValueRef flt_size,
                             LLVMValueRef *s,
                             LLVMValueRef *t,
                             LLVMValueRef *r)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width;
   LLVMValueRef height;
   LLVMValueRef depth;

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width,
                                &height,
                                &depth);

   /* s = s * width, t = t * height */
   *s = lp_build_mul(&bld->coord_bld, *s, width);
   if (dims >= 2) {
      *t = lp_build_mul(&bld->coord_bld, *t, height);
      if (dims >= 3) {
         *r = lp_build_mul(&bld->coord_bld, *r, depth);
      }
   }
}
/** Helper used by lp_build_cube_lookup() */
static LLVMValueRef
lp_build_cube_imapos(struct lp_build_context *coord_bld, LLVMValueRef coord)
{
   /* ima = +0.5 / abs(coord); */
   LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
   LLVMValueRef ima = lp_build_div(coord_bld, posHalf, absCoord);
   return ima;
}
/** Helper used by lp_build_cube_lookup() */
static LLVMValueRef
lp_build_cube_imaneg(struct lp_build_context *coord_bld, LLVMValueRef coord)
{
   /* ima = -0.5 / abs(coord); */
   LLVMValueRef negHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, -0.5);
   LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
   LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
   return ima;
}
/**
 * Helper used by lp_build_cube_lookup()
 * FIXME: the sign here can also be 0.
 * Arithmetically this could definitely make a difference. Either
 * fix the comment or use another (simpler) sign function, not sure
 * which one it should be.
 * \param sign  scalar +1 or -1
 * \param coord  float vector
 * \param ima  float vector
 */
static LLVMValueRef
lp_build_cube_coord(struct lp_build_context *coord_bld,
                    LLVMValueRef sign, int negate_coord,
                    LLVMValueRef coord, LLVMValueRef ima)
{
   /* return negate(coord) * ima * sign + 0.5; */
   LLVMValueRef half = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef res;

   assert(negate_coord == +1 || negate_coord == -1);

   if (negate_coord == -1) {
      coord = lp_build_negate(coord_bld, coord);
   }

   res = lp_build_mul(coord_bld, coord, ima);
   if (sign) {
      sign = lp_build_broadcast_scalar(coord_bld, sign);
      res = lp_build_mul(coord_bld, res, sign);
   }
   res = lp_build_add(coord_bld, res, half);

   return res;
}
/** Helper used by lp_build_cube_lookup()
 * Return (major_coord >= 0) ? pos_face : neg_face;
 */
static LLVMValueRef
lp_build_cube_face(struct lp_build_sample_context *bld,
                   LLVMValueRef major_coord,
                   unsigned pos_face, unsigned neg_face)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef cmp = LLVMBuildFCmp(builder, LLVMRealUGE,
                                    major_coord,
                                    bld->float_bld.zero, "");
   LLVMValueRef pos = lp_build_const_int32(gallivm, pos_face);
   LLVMValueRef neg = lp_build_const_int32(gallivm, neg_face);
   LLVMValueRef res = LLVMBuildSelect(builder, cmp, pos, neg, "");
   return res;
}
/**
 * Generate code to do cube face selection and compute per-face texcoords.
 */
void
lp_build_cube_lookup(struct lp_build_sample_context *bld,
                     LLVMValueRef *coords,
                     const struct lp_derivatives *derivs, /* optional */
                     LLVMValueRef *rho,
                     boolean need_derivs)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMValueRef si, ti, ri;

   if (1 || coord_bld->type.length > 4) {
      /*
       * Do per-pixel face selection. We cannot however (as we used to do)
       * simply calculate the derivs afterwards (which is very bogus for
       * explicit derivs btw) because the values would be "random" when
       * not all pixels lie on the same face. So what we do here is just
       * calculate the derivatives after scaling the coords by the absolute
       * value of the inverse major axis, and essentially do rho calculation
       * steps as if it were a 3d texture. This is perfect if all pixels hit
       * the same face, but not so great at edges, I believe the max error
       * should be sqrt(2) with no_rho_approx or 2 otherwise (essentially measuring
       * the 3d distance between 2 points on the cube instead of measuring up/down
       * the edge). Still this is possibly a win over just selecting the same face
       * for all pixels. Unfortunately, something like that doesn't work for
       * explicit derivatives.
       * TODO: handle explicit derivatives by transforming them alongside coords
       * somehow.
       */
      struct lp_build_context *cint_bld = &bld->int_coord_bld;
      struct lp_type intctype = cint_bld->type;
      LLVMValueRef signs, signt, signr, signma;
      LLVMValueRef as, at, ar, face, face_s, face_t;
      LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
      LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
      LLVMValueRef tnegi, rnegi;
      LLVMValueRef ma, mai, ima;
      LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
      LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
                                                     1 << (intctype.width - 1));
      LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
                                                      intctype.width - 1);
      LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_X);
      LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Y);
      LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Z);
      LLVMValueRef s = coords[0];
      LLVMValueRef t = coords[1];
      LLVMValueRef r = coords[2];

      assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
      assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
      assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);

      /*
       * get absolute value (for x/y/z face selection) and sign bit
       * (for mirroring minor coords and pos/neg face selection)
       * of the original coords.
       */
      as = lp_build_abs(&bld->coord_bld, s);
      at = lp_build_abs(&bld->coord_bld, t);
      ar = lp_build_abs(&bld->coord_bld, r);

      /*
       * major face determination: select x if x > y else select y
       * select z if z >= max(x,y) else select previous result
       * if some axis are the same we chose z over y, y over x - the
       * dx10 spec seems to ask for it while OpenGL doesn't care (if we
       * wouldn't care could save a select or two if using different
       * compares and doing at_g_as_ar last since tnewx and tnewz are the
       * same).
       */
      as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
      maxasat = lp_build_max(coord_bld, as, at);
      ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);

      if (need_derivs) {
         LLVMValueRef ddx_ddy[2], tmp[3], rho_vec;
         static const unsigned char swizzle0[] = { /* no-op swizzle */
            0, LP_BLD_SWIZZLE_DONTCARE,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         static const unsigned char swizzle1[] = {
            1, LP_BLD_SWIZZLE_DONTCARE,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         static const unsigned char swizzle01[] = { /* no-op swizzle */
            0, 1,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         static const unsigned char swizzle23[] = {
            2, 3,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         static const unsigned char swizzle02[] = {
            0, 2,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };

         /*
          * scale the s/t/r coords pre-select/mirror so we can calculate
          * "reasonable" derivs.
          */
         ma = lp_build_select(coord_bld, as_ge_at, s, t);
         ma = lp_build_select(coord_bld, ar_ge_as_at, r, ma);
         ima = lp_build_cube_imapos(coord_bld, ma);
         s = lp_build_mul(coord_bld, s, ima);
         t = lp_build_mul(coord_bld, t, ima);
         r = lp_build_mul(coord_bld, r, ima);

         /*
          * This isn't quite the same as the "ordinary" (3d deriv) path since we
          * know the texture is square which simplifies things (we can omit the
          * size mul which happens very early completely here and do it at the
          * end instead).
          */
         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
         ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);

         if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
            ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
         }
         else {
            ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
            ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
         }

         tmp[0] = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
         tmp[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
         tmp[2] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);

         if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
            rho_vec = lp_build_add(coord_bld, tmp[0], tmp[1]);
            rho_vec = lp_build_add(coord_bld, rho_vec, tmp[2]);
         }
         else {
            rho_vec = lp_build_max(coord_bld, tmp[0], tmp[1]);
            rho_vec = lp_build_max(coord_bld, rho_vec, tmp[2]);
         }

         tmp[0] = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
         tmp[1] = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
         *rho = lp_build_max(coord_bld, tmp[0], tmp[1]);
      }

      si = LLVMBuildBitCast(builder, s, lp_build_vec_type(gallivm, intctype), "");
      ti = LLVMBuildBitCast(builder, t, lp_build_vec_type(gallivm, intctype), "");
      ri = LLVMBuildBitCast(builder, r, lp_build_vec_type(gallivm, intctype), "");
      signs = LLVMBuildAnd(builder, si, signmask, "");
      signt = LLVMBuildAnd(builder, ti, signmask, "");
      signr = LLVMBuildAnd(builder, ri, signmask, "");

      /*
       * compute all possible new s/t coords
       * snewx = signs * -r;
       * tnewx = -t;
       * snewy = s;
       * tnewy = signt * r;
       * snewz = signr * s;
       * tnewz = -t;
       */
      tnegi = LLVMBuildXor(builder, ti, signmask, "");
      rnegi = LLVMBuildXor(builder, ri, signmask, "");

      snewx = LLVMBuildXor(builder, signs, rnegi, "");
      tnewx = tnegi;

      snewy = si;
      tnewy = LLVMBuildXor(builder, signt, ri, "");

      snewz = LLVMBuildXor(builder, signr, si, "");
      tnewz = tnegi;

      /* XXX on x86 unclear if we should cast the values back to float
       * or not - on some cpus (nehalem) pblendvb has twice the throughput
       * of blendvps though on others there just might be domain
       * transition penalties when using it (this depends on what llvm
       * will chose for the bit ops above so there appears no "right way",
       * but given the boatload of selects let's just use the int type).
       */

      /* select/mirror */
      if (!need_derivs) {
         ma = lp_build_select(coord_bld, as_ge_at, s, t);
      }
      face_s = lp_build_select(cint_bld, as_ge_at, snewx, snewy);
      face_t = lp_build_select(cint_bld, as_ge_at, tnewx, tnewy);
      face = lp_build_select(cint_bld, as_ge_at, facex, facey);

      if (!need_derivs) {
         ma = lp_build_select(coord_bld, ar_ge_as_at, r, ma);
      }
      face_s = lp_build_select(cint_bld, ar_ge_as_at, snewz, face_s);
      face_t = lp_build_select(cint_bld, ar_ge_as_at, tnewz, face_t);
      face = lp_build_select(cint_bld, ar_ge_as_at, facez, face);

      face_s = LLVMBuildBitCast(builder, face_s,
                                lp_build_vec_type(gallivm, coord_bld->type), "");
      face_t = LLVMBuildBitCast(builder, face_t,
                                lp_build_vec_type(gallivm, coord_bld->type), "");

      /* add +1 for neg face */
      /* XXX with AVX probably want to use another select here -
       * as long as we ensure vblendvps gets used we can actually
       * skip the comparison and just use sign as a "mask" directly.
       */
      mai = LLVMBuildBitCast(builder, ma, lp_build_vec_type(gallivm, intctype), "");
      signma = LLVMBuildLShr(builder, mai, signshift, "");
      coords[2] = LLVMBuildOr(builder, face, signma, "face");

      /* project coords */
      if (!need_derivs) {
         ima = lp_build_cube_imapos(coord_bld, ma);
         face_s = lp_build_mul(coord_bld, face_s, ima);
         face_t = lp_build_mul(coord_bld, face_t, ima);
      }

      coords[0] = lp_build_add(coord_bld, face_s, posHalf);
      coords[1] = lp_build_add(coord_bld, face_t, posHalf);
   }
   else {
      struct lp_build_if_state if_ctx;
      LLVMValueRef face_s_var;
      LLVMValueRef face_t_var;
      LLVMValueRef face_var;
      LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
      LLVMValueRef shuffles[4];
      LLVMValueRef arxy_ge_aryx, arxy_ge_arzz, arxy_ge_arxy_arzz;
      LLVMValueRef arxyxy, aryxzz, arxyxy_ge_aryxzz;
      LLVMValueRef tmp[4], rxyz, arxyz;
      struct lp_build_context *float_bld = &bld->float_bld;
      LLVMValueRef s, t, r, face, face_s, face_t;

      assert(bld->coord_bld.type.length == 4);

      tmp[0] = s = coords[0];
      tmp[1] = t = coords[1];
      tmp[2] = r = coords[2];
      rxyz = lp_build_hadd_partial4(&bld->coord_bld, tmp, 3);
      arxyz = lp_build_abs(&bld->coord_bld, rxyz);

      shuffles[0] = lp_build_const_int32(gallivm, 0);
      shuffles[1] = lp_build_const_int32(gallivm, 1);
      shuffles[2] = lp_build_const_int32(gallivm, 0);
      shuffles[3] = lp_build_const_int32(gallivm, 1);
      arxyxy = LLVMBuildShuffleVector(builder, arxyz, arxyz, LLVMConstVector(shuffles, 4), "");
      shuffles[0] = lp_build_const_int32(gallivm, 1);
      shuffles[1] = lp_build_const_int32(gallivm, 0);
      shuffles[2] = lp_build_const_int32(gallivm, 2);
      shuffles[3] = lp_build_const_int32(gallivm, 2);
      aryxzz = LLVMBuildShuffleVector(builder, arxyz, arxyz, LLVMConstVector(shuffles, 4), "");
      arxyxy_ge_aryxzz = lp_build_cmp(&bld->coord_bld, PIPE_FUNC_GEQUAL, arxyxy, aryxzz);

      shuffles[0] = lp_build_const_int32(gallivm, 0);
      shuffles[1] = lp_build_const_int32(gallivm, 1);
      arxy_ge_aryx = LLVMBuildShuffleVector(builder, arxyxy_ge_aryxzz, arxyxy_ge_aryxzz,
                                            LLVMConstVector(shuffles, 2), "");
      shuffles[0] = lp_build_const_int32(gallivm, 2);
      shuffles[1] = lp_build_const_int32(gallivm, 3);
      arxy_ge_arzz = LLVMBuildShuffleVector(builder, arxyxy_ge_aryxzz, arxyxy_ge_aryxzz,
                                            LLVMConstVector(shuffles, 2), "");
      arxy_ge_arxy_arzz = LLVMBuildAnd(builder, arxy_ge_aryx, arxy_ge_arzz, "");

      arx_ge_ary_arz = LLVMBuildExtractElement(builder, arxy_ge_arxy_arzz,
                                               lp_build_const_int32(gallivm, 0), "");
      arx_ge_ary_arz = LLVMBuildICmp(builder, LLVMIntNE, arx_ge_ary_arz,
                                     lp_build_const_int32(gallivm, 0), "");
      ary_ge_arx_arz = LLVMBuildExtractElement(builder, arxy_ge_arxy_arzz,
                                               lp_build_const_int32(gallivm, 1), "");
      ary_ge_arx_arz = LLVMBuildICmp(builder, LLVMIntNE, ary_ge_arx_arz,
                                     lp_build_const_int32(gallivm, 0), "");
      face_s_var = lp_build_alloca(gallivm, bld->coord_bld.vec_type, "face_s_var");
      face_t_var = lp_build_alloca(gallivm, bld->coord_bld.vec_type, "face_t_var");
      face_var = lp_build_alloca(gallivm, bld->int_bld.vec_type, "face_var");

      lp_build_if(&if_ctx, gallivm, arx_ge_ary_arz);
      {
         /* +/- X face */
         LLVMValueRef sign, ima;
         si = LLVMBuildExtractElement(builder, rxyz,
                                      lp_build_const_int32(gallivm, 0), "");
         sign = lp_build_sgn(float_bld, si);
         ima = lp_build_cube_imaneg(coord_bld, s);
         face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
         face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
         face = lp_build_cube_face(bld, si,
                                   PIPE_TEX_FACE_POS_X,
                                   PIPE_TEX_FACE_NEG_X);
         LLVMBuildStore(builder, face_s, face_s_var);
         LLVMBuildStore(builder, face_t, face_t_var);
         LLVMBuildStore(builder, face, face_var);
      }
      lp_build_else(&if_ctx);
      {
         struct lp_build_if_state if_ctx2;

         lp_build_if(&if_ctx2, gallivm, ary_ge_arx_arz);
         {
            /* +/- Y face */
            LLVMValueRef sign, ima;
            ti = LLVMBuildExtractElement(builder, rxyz,
                                         lp_build_const_int32(gallivm, 1), "");
            sign = lp_build_sgn(float_bld, ti);
            ima = lp_build_cube_imaneg(coord_bld, t);
            face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
            face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
            face = lp_build_cube_face(bld, ti,
                                      PIPE_TEX_FACE_POS_Y,
                                      PIPE_TEX_FACE_NEG_Y);
            LLVMBuildStore(builder, face_s, face_s_var);
            LLVMBuildStore(builder, face_t, face_t_var);
            LLVMBuildStore(builder, face, face_var);
         }
         lp_build_else(&if_ctx2);
         {
            /* +/- Z face */
            LLVMValueRef sign, ima;
            ri = LLVMBuildExtractElement(builder, rxyz,
                                         lp_build_const_int32(gallivm, 2), "");
            sign = lp_build_sgn(float_bld, ri);
            ima = lp_build_cube_imaneg(coord_bld, r);
            face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
            face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
            face = lp_build_cube_face(bld, ri,
                                      PIPE_TEX_FACE_POS_Z,
                                      PIPE_TEX_FACE_NEG_Z);
            LLVMBuildStore(builder, face_s, face_s_var);
            LLVMBuildStore(builder, face_t, face_t_var);
            LLVMBuildStore(builder, face, face_var);
         }
         lp_build_endif(&if_ctx2);
      }

      lp_build_endif(&if_ctx);

      coords[0] = LLVMBuildLoad(builder, face_s_var, "face_s");
      coords[1] = LLVMBuildLoad(builder, face_t_var, "face_t");
      face = LLVMBuildLoad(builder, face_var, "face");
      coords[2] = lp_build_broadcast_scalar(&bld->int_coord_bld, face);
   }
}
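/*
 * The face selection above follows the usual cube map rule: the axis with
 * the largest absolute coordinate value becomes the major axis, its sign
 * picks the positive or negative face (e.g. |r| >= |s|,|t| with r < 0 gives
 * PIPE_TEX_FACE_NEG_Z), and the two remaining coordinates are projected by
 * 0.5/|major| and offset by 0.5 to give the in-face texcoords.
 */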
/**
 * Compute the partial offset of a pixel block along an arbitrary axis.
 *
 * @param coord   coordinate in pixels
 * @param stride  number of bytes between rows of successive pixel blocks
 * @param block_length  number of pixels in a pixel block along the coordinate
 *                      axis
 * @param out_offset    resulting relative offset of the pixel block in bytes
 * @param out_subcoord  resulting sub-block pixel coordinate
 */
void
lp_build_sample_partial_offset(struct lp_build_context *bld,
                               unsigned block_length,
                               LLVMValueRef coord,
                               LLVMValueRef stride,
                               LLVMValueRef *out_offset,
                               LLVMValueRef *out_subcoord)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef offset;
   LLVMValueRef subcoord;

   if (block_length == 1) {
      subcoord = bld->zero;
      offset = coord;
   }
   else {
      /*
       * Pixel blocks have power of two dimensions. LLVM should convert the
       * rem/div to bit arithmetic.
       * TODO: Verify this.
       * It does indeed BUT it does transform it to scalar (and back) when doing so
       * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
       * The generated code looks seriously unfunny and is quite expensive.
       */
#if 0
      LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
      subcoord = LLVMBuildURem(builder, coord, block_width, "");
      coord    = LLVMBuildUDiv(builder, coord, block_width, "");
#else
      unsigned logbase2 = util_logbase2(block_length);
      LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
      LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
      subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
      coord = LLVMBuildLShr(builder, coord, block_shift, "");
#endif

      offset = lp_build_mul(bld, coord, stride);
   }

   assert(out_offset);
   assert(out_subcoord);

   *out_offset = offset;
   *out_subcoord = subcoord;
}
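/*
 * Worked example: for a format with 4-pixel-wide blocks and coord = 11 this
 * yields subcoord = 11 & 3 = 3 and a block index of 11 >> 2 = 2, i.e.
 * offset = 2 * stride; the remaining sub-block coordinate is resolved later
 * by the per-format fetch code.
 */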
/**
 * Compute the offset of a pixel block.
 *
 * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
 *
 * Returns the relative offset and i,j sub-block coordinates
 */
void
lp_build_sample_offset(struct lp_build_context *bld,
                       const struct util_format_description *format_desc,
                       LLVMValueRef x,
                       LLVMValueRef y,
                       LLVMValueRef z,
                       LLVMValueRef y_stride,
                       LLVMValueRef z_stride,
                       LLVMValueRef *out_offset,
                       LLVMValueRef *out_i,
                       LLVMValueRef *out_j)
{
   LLVMValueRef x_stride;
   LLVMValueRef offset;

   x_stride = lp_build_const_vec(bld->gallivm, bld->type,
                                 format_desc->block.bits/8);

   lp_build_sample_partial_offset(bld,
                                  format_desc->block.width,
                                  x, x_stride,
                                  &offset, out_i);

   if (y && y_stride) {
      LLVMValueRef y_offset;
      lp_build_sample_partial_offset(bld,
                                     format_desc->block.height,
                                     y, y_stride,
                                     &y_offset, out_j);
      offset = lp_build_add(bld, offset, y_offset);
   }
   else {
      *out_j = bld->zero;
   }

   if (z && z_stride) {
      LLVMValueRef z_offset;
      LLVMValueRef k;
      lp_build_sample_partial_offset(bld,
                                     1, /* pixel blocks are always 2D */
                                     z, z_stride,
                                     &z_offset, &k);
      offset = lp_build_add(bld, offset, z_offset);
   }

   *out_offset = offset;
}
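/*
 * Altogether, for a simple linear layout this computes
 *
 *    offset = (x / block_w) * (block_bits/8)
 *           + (y / block_h) * y_stride
 *           +  z            * z_stride
 *
 * with out_i = x % block_w and out_j = y % block_h giving the position
 * inside the (always 2D) pixel block.
 */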