1 /**************************************************************************
3 * Copyright 2010 VMware.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_math.h"
30 #include "util/u_memory.h"
31 #include "util/u_simple_list.h"
32 #include "os/os_time.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_bitarit.h"
35 #include "gallivm/lp_bld_const.h"
36 #include "gallivm/lp_bld_debug.h"
37 #include "gallivm/lp_bld_init.h"
38 #include "gallivm/lp_bld_logic.h"
39 #include "gallivm/lp_bld_intr.h"
40 #include "gallivm/lp_bld_flow.h"
41 #include "gallivm/lp_bld_type.h"
46 #include "lp_screen.h"
47 #include "lp_context.h"
49 #include "lp_state_fs.h"
50 #include "lp_state_setup.h"
53 * Set if the start point for interpolation should be calculated with a
54 * more accurate method (barycentric interpolation).
55 * Unfortunately, actual interpolation results of small tris with steep
56 * gradients far away from the origin are still very busted, this does
57 * nothing to change that (in fact it may make it worse), but some tests
58 * (don't ask) really want accurate values at origin (and ONLY origin).
63 /* currently organized to interpolate full float[4] attributes even
64 * when some elements are unused. Later, can pack vertex data more
71 /* Function arguments:
76 LLVMValueRef facing
; /* boolean */
83 LLVMValueRef x0_center
;
84 LLVMValueRef y0_center
;
85 LLVMValueRef dy20_ooa
;
86 LLVMValueRef dy01_ooa
;
87 LLVMValueRef dx20_ooa
;
88 LLVMValueRef dx01_ooa
;
92 struct lp_build_context bld
;
97 store_coef(struct gallivm_state
*gallivm
,
98 struct lp_setup_args
*args
,
104 LLVMBuilderRef builder
= gallivm
->builder
;
105 LLVMValueRef idx
= lp_build_const_int32(gallivm
, slot
);
107 LLVMBuildStore(builder
,
109 LLVMBuildGEP(builder
, args
->a0
, &idx
, 1, ""));
111 LLVMBuildStore(builder
,
113 LLVMBuildGEP(builder
, args
->dadx
, &idx
, 1, ""));
115 LLVMBuildStore(builder
,
117 LLVMBuildGEP(builder
, args
->dady
, &idx
, 1, ""));
123 emit_constant_coef4(struct gallivm_state
*gallivm
,
124 struct lp_setup_args
*args
,
128 store_coef(gallivm
, args
, slot
, vert
, args
->bld
.zero
, args
->bld
.zero
);
134 * Setup the fragment input attribute with the front-facing value.
135 * \param frontface is the triangle front facing?
138 emit_facing_coef(struct gallivm_state
*gallivm
,
139 struct lp_setup_args
*args
,
142 LLVMBuilderRef builder
= gallivm
->builder
;
143 LLVMTypeRef float_type
= LLVMFloatTypeInContext(gallivm
->context
);
144 LLVMValueRef a0_0
= args
->facing
;
145 LLVMValueRef a0_0f
= LLVMBuildSIToFP(builder
, a0_0
, float_type
, "");
146 LLVMValueRef a0
, face_val
;
147 const unsigned char swizzles
[4] = { PIPE_SWIZZLE_RED
, PIPE_SWIZZLE_ZERO
,
148 PIPE_SWIZZLE_ZERO
, PIPE_SWIZZLE_ZERO
};
149 /* Our face val is either 1 or 0 so we do
150 * face = (val * 2) - 1
154 LLVMBuildFAdd(builder
,
155 LLVMBuildFMul(builder
, a0_0f
,
156 lp_build_const_float(gallivm
, 2.0),
158 lp_build_const_float(gallivm
, -1.0),
160 face_val
= lp_build_broadcast_scalar(&args
->bld
, face_val
);
161 a0
= lp_build_swizzle_aos(&args
->bld
, face_val
, swizzles
);
163 store_coef(gallivm
, args
, slot
, a0
, args
->bld
.zero
, args
->bld
.zero
);
168 vert_attrib(struct gallivm_state
*gallivm
,
174 LLVMBuilderRef b
= gallivm
->builder
;
176 idx
[0] = lp_build_const_int32(gallivm
, attr
);
177 idx
[1] = lp_build_const_int32(gallivm
, elem
);
178 return LLVMBuildLoad(b
, LLVMBuildGEP(b
, vert
, idx
, 2, ""), name
);
183 lp_twoside(struct gallivm_state
*gallivm
,
184 struct lp_setup_args
*args
,
185 const struct lp_setup_variant_key
*key
,
187 LLVMValueRef attribv
[3])
189 LLVMBuilderRef b
= gallivm
->builder
;
190 LLVMValueRef a0_back
, a1_back
, a2_back
;
191 LLVMValueRef idx2
= lp_build_const_int32(gallivm
, bcolor_slot
);
193 LLVMValueRef facing
= args
->facing
;
194 LLVMValueRef front_facing
= LLVMBuildICmp(b
, LLVMIntEQ
, facing
,
195 lp_build_const_int32(gallivm
, 0), ""); /** need i1 for if condition */
197 a0_back
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v0
, &idx2
, 1, ""), "v0a_back");
198 a1_back
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v1
, &idx2
, 1, ""), "v1a_back");
199 a2_back
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v2
, &idx2
, 1, ""), "v2a_back");
201 /* Possibly swap the front and back attrib values,
203 * Prefer select to if so we don't have to worry about phis or
206 attribv
[0] = LLVMBuildSelect(b
, front_facing
, a0_back
, attribv
[0], "");
207 attribv
[1] = LLVMBuildSelect(b
, front_facing
, a1_back
, attribv
[1], "");
208 attribv
[2] = LLVMBuildSelect(b
, front_facing
, a2_back
, attribv
[2], "");
213 lp_do_offset_tri(struct gallivm_state
*gallivm
,
214 struct lp_setup_args
*args
,
215 const struct lp_setup_variant_key
*key
,
216 LLVMValueRef inv_det
,
219 LLVMValueRef attribv
[3])
221 LLVMBuilderRef b
= gallivm
->builder
;
222 struct lp_build_context flt_scalar_bld
;
223 struct lp_build_context int_scalar_bld
;
224 struct lp_build_context
*bld
= &args
->bld
;
225 LLVMValueRef zoffset
, mult
;
226 LLVMValueRef z0_new
, z1_new
, z2_new
;
227 LLVMValueRef dzdxdzdy
, dzdx
, dzdy
, dzxyz20
, dyzzx01
, dyzzx01_dzxyz20
, dzx01_dyz20
;
228 LLVMValueRef z0z1
, z0z1z2
;
229 LLVMValueRef max
, max_value
, res12
;
230 LLVMValueRef shuffles
[4];
231 LLVMTypeRef shuf_type
= LLVMInt32TypeInContext(gallivm
->context
);
232 LLVMValueRef onei
= lp_build_const_int32(gallivm
, 1);
233 LLVMValueRef zeroi
= lp_build_const_int32(gallivm
, 0);
234 LLVMValueRef twoi
= lp_build_const_int32(gallivm
, 2);
235 LLVMValueRef threei
= lp_build_const_int32(gallivm
, 3);
237 /* (res12) = cross(e,f).xy */
242 dzxyz20
= LLVMBuildShuffleVector(b
, dxyz20
, dxyz20
, LLVMConstVector(shuffles
, 4), "");
248 dyzzx01
= LLVMBuildShuffleVector(b
, dxyz01
, dxyz01
, LLVMConstVector(shuffles
, 4), "");
250 dyzzx01_dzxyz20
= LLVMBuildFMul(b
, dzxyz20
, dyzzx01
, "dyzzx01_dzxyz20");
253 shuffles
[1] = threei
;
254 shuffles
[2] = LLVMGetUndef(shuf_type
);
255 shuffles
[3] = LLVMGetUndef(shuf_type
);
256 dzx01_dyz20
= LLVMBuildShuffleVector(b
, dyzzx01_dzxyz20
, dyzzx01_dzxyz20
,
257 LLVMConstVector(shuffles
, 4), "");
259 res12
= LLVMBuildFSub(b
, dyzzx01_dzxyz20
, dzx01_dyz20
, "res12");
261 /* dzdx = fabsf(res1 * inv_det), dzdy = fabsf(res2 * inv_det)*/
262 dzdxdzdy
= LLVMBuildFMul(b
, res12
, inv_det
, "dzdxdzdy");
263 dzdxdzdy
= lp_build_abs(bld
, dzdxdzdy
);
265 dzdx
= LLVMBuildExtractElement(b
, dzdxdzdy
, zeroi
, "");
266 dzdy
= LLVMBuildExtractElement(b
, dzdxdzdy
, onei
, "");
268 /* mult = MAX2(dzdx, dzdy) * pgon_offset_scale */
269 max
= LLVMBuildFCmp(b
, LLVMRealUGT
, dzdx
, dzdy
, "");
270 max_value
= LLVMBuildSelect(b
, max
, dzdx
, dzdy
, "max");
272 mult
= LLVMBuildFMul(b
, max_value
,
273 lp_build_const_float(gallivm
, key
->pgon_offset_scale
), "");
275 lp_build_context_init(&flt_scalar_bld
, gallivm
, lp_type_float_vec(32, 32));
277 if (key
->floating_point_depth
) {
279 * bias = pgon_offset_units * 2^(exponent(max(z0, z1, z2)) - mantissa_bits) +
280 * MAX2(dzdx, dzdy) * pgon_offset_scale
282 * NOTE: Assumes IEEE float32.
284 LLVMValueRef c23_shifted
, exp_mask
, bias
, exp
;
285 LLVMValueRef maxz_value
, maxz0z1_value
;
287 lp_build_context_init(&int_scalar_bld
, gallivm
, lp_type_int_vec(32, 32));
289 c23_shifted
= lp_build_const_int32(gallivm
, 23 << 23);
290 exp_mask
= lp_build_const_int32(gallivm
, 0xff << 23);
292 maxz0z1_value
= lp_build_max(&flt_scalar_bld
,
293 LLVMBuildExtractElement(b
, attribv
[0], twoi
, ""),
294 LLVMBuildExtractElement(b
, attribv
[1], twoi
, ""));
296 maxz_value
= lp_build_max(&flt_scalar_bld
,
297 LLVMBuildExtractElement(b
, attribv
[2], twoi
, ""),
300 exp
= LLVMBuildBitCast(b
, maxz_value
, int_scalar_bld
.vec_type
, "");
301 exp
= lp_build_and(&int_scalar_bld
, exp
, exp_mask
);
302 exp
= lp_build_sub(&int_scalar_bld
, exp
, c23_shifted
);
303 /* Clamping to zero means mrd will be zero for very small numbers,
304 * but specs do not indicate this should be prevented by clamping
305 * mrd to smallest normal number instead. */
306 exp
= lp_build_max(&int_scalar_bld
, exp
, int_scalar_bld
.zero
);
307 exp
= LLVMBuildBitCast(b
, exp
, flt_scalar_bld
.vec_type
, "");
309 bias
= LLVMBuildFMul(b
, exp
,
310 lp_build_const_float(gallivm
, key
->pgon_offset_units
),
313 zoffset
= LLVMBuildFAdd(b
, bias
, mult
, "zoffset");
316 * bias = pgon_offset_units + MAX2(dzdx, dzdy) * pgon_offset_scale
318 zoffset
= LLVMBuildFAdd(b
,
319 lp_build_const_float(gallivm
, key
->pgon_offset_units
),
323 if (key
->pgon_offset_clamp
> 0) {
324 zoffset
= lp_build_min(&flt_scalar_bld
,
325 lp_build_const_float(gallivm
, key
->pgon_offset_clamp
),
328 else if (key
->pgon_offset_clamp
< 0) {
329 zoffset
= lp_build_max(&flt_scalar_bld
,
330 lp_build_const_float(gallivm
, key
->pgon_offset_clamp
),
336 shuffles
[1] = lp_build_const_int32(gallivm
, 6);
337 shuffles
[2] = LLVMGetUndef(shuf_type
);
338 shuffles
[3] = LLVMGetUndef(shuf_type
);
339 z0z1
= LLVMBuildShuffleVector(b
, attribv
[0], attribv
[1], LLVMConstVector(shuffles
, 4), "");
342 shuffles
[2] = lp_build_const_int32(gallivm
, 6);
343 shuffles
[3] = LLVMGetUndef(shuf_type
);
344 z0z1z2
= LLVMBuildShuffleVector(b
, z0z1
, attribv
[2], LLVMConstVector(shuffles
, 4), "");
345 zoffset
= lp_build_broadcast_scalar(bld
, zoffset
);
347 /* clamp and do offset */
349 * FIXME I suspect the clamp (is that even right to always clamp to fixed
350 * 0.0/1.0?) should really be per fragment?
352 z0z1z2
= lp_build_clamp(bld
, LLVMBuildFAdd(b
, z0z1z2
, zoffset
, ""), bld
->zero
, bld
->one
);
354 /* insert into args->a0.z, a1.z, a2.z:
356 z0_new
= LLVMBuildExtractElement(b
, z0z1z2
, zeroi
, "");
357 z1_new
= LLVMBuildExtractElement(b
, z0z1z2
, onei
, "");
358 z2_new
= LLVMBuildExtractElement(b
, z0z1z2
, twoi
, "");
359 attribv
[0] = LLVMBuildInsertElement(b
, attribv
[0], z0_new
, twoi
, "");
360 attribv
[1] = LLVMBuildInsertElement(b
, attribv
[1], z1_new
, twoi
, "");
361 attribv
[2] = LLVMBuildInsertElement(b
, attribv
[2], z2_new
, twoi
, "");
365 load_attribute(struct gallivm_state
*gallivm
,
366 struct lp_setup_args
*args
,
367 const struct lp_setup_variant_key
*key
,
369 LLVMValueRef attribv
[3])
371 LLVMBuilderRef b
= gallivm
->builder
;
372 LLVMValueRef idx
= lp_build_const_int32(gallivm
, vert_attr
);
374 /* Load the vertex data
376 attribv
[0] = LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v0
, &idx
, 1, ""), "v0a");
377 attribv
[1] = LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v1
, &idx
, 1, ""), "v1a");
378 attribv
[2] = LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v2
, &idx
, 1, ""), "v2a");
381 /* Potentially modify it according to twoside, etc:
384 if (vert_attr
== key
->color_slot
&& key
->bcolor_slot
>= 0)
385 lp_twoside(gallivm
, args
, key
, key
->bcolor_slot
, attribv
);
386 else if (vert_attr
== key
->spec_slot
&& key
->bspec_slot
>= 0)
387 lp_twoside(gallivm
, args
, key
, key
->bspec_slot
, attribv
);
392 * FIXME: interpolation is always done wrt fb origin (0/0).
393 * However, if some (small) tri is far away from the origin and gradients
394 * are large, this can lead to HUGE errors, since the a0 value calculated
395 * here can get very large (with the actual values inside the triangle way
396 * smaller), leading to complete loss of accuracy. This could be prevented
397 * by using some point inside (or at corner) of the tri as interpolation
398 * origin, or just use barycentric interpolation (which GL suggests and is
399 * what real hw does - you can get the barycentric coordinates from the
400 * edge functions in rasterization in principle (though we skip these
401 * sometimes completely in case of tris covering a block fully,
402 * which obviously wouldn't work)).
405 emit_coef4( struct gallivm_state
*gallivm
,
406 struct lp_setup_args
*args
,
412 LLVMBuilderRef b
= gallivm
->builder
;
413 bool accurate_a0
= ACCURATE_A0
;
415 LLVMValueRef dy20_ooa
= args
->dy20_ooa
;
416 LLVMValueRef dy01_ooa
= args
->dy01_ooa
;
417 LLVMValueRef dx20_ooa
= args
->dx20_ooa
;
418 LLVMValueRef dx01_ooa
= args
->dx01_ooa
;
419 LLVMValueRef x0_center
= args
->x0_center
;
420 LLVMValueRef y0_center
= args
->y0_center
;
421 LLVMValueRef da01
= LLVMBuildFSub(b
, a0
, a1
, "da01");
422 LLVMValueRef da20
= LLVMBuildFSub(b
, a2
, a0
, "da20");
424 /* Calculate dadx (vec4f)
426 LLVMValueRef da01_dy20_ooa
= LLVMBuildFMul(b
, da01
, dy20_ooa
, "da01_dy20_ooa");
427 LLVMValueRef da20_dy01_ooa
= LLVMBuildFMul(b
, da20
, dy01_ooa
, "da20_dy01_ooa");
428 LLVMValueRef dadx
= LLVMBuildFSub(b
, da01_dy20_ooa
, da20_dy01_ooa
, "dadx");
430 /* Calculate dady (vec4f)
432 LLVMValueRef da01_dx20_ooa
= LLVMBuildFMul(b
, da01
, dx20_ooa
, "da01_dx20_ooa");
433 LLVMValueRef da20_dx01_ooa
= LLVMBuildFMul(b
, da20
, dx01_ooa
, "da20_dx01_ooa");
434 LLVMValueRef dady
= LLVMBuildFSub(b
, da20_dx01_ooa
, da01_dx20_ooa
, "dady");
436 /* Calculate a0 - the attribute value at the origin
439 LLVMValueRef dadx_x0
= LLVMBuildFMul(b
, dadx
, x0_center
, "dadx_x0");
440 LLVMValueRef dady_y0
= LLVMBuildFMul(b
, dady
, y0_center
, "dady_y0");
441 LLVMValueRef attr_v0
= LLVMBuildFAdd(b
, dadx_x0
, dady_y0
, "attr_v0");
442 attr_0
= LLVMBuildFSub(b
, a0
, attr_v0
, "attr_0");
445 LLVMValueRef ao2
= LLVMBuildFMul(b
, args
->e01o
, a2
, "");
446 LLVMValueRef ao1
= LLVMBuildFMul(b
, args
->e20o
, a1
, "");
447 LLVMValueRef ao0
= LLVMBuildFMul(b
, args
->e12o
, a0
, "");
448 attr_0
= LLVMBuildFAdd(b
, ao0
, ao1
, "");
449 attr_0
= LLVMBuildFAdd(b
, attr_0
, ao2
, "");
452 store_coef(gallivm
, args
, slot
, attr_0
, dadx
, dady
);
457 emit_linear_coef( struct gallivm_state
*gallivm
,
458 struct lp_setup_args
*args
,
460 LLVMValueRef attribv
[3])
462 /* nothing to do anymore */
472 * Compute a0, dadx and dady for a perspective-corrected interpolant,
474 * We basically multiply the vertex value by 1/w before computing
475 * the plane coefficients (a0, dadx, dady).
476 * Later, when we compute the value at a particular fragment position we'll
477 * divide the interpolated value by the interpolated W at that fragment.
480 apply_perspective_corr( struct gallivm_state
*gallivm
,
481 struct lp_setup_args
*args
,
483 LLVMValueRef attribv
[3])
485 LLVMBuilderRef b
= gallivm
->builder
;
487 /* premultiply by 1/w (v[0][3] is always 1/w):
489 LLVMValueRef v0_oow
= lp_build_broadcast_scalar(&args
->bld
,
490 vert_attrib(gallivm
, args
->v0
, 0, 3, "v0_oow"));
491 LLVMValueRef v1_oow
= lp_build_broadcast_scalar(&args
->bld
,
492 vert_attrib(gallivm
, args
->v1
, 0, 3, "v1_oow"));
493 LLVMValueRef v2_oow
= lp_build_broadcast_scalar(&args
->bld
,
494 vert_attrib(gallivm
, args
->v2
, 0, 3, "v2_oow"));
496 attribv
[0] = LLVMBuildFMul(b
, attribv
[0], v0_oow
, "v0_oow_v0a");
497 attribv
[1] = LLVMBuildFMul(b
, attribv
[1], v1_oow
, "v1_oow_v1a");
498 attribv
[2] = LLVMBuildFMul(b
, attribv
[2], v2_oow
, "v2_oow_v2a");
503 * Applies cylindrical wrapping to vertex attributes if enabled.
504 * Input coordinates must be in [0, 1] range, otherwise results are undefined.
506 * @param cyl_wrap TGSI_CYLINDRICAL_WRAP_x flags
509 emit_apply_cyl_wrap(struct gallivm_state
*gallivm
,
510 struct lp_setup_args
*args
,
512 LLVMValueRef attribv
[3])
515 LLVMBuilderRef builder
= gallivm
->builder
;
516 struct lp_type type
= args
->bld
.type
;
517 LLVMTypeRef float_vec_type
= args
->bld
.vec_type
;
518 LLVMValueRef pos_half
;
519 LLVMValueRef neg_half
;
520 LLVMValueRef cyl_mask
;
529 pos_half
= lp_build_const_vec(gallivm
, type
, +0.5f
);
530 neg_half
= lp_build_const_vec(gallivm
, type
, -0.5f
);
531 cyl_mask
= lp_build_const_mask_aos(gallivm
, type
, cyl_wrap
, 4);
533 one
= lp_build_const_vec(gallivm
, type
, 1.0f
);
534 one
= LLVMBuildBitCast(builder
, one
, lp_build_int_vec_type(gallivm
, type
), "");
535 one
= LLVMBuildAnd(builder
, one
, cyl_mask
, "");
538 delta
= LLVMBuildFSub(builder
, attribv
[1], attribv
[0], "");
540 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_GREATER
, delta
, pos_half
);
541 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
542 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
543 attribv
[0] = LLVMBuildFAdd(builder
, attribv
[0], offset
, "");
545 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_LESS
, delta
, neg_half
);
546 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
547 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
548 attribv
[1] = LLVMBuildFAdd(builder
, attribv
[1], offset
, "");
551 delta
= LLVMBuildFSub(builder
, attribv
[2], attribv
[1], "");
553 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_GREATER
, delta
, pos_half
);
554 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
555 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
556 attribv
[1] = LLVMBuildFAdd(builder
, attribv
[1], offset
, "");
558 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_LESS
, delta
, neg_half
);
559 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
560 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
561 attribv
[2] = LLVMBuildFAdd(builder
, attribv
[2], offset
, "");
564 delta
= LLVMBuildFSub(builder
, attribv
[0], attribv
[2], "");
566 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_GREATER
, delta
, pos_half
);
567 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
568 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
569 attribv
[2] = LLVMBuildFAdd(builder
, attribv
[2], offset
, "");
571 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_LESS
, delta
, neg_half
);
572 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
573 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
574 attribv
[0] = LLVMBuildFAdd(builder
, attribv
[0], offset
, "");
579 * Compute the inputs-> dadx, dady, a0 values.
582 emit_tri_coef( struct gallivm_state
*gallivm
,
583 const struct lp_setup_variant_key
*key
,
584 struct lp_setup_args
*args
)
588 LLVMValueRef attribs
[3];
590 /* setup interpolation for all the remaining attributes:
592 for (slot
= 0; slot
< key
->num_inputs
; slot
++) {
593 switch (key
->inputs
[slot
].interp
) {
594 case LP_INTERP_CONSTANT
:
595 load_attribute(gallivm
, args
, key
, key
->inputs
[slot
].src_index
, attribs
);
596 if (key
->flatshade_first
) {
597 emit_constant_coef4(gallivm
, args
, slot
+1, attribs
[0]);
600 emit_constant_coef4(gallivm
, args
, slot
+1, attribs
[2]);
604 case LP_INTERP_LINEAR
:
605 load_attribute(gallivm
, args
, key
, key
->inputs
[slot
].src_index
, attribs
);
606 emit_apply_cyl_wrap(gallivm
, args
, key
->inputs
[slot
].cyl_wrap
, attribs
);
607 emit_linear_coef(gallivm
, args
, slot
+1, attribs
);
610 case LP_INTERP_PERSPECTIVE
:
611 load_attribute(gallivm
, args
, key
, key
->inputs
[slot
].src_index
, attribs
);
612 emit_apply_cyl_wrap(gallivm
, args
, key
->inputs
[slot
].cyl_wrap
, attribs
);
613 apply_perspective_corr(gallivm
, args
, slot
+1, attribs
);
614 emit_linear_coef(gallivm
, args
, slot
+1, attribs
);
617 case LP_INTERP_POSITION
:
619 * The generated pixel interpolators will pick up the coeffs from
624 case LP_INTERP_FACING
:
625 emit_facing_coef(gallivm
, args
, slot
+1);
635 /* XXX: generic code:
638 set_noalias(LLVMBuilderRef builder
,
639 LLVMValueRef function
,
640 const LLVMTypeRef
*arg_types
,
644 for(i
= 0; i
< nr_args
; ++i
)
645 if(LLVMGetTypeKind(arg_types
[i
]) == LLVMPointerTypeKind
)
646 LLVMAddAttribute(LLVMGetParam(function
, i
),
647 LLVMNoAliasAttribute
);
651 init_args(struct gallivm_state
*gallivm
,
652 const struct lp_setup_variant_key
*key
,
653 struct lp_setup_args
*args
)
655 LLVMBuilderRef b
= gallivm
->builder
;
656 LLVMTypeRef shuf_type
= LLVMInt32TypeInContext(gallivm
->context
);
657 LLVMValueRef onef
= lp_build_const_float(gallivm
, 1.0);
658 LLVMValueRef onei
= lp_build_const_int32(gallivm
, 1);
659 LLVMValueRef zeroi
= lp_build_const_int32(gallivm
, 0);
660 LLVMValueRef pixel_center
, xy0_center
, dxy01
, dxy20
, dyx20
;
661 LLVMValueRef e
, f
, ef
, ooa
;
662 LLVMValueRef shuffles
[4], shuf10
;
663 LLVMValueRef attr_pos
[3];
664 struct lp_type typef4
= lp_type_float_vec(32, 128);
665 struct lp_build_context bld
;
666 bool accurate_a0
= ACCURATE_A0
;
668 lp_build_context_init(&bld
, gallivm
, typef4
);
671 /* The internal position input is in slot zero:
673 load_attribute(gallivm
, args
, key
, 0, attr_pos
);
675 pixel_center
= lp_build_const_vec(gallivm
, typef4
,
676 key
->pixel_center_half
? 0.5 : 0.0);
679 * xy are first two elems in v0a/v1a/v2a but just use vec4 arit
680 * also offset_tri uses actually xyz in them
682 xy0_center
= LLVMBuildFSub(b
, attr_pos
[0], pixel_center
, "xy0_center" );
684 dxy01
= LLVMBuildFSub(b
, attr_pos
[0], attr_pos
[1], "dxy01");
685 dxy20
= LLVMBuildFSub(b
, attr_pos
[2], attr_pos
[0], "dxy20");
689 shuffles
[2] = LLVMGetUndef(shuf_type
);
690 shuffles
[3] = LLVMGetUndef(shuf_type
);
691 shuf10
= LLVMConstVector(shuffles
, 4);
693 dyx20
= LLVMBuildShuffleVector(b
, dxy20
, dxy20
, shuf10
, "");
695 ef
= LLVMBuildFMul(b
, dxy01
, dyx20
, "ef");
696 e
= LLVMBuildExtractElement(b
, ef
, zeroi
, "");
697 f
= LLVMBuildExtractElement(b
, ef
, onei
, "");
699 ooa
= LLVMBuildFDiv(b
, onef
, LLVMBuildFSub(b
, e
, f
, ""), "ooa");
701 ooa
= lp_build_broadcast_scalar(&bld
, ooa
);
703 /* tri offset calc shares a lot of arithmetic, do it here */
704 if (key
->pgon_offset_scale
!= 0.0f
|| key
->pgon_offset_units
!= 0.0f
) {
705 lp_do_offset_tri(gallivm
, args
, key
, ooa
, dxy01
, dxy20
, attr_pos
);
708 dxy20
= LLVMBuildFMul(b
, dxy20
, ooa
, "");
709 dxy01
= LLVMBuildFMul(b
, dxy01
, ooa
, "");
712 LLVMValueRef xy1xy2
, xy1xy2_center
, dxy12
, dyx01
, dyx12yx20
;
713 LLVMValueRef p0
, p1p2
, tmp0
, tmp1
, shuf0145
, shuf1054
, shuf1u3u
;
717 shuffles
[2] = lp_build_const_int32(gallivm
, 4);
718 shuffles
[3] = lp_build_const_int32(gallivm
, 5);
719 shuf0145
= LLVMConstVector(shuffles
, 4);
722 shuffles
[2] = lp_build_const_int32(gallivm
, 5);
723 shuffles
[3] = lp_build_const_int32(gallivm
, 4);
724 shuf1054
= LLVMConstVector(shuffles
, 4);
726 shuffles
[1] = LLVMGetUndef(shuf_type
);
727 shuffles
[2] = lp_build_const_int32(gallivm
, 3);
728 shuffles
[3] = LLVMGetUndef(shuf_type
);
729 shuf1u3u
= LLVMConstVector(shuffles
, 4);
731 xy1xy2
= LLVMBuildShuffleVector(b
, attr_pos
[1], attr_pos
[2], shuf0145
, "");
732 xy1xy2_center
= LLVMBuildFSub(b
, xy1xy2
, pixel_center
, "");
733 dxy12
= LLVMBuildFSub(b
, attr_pos
[1], attr_pos
[2], "dxy12");
734 dxy12
= LLVMBuildFMul(b
, dxy12
, ooa
, "");
735 dyx12yx20
= LLVMBuildShuffleVector(b
, dxy12
, dxy20
, shuf1054
, "dyx12yx20");
736 dyx01
= LLVMBuildShuffleVector(b
, dxy01
, dxy01
, shuf10
, "");
737 p0
= LLVMBuildFMul(b
, dyx01
, xy0_center
, "");
738 p1p2
= LLVMBuildFMul(b
, dyx12yx20
, xy1xy2_center
, "");
739 tmp0
= LLVMBuildExtractElement(b
, p0
, zeroi
, "");
740 tmp1
= LLVMBuildExtractElement(b
, p0
, onei
, "");
741 args
->e01o
= lp_build_broadcast_scalar(&bld
, LLVMBuildFSub(b
, tmp0
, tmp1
, "e01o"));
742 tmp1
= LLVMBuildShuffleVector(b
, p1p2
, p1p2
, shuf1u3u
, "");
743 tmp0
= LLVMBuildFSub(b
, p1p2
, tmp1
, "e12o20o");
744 args
->e12o
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, tmp0
, zeroi
);
745 args
->e20o
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, tmp0
,
746 lp_build_const_int32(gallivm
, 2));
749 args
->dy20_ooa
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, dxy20
, onei
);
750 args
->dy01_ooa
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, dxy01
, onei
);
752 args
->dx20_ooa
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, dxy20
, zeroi
);
753 args
->dx01_ooa
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, dxy01
, zeroi
);
755 args
->x0_center
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, xy0_center
, zeroi
);
756 args
->y0_center
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, xy0_center
, onei
);
758 emit_linear_coef(gallivm
, args
, 0, attr_pos
);
762 * Generate the runtime callable function for the coefficient calculation.
765 static struct lp_setup_variant
*
766 generate_setup_variant(struct lp_setup_variant_key
*key
,
767 struct llvmpipe_context
*lp
)
769 struct lp_setup_variant
*variant
= NULL
;
770 struct gallivm_state
*gallivm
;
771 struct lp_setup_args args
;
773 LLVMTypeRef vec4f_type
;
774 LLVMTypeRef func_type
;
775 LLVMTypeRef arg_types
[7];
776 LLVMBasicBlockRef block
;
777 LLVMBuilderRef builder
;
783 variant
= CALLOC_STRUCT(lp_setup_variant
);
787 variant
->gallivm
= gallivm
= gallivm_create();
788 if (!variant
->gallivm
) {
792 builder
= gallivm
->builder
;
794 if (LP_DEBUG
& DEBUG_COUNTERS
) {
798 memcpy(&variant
->key
, key
, key
->size
);
799 variant
->list_item_global
.base
= variant
;
801 util_snprintf(func_name
, sizeof(func_name
), "fs%u_setup%u",
804 /* Currently always deal with full 4-wide vertex attributes from
808 vec4f_type
= LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), 4);
810 arg_types
[0] = LLVMPointerType(vec4f_type
, 0); /* v0 */
811 arg_types
[1] = LLVMPointerType(vec4f_type
, 0); /* v1 */
812 arg_types
[2] = LLVMPointerType(vec4f_type
, 0); /* v2 */
813 arg_types
[3] = LLVMInt32TypeInContext(gallivm
->context
); /* facing */
814 arg_types
[4] = LLVMPointerType(vec4f_type
, 0); /* a0, aligned */
815 arg_types
[5] = LLVMPointerType(vec4f_type
, 0); /* dadx, aligned */
816 arg_types
[6] = LLVMPointerType(vec4f_type
, 0); /* dady, aligned */
818 func_type
= LLVMFunctionType(LLVMVoidTypeInContext(gallivm
->context
),
819 arg_types
, Elements(arg_types
), 0);
821 variant
->function
= LLVMAddFunction(gallivm
->module
, func_name
, func_type
);
822 if (!variant
->function
)
825 LLVMSetFunctionCallConv(variant
->function
, LLVMCCallConv
);
827 args
.v0
= LLVMGetParam(variant
->function
, 0);
828 args
.v1
= LLVMGetParam(variant
->function
, 1);
829 args
.v2
= LLVMGetParam(variant
->function
, 2);
830 args
.facing
= LLVMGetParam(variant
->function
, 3);
831 args
.a0
= LLVMGetParam(variant
->function
, 4);
832 args
.dadx
= LLVMGetParam(variant
->function
, 5);
833 args
.dady
= LLVMGetParam(variant
->function
, 6);
835 lp_build_name(args
.v0
, "in_v0");
836 lp_build_name(args
.v1
, "in_v1");
837 lp_build_name(args
.v2
, "in_v2");
838 lp_build_name(args
.facing
, "in_facing");
839 lp_build_name(args
.a0
, "out_a0");
840 lp_build_name(args
.dadx
, "out_dadx");
841 lp_build_name(args
.dady
, "out_dady");
846 block
= LLVMAppendBasicBlockInContext(gallivm
->context
,
847 variant
->function
, "entry");
848 LLVMPositionBuilderAtEnd(builder
, block
);
850 set_noalias(builder
, variant
->function
, arg_types
, Elements(arg_types
));
851 init_args(gallivm
, &variant
->key
, &args
);
852 emit_tri_coef(gallivm
, &variant
->key
, &args
);
854 LLVMBuildRetVoid(builder
);
856 gallivm_verify_function(gallivm
, variant
->function
);
858 gallivm_compile_module(gallivm
);
860 variant
->jit_function
= (lp_jit_setup_triangle
)
861 gallivm_jit_function(gallivm
, variant
->function
);
862 if (!variant
->jit_function
)
866 * Update timing information:
868 if (LP_DEBUG
& DEBUG_COUNTERS
) {
870 LP_COUNT_ADD(llvm_compile_time
, t1
- t0
);
871 LP_COUNT_ADD(nr_llvm_compiles
, 1);
878 if (variant
->function
) {
879 gallivm_free_function(gallivm
,
881 variant
->jit_function
);
883 if (variant
->gallivm
) {
884 gallivm_destroy(variant
->gallivm
);
895 lp_make_setup_variant_key(struct llvmpipe_context
*lp
,
896 struct lp_setup_variant_key
*key
)
898 struct lp_fragment_shader
*fs
= lp
->fs
;
901 assert(sizeof key
->inputs
[0] == sizeof(uint
));
903 key
->num_inputs
= fs
->info
.base
.num_inputs
;
904 key
->flatshade_first
= lp
->rasterizer
->flatshade_first
;
905 key
->pixel_center_half
= lp
->rasterizer
->half_pixel_center
;
906 key
->twoside
= lp
->rasterizer
->light_twoside
;
907 key
->size
= Offset(struct lp_setup_variant_key
,
908 inputs
[key
->num_inputs
]);
910 key
->color_slot
= lp
->color_slot
[0];
911 key
->bcolor_slot
= lp
->bcolor_slot
[0];
912 key
->spec_slot
= lp
->color_slot
[1];
913 key
->bspec_slot
= lp
->bcolor_slot
[1];
914 assert(key
->color_slot
== lp
->color_slot
[0]);
915 assert(key
->bcolor_slot
== lp
->bcolor_slot
[0]);
916 assert(key
->spec_slot
== lp
->color_slot
[1]);
917 assert(key
->bspec_slot
== lp
->bcolor_slot
[1]);
920 * If depth is floating point, depth bias is calculated with respect
921 * to the primitive's maximum Z value. Retain the original depth bias
922 * value until that stage.
924 key
->floating_point_depth
= lp
->floating_point_depth
;
926 if (key
->floating_point_depth
) {
927 key
->pgon_offset_units
= (float) lp
->rasterizer
->offset_units
;
929 key
->pgon_offset_units
=
930 (float) (lp
->rasterizer
->offset_units
* lp
->mrd
);
933 key
->pgon_offset_scale
= lp
->rasterizer
->offset_scale
;
934 key
->pgon_offset_clamp
= lp
->rasterizer
->offset_clamp
;
936 memcpy(key
->inputs
, fs
->inputs
, key
->num_inputs
* sizeof key
->inputs
[0]);
937 for (i
= 0; i
< key
->num_inputs
; i
++) {
938 if (key
->inputs
[i
].interp
== LP_INTERP_COLOR
) {
939 if (lp
->rasterizer
->flatshade
)
940 key
->inputs
[i
].interp
= LP_INTERP_CONSTANT
;
942 key
->inputs
[i
].interp
= LP_INTERP_PERSPECTIVE
;
950 remove_setup_variant(struct llvmpipe_context
*lp
,
951 struct lp_setup_variant
*variant
)
953 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
954 debug_printf("llvmpipe: del setup_variant #%u total %u\n",
955 variant
->no
, lp
->nr_setup_variants
);
958 if (variant
->function
) {
959 gallivm_free_function(variant
->gallivm
,
961 variant
->jit_function
);
964 if (variant
->gallivm
) {
965 gallivm_destroy(variant
->gallivm
);
968 remove_from_list(&variant
->list_item_global
);
969 lp
->nr_setup_variants
--;
975 /* When the number of setup variants exceeds a threshold, cull a
976 * fraction (currently a quarter) of them.
979 cull_setup_variants(struct llvmpipe_context
*lp
)
981 struct pipe_context
*pipe
= &lp
->pipe
;
985 * XXX: we need to flush the context until we have some sort of reference
986 * counting in fragment shaders as they may still be binned
987 * Flushing alone might not be sufficient; we need to wait on it too.
989 llvmpipe_finish(pipe
, __FUNCTION__
);
991 for (i
= 0; i
< LP_MAX_SETUP_VARIANTS
/ 4; i
++) {
992 struct lp_setup_variant_list_item
*item
;
993 if (is_empty_list(&lp
->setup_variants_list
)) {
996 item
= last_elem(&lp
->setup_variants_list
);
999 remove_setup_variant(lp
, item
->base
);
1005 * Update fragment/vertex shader linkage state. This is called just
1006 * prior to drawing something when some fragment-related state has
1010 llvmpipe_update_setup(struct llvmpipe_context
*lp
)
1012 struct lp_setup_variant_key
*key
= &lp
->setup_variant
.key
;
1013 struct lp_setup_variant
*variant
= NULL
;
1014 struct lp_setup_variant_list_item
*li
;
1016 lp_make_setup_variant_key(lp
, key
);
1018 foreach(li
, &lp
->setup_variants_list
) {
1019 if(li
->base
->key
.size
== key
->size
&&
1020 memcmp(&li
->base
->key
, key
, key
->size
) == 0) {
1027 move_to_head(&lp
->setup_variants_list
, &variant
->list_item_global
);
1030 if (lp
->nr_setup_variants
>= LP_MAX_SETUP_VARIANTS
) {
1031 cull_setup_variants(lp
);
1034 variant
= generate_setup_variant(key
, lp
);
1036 insert_at_head(&lp
->setup_variants_list
, &variant
->list_item_global
);
1037 lp
->nr_setup_variants
++;
1038 llvmpipe_variant_count
++;
1042 lp_setup_set_setup_variant(lp
->setup
,
1047 lp_delete_setup_variants(struct llvmpipe_context
*lp
)
1049 struct lp_setup_variant_list_item
*li
;
1050 li
= first_elem(&lp
->setup_variants_list
);
1051 while(!at_end(&lp
->setup_variants_list
, li
)) {
1052 struct lp_setup_variant_list_item
*next
= next_elem(li
);
1053 remove_setup_variant(lp
, li
->base
);
1059 lp_dump_setup_coef( const struct lp_setup_variant_key
*key
,
1060 const float (*sa0
)[4],
1061 const float (*sdadx
)[4],
1062 const float (*sdady
)[4])
1066 for (i
= 0; i
< TGSI_NUM_CHANNELS
; i
++) {
1067 float a0
= sa0
[0][i
];
1068 float dadx
= sdadx
[0][i
];
1069 float dady
= sdady
[0][i
];
1071 debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
1076 for (slot
= 0; slot
< key
->num_inputs
; slot
++) {
1077 unsigned usage_mask
= key
->inputs
[slot
].usage_mask
;
1078 for (i
= 0; i
< TGSI_NUM_CHANNELS
; i
++) {
1079 if (usage_mask
& (1 << i
)) {
1080 float a0
= sa0
[1 + slot
][i
];
1081 float dadx
= sdadx
[1 + slot
][i
];
1082 float dady
= sdady
[1 + slot
][i
];
1084 debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",