1 /**************************************************************************
3 * Copyright 2010 VMware.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_math.h"
30 #include "util/u_memory.h"
31 #include "util/u_simple_list.h"
32 #include "os/os_time.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_debug.h"
35 #include "gallivm/lp_bld_init.h"
36 #include "gallivm/lp_bld_intr.h"
37 #include "gallivm/lp_bld_flow.h"
38 #include "gallivm/lp_bld_type.h"
39 #include <llvm-c/Analysis.h> /* for LLVMVerifyFunction */
44 #include "lp_screen.h"
45 #include "lp_context.h"
46 #include "lp_setup_context.h"
49 #include "lp_state_fs.h"
50 #include "lp_state_setup.h"
54 /* currently organized to interpolate full float[4] attributes even
55 * when some elements are unused. Later, can pack vertex data more
62 /* Function arguments:
67 LLVMValueRef facing
; /* boolean */
74 LLVMValueRef x0_center
;
75 LLVMValueRef y0_center
;
76 LLVMValueRef dy20_ooa
;
77 LLVMValueRef dy01_ooa
;
78 LLVMValueRef dx20_ooa
;
79 LLVMValueRef dx01_ooa
;
82 static LLVMTypeRef
type4f(void)
84 return LLVMVectorType(LLVMFloatType(), 4);
88 /* Equivalent of _mm_setr_ps(a,b,c,d)
90 static LLVMValueRef
vec4f(LLVMBuilderRef bld
,
91 LLVMValueRef a
, LLVMValueRef b
, LLVMValueRef c
, LLVMValueRef d
,
94 LLVMValueRef i0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
95 LLVMValueRef i1
= LLVMConstInt(LLVMInt32Type(), 1, 0);
96 LLVMValueRef i2
= LLVMConstInt(LLVMInt32Type(), 2, 0);
97 LLVMValueRef i3
= LLVMConstInt(LLVMInt32Type(), 3, 0);
99 LLVMValueRef res
= LLVMGetUndef(type4f());
101 res
= LLVMBuildInsertElement(bld
, res
, a
, i0
, "");
102 res
= LLVMBuildInsertElement(bld
, res
, b
, i1
, "");
103 res
= LLVMBuildInsertElement(bld
, res
, c
, i2
, "");
104 res
= LLVMBuildInsertElement(bld
, res
, d
, i3
, name
);
109 /* Equivalent of _mm_set1_ps(a)
111 static LLVMValueRef
vec4f_from_scalar(LLVMBuilderRef bld
,
115 LLVMValueRef res
= LLVMGetUndef(type4f());
118 for(i
= 0; i
< 4; ++i
) {
119 LLVMValueRef index
= LLVMConstInt(LLVMInt32Type(), i
, 0);
120 res
= LLVMBuildInsertElement(bld
, res
, a
, index
, i
== 3 ? name
: "");
127 store_coef(LLVMBuilderRef builder
,
128 struct lp_setup_args
*args
,
134 LLVMValueRef idx
= LLVMConstInt(LLVMInt32Type(), slot
, 0);
136 LLVMBuildStore(builder
,
138 LLVMBuildGEP(builder
, args
->a0
, &idx
, 1, ""));
140 LLVMBuildStore(builder
,
142 LLVMBuildGEP(builder
, args
->dadx
, &idx
, 1, ""));
144 LLVMBuildStore(builder
,
146 LLVMBuildGEP(builder
, args
->dady
, &idx
, 1, ""));
152 emit_constant_coef4( LLVMBuilderRef builder
,
153 struct lp_setup_args
*args
,
158 LLVMValueRef zero
= LLVMConstReal(LLVMFloatType(), 0.0);
159 LLVMValueRef zerovec
= vec4f_from_scalar(builder
, zero
, "zero");
160 LLVMValueRef idx
= LLVMConstInt(LLVMInt32Type(), attr
, 0);
161 LLVMValueRef attr_ptr
= LLVMBuildGEP(builder
, vert
, &idx
, 1, "attr_ptr");
162 LLVMValueRef vert_attr
= LLVMBuildLoad(builder
, attr_ptr
, "vert_attr");
164 store_coef(builder
, args
, slot
, vert_attr
, zerovec
, zerovec
);
170 * Setup the fragment input attribute with the front-facing value.
171 * \param frontface is the triangle front facing?
174 emit_facing_coef( LLVMBuilderRef builder
,
175 struct lp_setup_args
*args
,
178 LLVMValueRef a0_0
= args
->facing
;
179 LLVMValueRef a0_0f
= LLVMBuildSIToFP(builder
, a0_0
, LLVMFloatType(), "");
180 LLVMValueRef zero
= LLVMConstReal(LLVMFloatType(), 0.0);
181 LLVMValueRef a0
= vec4f(builder
, a0_0f
, zero
, zero
, zero
, "facing");
182 LLVMValueRef zerovec
= vec4f_from_scalar(builder
, zero
, "zero");
184 store_coef(builder
, args
, slot
, a0
, zerovec
, zerovec
);
189 vert_attrib(LLVMBuilderRef b
,
196 idx
[0] = LLVMConstInt(LLVMInt32Type(), attr
, 0);
197 idx
[1] = LLVMConstInt(LLVMInt32Type(), elem
, 0);
198 return LLVMBuildLoad(b
, LLVMBuildGEP(b
, vert
, idx
, 2, ""), name
);
202 vert_clamp(LLVMBuilderRef b
,
207 LLVMValueRef min_result
= LLVMBuildFCmp(b
, LLVMRealUGT
, min
, x
, "");
208 LLVMValueRef max_result
= LLVMBuildFCmp(b
, LLVMRealUGT
, x
, max
, "");
209 LLVMValueRef clamp_value
;
211 clamp_value
= LLVMBuildSelect(b
, min_result
, min
, x
, "");
212 clamp_value
= LLVMBuildSelect(b
, max_result
, max
, x
, "");
218 emit_coef4( LLVMBuilderRef b
,
219 struct lp_setup_args
*args
,
225 LLVMValueRef dy20_ooa
= args
->dy20_ooa
;
226 LLVMValueRef dy01_ooa
= args
->dy01_ooa
;
227 LLVMValueRef dx20_ooa
= args
->dx20_ooa
;
228 LLVMValueRef dx01_ooa
= args
->dx01_ooa
;
229 LLVMValueRef x0_center
= args
->x0_center
;
230 LLVMValueRef y0_center
= args
->y0_center
;
232 /* XXX: using fsub, fmul on vector types -- does this work??
234 LLVMValueRef da01
= LLVMBuildFSub(b
, a0
, a1
, "da01");
235 LLVMValueRef da20
= LLVMBuildFSub(b
, a2
, a0
, "da20");
237 /* Calculate dadx (vec4f)
239 LLVMValueRef da01_dy20_ooa
= LLVMBuildFMul(b
, da01
, dy20_ooa
, "da01_dy20_ooa");
240 LLVMValueRef da20_dy01_ooa
= LLVMBuildFMul(b
, da20
, dy01_ooa
, "da20_dy01_ooa");
241 LLVMValueRef dadx
= LLVMBuildFSub(b
, da01_dy20_ooa
, da20_dy01_ooa
, "dadx");
243 /* Calculate dady (vec4f)
245 LLVMValueRef da01_dx20_ooa
= LLVMBuildFMul(b
, da01
, dx20_ooa
, "da01_dx20_ooa");
246 LLVMValueRef da20_dx01_ooa
= LLVMBuildFMul(b
, da20
, dx01_ooa
, "da20_dx01_ooa");
247 LLVMValueRef dady
= LLVMBuildFSub(b
, da20_dx01_ooa
, da01_dx20_ooa
, "dady");
249 /* Calculate a0 - the attribute value at the origin
251 LLVMValueRef dadx_x0
= LLVMBuildFMul(b
, dadx
, x0_center
, "dadx_x0");
252 LLVMValueRef dady_y0
= LLVMBuildFMul(b
, dady
, y0_center
, "dady_y0");
253 LLVMValueRef attr_v0
= LLVMBuildFAdd(b
, dadx_x0
, dady_y0
, "attr_v0");
254 LLVMValueRef attr_0
= LLVMBuildFSub(b
, a0
, attr_v0
, "attr_0");
256 store_coef(b
, args
, slot
, attr_0
, dadx
, dady
);
261 emit_linear_coef( LLVMBuilderRef b
,
262 struct lp_setup_args
*args
,
266 LLVMValueRef idx
= LLVMConstInt(LLVMInt32Type(), vert_attr
, 0);
268 LLVMValueRef a0
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v0
, &idx
, 1, ""), "v0a");
269 LLVMValueRef a1
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v1
, &idx
, 1, ""), "v1a");
270 LLVMValueRef a2
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v2
, &idx
, 1, ""), "v2a");
272 emit_coef4(b
, args
, slot
, a0
, a1
, a2
);
278 * Compute a0, dadx and dady for a perspective-corrected interpolant,
280 * We basically multiply the vertex value by 1/w before computing
281 * the plane coefficients (a0, dadx, dady).
282 * Later, when we compute the value at a particular fragment position we'll
283 * divide the interpolated value by the interpolated W at that fragment.
286 emit_perspective_coef( LLVMBuilderRef b
,
287 struct lp_setup_args
*args
,
291 /* premultiply by 1/w (v[0][3] is always 1/w):
293 LLVMValueRef idx
= LLVMConstInt(LLVMInt32Type(), vert_attr
, 0);
295 LLVMValueRef v0a
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v0
, &idx
, 1, ""), "v0a");
296 LLVMValueRef v1a
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v1
, &idx
, 1, ""), "v1a");
297 LLVMValueRef v2a
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v2
, &idx
, 1, ""), "v2a");
299 LLVMValueRef v0_oow
= vec4f_from_scalar(b
, vert_attrib(b
, args
->v0
, 0, 3, ""), "v0_oow");
300 LLVMValueRef v1_oow
= vec4f_from_scalar(b
, vert_attrib(b
, args
->v1
, 0, 3, ""), "v1_oow");
301 LLVMValueRef v2_oow
= vec4f_from_scalar(b
, vert_attrib(b
, args
->v2
, 0, 3, ""), "v2_oow");
303 LLVMValueRef v0_oow_v0a
= LLVMBuildFMul(b
, v0a
, v0_oow
, "v0_oow_v0a");
304 LLVMValueRef v1_oow_v1a
= LLVMBuildFMul(b
, v1a
, v1_oow
, "v1_oow_v1a");
305 LLVMValueRef v2_oow_v2a
= LLVMBuildFMul(b
, v2a
, v2_oow
, "v2_oow_v2a");
307 emit_coef4(b
, args
, slot
, v0_oow_v0a
, v1_oow_v1a
, v2_oow_v2a
);
312 emit_position_coef( LLVMBuilderRef builder
,
313 struct lp_setup_args
*args
,
314 int slot
, int attrib
)
316 emit_linear_coef(builder
, args
, slot
, attrib
);
323 * Compute the inputs-> dadx, dady, a0 values.
/*
 * emit_tri_coef: emit coefficient setup for a whole triangle.
 *
 * Position is emitted first into output slot 0 (from vertex attribute 0).
 * Then, for each of the key->num_inputs fragment inputs, the code
 * dispatches on key->inputs[slot].interp and writes output slot+1:
 *   - LP_INTERP_CONSTANT:    emit_constant_coef4() from args->v0 when
 *                            key->flatshade_first is set; a second call
 *                            using args->v2 follows
 *   - LP_INTERP_LINEAR:      emit_linear_coef()
 *   - LP_INTERP_PERSPECTIVE: emit_perspective_coef()
 *   - LP_INTERP_POSITION:    nothing visible is emitted
 *   - LP_INTERP_FACING:      emit_facing_coef()
 *
 * NOTE(review): this copy of the function is missing lines (braces, the
 * declaration of `slot`, and whatever separates the case arms -- presumably
 * an `else` and `break` statements, possibly a default arm).  Restore them
 * from the pristine file before changing the code.
 */
326 emit_tri_coef( LLVMBuilderRef builder
,
327 const struct lp_setup_variant_key
*key
,
328 struct lp_setup_args
*args
)
332 /* The internal position input is in slot zero:
334 emit_position_coef(builder
, args
, 0, 0);
336 /* setup interpolation for all the remaining attributes:
338 for (slot
= 0; slot
< key
->num_inputs
; slot
++) {
339 unsigned vert_attr
= key
->inputs
[slot
].src_index
;
341 switch (key
->inputs
[slot
].interp
) {
342 case LP_INTERP_CONSTANT
:
343 if (key
->flatshade_first
) {
344 emit_constant_coef4(builder
, args
, slot
+1, args
->v0
, vert_attr
);
347 emit_constant_coef4(builder
, args
, slot
+1, args
->v2
, vert_attr
);
351 case LP_INTERP_LINEAR
:
352 emit_linear_coef(builder
, args
, slot
+1, vert_attr
);
355 case LP_INTERP_PERSPECTIVE
:
356 emit_perspective_coef(builder
, args
, slot
+1, vert_attr
);
359 case LP_INTERP_POSITION
:
361 * The generated pixel interpolators will pick up the coeffs from
366 case LP_INTERP_FACING
:
367 emit_facing_coef(builder
, args
, slot
+1);
377 /* XXX: This is generic code, share with fs/vs codegen:
/*
 * finalize_function: turn a finished LLVM function into callable code.
 *
 * Visible steps, in order:
 *   1. LLVMVerifyFunction() with LLVMPrintMessageAction; when it reports a
 *      problem the function is dumped with lp_debug_dump_value().
 *   2. screen->pass is run over the function.
 *   3. With GALLIVM_DEBUG_IR set in gallivm_debug, the IR is dumped.
 *   4. LLVMGetPointerToGlobal() on screen->engine yields the pointer `f`.
 *   5. lp_func_delete_body() is called on the function.
 *
 * The `builder` parameter is not referenced by any visible line.
 *
 * NOTE(review): this copy is missing lines -- the declaration of `f`, the
 * rest of the verification-failure branch, the body of the
 * GALLIVM_DEBUG_ASM branch and the return statement are not visible.
 * Restore them from the pristine file before changing the code.
 */
379 static lp_jit_setup_triangle
380 finalize_function(struct llvmpipe_screen
*screen
,
381 LLVMBuilderRef builder
,
382 LLVMValueRef function
)
386 /* Verify the LLVM IR. If invalid, dump and abort */
388 if (LLVMVerifyFunction(function
, LLVMPrintMessageAction
)) {
390 lp_debug_dump_value(function
);
395 /* Apply optimizations to LLVM IR */
396 LLVMRunFunctionPassManager(screen
->pass
, function
);
398 if (gallivm_debug
& GALLIVM_DEBUG_IR
)
400 /* Print the LLVM IR to stderr */
401 lp_debug_dump_value(function
);
406 * Translate the LLVM IR into machine code.
408 f
= LLVMGetPointerToGlobal(screen
->engine
, function
);
410 if (gallivm_debug
& GALLIVM_DEBUG_ASM
)
415 lp_func_delete_body(function
);
420 /* XXX: Generic code:
/*
 * lp_emit_emms: emit a call to the "llvm.x86.mmx.emms" intrinsic (void
 * result, no arguments) at the builder's current position.
 *
 * NOTE(review): lines are missing on both sides of the intrinsic call in
 * this copy; they may be a preprocessor guard restricting this to x86
 * builds -- confirm against the pristine file before changing the code.
 */
423 lp_emit_emms(LLVMBuilderRef builder
)
426 /* Avoid corrupting the FPU stack on 32bit OSes. */
427 lp_build_intrinsic(builder
, "llvm.x86.mmx.emms", LLVMVoidType(), NULL
, 0);
432 /* XXX: generic code:
435 set_noalias(LLVMBuilderRef builder
,
436 LLVMValueRef function
,
437 const LLVMTypeRef
*arg_types
,
441 for(i
= 0; i
< Elements(arg_types
); ++i
)
442 if(LLVMGetTypeKind(arg_types
[i
]) == LLVMPointerTypeKind
)
443 LLVMAddAttribute(LLVMGetParam(function
, i
),
444 LLVMNoAliasAttribute
);
448 init_args(LLVMBuilderRef b
,
449 struct lp_setup_args
*args
,
450 const struct lp_setup_variant
*variant
)
452 LLVMValueRef v0_x
= vert_attrib(b
, args
->v0
, 0, 0, "v0_x");
453 LLVMValueRef v0_y
= vert_attrib(b
, args
->v0
, 0, 1, "v0_y");
455 LLVMValueRef v1_x
= vert_attrib(b
, args
->v1
, 0, 0, "v1_x");
456 LLVMValueRef v1_y
= vert_attrib(b
, args
->v1
, 0, 1, "v1_y");
458 LLVMValueRef v2_x
= vert_attrib(b
, args
->v2
, 0, 0, "v2_x");
459 LLVMValueRef v2_y
= vert_attrib(b
, args
->v2
, 0, 1, "v2_y");
461 LLVMValueRef pixel_center
= LLVMConstReal(LLVMFloatType(),
462 variant
->key
.pixel_center_half
? 0.5 : 0);
464 LLVMValueRef x0_center
= LLVMBuildFSub(b
, v0_x
, pixel_center
, "x0_center" );
465 LLVMValueRef y0_center
= LLVMBuildFSub(b
, v0_y
, pixel_center
, "y0_center" );
467 LLVMValueRef dx01
= LLVMBuildFSub(b
, v0_x
, v1_x
, "dx01");
468 LLVMValueRef dy01
= LLVMBuildFSub(b
, v0_y
, v1_y
, "dy01");
469 LLVMValueRef dx20
= LLVMBuildFSub(b
, v2_x
, v0_x
, "dx20");
470 LLVMValueRef dy20
= LLVMBuildFSub(b
, v2_y
, v0_y
, "dy20");
472 LLVMValueRef one
= LLVMConstReal(LLVMFloatType(), 1.0);
473 LLVMValueRef e
= LLVMBuildFMul(b
, dx01
, dy20
, "e");
474 LLVMValueRef f
= LLVMBuildFMul(b
, dx20
, dy01
, "f");
475 LLVMValueRef ooa
= LLVMBuildFDiv(b
, one
, LLVMBuildFSub(b
, e
, f
, ""), "ooa");
477 LLVMValueRef dy20_ooa
= LLVMBuildFMul(b
, dy20
, ooa
, "dy20_ooa");
478 LLVMValueRef dy01_ooa
= LLVMBuildFMul(b
, dy01
, ooa
, "dy01_ooa");
479 LLVMValueRef dx20_ooa
= LLVMBuildFMul(b
, dx20
, ooa
, "dx20_ooa");
480 LLVMValueRef dx01_ooa
= LLVMBuildFMul(b
, dx01
, ooa
, "dx01_ooa");
482 args
->dy20_ooa
= vec4f_from_scalar(b
, dy20_ooa
, "dy20_ooa_4f");
483 args
->dy01_ooa
= vec4f_from_scalar(b
, dy01_ooa
, "dy01_ooa_4f");
485 args
->dx20_ooa
= vec4f_from_scalar(b
, dx20_ooa
, "dx20_ooa_4f");
486 args
->dx01_ooa
= vec4f_from_scalar(b
, dx01_ooa
, "dx01_ooa_4f");
488 args
->x0_center
= vec4f_from_scalar(b
, x0_center
, "x0_center_4f");
489 args
->y0_center
= vec4f_from_scalar(b
, y0_center
, "y0_center_4f");
493 lp_twoside(LLVMBuilderRef b
,
494 struct lp_setup_args
*args
,
495 const struct lp_setup_variant_key
*key
)
497 struct lp_build_if_state if_state
;
499 LLVMValueRef a0_old
, a1_old
, a2_old
;
500 LLVMValueRef a0_new
, a1_new
, a2_new
;
502 LLVMValueRef idx1
= LLVMConstInt(LLVMInt32Type(), key
->color_slot
, 0);
503 LLVMValueRef idx2
= LLVMConstInt(LLVMInt32Type(), key
->bcolor_slot
, 0);
505 LLVMValueRef facing
= args
->facing
;
506 LLVMValueRef front_facing
= LLVMBuildICmp(b
, LLVMIntEQ
, facing
, LLVMConstInt(LLVMInt32Type(), 0, 0), ""); /** need i1 for if condition */
508 lp_build_if(&if_state
, b
, front_facing
);
510 /* swap the front and back attrib values */
511 a0_old
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v0
, &idx1
, 1, ""), "v0a");
512 a1_old
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v1
, &idx1
, 1, ""), "v1a");
513 a2_old
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v2
, &idx1
, 1, ""), "v2a");
515 a0_new
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v0
, &idx2
, 1, ""), "v0a");
516 a1_new
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v1
, &idx2
, 1, ""), "v1a");
517 a2_new
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v2
, &idx2
, 1, ""), "v2a");
519 LLVMBuildStore(b
, a0_new
, LLVMBuildGEP(b
, args
->v0
, &idx1
, 1, ""));
520 LLVMBuildStore(b
, a1_new
, LLVMBuildGEP(b
, args
->v1
, &idx1
, 1, ""));
521 LLVMBuildStore(b
, a2_new
, LLVMBuildGEP(b
, args
->v2
, &idx1
, 1, ""));
523 lp_build_endif(&if_state
);
528 lp_do_offset_tri(LLVMBuilderRef b
,
529 struct lp_setup_args
*args
,
530 const struct lp_setup_variant_key
*key
)
532 struct lp_build_context bld
;
533 LLVMValueRef zoffset
, mult
;
534 LLVMValueRef z0_new
, z1_new
, z2_new
;
535 LLVMValueRef dzdx0
, dzdx
, dzdy0
, dzdy
;
536 LLVMValueRef max
, max_value
;
539 LLVMValueRef one
= LLVMConstReal(LLVMFloatType(), 1.0);
540 LLVMValueRef zero
= LLVMConstReal(LLVMFloatType(), 0.0);
542 /* edge vectors: e = v0 - v2, f = v1 - v2 */
543 LLVMValueRef v0_x
= vert_attrib(b
, args
->v0
, 0, 0, "v0_x");
544 LLVMValueRef v1_x
= vert_attrib(b
, args
->v1
, 0, 0, "v1_x");
545 LLVMValueRef v2_x
= vert_attrib(b
, args
->v2
, 0, 0, "v2_x");
546 LLVMValueRef v0_y
= vert_attrib(b
, args
->v0
, 0, 1, "v0_y");
547 LLVMValueRef v1_y
= vert_attrib(b
, args
->v1
, 0, 1, "v1_y");
548 LLVMValueRef v2_y
= vert_attrib(b
, args
->v2
, 0, 1, "v2_y");
549 LLVMValueRef v0_z
= vert_attrib(b
, args
->v0
, 0, 2, "v0_z");
550 LLVMValueRef v1_z
= vert_attrib(b
, args
->v1
, 0, 2, "v1_z");
551 LLVMValueRef v2_z
= vert_attrib(b
, args
->v2
, 0, 2, "v2_z");
553 /* edge vectors: e = v0 - v2, f = v1 - v2 */
554 LLVMValueRef dx02
= LLVMBuildFSub(b
, v0_x
, v2_x
, "dx02");
555 LLVMValueRef dy02
= LLVMBuildFSub(b
, v0_y
, v2_y
, "dy02");
556 LLVMValueRef dz02
= LLVMBuildFSub(b
, v0_z
, v2_z
, "dz02");
557 LLVMValueRef dx12
= LLVMBuildFSub(b
, v1_x
, v2_x
, "dx12");
558 LLVMValueRef dy12
= LLVMBuildFSub(b
, v1_y
, v2_y
, "dy12");
559 LLVMValueRef dz12
= LLVMBuildFSub(b
, v1_z
, v2_z
, "dz12");
561 /* det = cross(e,f).z */
562 LLVMValueRef dx02_dy12
= LLVMBuildFMul(b
, dx02
, dy12
, "dx02_dy12");
563 LLVMValueRef dy02_dx12
= LLVMBuildFMul(b
, dy02
, dx12
, "dy02_dx12");
564 LLVMValueRef det
= LLVMBuildFSub(b
, dx02_dy12
, dy02_dx12
, "det");
565 LLVMValueRef inv_det
= LLVMBuildFDiv(b
, one
, det
, "inv_det");
567 /* (res1,res2) = cross(e,f).xy */
568 LLVMValueRef dy02_dz12
= LLVMBuildFMul(b
, dy02
, dz12
, "dy02_dz12");
569 LLVMValueRef dz02_dy12
= LLVMBuildFMul(b
, dz02
, dy12
, "dz02_dy12");
570 LLVMValueRef dz02_dx12
= LLVMBuildFMul(b
, dz02
, dx12
, "dz02_dx12");
571 LLVMValueRef dx02_dz12
= LLVMBuildFMul(b
, dx02
, dz12
, "dx02_dz12");
572 LLVMValueRef res1
= LLVMBuildFSub(b
, dy02_dz12
, dz02_dy12
, "res1");
573 LLVMValueRef res2
= LLVMBuildFSub(b
, dz02_dx12
, dx02_dz12
, "res2");
575 /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/
576 lp_build_context_init(&bld
, b
, lp_type_float(32));
577 dzdx0
= LLVMBuildFMul(b
, res1
, inv_det
, "dzdx");
578 dzdx
= lp_build_abs(&bld
, dzdx0
);
579 dzdy0
= LLVMBuildFMul(b
, res2
, inv_det
, "dzdy");
580 dzdy
= lp_build_abs(&bld
, dzdy0
);
582 /* zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale */
583 max
= LLVMBuildFCmp(b
, LLVMRealUGT
, dzdx
, dzdy
, "");
584 max_value
= LLVMBuildSelect(b
, max
, dzdx
, dzdy
, "max");
586 mult
= LLVMBuildFMul(b
, max_value
, LLVMConstReal(LLVMFloatType(), key
->scale
), "");
587 zoffset
= LLVMBuildFAdd(b
, LLVMConstReal(LLVMFloatType(), key
->units
), mult
, "zoffset");
589 /* clamp and do offset */
590 z0_new
= vert_clamp(b
, LLVMBuildFAdd(b
, v0_z
, zoffset
, ""), zero
, one
);
591 z1_new
= vert_clamp(b
, LLVMBuildFAdd(b
, v1_z
, zoffset
, ""), zero
, one
);
592 z2_new
= vert_clamp(b
, LLVMBuildFAdd(b
, v2_z
, zoffset
, ""), zero
, one
);
594 /* store back new offsetted z values */
595 idx
[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
596 idx
[1] = LLVMConstInt(LLVMInt32Type(), 2, 0);
597 LLVMBuildStore(b
, z0_new
, LLVMBuildGEP(b
, args
->v0
, idx
, 2, ""));
598 LLVMBuildStore(b
, z1_new
, LLVMBuildGEP(b
, args
->v1
, idx
, 2, ""));
599 LLVMBuildStore(b
, z2_new
, LLVMBuildGEP(b
, args
->v2
, idx
, 2, ""));
605 * Generate the runtime callable function for the coefficient calculation.
/*
 * generate_setup_variant: build the LLVM function for one setup variant.
 *
 * Visible steps, in order:
 *   - allocate a variant with CALLOC_STRUCT and copy key->size bytes of
 *     *key into variant->key;
 *   - create a void function taking seven arguments: v0, v1, v2
 *     (pointers to 4 x float), facing (i32), and a0, dadx, dady (pointers
 *     to 4 x float), using the C calling convention;
 *   - emit the body: set_noalias(), init_args(), lp_twoside() when
 *     key.twoside is set, lp_do_offset_tri() when key.scale or key.units
 *     is non-zero, emit_tri_coef(), lp_emit_emms(), then a void return;
 *   - hand the function to finalize_function() and keep the result in
 *     variant->jit_function;
 *   - with DEBUG_COUNTERS set in LP_DEBUG, add to the llvm_compile_time
 *     and nr_llvm_compiles counters.
 * The trailing block frees the machine code and deletes the LLVM
 * function.
 *
 * NOTE(review): this copy is missing lines (braces, the declarations of
 * func_name/t0/t1, the bodies of the failure checks, the trailing
 * snprintf arguments, the return statements).  Restore them from the
 * pristine file before changing the code.
 */
608 static struct lp_setup_variant
*
609 generate_setup_variant(struct llvmpipe_screen
*screen
,
610 struct lp_setup_variant_key
*key
,
611 struct llvmpipe_context
*lp
)
613 struct lp_setup_variant
*variant
= NULL
;
614 struct lp_setup_args args
;
616 LLVMTypeRef vec4f_type
;
617 LLVMTypeRef func_type
;
618 LLVMTypeRef arg_types
[7];
619 LLVMBasicBlockRef block
;
620 LLVMBuilderRef builder
;
/* NOTE(review): no check of the allocation result is visible before
 * variant is dereferenced below -- confirm one exists in the full file.
 */
626 variant
= CALLOC_STRUCT(lp_setup_variant
);
630 if (LP_DEBUG
& DEBUG_COUNTERS
) {
634 memcpy(&variant
->key
, key
, key
->size
);
635 variant
->list_item_global
.base
= variant
;
637 util_snprintf(func_name
, sizeof(func_name
), "fs%u_setup%u",
641 /* Currently always deal with full 4-wide vertex attributes from
645 vec4f_type
= LLVMVectorType(LLVMFloatType(), 4);
647 arg_types
[0] = LLVMPointerType(vec4f_type
, 0); /* v0 */
648 arg_types
[1] = LLVMPointerType(vec4f_type
, 0); /* v1 */
649 arg_types
[2] = LLVMPointerType(vec4f_type
, 0); /* v2 */
650 arg_types
[3] = LLVMInt32Type(); /* facing */
651 arg_types
[4] = LLVMPointerType(vec4f_type
, 0); /* a0, aligned */
652 arg_types
[5] = LLVMPointerType(vec4f_type
, 0); /* dadx, aligned */
653 arg_types
[6] = LLVMPointerType(vec4f_type
, 0); /* dady, aligned */
655 func_type
= LLVMFunctionType(LLVMVoidType(), arg_types
, Elements(arg_types
), 0);
657 variant
->function
= LLVMAddFunction(screen
->module
, func_name
, func_type
);
658 if (!variant
->function
)
661 LLVMSetFunctionCallConv(variant
->function
, LLVMCCallConv
);
663 args
.v0
= LLVMGetParam(variant
->function
, 0);
664 args
.v1
= LLVMGetParam(variant
->function
, 1);
665 args
.v2
= LLVMGetParam(variant
->function
, 2);
666 args
.facing
= LLVMGetParam(variant
->function
, 3);
667 args
.a0
= LLVMGetParam(variant
->function
, 4);
668 args
.dadx
= LLVMGetParam(variant
->function
, 5);
669 args
.dady
= LLVMGetParam(variant
->function
, 6);
671 lp_build_name(args
.v0
, "in_v0");
672 lp_build_name(args
.v1
, "in_v1");
673 lp_build_name(args
.v2
, "in_v2");
674 lp_build_name(args
.facing
, "in_facing");
675 lp_build_name(args
.a0
, "out_a0");
676 lp_build_name(args
.dadx
, "out_dadx");
677 lp_build_name(args
.dady
, "out_dady");
682 block
= LLVMAppendBasicBlock(variant
->function
, "entry");
683 builder
= LLVMCreateBuilder();
684 LLVMPositionBuilderAtEnd(builder
, block
);
686 set_noalias(builder
, variant
->function
, arg_types
, Elements(arg_types
));
687 init_args(builder
, &args
, variant
);
688 if (variant
->key
.twoside
){
689 lp_twoside(builder
, &args
, &variant
->key
);
691 if (variant
->key
.scale
|| variant
->key
.units
){
692 lp_do_offset_tri(builder
, &args
, &variant
->key
);
694 emit_tri_coef(builder
, &variant
->key
, &args
);
696 lp_emit_emms(builder
);
697 LLVMBuildRetVoid(builder
);
/* NOTE(review): the builder is disposed here, yet the same handle is
 * passed to finalize_function() in the next statement.
 */
698 LLVMDisposeBuilder(builder
);
700 variant
->jit_function
= finalize_function(screen
, builder
,
702 if (!variant
->jit_function
)
706 * Update timing information:
708 if (LP_DEBUG
& DEBUG_COUNTERS
) {
710 LP_COUNT_ADD(llvm_compile_time
, t1
- t0
);
711 LP_COUNT_ADD(nr_llvm_compiles
, 1);
718 if (variant
->function
) {
719 if (variant
->jit_function
)
720 LLVMFreeMachineCodeForFunction(screen
->engine
,
722 LLVMDeleteFunction(variant
->function
);
/*
 * lp_make_setup_variant_key: fill *key from the current context state.
 *
 * Copied into the key:
 *   - num_inputs from the bound fragment shader (lp->fs);
 *   - flatshade_first, pixel_center_half (from gl_rasterization_rules) and
 *     twoside (from light_twoside) from lp->rasterizer;
 *   - color_slot and bcolor_slot from lp;
 *   - units = offset_units * lp->mrd (cast to float), and
 *     scale = offset_scale, from lp->rasterizer;
 *   - the first num_inputs entries of fs->inputs.
 * key->size is the offset of inputs[num_inputs] within the key struct, so
 * it covers only the input entries in use.
 *
 * Inputs marked LP_INTERP_COLOR are then rewritten: LP_INTERP_CONSTANT is
 * assigned when lp->rasterizer->flatshade is set, and an LP_INTERP_LINEAR
 * assignment follows.
 *
 * NOTE(review): this copy is missing lines (braces, the declaration of
 * `i`, presumably an `else` before the LP_INTERP_LINEAR assignment, and
 * one line just before the memcpy that may be blank or a statement).
 * Restore them from the pristine file before changing the code.
 */
733 lp_make_setup_variant_key(struct llvmpipe_context
*lp
,
734 struct lp_setup_variant_key
*key
)
736 struct lp_fragment_shader
*fs
= lp
->fs
;
739 assert(sizeof key
->inputs
[0] == sizeof(ushort
));
741 key
->num_inputs
= fs
->info
.base
.num_inputs
;
742 key
->flatshade_first
= lp
->rasterizer
->flatshade_first
;
743 key
->pixel_center_half
= lp
->rasterizer
->gl_rasterization_rules
;
744 key
->twoside
= lp
->rasterizer
->light_twoside
;
745 key
->size
= Offset(struct lp_setup_variant_key
,
746 inputs
[key
->num_inputs
]);
747 key
->color_slot
= lp
->color_slot
;
748 key
->bcolor_slot
= lp
->bcolor_slot
;
749 key
->units
= (float) (lp
->rasterizer
->offset_units
* lp
->mrd
);
750 key
->scale
= lp
->rasterizer
->offset_scale
;
752 memcpy(key
->inputs
, fs
->inputs
, key
->num_inputs
* sizeof key
->inputs
[0]);
753 for (i
= 0; i
< key
->num_inputs
; i
++) {
754 if (key
->inputs
[i
].interp
== LP_INTERP_COLOR
) {
755 if (lp
->rasterizer
->flatshade
)
756 key
->inputs
[i
].interp
= LP_INTERP_CONSTANT
;
758 key
->inputs
[i
].interp
= LP_INTERP_LINEAR
;
/*
 * remove_setup_variant: tear down one setup variant.
 *
 * Visible steps, in order:
 *   - with GALLIVM_DEBUG_IR set in gallivm_debug, print the variant number
 *     and the current variant count;
 *   - if the variant has an LLVM function: free its machine code when
 *     jit_function is set, then delete the function;
 *   - unlink variant->list_item_global from its list;
 *   - decrement lp->nr_setup_variants.
 *
 * NOTE(review): this copy is missing lines; whether the variant structure
 * itself is freed here cannot be seen -- confirm against the pristine
 * file before changing the code.
 */
766 remove_setup_variant(struct llvmpipe_context
*lp
,
767 struct lp_setup_variant
*variant
)
769 struct llvmpipe_screen
*screen
= llvmpipe_screen(lp
->pipe
.screen
);
771 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
772 debug_printf("llvmpipe: del setup_variant #%u total %u\n",
773 variant
->no
, lp
->nr_setup_variants
);
776 if (variant
->function
) {
777 if (variant
->jit_function
)
778 LLVMFreeMachineCodeForFunction(screen
->engine
,
780 LLVMDeleteFunction(variant
->function
);
783 remove_from_list(&variant
->list_item_global
);
784 lp
->nr_setup_variants
--;
790 /* When the number of setup variants exceeds a threshold, cull a
791 * fraction (currently a quarter) of them.
794 cull_setup_variants(struct llvmpipe_context
*lp
)
796 struct pipe_context
*pipe
= &lp
->pipe
;
800 * XXX: we need to flush the context until we have some sort of reference
801 * counting in fragment shaders as they may still be binned
802 * Flushing alone might not be sufficient we need to wait on it too.
804 llvmpipe_finish(pipe
, __FUNCTION__
);
806 for (i
= 0; i
< LP_MAX_SETUP_VARIANTS
/ 4; i
++) {
807 struct lp_setup_variant_list_item
*item
= last_elem(&lp
->setup_variants_list
);
808 remove_setup_variant(lp
, item
->base
);
814 * Update fragment/vertex shader linkage state. This is called just
815 * prior to drawing something when some fragment-related state has
819 llvmpipe_update_setup(struct llvmpipe_context
*lp
)
821 struct llvmpipe_screen
*screen
= llvmpipe_screen(lp
->pipe
.screen
);
823 struct lp_setup_variant_key
*key
= &lp
->setup_variant
.key
;
824 struct lp_setup_variant
*variant
= NULL
;
825 struct lp_setup_variant_list_item
*li
;
827 lp_make_setup_variant_key(lp
, key
);
829 foreach(li
, &lp
->setup_variants_list
) {
830 if(li
->base
->key
.size
== key
->size
&&
831 memcmp(&li
->base
->key
, key
, key
->size
) == 0) {
838 move_to_head(&lp
->setup_variants_list
, &variant
->list_item_global
);
841 if (lp
->nr_setup_variants
>= LP_MAX_SETUP_VARIANTS
) {
842 cull_setup_variants(lp
);
845 variant
= generate_setup_variant(screen
, key
, lp
);
846 insert_at_head(&lp
->setup_variants_list
, &variant
->list_item_global
);
847 lp
->nr_setup_variants
++;
850 lp_setup_set_setup_variant(lp
->setup
,
855 lp_delete_setup_variants(struct llvmpipe_context
*lp
)
857 struct lp_setup_variant_list_item
*li
;
858 li
= first_elem(&lp
->setup_variants_list
);
859 while(!at_end(&lp
->setup_variants_list
, li
)) {
860 struct lp_setup_variant_list_item
*next
= next_elem(li
);
861 remove_setup_variant(lp
, li
->base
);
867 lp_dump_setup_coef( const struct lp_setup_variant_key
*key
,
868 const float (*sa0
)[4],
869 const float (*sdadx
)[4],
870 const float (*sdady
)[4])
874 for (i
= 0; i
< NUM_CHANNELS
; i
++) {
875 float a0
= sa0
[0][i
];
876 float dadx
= sdadx
[0][i
];
877 float dady
= sdady
[0][i
];
879 debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
884 for (slot
= 0; slot
< key
->num_inputs
; slot
++) {
885 unsigned usage_mask
= key
->inputs
[slot
].usage_mask
;
886 for (i
= 0; i
< NUM_CHANNELS
; i
++) {
887 if (usage_mask
& (1 << i
)) {
888 float a0
= sa0
[1 + slot
][i
];
889 float dadx
= sdadx
[1 + slot
][i
];
890 float dady
= sdady
[1 + slot
][i
];
892 debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",