1 /**************************************************************************
3 * Copyright 2010 VMware.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_math.h"
30 #include "util/u_memory.h"
31 #include "util/u_simple_list.h"
32 #include "os/os_time.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_const.h"
35 #include "gallivm/lp_bld_debug.h"
36 #include "gallivm/lp_bld_init.h"
37 #include "gallivm/lp_bld_logic.h"
38 #include "gallivm/lp_bld_intr.h"
39 #include "gallivm/lp_bld_flow.h"
40 #include "gallivm/lp_bld_type.h"
41 #include <llvm-c/Analysis.h> /* for LLVMVerifyFunction */
46 #include "lp_screen.h"
47 #include "lp_context.h"
49 #include "lp_state_fs.h"
50 #include "lp_state_setup.h"
/* Currently organized to interpolate full float[4] attributes even
 * when some elements are unused.  Later, can pack vertex data more
 * closely.
 */
62 /* Function arguments:
67 LLVMValueRef facing
; /* boolean */
74 LLVMValueRef x0_center
;
75 LLVMValueRef y0_center
;
76 LLVMValueRef dy20_ooa
;
77 LLVMValueRef dy01_ooa
;
78 LLVMValueRef dx20_ooa
;
79 LLVMValueRef dx01_ooa
;
81 /* Temporary, per-attribute:
91 type4f(struct gallivm_state
*gallivm
)
93 return LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), 4);
97 /* Equivalent of _mm_setr_ps(a,b,c,d)
100 vec4f(struct gallivm_state
*gallivm
,
101 LLVMValueRef a
, LLVMValueRef b
, LLVMValueRef c
, LLVMValueRef d
,
104 LLVMBuilderRef bld
= gallivm
->builder
;
105 LLVMValueRef i0
= lp_build_const_int32(gallivm
, 0);
106 LLVMValueRef i1
= lp_build_const_int32(gallivm
, 1);
107 LLVMValueRef i2
= lp_build_const_int32(gallivm
, 2);
108 LLVMValueRef i3
= lp_build_const_int32(gallivm
, 3);
110 LLVMValueRef res
= LLVMGetUndef(type4f(gallivm
));
112 res
= LLVMBuildInsertElement(bld
, res
, a
, i0
, "");
113 res
= LLVMBuildInsertElement(bld
, res
, b
, i1
, "");
114 res
= LLVMBuildInsertElement(bld
, res
, c
, i2
, "");
115 res
= LLVMBuildInsertElement(bld
, res
, d
, i3
, name
);
120 /* Equivalent of _mm_set1_ps(a)
123 vec4f_from_scalar(struct gallivm_state
*gallivm
,
127 LLVMBuilderRef bld
= gallivm
->builder
;
128 LLVMValueRef res
= LLVMGetUndef(type4f(gallivm
));
131 for(i
= 0; i
< 4; ++i
) {
132 LLVMValueRef index
= lp_build_const_int32(gallivm
, i
);
133 res
= LLVMBuildInsertElement(bld
, res
, a
, index
, i
== 3 ? name
: "");
140 store_coef(struct gallivm_state
*gallivm
,
141 struct lp_setup_args
*args
,
147 LLVMBuilderRef builder
= gallivm
->builder
;
148 LLVMValueRef idx
= lp_build_const_int32(gallivm
, slot
);
150 LLVMBuildStore(builder
,
152 LLVMBuildGEP(builder
, args
->a0
, &idx
, 1, ""));
154 LLVMBuildStore(builder
,
156 LLVMBuildGEP(builder
, args
->dadx
, &idx
, 1, ""));
158 LLVMBuildStore(builder
,
160 LLVMBuildGEP(builder
, args
->dady
, &idx
, 1, ""));
166 emit_constant_coef4(struct gallivm_state
*gallivm
,
167 struct lp_setup_args
*args
,
171 LLVMValueRef zero
= lp_build_const_float(gallivm
, 0.0);
172 LLVMValueRef zerovec
= vec4f_from_scalar(gallivm
, zero
, "zero");
173 store_coef(gallivm
, args
, slot
, vert
, zerovec
, zerovec
);
179 * Setup the fragment input attribute with the front-facing value.
180 * \param frontface is the triangle front facing?
183 emit_facing_coef(struct gallivm_state
*gallivm
,
184 struct lp_setup_args
*args
,
187 LLVMBuilderRef builder
= gallivm
->builder
;
188 LLVMTypeRef float_type
= LLVMFloatTypeInContext(gallivm
->context
);
189 LLVMValueRef a0_0
= args
->facing
;
190 LLVMValueRef a0_0f
= LLVMBuildSIToFP(builder
, a0_0
, float_type
, "");
191 LLVMValueRef zero
= lp_build_const_float(gallivm
, 0.0);
192 LLVMValueRef a0
= vec4f(gallivm
, a0_0f
, zero
, zero
, zero
, "facing");
193 LLVMValueRef zerovec
= vec4f_from_scalar(gallivm
, zero
, "zero");
195 store_coef(gallivm
, args
, slot
, a0
, zerovec
, zerovec
);
200 vert_attrib(struct gallivm_state
*gallivm
,
206 LLVMBuilderRef b
= gallivm
->builder
;
208 idx
[0] = lp_build_const_int32(gallivm
, attr
);
209 idx
[1] = lp_build_const_int32(gallivm
, elem
);
210 return LLVMBuildLoad(b
, LLVMBuildGEP(b
, vert
, idx
, 2, ""), name
);
214 vert_clamp(LLVMBuilderRef b
,
219 LLVMValueRef min_result
= LLVMBuildFCmp(b
, LLVMRealUGT
, min
, x
, "");
220 LLVMValueRef max_result
= LLVMBuildFCmp(b
, LLVMRealUGT
, x
, max
, "");
221 LLVMValueRef clamp_value
;
223 clamp_value
= LLVMBuildSelect(b
, min_result
, min
, x
, "");
224 clamp_value
= LLVMBuildSelect(b
, max_result
, max
, x
, "");
230 lp_twoside(struct gallivm_state
*gallivm
,
231 struct lp_setup_args
*args
,
232 const struct lp_setup_variant_key
*key
,
235 LLVMBuilderRef b
= gallivm
->builder
;
236 LLVMValueRef a0_back
, a1_back
, a2_back
;
237 LLVMValueRef idx2
= lp_build_const_int32(gallivm
, bcolor_slot
);
239 LLVMValueRef facing
= args
->facing
;
240 LLVMValueRef front_facing
= LLVMBuildICmp(b
, LLVMIntEQ
, facing
, lp_build_const_int32(gallivm
, 0), ""); /** need i1 for if condition */
242 a0_back
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v0
, &idx2
, 1, ""), "v0a_back");
243 a1_back
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v1
, &idx2
, 1, ""), "v1a_back");
244 a2_back
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v2
, &idx2
, 1, ""), "v2a_back");
246 /* Possibly swap the front and back attrib values,
248 * Prefer select to if so we don't have to worry about phis or
251 args
->v0a
= LLVMBuildSelect(b
, front_facing
, a0_back
, args
->v0a
, "");
252 args
->v1a
= LLVMBuildSelect(b
, front_facing
, a1_back
, args
->v1a
, "");
253 args
->v2a
= LLVMBuildSelect(b
, front_facing
, a2_back
, args
->v2a
, "");
258 lp_do_offset_tri(struct gallivm_state
*gallivm
,
259 struct lp_setup_args
*args
,
260 const struct lp_setup_variant_key
*key
)
262 LLVMBuilderRef b
= gallivm
->builder
;
263 struct lp_build_context bld
;
264 LLVMValueRef zoffset
, mult
;
265 LLVMValueRef z0_new
, z1_new
, z2_new
;
266 LLVMValueRef dzdx0
, dzdx
, dzdy0
, dzdy
;
267 LLVMValueRef max
, max_value
;
269 LLVMValueRef one
= lp_build_const_float(gallivm
, 1.0);
270 LLVMValueRef zero
= lp_build_const_float(gallivm
, 0.0);
271 LLVMValueRef two
= lp_build_const_int32(gallivm
, 2);
273 /* edge vectors: e = v0 - v2, f = v1 - v2 */
274 LLVMValueRef v0_x
= vert_attrib(gallivm
, args
->v0
, 0, 0, "v0_x");
275 LLVMValueRef v1_x
= vert_attrib(gallivm
, args
->v1
, 0, 0, "v1_x");
276 LLVMValueRef v2_x
= vert_attrib(gallivm
, args
->v2
, 0, 0, "v2_x");
277 LLVMValueRef v0_y
= vert_attrib(gallivm
, args
->v0
, 0, 1, "v0_y");
278 LLVMValueRef v1_y
= vert_attrib(gallivm
, args
->v1
, 0, 1, "v1_y");
279 LLVMValueRef v2_y
= vert_attrib(gallivm
, args
->v2
, 0, 1, "v2_y");
280 LLVMValueRef v0_z
= vert_attrib(gallivm
, args
->v0
, 0, 2, "v0_z");
281 LLVMValueRef v1_z
= vert_attrib(gallivm
, args
->v1
, 0, 2, "v1_z");
282 LLVMValueRef v2_z
= vert_attrib(gallivm
, args
->v2
, 0, 2, "v2_z");
284 /* edge vectors: e = v0 - v2, f = v1 - v2 */
285 LLVMValueRef dx02
= LLVMBuildFSub(b
, v0_x
, v2_x
, "dx02");
286 LLVMValueRef dy02
= LLVMBuildFSub(b
, v0_y
, v2_y
, "dy02");
287 LLVMValueRef dz02
= LLVMBuildFSub(b
, v0_z
, v2_z
, "dz02");
288 LLVMValueRef dx12
= LLVMBuildFSub(b
, v1_x
, v2_x
, "dx12");
289 LLVMValueRef dy12
= LLVMBuildFSub(b
, v1_y
, v2_y
, "dy12");
290 LLVMValueRef dz12
= LLVMBuildFSub(b
, v1_z
, v2_z
, "dz12");
292 /* det = cross(e,f).z */
293 LLVMValueRef dx02_dy12
= LLVMBuildFMul(b
, dx02
, dy12
, "dx02_dy12");
294 LLVMValueRef dy02_dx12
= LLVMBuildFMul(b
, dy02
, dx12
, "dy02_dx12");
295 LLVMValueRef det
= LLVMBuildFSub(b
, dx02_dy12
, dy02_dx12
, "det");
296 LLVMValueRef inv_det
= LLVMBuildFDiv(b
, one
, det
, "inv_det");
298 /* (res1,res2) = cross(e,f).xy */
299 LLVMValueRef dy02_dz12
= LLVMBuildFMul(b
, dy02
, dz12
, "dy02_dz12");
300 LLVMValueRef dz02_dy12
= LLVMBuildFMul(b
, dz02
, dy12
, "dz02_dy12");
301 LLVMValueRef dz02_dx12
= LLVMBuildFMul(b
, dz02
, dx12
, "dz02_dx12");
302 LLVMValueRef dx02_dz12
= LLVMBuildFMul(b
, dx02
, dz12
, "dx02_dz12");
303 LLVMValueRef res1
= LLVMBuildFSub(b
, dy02_dz12
, dz02_dy12
, "res1");
304 LLVMValueRef res2
= LLVMBuildFSub(b
, dz02_dx12
, dx02_dz12
, "res2");
306 /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/
307 lp_build_context_init(&bld
, gallivm
, lp_type_float(32));
308 dzdx0
= LLVMBuildFMul(b
, res1
, inv_det
, "dzdx");
309 dzdx
= lp_build_abs(&bld
, dzdx0
);
310 dzdy0
= LLVMBuildFMul(b
, res2
, inv_det
, "dzdy");
311 dzdy
= lp_build_abs(&bld
, dzdy0
);
313 /* zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale */
314 max
= LLVMBuildFCmp(b
, LLVMRealUGT
, dzdx
, dzdy
, "");
315 max_value
= LLVMBuildSelect(b
, max
, dzdx
, dzdy
, "max");
317 mult
= LLVMBuildFMul(b
, max_value
, lp_build_const_float(gallivm
, key
->scale
), "");
318 zoffset
= LLVMBuildFAdd(b
, lp_build_const_float(gallivm
, key
->units
), mult
, "zoffset");
320 /* clamp and do offset */
321 z0_new
= vert_clamp(b
, LLVMBuildFAdd(b
, v0_z
, zoffset
, ""), zero
, one
);
322 z1_new
= vert_clamp(b
, LLVMBuildFAdd(b
, v1_z
, zoffset
, ""), zero
, one
);
323 z2_new
= vert_clamp(b
, LLVMBuildFAdd(b
, v2_z
, zoffset
, ""), zero
, one
);
325 /* insert into args->a0.z, a1.z, a2.z:
327 args
->v0a
= LLVMBuildInsertElement(b
, args
->v0a
, z0_new
, two
, "");
328 args
->v1a
= LLVMBuildInsertElement(b
, args
->v1a
, z1_new
, two
, "");
329 args
->v2a
= LLVMBuildInsertElement(b
, args
->v2a
, z2_new
, two
, "");
333 load_attribute(struct gallivm_state
*gallivm
,
334 struct lp_setup_args
*args
,
335 const struct lp_setup_variant_key
*key
,
338 LLVMBuilderRef b
= gallivm
->builder
;
339 LLVMValueRef idx
= lp_build_const_int32(gallivm
, vert_attr
);
341 /* Load the vertex data
343 args
->v0a
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v0
, &idx
, 1, ""), "v0a");
344 args
->v1a
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v1
, &idx
, 1, ""), "v1a");
345 args
->v2a
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v2
, &idx
, 1, ""), "v2a");
348 /* Potentially modify it according to twoside, offset, etc:
350 if (vert_attr
== 0 && (key
->scale
!= 0.0f
|| key
->units
!= 0.0f
)) {
351 lp_do_offset_tri(gallivm
, args
, key
);
355 if (vert_attr
== key
->color_slot
&& key
->bcolor_slot
>= 0)
356 lp_twoside(gallivm
, args
, key
, key
->bcolor_slot
);
357 else if (vert_attr
== key
->spec_slot
&& key
->bspec_slot
>= 0)
358 lp_twoside(gallivm
, args
, key
, key
->bspec_slot
);
363 emit_coef4( struct gallivm_state
*gallivm
,
364 struct lp_setup_args
*args
,
370 LLVMBuilderRef b
= gallivm
->builder
;
371 LLVMValueRef dy20_ooa
= args
->dy20_ooa
;
372 LLVMValueRef dy01_ooa
= args
->dy01_ooa
;
373 LLVMValueRef dx20_ooa
= args
->dx20_ooa
;
374 LLVMValueRef dx01_ooa
= args
->dx01_ooa
;
375 LLVMValueRef x0_center
= args
->x0_center
;
376 LLVMValueRef y0_center
= args
->y0_center
;
378 /* XXX: using fsub, fmul on vector types -- does this work??
380 LLVMValueRef da01
= LLVMBuildFSub(b
, a0
, a1
, "da01");
381 LLVMValueRef da20
= LLVMBuildFSub(b
, a2
, a0
, "da20");
383 /* Calculate dadx (vec4f)
385 LLVMValueRef da01_dy20_ooa
= LLVMBuildFMul(b
, da01
, dy20_ooa
, "da01_dy20_ooa");
386 LLVMValueRef da20_dy01_ooa
= LLVMBuildFMul(b
, da20
, dy01_ooa
, "da20_dy01_ooa");
387 LLVMValueRef dadx
= LLVMBuildFSub(b
, da01_dy20_ooa
, da20_dy01_ooa
, "dadx");
389 /* Calculate dady (vec4f)
391 LLVMValueRef da01_dx20_ooa
= LLVMBuildFMul(b
, da01
, dx20_ooa
, "da01_dx20_ooa");
392 LLVMValueRef da20_dx01_ooa
= LLVMBuildFMul(b
, da20
, dx01_ooa
, "da20_dx01_ooa");
393 LLVMValueRef dady
= LLVMBuildFSub(b
, da20_dx01_ooa
, da01_dx20_ooa
, "dady");
395 /* Calculate a0 - the attribute value at the origin
397 LLVMValueRef dadx_x0
= LLVMBuildFMul(b
, dadx
, x0_center
, "dadx_x0");
398 LLVMValueRef dady_y0
= LLVMBuildFMul(b
, dady
, y0_center
, "dady_y0");
399 LLVMValueRef attr_v0
= LLVMBuildFAdd(b
, dadx_x0
, dady_y0
, "attr_v0");
400 LLVMValueRef attr_0
= LLVMBuildFSub(b
, a0
, attr_v0
, "attr_0");
402 store_coef(gallivm
, args
, slot
, attr_0
, dadx
, dady
);
407 emit_linear_coef( struct gallivm_state
*gallivm
,
408 struct lp_setup_args
*args
,
411 /* nothing to do anymore */
421 * Compute a0, dadx and dady for a perspective-corrected interpolant,
423 * We basically multiply the vertex value by 1/w before computing
424 * the plane coefficients (a0, dadx, dady).
425 * Later, when we compute the value at a particular fragment position we'll
426 * divide the interpolated value by the interpolated W at that fragment.
429 emit_perspective_coef( struct gallivm_state
*gallivm
,
430 struct lp_setup_args
*args
,
433 LLVMBuilderRef b
= gallivm
->builder
;
435 /* premultiply by 1/w (v[0][3] is always 1/w):
437 LLVMValueRef v0_oow
= vec4f_from_scalar(gallivm
, vert_attrib(gallivm
, args
->v0
, 0, 3, ""), "v0_oow");
438 LLVMValueRef v1_oow
= vec4f_from_scalar(gallivm
, vert_attrib(gallivm
, args
->v1
, 0, 3, ""), "v1_oow");
439 LLVMValueRef v2_oow
= vec4f_from_scalar(gallivm
, vert_attrib(gallivm
, args
->v2
, 0, 3, ""), "v2_oow");
441 LLVMValueRef v0_oow_v0a
= LLVMBuildFMul(b
, args
->v0a
, v0_oow
, "v0_oow_v0a");
442 LLVMValueRef v1_oow_v1a
= LLVMBuildFMul(b
, args
->v1a
, v1_oow
, "v1_oow_v1a");
443 LLVMValueRef v2_oow_v2a
= LLVMBuildFMul(b
, args
->v2a
, v2_oow
, "v2_oow_v2a");
445 emit_coef4(gallivm
, args
, slot
, v0_oow_v0a
, v1_oow_v1a
, v2_oow_v2a
);
/**
 * Emit coefficients for the position attribute.  Position is handled
 * exactly like a linearly interpolated attribute.
 */
static void
emit_position_coef(struct gallivm_state *gallivm,
                   struct lp_setup_args *args,
                   unsigned slot)
{
   emit_linear_coef(gallivm, args, slot);
}
459 * Applys cylindrical wrapping to vertex attributes if enabled.
460 * Input coordinates must be in [0, 1] range, otherwise results are undefined.
462 * @param cyl_wrap TGSI_CYLINDRICAL_WRAP_x flags
465 emit_apply_cyl_wrap(struct gallivm_state
*gallivm
,
466 struct lp_setup_args
*args
,
469 LLVMBuilderRef builder
= gallivm
->builder
;
470 struct lp_type type
= lp_float32_vec4_type();
471 LLVMTypeRef float_vec_type
= lp_build_vec_type(gallivm
, type
);
472 LLVMValueRef pos_half
;
473 LLVMValueRef neg_half
;
474 LLVMValueRef cyl_mask
;
483 pos_half
= lp_build_const_vec(gallivm
, type
, +0.5f
);
484 neg_half
= lp_build_const_vec(gallivm
, type
, -0.5f
);
485 cyl_mask
= lp_build_const_mask_aos(gallivm
, type
, cyl_wrap
);
487 one
= lp_build_const_vec(gallivm
, type
, 1.0f
);
488 one
= LLVMBuildBitCast(builder
, one
, lp_build_int_vec_type(gallivm
, type
), "");
489 one
= LLVMBuildAnd(builder
, one
, cyl_mask
, "");
492 delta
= LLVMBuildFSub(builder
, args
->v1a
, args
->v0a
, "");
494 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_GREATER
, delta
, pos_half
);
495 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
496 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
497 args
->v0a
= LLVMBuildFAdd(builder
, args
->v0a
, offset
, "");
499 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_LESS
, delta
, neg_half
);
500 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
501 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
502 args
->v1a
= LLVMBuildFAdd(builder
, args
->v1a
, offset
, "");
505 delta
= LLVMBuildFSub(builder
, args
->v2a
, args
->v1a
, "");
507 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_GREATER
, delta
, pos_half
);
508 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
509 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
510 args
->v1a
= LLVMBuildFAdd(builder
, args
->v1a
, offset
, "");
512 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_LESS
, delta
, neg_half
);
513 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
514 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
515 args
->v2a
= LLVMBuildFAdd(builder
, args
->v2a
, offset
, "");
518 delta
= LLVMBuildFSub(builder
, args
->v0a
, args
->v2a
, "");
520 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_GREATER
, delta
, pos_half
);
521 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
522 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
523 args
->v2a
= LLVMBuildFAdd(builder
, args
->v2a
, offset
, "");
525 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_LESS
, delta
, neg_half
);
526 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
527 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
528 args
->v0a
= LLVMBuildFAdd(builder
, args
->v0a
, offset
, "");
533 * Compute the inputs-> dadx, dady, a0 values.
536 emit_tri_coef( struct gallivm_state
*gallivm
,
537 const struct lp_setup_variant_key
*key
,
538 struct lp_setup_args
*args
)
542 /* The internal position input is in slot zero:
544 load_attribute(gallivm
, args
, key
, 0);
545 emit_position_coef(gallivm
, args
, 0);
547 /* setup interpolation for all the remaining attributes:
549 for (slot
= 0; slot
< key
->num_inputs
; slot
++) {
551 if (key
->inputs
[slot
].interp
== LP_INTERP_CONSTANT
||
552 key
->inputs
[slot
].interp
== LP_INTERP_LINEAR
||
553 key
->inputs
[slot
].interp
== LP_INTERP_PERSPECTIVE
)
554 load_attribute(gallivm
, args
, key
, key
->inputs
[slot
].src_index
);
556 switch (key
->inputs
[slot
].interp
) {
557 case LP_INTERP_CONSTANT
:
558 if (key
->flatshade_first
) {
559 emit_constant_coef4(gallivm
, args
, slot
+1, args
->v0a
);
562 emit_constant_coef4(gallivm
, args
, slot
+1, args
->v2a
);
566 case LP_INTERP_LINEAR
:
567 emit_apply_cyl_wrap(gallivm
, args
, key
->inputs
[slot
].cyl_wrap
);
568 emit_linear_coef(gallivm
, args
, slot
+1);
571 case LP_INTERP_PERSPECTIVE
:
572 emit_apply_cyl_wrap(gallivm
, args
, key
->inputs
[slot
].cyl_wrap
);
573 emit_perspective_coef(gallivm
, args
, slot
+1);
576 case LP_INTERP_POSITION
:
578 * The generated pixel interpolators will pick up the coeffs from
583 case LP_INTERP_FACING
:
584 emit_facing_coef(gallivm
, args
, slot
+1);
594 /* XXX: This is generic code, share with fs/vs codegen:
596 static lp_jit_setup_triangle
597 finalize_function(struct gallivm_state
*gallivm
,
598 LLVMBuilderRef builder
,
599 LLVMValueRef function
)
603 /* Verify the LLVM IR. If invalid, dump and abort */
605 if (LLVMVerifyFunction(function
, LLVMPrintMessageAction
)) {
607 lp_debug_dump_value(function
);
612 /* Apply optimizations to LLVM IR */
613 LLVMRunFunctionPassManager(gallivm
->passmgr
, function
);
615 if (gallivm_debug
& GALLIVM_DEBUG_IR
)
617 /* Print the LLVM IR to stderr */
618 lp_debug_dump_value(function
);
623 * Translate the LLVM IR into machine code.
625 f
= LLVMGetPointerToGlobal(gallivm
->engine
, function
);
627 if (gallivm_debug
& GALLIVM_DEBUG_ASM
)
632 lp_func_delete_body(function
);
634 return (lp_jit_setup_triangle
) pointer_to_func(f
);
/* XXX: Generic code:
 */
/**
 * Emit a call to the "llvm.x86.mmx.emms" intrinsic (no arguments, void
 * result) at the current builder position.
 */
static void
lp_emit_emms(struct gallivm_state *gallivm)
{
#ifdef PIPE_ARCH_X86
   LLVMTypeRef void_type = LLVMVoidTypeInContext(gallivm->context);

   /* Avoid corrupting the FPU stack on 32bit OSes. */
   lp_build_intrinsic(gallivm->builder, "llvm.x86.mmx.emms",
                      void_type, NULL, 0);
#endif
}
650 /* XXX: generic code:
653 set_noalias(LLVMBuilderRef builder
,
654 LLVMValueRef function
,
655 const LLVMTypeRef
*arg_types
,
659 for(i
= 0; i
< Elements(arg_types
); ++i
)
660 if(LLVMGetTypeKind(arg_types
[i
]) == LLVMPointerTypeKind
)
661 LLVMAddAttribute(LLVMGetParam(function
, i
),
662 LLVMNoAliasAttribute
);
666 init_args(struct gallivm_state
*gallivm
,
667 struct lp_setup_args
*args
,
668 const struct lp_setup_variant
*variant
)
670 LLVMBuilderRef b
= gallivm
->builder
;
672 LLVMValueRef v0_x
= vert_attrib(gallivm
, args
->v0
, 0, 0, "v0_x");
673 LLVMValueRef v0_y
= vert_attrib(gallivm
, args
->v0
, 0, 1, "v0_y");
675 LLVMValueRef v1_x
= vert_attrib(gallivm
, args
->v1
, 0, 0, "v1_x");
676 LLVMValueRef v1_y
= vert_attrib(gallivm
, args
->v1
, 0, 1, "v1_y");
678 LLVMValueRef v2_x
= vert_attrib(gallivm
, args
->v2
, 0, 0, "v2_x");
679 LLVMValueRef v2_y
= vert_attrib(gallivm
, args
->v2
, 0, 1, "v2_y");
681 LLVMValueRef pixel_center
= lp_build_const_float(gallivm
,
682 variant
->key
.pixel_center_half
? 0.5 : 0);
684 LLVMValueRef x0_center
= LLVMBuildFSub(b
, v0_x
, pixel_center
, "x0_center" );
685 LLVMValueRef y0_center
= LLVMBuildFSub(b
, v0_y
, pixel_center
, "y0_center" );
687 LLVMValueRef dx01
= LLVMBuildFSub(b
, v0_x
, v1_x
, "dx01");
688 LLVMValueRef dy01
= LLVMBuildFSub(b
, v0_y
, v1_y
, "dy01");
689 LLVMValueRef dx20
= LLVMBuildFSub(b
, v2_x
, v0_x
, "dx20");
690 LLVMValueRef dy20
= LLVMBuildFSub(b
, v2_y
, v0_y
, "dy20");
692 LLVMValueRef one
= lp_build_const_float(gallivm
, 1.0);
693 LLVMValueRef e
= LLVMBuildFMul(b
, dx01
, dy20
, "e");
694 LLVMValueRef f
= LLVMBuildFMul(b
, dx20
, dy01
, "f");
695 LLVMValueRef ooa
= LLVMBuildFDiv(b
, one
, LLVMBuildFSub(b
, e
, f
, ""), "ooa");
697 LLVMValueRef dy20_ooa
= LLVMBuildFMul(b
, dy20
, ooa
, "dy20_ooa");
698 LLVMValueRef dy01_ooa
= LLVMBuildFMul(b
, dy01
, ooa
, "dy01_ooa");
699 LLVMValueRef dx20_ooa
= LLVMBuildFMul(b
, dx20
, ooa
, "dx20_ooa");
700 LLVMValueRef dx01_ooa
= LLVMBuildFMul(b
, dx01
, ooa
, "dx01_ooa");
702 args
->dy20_ooa
= vec4f_from_scalar(gallivm
, dy20_ooa
, "dy20_ooa_4f");
703 args
->dy01_ooa
= vec4f_from_scalar(gallivm
, dy01_ooa
, "dy01_ooa_4f");
705 args
->dx20_ooa
= vec4f_from_scalar(gallivm
, dx20_ooa
, "dx20_ooa_4f");
706 args
->dx01_ooa
= vec4f_from_scalar(gallivm
, dx01_ooa
, "dx01_ooa_4f");
708 args
->x0_center
= vec4f_from_scalar(gallivm
, x0_center
, "x0_center_4f");
709 args
->y0_center
= vec4f_from_scalar(gallivm
, y0_center
, "y0_center_4f");
713 * Generate the runtime callable function for the coefficient calculation.
716 static struct lp_setup_variant
*
717 generate_setup_variant(struct gallivm_state
*gallivm
,
718 struct lp_setup_variant_key
*key
,
719 struct llvmpipe_context
*lp
)
721 struct lp_setup_variant
*variant
= NULL
;
722 struct lp_setup_args args
;
724 LLVMTypeRef vec4f_type
;
725 LLVMTypeRef func_type
;
726 LLVMTypeRef arg_types
[7];
727 LLVMBasicBlockRef block
;
728 LLVMBuilderRef builder
= gallivm
->builder
;
734 variant
= CALLOC_STRUCT(lp_setup_variant
);
738 if (LP_DEBUG
& DEBUG_COUNTERS
) {
742 memcpy(&variant
->key
, key
, key
->size
);
743 variant
->list_item_global
.base
= variant
;
745 util_snprintf(func_name
, sizeof(func_name
), "fs%u_setup%u",
749 /* Currently always deal with full 4-wide vertex attributes from
753 vec4f_type
= LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), 4);
755 arg_types
[0] = LLVMPointerType(vec4f_type
, 0); /* v0 */
756 arg_types
[1] = LLVMPointerType(vec4f_type
, 0); /* v1 */
757 arg_types
[2] = LLVMPointerType(vec4f_type
, 0); /* v2 */
758 arg_types
[3] = LLVMInt32TypeInContext(gallivm
->context
); /* facing */
759 arg_types
[4] = LLVMPointerType(vec4f_type
, 0); /* a0, aligned */
760 arg_types
[5] = LLVMPointerType(vec4f_type
, 0); /* dadx, aligned */
761 arg_types
[6] = LLVMPointerType(vec4f_type
, 0); /* dady, aligned */
763 func_type
= LLVMFunctionType(LLVMVoidTypeInContext(gallivm
->context
),
764 arg_types
, Elements(arg_types
), 0);
766 variant
->function
= LLVMAddFunction(gallivm
->module
, func_name
, func_type
);
767 if (!variant
->function
)
770 LLVMSetFunctionCallConv(variant
->function
, LLVMCCallConv
);
772 args
.v0
= LLVMGetParam(variant
->function
, 0);
773 args
.v1
= LLVMGetParam(variant
->function
, 1);
774 args
.v2
= LLVMGetParam(variant
->function
, 2);
775 args
.facing
= LLVMGetParam(variant
->function
, 3);
776 args
.a0
= LLVMGetParam(variant
->function
, 4);
777 args
.dadx
= LLVMGetParam(variant
->function
, 5);
778 args
.dady
= LLVMGetParam(variant
->function
, 6);
780 lp_build_name(args
.v0
, "in_v0");
781 lp_build_name(args
.v1
, "in_v1");
782 lp_build_name(args
.v2
, "in_v2");
783 lp_build_name(args
.facing
, "in_facing");
784 lp_build_name(args
.a0
, "out_a0");
785 lp_build_name(args
.dadx
, "out_dadx");
786 lp_build_name(args
.dady
, "out_dady");
791 block
= LLVMAppendBasicBlockInContext(gallivm
->context
,
792 variant
->function
, "entry");
793 LLVMPositionBuilderAtEnd(builder
, block
);
795 set_noalias(builder
, variant
->function
, arg_types
, Elements(arg_types
));
796 init_args(gallivm
, &args
, variant
);
797 emit_tri_coef(gallivm
, &variant
->key
, &args
);
799 lp_emit_emms(gallivm
);
800 LLVMBuildRetVoid(builder
);
802 variant
->jit_function
= finalize_function(gallivm
, builder
,
804 if (!variant
->jit_function
)
808 * Update timing information:
810 if (LP_DEBUG
& DEBUG_COUNTERS
) {
812 LP_COUNT_ADD(llvm_compile_time
, t1
- t0
);
813 LP_COUNT_ADD(nr_llvm_compiles
, 1);
820 if (variant
->function
) {
821 if (variant
->jit_function
)
822 LLVMFreeMachineCodeForFunction(gallivm
->engine
,
824 LLVMDeleteFunction(variant
->function
);
835 lp_make_setup_variant_key(struct llvmpipe_context
*lp
,
836 struct lp_setup_variant_key
*key
)
838 struct lp_fragment_shader
*fs
= lp
->fs
;
841 assert(sizeof key
->inputs
[0] == sizeof(uint
));
843 key
->num_inputs
= fs
->info
.base
.num_inputs
;
844 key
->flatshade_first
= lp
->rasterizer
->flatshade_first
;
845 key
->pixel_center_half
= lp
->rasterizer
->gl_rasterization_rules
;
846 key
->twoside
= lp
->rasterizer
->light_twoside
;
847 key
->size
= Offset(struct lp_setup_variant_key
,
848 inputs
[key
->num_inputs
]);
850 key
->color_slot
= lp
->color_slot
[0];
851 key
->bcolor_slot
= lp
->bcolor_slot
[0];
852 key
->spec_slot
= lp
->color_slot
[1];
853 key
->bspec_slot
= lp
->bcolor_slot
[1];
854 assert(key
->color_slot
== lp
->color_slot
[0]);
855 assert(key
->bcolor_slot
== lp
->bcolor_slot
[0]);
856 assert(key
->spec_slot
== lp
->color_slot
[1]);
857 assert(key
->bspec_slot
== lp
->bcolor_slot
[1]);
859 key
->units
= (float) (lp
->rasterizer
->offset_units
* lp
->mrd
);
860 key
->scale
= lp
->rasterizer
->offset_scale
;
862 memcpy(key
->inputs
, fs
->inputs
, key
->num_inputs
* sizeof key
->inputs
[0]);
863 for (i
= 0; i
< key
->num_inputs
; i
++) {
864 if (key
->inputs
[i
].interp
== LP_INTERP_COLOR
) {
865 if (lp
->rasterizer
->flatshade
)
866 key
->inputs
[i
].interp
= LP_INTERP_CONSTANT
;
868 key
->inputs
[i
].interp
= LP_INTERP_PERSPECTIVE
;
876 remove_setup_variant(struct llvmpipe_context
*lp
,
877 struct lp_setup_variant
*variant
)
879 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
880 debug_printf("llvmpipe: del setup_variant #%u total %u\n",
881 variant
->no
, lp
->nr_setup_variants
);
884 if (variant
->function
) {
885 if (variant
->jit_function
)
886 LLVMFreeMachineCodeForFunction(lp
->gallivm
->engine
,
888 LLVMDeleteFunction(variant
->function
);
891 remove_from_list(&variant
->list_item_global
);
892 lp
->nr_setup_variants
--;
898 /* When the number of setup variants exceeds a threshold, cull a
899 * fraction (currently a quarter) of them.
902 cull_setup_variants(struct llvmpipe_context
*lp
)
904 struct pipe_context
*pipe
= &lp
->pipe
;
908 * XXX: we need to flush the context until we have some sort of reference
909 * counting in fragment shaders as they may still be binned
910 * Flushing alone might not be sufficient we need to wait on it too.
912 llvmpipe_finish(pipe
, __FUNCTION__
);
914 for (i
= 0; i
< LP_MAX_SETUP_VARIANTS
/ 4; i
++) {
915 struct lp_setup_variant_list_item
*item
;
916 if (is_empty_list(&lp
->setup_variants_list
)) {
919 item
= last_elem(&lp
->setup_variants_list
);
922 remove_setup_variant(lp
, item
->base
);
928 * Update fragment/vertex shader linkage state. This is called just
929 * prior to drawing something when some fragment-related state has
933 llvmpipe_update_setup(struct llvmpipe_context
*lp
)
935 struct lp_setup_variant_key
*key
= &lp
->setup_variant
.key
;
936 struct lp_setup_variant
*variant
= NULL
;
937 struct lp_setup_variant_list_item
*li
;
939 lp_make_setup_variant_key(lp
, key
);
941 foreach(li
, &lp
->setup_variants_list
) {
942 if(li
->base
->key
.size
== key
->size
&&
943 memcmp(&li
->base
->key
, key
, key
->size
) == 0) {
950 move_to_head(&lp
->setup_variants_list
, &variant
->list_item_global
);
953 if (lp
->nr_setup_variants
>= LP_MAX_SETUP_VARIANTS
) {
954 cull_setup_variants(lp
);
957 variant
= generate_setup_variant(lp
->gallivm
, key
, lp
);
959 insert_at_head(&lp
->setup_variants_list
, &variant
->list_item_global
);
960 lp
->nr_setup_variants
++;
961 llvmpipe_variant_count
++;
965 lp_setup_set_setup_variant(lp
->setup
,
970 lp_delete_setup_variants(struct llvmpipe_context
*lp
)
972 struct lp_setup_variant_list_item
*li
;
973 li
= first_elem(&lp
->setup_variants_list
);
974 while(!at_end(&lp
->setup_variants_list
, li
)) {
975 struct lp_setup_variant_list_item
*next
= next_elem(li
);
976 remove_setup_variant(lp
, li
->base
);
982 lp_dump_setup_coef( const struct lp_setup_variant_key
*key
,
983 const float (*sa0
)[4],
984 const float (*sdadx
)[4],
985 const float (*sdady
)[4])
989 for (i
= 0; i
< TGSI_NUM_CHANNELS
; i
++) {
990 float a0
= sa0
[0][i
];
991 float dadx
= sdadx
[0][i
];
992 float dady
= sdady
[0][i
];
994 debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
999 for (slot
= 0; slot
< key
->num_inputs
; slot
++) {
1000 unsigned usage_mask
= key
->inputs
[slot
].usage_mask
;
1001 for (i
= 0; i
< TGSI_NUM_CHANNELS
; i
++) {
1002 if (usage_mask
& (1 << i
)) {
1003 float a0
= sa0
[1 + slot
][i
];
1004 float dadx
= sdadx
[1 + slot
][i
];
1005 float dady
= sdady
[1 + slot
][i
];
1007 debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",