1 /**************************************************************************
3 * Copyright 2010 VMware.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_math.h"
30 #include "util/u_memory.h"
31 #include "util/u_simple_list.h"
32 #include "os/os_time.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_const.h"
35 #include "gallivm/lp_bld_debug.h"
36 #include "gallivm/lp_bld_init.h"
37 #include "gallivm/lp_bld_logic.h"
38 #include "gallivm/lp_bld_intr.h"
39 #include "gallivm/lp_bld_flow.h"
40 #include "gallivm/lp_bld_type.h"
45 #include "lp_screen.h"
46 #include "lp_context.h"
48 #include "lp_state_fs.h"
49 #include "lp_state_setup.h"
/* Currently organized to interpolate full float[4] attributes even
 * when some elements are unused.  Later, can pack vertex data more
 * closely.
 */
61 /* Function arguments:
66 LLVMValueRef facing
; /* boolean */
73 LLVMValueRef x0_center
;
74 LLVMValueRef y0_center
;
75 LLVMValueRef dy20_ooa
;
76 LLVMValueRef dy01_ooa
;
77 LLVMValueRef dx20_ooa
;
78 LLVMValueRef dx01_ooa
;
84 type4f(struct gallivm_state
*gallivm
)
86 return LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), 4);
90 /* Equivalent of _mm_setr_ps(a,b,c,d)
93 vec4f(struct gallivm_state
*gallivm
,
94 LLVMValueRef a
, LLVMValueRef b
, LLVMValueRef c
, LLVMValueRef d
,
97 LLVMBuilderRef bld
= gallivm
->builder
;
98 LLVMValueRef i0
= lp_build_const_int32(gallivm
, 0);
99 LLVMValueRef i1
= lp_build_const_int32(gallivm
, 1);
100 LLVMValueRef i2
= lp_build_const_int32(gallivm
, 2);
101 LLVMValueRef i3
= lp_build_const_int32(gallivm
, 3);
103 LLVMValueRef res
= LLVMGetUndef(type4f(gallivm
));
105 res
= LLVMBuildInsertElement(bld
, res
, a
, i0
, "");
106 res
= LLVMBuildInsertElement(bld
, res
, b
, i1
, "");
107 res
= LLVMBuildInsertElement(bld
, res
, c
, i2
, "");
108 res
= LLVMBuildInsertElement(bld
, res
, d
, i3
, name
);
113 /* Equivalent of _mm_set1_ps(a)
116 vec4f_from_scalar(struct gallivm_state
*gallivm
,
120 LLVMBuilderRef bld
= gallivm
->builder
;
121 LLVMValueRef res
= LLVMGetUndef(type4f(gallivm
));
124 for(i
= 0; i
< 4; ++i
) {
125 LLVMValueRef index
= lp_build_const_int32(gallivm
, i
);
126 res
= LLVMBuildInsertElement(bld
, res
, a
, index
, i
== 3 ? name
: "");
133 store_coef(struct gallivm_state
*gallivm
,
134 struct lp_setup_args
*args
,
140 LLVMBuilderRef builder
= gallivm
->builder
;
141 LLVMValueRef idx
= lp_build_const_int32(gallivm
, slot
);
143 LLVMBuildStore(builder
,
145 LLVMBuildGEP(builder
, args
->a0
, &idx
, 1, ""));
147 LLVMBuildStore(builder
,
149 LLVMBuildGEP(builder
, args
->dadx
, &idx
, 1, ""));
151 LLVMBuildStore(builder
,
153 LLVMBuildGEP(builder
, args
->dady
, &idx
, 1, ""));
159 emit_constant_coef4(struct gallivm_state
*gallivm
,
160 struct lp_setup_args
*args
,
164 LLVMValueRef zero
= lp_build_const_float(gallivm
, 0.0);
165 LLVMValueRef zerovec
= vec4f_from_scalar(gallivm
, zero
, "zero");
166 store_coef(gallivm
, args
, slot
, vert
, zerovec
, zerovec
);
172 * Setup the fragment input attribute with the front-facing value.
173 * \param frontface is the triangle front facing?
176 emit_facing_coef(struct gallivm_state
*gallivm
,
177 struct lp_setup_args
*args
,
180 LLVMBuilderRef builder
= gallivm
->builder
;
181 LLVMTypeRef float_type
= LLVMFloatTypeInContext(gallivm
->context
);
182 LLVMValueRef a0_0
= args
->facing
;
183 LLVMValueRef a0_0f
= LLVMBuildSIToFP(builder
, a0_0
, float_type
, "");
184 LLVMValueRef zero
= lp_build_const_float(gallivm
, 0.0);
185 LLVMValueRef a0
= vec4f(gallivm
, a0_0f
, zero
, zero
, zero
, "facing");
186 LLVMValueRef zerovec
= vec4f_from_scalar(gallivm
, zero
, "zero");
188 store_coef(gallivm
, args
, slot
, a0
, zerovec
, zerovec
);
193 vert_attrib(struct gallivm_state
*gallivm
,
199 LLVMBuilderRef b
= gallivm
->builder
;
201 idx
[0] = lp_build_const_int32(gallivm
, attr
);
202 idx
[1] = lp_build_const_int32(gallivm
, elem
);
203 return LLVMBuildLoad(b
, LLVMBuildGEP(b
, vert
, idx
, 2, ""), name
);
208 lp_twoside(struct gallivm_state
*gallivm
,
209 struct lp_setup_args
*args
,
210 const struct lp_setup_variant_key
*key
,
212 LLVMValueRef attribv
[3])
214 LLVMBuilderRef b
= gallivm
->builder
;
215 LLVMValueRef a0_back
, a1_back
, a2_back
;
216 LLVMValueRef idx2
= lp_build_const_int32(gallivm
, bcolor_slot
);
218 LLVMValueRef facing
= args
->facing
;
219 LLVMValueRef front_facing
= LLVMBuildICmp(b
, LLVMIntEQ
, facing
, lp_build_const_int32(gallivm
, 0), ""); /** need i1 for if condition */
221 a0_back
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v0
, &idx2
, 1, ""), "v0a_back");
222 a1_back
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v1
, &idx2
, 1, ""), "v1a_back");
223 a2_back
= LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v2
, &idx2
, 1, ""), "v2a_back");
225 /* Possibly swap the front and back attrib values,
227 * Prefer select to if so we don't have to worry about phis or
230 attribv
[0] = LLVMBuildSelect(b
, front_facing
, a0_back
, attribv
[0], "");
231 attribv
[1] = LLVMBuildSelect(b
, front_facing
, a1_back
, attribv
[1], "");
232 attribv
[2] = LLVMBuildSelect(b
, front_facing
, a2_back
, attribv
[2], "");
237 lp_do_offset_tri(struct gallivm_state
*gallivm
,
238 struct lp_setup_args
*args
,
239 const struct lp_setup_variant_key
*key
,
240 LLVMValueRef inv_det
,
243 LLVMValueRef attribv
[3])
245 LLVMBuilderRef b
= gallivm
->builder
;
246 struct lp_build_context bld
;
247 LLVMValueRef zoffset
, mult
;
248 LLVMValueRef z0_new
, z1_new
, z2_new
;
249 LLVMValueRef dzdxdzdy
, dzdx
, dzdy
, dzxyz20
, dyzzx01
, dyzzx01_dzxyz20
, dzx01_dyz20
;
250 LLVMValueRef z0z1
, z0z1z2
;
251 LLVMValueRef max
, max_value
, res12
;
252 LLVMValueRef shuffles
[4];
253 LLVMTypeRef shuf_type
= LLVMInt32TypeInContext(gallivm
->context
);
254 LLVMValueRef onei
= lp_build_const_int32(gallivm
, 1);
255 LLVMValueRef zeroi
= lp_build_const_int32(gallivm
, 0);
256 LLVMValueRef twoi
= lp_build_const_int32(gallivm
, 2);
257 LLVMValueRef threei
= lp_build_const_int32(gallivm
, 3);
259 /* (res12) = cross(e,f).xy */
264 dzxyz20
= LLVMBuildShuffleVector(b
, dxyz20
, dxyz20
, LLVMConstVector(shuffles
, 4), "");
270 dyzzx01
= LLVMBuildShuffleVector(b
, dxyz01
, dxyz01
, LLVMConstVector(shuffles
, 4), "");
272 dyzzx01_dzxyz20
= LLVMBuildFMul(b
, dzxyz20
, dyzzx01
, "dyzzx01_dzxyz20");
275 shuffles
[1] = threei
;
276 shuffles
[2] = LLVMGetUndef(shuf_type
);
277 shuffles
[3] = LLVMGetUndef(shuf_type
);
278 dzx01_dyz20
= LLVMBuildShuffleVector(b
, dyzzx01_dzxyz20
, dyzzx01_dzxyz20
,
279 LLVMConstVector(shuffles
, 4), "");
281 res12
= LLVMBuildFSub(b
, dyzzx01_dzxyz20
, dzx01_dyz20
, "res12");
283 /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/
284 lp_build_context_init(&bld
, gallivm
, lp_type_float_vec(32, 128));
285 dzdxdzdy
= LLVMBuildFMul(b
, res12
, inv_det
, "dzdxdzdy");
286 dzdxdzdy
= lp_build_abs(&bld
, dzdxdzdy
);
288 dzdx
= LLVMBuildExtractElement(b
, dzdxdzdy
, zeroi
, "");
289 dzdy
= LLVMBuildExtractElement(b
, dzdxdzdy
, onei
, "");
291 /* zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale */
292 max
= LLVMBuildFCmp(b
, LLVMRealUGT
, dzdx
, dzdy
, "");
293 max_value
= LLVMBuildSelect(b
, max
, dzdx
, dzdy
, "max");
295 mult
= LLVMBuildFMul(b
, max_value
, lp_build_const_float(gallivm
, key
->scale
), "");
296 zoffset
= LLVMBuildFAdd(b
, lp_build_const_float(gallivm
, key
->units
), mult
, "zoffset");
300 shuffles
[1] = lp_build_const_int32(gallivm
, 6);
301 shuffles
[2] = LLVMGetUndef(shuf_type
);
302 shuffles
[3] = LLVMGetUndef(shuf_type
);
303 z0z1
= LLVMBuildShuffleVector(b
, attribv
[0], attribv
[1], LLVMConstVector(shuffles
, 4), "");
306 shuffles
[2] = lp_build_const_int32(gallivm
, 6);
307 shuffles
[3] = LLVMGetUndef(shuf_type
);
308 z0z1z2
= LLVMBuildShuffleVector(b
, z0z1
, attribv
[2], LLVMConstVector(shuffles
, 4), "");
309 zoffset
= vec4f_from_scalar(gallivm
, zoffset
, "");
311 /* clamp and do offset */
312 z0z1z2
= lp_build_clamp(&bld
, LLVMBuildFAdd(b
, z0z1z2
, zoffset
, ""), bld
.zero
, bld
.one
);
314 /* insert into args->a0.z, a1.z, a2.z:
316 z0_new
= LLVMBuildExtractElement(b
, z0z1z2
, zeroi
, "");
317 z1_new
= LLVMBuildExtractElement(b
, z0z1z2
, onei
, "");
318 z2_new
= LLVMBuildExtractElement(b
, z0z1z2
, twoi
, "");
319 attribv
[0] = LLVMBuildInsertElement(b
, attribv
[0], z0_new
, twoi
, "");
320 attribv
[1] = LLVMBuildInsertElement(b
, attribv
[1], z1_new
, twoi
, "");
321 attribv
[2] = LLVMBuildInsertElement(b
, attribv
[2], z2_new
, twoi
, "");
325 load_attribute(struct gallivm_state
*gallivm
,
326 struct lp_setup_args
*args
,
327 const struct lp_setup_variant_key
*key
,
329 LLVMValueRef attribv
[3])
331 LLVMBuilderRef b
= gallivm
->builder
;
332 LLVMValueRef idx
= lp_build_const_int32(gallivm
, vert_attr
);
334 /* Load the vertex data
336 attribv
[0] = LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v0
, &idx
, 1, ""), "v0a");
337 attribv
[1] = LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v1
, &idx
, 1, ""), "v1a");
338 attribv
[2] = LLVMBuildLoad(b
, LLVMBuildGEP(b
, args
->v2
, &idx
, 1, ""), "v2a");
341 /* Potentially modify it according to twoside, etc:
344 if (vert_attr
== key
->color_slot
&& key
->bcolor_slot
>= 0)
345 lp_twoside(gallivm
, args
, key
, key
->bcolor_slot
, attribv
);
346 else if (vert_attr
== key
->spec_slot
&& key
->bspec_slot
>= 0)
347 lp_twoside(gallivm
, args
, key
, key
->bspec_slot
, attribv
);
352 emit_coef4( struct gallivm_state
*gallivm
,
353 struct lp_setup_args
*args
,
359 LLVMBuilderRef b
= gallivm
->builder
;
360 LLVMValueRef dy20_ooa
= args
->dy20_ooa
;
361 LLVMValueRef dy01_ooa
= args
->dy01_ooa
;
362 LLVMValueRef dx20_ooa
= args
->dx20_ooa
;
363 LLVMValueRef dx01_ooa
= args
->dx01_ooa
;
364 LLVMValueRef x0_center
= args
->x0_center
;
365 LLVMValueRef y0_center
= args
->y0_center
;
367 LLVMValueRef da01
= LLVMBuildFSub(b
, a0
, a1
, "da01");
368 LLVMValueRef da20
= LLVMBuildFSub(b
, a2
, a0
, "da20");
370 /* Calculate dadx (vec4f)
372 LLVMValueRef da01_dy20_ooa
= LLVMBuildFMul(b
, da01
, dy20_ooa
, "da01_dy20_ooa");
373 LLVMValueRef da20_dy01_ooa
= LLVMBuildFMul(b
, da20
, dy01_ooa
, "da20_dy01_ooa");
374 LLVMValueRef dadx
= LLVMBuildFSub(b
, da01_dy20_ooa
, da20_dy01_ooa
, "dadx");
376 /* Calculate dady (vec4f)
378 LLVMValueRef da01_dx20_ooa
= LLVMBuildFMul(b
, da01
, dx20_ooa
, "da01_dx20_ooa");
379 LLVMValueRef da20_dx01_ooa
= LLVMBuildFMul(b
, da20
, dx01_ooa
, "da20_dx01_ooa");
380 LLVMValueRef dady
= LLVMBuildFSub(b
, da20_dx01_ooa
, da01_dx20_ooa
, "dady");
382 /* Calculate a0 - the attribute value at the origin
384 LLVMValueRef dadx_x0
= LLVMBuildFMul(b
, dadx
, x0_center
, "dadx_x0");
385 LLVMValueRef dady_y0
= LLVMBuildFMul(b
, dady
, y0_center
, "dady_y0");
386 LLVMValueRef attr_v0
= LLVMBuildFAdd(b
, dadx_x0
, dady_y0
, "attr_v0");
387 LLVMValueRef attr_0
= LLVMBuildFSub(b
, a0
, attr_v0
, "attr_0");
389 store_coef(gallivm
, args
, slot
, attr_0
, dadx
, dady
);
394 emit_linear_coef( struct gallivm_state
*gallivm
,
395 struct lp_setup_args
*args
,
397 LLVMValueRef attribv
[3])
399 /* nothing to do anymore */
409 * Compute a0, dadx and dady for a perspective-corrected interpolant,
411 * We basically multiply the vertex value by 1/w before computing
412 * the plane coefficients (a0, dadx, dady).
413 * Later, when we compute the value at a particular fragment position we'll
414 * divide the interpolated value by the interpolated W at that fragment.
417 apply_perspective_corr( struct gallivm_state
*gallivm
,
418 struct lp_setup_args
*args
,
420 LLVMValueRef attribv
[3])
422 LLVMBuilderRef b
= gallivm
->builder
;
424 /* premultiply by 1/w (v[0][3] is always 1/w):
426 LLVMValueRef v0_oow
= vec4f_from_scalar(gallivm
, vert_attrib(gallivm
, args
->v0
, 0, 3, ""), "v0_oow");
427 LLVMValueRef v1_oow
= vec4f_from_scalar(gallivm
, vert_attrib(gallivm
, args
->v1
, 0, 3, ""), "v1_oow");
428 LLVMValueRef v2_oow
= vec4f_from_scalar(gallivm
, vert_attrib(gallivm
, args
->v2
, 0, 3, ""), "v2_oow");
430 attribv
[0] = LLVMBuildFMul(b
, attribv
[0], v0_oow
, "v0_oow_v0a");
431 attribv
[1] = LLVMBuildFMul(b
, attribv
[1], v1_oow
, "v1_oow_v1a");
432 attribv
[2] = LLVMBuildFMul(b
, attribv
[2], v2_oow
, "v2_oow_v2a");
437 emit_position_coef( struct gallivm_state
*gallivm
,
438 struct lp_setup_args
*args
,
440 LLVMValueRef attribv
[3])
442 emit_linear_coef(gallivm
, args
, slot
, attribv
);
447 * Applys cylindrical wrapping to vertex attributes if enabled.
448 * Input coordinates must be in [0, 1] range, otherwise results are undefined.
450 * @param cyl_wrap TGSI_CYLINDRICAL_WRAP_x flags
453 emit_apply_cyl_wrap(struct gallivm_state
*gallivm
,
454 struct lp_setup_args
*args
,
456 LLVMValueRef attribv
[3])
459 LLVMBuilderRef builder
= gallivm
->builder
;
460 struct lp_type type
= lp_float32_vec4_type();
461 LLVMTypeRef float_vec_type
= lp_build_vec_type(gallivm
, type
);
462 LLVMValueRef pos_half
;
463 LLVMValueRef neg_half
;
464 LLVMValueRef cyl_mask
;
473 pos_half
= lp_build_const_vec(gallivm
, type
, +0.5f
);
474 neg_half
= lp_build_const_vec(gallivm
, type
, -0.5f
);
475 cyl_mask
= lp_build_const_mask_aos(gallivm
, type
, cyl_wrap
, 4);
477 one
= lp_build_const_vec(gallivm
, type
, 1.0f
);
478 one
= LLVMBuildBitCast(builder
, one
, lp_build_int_vec_type(gallivm
, type
), "");
479 one
= LLVMBuildAnd(builder
, one
, cyl_mask
, "");
482 delta
= LLVMBuildFSub(builder
, attribv
[1], attribv
[0], "");
484 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_GREATER
, delta
, pos_half
);
485 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
486 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
487 attribv
[0] = LLVMBuildFAdd(builder
, attribv
[0], offset
, "");
489 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_LESS
, delta
, neg_half
);
490 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
491 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
492 attribv
[1] = LLVMBuildFAdd(builder
, attribv
[1], offset
, "");
495 delta
= LLVMBuildFSub(builder
, attribv
[2], attribv
[1], "");
497 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_GREATER
, delta
, pos_half
);
498 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
499 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
500 attribv
[1] = LLVMBuildFAdd(builder
, attribv
[1], offset
, "");
502 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_LESS
, delta
, neg_half
);
503 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
504 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
505 attribv
[2] = LLVMBuildFAdd(builder
, attribv
[2], offset
, "");
508 delta
= LLVMBuildFSub(builder
, attribv
[0], attribv
[2], "");
510 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_GREATER
, delta
, pos_half
);
511 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
512 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
513 attribv
[2] = LLVMBuildFAdd(builder
, attribv
[2], offset
, "");
515 offset
= lp_build_compare(gallivm
, type
, PIPE_FUNC_LESS
, delta
, neg_half
);
516 offset
= LLVMBuildAnd(builder
, offset
, one
, "");
517 offset
= LLVMBuildBitCast(builder
, offset
, float_vec_type
, "");
518 attribv
[0] = LLVMBuildFAdd(builder
, attribv
[0], offset
, "");
523 * Compute the inputs-> dadx, dady, a0 values.
526 emit_tri_coef( struct gallivm_state
*gallivm
,
527 const struct lp_setup_variant_key
*key
,
528 struct lp_setup_args
*args
)
532 LLVMValueRef attribs
[3];
534 /* setup interpolation for all the remaining attributes:
536 for (slot
= 0; slot
< key
->num_inputs
; slot
++) {
537 switch (key
->inputs
[slot
].interp
) {
538 case LP_INTERP_CONSTANT
:
539 load_attribute(gallivm
, args
, key
, key
->inputs
[slot
].src_index
, attribs
);
540 if (key
->flatshade_first
) {
541 emit_constant_coef4(gallivm
, args
, slot
+1, attribs
[0]);
544 emit_constant_coef4(gallivm
, args
, slot
+1, attribs
[2]);
548 case LP_INTERP_LINEAR
:
549 load_attribute(gallivm
, args
, key
, key
->inputs
[slot
].src_index
, attribs
);
550 emit_apply_cyl_wrap(gallivm
, args
, key
->inputs
[slot
].cyl_wrap
, attribs
);
551 emit_linear_coef(gallivm
, args
, slot
+1, attribs
);
554 case LP_INTERP_PERSPECTIVE
:
555 load_attribute(gallivm
, args
, key
, key
->inputs
[slot
].src_index
, attribs
);
556 emit_apply_cyl_wrap(gallivm
, args
, key
->inputs
[slot
].cyl_wrap
, attribs
);
557 apply_perspective_corr(gallivm
, args
, slot
+1, attribs
);
558 emit_linear_coef(gallivm
, args
, slot
+1, attribs
);
561 case LP_INTERP_POSITION
:
563 * The generated pixel interpolators will pick up the coeffs from
568 case LP_INTERP_FACING
:
569 emit_facing_coef(gallivm
, args
, slot
+1);
579 /* XXX: generic code:
582 set_noalias(LLVMBuilderRef builder
,
583 LLVMValueRef function
,
584 const LLVMTypeRef
*arg_types
,
588 for(i
= 0; i
< nr_args
; ++i
)
589 if(LLVMGetTypeKind(arg_types
[i
]) == LLVMPointerTypeKind
)
590 LLVMAddAttribute(LLVMGetParam(function
, i
),
591 LLVMNoAliasAttribute
);
595 init_args(struct gallivm_state
*gallivm
,
596 const struct lp_setup_variant_key
*key
,
597 struct lp_setup_args
*args
)
599 LLVMBuilderRef b
= gallivm
->builder
;
600 LLVMTypeRef shuf_type
= LLVMInt32TypeInContext(gallivm
->context
);
601 LLVMValueRef onef
= lp_build_const_float(gallivm
, 1.0);
602 LLVMValueRef onei
= lp_build_const_int32(gallivm
, 1);
603 LLVMValueRef zeroi
= lp_build_const_int32(gallivm
, 0);
604 LLVMValueRef pixel_center
, xy0_center
, dxy01
, dxy20
, dyx20
;
605 LLVMValueRef e
, f
, ef
, ooa
;
606 LLVMValueRef shuffles
[4];
607 LLVMValueRef attr_pos
[3];
608 struct lp_type typef4
= lp_type_float_vec(32, 128);
610 /* The internal position input is in slot zero:
612 load_attribute(gallivm
, args
, key
, 0, attr_pos
);
614 pixel_center
= lp_build_const_vec(gallivm
, typef4
,
615 key
->pixel_center_half
? 0.5 : 0.0);
618 * xy are first two elems in v0a/v1a/v2a but just use vec4 arit
619 * also offset_tri uses actually xyz in them
621 xy0_center
= LLVMBuildFSub(b
, attr_pos
[0], pixel_center
, "xy0_center" );
623 dxy01
= LLVMBuildFSub(b
, attr_pos
[0], attr_pos
[1], "dxy01");
624 dxy20
= LLVMBuildFSub(b
, attr_pos
[2], attr_pos
[0], "dxy20");
628 shuffles
[2] = LLVMGetUndef(shuf_type
);
629 shuffles
[3] = LLVMGetUndef(shuf_type
);
631 dyx20
= LLVMBuildShuffleVector(b
, dxy20
, dxy20
, LLVMConstVector(shuffles
, 4), "");
633 ef
= LLVMBuildFMul(b
, dxy01
, dyx20
, "ef");
634 e
= LLVMBuildExtractElement(b
, ef
, zeroi
, "");
635 f
= LLVMBuildExtractElement(b
, ef
, onei
, "");
637 ooa
= LLVMBuildFDiv(b
, onef
, LLVMBuildFSub(b
, e
, f
, ""), "ooa");
639 ooa
= vec4f_from_scalar(gallivm
, ooa
, "");
641 /* tri offset calc shares a lot of arithmetic, do it here */
642 if (key
->scale
!= 0.0f
|| key
->units
!= 0.0f
) {
643 lp_do_offset_tri(gallivm
, args
, key
, ooa
, dxy01
, dxy20
, attr_pos
);
646 dxy20
= LLVMBuildFMul(b
, dxy20
, ooa
, "");
647 dxy01
= LLVMBuildFMul(b
, dxy01
, ooa
, "");
649 args
->dy20_ooa
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, dxy20
, onei
);
650 args
->dy01_ooa
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, dxy01
, onei
);
652 args
->dx20_ooa
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, dxy20
, zeroi
);
653 args
->dx01_ooa
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, dxy01
, zeroi
);
655 args
->x0_center
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, xy0_center
, zeroi
);
656 args
->y0_center
= lp_build_extract_broadcast(gallivm
, typef4
, typef4
, xy0_center
, onei
);
658 /* might want to merge that with other coef emit in the future */
659 emit_position_coef(gallivm
, args
, 0, attr_pos
);
663 * Generate the runtime callable function for the coefficient calculation.
666 static struct lp_setup_variant
*
667 generate_setup_variant(struct lp_setup_variant_key
*key
,
668 struct llvmpipe_context
*lp
)
670 struct lp_setup_variant
*variant
= NULL
;
671 struct gallivm_state
*gallivm
;
672 struct lp_setup_args args
;
674 LLVMTypeRef vec4f_type
;
675 LLVMTypeRef func_type
;
676 LLVMTypeRef arg_types
[7];
677 LLVMBasicBlockRef block
;
678 LLVMBuilderRef builder
;
684 variant
= CALLOC_STRUCT(lp_setup_variant
);
688 variant
->gallivm
= gallivm
= gallivm_create();
689 if (!variant
->gallivm
) {
693 builder
= gallivm
->builder
;
695 if (LP_DEBUG
& DEBUG_COUNTERS
) {
699 memcpy(&variant
->key
, key
, key
->size
);
700 variant
->list_item_global
.base
= variant
;
702 util_snprintf(func_name
, sizeof(func_name
), "fs%u_setup%u",
706 /* Currently always deal with full 4-wide vertex attributes from
710 vec4f_type
= LLVMVectorType(LLVMFloatTypeInContext(gallivm
->context
), 4);
712 arg_types
[0] = LLVMPointerType(vec4f_type
, 0); /* v0 */
713 arg_types
[1] = LLVMPointerType(vec4f_type
, 0); /* v1 */
714 arg_types
[2] = LLVMPointerType(vec4f_type
, 0); /* v2 */
715 arg_types
[3] = LLVMInt32TypeInContext(gallivm
->context
); /* facing */
716 arg_types
[4] = LLVMPointerType(vec4f_type
, 0); /* a0, aligned */
717 arg_types
[5] = LLVMPointerType(vec4f_type
, 0); /* dadx, aligned */
718 arg_types
[6] = LLVMPointerType(vec4f_type
, 0); /* dady, aligned */
720 func_type
= LLVMFunctionType(LLVMVoidTypeInContext(gallivm
->context
),
721 arg_types
, Elements(arg_types
), 0);
723 variant
->function
= LLVMAddFunction(gallivm
->module
, func_name
, func_type
);
724 if (!variant
->function
)
727 LLVMSetFunctionCallConv(variant
->function
, LLVMCCallConv
);
729 args
.v0
= LLVMGetParam(variant
->function
, 0);
730 args
.v1
= LLVMGetParam(variant
->function
, 1);
731 args
.v2
= LLVMGetParam(variant
->function
, 2);
732 args
.facing
= LLVMGetParam(variant
->function
, 3);
733 args
.a0
= LLVMGetParam(variant
->function
, 4);
734 args
.dadx
= LLVMGetParam(variant
->function
, 5);
735 args
.dady
= LLVMGetParam(variant
->function
, 6);
737 lp_build_name(args
.v0
, "in_v0");
738 lp_build_name(args
.v1
, "in_v1");
739 lp_build_name(args
.v2
, "in_v2");
740 lp_build_name(args
.facing
, "in_facing");
741 lp_build_name(args
.a0
, "out_a0");
742 lp_build_name(args
.dadx
, "out_dadx");
743 lp_build_name(args
.dady
, "out_dady");
748 block
= LLVMAppendBasicBlockInContext(gallivm
->context
,
749 variant
->function
, "entry");
750 LLVMPositionBuilderAtEnd(builder
, block
);
752 set_noalias(builder
, variant
->function
, arg_types
, Elements(arg_types
));
753 init_args(gallivm
, &variant
->key
, &args
);
754 emit_tri_coef(gallivm
, &variant
->key
, &args
);
756 LLVMBuildRetVoid(builder
);
758 gallivm_verify_function(gallivm
, variant
->function
);
760 gallivm_compile_module(gallivm
);
762 variant
->jit_function
= (lp_jit_setup_triangle
)
763 gallivm_jit_function(gallivm
, variant
->function
);
764 if (!variant
->jit_function
)
768 * Update timing information:
770 if (LP_DEBUG
& DEBUG_COUNTERS
) {
772 LP_COUNT_ADD(llvm_compile_time
, t1
- t0
);
773 LP_COUNT_ADD(nr_llvm_compiles
, 1);
780 if (variant
->function
) {
781 gallivm_free_function(gallivm
,
783 variant
->jit_function
);
785 if (variant
->gallivm
) {
786 gallivm_destroy(variant
->gallivm
);
797 lp_make_setup_variant_key(struct llvmpipe_context
*lp
,
798 struct lp_setup_variant_key
*key
)
800 struct lp_fragment_shader
*fs
= lp
->fs
;
803 assert(sizeof key
->inputs
[0] == sizeof(uint
));
805 key
->num_inputs
= fs
->info
.base
.num_inputs
;
806 key
->flatshade_first
= lp
->rasterizer
->flatshade_first
;
807 key
->pixel_center_half
= lp
->rasterizer
->gl_rasterization_rules
;
808 key
->twoside
= lp
->rasterizer
->light_twoside
;
809 key
->size
= Offset(struct lp_setup_variant_key
,
810 inputs
[key
->num_inputs
]);
812 key
->color_slot
= lp
->color_slot
[0];
813 key
->bcolor_slot
= lp
->bcolor_slot
[0];
814 key
->spec_slot
= lp
->color_slot
[1];
815 key
->bspec_slot
= lp
->bcolor_slot
[1];
816 assert(key
->color_slot
== lp
->color_slot
[0]);
817 assert(key
->bcolor_slot
== lp
->bcolor_slot
[0]);
818 assert(key
->spec_slot
== lp
->color_slot
[1]);
819 assert(key
->bspec_slot
== lp
->bcolor_slot
[1]);
821 key
->units
= (float) (lp
->rasterizer
->offset_units
* lp
->mrd
);
822 key
->scale
= lp
->rasterizer
->offset_scale
;
824 memcpy(key
->inputs
, fs
->inputs
, key
->num_inputs
* sizeof key
->inputs
[0]);
825 for (i
= 0; i
< key
->num_inputs
; i
++) {
826 if (key
->inputs
[i
].interp
== LP_INTERP_COLOR
) {
827 if (lp
->rasterizer
->flatshade
)
828 key
->inputs
[i
].interp
= LP_INTERP_CONSTANT
;
830 key
->inputs
[i
].interp
= LP_INTERP_PERSPECTIVE
;
838 remove_setup_variant(struct llvmpipe_context
*lp
,
839 struct lp_setup_variant
*variant
)
841 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
842 debug_printf("llvmpipe: del setup_variant #%u total %u\n",
843 variant
->no
, lp
->nr_setup_variants
);
846 if (variant
->function
) {
847 gallivm_free_function(variant
->gallivm
,
849 variant
->jit_function
);
852 if (variant
->gallivm
) {
853 gallivm_destroy(variant
->gallivm
);
856 remove_from_list(&variant
->list_item_global
);
857 lp
->nr_setup_variants
--;
863 /* When the number of setup variants exceeds a threshold, cull a
864 * fraction (currently a quarter) of them.
867 cull_setup_variants(struct llvmpipe_context
*lp
)
869 struct pipe_context
*pipe
= &lp
->pipe
;
873 * XXX: we need to flush the context until we have some sort of reference
874 * counting in fragment shaders as they may still be binned
875 * Flushing alone might not be sufficient we need to wait on it too.
877 llvmpipe_finish(pipe
, __FUNCTION__
);
879 for (i
= 0; i
< LP_MAX_SETUP_VARIANTS
/ 4; i
++) {
880 struct lp_setup_variant_list_item
*item
;
881 if (is_empty_list(&lp
->setup_variants_list
)) {
884 item
= last_elem(&lp
->setup_variants_list
);
887 remove_setup_variant(lp
, item
->base
);
893 * Update fragment/vertex shader linkage state. This is called just
894 * prior to drawing something when some fragment-related state has
898 llvmpipe_update_setup(struct llvmpipe_context
*lp
)
900 struct lp_setup_variant_key
*key
= &lp
->setup_variant
.key
;
901 struct lp_setup_variant
*variant
= NULL
;
902 struct lp_setup_variant_list_item
*li
;
904 lp_make_setup_variant_key(lp
, key
);
906 foreach(li
, &lp
->setup_variants_list
) {
907 if(li
->base
->key
.size
== key
->size
&&
908 memcmp(&li
->base
->key
, key
, key
->size
) == 0) {
915 move_to_head(&lp
->setup_variants_list
, &variant
->list_item_global
);
918 if (lp
->nr_setup_variants
>= LP_MAX_SETUP_VARIANTS
) {
919 cull_setup_variants(lp
);
922 variant
= generate_setup_variant(key
, lp
);
924 insert_at_head(&lp
->setup_variants_list
, &variant
->list_item_global
);
925 lp
->nr_setup_variants
++;
926 llvmpipe_variant_count
++;
930 lp_setup_set_setup_variant(lp
->setup
,
935 lp_delete_setup_variants(struct llvmpipe_context
*lp
)
937 struct lp_setup_variant_list_item
*li
;
938 li
= first_elem(&lp
->setup_variants_list
);
939 while(!at_end(&lp
->setup_variants_list
, li
)) {
940 struct lp_setup_variant_list_item
*next
= next_elem(li
);
941 remove_setup_variant(lp
, li
->base
);
947 lp_dump_setup_coef( const struct lp_setup_variant_key
*key
,
948 const float (*sa0
)[4],
949 const float (*sdadx
)[4],
950 const float (*sdady
)[4])
954 for (i
= 0; i
< TGSI_NUM_CHANNELS
; i
++) {
955 float a0
= sa0
[0][i
];
956 float dadx
= sdadx
[0][i
];
957 float dady
= sdady
[0][i
];
959 debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
964 for (slot
= 0; slot
< key
->num_inputs
; slot
++) {
965 unsigned usage_mask
= key
->inputs
[slot
].usage_mask
;
966 for (i
= 0; i
< TGSI_NUM_CHANNELS
; i
++) {
967 if (usage_mask
& (1 << i
)) {
968 float a0
= sa0
[1 + slot
][i
];
969 float dadx
= sdadx
[1 + slot
][i
];
970 float dady
= sdady
[1 + slot
][i
];
972 debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",