llvmpipe: remove misleading debug string
[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_setup.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "util/u_math.h"
30 #include "util/u_memory.h"
31 #include "util/u_simple_list.h"
32 #include "os/os_time.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_debug.h"
35 #include "gallivm/lp_bld_init.h"
36 #include "gallivm/lp_bld_intr.h"
37 #include "gallivm/lp_bld_flow.h"
38 #include "gallivm/lp_bld_type.h"
39 #include <llvm-c/Analysis.h> /* for LLVMVerifyFunction */
40
41 #include "lp_perf.h"
42 #include "lp_debug.h"
43 #include "lp_flush.h"
44 #include "lp_screen.h"
45 #include "lp_context.h"
46 #include "lp_state.h"
47 #include "lp_state_fs.h"
48 #include "lp_state_setup.h"
49
50
51
52 /* currently organized to interpolate full float[4] attributes even
53 * when some elements are unused. Later, can pack vertex data more
54 * closely.
55 */
56
57
58 struct lp_setup_args
59 {
60 /* Function arguments:
61 */
62 LLVMValueRef v0;
63 LLVMValueRef v1;
64 LLVMValueRef v2;
65 LLVMValueRef facing; /* boolean */
66 LLVMValueRef a0;
67 LLVMValueRef dadx;
68 LLVMValueRef dady;
69
70 /* Derived:
71 */
72 LLVMValueRef x0_center;
73 LLVMValueRef y0_center;
74 LLVMValueRef dy20_ooa;
75 LLVMValueRef dy01_ooa;
76 LLVMValueRef dx20_ooa;
77 LLVMValueRef dx01_ooa;
78
79 /* Temporary, per-attribute:
80 */
81 LLVMValueRef v0a;
82 LLVMValueRef v1a;
83 LLVMValueRef v2a;
84 };
85
86 static LLVMTypeRef type4f(void)
87 {
88 return LLVMVectorType(LLVMFloatType(), 4);
89 }
90
91
92 /* Equivalent of _mm_setr_ps(a,b,c,d)
93 */
94 static LLVMValueRef vec4f(LLVMBuilderRef bld,
95 LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d,
96 const char *name)
97 {
98 LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
99 LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
100 LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
101 LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
102
103 LLVMValueRef res = LLVMGetUndef(type4f());
104
105 res = LLVMBuildInsertElement(bld, res, a, i0, "");
106 res = LLVMBuildInsertElement(bld, res, b, i1, "");
107 res = LLVMBuildInsertElement(bld, res, c, i2, "");
108 res = LLVMBuildInsertElement(bld, res, d, i3, name);
109
110 return res;
111 }
112
113 /* Equivalent of _mm_set1_ps(a)
114 */
115 static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld,
116 LLVMValueRef a,
117 const char *name)
118 {
119 LLVMValueRef res = LLVMGetUndef(type4f());
120 int i;
121
122 for(i = 0; i < 4; ++i) {
123 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
124 res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : "");
125 }
126
127 return res;
128 }
129
130 static void
131 store_coef(LLVMBuilderRef builder,
132 struct lp_setup_args *args,
133 unsigned slot,
134 LLVMValueRef a0,
135 LLVMValueRef dadx,
136 LLVMValueRef dady)
137 {
138 LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), slot, 0);
139
140 LLVMBuildStore(builder,
141 a0,
142 LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
143
144 LLVMBuildStore(builder,
145 dadx,
146 LLVMBuildGEP(builder, args->dadx, &idx, 1, ""));
147
148 LLVMBuildStore(builder,
149 dady,
150 LLVMBuildGEP(builder, args->dady, &idx, 1, ""));
151 }
152
153
154
155 static void
156 emit_constant_coef4( LLVMBuilderRef builder,
157 struct lp_setup_args *args,
158 unsigned slot,
159 LLVMValueRef vert)
160 {
161 LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
162 LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero");
163 store_coef(builder, args, slot, vert, zerovec, zerovec);
164 }
165
166
167
168 /**
169 * Setup the fragment input attribute with the front-facing value.
170 * \param frontface is the triangle front facing?
171 */
172 static void
173 emit_facing_coef( LLVMBuilderRef builder,
174 struct lp_setup_args *args,
175 unsigned slot )
176 {
177 LLVMValueRef a0_0 = args->facing;
178 LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, LLVMFloatType(), "");
179 LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
180 LLVMValueRef a0 = vec4f(builder, a0_0f, zero, zero, zero, "facing");
181 LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero");
182
183 store_coef(builder, args, slot, a0, zerovec, zerovec);
184 }
185
186
187 static LLVMValueRef
188 vert_attrib(LLVMBuilderRef b,
189 LLVMValueRef vert,
190 int attr,
191 int elem,
192 const char *name)
193 {
194 LLVMValueRef idx[2];
195 idx[0] = LLVMConstInt(LLVMInt32Type(), attr, 0);
196 idx[1] = LLVMConstInt(LLVMInt32Type(), elem, 0);
197 return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name);
198 }
199
200 static LLVMValueRef
201 vert_clamp(LLVMBuilderRef b,
202 LLVMValueRef x,
203 LLVMValueRef min,
204 LLVMValueRef max)
205 {
206 LLVMValueRef min_result = LLVMBuildFCmp(b, LLVMRealUGT, min, x, "");
207 LLVMValueRef max_result = LLVMBuildFCmp(b, LLVMRealUGT, x, max, "");
208 LLVMValueRef clamp_value;
209
210 clamp_value = LLVMBuildSelect(b, min_result, min, x, "");
211 clamp_value = LLVMBuildSelect(b, max_result, max, x, "");
212
213 return clamp_value;
214 }
215
216 static void
217 lp_twoside(LLVMBuilderRef b,
218 struct lp_setup_args *args,
219 const struct lp_setup_variant_key *key,
220 int bcolor_slot)
221 {
222 LLVMValueRef a0_back, a1_back, a2_back;
223 LLVMValueRef idx2 = LLVMConstInt(LLVMInt32Type(), bcolor_slot, 0);
224
225 LLVMValueRef facing = args->facing;
226 LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, LLVMConstInt(LLVMInt32Type(), 0, 0), ""); /** need i1 for if condition */
227
228 a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back");
229 a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back");
230 a2_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a_back");
231
232 /* Possibly swap the front and back attrib values,
233 *
234 * Prefer select to if so we don't have to worry about phis or
235 * allocas.
236 */
237 args->v0a = LLVMBuildSelect(b, front_facing, a0_back, args->v0a, "");
238 args->v1a = LLVMBuildSelect(b, front_facing, a1_back, args->v1a, "");
239 args->v2a = LLVMBuildSelect(b, front_facing, a2_back, args->v2a, "");
240
241 }
242
243 static void
244 lp_do_offset_tri(LLVMBuilderRef b,
245 struct lp_setup_args *args,
246 const struct lp_setup_variant_key *key)
247 {
248 struct lp_build_context bld;
249 LLVMValueRef zoffset, mult;
250 LLVMValueRef z0_new, z1_new, z2_new;
251 LLVMValueRef dzdx0, dzdx, dzdy0, dzdy;
252 LLVMValueRef max, max_value;
253
254 LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0);
255 LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
256
257 /* edge vectors: e = v0 - v2, f = v1 - v2 */
258 LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x");
259 LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x");
260 LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x");
261 LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y");
262 LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y");
263 LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y");
264 LLVMValueRef v0_z = vert_attrib(b, args->v0, 0, 2, "v0_z");
265 LLVMValueRef v1_z = vert_attrib(b, args->v1, 0, 2, "v1_z");
266 LLVMValueRef v2_z = vert_attrib(b, args->v2, 0, 2, "v2_z");
267
268 /* edge vectors: e = v0 - v2, f = v1 - v2 */
269 LLVMValueRef dx02 = LLVMBuildFSub(b, v0_x, v2_x, "dx02");
270 LLVMValueRef dy02 = LLVMBuildFSub(b, v0_y, v2_y, "dy02");
271 LLVMValueRef dz02 = LLVMBuildFSub(b, v0_z, v2_z, "dz02");
272 LLVMValueRef dx12 = LLVMBuildFSub(b, v1_x, v2_x, "dx12");
273 LLVMValueRef dy12 = LLVMBuildFSub(b, v1_y, v2_y, "dy12");
274 LLVMValueRef dz12 = LLVMBuildFSub(b, v1_z, v2_z, "dz12");
275
276 /* det = cross(e,f).z */
277 LLVMValueRef dx02_dy12 = LLVMBuildFMul(b, dx02, dy12, "dx02_dy12");
278 LLVMValueRef dy02_dx12 = LLVMBuildFMul(b, dy02, dx12, "dy02_dx12");
279 LLVMValueRef det = LLVMBuildFSub(b, dx02_dy12, dy02_dx12, "det");
280 LLVMValueRef inv_det = LLVMBuildFDiv(b, one, det, "inv_det");
281
282 /* (res1,res2) = cross(e,f).xy */
283 LLVMValueRef dy02_dz12 = LLVMBuildFMul(b, dy02, dz12, "dy02_dz12");
284 LLVMValueRef dz02_dy12 = LLVMBuildFMul(b, dz02, dy12, "dz02_dy12");
285 LLVMValueRef dz02_dx12 = LLVMBuildFMul(b, dz02, dx12, "dz02_dx12");
286 LLVMValueRef dx02_dz12 = LLVMBuildFMul(b, dx02, dz12, "dx02_dz12");
287 LLVMValueRef res1 = LLVMBuildFSub(b, dy02_dz12, dz02_dy12, "res1");
288 LLVMValueRef res2 = LLVMBuildFSub(b, dz02_dx12, dx02_dz12, "res2");
289
290 /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/
291 lp_build_context_init(&bld, b, lp_type_float(32));
292 dzdx0 = LLVMBuildFMul(b, res1, inv_det, "dzdx");
293 dzdx = lp_build_abs(&bld, dzdx0);
294 dzdy0 = LLVMBuildFMul(b, res2, inv_det, "dzdy");
295 dzdy = lp_build_abs(&bld, dzdy0);
296
297 /* zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale */
298 max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, "");
299 max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max");
300
301 mult = LLVMBuildFMul(b, max_value, LLVMConstReal(LLVMFloatType(), key->scale), "");
302 zoffset = LLVMBuildFAdd(b, LLVMConstReal(LLVMFloatType(), key->units), mult, "zoffset");
303
304 /* clamp and do offset */
305 z0_new = vert_clamp(b, LLVMBuildFAdd(b, v0_z, zoffset, ""), zero, one);
306 z1_new = vert_clamp(b, LLVMBuildFAdd(b, v1_z, zoffset, ""), zero, one);
307 z2_new = vert_clamp(b, LLVMBuildFAdd(b, v2_z, zoffset, ""), zero, one);
308
309 /* insert into args->a0.z, a1.z, a2.z:
310 */
311 args->v0a = LLVMBuildInsertElement(b, args->v0a, z0_new, LLVMConstInt(LLVMInt32Type(), 2, 0), "");
312 args->v1a = LLVMBuildInsertElement(b, args->v1a, z1_new, LLVMConstInt(LLVMInt32Type(), 2, 0), "");
313 args->v2a = LLVMBuildInsertElement(b, args->v2a, z2_new, LLVMConstInt(LLVMInt32Type(), 2, 0), "");
314 }
315
316 static void
317 load_attribute(LLVMBuilderRef b,
318 struct lp_setup_args *args,
319 const struct lp_setup_variant_key *key,
320 unsigned vert_attr)
321 {
322 LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0);
323
324 /* Load the vertex data
325 */
326 args->v0a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
327 args->v1a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
328 args->v2a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
329
330
331 /* Potentially modify it according to twoside, offset, etc:
332 */
333 if (vert_attr == 0 && (key->scale != 0.0f || key->units != 0.0f)) {
334 lp_do_offset_tri(b, args, key);
335 }
336
337 if (key->twoside) {
338 if (vert_attr == key->color_slot && key->bcolor_slot != ~0)
339 lp_twoside(b, args, key, key->bcolor_slot);
340 else if (vert_attr == key->spec_slot && key->bspec_slot != ~0)
341 lp_twoside(b, args, key, key->bspec_slot);
342 }
343 }
344
345 static void
346 emit_coef4( LLVMBuilderRef b,
347 struct lp_setup_args *args,
348 unsigned slot,
349 LLVMValueRef a0,
350 LLVMValueRef a1,
351 LLVMValueRef a2)
352 {
353 LLVMValueRef dy20_ooa = args->dy20_ooa;
354 LLVMValueRef dy01_ooa = args->dy01_ooa;
355 LLVMValueRef dx20_ooa = args->dx20_ooa;
356 LLVMValueRef dx01_ooa = args->dx01_ooa;
357 LLVMValueRef x0_center = args->x0_center;
358 LLVMValueRef y0_center = args->y0_center;
359
360 /* XXX: using fsub, fmul on vector types -- does this work??
361 */
362 LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01");
363 LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20");
364
365 /* Calculate dadx (vec4f)
366 */
367 LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa");
368 LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa");
369 LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx");
370
371 /* Calculate dady (vec4f)
372 */
373 LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa");
374 LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa");
375 LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady");
376
377 /* Calculate a0 - the attribute value at the origin
378 */
379 LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
380 LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
381 LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
382 LLVMValueRef attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
383
384 store_coef(b, args, slot, attr_0, dadx, dady);
385 }
386
387
388 static void
389 emit_linear_coef( LLVMBuilderRef b,
390 struct lp_setup_args *args,
391 unsigned slot)
392 {
393 /* nothing to do anymore */
394 emit_coef4(b,
395 args, slot,
396 args->v0a,
397 args->v1a,
398 args->v2a);
399 }
400
401
402 /**
403 * Compute a0, dadx and dady for a perspective-corrected interpolant,
404 * for a triangle.
405 * We basically multiply the vertex value by 1/w before computing
406 * the plane coefficients (a0, dadx, dady).
407 * Later, when we compute the value at a particular fragment position we'll
408 * divide the interpolated value by the interpolated W at that fragment.
409 */
410 static void
411 emit_perspective_coef( LLVMBuilderRef b,
412 struct lp_setup_args *args,
413 unsigned slot)
414 {
415 /* premultiply by 1/w (v[0][3] is always 1/w):
416 */
417 LLVMValueRef v0_oow = vec4f_from_scalar(b, vert_attrib(b, args->v0, 0, 3, ""), "v0_oow");
418 LLVMValueRef v1_oow = vec4f_from_scalar(b, vert_attrib(b, args->v1, 0, 3, ""), "v1_oow");
419 LLVMValueRef v2_oow = vec4f_from_scalar(b, vert_attrib(b, args->v2, 0, 3, ""), "v2_oow");
420
421 LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, args->v0a, v0_oow, "v0_oow_v0a");
422 LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, args->v1a, v1_oow, "v1_oow_v1a");
423 LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, args->v2a, v2_oow, "v2_oow_v2a");
424
425 emit_coef4(b, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a);
426 }
427
428
429 static void
430 emit_position_coef( LLVMBuilderRef builder,
431 struct lp_setup_args *args,
432 int slot )
433 {
434 emit_linear_coef(builder, args, slot);
435 }
436
437
438
439
440 /**
441 * Compute the inputs-> dadx, dady, a0 values.
442 */
443 static void
444 emit_tri_coef( LLVMBuilderRef builder,
445 const struct lp_setup_variant_key *key,
446 struct lp_setup_args *args )
447 {
448 unsigned slot;
449
450 /* The internal position input is in slot zero:
451 */
452 load_attribute(builder, args, key, 0);
453 emit_position_coef(builder, args, 0);
454
455 /* setup interpolation for all the remaining attributes:
456 */
457 for (slot = 0; slot < key->num_inputs; slot++) {
458
459 if (key->inputs[slot].interp == LP_INTERP_CONSTANT ||
460 key->inputs[slot].interp == LP_INTERP_LINEAR ||
461 key->inputs[slot].interp == LP_INTERP_PERSPECTIVE)
462 load_attribute(builder, args, key, key->inputs[slot].src_index);
463
464 switch (key->inputs[slot].interp) {
465 case LP_INTERP_CONSTANT:
466 if (key->flatshade_first) {
467 emit_constant_coef4(builder, args, slot+1, args->v0a);
468 }
469 else {
470 emit_constant_coef4(builder, args, slot+1, args->v2a);
471 }
472 break;
473
474 case LP_INTERP_LINEAR:
475 emit_linear_coef(builder, args, slot+1);
476 break;
477
478 case LP_INTERP_PERSPECTIVE:
479 emit_perspective_coef(builder, args, slot+1);
480 break;
481
482 case LP_INTERP_POSITION:
483 /*
484 * The generated pixel interpolators will pick up the coeffs from
485 * slot 0.
486 */
487 break;
488
489 case LP_INTERP_FACING:
490 emit_facing_coef(builder, args, slot+1);
491 break;
492
493 default:
494 assert(0);
495 }
496 }
497 }
498
499
500 /* XXX: This is generic code, share with fs/vs codegen:
501 */
502 static lp_jit_setup_triangle
503 finalize_function(struct llvmpipe_screen *screen,
504 LLVMBuilderRef builder,
505 LLVMValueRef function)
506 {
507 void *f;
508
509 /* Verify the LLVM IR. If invalid, dump and abort */
510 #ifdef DEBUG
511 if (LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
512 if (1)
513 lp_debug_dump_value(function);
514 abort();
515 }
516 #endif
517
518 /* Apply optimizations to LLVM IR */
519 LLVMRunFunctionPassManager(screen->pass, function);
520
521 if (gallivm_debug & GALLIVM_DEBUG_IR)
522 {
523 /* Print the LLVM IR to stderr */
524 lp_debug_dump_value(function);
525 debug_printf("\n");
526 }
527
528 /*
529 * Translate the LLVM IR into machine code.
530 */
531 f = LLVMGetPointerToGlobal(screen->engine, function);
532
533 if (gallivm_debug & GALLIVM_DEBUG_ASM)
534 {
535 lp_disassemble(f);
536 }
537
538 lp_func_delete_body(function);
539
540 return f;
541 }
542
543 /* XXX: Generic code:
544 */
545 static void
546 lp_emit_emms(LLVMBuilderRef builder)
547 {
548 #ifdef PIPE_ARCH_X86
549 /* Avoid corrupting the FPU stack on 32bit OSes. */
550 lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
551 #endif
552 }
553
554
555 /* XXX: generic code:
556 */
557 static void
558 set_noalias(LLVMBuilderRef builder,
559 LLVMValueRef function,
560 const LLVMTypeRef *arg_types,
561 int nr_args)
562 {
563 int i;
564 for(i = 0; i < Elements(arg_types); ++i)
565 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
566 LLVMAddAttribute(LLVMGetParam(function, i),
567 LLVMNoAliasAttribute);
568 }
569
570 static void
571 init_args(LLVMBuilderRef b,
572 struct lp_setup_args *args,
573 const struct lp_setup_variant *variant)
574 {
575 LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x");
576 LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y");
577
578 LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x");
579 LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y");
580
581 LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x");
582 LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y");
583
584 LLVMValueRef pixel_center = LLVMConstReal(LLVMFloatType(),
585 variant->key.pixel_center_half ? 0.5 : 0);
586
587 LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" );
588 LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" );
589
590 LLVMValueRef dx01 = LLVMBuildFSub(b, v0_x, v1_x, "dx01");
591 LLVMValueRef dy01 = LLVMBuildFSub(b, v0_y, v1_y, "dy01");
592 LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20");
593 LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20");
594
595 LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0);
596 LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e");
597 LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f");
598 LLVMValueRef ooa = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa");
599
600 LLVMValueRef dy20_ooa = LLVMBuildFMul(b, dy20, ooa, "dy20_ooa");
601 LLVMValueRef dy01_ooa = LLVMBuildFMul(b, dy01, ooa, "dy01_ooa");
602 LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa");
603 LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa");
604
605 args->dy20_ooa = vec4f_from_scalar(b, dy20_ooa, "dy20_ooa_4f");
606 args->dy01_ooa = vec4f_from_scalar(b, dy01_ooa, "dy01_ooa_4f");
607
608 args->dx20_ooa = vec4f_from_scalar(b, dx20_ooa, "dx20_ooa_4f");
609 args->dx01_ooa = vec4f_from_scalar(b, dx01_ooa, "dx01_ooa_4f");
610
611 args->x0_center = vec4f_from_scalar(b, x0_center, "x0_center_4f");
612 args->y0_center = vec4f_from_scalar(b, y0_center, "y0_center_4f");
613 }
614
615 /**
616 * Generate the runtime callable function for the coefficient calculation.
617 *
618 */
619 static struct lp_setup_variant *
620 generate_setup_variant(struct llvmpipe_screen *screen,
621 struct lp_setup_variant_key *key,
622 struct llvmpipe_context *lp)
623 {
624 struct lp_setup_variant *variant = NULL;
625 struct lp_setup_args args;
626 char func_name[256];
627 LLVMTypeRef vec4f_type;
628 LLVMTypeRef func_type;
629 LLVMTypeRef arg_types[7];
630 LLVMBasicBlockRef block;
631 LLVMBuilderRef builder;
632 int64_t t0, t1;
633
634 if (0)
635 goto fail;
636
637 variant = CALLOC_STRUCT(lp_setup_variant);
638 if (variant == NULL)
639 goto fail;
640
641 if (LP_DEBUG & DEBUG_COUNTERS) {
642 t0 = os_time_get();
643 }
644
645 memcpy(&variant->key, key, key->size);
646 variant->list_item_global.base = variant;
647
648 util_snprintf(func_name, sizeof(func_name), "fs%u_setup%u",
649 0,
650 variant->no);
651
652 /* Currently always deal with full 4-wide vertex attributes from
653 * the vertices.
654 */
655
656 vec4f_type = LLVMVectorType(LLVMFloatType(), 4);
657
658 arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */
659 arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */
660 arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */
661 arg_types[3] = LLVMInt32Type(); /* facing */
662 arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */
663 arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */
664 arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */
665
666 func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
667
668 variant->function = LLVMAddFunction(screen->module, func_name, func_type);
669 if (!variant->function)
670 goto fail;
671
672 LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
673
674 args.v0 = LLVMGetParam(variant->function, 0);
675 args.v1 = LLVMGetParam(variant->function, 1);
676 args.v2 = LLVMGetParam(variant->function, 2);
677 args.facing = LLVMGetParam(variant->function, 3);
678 args.a0 = LLVMGetParam(variant->function, 4);
679 args.dadx = LLVMGetParam(variant->function, 5);
680 args.dady = LLVMGetParam(variant->function, 6);
681
682 lp_build_name(args.v0, "in_v0");
683 lp_build_name(args.v1, "in_v1");
684 lp_build_name(args.v2, "in_v2");
685 lp_build_name(args.facing, "in_facing");
686 lp_build_name(args.a0, "out_a0");
687 lp_build_name(args.dadx, "out_dadx");
688 lp_build_name(args.dady, "out_dady");
689
690 /*
691 * Function body
692 */
693 block = LLVMAppendBasicBlock(variant->function, "entry");
694 builder = LLVMCreateBuilder();
695 LLVMPositionBuilderAtEnd(builder, block);
696
697 set_noalias(builder, variant->function, arg_types, Elements(arg_types));
698 init_args(builder, &args, variant);
699 emit_tri_coef(builder, &variant->key, &args);
700
701 lp_emit_emms(builder);
702 LLVMBuildRetVoid(builder);
703 LLVMDisposeBuilder(builder);
704
705 variant->jit_function = finalize_function(screen, builder,
706 variant->function);
707 if (!variant->jit_function)
708 goto fail;
709
710 /*
711 * Update timing information:
712 */
713 if (LP_DEBUG & DEBUG_COUNTERS) {
714 t1 = os_time_get();
715 LP_COUNT_ADD(llvm_compile_time, t1 - t0);
716 LP_COUNT_ADD(nr_llvm_compiles, 1);
717 }
718
719 return variant;
720
721 fail:
722 if (variant) {
723 if (variant->function) {
724 if (variant->jit_function)
725 LLVMFreeMachineCodeForFunction(screen->engine,
726 variant->function);
727 LLVMDeleteFunction(variant->function);
728 }
729 FREE(variant);
730 }
731
732 return NULL;
733 }
734
735
736
737 static void
738 lp_make_setup_variant_key(struct llvmpipe_context *lp,
739 struct lp_setup_variant_key *key)
740 {
741 struct lp_fragment_shader *fs = lp->fs;
742 unsigned i;
743
744 assert(sizeof key->inputs[0] == sizeof(ushort));
745
746 key->num_inputs = fs->info.base.num_inputs;
747 key->flatshade_first = lp->rasterizer->flatshade_first;
748 key->pixel_center_half = lp->rasterizer->gl_rasterization_rules;
749 key->twoside = lp->rasterizer->light_twoside;
750 key->size = Offset(struct lp_setup_variant_key,
751 inputs[key->num_inputs]);
752 key->color_slot = lp->color_slot[0];
753 key->bcolor_slot = lp->bcolor_slot[0];
754 key->spec_slot = lp->color_slot[1];
755 key->bspec_slot = lp->bcolor_slot[1];
756 key->units = (float) (lp->rasterizer->offset_units * lp->mrd);
757 key->scale = lp->rasterizer->offset_scale;
758 key->pad = 0;
759 memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
760 for (i = 0; i < key->num_inputs; i++) {
761 if (key->inputs[i].interp == LP_INTERP_COLOR) {
762 if (lp->rasterizer->flatshade)
763 key->inputs[i].interp = LP_INTERP_CONSTANT;
764 else
765 key->inputs[i].interp = LP_INTERP_LINEAR;
766 }
767 }
768
769 }
770
771
772 static void
773 remove_setup_variant(struct llvmpipe_context *lp,
774 struct lp_setup_variant *variant)
775 {
776 struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
777
778 if (gallivm_debug & GALLIVM_DEBUG_IR) {
779 debug_printf("llvmpipe: del setup_variant #%u total %u\n",
780 variant->no, lp->nr_setup_variants);
781 }
782
783 if (variant->function) {
784 if (variant->jit_function)
785 LLVMFreeMachineCodeForFunction(screen->engine,
786 variant->function);
787 LLVMDeleteFunction(variant->function);
788 }
789
790 remove_from_list(&variant->list_item_global);
791 lp->nr_setup_variants--;
792 FREE(variant);
793 }
794
795
796
797 /* When the number of setup variants exceeds a threshold, cull a
798 * fraction (currently a quarter) of them.
799 */
800 static void
801 cull_setup_variants(struct llvmpipe_context *lp)
802 {
803 struct pipe_context *pipe = &lp->pipe;
804 int i;
805
806 /*
807 * XXX: we need to flush the context until we have some sort of reference
808 * counting in fragment shaders as they may still be binned
809 * Flushing alone might not be sufficient we need to wait on it too.
810 */
811 llvmpipe_finish(pipe, __FUNCTION__);
812
813 for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) {
814 struct lp_setup_variant_list_item *item = last_elem(&lp->setup_variants_list);
815 remove_setup_variant(lp, item->base);
816 }
817 }
818
819
820 /**
821 * Update fragment/vertex shader linkage state. This is called just
822 * prior to drawing something when some fragment-related state has
823 * changed.
824 */
825 void
826 llvmpipe_update_setup(struct llvmpipe_context *lp)
827 {
828 struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
829
830 struct lp_setup_variant_key *key = &lp->setup_variant.key;
831 struct lp_setup_variant *variant = NULL;
832 struct lp_setup_variant_list_item *li;
833
834 lp_make_setup_variant_key(lp, key);
835
836 foreach(li, &lp->setup_variants_list) {
837 if(li->base->key.size == key->size &&
838 memcmp(&li->base->key, key, key->size) == 0) {
839 variant = li->base;
840 break;
841 }
842 }
843
844 if (variant) {
845 move_to_head(&lp->setup_variants_list, &variant->list_item_global);
846 }
847 else {
848 if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) {
849 cull_setup_variants(lp);
850 }
851
852 variant = generate_setup_variant(screen, key, lp);
853 insert_at_head(&lp->setup_variants_list, &variant->list_item_global);
854 lp->nr_setup_variants++;
855 }
856
857 lp_setup_set_setup_variant(lp->setup,
858 variant);
859 }
860
861 void
862 lp_delete_setup_variants(struct llvmpipe_context *lp)
863 {
864 struct lp_setup_variant_list_item *li;
865 li = first_elem(&lp->setup_variants_list);
866 while(!at_end(&lp->setup_variants_list, li)) {
867 struct lp_setup_variant_list_item *next = next_elem(li);
868 remove_setup_variant(lp, li->base);
869 li = next;
870 }
871 }
872
873 void
874 lp_dump_setup_coef( const struct lp_setup_variant_key *key,
875 const float (*sa0)[4],
876 const float (*sdadx)[4],
877 const float (*sdady)[4])
878 {
879 int i, slot;
880
881 for (i = 0; i < NUM_CHANNELS; i++) {
882 float a0 = sa0 [0][i];
883 float dadx = sdadx[0][i];
884 float dady = sdady[0][i];
885
886 debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
887 "xyzw"[i],
888 a0, dadx, dady);
889 }
890
891 for (slot = 0; slot < key->num_inputs; slot++) {
892 unsigned usage_mask = key->inputs[slot].usage_mask;
893 for (i = 0; i < NUM_CHANNELS; i++) {
894 if (usage_mask & (1 << i)) {
895 float a0 = sa0 [1 + slot][i];
896 float dadx = sdadx[1 + slot][i];
897 float dady = sdady[1 + slot][i];
898
899 debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
900 slot,
901 "xyzw"[i],
902 a0, dadx, dady);
903 }
904 }
905 }
906 }