03b411a33644547680ade66a31f7689092f0f686
[mesa.git] / src / gallium / drivers / llvmpipe / lp_state_setup.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "util/u_math.h"
30 #include "util/u_memory.h"
31 #include "util/u_simple_list.h"
32 #include "os/os_time.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_debug.h"
35 #include "gallivm/lp_bld_init.h"
36 #include "gallivm/lp_bld_intr.h"
37 #include "gallivm/lp_bld_flow.h"
38 #include "gallivm/lp_bld_type.h"
39 #include <llvm-c/Analysis.h> /* for LLVMVerifyFunction */
40
41 #include "lp_perf.h"
42 #include "lp_debug.h"
43 #include "lp_flush.h"
44 #include "lp_screen.h"
45 #include "lp_context.h"
46 #include "lp_setup_context.h"
47 #include "lp_rast.h"
48 #include "lp_state.h"
49 #include "lp_state_fs.h"
50 #include "lp_state_setup.h"
51
52
53
54 /* currently organized to interpolate full float[4] attributes even
55 * when some elements are unused. Later, can pack vertex data more
56 * closely.
57 */
58
59
60 struct lp_setup_args
61 {
62 /* Function arguments:
63 */
64 LLVMValueRef v0;
65 LLVMValueRef v1;
66 LLVMValueRef v2;
67 LLVMValueRef facing; /* boolean */
68 LLVMValueRef a0;
69 LLVMValueRef dadx;
70 LLVMValueRef dady;
71
72 /* Derived:
73 */
74 LLVMValueRef x0_center;
75 LLVMValueRef y0_center;
76 LLVMValueRef dy20_ooa;
77 LLVMValueRef dy01_ooa;
78 LLVMValueRef dx20_ooa;
79 LLVMValueRef dx01_ooa;
80 };
81
82 static LLVMTypeRef type4f(void)
83 {
84 return LLVMVectorType(LLVMFloatType(), 4);
85 }
86
87
88 /* Equivalent of _mm_setr_ps(a,b,c,d)
89 */
90 static LLVMValueRef vec4f(LLVMBuilderRef bld,
91 LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d,
92 const char *name)
93 {
94 LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
95 LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
96 LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
97 LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
98
99 LLVMValueRef res = LLVMGetUndef(type4f());
100
101 res = LLVMBuildInsertElement(bld, res, a, i0, "");
102 res = LLVMBuildInsertElement(bld, res, b, i1, "");
103 res = LLVMBuildInsertElement(bld, res, c, i2, "");
104 res = LLVMBuildInsertElement(bld, res, d, i3, name);
105
106 return res;
107 }
108
109 /* Equivalent of _mm_set1_ps(a)
110 */
111 static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld,
112 LLVMValueRef a,
113 const char *name)
114 {
115 LLVMValueRef res = LLVMGetUndef(type4f());
116 int i;
117
118 for(i = 0; i < 4; ++i) {
119 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
120 res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : "");
121 }
122
123 return res;
124 }
125
126 static void
127 store_coef(LLVMBuilderRef builder,
128 struct lp_setup_args *args,
129 unsigned slot,
130 LLVMValueRef a0,
131 LLVMValueRef dadx,
132 LLVMValueRef dady)
133 {
134 LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), slot, 0);
135
136 LLVMBuildStore(builder,
137 a0,
138 LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
139
140 LLVMBuildStore(builder,
141 dadx,
142 LLVMBuildGEP(builder, args->dadx, &idx, 1, ""));
143
144 LLVMBuildStore(builder,
145 dady,
146 LLVMBuildGEP(builder, args->dady, &idx, 1, ""));
147 }
148
149
150
151 static void
152 emit_constant_coef4( LLVMBuilderRef builder,
153 struct lp_setup_args *args,
154 unsigned slot,
155 LLVMValueRef vert,
156 unsigned attr)
157 {
158 LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
159 LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero");
160 LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), attr, 0);
161 LLVMValueRef attr_ptr = LLVMBuildGEP(builder, vert, &idx, 1, "attr_ptr");
162 LLVMValueRef vert_attr = LLVMBuildLoad(builder, attr_ptr, "vert_attr");
163
164 store_coef(builder, args, slot, vert_attr, zerovec, zerovec);
165 }
166
167
168
169 /**
170 * Setup the fragment input attribute with the front-facing value.
171 * \param frontface is the triangle front facing?
172 */
173 static void
174 emit_facing_coef( LLVMBuilderRef builder,
175 struct lp_setup_args *args,
176 unsigned slot )
177 {
178 LLVMValueRef a0_0 = args->facing;
179 LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, LLVMFloatType(), "");
180 LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
181 LLVMValueRef a0 = vec4f(builder, a0_0f, zero, zero, zero, "facing");
182 LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero");
183
184 store_coef(builder, args, slot, a0, zerovec, zerovec);
185 }
186
187
188 static LLVMValueRef
189 vert_attrib(LLVMBuilderRef b,
190 LLVMValueRef vert,
191 int attr,
192 int elem,
193 const char *name)
194 {
195 LLVMValueRef idx[2];
196 idx[0] = LLVMConstInt(LLVMInt32Type(), attr, 0);
197 idx[1] = LLVMConstInt(LLVMInt32Type(), elem, 0);
198 return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name);
199 }
200
201 static LLVMValueRef
202 vert_clamp(LLVMBuilderRef b,
203 LLVMValueRef x,
204 LLVMValueRef min,
205 LLVMValueRef max)
206 {
207 LLVMValueRef min_result = LLVMBuildFCmp(b, LLVMRealUGT, min, x, "");
208 LLVMValueRef max_result = LLVMBuildFCmp(b, LLVMRealUGT, x, max, "");
209 LLVMValueRef clamp_value;
210
211 clamp_value = LLVMBuildSelect(b, min_result, min, x, "");
212 clamp_value = LLVMBuildSelect(b, max_result, max, x, "");
213
214 return clamp_value;
215 }
216
217 static void
218 emit_coef4( LLVMBuilderRef b,
219 struct lp_setup_args *args,
220 unsigned slot,
221 LLVMValueRef a0,
222 LLVMValueRef a1,
223 LLVMValueRef a2)
224 {
225 LLVMValueRef dy20_ooa = args->dy20_ooa;
226 LLVMValueRef dy01_ooa = args->dy01_ooa;
227 LLVMValueRef dx20_ooa = args->dx20_ooa;
228 LLVMValueRef dx01_ooa = args->dx01_ooa;
229 LLVMValueRef x0_center = args->x0_center;
230 LLVMValueRef y0_center = args->y0_center;
231
232 /* XXX: using fsub, fmul on vector types -- does this work??
233 */
234 LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01");
235 LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20");
236
237 /* Calculate dadx (vec4f)
238 */
239 LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa");
240 LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa");
241 LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx");
242
243 /* Calculate dady (vec4f)
244 */
245 LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa");
246 LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa");
247 LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady");
248
249 /* Calculate a0 - the attribute value at the origin
250 */
251 LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
252 LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
253 LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
254 LLVMValueRef attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
255
256 store_coef(b, args, slot, attr_0, dadx, dady);
257 }
258
259
260 static void
261 emit_linear_coef( LLVMBuilderRef b,
262 struct lp_setup_args *args,
263 unsigned slot,
264 unsigned vert_attr)
265 {
266 LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0);
267
268 LLVMValueRef a0 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
269 LLVMValueRef a1 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
270 LLVMValueRef a2 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
271
272 emit_coef4(b, args, slot, a0, a1, a2);
273 }
274
275
276
277 /**
278 * Compute a0, dadx and dady for a perspective-corrected interpolant,
279 * for a triangle.
280 * We basically multiply the vertex value by 1/w before computing
281 * the plane coefficients (a0, dadx, dady).
282 * Later, when we compute the value at a particular fragment position we'll
283 * divide the interpolated value by the interpolated W at that fragment.
284 */
285 static void
286 emit_perspective_coef( LLVMBuilderRef b,
287 struct lp_setup_args *args,
288 unsigned slot,
289 unsigned vert_attr)
290 {
291 /* premultiply by 1/w (v[0][3] is always 1/w):
292 */
293 LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0);
294
295 LLVMValueRef v0a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
296 LLVMValueRef v1a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
297 LLVMValueRef v2a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
298
299 LLVMValueRef v0_oow = vec4f_from_scalar(b, vert_attrib(b, args->v0, 0, 3, ""), "v0_oow");
300 LLVMValueRef v1_oow = vec4f_from_scalar(b, vert_attrib(b, args->v1, 0, 3, ""), "v1_oow");
301 LLVMValueRef v2_oow = vec4f_from_scalar(b, vert_attrib(b, args->v2, 0, 3, ""), "v2_oow");
302
303 LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, v0a, v0_oow, "v0_oow_v0a");
304 LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, v1a, v1_oow, "v1_oow_v1a");
305 LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, v2a, v2_oow, "v2_oow_v2a");
306
307 emit_coef4(b, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a);
308 }
309
310
311 static void
312 emit_position_coef( LLVMBuilderRef builder,
313 struct lp_setup_args *args,
314 int slot, int attrib )
315 {
316 emit_linear_coef(builder, args, slot, attrib);
317 }
318
319
320
321
322 /**
323 * Compute the inputs-> dadx, dady, a0 values.
324 */
325 static void
326 emit_tri_coef( LLVMBuilderRef builder,
327 const struct lp_setup_variant_key *key,
328 struct lp_setup_args *args )
329 {
330 unsigned slot;
331
332 /* The internal position input is in slot zero:
333 */
334 emit_position_coef(builder, args, 0, 0);
335
336 /* setup interpolation for all the remaining attributes:
337 */
338 for (slot = 0; slot < key->num_inputs; slot++) {
339 unsigned vert_attr = key->inputs[slot].src_index;
340
341 switch (key->inputs[slot].interp) {
342 case LP_INTERP_CONSTANT:
343 if (key->flatshade_first) {
344 emit_constant_coef4(builder, args, slot+1, args->v0, vert_attr);
345 }
346 else {
347 emit_constant_coef4(builder, args, slot+1, args->v2, vert_attr);
348 }
349 break;
350
351 case LP_INTERP_LINEAR:
352 emit_linear_coef(builder, args, slot+1, vert_attr);
353 break;
354
355 case LP_INTERP_PERSPECTIVE:
356 emit_perspective_coef(builder, args, slot+1, vert_attr);
357 break;
358
359 case LP_INTERP_POSITION:
360 /*
361 * The generated pixel interpolators will pick up the coeffs from
362 * slot 0.
363 */
364 break;
365
366 case LP_INTERP_FACING:
367 emit_facing_coef(builder, args, slot+1);
368 break;
369
370 default:
371 assert(0);
372 }
373 }
374 }
375
376
377 /* XXX: This is generic code, share with fs/vs codegen:
378 */
379 static lp_jit_setup_triangle
380 finalize_function(struct llvmpipe_screen *screen,
381 LLVMBuilderRef builder,
382 LLVMValueRef function)
383 {
384 void *f;
385
386 /* Verify the LLVM IR. If invalid, dump and abort */
387 #ifdef DEBUG
388 if (LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
389 if (1)
390 lp_debug_dump_value(function);
391 abort();
392 }
393 #endif
394
395 /* Apply optimizations to LLVM IR */
396 LLVMRunFunctionPassManager(screen->pass, function);
397
398 if (gallivm_debug & GALLIVM_DEBUG_IR)
399 {
400 /* Print the LLVM IR to stderr */
401 lp_debug_dump_value(function);
402 debug_printf("\n");
403 }
404
405 /*
406 * Translate the LLVM IR into machine code.
407 */
408 f = LLVMGetPointerToGlobal(screen->engine, function);
409
410 if (gallivm_debug & GALLIVM_DEBUG_ASM)
411 {
412 lp_disassemble(f);
413 }
414
415 lp_func_delete_body(function);
416
417 return f;
418 }
419
420 /* XXX: Generic code:
421 */
422 static void
423 lp_emit_emms(LLVMBuilderRef builder)
424 {
425 #ifdef PIPE_ARCH_X86
426 /* Avoid corrupting the FPU stack on 32bit OSes. */
427 lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
428 #endif
429 }
430
431
432 /* XXX: generic code:
433 */
434 static void
435 set_noalias(LLVMBuilderRef builder,
436 LLVMValueRef function,
437 const LLVMTypeRef *arg_types,
438 int nr_args)
439 {
440 int i;
441 for(i = 0; i < Elements(arg_types); ++i)
442 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
443 LLVMAddAttribute(LLVMGetParam(function, i),
444 LLVMNoAliasAttribute);
445 }
446
447 static void
448 init_args(LLVMBuilderRef b,
449 struct lp_setup_args *args,
450 const struct lp_setup_variant *variant)
451 {
452 LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x");
453 LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y");
454
455 LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x");
456 LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y");
457
458 LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x");
459 LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y");
460
461 LLVMValueRef pixel_center = LLVMConstReal(LLVMFloatType(),
462 variant->key.pixel_center_half ? 0.5 : 0);
463
464 LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" );
465 LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" );
466
467 LLVMValueRef dx01 = LLVMBuildFSub(b, v0_x, v1_x, "dx01");
468 LLVMValueRef dy01 = LLVMBuildFSub(b, v0_y, v1_y, "dy01");
469 LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20");
470 LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20");
471
472 LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0);
473 LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e");
474 LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f");
475 LLVMValueRef ooa = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa");
476
477 LLVMValueRef dy20_ooa = LLVMBuildFMul(b, dy20, ooa, "dy20_ooa");
478 LLVMValueRef dy01_ooa = LLVMBuildFMul(b, dy01, ooa, "dy01_ooa");
479 LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa");
480 LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa");
481
482 args->dy20_ooa = vec4f_from_scalar(b, dy20_ooa, "dy20_ooa_4f");
483 args->dy01_ooa = vec4f_from_scalar(b, dy01_ooa, "dy01_ooa_4f");
484
485 args->dx20_ooa = vec4f_from_scalar(b, dx20_ooa, "dx20_ooa_4f");
486 args->dx01_ooa = vec4f_from_scalar(b, dx01_ooa, "dx01_ooa_4f");
487
488 args->x0_center = vec4f_from_scalar(b, x0_center, "x0_center_4f");
489 args->y0_center = vec4f_from_scalar(b, y0_center, "y0_center_4f");
490 }
491
492 static void
493 lp_twoside(LLVMBuilderRef b,
494 struct lp_setup_args *args,
495 const struct lp_setup_variant_key *key)
496 {
497 struct lp_build_if_state if_state;
498
499 LLVMValueRef a0_old, a1_old, a2_old;
500 LLVMValueRef a0_new, a1_new, a2_new;
501
502 LLVMValueRef idx1 = LLVMConstInt(LLVMInt32Type(), key->color_slot, 0);
503 LLVMValueRef idx2 = LLVMConstInt(LLVMInt32Type(), key->bcolor_slot, 0);
504
505 LLVMValueRef facing = args->facing;
506 LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, LLVMConstInt(LLVMInt32Type(), 0, 0), ""); /** need i1 for if condition */
507
508 lp_build_if(&if_state, b, front_facing);
509 {
510 /* swap the front and back attrib values */
511 a0_old = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx1, 1, ""), "v0a");
512 a1_old = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx1, 1, ""), "v1a");
513 a2_old = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx1, 1, ""), "v2a");
514
515 a0_new = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a");
516 a1_new = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a");
517 a2_new = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a");
518
519 LLVMBuildStore(b, a0_new, LLVMBuildGEP(b, args->v0, &idx1, 1, ""));
520 LLVMBuildStore(b, a1_new, LLVMBuildGEP(b, args->v1, &idx1, 1, ""));
521 LLVMBuildStore(b, a2_new, LLVMBuildGEP(b, args->v2, &idx1, 1, ""));
522 }
523 lp_build_endif(&if_state);
524
525 }
526
527 static void
528 lp_do_offset_tri(LLVMBuilderRef b,
529 struct lp_setup_args *args,
530 const struct lp_setup_variant_key *key)
531 {
532 struct lp_build_context bld;
533 LLVMValueRef zoffset, mult;
534 LLVMValueRef z0_new, z1_new, z2_new;
535 LLVMValueRef dzdx0, dzdx, dzdy0, dzdy;
536 LLVMValueRef max, max_value;
537
538 LLVMValueRef idx[2];
539 LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0);
540 LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
541
542 /* edge vectors: e = v0 - v2, f = v1 - v2 */
543 LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x");
544 LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x");
545 LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x");
546 LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y");
547 LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y");
548 LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y");
549 LLVMValueRef v0_z = vert_attrib(b, args->v0, 0, 2, "v0_z");
550 LLVMValueRef v1_z = vert_attrib(b, args->v1, 0, 2, "v1_z");
551 LLVMValueRef v2_z = vert_attrib(b, args->v2, 0, 2, "v2_z");
552
553 /* edge vectors: e = v0 - v2, f = v1 - v2 */
554 LLVMValueRef dx02 = LLVMBuildFSub(b, v0_x, v2_x, "dx02");
555 LLVMValueRef dy02 = LLVMBuildFSub(b, v0_y, v2_y, "dy02");
556 LLVMValueRef dz02 = LLVMBuildFSub(b, v0_z, v2_z, "dz02");
557 LLVMValueRef dx12 = LLVMBuildFSub(b, v1_x, v2_x, "dx12");
558 LLVMValueRef dy12 = LLVMBuildFSub(b, v1_y, v2_y, "dy12");
559 LLVMValueRef dz12 = LLVMBuildFSub(b, v1_z, v2_z, "dz12");
560
561 /* det = cross(e,f).z */
562 LLVMValueRef dx02_dy12 = LLVMBuildFMul(b, dx02, dy12, "dx02_dy12");
563 LLVMValueRef dy02_dx12 = LLVMBuildFMul(b, dy02, dx12, "dy02_dx12");
564 LLVMValueRef det = LLVMBuildFSub(b, dx02_dy12, dy02_dx12, "det");
565 LLVMValueRef inv_det = LLVMBuildFDiv(b, one, det, "inv_det");
566
567 /* (res1,res2) = cross(e,f).xy */
568 LLVMValueRef dy02_dz12 = LLVMBuildFMul(b, dy02, dz12, "dy02_dz12");
569 LLVMValueRef dz02_dy12 = LLVMBuildFMul(b, dz02, dy12, "dz02_dy12");
570 LLVMValueRef dz02_dx12 = LLVMBuildFMul(b, dz02, dx12, "dz02_dx12");
571 LLVMValueRef dx02_dz12 = LLVMBuildFMul(b, dx02, dz12, "dx02_dz12");
572 LLVMValueRef res1 = LLVMBuildFSub(b, dy02_dz12, dz02_dy12, "res1");
573 LLVMValueRef res2 = LLVMBuildFSub(b, dz02_dx12, dx02_dz12, "res2");
574
575 /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/
576 lp_build_context_init(&bld, b, lp_type_float(32));
577 dzdx0 = LLVMBuildFMul(b, res1, inv_det, "dzdx");
578 dzdx = lp_build_abs(&bld, dzdx0);
579 dzdy0 = LLVMBuildFMul(b, res2, inv_det, "dzdy");
580 dzdy = lp_build_abs(&bld, dzdy0);
581
582 /* zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale */
583 max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, "");
584 max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max");
585
586 mult = LLVMBuildFMul(b, max_value, LLVMConstReal(LLVMFloatType(), key->scale), "");
587 zoffset = LLVMBuildFAdd(b, LLVMConstReal(LLVMFloatType(), key->units), mult, "zoffset");
588
589 /* clamp and do offset */
590 z0_new = vert_clamp(b, LLVMBuildFAdd(b, v0_z, zoffset, ""), zero, one);
591 z1_new = vert_clamp(b, LLVMBuildFAdd(b, v1_z, zoffset, ""), zero, one);
592 z2_new = vert_clamp(b, LLVMBuildFAdd(b, v2_z, zoffset, ""), zero, one);
593
594 /* store back new offsetted z values */
595 idx[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
596 idx[1] = LLVMConstInt(LLVMInt32Type(), 2, 0);
597 LLVMBuildStore(b, z0_new, LLVMBuildGEP(b, args->v0, idx, 2, ""));
598 LLVMBuildStore(b, z1_new, LLVMBuildGEP(b, args->v1, idx, 2, ""));
599 LLVMBuildStore(b, z2_new, LLVMBuildGEP(b, args->v2, idx, 2, ""));
600
601 }
602
603
604 /**
605 * Generate the runtime callable function for the coefficient calculation.
606 *
607 */
608 static struct lp_setup_variant *
609 generate_setup_variant(struct llvmpipe_screen *screen,
610 struct lp_setup_variant_key *key,
611 struct llvmpipe_context *lp)
612 {
613 struct lp_setup_variant *variant = NULL;
614 struct lp_setup_args args;
615 char func_name[256];
616 LLVMTypeRef vec4f_type;
617 LLVMTypeRef func_type;
618 LLVMTypeRef arg_types[7];
619 LLVMBasicBlockRef block;
620 LLVMBuilderRef builder;
621 int64_t t0, t1;
622
623 if (0)
624 goto fail;
625
626 variant = CALLOC_STRUCT(lp_setup_variant);
627 if (variant == NULL)
628 goto fail;
629
630 if (LP_DEBUG & DEBUG_COUNTERS) {
631 t0 = os_time_get();
632 }
633
634 memcpy(&variant->key, key, key->size);
635 variant->list_item_global.base = variant;
636
637 util_snprintf(func_name, sizeof(func_name), "fs%u_setup%u",
638 0,
639 variant->no);
640
641 /* Currently always deal with full 4-wide vertex attributes from
642 * the vertices.
643 */
644
645 vec4f_type = LLVMVectorType(LLVMFloatType(), 4);
646
647 arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */
648 arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */
649 arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */
650 arg_types[3] = LLVMInt32Type(); /* facing */
651 arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */
652 arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */
653 arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */
654
655 func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
656
657 variant->function = LLVMAddFunction(screen->module, func_name, func_type);
658 if (!variant->function)
659 goto fail;
660
661 LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
662
663 args.v0 = LLVMGetParam(variant->function, 0);
664 args.v1 = LLVMGetParam(variant->function, 1);
665 args.v2 = LLVMGetParam(variant->function, 2);
666 args.facing = LLVMGetParam(variant->function, 3);
667 args.a0 = LLVMGetParam(variant->function, 4);
668 args.dadx = LLVMGetParam(variant->function, 5);
669 args.dady = LLVMGetParam(variant->function, 6);
670
671 lp_build_name(args.v0, "in_v0");
672 lp_build_name(args.v1, "in_v1");
673 lp_build_name(args.v2, "in_v2");
674 lp_build_name(args.facing, "in_facing");
675 lp_build_name(args.a0, "out_a0");
676 lp_build_name(args.dadx, "out_dadx");
677 lp_build_name(args.dady, "out_dady");
678
679 /*
680 * Function body
681 */
682 block = LLVMAppendBasicBlock(variant->function, "entry");
683 builder = LLVMCreateBuilder();
684 LLVMPositionBuilderAtEnd(builder, block);
685
686 set_noalias(builder, variant->function, arg_types, Elements(arg_types));
687 init_args(builder, &args, variant);
688 if (variant->key.twoside){
689 lp_twoside(builder, &args, &variant->key);
690 }
691 if (variant->key.scale || variant->key.units){
692 lp_do_offset_tri(builder, &args, &variant->key);
693 }
694 emit_tri_coef(builder, &variant->key, &args);
695
696 lp_emit_emms(builder);
697 LLVMBuildRetVoid(builder);
698 LLVMDisposeBuilder(builder);
699
700 variant->jit_function = finalize_function(screen, builder,
701 variant->function);
702 if (!variant->jit_function)
703 goto fail;
704
705 /*
706 * Update timing information:
707 */
708 if (LP_DEBUG & DEBUG_COUNTERS) {
709 t1 = os_time_get();
710 LP_COUNT_ADD(llvm_compile_time, t1 - t0);
711 LP_COUNT_ADD(nr_llvm_compiles, 1);
712 }
713
714 return variant;
715
716 fail:
717 if (variant) {
718 if (variant->function) {
719 if (variant->jit_function)
720 LLVMFreeMachineCodeForFunction(screen->engine,
721 variant->function);
722 LLVMDeleteFunction(variant->function);
723 }
724 FREE(variant);
725 }
726
727 return NULL;
728 }
729
730
731
732 static void
733 lp_make_setup_variant_key(struct llvmpipe_context *lp,
734 struct lp_setup_variant_key *key)
735 {
736 struct lp_fragment_shader *fs = lp->fs;
737 unsigned i;
738
739 assert(sizeof key->inputs[0] == sizeof(ushort));
740
741 key->num_inputs = fs->info.base.num_inputs;
742 key->flatshade_first = lp->rasterizer->flatshade_first;
743 key->pixel_center_half = lp->rasterizer->gl_rasterization_rules;
744 key->twoside = lp->rasterizer->light_twoside;
745 key->size = Offset(struct lp_setup_variant_key,
746 inputs[key->num_inputs]);
747 key->color_slot = lp->color_slot;
748 key->bcolor_slot = lp->bcolor_slot;
749 key->units = (float) (lp->rasterizer->offset_units * lp->mrd);
750 key->scale = lp->rasterizer->offset_scale;
751 key->pad = 0;
752 memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
753 for (i = 0; i < key->num_inputs; i++) {
754 if (key->inputs[i].interp == LP_INTERP_COLOR) {
755 if (lp->rasterizer->flatshade)
756 key->inputs[i].interp = LP_INTERP_CONSTANT;
757 else
758 key->inputs[i].interp = LP_INTERP_LINEAR;
759 }
760 }
761
762 }
763
764
765 static void
766 remove_setup_variant(struct llvmpipe_context *lp,
767 struct lp_setup_variant *variant)
768 {
769 struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
770
771 if (gallivm_debug & GALLIVM_DEBUG_IR) {
772 debug_printf("llvmpipe: del setup_variant #%u total %u\n",
773 variant->no, lp->nr_setup_variants);
774 }
775
776 if (variant->function) {
777 if (variant->jit_function)
778 LLVMFreeMachineCodeForFunction(screen->engine,
779 variant->function);
780 LLVMDeleteFunction(variant->function);
781 }
782
783 remove_from_list(&variant->list_item_global);
784 lp->nr_setup_variants--;
785 FREE(variant);
786 }
787
788
789
790 /* When the number of setup variants exceeds a threshold, cull a
791 * fraction (currently a quarter) of them.
792 */
793 static void
794 cull_setup_variants(struct llvmpipe_context *lp)
795 {
796 struct pipe_context *pipe = &lp->pipe;
797 int i;
798
799 /*
800 * XXX: we need to flush the context until we have some sort of reference
801 * counting in fragment shaders as they may still be binned
802 * Flushing alone might not be sufficient we need to wait on it too.
803 */
804 llvmpipe_finish(pipe, __FUNCTION__);
805
806 for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) {
807 struct lp_setup_variant_list_item *item = last_elem(&lp->setup_variants_list);
808 remove_setup_variant(lp, item->base);
809 }
810 }
811
812
813 /**
814 * Update fragment/vertex shader linkage state. This is called just
815 * prior to drawing something when some fragment-related state has
816 * changed.
817 */
818 void
819 llvmpipe_update_setup(struct llvmpipe_context *lp)
820 {
821 struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
822
823 struct lp_setup_variant_key *key = &lp->setup_variant.key;
824 struct lp_setup_variant *variant = NULL;
825 struct lp_setup_variant_list_item *li;
826
827 lp_make_setup_variant_key(lp, key);
828
829 foreach(li, &lp->setup_variants_list) {
830 if(li->base->key.size == key->size &&
831 memcmp(&li->base->key, key, key->size) == 0) {
832 variant = li->base;
833 break;
834 }
835 }
836
837 if (variant) {
838 move_to_head(&lp->setup_variants_list, &variant->list_item_global);
839 }
840 else {
841 if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) {
842 cull_setup_variants(lp);
843 }
844
845 variant = generate_setup_variant(screen, key, lp);
846 insert_at_head(&lp->setup_variants_list, &variant->list_item_global);
847 lp->nr_setup_variants++;
848 }
849
850 lp_setup_set_setup_variant(lp->setup,
851 variant);
852 }
853
854 void
855 lp_delete_setup_variants(struct llvmpipe_context *lp)
856 {
857 struct lp_setup_variant_list_item *li;
858 li = first_elem(&lp->setup_variants_list);
859 while(!at_end(&lp->setup_variants_list, li)) {
860 struct lp_setup_variant_list_item *next = next_elem(li);
861 remove_setup_variant(lp, li->base);
862 li = next;
863 }
864 }
865
866 void
867 lp_dump_setup_coef( const struct lp_setup_variant_key *key,
868 const float (*sa0)[4],
869 const float (*sdadx)[4],
870 const float (*sdady)[4])
871 {
872 int i, slot;
873
874 for (i = 0; i < NUM_CHANNELS; i++) {
875 float a0 = sa0 [0][i];
876 float dadx = sdadx[0][i];
877 float dady = sdady[0][i];
878
879 debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
880 "xyzw"[i],
881 a0, dadx, dady);
882 }
883
884 for (slot = 0; slot < key->num_inputs; slot++) {
885 unsigned usage_mask = key->inputs[slot].usage_mask;
886 for (i = 0; i < NUM_CHANNELS; i++) {
887 if (usage_mask & (1 << i)) {
888 float a0 = sa0 [1 + slot][i];
889 float dadx = sdadx[1 + slot][i];
890 float dady = sdady[1 + slot][i];
891
892 debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
893 slot,
894 "xyzw"[i],
895 a0, dadx, dady);
896 }
897 }
898 }
899 }