s/Tungsten Graphics/VMware/
[mesa.git] / src / gallium / auxiliary / draw / draw_pt_fetch_shade_pipeline_llvm.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "util/u_math.h"
29 #include "util/u_memory.h"
30 #include "util/u_prim.h"
31 #include "draw/draw_context.h"
32 #include "draw/draw_gs.h"
33 #include "draw/draw_vbuf.h"
34 #include "draw/draw_vertex.h"
35 #include "draw/draw_pt.h"
36 #include "draw/draw_prim_assembler.h"
37 #include "draw/draw_vs.h"
38 #include "draw/draw_llvm.h"
39 #include "gallivm/lp_bld_init.h"
40
41
42 struct llvm_middle_end {
43 struct draw_pt_middle_end base;
44 struct draw_context *draw;
45
46 struct pt_emit *emit;
47 struct pt_so_emit *so_emit;
48 struct pt_fetch *fetch;
49 struct pt_post_vs *post_vs;
50
51
52 unsigned vertex_data_offset;
53 unsigned vertex_size;
54 unsigned input_prim;
55 unsigned opt;
56
57 struct draw_llvm *llvm;
58 struct draw_llvm_variant *current_variant;
59 };
60
61
62 static void
63 llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme)
64 {
65 struct draw_context *draw = fpme->draw;
66 struct draw_geometry_shader *gs = draw->gs.geometry_shader;
67 struct draw_gs_llvm_variant_key *key;
68 struct draw_gs_llvm_variant *variant = NULL;
69 struct draw_gs_llvm_variant_list_item *li;
70 struct llvm_geometry_shader *shader = llvm_geometry_shader(gs);
71 char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE];
72 unsigned i;
73
74 key = draw_gs_llvm_make_variant_key(fpme->llvm, store);
75
76 /* Search shader's list of variants for the key */
77 li = first_elem(&shader->variants);
78 while (!at_end(&shader->variants, li)) {
79 if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
80 variant = li->base;
81 break;
82 }
83 li = next_elem(li);
84 }
85
86 if (variant) {
87 /* found the variant, move to head of global list (for LRU) */
88 move_to_head(&fpme->llvm->gs_variants_list,
89 &variant->list_item_global);
90 }
91 else {
92 /* Need to create new variant */
93
94 /* First check if we've created too many variants. If so, free
95 * 25% of the LRU to avoid using too much memory.
96 */
97 if (fpme->llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) {
98 /*
99 * XXX: should we flush here ?
100 */
101 for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
102 struct draw_gs_llvm_variant_list_item *item;
103 if (is_empty_list(&fpme->llvm->gs_variants_list)) {
104 break;
105 }
106 item = last_elem(&fpme->llvm->gs_variants_list);
107 assert(item);
108 assert(item->base);
109 draw_gs_llvm_destroy_variant(item->base);
110 }
111 }
112
113 variant = draw_gs_llvm_create_variant(fpme->llvm, gs->info.num_outputs, key);
114
115 if (variant) {
116 insert_at_head(&shader->variants, &variant->list_item_local);
117 insert_at_head(&fpme->llvm->gs_variants_list,
118 &variant->list_item_global);
119 fpme->llvm->nr_gs_variants++;
120 shader->variants_cached++;
121 }
122 }
123
124 gs->current_variant = variant;
125 }
126
127 /**
128 * Prepare/validate middle part of the vertex pipeline.
129 * NOTE: if you change this function, also look at the non-LLVM
130 * function fetch_pipeline_prepare() for similar changes.
131 */
132 static void
133 llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
134 unsigned in_prim,
135 unsigned opt,
136 unsigned *max_vertices )
137 {
138 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
139 struct draw_context *draw = fpme->draw;
140 struct draw_vertex_shader *vs = draw->vs.vertex_shader;
141 struct draw_geometry_shader *gs = draw->gs.geometry_shader;
142 const unsigned out_prim = gs ? gs->output_primitive :
143 u_assembled_prim(in_prim);
144 const unsigned nr = MAX2(vs->info.num_inputs,
145 draw_total_vs_outputs(draw));
146
147 fpme->input_prim = in_prim;
148 fpme->opt = opt;
149
150 /* Always leave room for the vertex header whether we need it or
151 * not. It's hard to get rid of it in particular because of the
152 * viewport code in draw_pt_post_vs.c.
153 */
154 fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
155
156
157 draw_pt_post_vs_prepare( fpme->post_vs,
158 out_prim == PIPE_PRIM_POINTS ?
159 draw->clip_points_xy : draw->clip_xy,
160 draw->clip_z,
161 draw->clip_user,
162 draw->guard_band_xy,
163 draw->identity_viewport,
164 draw->rasterizer->clip_halfz,
165 (draw->vs.edgeflag_output ? TRUE : FALSE) );
166
167 draw_pt_so_emit_prepare( fpme->so_emit, gs == NULL );
168
169 if (!(opt & PT_PIPELINE)) {
170 draw_pt_emit_prepare( fpme->emit,
171 out_prim,
172 max_vertices );
173
174 *max_vertices = MAX2( *max_vertices, 4096 );
175 }
176 else {
177 /* limit max fetches by limiting max_vertices */
178 *max_vertices = 4096;
179 }
180
181 /* return even number */
182 *max_vertices = *max_vertices & ~1;
183
184 /* Find/create the vertex shader variant */
185 {
186 struct draw_llvm_variant_key *key;
187 struct draw_llvm_variant *variant = NULL;
188 struct draw_llvm_variant_list_item *li;
189 struct llvm_vertex_shader *shader = llvm_vertex_shader(vs);
190 char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE];
191 unsigned i;
192
193 key = draw_llvm_make_variant_key(fpme->llvm, store);
194
195 /* Search shader's list of variants for the key */
196 li = first_elem(&shader->variants);
197 while (!at_end(&shader->variants, li)) {
198 if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
199 variant = li->base;
200 break;
201 }
202 li = next_elem(li);
203 }
204
205 if (variant) {
206 /* found the variant, move to head of global list (for LRU) */
207 move_to_head(&fpme->llvm->vs_variants_list,
208 &variant->list_item_global);
209 }
210 else {
211 /* Need to create new variant */
212
213 /* First check if we've created too many variants. If so, free
214 * 25% of the LRU to avoid using too much memory.
215 */
216 if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) {
217 /*
218 * XXX: should we flush here ?
219 */
220 for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
221 struct draw_llvm_variant_list_item *item;
222 if (is_empty_list(&fpme->llvm->vs_variants_list)) {
223 break;
224 }
225 item = last_elem(&fpme->llvm->vs_variants_list);
226 assert(item);
227 assert(item->base);
228 draw_llvm_destroy_variant(item->base);
229 }
230 }
231
232 variant = draw_llvm_create_variant(fpme->llvm, nr, key);
233
234 if (variant) {
235 insert_at_head(&shader->variants, &variant->list_item_local);
236 insert_at_head(&fpme->llvm->vs_variants_list,
237 &variant->list_item_global);
238 fpme->llvm->nr_variants++;
239 shader->variants_cached++;
240 }
241 }
242
243 fpme->current_variant = variant;
244 }
245
246 if (gs) {
247 llvm_middle_end_prepare_gs(fpme);
248 }
249 }
250
251
252 /**
253 * Bind/update constant buffer pointers, clip planes and viewport dims.
254 * These are "light weight" parameters which aren't baked into the
255 * generated code. Updating these items is much cheaper than revalidating
256 * and rebuilding the generated pipeline code.
257 */
258 static void
259 llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle)
260 {
261 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
262 struct draw_context *draw = fpme->draw;
263 unsigned i;
264
265 for (i = 0; i < Elements(fpme->llvm->jit_context.vs_constants); ++i) {
266 int num_consts =
267 draw->pt.user.vs_constants_size[i] / (sizeof(float) * 4);
268 fpme->llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i];
269 fpme->llvm->jit_context.num_vs_constants[i] = num_consts;
270 }
271 for (i = 0; i < Elements(fpme->llvm->gs_jit_context.constants); ++i) {
272 int num_consts =
273 draw->pt.user.gs_constants_size[i] / (sizeof(float) * 4);
274 fpme->llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i];
275 fpme->llvm->gs_jit_context.num_constants[i] = num_consts;
276 }
277
278 fpme->llvm->jit_context.planes =
279 (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
280 fpme->llvm->gs_jit_context.planes =
281 (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
282
283 fpme->llvm->jit_context.viewport = (float *) draw->viewports[0].scale;
284 fpme->llvm->gs_jit_context.viewport = (float *) draw->viewports[0].scale;
285 }
286
287
288 static void pipeline(struct llvm_middle_end *llvm,
289 const struct draw_vertex_info *vert_info,
290 const struct draw_prim_info *prim_info)
291 {
292 if (prim_info->linear)
293 draw_pipeline_run_linear( llvm->draw,
294 vert_info,
295 prim_info);
296 else
297 draw_pipeline_run( llvm->draw,
298 vert_info,
299 prim_info );
300 }
301
302 static void emit(struct pt_emit *emit,
303 const struct draw_vertex_info *vert_info,
304 const struct draw_prim_info *prim_info)
305 {
306 if (prim_info->linear) {
307 draw_pt_emit_linear(emit, vert_info, prim_info);
308 }
309 else {
310 draw_pt_emit(emit, vert_info, prim_info);
311 }
312 }
313
314 static void
315 llvm_pipeline_generic( struct draw_pt_middle_end *middle,
316 const struct draw_fetch_info *fetch_info,
317 const struct draw_prim_info *in_prim_info )
318 {
319 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
320 struct draw_context *draw = fpme->draw;
321 struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
322 struct draw_prim_info gs_prim_info;
323 struct draw_vertex_info llvm_vert_info;
324 struct draw_vertex_info gs_vert_info;
325 struct draw_vertex_info *vert_info;
326 struct draw_prim_info ia_prim_info;
327 struct draw_vertex_info ia_vert_info;
328 const struct draw_prim_info *prim_info = in_prim_info;
329 boolean free_prim_info = FALSE;
330 unsigned opt = fpme->opt;
331 unsigned clipped = 0;
332
333 llvm_vert_info.count = fetch_info->count;
334 llvm_vert_info.vertex_size = fpme->vertex_size;
335 llvm_vert_info.stride = fpme->vertex_size;
336 llvm_vert_info.verts =
337 (struct vertex_header *)MALLOC(fpme->vertex_size *
338 align(fetch_info->count, lp_native_vector_width / 32));
339 if (!llvm_vert_info.verts) {
340 assert(0);
341 return;
342 }
343
344 if (draw->collect_statistics) {
345 draw->statistics.ia_vertices += prim_info->count;
346 draw->statistics.ia_primitives +=
347 u_decomposed_prims_for_vertices(prim_info->prim, prim_info->count);
348 draw->statistics.vs_invocations += fetch_info->count;
349 }
350
351 if (fetch_info->linear)
352 clipped = fpme->current_variant->jit_func( &fpme->llvm->jit_context,
353 llvm_vert_info.verts,
354 draw->pt.user.vbuffer,
355 fetch_info->start,
356 fetch_info->count,
357 fpme->vertex_size,
358 draw->pt.vertex_buffer,
359 draw->instance_id,
360 draw->start_index);
361 else
362 clipped = fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
363 llvm_vert_info.verts,
364 draw->pt.user.vbuffer,
365 fetch_info->elts,
366 draw->pt.user.eltMax,
367 fetch_info->count,
368 fpme->vertex_size,
369 draw->pt.vertex_buffer,
370 draw->instance_id,
371 draw->pt.user.eltBias);
372
373 /* Finished with fetch and vs:
374 */
375 fetch_info = NULL;
376 vert_info = &llvm_vert_info;
377
378
379 if ((opt & PT_SHADE) && gshader) {
380 struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
381 draw_geometry_shader_run(gshader,
382 draw->pt.user.gs_constants,
383 draw->pt.user.gs_constants_size,
384 vert_info,
385 prim_info,
386 &vshader->info,
387 &gs_vert_info,
388 &gs_prim_info);
389
390 FREE(vert_info->verts);
391 vert_info = &gs_vert_info;
392 prim_info = &gs_prim_info;
393 } else {
394 if (draw_prim_assembler_is_required(draw, prim_info, vert_info)) {
395 draw_prim_assembler_run(draw, prim_info, vert_info,
396 &ia_prim_info, &ia_vert_info);
397
398 if (ia_vert_info.count) {
399 FREE(vert_info->verts);
400 vert_info = &ia_vert_info;
401 prim_info = &ia_prim_info;
402 free_prim_info = TRUE;
403 }
404 }
405 }
406 if (prim_info->count == 0) {
407 debug_printf("GS/IA didn't emit any vertices!\n");
408
409 FREE(vert_info->verts);
410 if (free_prim_info) {
411 FREE(prim_info->primitive_lengths);
412 }
413 return;
414 }
415
416 /* stream output needs to be done before clipping */
417 draw_pt_so_emit( fpme->so_emit, vert_info, prim_info );
418
419 draw_stats_clipper_primitives(draw, prim_info);
420
421 /*
422 * if there's no position, need to stop now, or the latter stages
423 * will try to access non-existent position output.
424 */
425 if (draw_current_shader_position_output(draw) != -1) {
426 if ((opt & PT_SHADE) && gshader) {
427 clipped = draw_pt_post_vs_run( fpme->post_vs, vert_info, prim_info );
428 }
429 if (clipped) {
430 opt |= PT_PIPELINE;
431 }
432
433 /* Do we need to run the pipeline? Now will come here if clipped
434 */
435 if (opt & PT_PIPELINE) {
436 pipeline( fpme, vert_info, prim_info );
437 }
438 else {
439 emit( fpme->emit, vert_info, prim_info );
440 }
441 }
442 FREE(vert_info->verts);
443 if (free_prim_info) {
444 FREE(prim_info->primitive_lengths);
445 }
446 }
447
448
449 static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
450 const unsigned *fetch_elts,
451 unsigned fetch_count,
452 const ushort *draw_elts,
453 unsigned draw_count,
454 unsigned prim_flags )
455 {
456 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
457 struct draw_fetch_info fetch_info;
458 struct draw_prim_info prim_info;
459
460 fetch_info.linear = FALSE;
461 fetch_info.start = 0;
462 fetch_info.elts = fetch_elts;
463 fetch_info.count = fetch_count;
464
465 prim_info.linear = FALSE;
466 prim_info.start = 0;
467 prim_info.count = draw_count;
468 prim_info.elts = draw_elts;
469 prim_info.prim = fpme->input_prim;
470 prim_info.flags = prim_flags;
471 prim_info.primitive_count = 1;
472 prim_info.primitive_lengths = &draw_count;
473
474 llvm_pipeline_generic( middle, &fetch_info, &prim_info );
475 }
476
477
478 static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
479 unsigned start,
480 unsigned count,
481 unsigned prim_flags)
482 {
483 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
484 struct draw_fetch_info fetch_info;
485 struct draw_prim_info prim_info;
486
487 fetch_info.linear = TRUE;
488 fetch_info.start = start;
489 fetch_info.count = count;
490 fetch_info.elts = NULL;
491
492 prim_info.linear = TRUE;
493 prim_info.start = 0;
494 prim_info.count = count;
495 prim_info.elts = NULL;
496 prim_info.prim = fpme->input_prim;
497 prim_info.flags = prim_flags;
498 prim_info.primitive_count = 1;
499 prim_info.primitive_lengths = &count;
500
501 llvm_pipeline_generic( middle, &fetch_info, &prim_info );
502 }
503
504
505
506 static boolean
507 llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
508 unsigned start,
509 unsigned count,
510 const ushort *draw_elts,
511 unsigned draw_count,
512 unsigned prim_flags )
513 {
514 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
515 struct draw_fetch_info fetch_info;
516 struct draw_prim_info prim_info;
517
518 fetch_info.linear = TRUE;
519 fetch_info.start = start;
520 fetch_info.count = count;
521 fetch_info.elts = NULL;
522
523 prim_info.linear = FALSE;
524 prim_info.start = 0;
525 prim_info.count = draw_count;
526 prim_info.elts = draw_elts;
527 prim_info.prim = fpme->input_prim;
528 prim_info.flags = prim_flags;
529 prim_info.primitive_count = 1;
530 prim_info.primitive_lengths = &draw_count;
531
532 llvm_pipeline_generic( middle, &fetch_info, &prim_info );
533
534 return TRUE;
535 }
536
537
538
/**
 * 'finish' callback of the middle end; this implementation has no work
 * to do.
 */
static void llvm_middle_end_finish( struct draw_pt_middle_end *middle )
{
   (void) middle;   /* nothing to do */
}
543
544 static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle )
545 {
546 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
547
548 if (fpme->fetch)
549 draw_pt_fetch_destroy( fpme->fetch );
550
551 if (fpme->emit)
552 draw_pt_emit_destroy( fpme->emit );
553
554 if (fpme->so_emit)
555 draw_pt_so_emit_destroy( fpme->so_emit );
556
557 if (fpme->post_vs)
558 draw_pt_post_vs_destroy( fpme->post_vs );
559
560 FREE(middle);
561 }
562
563
564 struct draw_pt_middle_end *
565 draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw)
566 {
567 struct llvm_middle_end *fpme = 0;
568
569 if (!draw->llvm)
570 return NULL;
571
572 fpme = CALLOC_STRUCT( llvm_middle_end );
573 if (!fpme)
574 goto fail;
575
576 fpme->base.prepare = llvm_middle_end_prepare;
577 fpme->base.bind_parameters = llvm_middle_end_bind_parameters;
578 fpme->base.run = llvm_middle_end_run;
579 fpme->base.run_linear = llvm_middle_end_linear_run;
580 fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts;
581 fpme->base.finish = llvm_middle_end_finish;
582 fpme->base.destroy = llvm_middle_end_destroy;
583
584 fpme->draw = draw;
585
586 fpme->fetch = draw_pt_fetch_create( draw );
587 if (!fpme->fetch)
588 goto fail;
589
590 fpme->post_vs = draw_pt_post_vs_create( draw );
591 if (!fpme->post_vs)
592 goto fail;
593
594 fpme->emit = draw_pt_emit_create( draw );
595 if (!fpme->emit)
596 goto fail;
597
598 fpme->so_emit = draw_pt_so_emit_create( draw );
599 if (!fpme->so_emit)
600 goto fail;
601
602 fpme->llvm = draw->llvm;
603 if (!fpme->llvm)
604 goto fail;
605
606 fpme->current_variant = NULL;
607
608 return &fpme->base;
609
610 fail:
611 if (fpme)
612 llvm_middle_end_destroy( &fpme->base );
613
614 return NULL;
615 }