9f4f887dff064dd3911dacda1cfa7e676ea24912
[mesa.git] / src / gallium / auxiliary / draw / draw_pt_fetch_shade_pipeline_llvm.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "util/u_math.h"
29 #include "util/u_memory.h"
30 #include "util/u_prim.h"
31 #include "draw/draw_context.h"
32 #include "draw/draw_gs.h"
33 #include "draw/draw_vbuf.h"
34 #include "draw/draw_vertex.h"
35 #include "draw/draw_pt.h"
36 #include "draw/draw_prim_assembler.h"
37 #include "draw/draw_vs.h"
38 #include "draw/draw_llvm.h"
39 #include "gallivm/lp_bld_init.h"
40 #include "gallivm/lp_bld_debug.h"
41
42
43 struct llvm_middle_end {
44 struct draw_pt_middle_end base;
45 struct draw_context *draw;
46
47 struct pt_emit *emit;
48 struct pt_so_emit *so_emit;
49 struct pt_fetch *fetch;
50 struct pt_post_vs *post_vs;
51
52
53 unsigned vertex_data_offset;
54 unsigned vertex_size;
55 unsigned input_prim;
56 unsigned opt;
57
58 struct draw_llvm *llvm;
59 struct draw_llvm_variant *current_variant;
60 };
61
62
/** Downcast a generic middle-end pointer to the LLVM middle end. */
static inline struct llvm_middle_end *
llvm_middle_end(struct draw_pt_middle_end *middle)
{
   struct llvm_middle_end *fpme = (struct llvm_middle_end *) middle;

   return fpme;
}
69
70
71 static void
72 llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme)
73 {
74 struct draw_context *draw = fpme->draw;
75 struct draw_llvm *llvm = fpme->llvm;
76 struct draw_geometry_shader *gs = draw->gs.geometry_shader;
77 struct draw_gs_llvm_variant_key *key;
78 struct draw_gs_llvm_variant *variant = NULL;
79 struct draw_gs_llvm_variant_list_item *li;
80 struct llvm_geometry_shader *shader = llvm_geometry_shader(gs);
81 char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE];
82 unsigned i;
83
84 key = draw_gs_llvm_make_variant_key(llvm, store);
85
86 /* Search shader's list of variants for the key */
87 li = first_elem(&shader->variants);
88 while (!at_end(&shader->variants, li)) {
89 if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
90 variant = li->base;
91 break;
92 }
93 li = next_elem(li);
94 }
95
96 if (variant) {
97 /* found the variant, move to head of global list (for LRU) */
98 move_to_head(&llvm->gs_variants_list, &variant->list_item_global);
99 }
100 else {
101 /* Need to create new variant */
102
103 /* First check if we've created too many variants. If so, free
104 * 3.125% of the LRU to avoid using too much memory.
105 */
106 if (llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) {
107 if (gallivm_debug & GALLIVM_DEBUG_PERF) {
108 debug_printf("Evicting GS: %u gs variants,\t%u total variants\n",
109 shader->variants_cached, llvm->nr_gs_variants);
110 }
111
112 /*
113 * XXX: should we flush here ?
114 */
115 for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 32; i++) {
116 struct draw_gs_llvm_variant_list_item *item;
117 if (is_empty_list(&llvm->gs_variants_list)) {
118 break;
119 }
120 item = last_elem(&llvm->gs_variants_list);
121 assert(item);
122 assert(item->base);
123 draw_gs_llvm_destroy_variant(item->base);
124 }
125 }
126
127 variant = draw_gs_llvm_create_variant(llvm, gs->info.num_outputs, key);
128
129 if (variant) {
130 insert_at_head(&shader->variants, &variant->list_item_local);
131 insert_at_head(&llvm->gs_variants_list,
132 &variant->list_item_global);
133 llvm->nr_gs_variants++;
134 shader->variants_cached++;
135 }
136 }
137
138 gs->current_variant = variant;
139 }
140
141 /**
142 * Prepare/validate middle part of the vertex pipeline.
143 * NOTE: if you change this function, also look at the non-LLVM
144 * function fetch_pipeline_prepare() for similar changes.
145 */
146 static void
147 llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
148 unsigned in_prim,
149 unsigned opt,
150 unsigned *max_vertices )
151 {
152 struct llvm_middle_end *fpme = llvm_middle_end(middle);
153 struct draw_context *draw = fpme->draw;
154 struct draw_llvm *llvm = fpme->llvm;
155 struct draw_vertex_shader *vs = draw->vs.vertex_shader;
156 struct draw_geometry_shader *gs = draw->gs.geometry_shader;
157 const unsigned out_prim = gs ? gs->output_primitive :
158 u_assembled_prim(in_prim);
159 unsigned point_clip = draw->rasterizer->fill_front == PIPE_POLYGON_MODE_POINT ||
160 out_prim == PIPE_PRIM_POINTS;
161 unsigned nr;
162
163 fpme->input_prim = in_prim;
164 fpme->opt = opt;
165
166 draw_pt_post_vs_prepare( fpme->post_vs,
167 draw->clip_xy,
168 draw->clip_z,
169 draw->clip_user,
170 point_clip ? draw->guard_band_points_xy :
171 draw->guard_band_xy,
172 draw->bypass_viewport,
173 draw->rasterizer->clip_halfz,
174 (draw->vs.edgeflag_output ? TRUE : FALSE) );
175
176 draw_pt_so_emit_prepare( fpme->so_emit, gs == NULL );
177
178 if (!(opt & PT_PIPELINE)) {
179 draw_pt_emit_prepare( fpme->emit, out_prim,
180 max_vertices );
181
182 *max_vertices = MAX2( *max_vertices, 4096 );
183 }
184 else {
185 /* limit max fetches by limiting max_vertices */
186 *max_vertices = 4096;
187 }
188
189 /* Get the number of float[4] attributes per vertex.
190 * Note: this must be done after draw_pt_emit_prepare() since that
191 * can effect the vertex size.
192 */
193 nr = MAX2(vs->info.num_inputs, draw_total_vs_outputs(draw));
194
195 /* Always leave room for the vertex header whether we need it or
196 * not. It's hard to get rid of it in particular because of the
197 * viewport code in draw_pt_post_vs.c.
198 */
199 fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
200
201 /* return even number */
202 *max_vertices = *max_vertices & ~1;
203
204 /* Find/create the vertex shader variant */
205 {
206 struct draw_llvm_variant_key *key;
207 struct draw_llvm_variant *variant = NULL;
208 struct draw_llvm_variant_list_item *li;
209 struct llvm_vertex_shader *shader = llvm_vertex_shader(vs);
210 char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE];
211 unsigned i;
212
213 key = draw_llvm_make_variant_key(llvm, store);
214
215 /* Search shader's list of variants for the key */
216 li = first_elem(&shader->variants);
217 while (!at_end(&shader->variants, li)) {
218 if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
219 variant = li->base;
220 break;
221 }
222 li = next_elem(li);
223 }
224
225 if (variant) {
226 /* found the variant, move to head of global list (for LRU) */
227 move_to_head(&llvm->vs_variants_list, &variant->list_item_global);
228 }
229 else {
230 /* Need to create new variant */
231
232 /* First check if we've created too many variants. If so, free
233 * 3.125% of the LRU to avoid using too much memory.
234 */
235 if (llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) {
236 if (gallivm_debug & GALLIVM_DEBUG_PERF) {
237 debug_printf("Evicting VS: %u vs variants,\t%u total variants\n",
238 shader->variants_cached, llvm->nr_variants);
239 }
240
241 /*
242 * XXX: should we flush here ?
243 */
244 for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 32; i++) {
245 struct draw_llvm_variant_list_item *item;
246 if (is_empty_list(&llvm->vs_variants_list)) {
247 break;
248 }
249 item = last_elem(&llvm->vs_variants_list);
250 assert(item);
251 assert(item->base);
252 draw_llvm_destroy_variant(item->base);
253 }
254 }
255
256 variant = draw_llvm_create_variant(llvm, nr, key);
257
258 if (variant) {
259 insert_at_head(&shader->variants, &variant->list_item_local);
260 insert_at_head(&llvm->vs_variants_list,
261 &variant->list_item_global);
262 llvm->nr_variants++;
263 shader->variants_cached++;
264 }
265 }
266
267 fpme->current_variant = variant;
268 }
269
270 if (gs) {
271 llvm_middle_end_prepare_gs(fpme);
272 }
273 }
274
275
276 /**
277 * Bind/update constant buffer pointers, clip planes and viewport dims.
278 * These are "light weight" parameters which aren't baked into the
279 * generated code. Updating these items is much cheaper than revalidating
280 * and rebuilding the generated pipeline code.
281 */
282 static void
283 llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle)
284 {
285 static const float fake_const_buf[4];
286 struct llvm_middle_end *fpme = llvm_middle_end(middle);
287 struct draw_context *draw = fpme->draw;
288 struct draw_llvm *llvm = fpme->llvm;
289 unsigned i;
290
291 for (i = 0; i < ARRAY_SIZE(llvm->jit_context.vs_constants); ++i) {
292 int num_consts =
293 draw->pt.user.vs_constants_size[i] / (sizeof(float) * 4);
294 llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i];
295 llvm->jit_context.num_vs_constants[i] = num_consts;
296 if (num_consts == 0) {
297 llvm->jit_context.vs_constants[i] = fake_const_buf;
298 }
299 }
300 for (i = 0; i < ARRAY_SIZE(llvm->gs_jit_context.constants); ++i) {
301 int num_consts =
302 draw->pt.user.gs_constants_size[i] / (sizeof(float) * 4);
303 llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i];
304 llvm->gs_jit_context.num_constants[i] = num_consts;
305 if (num_consts == 0) {
306 llvm->gs_jit_context.constants[i] = fake_const_buf;
307 }
308 }
309
310 llvm->jit_context.planes =
311 (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
312 llvm->gs_jit_context.planes =
313 (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
314
315 llvm->jit_context.viewports = draw->viewports;
316 llvm->gs_jit_context.viewports = draw->viewports;
317 }
318
319
320 static void
321 pipeline(struct llvm_middle_end *llvm,
322 const struct draw_vertex_info *vert_info,
323 const struct draw_prim_info *prim_info)
324 {
325 if (prim_info->linear)
326 draw_pipeline_run_linear( llvm->draw,
327 vert_info,
328 prim_info);
329 else
330 draw_pipeline_run( llvm->draw,
331 vert_info,
332 prim_info );
333 }
334
335
336 static void
337 emit(struct pt_emit *emit,
338 const struct draw_vertex_info *vert_info,
339 const struct draw_prim_info *prim_info)
340 {
341 if (prim_info->linear) {
342 draw_pt_emit_linear(emit, vert_info, prim_info);
343 }
344 else {
345 draw_pt_emit(emit, vert_info, prim_info);
346 }
347 }
348
349
350 static void
351 llvm_pipeline_generic(struct draw_pt_middle_end *middle,
352 const struct draw_fetch_info *fetch_info,
353 const struct draw_prim_info *in_prim_info)
354 {
355 struct llvm_middle_end *fpme = llvm_middle_end(middle);
356 struct draw_context *draw = fpme->draw;
357 struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
358 struct draw_prim_info gs_prim_info[TGSI_MAX_VERTEX_STREAMS];
359 struct draw_vertex_info llvm_vert_info;
360 struct draw_vertex_info gs_vert_info[TGSI_MAX_VERTEX_STREAMS];
361 struct draw_vertex_info *vert_info;
362 struct draw_prim_info ia_prim_info;
363 struct draw_vertex_info ia_vert_info;
364 const struct draw_prim_info *prim_info = in_prim_info;
365 boolean free_prim_info = FALSE;
366 unsigned opt = fpme->opt;
367 boolean clipped = 0;
368 unsigned start_or_maxelt, vid_base;
369 const unsigned *elts;
370
371 assert(fetch_info->count > 0);
372 llvm_vert_info.count = fetch_info->count;
373 llvm_vert_info.vertex_size = fpme->vertex_size;
374 llvm_vert_info.stride = fpme->vertex_size;
375 llvm_vert_info.verts = (struct vertex_header *)
376 MALLOC(fpme->vertex_size *
377 align(fetch_info->count, lp_native_vector_width / 32));
378 if (!llvm_vert_info.verts) {
379 assert(0);
380 return;
381 }
382
383 if (draw->collect_statistics) {
384 draw->statistics.ia_vertices += prim_info->count;
385 draw->statistics.ia_primitives +=
386 u_decomposed_prims_for_vertices(prim_info->prim, prim_info->count);
387 draw->statistics.vs_invocations += fetch_info->count;
388 }
389
390 if (fetch_info->linear) {
391 start_or_maxelt = fetch_info->start;
392 vid_base = draw->start_index;
393 elts = NULL;
394 }
395 else {
396 start_or_maxelt = draw->pt.user.eltMax;
397 vid_base = draw->pt.user.eltBias;
398 elts = fetch_info->elts;
399 }
400 clipped = fpme->current_variant->jit_func(&fpme->llvm->jit_context,
401 llvm_vert_info.verts,
402 draw->pt.user.vbuffer,
403 fetch_info->count,
404 start_or_maxelt,
405 fpme->vertex_size,
406 draw->pt.vertex_buffer,
407 draw->instance_id,
408 vid_base,
409 draw->start_instance,
410 elts);
411
412 /* Finished with fetch and vs:
413 */
414 fetch_info = NULL;
415 vert_info = &llvm_vert_info;
416
417 if ((opt & PT_SHADE) && gshader) {
418 struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
419 draw_geometry_shader_run(gshader,
420 draw->pt.user.gs_constants,
421 draw->pt.user.gs_constants_size,
422 vert_info,
423 prim_info,
424 &vshader->info,
425 gs_vert_info,
426 gs_prim_info);
427
428 FREE(vert_info->verts);
429 vert_info = &gs_vert_info[0];
430 prim_info = &gs_prim_info[0];
431 /*
432 * pt emit can only handle ushort number of vertices (see
433 * render->allocate_vertices).
434 * vsplit guarantees there's never more than 4096, however GS can
435 * easily blow this up (by a factor of 256 (or even 1024) max).
436 */
437 if (vert_info->count > 65535) {
438 opt |= PT_PIPELINE;
439 }
440 } else {
441 if (draw_prim_assembler_is_required(draw, prim_info, vert_info)) {
442 draw_prim_assembler_run(draw, prim_info, vert_info,
443 &ia_prim_info, &ia_vert_info);
444
445 if (ia_vert_info.count) {
446 FREE(vert_info->verts);
447 vert_info = &ia_vert_info;
448 prim_info = &ia_prim_info;
449 free_prim_info = TRUE;
450 }
451 }
452 }
453 if (prim_info->count == 0) {
454 debug_printf("GS/IA didn't emit any vertices!\n");
455
456 FREE(vert_info->verts);
457 if (free_prim_info) {
458 FREE(prim_info->primitive_lengths);
459 }
460 return;
461 }
462
463 /* stream output needs to be done before clipping */
464 draw_pt_so_emit( fpme->so_emit, 1, vert_info, prim_info );
465
466 draw_stats_clipper_primitives(draw, prim_info);
467
468 /*
469 * if there's no position, need to stop now, or the latter stages
470 * will try to access non-existent position output.
471 */
472 if (draw_current_shader_position_output(draw) != -1) {
473 if ((opt & PT_SHADE) && (gshader ||
474 draw->vs.vertex_shader->info.writes_viewport_index)) {
475 clipped = draw_pt_post_vs_run( fpme->post_vs, vert_info, prim_info );
476 }
477 /* "clipped" also includes non-one edgeflag */
478 if (clipped) {
479 opt |= PT_PIPELINE;
480 }
481
482 /* Do we need to run the pipeline? Now will come here if clipped
483 */
484 if (opt & PT_PIPELINE) {
485 pipeline( fpme, vert_info, prim_info );
486 }
487 else {
488 emit( fpme->emit, vert_info, prim_info );
489 }
490 }
491 FREE(vert_info->verts);
492 if (free_prim_info) {
493 FREE(prim_info->primitive_lengths);
494 }
495 }
496
497
498 static inline unsigned
499 prim_type(unsigned prim, unsigned flags)
500 {
501 if (flags & DRAW_LINE_LOOP_AS_STRIP)
502 return PIPE_PRIM_LINE_STRIP;
503 else
504 return prim;
505 }
506
507
508 static void
509 llvm_middle_end_run(struct draw_pt_middle_end *middle,
510 const unsigned *fetch_elts,
511 unsigned fetch_count,
512 const ushort *draw_elts,
513 unsigned draw_count,
514 unsigned prim_flags)
515 {
516 struct llvm_middle_end *fpme = llvm_middle_end(middle);
517 struct draw_fetch_info fetch_info;
518 struct draw_prim_info prim_info;
519
520 fetch_info.linear = FALSE;
521 fetch_info.start = 0;
522 fetch_info.elts = fetch_elts;
523 fetch_info.count = fetch_count;
524
525 prim_info.linear = FALSE;
526 prim_info.start = 0;
527 prim_info.count = draw_count;
528 prim_info.elts = draw_elts;
529 prim_info.prim = prim_type(fpme->input_prim, prim_flags);
530 prim_info.flags = prim_flags;
531 prim_info.primitive_count = 1;
532 prim_info.primitive_lengths = &draw_count;
533
534 llvm_pipeline_generic( middle, &fetch_info, &prim_info );
535 }
536
537
538 static void
539 llvm_middle_end_linear_run(struct draw_pt_middle_end *middle,
540 unsigned start,
541 unsigned count,
542 unsigned prim_flags)
543 {
544 struct llvm_middle_end *fpme = llvm_middle_end(middle);
545 struct draw_fetch_info fetch_info;
546 struct draw_prim_info prim_info;
547
548 fetch_info.linear = TRUE;
549 fetch_info.start = start;
550 fetch_info.count = count;
551 fetch_info.elts = NULL;
552
553 prim_info.linear = TRUE;
554 prim_info.start = 0;
555 prim_info.count = count;
556 prim_info.elts = NULL;
557 prim_info.prim = prim_type(fpme->input_prim, prim_flags);
558 prim_info.flags = prim_flags;
559 prim_info.primitive_count = 1;
560 prim_info.primitive_lengths = &count;
561
562 llvm_pipeline_generic( middle, &fetch_info, &prim_info );
563 }
564
565
566 static boolean
567 llvm_middle_end_linear_run_elts(struct draw_pt_middle_end *middle,
568 unsigned start,
569 unsigned count,
570 const ushort *draw_elts,
571 unsigned draw_count,
572 unsigned prim_flags)
573 {
574 struct llvm_middle_end *fpme = llvm_middle_end(middle);
575 struct draw_fetch_info fetch_info;
576 struct draw_prim_info prim_info;
577
578 fetch_info.linear = TRUE;
579 fetch_info.start = start;
580 fetch_info.count = count;
581 fetch_info.elts = NULL;
582
583 prim_info.linear = FALSE;
584 prim_info.start = 0;
585 prim_info.count = draw_count;
586 prim_info.elts = draw_elts;
587 prim_info.prim = prim_type(fpme->input_prim, prim_flags);
588 prim_info.flags = prim_flags;
589 prim_info.primitive_count = 1;
590 prim_info.primitive_lengths = &draw_count;
591
592 llvm_pipeline_generic( middle, &fetch_info, &prim_info );
593
594 return TRUE;
595 }
596
597
/** "finish" entry point; this middle end has nothing to do here. */
static void
llvm_middle_end_finish(struct draw_pt_middle_end *middle)
{
   (void) middle;   /* nothing to do */
}
603
604
605 static void
606 llvm_middle_end_destroy(struct draw_pt_middle_end *middle)
607 {
608 struct llvm_middle_end *fpme = llvm_middle_end(middle);
609
610 if (fpme->fetch)
611 draw_pt_fetch_destroy( fpme->fetch );
612
613 if (fpme->emit)
614 draw_pt_emit_destroy( fpme->emit );
615
616 if (fpme->so_emit)
617 draw_pt_so_emit_destroy( fpme->so_emit );
618
619 if (fpme->post_vs)
620 draw_pt_post_vs_destroy( fpme->post_vs );
621
622 FREE(middle);
623 }
624
625
626 struct draw_pt_middle_end *
627 draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw)
628 {
629 struct llvm_middle_end *fpme = 0;
630
631 if (!draw->llvm)
632 return NULL;
633
634 fpme = CALLOC_STRUCT( llvm_middle_end );
635 if (!fpme)
636 goto fail;
637
638 fpme->base.prepare = llvm_middle_end_prepare;
639 fpme->base.bind_parameters = llvm_middle_end_bind_parameters;
640 fpme->base.run = llvm_middle_end_run;
641 fpme->base.run_linear = llvm_middle_end_linear_run;
642 fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts;
643 fpme->base.finish = llvm_middle_end_finish;
644 fpme->base.destroy = llvm_middle_end_destroy;
645
646 fpme->draw = draw;
647
648 fpme->fetch = draw_pt_fetch_create( draw );
649 if (!fpme->fetch)
650 goto fail;
651
652 fpme->post_vs = draw_pt_post_vs_create( draw );
653 if (!fpme->post_vs)
654 goto fail;
655
656 fpme->emit = draw_pt_emit_create( draw );
657 if (!fpme->emit)
658 goto fail;
659
660 fpme->so_emit = draw_pt_so_emit_create( draw );
661 if (!fpme->so_emit)
662 goto fail;
663
664 fpme->llvm = draw->llvm;
665 if (!fpme->llvm)
666 goto fail;
667
668 fpme->current_variant = NULL;
669
670 return &fpme->base;
671
672 fail:
673 if (fpme)
674 llvm_middle_end_destroy( &fpme->base );
675
676 return NULL;
677 }