draw: simplify fetch some more
[mesa.git] / src / gallium / auxiliary / draw / draw_pt_fetch_shade_pipeline_llvm.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "util/u_math.h"
29 #include "util/u_memory.h"
30 #include "util/u_prim.h"
31 #include "draw/draw_context.h"
32 #include "draw/draw_gs.h"
33 #include "draw/draw_vbuf.h"
34 #include "draw/draw_vertex.h"
35 #include "draw/draw_pt.h"
36 #include "draw/draw_prim_assembler.h"
37 #include "draw/draw_vs.h"
38 #include "draw/draw_llvm.h"
39 #include "gallivm/lp_bld_init.h"
40
41
42 struct llvm_middle_end {
43 struct draw_pt_middle_end base;
44 struct draw_context *draw;
45
46 struct pt_emit *emit;
47 struct pt_so_emit *so_emit;
48 struct pt_fetch *fetch;
49 struct pt_post_vs *post_vs;
50
51
52 unsigned vertex_data_offset;
53 unsigned vertex_size;
54 unsigned input_prim;
55 unsigned opt;
56
57 struct draw_llvm *llvm;
58 struct draw_llvm_variant *current_variant;
59 };
60
61
/** Downcast a generic middle end to the LLVM middle end it is embedded in. */
static inline struct llvm_middle_end *
llvm_middle_end(struct draw_pt_middle_end *middle)
{
   struct llvm_middle_end *fpme = (struct llvm_middle_end *) middle;

   return fpme;
}
68
69
70 static void
71 llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme)
72 {
73 struct draw_context *draw = fpme->draw;
74 struct draw_geometry_shader *gs = draw->gs.geometry_shader;
75 struct draw_gs_llvm_variant_key *key;
76 struct draw_gs_llvm_variant *variant = NULL;
77 struct draw_gs_llvm_variant_list_item *li;
78 struct llvm_geometry_shader *shader = llvm_geometry_shader(gs);
79 char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE];
80 unsigned i;
81
82 key = draw_gs_llvm_make_variant_key(fpme->llvm, store);
83
84 /* Search shader's list of variants for the key */
85 li = first_elem(&shader->variants);
86 while (!at_end(&shader->variants, li)) {
87 if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
88 variant = li->base;
89 break;
90 }
91 li = next_elem(li);
92 }
93
94 if (variant) {
95 /* found the variant, move to head of global list (for LRU) */
96 move_to_head(&fpme->llvm->gs_variants_list,
97 &variant->list_item_global);
98 }
99 else {
100 /* Need to create new variant */
101
102 /* First check if we've created too many variants. If so, free
103 * 25% of the LRU to avoid using too much memory.
104 */
105 if (fpme->llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) {
106 /*
107 * XXX: should we flush here ?
108 */
109 for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
110 struct draw_gs_llvm_variant_list_item *item;
111 if (is_empty_list(&fpme->llvm->gs_variants_list)) {
112 break;
113 }
114 item = last_elem(&fpme->llvm->gs_variants_list);
115 assert(item);
116 assert(item->base);
117 draw_gs_llvm_destroy_variant(item->base);
118 }
119 }
120
121 variant = draw_gs_llvm_create_variant(fpme->llvm, gs->info.num_outputs, key);
122
123 if (variant) {
124 insert_at_head(&shader->variants, &variant->list_item_local);
125 insert_at_head(&fpme->llvm->gs_variants_list,
126 &variant->list_item_global);
127 fpme->llvm->nr_gs_variants++;
128 shader->variants_cached++;
129 }
130 }
131
132 gs->current_variant = variant;
133 }
134
135 /**
136 * Prepare/validate middle part of the vertex pipeline.
137 * NOTE: if you change this function, also look at the non-LLVM
138 * function fetch_pipeline_prepare() for similar changes.
139 */
140 static void
141 llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
142 unsigned in_prim,
143 unsigned opt,
144 unsigned *max_vertices )
145 {
146 struct llvm_middle_end *fpme = llvm_middle_end(middle);
147 struct draw_context *draw = fpme->draw;
148 struct draw_vertex_shader *vs = draw->vs.vertex_shader;
149 struct draw_geometry_shader *gs = draw->gs.geometry_shader;
150 const unsigned out_prim = gs ? gs->output_primitive :
151 u_assembled_prim(in_prim);
152 unsigned point_clip = draw->rasterizer->fill_front == PIPE_POLYGON_MODE_POINT ||
153 out_prim == PIPE_PRIM_POINTS;
154 unsigned nr;
155
156 fpme->input_prim = in_prim;
157 fpme->opt = opt;
158
159 draw_pt_post_vs_prepare( fpme->post_vs,
160 draw->clip_xy,
161 draw->clip_z,
162 draw->clip_user,
163 point_clip ? draw->guard_band_points_xy :
164 draw->guard_band_xy,
165 draw->bypass_viewport,
166 draw->rasterizer->clip_halfz,
167 (draw->vs.edgeflag_output ? TRUE : FALSE) );
168
169 draw_pt_so_emit_prepare( fpme->so_emit, gs == NULL );
170
171 if (!(opt & PT_PIPELINE)) {
172 draw_pt_emit_prepare( fpme->emit, out_prim,
173 max_vertices );
174
175 *max_vertices = MAX2( *max_vertices, 4096 );
176 }
177 else {
178 /* limit max fetches by limiting max_vertices */
179 *max_vertices = 4096;
180 }
181
182 /* Get the number of float[4] attributes per vertex.
183 * Note: this must be done after draw_pt_emit_prepare() since that
184 * can effect the vertex size.
185 */
186 nr = MAX2(vs->info.num_inputs, draw_total_vs_outputs(draw));
187
188 /* Always leave room for the vertex header whether we need it or
189 * not. It's hard to get rid of it in particular because of the
190 * viewport code in draw_pt_post_vs.c.
191 */
192 fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
193
194 /* return even number */
195 *max_vertices = *max_vertices & ~1;
196
197 /* Find/create the vertex shader variant */
198 {
199 struct draw_llvm_variant_key *key;
200 struct draw_llvm_variant *variant = NULL;
201 struct draw_llvm_variant_list_item *li;
202 struct llvm_vertex_shader *shader = llvm_vertex_shader(vs);
203 char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE];
204 unsigned i;
205
206 key = draw_llvm_make_variant_key(fpme->llvm, store);
207
208 /* Search shader's list of variants for the key */
209 li = first_elem(&shader->variants);
210 while (!at_end(&shader->variants, li)) {
211 if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
212 variant = li->base;
213 break;
214 }
215 li = next_elem(li);
216 }
217
218 if (variant) {
219 /* found the variant, move to head of global list (for LRU) */
220 move_to_head(&fpme->llvm->vs_variants_list,
221 &variant->list_item_global);
222 }
223 else {
224 /* Need to create new variant */
225
226 /* First check if we've created too many variants. If so, free
227 * 25% of the LRU to avoid using too much memory.
228 */
229 if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) {
230 /*
231 * XXX: should we flush here ?
232 */
233 for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
234 struct draw_llvm_variant_list_item *item;
235 if (is_empty_list(&fpme->llvm->vs_variants_list)) {
236 break;
237 }
238 item = last_elem(&fpme->llvm->vs_variants_list);
239 assert(item);
240 assert(item->base);
241 draw_llvm_destroy_variant(item->base);
242 }
243 }
244
245 variant = draw_llvm_create_variant(fpme->llvm, nr, key);
246
247 if (variant) {
248 insert_at_head(&shader->variants, &variant->list_item_local);
249 insert_at_head(&fpme->llvm->vs_variants_list,
250 &variant->list_item_global);
251 fpme->llvm->nr_variants++;
252 shader->variants_cached++;
253 }
254 }
255
256 fpme->current_variant = variant;
257 }
258
259 if (gs) {
260 llvm_middle_end_prepare_gs(fpme);
261 }
262 }
263
264
265 /**
266 * Bind/update constant buffer pointers, clip planes and viewport dims.
267 * These are "light weight" parameters which aren't baked into the
268 * generated code. Updating these items is much cheaper than revalidating
269 * and rebuilding the generated pipeline code.
270 */
271 static void
272 llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle)
273 {
274 static const float fake_const_buf[4];
275 struct llvm_middle_end *fpme = llvm_middle_end(middle);
276 struct draw_context *draw = fpme->draw;
277 struct draw_llvm *llvm = fpme->llvm;
278 unsigned i;
279
280 for (i = 0; i < ARRAY_SIZE(llvm->jit_context.vs_constants); ++i) {
281 int num_consts =
282 draw->pt.user.vs_constants_size[i] / (sizeof(float) * 4);
283 llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i];
284 llvm->jit_context.num_vs_constants[i] = num_consts;
285 if (num_consts == 0) {
286 llvm->jit_context.vs_constants[i] = fake_const_buf;
287 }
288 }
289 for (i = 0; i < ARRAY_SIZE(llvm->gs_jit_context.constants); ++i) {
290 int num_consts =
291 draw->pt.user.gs_constants_size[i] / (sizeof(float) * 4);
292 llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i];
293 llvm->gs_jit_context.num_constants[i] = num_consts;
294 if (num_consts == 0) {
295 llvm->gs_jit_context.constants[i] = fake_const_buf;
296 }
297 }
298
299 llvm->jit_context.planes =
300 (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
301 llvm->gs_jit_context.planes =
302 (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
303
304 llvm->jit_context.viewports = draw->viewports;
305 llvm->gs_jit_context.viewports = draw->viewports;
306 }
307
308
309 static void
310 pipeline(struct llvm_middle_end *llvm,
311 const struct draw_vertex_info *vert_info,
312 const struct draw_prim_info *prim_info)
313 {
314 if (prim_info->linear)
315 draw_pipeline_run_linear( llvm->draw,
316 vert_info,
317 prim_info);
318 else
319 draw_pipeline_run( llvm->draw,
320 vert_info,
321 prim_info );
322 }
323
324
325 static void
326 emit(struct pt_emit *emit,
327 const struct draw_vertex_info *vert_info,
328 const struct draw_prim_info *prim_info)
329 {
330 if (prim_info->linear) {
331 draw_pt_emit_linear(emit, vert_info, prim_info);
332 }
333 else {
334 draw_pt_emit(emit, vert_info, prim_info);
335 }
336 }
337
338
339 static void
340 llvm_pipeline_generic(struct draw_pt_middle_end *middle,
341 const struct draw_fetch_info *fetch_info,
342 const struct draw_prim_info *in_prim_info)
343 {
344 struct llvm_middle_end *fpme = llvm_middle_end(middle);
345 struct draw_context *draw = fpme->draw;
346 struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
347 struct draw_prim_info gs_prim_info;
348 struct draw_vertex_info llvm_vert_info;
349 struct draw_vertex_info gs_vert_info;
350 struct draw_vertex_info *vert_info;
351 struct draw_prim_info ia_prim_info;
352 struct draw_vertex_info ia_vert_info;
353 const struct draw_prim_info *prim_info = in_prim_info;
354 boolean free_prim_info = FALSE;
355 unsigned opt = fpme->opt;
356 boolean clipped = 0;
357 unsigned start_or_maxelt, vid_base;
358 const unsigned *elts;
359
360 llvm_vert_info.count = fetch_info->count;
361 llvm_vert_info.vertex_size = fpme->vertex_size;
362 llvm_vert_info.stride = fpme->vertex_size;
363 llvm_vert_info.verts = (struct vertex_header *)
364 MALLOC(fpme->vertex_size *
365 align(fetch_info->count, lp_native_vector_width / 32));
366 if (!llvm_vert_info.verts) {
367 assert(0);
368 return;
369 }
370
371 if (draw->collect_statistics) {
372 draw->statistics.ia_vertices += prim_info->count;
373 draw->statistics.ia_primitives +=
374 u_decomposed_prims_for_vertices(prim_info->prim, prim_info->count);
375 draw->statistics.vs_invocations += fetch_info->count;
376 }
377
378 if (fetch_info->linear) {
379 start_or_maxelt = fetch_info->start;
380 vid_base = draw->start_index;
381 elts = NULL;
382 }
383 else {
384 start_or_maxelt = draw->pt.user.eltMax;
385 vid_base = draw->pt.user.eltBias;
386 elts = fetch_info->elts;
387 }
388 clipped = fpme->current_variant->jit_func(&fpme->llvm->jit_context,
389 llvm_vert_info.verts,
390 draw->pt.user.vbuffer,
391 fetch_info->count,
392 start_or_maxelt,
393 fpme->vertex_size,
394 draw->pt.vertex_buffer,
395 draw->instance_id,
396 vid_base,
397 draw->start_instance,
398 elts);
399
400 /* Finished with fetch and vs:
401 */
402 fetch_info = NULL;
403 vert_info = &llvm_vert_info;
404
405 if ((opt & PT_SHADE) && gshader) {
406 struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
407 draw_geometry_shader_run(gshader,
408 draw->pt.user.gs_constants,
409 draw->pt.user.gs_constants_size,
410 vert_info,
411 prim_info,
412 &vshader->info,
413 &gs_vert_info,
414 &gs_prim_info);
415
416 FREE(vert_info->verts);
417 vert_info = &gs_vert_info;
418 prim_info = &gs_prim_info;
419 } else {
420 if (draw_prim_assembler_is_required(draw, prim_info, vert_info)) {
421 draw_prim_assembler_run(draw, prim_info, vert_info,
422 &ia_prim_info, &ia_vert_info);
423
424 if (ia_vert_info.count) {
425 FREE(vert_info->verts);
426 vert_info = &ia_vert_info;
427 prim_info = &ia_prim_info;
428 free_prim_info = TRUE;
429 }
430 }
431 }
432 if (prim_info->count == 0) {
433 debug_printf("GS/IA didn't emit any vertices!\n");
434
435 FREE(vert_info->verts);
436 if (free_prim_info) {
437 FREE(prim_info->primitive_lengths);
438 }
439 return;
440 }
441
442 /* stream output needs to be done before clipping */
443 draw_pt_so_emit( fpme->so_emit, vert_info, prim_info );
444
445 draw_stats_clipper_primitives(draw, prim_info);
446
447 /*
448 * if there's no position, need to stop now, or the latter stages
449 * will try to access non-existent position output.
450 */
451 if (draw_current_shader_position_output(draw) != -1) {
452 if ((opt & PT_SHADE) && (gshader ||
453 draw->vs.vertex_shader->info.writes_viewport_index)) {
454 clipped = draw_pt_post_vs_run( fpme->post_vs, vert_info, prim_info );
455 }
456 /* "clipped" also includes non-one edgeflag */
457 if (clipped) {
458 opt |= PT_PIPELINE;
459 }
460
461 /* Do we need to run the pipeline? Now will come here if clipped
462 */
463 if (opt & PT_PIPELINE) {
464 pipeline( fpme, vert_info, prim_info );
465 }
466 else {
467 emit( fpme->emit, vert_info, prim_info );
468 }
469 }
470 FREE(vert_info->verts);
471 if (free_prim_info) {
472 FREE(prim_info->primitive_lengths);
473 }
474 }
475
476
477 static inline unsigned
478 prim_type(unsigned prim, unsigned flags)
479 {
480 if (flags & DRAW_LINE_LOOP_AS_STRIP)
481 return PIPE_PRIM_LINE_STRIP;
482 else
483 return prim;
484 }
485
486
487 static void
488 llvm_middle_end_run(struct draw_pt_middle_end *middle,
489 const unsigned *fetch_elts,
490 unsigned fetch_count,
491 const ushort *draw_elts,
492 unsigned draw_count,
493 unsigned prim_flags)
494 {
495 struct llvm_middle_end *fpme = llvm_middle_end(middle);
496 struct draw_fetch_info fetch_info;
497 struct draw_prim_info prim_info;
498
499 fetch_info.linear = FALSE;
500 fetch_info.start = 0;
501 fetch_info.elts = fetch_elts;
502 fetch_info.count = fetch_count;
503
504 prim_info.linear = FALSE;
505 prim_info.start = 0;
506 prim_info.count = draw_count;
507 prim_info.elts = draw_elts;
508 prim_info.prim = prim_type(fpme->input_prim, prim_flags);
509 prim_info.flags = prim_flags;
510 prim_info.primitive_count = 1;
511 prim_info.primitive_lengths = &draw_count;
512
513 llvm_pipeline_generic( middle, &fetch_info, &prim_info );
514 }
515
516
517 static void
518 llvm_middle_end_linear_run(struct draw_pt_middle_end *middle,
519 unsigned start,
520 unsigned count,
521 unsigned prim_flags)
522 {
523 struct llvm_middle_end *fpme = llvm_middle_end(middle);
524 struct draw_fetch_info fetch_info;
525 struct draw_prim_info prim_info;
526
527 fetch_info.linear = TRUE;
528 fetch_info.start = start;
529 fetch_info.count = count;
530 fetch_info.elts = NULL;
531
532 prim_info.linear = TRUE;
533 prim_info.start = 0;
534 prim_info.count = count;
535 prim_info.elts = NULL;
536 prim_info.prim = prim_type(fpme->input_prim, prim_flags);
537 prim_info.flags = prim_flags;
538 prim_info.primitive_count = 1;
539 prim_info.primitive_lengths = &count;
540
541 llvm_pipeline_generic( middle, &fetch_info, &prim_info );
542 }
543
544
545 static boolean
546 llvm_middle_end_linear_run_elts(struct draw_pt_middle_end *middle,
547 unsigned start,
548 unsigned count,
549 const ushort *draw_elts,
550 unsigned draw_count,
551 unsigned prim_flags)
552 {
553 struct llvm_middle_end *fpme = llvm_middle_end(middle);
554 struct draw_fetch_info fetch_info;
555 struct draw_prim_info prim_info;
556
557 fetch_info.linear = TRUE;
558 fetch_info.start = start;
559 fetch_info.count = count;
560 fetch_info.elts = NULL;
561
562 prim_info.linear = FALSE;
563 prim_info.start = 0;
564 prim_info.count = draw_count;
565 prim_info.elts = draw_elts;
566 prim_info.prim = prim_type(fpme->input_prim, prim_flags);
567 prim_info.flags = prim_flags;
568 prim_info.primitive_count = 1;
569 prim_info.primitive_lengths = &draw_count;
570
571 llvm_pipeline_generic( middle, &fetch_info, &prim_info );
572
573 return TRUE;
574 }
575
576
/** Finish hook of the middle end; this implementation has no work to do. */
static void
llvm_middle_end_finish(struct draw_pt_middle_end *middle)
{
   (void) middle;   /* nothing to do */
}
582
583
584 static void
585 llvm_middle_end_destroy(struct draw_pt_middle_end *middle)
586 {
587 struct llvm_middle_end *fpme = llvm_middle_end(middle);
588
589 if (fpme->fetch)
590 draw_pt_fetch_destroy( fpme->fetch );
591
592 if (fpme->emit)
593 draw_pt_emit_destroy( fpme->emit );
594
595 if (fpme->so_emit)
596 draw_pt_so_emit_destroy( fpme->so_emit );
597
598 if (fpme->post_vs)
599 draw_pt_post_vs_destroy( fpme->post_vs );
600
601 FREE(middle);
602 }
603
604
605 struct draw_pt_middle_end *
606 draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw)
607 {
608 struct llvm_middle_end *fpme = 0;
609
610 if (!draw->llvm)
611 return NULL;
612
613 fpme = CALLOC_STRUCT( llvm_middle_end );
614 if (!fpme)
615 goto fail;
616
617 fpme->base.prepare = llvm_middle_end_prepare;
618 fpme->base.bind_parameters = llvm_middle_end_bind_parameters;
619 fpme->base.run = llvm_middle_end_run;
620 fpme->base.run_linear = llvm_middle_end_linear_run;
621 fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts;
622 fpme->base.finish = llvm_middle_end_finish;
623 fpme->base.destroy = llvm_middle_end_destroy;
624
625 fpme->draw = draw;
626
627 fpme->fetch = draw_pt_fetch_create( draw );
628 if (!fpme->fetch)
629 goto fail;
630
631 fpme->post_vs = draw_pt_post_vs_create( draw );
632 if (!fpme->post_vs)
633 goto fail;
634
635 fpme->emit = draw_pt_emit_create( draw );
636 if (!fpme->emit)
637 goto fail;
638
639 fpme->so_emit = draw_pt_so_emit_create( draw );
640 if (!fpme->so_emit)
641 goto fail;
642
643 fpme->llvm = draw->llvm;
644 if (!fpme->llvm)
645 goto fail;
646
647 fpme->current_variant = NULL;
648
649 return &fpme->base;
650
651 fail:
652 if (fpme)
653 llvm_middle_end_destroy( &fpme->base );
654
655 return NULL;
656 }