gallium/llvm: implement geometry shaders in the llvm paths
[mesa.git] / src / gallium / auxiliary / draw / draw_pt_fetch_shade_pipeline_llvm.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "util/u_math.h"
29 #include "util/u_memory.h"
30 #include "draw/draw_context.h"
31 #include "draw/draw_gs.h"
32 #include "draw/draw_vbuf.h"
33 #include "draw/draw_vertex.h"
34 #include "draw/draw_pt.h"
35 #include "draw/draw_vs.h"
36 #include "draw/draw_llvm.h"
37 #include "gallivm/lp_bld_init.h"
38
39
40 struct llvm_middle_end {
41 struct draw_pt_middle_end base;
42 struct draw_context *draw;
43
44 struct pt_emit *emit;
45 struct pt_so_emit *so_emit;
46 struct pt_fetch *fetch;
47 struct pt_post_vs *post_vs;
48
49
50 unsigned vertex_data_offset;
51 unsigned vertex_size;
52 unsigned input_prim;
53 unsigned opt;
54
55 struct draw_llvm *llvm;
56 struct draw_llvm_variant *current_variant;
57 };
58
59
60 static void
61 llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme)
62 {
63 struct draw_context *draw = fpme->draw;
64 struct draw_geometry_shader *gs = draw->gs.geometry_shader;
65 struct draw_gs_llvm_variant_key *key;
66 struct draw_gs_llvm_variant *variant = NULL;
67 struct draw_gs_llvm_variant_list_item *li;
68 struct llvm_geometry_shader *shader = llvm_geometry_shader(gs);
69 char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE];
70 unsigned i;
71
72 key = draw_gs_llvm_make_variant_key(fpme->llvm, store);
73
74 /* Search shader's list of variants for the key */
75 li = first_elem(&shader->variants);
76 while (!at_end(&shader->variants, li)) {
77 if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
78 variant = li->base;
79 break;
80 }
81 li = next_elem(li);
82 }
83
84 if (variant) {
85 /* found the variant, move to head of global list (for LRU) */
86 move_to_head(&fpme->llvm->gs_variants_list,
87 &variant->list_item_global);
88 }
89 else {
90 /* Need to create new variant */
91
92 /* First check if we've created too many variants. If so, free
93 * 25% of the LRU to avoid using too much memory.
94 */
95 if (fpme->llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) {
96 /*
97 * XXX: should we flush here ?
98 */
99 for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
100 struct draw_gs_llvm_variant_list_item *item;
101 if (is_empty_list(&fpme->llvm->gs_variants_list)) {
102 break;
103 }
104 item = last_elem(&fpme->llvm->gs_variants_list);
105 assert(item);
106 assert(item->base);
107 draw_gs_llvm_destroy_variant(item->base);
108 }
109 }
110
111 variant = draw_gs_llvm_create_variant(fpme->llvm, gs->info.num_outputs, key);
112
113 if (variant) {
114 insert_at_head(&shader->variants, &variant->list_item_local);
115 insert_at_head(&fpme->llvm->gs_variants_list,
116 &variant->list_item_global);
117 fpme->llvm->nr_gs_variants++;
118 shader->variants_cached++;
119 }
120 }
121
122 gs->current_variant = variant;
123 }
124
125 /**
126 * Prepare/validate middle part of the vertex pipeline.
127 * NOTE: if you change this function, also look at the non-LLVM
128 * function fetch_pipeline_prepare() for similar changes.
129 */
130 static void
131 llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
132 unsigned in_prim,
133 unsigned opt,
134 unsigned *max_vertices )
135 {
136 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
137 struct draw_context *draw = fpme->draw;
138 struct draw_vertex_shader *vs = draw->vs.vertex_shader;
139 struct draw_geometry_shader *gs = draw->gs.geometry_shader;
140 const unsigned out_prim = gs ? gs->output_primitive : in_prim;
141
142 /* Add one to num_outputs because the pipeline occasionally tags on
143 * an additional texcoord, eg for AA lines.
144 */
145 const unsigned nr = MAX2( vs->info.num_inputs,
146 vs->info.num_outputs + 1 );
147
148 fpme->input_prim = in_prim;
149 fpme->opt = opt;
150
151 /* Always leave room for the vertex header whether we need it or
152 * not. It's hard to get rid of it in particular because of the
153 * viewport code in draw_pt_post_vs.c.
154 */
155 fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
156
157
158 /* XXX: it's not really gl rasterization rules we care about here,
159 * but gl vs dx9 clip spaces.
160 */
161 draw_pt_post_vs_prepare( fpme->post_vs,
162 draw->clip_xy,
163 draw->clip_z,
164 draw->clip_user,
165 draw->guard_band_xy,
166 draw->identity_viewport,
167 (boolean)draw->rasterizer->gl_rasterization_rules,
168 (draw->vs.edgeflag_output ? TRUE : FALSE) );
169
170 draw_pt_so_emit_prepare( fpme->so_emit, gs == NULL );
171
172 if (!(opt & PT_PIPELINE)) {
173 draw_pt_emit_prepare( fpme->emit,
174 out_prim,
175 max_vertices );
176
177 *max_vertices = MAX2( *max_vertices, 4096 );
178 }
179 else {
180 /* limit max fetches by limiting max_vertices */
181 *max_vertices = 4096;
182 }
183
184 /* return even number */
185 *max_vertices = *max_vertices & ~1;
186
187 /* Find/create the vertex shader variant */
188 {
189 struct draw_llvm_variant_key *key;
190 struct draw_llvm_variant *variant = NULL;
191 struct draw_llvm_variant_list_item *li;
192 struct llvm_vertex_shader *shader = llvm_vertex_shader(vs);
193 char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE];
194 unsigned i;
195
196 key = draw_llvm_make_variant_key(fpme->llvm, store);
197
198 /* Search shader's list of variants for the key */
199 li = first_elem(&shader->variants);
200 while (!at_end(&shader->variants, li)) {
201 if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
202 variant = li->base;
203 break;
204 }
205 li = next_elem(li);
206 }
207
208 if (variant) {
209 /* found the variant, move to head of global list (for LRU) */
210 move_to_head(&fpme->llvm->vs_variants_list,
211 &variant->list_item_global);
212 }
213 else {
214 /* Need to create new variant */
215
216 /* First check if we've created too many variants. If so, free
217 * 25% of the LRU to avoid using too much memory.
218 */
219 if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) {
220 /*
221 * XXX: should we flush here ?
222 */
223 for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
224 struct draw_llvm_variant_list_item *item;
225 if (is_empty_list(&fpme->llvm->vs_variants_list)) {
226 break;
227 }
228 item = last_elem(&fpme->llvm->vs_variants_list);
229 assert(item);
230 assert(item->base);
231 draw_llvm_destroy_variant(item->base);
232 }
233 }
234
235 variant = draw_llvm_create_variant(fpme->llvm, nr, key);
236
237 if (variant) {
238 insert_at_head(&shader->variants, &variant->list_item_local);
239 insert_at_head(&fpme->llvm->vs_variants_list,
240 &variant->list_item_global);
241 fpme->llvm->nr_variants++;
242 shader->variants_cached++;
243 }
244 }
245
246 fpme->current_variant = variant;
247 }
248
249 if (gs) {
250 llvm_middle_end_prepare_gs(fpme);
251 }
252 }
253
254
255 /**
256 * Bind/update constant buffer pointers, clip planes and viewport dims.
257 * These are "light weight" parameters which aren't baked into the
258 * generated code. Updating these items is much cheaper than revalidating
259 * and rebuilding the generated pipeline code.
260 */
261 static void
262 llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle)
263 {
264 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
265 struct draw_context *draw = fpme->draw;
266 unsigned i;
267
268 for (i = 0; i < Elements(fpme->llvm->jit_context.vs_constants); ++i) {
269 fpme->llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i];
270 }
271 for (i = 0; i < Elements(fpme->llvm->gs_jit_context.constants); ++i) {
272 fpme->llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i];
273 }
274
275 fpme->llvm->jit_context.planes =
276 (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
277 fpme->llvm->gs_jit_context.planes =
278 (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
279
280 fpme->llvm->jit_context.viewport = (float *) draw->viewport.scale;
281 fpme->llvm->gs_jit_context.viewport = (float *) draw->viewport.scale;
282 }
283
284
285 static void pipeline(struct llvm_middle_end *llvm,
286 const struct draw_vertex_info *vert_info,
287 const struct draw_prim_info *prim_info)
288 {
289 if (prim_info->linear)
290 draw_pipeline_run_linear( llvm->draw,
291 vert_info,
292 prim_info);
293 else
294 draw_pipeline_run( llvm->draw,
295 vert_info,
296 prim_info );
297 }
298
299 static void emit(struct pt_emit *emit,
300 const struct draw_vertex_info *vert_info,
301 const struct draw_prim_info *prim_info)
302 {
303 if (prim_info->linear) {
304 draw_pt_emit_linear(emit, vert_info, prim_info);
305 }
306 else {
307 draw_pt_emit(emit, vert_info, prim_info);
308 }
309 }
310
311 static void
312 llvm_pipeline_generic( struct draw_pt_middle_end *middle,
313 const struct draw_fetch_info *fetch_info,
314 const struct draw_prim_info *prim_info )
315 {
316 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
317 struct draw_context *draw = fpme->draw;
318 struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
319 struct draw_prim_info gs_prim_info;
320 struct draw_vertex_info llvm_vert_info;
321 struct draw_vertex_info gs_vert_info;
322 struct draw_vertex_info *vert_info;
323 unsigned opt = fpme->opt;
324 unsigned clipped = 0;
325
326 llvm_vert_info.count = fetch_info->count;
327 llvm_vert_info.vertex_size = fpme->vertex_size;
328 llvm_vert_info.stride = fpme->vertex_size;
329 llvm_vert_info.verts =
330 (struct vertex_header *)MALLOC(fpme->vertex_size *
331 align(fetch_info->count, lp_native_vector_width / 32));
332 if (!llvm_vert_info.verts) {
333 assert(0);
334 return;
335 }
336
337 if (fetch_info->linear)
338 clipped = fpme->current_variant->jit_func( &fpme->llvm->jit_context,
339 llvm_vert_info.verts,
340 (const char **)draw->pt.user.vbuffer,
341 fetch_info->start,
342 fetch_info->count,
343 fpme->vertex_size,
344 draw->pt.vertex_buffer,
345 draw->instance_id);
346 else
347 clipped = fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
348 llvm_vert_info.verts,
349 (const char **)draw->pt.user.vbuffer,
350 fetch_info->elts,
351 fetch_info->count,
352 fpme->vertex_size,
353 draw->pt.vertex_buffer,
354 draw->instance_id);
355
356 /* Finished with fetch and vs:
357 */
358 fetch_info = NULL;
359 vert_info = &llvm_vert_info;
360
361
362 if ((opt & PT_SHADE) && gshader) {
363 struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
364 draw_geometry_shader_run(gshader,
365 draw->pt.user.gs_constants,
366 draw->pt.user.gs_constants_size,
367 vert_info,
368 prim_info,
369 &vshader->info,
370 &gs_vert_info,
371 &gs_prim_info);
372
373 FREE(vert_info->verts);
374 vert_info = &gs_vert_info;
375 prim_info = &gs_prim_info;
376 }
377
378 /* stream output needs to be done before clipping */
379 draw_pt_so_emit( fpme->so_emit, vert_info, prim_info );
380
381 /*
382 * if there's no position, need to stop now, or the latter stages
383 * will try to access non-existent position output.
384 */
385 if (draw_current_shader_position_output(draw) != -1) {
386 if ((opt & PT_SHADE) && gshader) {
387 clipped = draw_pt_post_vs_run( fpme->post_vs, vert_info );
388 }
389 if (clipped) {
390 opt |= PT_PIPELINE;
391 }
392
393 /* Do we need to run the pipeline? Now will come here if clipped
394 */
395 if (opt & PT_PIPELINE) {
396 pipeline( fpme, vert_info, prim_info );
397 }
398 else {
399 emit( fpme->emit, vert_info, prim_info );
400 }
401 }
402 FREE(vert_info->verts);
403 }
404
405
406 static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
407 const unsigned *fetch_elts,
408 unsigned fetch_count,
409 const ushort *draw_elts,
410 unsigned draw_count,
411 unsigned prim_flags )
412 {
413 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
414 struct draw_fetch_info fetch_info;
415 struct draw_prim_info prim_info;
416
417 fetch_info.linear = FALSE;
418 fetch_info.start = 0;
419 fetch_info.elts = fetch_elts;
420 fetch_info.count = fetch_count;
421
422 prim_info.linear = FALSE;
423 prim_info.start = 0;
424 prim_info.count = draw_count;
425 prim_info.elts = draw_elts;
426 prim_info.prim = fpme->input_prim;
427 prim_info.flags = prim_flags;
428 prim_info.primitive_count = 1;
429 prim_info.primitive_lengths = &draw_count;
430
431 llvm_pipeline_generic( middle, &fetch_info, &prim_info );
432 }
433
434
435 static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
436 unsigned start,
437 unsigned count,
438 unsigned prim_flags)
439 {
440 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
441 struct draw_fetch_info fetch_info;
442 struct draw_prim_info prim_info;
443
444 fetch_info.linear = TRUE;
445 fetch_info.start = start;
446 fetch_info.count = count;
447 fetch_info.elts = NULL;
448
449 prim_info.linear = TRUE;
450 prim_info.start = 0;
451 prim_info.count = count;
452 prim_info.elts = NULL;
453 prim_info.prim = fpme->input_prim;
454 prim_info.flags = prim_flags;
455 prim_info.primitive_count = 1;
456 prim_info.primitive_lengths = &count;
457
458 llvm_pipeline_generic( middle, &fetch_info, &prim_info );
459 }
460
461
462
463 static boolean
464 llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
465 unsigned start,
466 unsigned count,
467 const ushort *draw_elts,
468 unsigned draw_count,
469 unsigned prim_flags )
470 {
471 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
472 struct draw_fetch_info fetch_info;
473 struct draw_prim_info prim_info;
474
475 fetch_info.linear = TRUE;
476 fetch_info.start = start;
477 fetch_info.count = count;
478 fetch_info.elts = NULL;
479
480 prim_info.linear = FALSE;
481 prim_info.start = 0;
482 prim_info.count = draw_count;
483 prim_info.elts = draw_elts;
484 prim_info.prim = fpme->input_prim;
485 prim_info.flags = prim_flags;
486 prim_info.primitive_count = 1;
487 prim_info.primitive_lengths = &draw_count;
488
489 llvm_pipeline_generic( middle, &fetch_info, &prim_info );
490
491 return TRUE;
492 }
493
494
495
/**
 * 'finish' callback of the middle end; this implementation has no
 * per-batch state to tear down.
 */
static void llvm_middle_end_finish( struct draw_pt_middle_end *middle )
{
   /* nothing to do */
   (void) middle;
}
500
501 static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle )
502 {
503 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
504
505 if (fpme->fetch)
506 draw_pt_fetch_destroy( fpme->fetch );
507
508 if (fpme->emit)
509 draw_pt_emit_destroy( fpme->emit );
510
511 if (fpme->so_emit)
512 draw_pt_so_emit_destroy( fpme->so_emit );
513
514 if (fpme->post_vs)
515 draw_pt_post_vs_destroy( fpme->post_vs );
516
517 FREE(middle);
518 }
519
520
521 struct draw_pt_middle_end *
522 draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw)
523 {
524 struct llvm_middle_end *fpme = 0;
525
526 if (!draw->llvm)
527 return NULL;
528
529 fpme = CALLOC_STRUCT( llvm_middle_end );
530 if (!fpme)
531 goto fail;
532
533 fpme->base.prepare = llvm_middle_end_prepare;
534 fpme->base.bind_parameters = llvm_middle_end_bind_parameters;
535 fpme->base.run = llvm_middle_end_run;
536 fpme->base.run_linear = llvm_middle_end_linear_run;
537 fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts;
538 fpme->base.finish = llvm_middle_end_finish;
539 fpme->base.destroy = llvm_middle_end_destroy;
540
541 fpme->draw = draw;
542
543 fpme->fetch = draw_pt_fetch_create( draw );
544 if (!fpme->fetch)
545 goto fail;
546
547 fpme->post_vs = draw_pt_post_vs_create( draw );
548 if (!fpme->post_vs)
549 goto fail;
550
551 fpme->emit = draw_pt_emit_create( draw );
552 if (!fpme->emit)
553 goto fail;
554
555 fpme->so_emit = draw_pt_so_emit_create( draw );
556 if (!fpme->so_emit)
557 goto fail;
558
559 fpme->llvm = draw->llvm;
560 if (!fpme->llvm)
561 goto fail;
562
563 fpme->current_variant = NULL;
564
565 return &fpme->base;
566
567 fail:
568 if (fpme)
569 llvm_middle_end_destroy( &fpme->base );
570
571 return NULL;
572 }