gallium: a lot more complete implementation of stream output
[mesa.git] / src / gallium / auxiliary / draw / draw_pt_fetch_shade_pipeline_llvm.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMWare, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "util/u_math.h"
29 #include "util/u_memory.h"
30 #include "draw/draw_context.h"
31 #include "draw/draw_vbuf.h"
32 #include "draw/draw_vertex.h"
33 #include "draw/draw_pt.h"
34 #include "draw/draw_vs.h"
35 #include "draw/draw_llvm.h"
36
37
38 struct llvm_middle_end {
39 struct draw_pt_middle_end base;
40 struct draw_context *draw;
41
42 struct pt_emit *emit;
43 struct pt_so_emit *so_emit;
44 struct pt_fetch *fetch;
45 struct pt_post_vs *post_vs;
46
47
48 unsigned vertex_data_offset;
49 unsigned vertex_size;
50 unsigned prim;
51 unsigned opt;
52
53 struct draw_llvm *llvm;
54 struct draw_llvm_variant *variants;
55 struct draw_llvm_variant *current_variant;
56 int nr_variants;
57 };
58
59
60 static void
61 llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
62 unsigned prim,
63 unsigned opt,
64 unsigned *max_vertices )
65 {
66 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
67 struct draw_context *draw = fpme->draw;
68 struct draw_vertex_shader *vs = draw->vs.vertex_shader;
69 struct draw_llvm_variant_key key;
70 struct draw_llvm_variant *variant = NULL;
71 unsigned i;
72 unsigned instance_id_index = ~0;
73
74 /* Add one to num_outputs because the pipeline occasionally tags on
75 * an additional texcoord, eg for AA lines.
76 */
77 unsigned nr = MAX2( vs->info.num_inputs,
78 vs->info.num_outputs + 1 );
79
80 /* Scan for instanceID system value.
81 */
82 for (i = 0; i < vs->info.num_inputs; i++) {
83 if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) {
84 instance_id_index = i;
85 break;
86 }
87 }
88
89 fpme->prim = prim;
90 fpme->opt = opt;
91
92 /* Always leave room for the vertex header whether we need it or
93 * not. It's hard to get rid of it in particular because of the
94 * viewport code in draw_pt_post_vs.c.
95 */
96 fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
97
98
99 /* XXX: it's not really gl rasterization rules we care about here,
100 * but gl vs dx9 clip spaces.
101 */
102 draw_pt_post_vs_prepare( fpme->post_vs,
103 (boolean)draw->bypass_clipping,
104 (boolean)(draw->identity_viewport),
105 (boolean)draw->rasterizer->gl_rasterization_rules,
106 (draw->vs.edgeflag_output ? true : false) );
107
108 draw_pt_so_emit_prepare( fpme->so_emit, prim );
109 if (!(opt & PT_PIPELINE)) {
110 draw_pt_emit_prepare( fpme->emit,
111 prim,
112 max_vertices );
113
114 *max_vertices = MAX2( *max_vertices,
115 DRAW_PIPE_MAX_VERTICES );
116 }
117 else {
118 *max_vertices = DRAW_PIPE_MAX_VERTICES;
119 }
120
121 /* return even number */
122 *max_vertices = *max_vertices & ~1;
123
124 draw_llvm_make_variant_key(fpme->llvm, &key);
125
126 variant = fpme->variants;
127 while(variant) {
128 if(memcmp(&variant->key, &key, sizeof key) == 0)
129 break;
130
131 variant = variant->next;
132 }
133
134 if (!variant) {
135 variant = draw_llvm_prepare(fpme->llvm, nr);
136 variant->next = fpme->variants;
137 fpme->variants = variant;
138 ++fpme->nr_variants;
139 }
140 fpme->current_variant = variant;
141
142 /*XXX we only support one constant buffer */
143 fpme->llvm->jit_context.vs_constants =
144 draw->pt.user.vs_constants[0];
145 fpme->llvm->jit_context.gs_constants =
146 draw->pt.user.gs_constants[0];
147 }
148
149
150
151 static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
152 const unsigned *fetch_elts,
153 unsigned fetch_count,
154 const ushort *draw_elts,
155 unsigned draw_count )
156 {
157 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
158 struct draw_context *draw = fpme->draw;
159 unsigned opt = fpme->opt;
160 unsigned alloc_count = align( fetch_count, 4 );
161
162 struct vertex_header *pipeline_verts =
163 (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
164
165 if (!pipeline_verts) {
166 /* Not much we can do here - just skip the rendering.
167 */
168 assert(0);
169 return;
170 }
171
172 fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
173 pipeline_verts,
174 (const char **)draw->pt.user.vbuffer,
175 fetch_elts,
176 fetch_count,
177 fpme->vertex_size,
178 draw->pt.vertex_buffer );
179
180 /* stream output needs to be done before clipping */
181 draw_pt_so_emit( fpme->so_emit,
182 (const float (*)[4])pipeline_verts->data,
183 fetch_count,
184 fpme->vertex_size );
185
186 if (draw_pt_post_vs_run( fpme->post_vs,
187 pipeline_verts,
188 fetch_count,
189 fpme->vertex_size ))
190 {
191 opt |= PT_PIPELINE;
192 }
193
194 /* Do we need to run the pipeline?
195 */
196 if (opt & PT_PIPELINE) {
197 draw_pipeline_run( fpme->draw,
198 fpme->prim,
199 pipeline_verts,
200 fetch_count,
201 fpme->vertex_size,
202 draw_elts,
203 draw_count );
204 }
205 else {
206 draw_pt_emit( fpme->emit,
207 (const float (*)[4])pipeline_verts->data,
208 fetch_count,
209 fpme->vertex_size,
210 draw_elts,
211 draw_count );
212 }
213
214
215 FREE(pipeline_verts);
216 }
217
218
219 static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
220 unsigned start,
221 unsigned count)
222 {
223 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
224 struct draw_context *draw = fpme->draw;
225 unsigned opt = fpme->opt;
226 unsigned alloc_count = align( count, 4 );
227
228 struct vertex_header *pipeline_verts =
229 (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
230
231 if (!pipeline_verts) {
232 /* Not much we can do here - just skip the rendering.
233 */
234 assert(0);
235 return;
236 }
237
238 #if 0
239 debug_printf("#### Pipeline = %p (data = %p)\n",
240 pipeline_verts, pipeline_verts->data);
241 #endif
242 fpme->current_variant->jit_func( &fpme->llvm->jit_context,
243 pipeline_verts,
244 (const char **)draw->pt.user.vbuffer,
245 start,
246 count,
247 fpme->vertex_size,
248 draw->pt.vertex_buffer );
249
250 /* stream output needs to be done before clipping */
251 draw_pt_so_emit( fpme->so_emit,
252 (const float (*)[4])pipeline_verts->data,
253 count,
254 fpme->vertex_size );
255
256 if (draw_pt_post_vs_run( fpme->post_vs,
257 pipeline_verts,
258 count,
259 fpme->vertex_size ))
260 {
261 opt |= PT_PIPELINE;
262 }
263
264 /* Do we need to run the pipeline?
265 */
266 if (opt & PT_PIPELINE) {
267 draw_pipeline_run_linear( fpme->draw,
268 fpme->prim,
269 pipeline_verts,
270 count,
271 fpme->vertex_size);
272 }
273 else {
274 draw_pt_emit_linear( fpme->emit,
275 (const float (*)[4])pipeline_verts->data,
276 fpme->vertex_size,
277 count );
278 }
279
280 FREE(pipeline_verts);
281 }
282
283
284
285 static boolean
286 llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
287 unsigned start,
288 unsigned count,
289 const ushort *draw_elts,
290 unsigned draw_count )
291 {
292 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
293 struct draw_context *draw = fpme->draw;
294 unsigned opt = fpme->opt;
295 unsigned alloc_count = align( count, 4 );
296
297 struct vertex_header *pipeline_verts =
298 (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count);
299
300 if (!pipeline_verts)
301 return FALSE;
302
303 fpme->current_variant->jit_func( &fpme->llvm->jit_context,
304 pipeline_verts,
305 (const char **)draw->pt.user.vbuffer,
306 start,
307 count,
308 fpme->vertex_size,
309 draw->pt.vertex_buffer );
310
311 /* stream output needs to be done before clipping */
312 draw_pt_so_emit( fpme->so_emit,
313 (const float (*)[4])pipeline_verts->data,
314 count,
315 fpme->vertex_size );
316
317 if (draw_pt_post_vs_run( fpme->post_vs,
318 pipeline_verts,
319 count,
320 fpme->vertex_size ))
321 {
322 opt |= PT_PIPELINE;
323 }
324
325 /* Do we need to run the pipeline?
326 */
327 if (opt & PT_PIPELINE) {
328 draw_pipeline_run( fpme->draw,
329 fpme->prim,
330 pipeline_verts,
331 count,
332 fpme->vertex_size,
333 draw_elts,
334 draw_count );
335 }
336 else {
337 draw_pt_emit( fpme->emit,
338 (const float (*)[4])pipeline_verts->data,
339 count,
340 fpme->vertex_size,
341 draw_elts,
342 draw_count );
343 }
344
345 FREE(pipeline_verts);
346 return TRUE;
347 }
348
349
350
/**
 * 'finish' callback of the middle end.  Intentionally empty: there is
 * nothing to do here.
 */
static void llvm_middle_end_finish( struct draw_pt_middle_end *middle )
{
   (void) middle;   /* unused */
}
355
356 static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle )
357 {
358 struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
359 struct draw_context *draw = fpme->draw;
360 struct draw_llvm_variant *variant = NULL;
361
362 variant = fpme->variants;
363 while(variant) {
364 struct draw_llvm_variant *next = variant->next;
365
366 if (variant->function_elts) {
367 if (variant->function_elts)
368 LLVMFreeMachineCodeForFunction(draw->engine,
369 variant->function_elts);
370 LLVMDeleteFunction(variant->function_elts);
371 }
372
373 if (variant->function) {
374 if (variant->function)
375 LLVMFreeMachineCodeForFunction(draw->engine,
376 variant->function);
377 LLVMDeleteFunction(variant->function);
378 }
379
380 FREE(variant);
381
382 variant = next;
383 }
384 if (fpme->fetch)
385 draw_pt_fetch_destroy( fpme->fetch );
386
387 if (fpme->emit)
388 draw_pt_emit_destroy( fpme->emit );
389
390 if (fpme->so_emit)
391 draw_pt_so_emit_destroy( fpme->so_emit );
392
393 if (fpme->post_vs)
394 draw_pt_post_vs_destroy( fpme->post_vs );
395
396 if (fpme->llvm)
397 draw_llvm_destroy( fpme->llvm );
398
399 FREE(middle);
400 }
401
402
403 struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw )
404 {
405 struct llvm_middle_end *fpme = 0;
406
407 if (!draw->engine)
408 return NULL;
409
410 fpme = CALLOC_STRUCT( llvm_middle_end );
411 if (!fpme)
412 goto fail;
413
414 fpme->base.prepare = llvm_middle_end_prepare;
415 fpme->base.run = llvm_middle_end_run;
416 fpme->base.run_linear = llvm_middle_end_linear_run;
417 fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts;
418 fpme->base.finish = llvm_middle_end_finish;
419 fpme->base.destroy = llvm_middle_end_destroy;
420
421 fpme->draw = draw;
422
423 fpme->fetch = draw_pt_fetch_create( draw );
424 if (!fpme->fetch)
425 goto fail;
426
427 fpme->post_vs = draw_pt_post_vs_create( draw );
428 if (!fpme->post_vs)
429 goto fail;
430
431 fpme->emit = draw_pt_emit_create( draw );
432 if (!fpme->emit)
433 goto fail;
434
435 fpme->so_emit = draw_pt_so_emit_create( draw );
436 if (!fpme->so_emit)
437 goto fail;
438
439 fpme->llvm = draw_llvm_create(draw);
440 if (!fpme->llvm)
441 goto fail;
442
443 fpme->variants = NULL;
444 fpme->current_variant = NULL;
445 fpme->nr_variants = 0;
446
447 return &fpme->base;
448
449 fail:
450 if (fpme)
451 llvm_middle_end_destroy( &fpme->base );
452
453 return NULL;
454 }