geometry shaders: make gs work with changeable primitives and variable number of vertices
[mesa.git] / src / gallium / auxiliary / draw / draw_pt_vcache.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 */
32
33 #include "util/u_memory.h"
34 #include "util/u_prim.h"
35 #include "draw/draw_context.h"
36 #include "draw/draw_private.h"
37 #include "draw/draw_pt.h"
38
39
40 #define CACHE_MAX 256
41 #define FETCH_MAX 256
42 #define DRAW_MAX (16*1024)
43
44 struct vcache_frontend {
45 struct draw_pt_front_end base;
46 struct draw_context *draw;
47
48 unsigned in[CACHE_MAX];
49 ushort out[CACHE_MAX];
50
51 ushort draw_elts[DRAW_MAX];
52 unsigned fetch_elts[FETCH_MAX];
53
54 unsigned draw_count;
55 unsigned fetch_count;
56 unsigned fetch_max;
57
58 struct draw_pt_middle_end *middle;
59
60 unsigned input_prim;
61 unsigned output_prim;
62
63 unsigned middle_prim;
64 unsigned opt;
65 };
66
67 static INLINE void
68 vcache_flush( struct vcache_frontend *vcache )
69 {
70 if (vcache->middle_prim != vcache->output_prim) {
71 vcache->middle_prim = vcache->output_prim;
72 vcache->middle->prepare( vcache->middle,
73 vcache->input_prim,
74 vcache->middle_prim,
75 vcache->opt,
76 &vcache->fetch_max );
77 }
78
79 if (vcache->draw_count) {
80 vcache->middle->run( vcache->middle,
81 vcache->fetch_elts,
82 vcache->fetch_count,
83 vcache->draw_elts,
84 vcache->draw_count );
85 }
86
87 memset(vcache->in, ~0, sizeof(vcache->in));
88 vcache->fetch_count = 0;
89 vcache->draw_count = 0;
90 }
91
92 static INLINE void
93 vcache_check_flush( struct vcache_frontend *vcache )
94 {
95 if ( vcache->draw_count + 6 >= DRAW_MAX ||
96 vcache->fetch_count + 4 >= FETCH_MAX )
97 {
98 vcache_flush( vcache );
99 }
100 }
101
102
103 static INLINE void
104 vcache_elt( struct vcache_frontend *vcache,
105 unsigned felt,
106 ushort flags )
107 {
108 unsigned idx = felt % CACHE_MAX;
109
110 if (vcache->in[idx] != felt) {
111 assert(vcache->fetch_count < FETCH_MAX);
112
113 vcache->in[idx] = felt;
114 vcache->out[idx] = (ushort)vcache->fetch_count;
115 vcache->fetch_elts[vcache->fetch_count++] = felt;
116 }
117
118 vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags;
119 }
120
121
122
123 static INLINE void
124 vcache_triangle( struct vcache_frontend *vcache,
125 unsigned i0,
126 unsigned i1,
127 unsigned i2 )
128 {
129 vcache_elt(vcache, i0, 0);
130 vcache_elt(vcache, i1, 0);
131 vcache_elt(vcache, i2, 0);
132 vcache_check_flush(vcache);
133 }
134
135
136 static INLINE void
137 vcache_triangle_flags( struct vcache_frontend *vcache,
138 ushort flags,
139 unsigned i0,
140 unsigned i1,
141 unsigned i2 )
142 {
143 vcache_elt(vcache, i0, flags);
144 vcache_elt(vcache, i1, 0);
145 vcache_elt(vcache, i2, 0);
146 vcache_check_flush(vcache);
147 }
148
149 static INLINE void
150 vcache_line( struct vcache_frontend *vcache,
151 unsigned i0,
152 unsigned i1 )
153 {
154 vcache_elt(vcache, i0, 0);
155 vcache_elt(vcache, i1, 0);
156 vcache_check_flush(vcache);
157 }
158
159
160 static INLINE void
161 vcache_line_flags( struct vcache_frontend *vcache,
162 ushort flags,
163 unsigned i0,
164 unsigned i1 )
165 {
166 vcache_elt(vcache, i0, flags);
167 vcache_elt(vcache, i1, 0);
168 vcache_check_flush(vcache);
169 }
170
171
172 static INLINE void
173 vcache_point( struct vcache_frontend *vcache,
174 unsigned i0 )
175 {
176 vcache_elt(vcache, i0, 0);
177 vcache_check_flush(vcache);
178 }
179
180 static INLINE void
181 vcache_quad( struct vcache_frontend *vcache,
182 unsigned i0,
183 unsigned i1,
184 unsigned i2,
185 unsigned i3 )
186 {
187 if (vcache->draw->rasterizer->flatshade_first) {
188 /* pass last quad vertex as first triangle vertex */
189 vcache_triangle( vcache, i3, i0, i1 );
190 vcache_triangle( vcache, i3, i1, i2 );
191 }
192 else {
193 /* pass last quad vertex as last triangle vertex */
194 vcache_triangle( vcache, i0, i1, i3 );
195 vcache_triangle( vcache, i1, i2, i3 );
196 }
197 }
198
199 static INLINE void
200 vcache_ef_quad( struct vcache_frontend *vcache,
201 unsigned i0,
202 unsigned i1,
203 unsigned i2,
204 unsigned i3 )
205 {
206 if (vcache->draw->rasterizer->flatshade_first) {
207 /* pass last quad vertex as first triangle vertex */
208 vcache_triangle_flags( vcache,
209 ( DRAW_PIPE_RESET_STIPPLE |
210 DRAW_PIPE_EDGE_FLAG_0 |
211 DRAW_PIPE_EDGE_FLAG_1 ),
212 i3, i0, i1 );
213
214 vcache_triangle_flags( vcache,
215 ( DRAW_PIPE_EDGE_FLAG_1 |
216 DRAW_PIPE_EDGE_FLAG_2 ),
217 i3, i1, i2 );
218 }
219 else {
220 /* pass last quad vertex as last triangle vertex */
221 vcache_triangle_flags( vcache,
222 ( DRAW_PIPE_RESET_STIPPLE |
223 DRAW_PIPE_EDGE_FLAG_0 |
224 DRAW_PIPE_EDGE_FLAG_2 ),
225 i0, i1, i3 );
226
227 vcache_triangle_flags( vcache,
228 ( DRAW_PIPE_EDGE_FLAG_0 |
229 DRAW_PIPE_EDGE_FLAG_1 ),
230 i1, i2, i3 );
231 }
232 }
233
/* At least for now, we're back to using a template include file for
 * this. The two paths aren't too different though - it may be
 * possible to reunify them.
 */

/* First instantiation: FUNC is vcache_run_extras, the run hook installed
 * by vcache_prepare() when PT_PIPELINE is set.  The primitive macros
 * forward the flags argument to the *_flags / ef_quad helpers.
 * NOTE(review): these macros are redefined below without an #undef in
 * this file, so the template header presumably #undefs them - confirm.
 */
#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2)
#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3)
#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1)
#define POINT(vc,i0) vcache_point(vc,i0)
#define FUNC vcache_run_extras
#include "draw_pt_vcache_tmp.h"

/* Second instantiation: FUNC is vcache_run, the fallback called from
 * vcache_check_run().  Here the flags argument is dropped and the plain
 * (unflagged) helpers are used.
 */
#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2)
#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3)
#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1)
#define POINT(vc,i0) vcache_point(vc,i0)
#define FUNC vcache_run
#include "draw_pt_vcache_tmp.h"
251
252 static INLINE void
253 rebase_uint_elts( const unsigned *src,
254 unsigned count,
255 int delta,
256 ushort *dest )
257 {
258 unsigned i;
259
260 for (i = 0; i < count; i++)
261 dest[i] = (ushort)(src[i] + delta);
262 }
263
264 static INLINE void
265 rebase_ushort_elts( const ushort *src,
266 unsigned count,
267 int delta,
268 ushort *dest )
269 {
270 unsigned i;
271
272 for (i = 0; i < count; i++)
273 dest[i] = (ushort)(src[i] + delta);
274 }
275
276 static INLINE void
277 rebase_ubyte_elts( const ubyte *src,
278 unsigned count,
279 int delta,
280 ushort *dest )
281 {
282 unsigned i;
283
284 for (i = 0; i < count; i++)
285 dest[i] = (ushort)(src[i] + delta);
286 }
287
288
289
290 static INLINE void
291 translate_uint_elts( const unsigned *src,
292 unsigned count,
293 ushort *dest )
294 {
295 unsigned i;
296
297 for (i = 0; i < count; i++)
298 dest[i] = (ushort)(src[i]);
299 }
300
301 static INLINE void
302 translate_ushort_elts( const ushort *src,
303 unsigned count,
304 ushort *dest )
305 {
306 unsigned i;
307
308 for (i = 0; i < count; i++)
309 dest[i] = (ushort)(src[i]);
310 }
311
312 static INLINE void
313 translate_ubyte_elts( const ubyte *src,
314 unsigned count,
315 ushort *dest )
316 {
317 unsigned i;
318
319 for (i = 0; i < count; i++)
320 dest[i] = (ushort)(src[i]);
321 }
322
323
324
325
/* Compiled out.  Maps an index element size in bytes (1, 2 or 4) to a
 * pipe_format, returning PIPE_FORMAT_NONE for anything else.
 * NOTE(review): the body reads `draw`, which is neither a parameter nor
 * declared in this scope, and never uses `get_elt`; it needs fixing
 * before it can be re-enabled.
 */
#if 0
static INLINE enum pipe_format
format_from_get_elt( pt_elt_func get_elt )
{
   switch (draw->pt.user.eltSize) {
   case 1: return PIPE_FORMAT_R8_UNORM;
   case 2: return PIPE_FORMAT_R16_UNORM;
   case 4: return PIPE_FORMAT_R32_UNORM;
   default: return PIPE_FORMAT_NONE;
   }
}
#endif
338
339 static INLINE void
340 vcache_check_run( struct draw_pt_front_end *frontend,
341 pt_elt_func get_elt,
342 const void *elts,
343 int elt_bias,
344 unsigned draw_count )
345 {
346 struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
347 struct draw_context *draw = vcache->draw;
348 unsigned min_index = draw->pt.user.min_index;
349 unsigned max_index = draw->pt.user.max_index;
350 unsigned index_size = draw->pt.user.eltSize;
351 unsigned fetch_count = max_index + 1 - min_index;
352 const ushort *transformed_elts;
353 ushort *storage = NULL;
354 boolean ok = FALSE;
355
356
357 if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
358 vcache->fetch_max,
359 draw_count);
360
361 if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
362 fetch_count >= UNDEFINED_VERTEX_ID ||
363 fetch_count > draw_count) {
364 if (0) debug_printf("fail\n");
365 goto fail;
366 }
367
368 if (vcache->middle_prim != vcache->input_prim) {
369 vcache->middle_prim = vcache->input_prim;
370 vcache->middle->prepare( vcache->middle,
371 vcache->input_prim,
372 vcache->middle_prim,
373 vcache->opt,
374 &vcache->fetch_max );
375 }
376
377
378 assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
379 (elt_bias < 0 && min_index + elt_bias < min_index));
380
381 if (min_index == 0 &&
382 index_size == 2)
383 {
384 transformed_elts = (const ushort *)elts;
385 }
386 else
387 {
388 storage = MALLOC( draw_count * sizeof(ushort) );
389 if (!storage)
390 goto fail;
391
392 if (min_index == 0) {
393 switch(index_size) {
394 case 1:
395 translate_ubyte_elts( (const ubyte *)elts,
396 draw_count,
397 storage );
398 break;
399
400 case 2:
401 translate_ushort_elts( (const ushort *)elts,
402 draw_count,
403 storage );
404 break;
405
406 case 4:
407 translate_uint_elts( (const uint *)elts,
408 draw_count,
409 storage );
410 break;
411
412 default:
413 assert(0);
414 FREE(storage);
415 return;
416 }
417 }
418 else {
419 switch(index_size) {
420 case 1:
421 rebase_ubyte_elts( (const ubyte *)elts,
422 draw_count,
423 0 - (int)min_index,
424 storage );
425 break;
426
427 case 2:
428 rebase_ushort_elts( (const ushort *)elts,
429 draw_count,
430 0 - (int)min_index,
431 storage );
432 break;
433
434 case 4:
435 rebase_uint_elts( (const uint *)elts,
436 draw_count,
437 0 - (int)min_index,
438 storage );
439 break;
440
441 default:
442 assert(0);
443 FREE(storage);
444 return;
445 }
446 }
447 transformed_elts = storage;
448 }
449
450 if (fetch_count < UNDEFINED_VERTEX_ID)
451 ok = vcache->middle->run_linear_elts( vcache->middle,
452 min_index + elt_bias, /* start */
453 fetch_count,
454 transformed_elts,
455 draw_count );
456
457 FREE(storage);
458
459 if (ok)
460 return;
461
462 debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
463 fetch_count, draw_count);
464
465 fail:
466 vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
467 }
468
469
470
471
472 static void
473 vcache_prepare( struct draw_pt_front_end *frontend,
474 unsigned in_prim,
475 unsigned out_prim,
476 struct draw_pt_middle_end *middle,
477 unsigned opt )
478 {
479 struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
480
481 if (opt & PT_PIPELINE)
482 {
483 vcache->base.run = vcache_run_extras;
484 }
485 else
486 {
487 vcache->base.run = vcache_check_run;
488 }
489
490 vcache->input_prim = in_prim;
491 vcache->output_prim = u_reduced_prim(out_prim);
492
493 vcache->middle = middle;
494 vcache->opt = opt;
495
496 /* Have to run prepare here, but try and guess a good prim for
497 * doing so:
498 */
499 vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim;
500 middle->prepare( middle, vcache->input_prim,
501 vcache->middle_prim, opt, &vcache->fetch_max );
502 }
503
504
505
506
507 static void
508 vcache_finish( struct draw_pt_front_end *frontend )
509 {
510 struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
511 vcache->middle->finish( vcache->middle );
512 vcache->middle = NULL;
513 }
514
/**
 * Front-end destroy() hook: release the front end object itself.
 */
static void
vcache_destroy( struct draw_pt_front_end *frontend )
{
   FREE( frontend );
}
520
521
522 struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw )
523 {
524 struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend );
525 if (vcache == NULL)
526 return NULL;
527
528 vcache->base.prepare = vcache_prepare;
529 vcache->base.run = NULL;
530 vcache->base.finish = vcache_finish;
531 vcache->base.destroy = vcache_destroy;
532 vcache->draw = draw;
533
534 memset(vcache->in, ~0, sizeof(vcache->in));
535
536 return &vcache->base;
537 }