gallium: a lot more complete implementation of stream output
[mesa.git] / src / gallium / auxiliary / draw / draw_pt_vcache.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 */
32
33 #include "util/u_memory.h"
34 #include "util/u_prim.h"
35 #include "draw/draw_context.h"
36 #include "draw/draw_private.h"
37 #include "draw/draw_pt.h"
38
39
40 #define CACHE_MAX 256
41 #define FETCH_MAX 256
42 #define DRAW_MAX (16*1024)
43
44 struct vcache_frontend {
45 struct draw_pt_front_end base;
46 struct draw_context *draw;
47
48 unsigned in[CACHE_MAX];
49 ushort out[CACHE_MAX];
50
51 ushort draw_elts[DRAW_MAX];
52 unsigned fetch_elts[FETCH_MAX];
53
54 unsigned draw_count;
55 unsigned fetch_count;
56 unsigned fetch_max;
57
58 struct draw_pt_middle_end *middle;
59
60 unsigned input_prim;
61 unsigned output_prim;
62
63 unsigned middle_prim;
64 unsigned opt;
65 };
66
67 static INLINE void
68 vcache_flush( struct vcache_frontend *vcache )
69 {
70 if (vcache->middle_prim != vcache->output_prim) {
71 vcache->middle_prim = vcache->output_prim;
72 vcache->middle->prepare( vcache->middle,
73 vcache->middle_prim,
74 vcache->opt,
75 &vcache->fetch_max );
76 }
77
78 if (vcache->draw_count) {
79 vcache->middle->run( vcache->middle,
80 vcache->fetch_elts,
81 vcache->fetch_count,
82 vcache->draw_elts,
83 vcache->draw_count );
84 }
85
86 memset(vcache->in, ~0, sizeof(vcache->in));
87 vcache->fetch_count = 0;
88 vcache->draw_count = 0;
89 }
90
91 static INLINE void
92 vcache_check_flush( struct vcache_frontend *vcache )
93 {
94 if ( vcache->draw_count + 6 >= DRAW_MAX ||
95 vcache->fetch_count + 4 >= FETCH_MAX )
96 {
97 vcache_flush( vcache );
98 }
99 }
100
101
102 static INLINE void
103 vcache_elt( struct vcache_frontend *vcache,
104 unsigned felt,
105 ushort flags )
106 {
107 unsigned idx = felt % CACHE_MAX;
108
109 if (vcache->in[idx] != felt) {
110 assert(vcache->fetch_count < FETCH_MAX);
111
112 vcache->in[idx] = felt;
113 vcache->out[idx] = (ushort)vcache->fetch_count;
114 vcache->fetch_elts[vcache->fetch_count++] = felt;
115 }
116
117 vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags;
118 }
119
120
121
122 static INLINE void
123 vcache_triangle( struct vcache_frontend *vcache,
124 unsigned i0,
125 unsigned i1,
126 unsigned i2 )
127 {
128 vcache_elt(vcache, i0, 0);
129 vcache_elt(vcache, i1, 0);
130 vcache_elt(vcache, i2, 0);
131 vcache_check_flush(vcache);
132 }
133
134
135 static INLINE void
136 vcache_triangle_flags( struct vcache_frontend *vcache,
137 ushort flags,
138 unsigned i0,
139 unsigned i1,
140 unsigned i2 )
141 {
142 vcache_elt(vcache, i0, flags);
143 vcache_elt(vcache, i1, 0);
144 vcache_elt(vcache, i2, 0);
145 vcache_check_flush(vcache);
146 }
147
148 static INLINE void
149 vcache_line( struct vcache_frontend *vcache,
150 unsigned i0,
151 unsigned i1 )
152 {
153 vcache_elt(vcache, i0, 0);
154 vcache_elt(vcache, i1, 0);
155 vcache_check_flush(vcache);
156 }
157
158
159 static INLINE void
160 vcache_line_flags( struct vcache_frontend *vcache,
161 ushort flags,
162 unsigned i0,
163 unsigned i1 )
164 {
165 vcache_elt(vcache, i0, flags);
166 vcache_elt(vcache, i1, 0);
167 vcache_check_flush(vcache);
168 }
169
170
171 static INLINE void
172 vcache_point( struct vcache_frontend *vcache,
173 unsigned i0 )
174 {
175 vcache_elt(vcache, i0, 0);
176 vcache_check_flush(vcache);
177 }
178
179 static INLINE void
180 vcache_quad( struct vcache_frontend *vcache,
181 unsigned i0,
182 unsigned i1,
183 unsigned i2,
184 unsigned i3 )
185 {
186 if (vcache->draw->rasterizer->flatshade_first) {
187 /* pass last quad vertex as first triangle vertex */
188 vcache_triangle( vcache, i3, i0, i1 );
189 vcache_triangle( vcache, i3, i1, i2 );
190 }
191 else {
192 /* pass last quad vertex as last triangle vertex */
193 vcache_triangle( vcache, i0, i1, i3 );
194 vcache_triangle( vcache, i1, i2, i3 );
195 }
196 }
197
198 static INLINE void
199 vcache_ef_quad( struct vcache_frontend *vcache,
200 unsigned i0,
201 unsigned i1,
202 unsigned i2,
203 unsigned i3 )
204 {
205 if (vcache->draw->rasterizer->flatshade_first) {
206 /* pass last quad vertex as first triangle vertex */
207 vcache_triangle_flags( vcache,
208 ( DRAW_PIPE_RESET_STIPPLE |
209 DRAW_PIPE_EDGE_FLAG_0 |
210 DRAW_PIPE_EDGE_FLAG_1 ),
211 i3, i0, i1 );
212
213 vcache_triangle_flags( vcache,
214 ( DRAW_PIPE_EDGE_FLAG_1 |
215 DRAW_PIPE_EDGE_FLAG_2 ),
216 i3, i1, i2 );
217 }
218 else {
219 /* pass last quad vertex as last triangle vertex */
220 vcache_triangle_flags( vcache,
221 ( DRAW_PIPE_RESET_STIPPLE |
222 DRAW_PIPE_EDGE_FLAG_0 |
223 DRAW_PIPE_EDGE_FLAG_2 ),
224 i0, i1, i3 );
225
226 vcache_triangle_flags( vcache,
227 ( DRAW_PIPE_EDGE_FLAG_0 |
228 DRAW_PIPE_EDGE_FLAG_1 ),
229 i1, i2, i3 );
230 }
231 }
232
233 /* At least for now, we're back to using a template include file for
234 * this. The two paths aren't too different though - it may be
235 * possible to reunify them.
236 */
237 #define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2)
238 #define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3)
239 #define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1)
240 #define POINT(vc,i0) vcache_point(vc,i0)
241 #define FUNC vcache_run_extras
242 #include "draw_pt_vcache_tmp.h"
243
244 #define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2)
245 #define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3)
246 #define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1)
247 #define POINT(vc,i0) vcache_point(vc,i0)
248 #define FUNC vcache_run
249 #include "draw_pt_vcache_tmp.h"
250
251 static INLINE void
252 rebase_uint_elts( const unsigned *src,
253 unsigned count,
254 int delta,
255 ushort *dest )
256 {
257 unsigned i;
258
259 for (i = 0; i < count; i++)
260 dest[i] = (ushort)(src[i] + delta);
261 }
262
263 static INLINE void
264 rebase_ushort_elts( const ushort *src,
265 unsigned count,
266 int delta,
267 ushort *dest )
268 {
269 unsigned i;
270
271 for (i = 0; i < count; i++)
272 dest[i] = (ushort)(src[i] + delta);
273 }
274
275 static INLINE void
276 rebase_ubyte_elts( const ubyte *src,
277 unsigned count,
278 int delta,
279 ushort *dest )
280 {
281 unsigned i;
282
283 for (i = 0; i < count; i++)
284 dest[i] = (ushort)(src[i] + delta);
285 }
286
287
288
289 static INLINE void
290 translate_uint_elts( const unsigned *src,
291 unsigned count,
292 ushort *dest )
293 {
294 unsigned i;
295
296 for (i = 0; i < count; i++)
297 dest[i] = (ushort)(src[i]);
298 }
299
300 static INLINE void
301 translate_ushort_elts( const ushort *src,
302 unsigned count,
303 ushort *dest )
304 {
305 unsigned i;
306
307 for (i = 0; i < count; i++)
308 dest[i] = (ushort)(src[i]);
309 }
310
311 static INLINE void
312 translate_ubyte_elts( const ubyte *src,
313 unsigned count,
314 ushort *dest )
315 {
316 unsigned i;
317
318 for (i = 0; i < count; i++)
319 dest[i] = (ushort)(src[i]);
320 }
321
322
323
324
#if 0
/* Compiled out.  Maps the index size in bytes to a single-channel
 * pipe_format.  NOTE(review): the body reads 'draw', which is not a
 * parameter here, and never uses 'get_elt' -- it would need fixing
 * before being re-enabled.
 */
static INLINE enum pipe_format
format_from_get_elt( pt_elt_func get_elt )
{
   switch (draw->pt.user.eltSize) {
   case 1:
      return PIPE_FORMAT_R8_UNORM;
   case 2:
      return PIPE_FORMAT_R16_UNORM;
   case 4:
      return PIPE_FORMAT_R32_UNORM;
   default:
      return PIPE_FORMAT_NONE;
   }
}
#endif
337
338 static INLINE void
339 vcache_check_run( struct draw_pt_front_end *frontend,
340 pt_elt_func get_elt,
341 const void *elts,
342 int elt_bias,
343 unsigned draw_count )
344 {
345 struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
346 struct draw_context *draw = vcache->draw;
347 unsigned min_index = draw->pt.user.min_index;
348 unsigned max_index = draw->pt.user.max_index;
349 unsigned index_size = draw->pt.user.eltSize;
350 unsigned fetch_count = max_index + 1 - min_index;
351 const ushort *transformed_elts;
352 ushort *storage = NULL;
353 boolean ok = FALSE;
354
355
356 if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
357 vcache->fetch_max,
358 draw_count);
359
360 if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
361 fetch_count >= UNDEFINED_VERTEX_ID ||
362 fetch_count > draw_count) {
363 if (0) debug_printf("fail\n");
364 goto fail;
365 }
366
367 if (vcache->middle_prim != vcache->input_prim) {
368 vcache->middle_prim = vcache->input_prim;
369 vcache->middle->prepare( vcache->middle,
370 vcache->middle_prim,
371 vcache->opt,
372 &vcache->fetch_max );
373 }
374
375
376 assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
377 (elt_bias < 0 && min_index + elt_bias < min_index));
378
379 if (min_index == 0 &&
380 index_size == 2)
381 {
382 transformed_elts = (const ushort *)elts;
383 }
384 else
385 {
386 storage = MALLOC( draw_count * sizeof(ushort) );
387 if (!storage)
388 goto fail;
389
390 if (min_index == 0) {
391 switch(index_size) {
392 case 1:
393 translate_ubyte_elts( (const ubyte *)elts,
394 draw_count,
395 storage );
396 break;
397
398 case 2:
399 translate_ushort_elts( (const ushort *)elts,
400 draw_count,
401 storage );
402 break;
403
404 case 4:
405 translate_uint_elts( (const uint *)elts,
406 draw_count,
407 storage );
408 break;
409
410 default:
411 assert(0);
412 FREE(storage);
413 return;
414 }
415 }
416 else {
417 switch(index_size) {
418 case 1:
419 rebase_ubyte_elts( (const ubyte *)elts,
420 draw_count,
421 0 - (int)min_index,
422 storage );
423 break;
424
425 case 2:
426 rebase_ushort_elts( (const ushort *)elts,
427 draw_count,
428 0 - (int)min_index,
429 storage );
430 break;
431
432 case 4:
433 rebase_uint_elts( (const uint *)elts,
434 draw_count,
435 0 - (int)min_index,
436 storage );
437 break;
438
439 default:
440 assert(0);
441 FREE(storage);
442 return;
443 }
444 }
445 transformed_elts = storage;
446 }
447
448 if (fetch_count < UNDEFINED_VERTEX_ID)
449 ok = vcache->middle->run_linear_elts( vcache->middle,
450 min_index + elt_bias, /* start */
451 fetch_count,
452 transformed_elts,
453 draw_count );
454
455 FREE(storage);
456
457 if (ok)
458 return;
459
460 debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
461 fetch_count, draw_count);
462
463 fail:
464 vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
465 }
466
467
468
469
470 static void
471 vcache_prepare( struct draw_pt_front_end *frontend,
472 unsigned prim,
473 struct draw_pt_middle_end *middle,
474 unsigned opt )
475 {
476 struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
477
478 if (opt & PT_PIPELINE)
479 {
480 vcache->base.run = vcache_run_extras;
481 }
482 else
483 {
484 vcache->base.run = vcache_check_run;
485 }
486
487 vcache->input_prim = prim;
488 vcache->output_prim = u_reduced_prim(prim);
489
490 vcache->middle = middle;
491 vcache->opt = opt;
492
493 /* Have to run prepare here, but try and guess a good prim for
494 * doing so:
495 */
496 vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim;
497 middle->prepare( middle, vcache->middle_prim, opt, &vcache->fetch_max );
498 }
499
500
501
502
503 static void
504 vcache_finish( struct draw_pt_front_end *frontend )
505 {
506 struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
507 vcache->middle->finish( vcache->middle );
508 vcache->middle = NULL;
509 }
510
/**
 * Release the front end allocated by draw_pt_vcache().
 */
static void
vcache_destroy(struct draw_pt_front_end *frontend)
{
   FREE(frontend);
}
516
517
518 struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw )
519 {
520 struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend );
521 if (vcache == NULL)
522 return NULL;
523
524 vcache->base.prepare = vcache_prepare;
525 vcache->base.run = NULL;
526 vcache->base.finish = vcache_finish;
527 vcache->base.destroy = vcache_destroy;
528 vcache->draw = draw;
529
530 memset(vcache->in, ~0, sizeof(vcache->in));
531
532 return &vcache->base;
533 }