Merge commit 'origin/dlist-statechange-shortcircuit' into mesa_7_5_branch
[mesa.git] / src / mesa / vbo / vbo_split_copy.c
1
2 /*
3 * Mesa 3-D graphics library
4 * Version: 6.5
5 *
6 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 * Authors:
26 * Keith Whitwell <keith@tungstengraphics.com>
27 */
28
29 /* Split indexed primitives with per-vertex copying.
30 */
31
32 #include "main/glheader.h"
33 #include "main/imports.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "main/mtypes.h"
37
38 #include "vbo_split.h"
39 #include "vbo.h"
40
41
42 #define ELT_TABLE_SIZE 16
43
44 /* Used for vertex-level splitting of indexed buffers. Note that
45 * non-indexed primitives may be converted to indexed in some cases
46 * (eg loops, fans) in order to use this splitting path.
47 */
48 struct copy_context {
49
50 GLcontext *ctx;
51 const struct gl_client_array **array;
52 const struct _mesa_prim *prim;
53 GLuint nr_prims;
54 const struct _mesa_index_buffer *ib;
55 vbo_draw_func draw;
56
57 const struct split_limits *limits;
58
59 struct {
60 GLuint attr;
61 GLuint size;
62 const struct gl_client_array *array;
63 const GLubyte *src_ptr;
64
65 struct gl_client_array dstarray;
66
67 } varying[VERT_ATTRIB_MAX];
68 GLuint nr_varying;
69
70 const struct gl_client_array *dstarray_ptr[VERT_ATTRIB_MAX];
71 struct _mesa_index_buffer dstib;
72
73 GLuint *translated_elt_buf;
74 const GLuint *srcelt;
75
76 /* A baby hash table to avoid re-emitting (some) duplicate
77 * vertices when splitting indexed primitives.
78 */
79 struct {
80 GLuint in;
81 GLuint out;
82 } vert_cache[ELT_TABLE_SIZE];
83
84
85 GLuint vertex_size;
86 GLubyte *dstbuf;
87 GLubyte *dstptr; /* dstptr == dstbuf + dstelt_max * vertsize */
88 GLuint dstbuf_size; /* in vertices */
89 GLuint dstbuf_nr; /* count of emitted vertices, also the
90 * largest value in dstelt. Our
91 * MaxIndex.
92 */
93
94 GLuint *dstelt;
95 GLuint dstelt_nr;
96 GLuint dstelt_size;
97
98 #define MAX_PRIM 32
99 struct _mesa_prim dstprim[MAX_PRIM];
100 GLuint dstprim_nr;
101
102 };
103
104
105 static GLuint type_size( GLenum type )
106 {
107 switch(type) {
108 case GL_BYTE: return sizeof(GLbyte);
109 case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
110 case GL_SHORT: return sizeof(GLshort);
111 case GL_UNSIGNED_SHORT: return sizeof(GLushort);
112 case GL_INT: return sizeof(GLint);
113 case GL_UNSIGNED_INT: return sizeof(GLuint);
114 case GL_FLOAT: return sizeof(GLfloat);
115 case GL_DOUBLE: return sizeof(GLdouble);
116 default: return 0;
117 }
118 }
119
120 static GLuint attr_size( const struct gl_client_array *array )
121 {
122 return array->Size * type_size(array->Type);
123 }
124
125
126 /* Starts returning true slightly before the buffer fills, to ensure
127 * that there is sufficient room for any remaining vertices to finish
128 * off the prim:
129 */
130 static GLboolean check_flush( struct copy_context *copy )
131 {
132 GLenum mode = copy->dstprim[copy->dstprim_nr].mode;
133
134 if (GL_TRIANGLE_STRIP == mode &&
135 copy->dstelt_nr & 1) { /* see bug9962 */
136 return GL_FALSE;
137 }
138
139 if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
140 return GL_TRUE;
141
142 if (copy->dstelt_nr + 4 > copy->dstelt_size)
143 return GL_TRUE;
144
145 return GL_FALSE;
146 }
147
148 static void flush( struct copy_context *copy )
149 {
150 GLuint i;
151
152 /* Set some counters:
153 */
154 copy->dstib.count = copy->dstelt_nr;
155
156 copy->draw( copy->ctx,
157 copy->dstarray_ptr,
158 copy->dstprim,
159 copy->dstprim_nr,
160 &copy->dstib,
161 0,
162 copy->dstbuf_nr );
163
164 /* Reset all pointers:
165 */
166 copy->dstprim_nr = 0;
167 copy->dstelt_nr = 0;
168 copy->dstbuf_nr = 0;
169 copy->dstptr = copy->dstbuf;
170
171 /* Clear the vertex cache:
172 */
173 for (i = 0; i < ELT_TABLE_SIZE; i++)
174 copy->vert_cache[i].in = ~0;
175 }
176
177
178
179 static void begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
180 {
181 struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
182
183 /* _mesa_printf("begin %s (%d)\n", _mesa_lookup_prim_by_nr(mode), begin_flag); */
184
185 prim->mode = mode;
186 prim->begin = begin_flag;
187 }
188
189
190 /* Use a hashtable to attempt to identify recently-emitted vertices
191 * and avoid re-emitting them.
192 */
193 static GLuint elt(struct copy_context *copy, GLuint elt_idx)
194 {
195 GLuint elt = copy->srcelt[elt_idx];
196 GLuint slot = elt & (ELT_TABLE_SIZE-1);
197
198 /* _mesa_printf("elt %d\n", elt); */
199
200 /* Look up the incoming element in the vertex cache. Re-emit if
201 * necessary.
202 */
203 if (copy->vert_cache[slot].in != elt) {
204 GLubyte *csr = copy->dstptr;
205 GLuint i;
206
207 /* _mesa_printf(" --> emit to dstelt %d\n", copy->dstbuf_nr); */
208
209 for (i = 0; i < copy->nr_varying; i++) {
210 const struct gl_client_array *srcarray = copy->varying[i].array;
211 const GLubyte *srcptr = copy->varying[i].src_ptr + elt * srcarray->StrideB;
212
213 memcpy(csr, srcptr, copy->varying[i].size);
214 csr += copy->varying[i].size;
215
216 if (0)
217 {
218 const GLuint *f = (const GLuint *)srcptr;
219 GLuint j;
220 _mesa_printf(" varying %d: ", i);
221 for(j = 0; j < copy->varying[i].size / 4; j++)
222 _mesa_printf("%x ", f[j]);
223 _mesa_printf("\n");
224 }
225
226 }
227
228 copy->vert_cache[slot].in = elt;
229 copy->vert_cache[slot].out = copy->dstbuf_nr++;
230 copy->dstptr += copy->vertex_size;
231
232 assert(csr == copy->dstptr);
233 assert(copy->dstptr == (copy->dstbuf +
234 copy->dstbuf_nr *
235 copy->vertex_size));
236 }
237 /* else */
238 /* _mesa_printf(" --> reuse vertex\n"); */
239
240 /* _mesa_printf(" --> emit %d\n", copy->vert_cache[slot].out); */
241 copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
242 return check_flush(copy);
243 }
244
245 static void end( struct copy_context *copy, GLboolean end_flag )
246 {
247 struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
248
249 /* _mesa_printf("end (%d)\n", end_flag); */
250
251 prim->end = end_flag;
252 prim->count = copy->dstelt_nr - prim->start;
253
254 if (++copy->dstprim_nr == MAX_PRIM ||
255 check_flush(copy))
256 flush(copy);
257 }
258
259
260
261 static void replay_elts( struct copy_context *copy )
262 {
263 GLuint i, j, k;
264 GLboolean split;
265
266 for (i = 0; i < copy->nr_prims; i++) {
267 const struct _mesa_prim *prim = &copy->prim[i];
268 const GLuint start = prim->start;
269 GLuint first, incr;
270
271 switch (prim->mode) {
272
273 case GL_LINE_LOOP:
274 /* Convert to linestrip and emit the final vertex explicitly,
275 * but only in the resultant strip that requires it.
276 */
277 j = 0;
278 while (j != prim->count) {
279 begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
280
281 for (split = GL_FALSE; j != prim->count && !split; j++)
282 split = elt(copy, start + j);
283
284 if (j == prim->count) {
285 /* Done, emit final line. Split doesn't matter as
286 * it is always raised a bit early so we can emit
287 * the last verts if necessary!
288 */
289 if (prim->end)
290 (void)elt(copy, start + 0);
291
292 end(copy, prim->end);
293 }
294 else {
295 /* Wrap
296 */
297 assert(split);
298 end(copy, 0);
299 j--;
300 }
301 }
302 break;
303
304 case GL_TRIANGLE_FAN:
305 case GL_POLYGON:
306 j = 2;
307 while (j != prim->count) {
308 begin(copy, prim->mode, prim->begin && j == 0);
309
310 split = elt(copy, start+0);
311 assert(!split);
312
313 split = elt(copy, start+j-1);
314 assert(!split);
315
316 for (; j != prim->count && !split; j++)
317 split = elt(copy, start+j);
318
319 end(copy, prim->end && j == prim->count);
320
321 if (j != prim->count) {
322 /* Wrapped the primitive, need to repeat some vertices:
323 */
324 j -= 1;
325 }
326 }
327 break;
328
329 default:
330 (void)split_prim_inplace(prim->mode, &first, &incr);
331
332 j = 0;
333 while (j != prim->count) {
334
335 begin(copy, prim->mode, prim->begin && j == 0);
336
337 split = 0;
338 for (k = 0; k < first; k++, j++)
339 split |= elt(copy, start+j);
340
341 assert(!split);
342
343 for (; j != prim->count && !split; )
344 for (k = 0; k < incr; k++, j++)
345 split |= elt(copy, start+j);
346
347 end(copy, prim->end && j == prim->count);
348
349 if (j != prim->count) {
350 /* Wrapped the primitive, need to repeat some vertices:
351 */
352 assert(j > first - incr);
353 j -= (first - incr);
354 }
355 }
356 break;
357 }
358 }
359
360 if (copy->dstprim_nr)
361 flush(copy);
362 }
363
364
365 static void replay_init( struct copy_context *copy )
366 {
367 GLcontext *ctx = copy->ctx;
368 GLuint i;
369 GLuint offset;
370 const GLvoid *srcptr;
371
372 /* Make a list of varying attributes and their vbo's. Also
373 * calculate vertex size.
374 */
375 copy->vertex_size = 0;
376 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
377 struct gl_buffer_object *vbo = copy->array[i]->BufferObj;
378
379 if (copy->array[i]->StrideB == 0) {
380 copy->dstarray_ptr[i] = copy->array[i];
381 }
382 else {
383 GLuint j = copy->nr_varying++;
384
385 copy->varying[j].attr = i;
386 copy->varying[j].array = copy->array[i];
387 copy->varying[j].size = attr_size(copy->array[i]);
388 copy->vertex_size += attr_size(copy->array[i]);
389
390 if (vbo->Name && !vbo->Pointer)
391 ctx->Driver.MapBuffer(ctx,
392 GL_ARRAY_BUFFER_ARB,
393 GL_WRITE_ONLY, /* XXX */
394 vbo);
395
396 copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer,
397 copy->array[i]->Ptr);
398
399 copy->dstarray_ptr[i] = &copy->varying[j].dstarray;
400 }
401 }
402
403 /* There must always be an index buffer. Currently require the
404 * caller convert non-indexed prims to indexed. Could alternately
405 * do it internally.
406 */
407 if (copy->ib->obj->Name && !copy->ib->obj->Pointer)
408 ctx->Driver.MapBuffer(ctx,
409 GL_ARRAY_BUFFER_ARB, /* XXX */
410 GL_WRITE_ONLY, /* XXX */
411 copy->ib->obj);
412
413 srcptr = (const GLubyte *)ADD_POINTERS(copy->ib->obj->Pointer, copy->ib->ptr);
414
415 switch (copy->ib->type) {
416 case GL_UNSIGNED_BYTE:
417 copy->translated_elt_buf = _mesa_malloc(sizeof(GLuint) * copy->ib->count);
418 copy->srcelt = copy->translated_elt_buf;
419
420 for (i = 0; i < copy->ib->count; i++)
421 copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
422 break;
423
424 case GL_UNSIGNED_SHORT:
425 copy->translated_elt_buf = _mesa_malloc(sizeof(GLuint) * copy->ib->count);
426 copy->srcelt = copy->translated_elt_buf;
427
428 for (i = 0; i < copy->ib->count; i++)
429 copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
430 break;
431
432 case GL_UNSIGNED_INT:
433 copy->translated_elt_buf = NULL;
434 copy->srcelt = (const GLuint *)srcptr;
435 break;
436 }
437
438
439 /* Figure out the maximum allowed vertex buffer size:
440 */
441 if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
442 copy->dstbuf_size = copy->limits->max_verts;
443 }
444 else {
445 copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
446 }
447
448 /* Allocate an output vertex buffer:
449 *
450 * XXX: This should be a VBO!
451 */
452 copy->dstbuf = _mesa_malloc(copy->dstbuf_size *
453 copy->vertex_size);
454 copy->dstptr = copy->dstbuf;
455
456 /* Setup new vertex arrays to point into the output buffer:
457 */
458 for (offset = 0, i = 0; i < copy->nr_varying; i++) {
459 const struct gl_client_array *src = copy->varying[i].array;
460 struct gl_client_array *dst = &copy->varying[i].dstarray;
461
462 dst->Size = src->Size;
463 dst->Type = src->Type;
464 dst->Format = GL_RGBA;
465 dst->Stride = copy->vertex_size;
466 dst->StrideB = copy->vertex_size;
467 dst->Ptr = copy->dstbuf + offset;
468 dst->Enabled = GL_TRUE;
469 dst->Normalized = src->Normalized;
470 dst->BufferObj = ctx->Array.NullBufferObj;
471 dst->_MaxElement = copy->dstbuf_size; /* may be less! */
472
473 offset += copy->varying[i].size;
474 }
475
476 /* Allocate an output element list:
477 */
478 copy->dstelt_size = MIN2(65536,
479 copy->ib->count * 2 + 3);
480 copy->dstelt_size = MIN2(copy->dstelt_size,
481 copy->limits->max_indices);
482 copy->dstelt = _mesa_malloc(sizeof(GLuint) * copy->dstelt_size);
483 copy->dstelt_nr = 0;
484
485 /* Setup the new index buffer to point to the allocated element
486 * list:
487 */
488 copy->dstib.count = 0; /* duplicates dstelt_nr */
489 copy->dstib.type = GL_UNSIGNED_INT;
490 copy->dstib.obj = ctx->Array.NullBufferObj;
491 copy->dstib.ptr = copy->dstelt;
492 }
493
494
495 static void replay_finish( struct copy_context *copy )
496 {
497 GLcontext *ctx = copy->ctx;
498 GLuint i;
499
500 /* Free our vertex and index buffers:
501 */
502 _mesa_free(copy->translated_elt_buf);
503 _mesa_free(copy->dstbuf);
504 _mesa_free(copy->dstelt);
505
506 /* Unmap VBO's
507 */
508 for (i = 0; i < copy->nr_varying; i++) {
509 struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
510
511 if (vbo->Name && vbo->Pointer)
512 ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo);
513 }
514
515 /* Unmap index buffer:
516 */
517 if (copy->ib->obj->Name && copy->ib->obj->Pointer) {
518 ctx->Driver.UnmapBuffer(ctx,
519 GL_ARRAY_BUFFER_ARB, /* XXX */
520 copy->ib->obj);
521 }
522 }
523
524 void vbo_split_copy( GLcontext *ctx,
525 const struct gl_client_array *arrays[],
526 const struct _mesa_prim *prim,
527 GLuint nr_prims,
528 const struct _mesa_index_buffer *ib,
529 vbo_draw_func draw,
530 const struct split_limits *limits )
531 {
532 struct copy_context copy;
533 GLuint i;
534
535 memset(&copy, 0, sizeof(copy));
536
537 /* Require indexed primitives:
538 */
539 assert(ib);
540
541 copy.ctx = ctx;
542 copy.array = arrays;
543 copy.prim = prim;
544 copy.nr_prims = nr_prims;
545 copy.ib = ib;
546 copy.draw = draw;
547 copy.limits = limits;
548
549
550 /* Clear the vertex cache:
551 */
552 for (i = 0; i < ELT_TABLE_SIZE; i++)
553 copy.vert_cache[i].in = ~0;
554
555
556 replay_init(&copy);
557 replay_elts(&copy);
558 replay_finish(&copy);
559 }