Cell: generalize the batch buffer code for vertex buffers...
[mesa.git] / src / mesa / vbo / vbo_split_copy.c
1
2 /*
3 * Mesa 3-D graphics library
4 * Version: 6.5
5 *
6 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 * Authors:
26 * Keith Whitwell <keith@tungstengraphics.com>
27 */
28
29 /* Split indexed primitives with per-vertex copying.
30 */
31
32 #include "main/glheader.h"
33 #include "main/imports.h"
34 #include "main/macros.h"
35 #include "main/enums.h"
36 #include "main/mtypes.h"
37
38 #include "vbo_split.h"
39 #include "vbo.h"
40
41
42 #define ELT_TABLE_SIZE 16
43
44 /* Used for vertex-level splitting of indexed buffers. Note that
45 * non-indexed primitives may be converted to indexed in some cases
46 * (eg loops, fans) in order to use this splitting path.
47 */
48 struct copy_context {
49
50 GLcontext *ctx;
51 const struct gl_client_array **array;
52 const struct _mesa_prim *prim;
53 GLuint nr_prims;
54 const struct _mesa_index_buffer *ib;
55 vbo_draw_func draw;
56
57 const struct split_limits *limits;
58
59 struct {
60 GLuint attr;
61 GLuint size;
62 const struct gl_client_array *array;
63 const GLubyte *src_ptr;
64
65 struct gl_client_array dstarray;
66
67 } varying[VERT_ATTRIB_MAX];
68 GLuint nr_varying;
69
70 const struct gl_client_array *dstarray_ptr[VERT_ATTRIB_MAX];
71 struct _mesa_index_buffer dstib;
72
73 GLuint *translated_elt_buf;
74 const GLuint *srcelt;
75
76 /* A baby hash table to avoid re-emitting (some) duplicate
77 * vertices when splitting indexed primitives.
78 */
79 struct {
80 GLuint in;
81 GLuint out;
82 } vert_cache[ELT_TABLE_SIZE];
83
84
85 GLuint vertex_size;
86 GLubyte *dstbuf;
87 GLubyte *dstptr; /* dstptr == dstbuf + dstelt_max * vertsize */
88 GLuint dstbuf_size; /* in vertices */
89 GLuint dstbuf_nr; /* count of emitted vertices, also the
90 * largest value in dstelt. Our
91 * MaxIndex.
92 */
93
94 GLuint *dstelt;
95 GLuint dstelt_nr;
96 GLuint dstelt_size;
97
98 #define MAX_PRIM 32
99 struct _mesa_prim dstprim[MAX_PRIM];
100 GLuint dstprim_nr;
101
102 };
103
104
105 static GLuint type_size( GLenum type )
106 {
107 switch(type) {
108 case GL_BYTE: return sizeof(GLbyte);
109 case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
110 case GL_SHORT: return sizeof(GLshort);
111 case GL_UNSIGNED_SHORT: return sizeof(GLushort);
112 case GL_INT: return sizeof(GLint);
113 case GL_UNSIGNED_INT: return sizeof(GLuint);
114 case GL_FLOAT: return sizeof(GLfloat);
115 case GL_DOUBLE: return sizeof(GLdouble);
116 default: return 0;
117 }
118 }
119
120 static GLuint attr_size( const struct gl_client_array *array )
121 {
122 return array->Size * type_size(array->Type);
123 }
124
125
126 /* Starts returning true slightly before the buffer fills, to ensure
127 * that there is sufficient room for any remaining vertices to finish
128 * off the prim:
129 */
130 static GLboolean check_flush( struct copy_context *copy )
131 {
132 if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
133 return GL_TRUE;
134
135 if (copy->dstelt_nr + 4 > copy->dstelt_size)
136 return GL_TRUE;
137
138 return GL_FALSE;
139 }
140
141 static void flush( struct copy_context *copy )
142 {
143 GLuint i;
144
145 /* Set some counters:
146 */
147 copy->dstib.count = copy->dstelt_nr;
148
149 copy->draw( copy->ctx,
150 copy->dstarray_ptr,
151 copy->dstprim,
152 copy->dstprim_nr,
153 &copy->dstib,
154 0,
155 copy->dstbuf_nr );
156
157 /* Reset all pointers:
158 */
159 copy->dstprim_nr = 0;
160 copy->dstelt_nr = 0;
161 copy->dstbuf_nr = 0;
162 copy->dstptr = copy->dstbuf;
163
164 /* Clear the vertex cache:
165 */
166 for (i = 0; i < ELT_TABLE_SIZE; i++)
167 copy->vert_cache[i].in = ~0;
168 }
169
170
171
172 static void begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
173 {
174 struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
175
176 /* _mesa_printf("begin %s (%d)\n", _mesa_lookup_enum_by_nr(mode), begin_flag); */
177
178 prim->mode = mode;
179 prim->begin = begin_flag;
180 }
181
182
183 /* Use a hashtable to attempt to identify recently-emitted vertices
184 * and avoid re-emitting them.
185 */
186 static GLuint elt(struct copy_context *copy, GLuint elt_idx)
187 {
188 GLuint elt = copy->srcelt[elt_idx];
189 GLuint slot = elt & (ELT_TABLE_SIZE-1);
190
191 /* _mesa_printf("elt %d\n", elt); */
192
193 /* Look up the incoming element in the vertex cache. Re-emit if
194 * necessary.
195 */
196 if (copy->vert_cache[slot].in != elt) {
197 GLubyte *csr = copy->dstptr;
198 GLuint i;
199
200 /* _mesa_printf(" --> emit to dstelt %d\n", copy->dstbuf_nr); */
201
202 for (i = 0; i < copy->nr_varying; i++) {
203 const struct gl_client_array *srcarray = copy->varying[i].array;
204 const GLubyte *srcptr = copy->varying[i].src_ptr + elt * srcarray->StrideB;
205
206 memcpy(csr, srcptr, copy->varying[i].size);
207 csr += copy->varying[i].size;
208
209 if (0)
210 {
211 const GLuint *f = (const GLuint *)srcptr;
212 GLuint j;
213 _mesa_printf(" varying %d: ", i);
214 for(j = 0; j < copy->varying[i].size / 4; j++)
215 _mesa_printf("%x ", f[j]);
216 _mesa_printf("\n");
217 }
218
219 }
220
221 copy->vert_cache[slot].in = elt;
222 copy->vert_cache[slot].out = copy->dstbuf_nr++;
223 copy->dstptr += copy->vertex_size;
224
225 assert(csr == copy->dstptr);
226 assert(copy->dstptr == (copy->dstbuf +
227 copy->dstbuf_nr *
228 copy->vertex_size));
229 }
230 /* else */
231 /* _mesa_printf(" --> reuse vertex\n"); */
232
233 /* _mesa_printf(" --> emit %d\n", copy->vert_cache[slot].out); */
234 copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
235 return check_flush(copy);
236 }
237
238 static void end( struct copy_context *copy, GLboolean end_flag )
239 {
240 struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
241
242 /* _mesa_printf("end (%d)\n", end_flag); */
243
244 prim->end = end_flag;
245 prim->count = copy->dstelt_nr - prim->start;
246
247 if (++copy->dstprim_nr == MAX_PRIM ||
248 check_flush(copy))
249 flush(copy);
250 }
251
252
253
254 static void replay_elts( struct copy_context *copy )
255 {
256 GLuint i, j, k;
257 GLboolean split;
258
259 for (i = 0; i < copy->nr_prims; i++) {
260 const struct _mesa_prim *prim = &copy->prim[i];
261 const GLuint start = prim->start;
262 GLuint first, incr;
263
264 switch (prim->mode) {
265
266 case GL_LINE_LOOP:
267 /* Convert to linestrip and emit the final vertex explicitly,
268 * but only in the resultant strip that requires it.
269 */
270 j = 0;
271 while (j != prim->count) {
272 begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
273
274 for (split = GL_FALSE; j != prim->count && !split; j++)
275 split = elt(copy, start + j);
276
277 if (j == prim->count) {
278 /* Done, emit final line. Split doesn't matter as
279 * it is always raised a bit early so we can emit
280 * the last verts if necessary!
281 */
282 if (prim->end)
283 (void)elt(copy, start + 0);
284
285 end(copy, prim->end);
286 }
287 else {
288 /* Wrap
289 */
290 assert(split);
291 end(copy, 0);
292 j--;
293 }
294 }
295 break;
296
297 case GL_TRIANGLE_FAN:
298 case GL_POLYGON:
299 j = 2;
300 while (j != prim->count) {
301 begin(copy, prim->mode, prim->begin && j == 0);
302
303 split = elt(copy, start+0);
304 assert(!split);
305
306 split = elt(copy, start+j-1);
307 assert(!split);
308
309 for (; j != prim->count && !split; j++)
310 split = elt(copy, start+j);
311
312 end(copy, prim->end && j == prim->count);
313
314 if (j != prim->count) {
315 /* Wrapped the primitive, need to repeat some vertices:
316 */
317 j -= 1;
318 }
319 }
320 break;
321
322 default:
323 (void)split_prim_inplace(prim->mode, &first, &incr);
324
325 j = 0;
326 while (j != prim->count) {
327
328 begin(copy, prim->mode, prim->begin && j == 0);
329
330 split = 0;
331 for (k = 0; k < first; k++, j++)
332 split |= elt(copy, start+j);
333
334 assert(!split);
335
336 for (; j != prim->count && !split; )
337 for (k = 0; k < incr; k++, j++)
338 split |= elt(copy, start+j);
339
340 end(copy, prim->end && j == prim->count);
341
342 if (j != prim->count) {
343 /* Wrapped the primitive, need to repeat some vertices:
344 */
345 assert(j > first - incr);
346 j -= (first - incr);
347 }
348 }
349 break;
350 }
351 }
352
353 if (copy->dstprim_nr)
354 flush(copy);
355 }
356
357
358 static void replay_init( struct copy_context *copy )
359 {
360 GLcontext *ctx = copy->ctx;
361 GLuint i;
362 GLuint offset;
363 const GLvoid *srcptr;
364
365 /* Make a list of varying attributes and their vbo's. Also
366 * calculate vertex size.
367 */
368 copy->vertex_size = 0;
369 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
370 struct gl_buffer_object *vbo = copy->array[i]->BufferObj;
371
372 if (copy->array[i]->StrideB == 0) {
373 copy->dstarray_ptr[i] = copy->array[i];
374 }
375 else {
376 GLuint j = copy->nr_varying++;
377
378 copy->varying[j].attr = i;
379 copy->varying[j].array = copy->array[i];
380 copy->varying[j].size = attr_size(copy->array[i]);
381 copy->vertex_size += attr_size(copy->array[i]);
382
383 if (vbo->Name && !vbo->Pointer)
384 ctx->Driver.MapBuffer(ctx,
385 GL_ARRAY_BUFFER_ARB,
386 GL_WRITE_ONLY, /* XXX */
387 vbo);
388
389 copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer,
390 copy->array[i]->Ptr);
391
392 copy->dstarray_ptr[i] = &copy->varying[j].dstarray;
393 }
394 }
395
396 /* There must always be an index buffer. Currently require the
397 * caller convert non-indexed prims to indexed. Could alternately
398 * do it internally.
399 */
400 if (copy->ib->obj->Name && !copy->ib->obj->Pointer)
401 ctx->Driver.MapBuffer(ctx,
402 GL_ARRAY_BUFFER_ARB, /* XXX */
403 GL_WRITE_ONLY, /* XXX */
404 copy->ib->obj);
405
406 srcptr = (const GLubyte *)ADD_POINTERS(copy->ib->obj->Pointer, copy->ib->ptr);
407
408 switch (copy->ib->type) {
409 case GL_UNSIGNED_BYTE:
410 copy->translated_elt_buf = _mesa_malloc(sizeof(GLuint) * copy->ib->count);
411 copy->srcelt = copy->translated_elt_buf;
412
413 for (i = 0; i < copy->ib->count; i++)
414 copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
415 break;
416
417 case GL_UNSIGNED_SHORT:
418 copy->translated_elt_buf = _mesa_malloc(sizeof(GLuint) * copy->ib->count);
419 copy->srcelt = copy->translated_elt_buf;
420
421 for (i = 0; i < copy->ib->count; i++)
422 copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
423 break;
424
425 case GL_UNSIGNED_INT:
426 copy->translated_elt_buf = NULL;
427 copy->srcelt = (const GLuint *)srcptr;
428 break;
429 }
430
431
432 /* Figure out the maximum allowed vertex buffer size:
433 */
434 if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
435 copy->dstbuf_size = copy->limits->max_verts;
436 }
437 else {
438 copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
439 }
440
441 /* Allocate an output vertex buffer:
442 *
443 * XXX: This should be a VBO!
444 */
445 copy->dstbuf = _mesa_malloc(copy->dstbuf_size *
446 copy->vertex_size);
447 copy->dstptr = copy->dstbuf;
448
449 /* Setup new vertex arrays to point into the output buffer:
450 */
451 for (offset = 0, i = 0; i < copy->nr_varying; i++) {
452 const struct gl_client_array *src = copy->varying[i].array;
453 struct gl_client_array *dst = &copy->varying[i].dstarray;
454
455 dst->Size = src->Size;
456 dst->Type = src->Type;
457 dst->Stride = copy->vertex_size;
458 dst->StrideB = copy->vertex_size;
459 dst->Ptr = copy->dstbuf + offset;
460 dst->Enabled = GL_TRUE;
461 dst->Normalized = GL_TRUE;
462 dst->BufferObj = ctx->Array.NullBufferObj;
463 dst->_MaxElement = copy->dstbuf_size; /* may be less! */
464
465 offset += copy->varying[i].size;
466 }
467
468 /* Allocate an output element list:
469 */
470 copy->dstelt_size = MIN2(65536,
471 copy->ib->count * 2 + 3);
472 copy->dstelt_size = MIN2(copy->dstelt_size,
473 copy->limits->max_indices);
474 copy->dstelt = _mesa_malloc(sizeof(GLuint) * copy->dstelt_size);
475 copy->dstelt_nr = 0;
476
477 /* Setup the new index buffer to point to the allocated element
478 * list:
479 */
480 copy->dstib.count = 0; /* duplicates dstelt_nr */
481 copy->dstib.type = GL_UNSIGNED_INT;
482 copy->dstib.obj = ctx->Array.NullBufferObj;
483 copy->dstib.ptr = copy->dstelt;
484 }
485
486
487 static void replay_finish( struct copy_context *copy )
488 {
489 GLcontext *ctx = copy->ctx;
490 GLuint i;
491
492 /* Free our vertex and index buffers:
493 */
494 _mesa_free(copy->translated_elt_buf);
495 _mesa_free(copy->dstbuf);
496 _mesa_free(copy->dstelt);
497
498 /* Unmap VBO's
499 */
500 for (i = 0; i < copy->nr_varying; i++) {
501 struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
502
503 if (vbo->Name && vbo->Pointer)
504 ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, vbo);
505 }
506
507 /* Unmap index buffer:
508 */
509 if (copy->ib->obj->Name && copy->ib->obj->Pointer) {
510 ctx->Driver.UnmapBuffer(ctx,
511 GL_ARRAY_BUFFER_ARB, /* XXX */
512 copy->ib->obj);
513 }
514 }
515
516 void vbo_split_copy( GLcontext *ctx,
517 const struct gl_client_array *arrays[],
518 const struct _mesa_prim *prim,
519 GLuint nr_prims,
520 const struct _mesa_index_buffer *ib,
521 vbo_draw_func draw,
522 const struct split_limits *limits )
523 {
524 struct copy_context copy;
525 GLuint i;
526
527 memset(&copy, 0, sizeof(copy));
528
529 /* Require indexed primitives:
530 */
531 assert(ib);
532
533 copy.ctx = ctx;
534 copy.array = arrays;
535 copy.prim = prim;
536 copy.nr_prims = nr_prims;
537 copy.ib = ib;
538 copy.draw = draw;
539 copy.limits = limits;
540
541
542 /* Clear the vertex cache:
543 */
544 for (i = 0; i < ELT_TABLE_SIZE; i++)
545 copy.vert_cache[i].in = ~0;
546
547
548 replay_init(&copy);
549 replay_elts(&copy);
550 replay_finish(&copy);
551 }