vbo: move vbo_sizeof_ib_type() into vbo_exec_array.c
[mesa.git] / src / mesa / vbo / vbo_split_copy.c
1
2 /*
3 * Mesa 3-D graphics library
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 *
25 * Authors:
26 * Keith Whitwell <keithw@vmware.com>
27 */
28
29 /* Split indexed primitives with per-vertex copying.
30 */
31
32 #include <stdio.h>
33
34 #include "main/glheader.h"
35 #include "main/bufferobj.h"
36 #include "main/imports.h"
37 #include "main/glformats.h"
38 #include "main/macros.h"
39 #include "main/mtypes.h"
40
41 #include "vbo_split.h"
42 #include "vbo.h"
43
44
45 #define ELT_TABLE_SIZE 16
46
47 /**
48 * Used for vertex-level splitting of indexed buffers. Note that
49 * non-indexed primitives may be converted to indexed in some cases
50 * (eg loops, fans) in order to use this splitting path.
51 */
52 struct copy_context {
53 struct gl_context *ctx;
54 const struct gl_vertex_array **array;
55 const struct _mesa_prim *prim;
56 GLuint nr_prims;
57 const struct _mesa_index_buffer *ib;
58 vbo_draw_func draw;
59
60 const struct split_limits *limits;
61
62 struct {
63 GLuint attr;
64 GLuint size;
65 const struct gl_vertex_array *array;
66 const GLubyte *src_ptr;
67
68 struct gl_vertex_array dstarray;
69
70 } varying[VERT_ATTRIB_MAX];
71 GLuint nr_varying;
72
73 const struct gl_vertex_array *dstarray_ptr[VERT_ATTRIB_MAX];
74 struct _mesa_index_buffer dstib;
75
76 GLuint *translated_elt_buf;
77 const GLuint *srcelt;
78
79 /** A baby hash table to avoid re-emitting (some) duplicate
80 * vertices when splitting indexed primitives.
81 */
82 struct {
83 GLuint in;
84 GLuint out;
85 } vert_cache[ELT_TABLE_SIZE];
86
87 GLuint vertex_size;
88 GLubyte *dstbuf;
89 GLubyte *dstptr; /**< dstptr == dstbuf + dstelt_max * vertsize */
90 GLuint dstbuf_size; /**< in vertices */
91 GLuint dstbuf_nr; /**< count of emitted vertices, also the largest value
92 * in dstelt. Our MaxIndex.
93 */
94
95 GLuint *dstelt;
96 GLuint dstelt_nr;
97 GLuint dstelt_size;
98
99 #define MAX_PRIM 32
100 struct _mesa_prim dstprim[MAX_PRIM];
101 GLuint dstprim_nr;
102 };
103
104
105 static GLuint
106 attr_size(const struct gl_vertex_array *array)
107 {
108 return array->Size * _mesa_sizeof_type(array->Type);
109 }
110
111
112 /**
113 * Starts returning true slightly before the buffer fills, to ensure
114 * that there is sufficient room for any remaining vertices to finish
115 * off the prim:
116 */
117 static GLboolean
118 check_flush(struct copy_context *copy)
119 {
120 GLenum mode = copy->dstprim[copy->dstprim_nr].mode;
121
122 if (GL_TRIANGLE_STRIP == mode &&
123 copy->dstelt_nr & 1) { /* see bug9962 */
124 return GL_FALSE;
125 }
126
127 if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
128 return GL_TRUE;
129
130 if (copy->dstelt_nr + 4 > copy->dstelt_size)
131 return GL_TRUE;
132
133 return GL_FALSE;
134 }
135
136
137 /**
138 * Dump the parameters/info for a vbo->draw() call.
139 */
140 static void
141 dump_draw_info(struct gl_context *ctx,
142 const struct gl_vertex_array **arrays,
143 const struct _mesa_prim *prims,
144 GLuint nr_prims,
145 const struct _mesa_index_buffer *ib,
146 GLuint min_index,
147 GLuint max_index)
148 {
149 GLuint i, j;
150
151 printf("VBO Draw:\n");
152 for (i = 0; i < nr_prims; i++) {
153 printf("Prim %u of %u\n", i, nr_prims);
154 printf(" Prim mode 0x%x\n", prims[i].mode);
155 printf(" IB: %p\n", (void*) ib);
156 for (j = 0; j < VERT_ATTRIB_MAX; j++) {
157 printf(" array %d at %p:\n", j, (void*) arrays[j]);
158 printf(" ptr %p, size %d, type 0x%x, stride %d\n",
159 arrays[j]->Ptr,
160 arrays[j]->Size, arrays[j]->Type, arrays[j]->StrideB);
161 if (0) {
162 GLint k = prims[i].start + prims[i].count - 1;
163 GLfloat *last = (GLfloat *) (arrays[j]->Ptr + arrays[j]->StrideB * k);
164 printf(" last: %f %f %f\n",
165 last[0], last[1], last[2]);
166 }
167 }
168 }
169 }
170
171
172 static void
173 flush(struct copy_context *copy)
174 {
175 struct gl_context *ctx = copy->ctx;
176 const struct gl_vertex_array **saved_arrays = ctx->Array._DrawArrays;
177 GLuint i;
178
179 /* Set some counters:
180 */
181 copy->dstib.count = copy->dstelt_nr;
182
183 #if 0
184 dump_draw_info(copy->ctx,
185 copy->dstarray_ptr,
186 copy->dstprim,
187 copy->dstprim_nr,
188 &copy->dstib,
189 0,
190 copy->dstbuf_nr);
191 #else
192 (void) dump_draw_info;
193 #endif
194
195 ctx->Array._DrawArrays = copy->dstarray_ptr;
196 ctx->NewDriverState |= ctx->DriverFlags.NewArray;
197
198 copy->draw(ctx,
199 copy->dstprim,
200 copy->dstprim_nr,
201 &copy->dstib,
202 GL_TRUE,
203 0,
204 copy->dstbuf_nr - 1,
205 NULL, 0, NULL);
206
207 ctx->Array._DrawArrays = saved_arrays;
208 ctx->NewDriverState |= ctx->DriverFlags.NewArray;
209
210 /* Reset all pointers:
211 */
212 copy->dstprim_nr = 0;
213 copy->dstelt_nr = 0;
214 copy->dstbuf_nr = 0;
215 copy->dstptr = copy->dstbuf;
216
217 /* Clear the vertex cache:
218 */
219 for (i = 0; i < ELT_TABLE_SIZE; i++)
220 copy->vert_cache[i].in = ~0;
221 }
222
223
224 /**
225 * Called at begin of each primitive during replay.
226 */
227 static void
228 begin(struct copy_context *copy, GLenum mode, GLboolean begin_flag)
229 {
230 struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
231
232 prim->mode = mode;
233 prim->begin = begin_flag;
234 prim->num_instances = 1;
235 }
236
237
238 /**
239 * Use a hashtable to attempt to identify recently-emitted vertices
240 * and avoid re-emitting them.
241 */
242 static GLuint
243 elt(struct copy_context *copy, GLuint elt_idx)
244 {
245 GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
246 GLuint slot = elt & (ELT_TABLE_SIZE-1);
247
248 /* Look up the incoming element in the vertex cache. Re-emit if
249 * necessary.
250 */
251 if (copy->vert_cache[slot].in != elt) {
252 GLubyte *csr = copy->dstptr;
253 GLuint i;
254
255 for (i = 0; i < copy->nr_varying; i++) {
256 const struct gl_vertex_array *srcarray = copy->varying[i].array;
257 const GLubyte *srcptr = copy->varying[i].src_ptr + elt * srcarray->StrideB;
258
259 memcpy(csr, srcptr, copy->varying[i].size);
260 csr += copy->varying[i].size;
261
262 #ifdef NAN_CHECK
263 if (srcarray->Type == GL_FLOAT) {
264 GLuint k;
265 GLfloat *f = (GLfloat *) srcptr;
266 for (k = 0; k < srcarray->Size; k++) {
267 assert(!IS_INF_OR_NAN(f[k]));
268 assert(f[k] <= 1.0e20 && f[k] >= -1.0e20);
269 }
270 }
271 #endif
272
273 if (0) {
274 const GLuint *f = (const GLuint *)srcptr;
275 GLuint j;
276 printf(" varying %d: ", i);
277 for (j = 0; j < copy->varying[i].size / 4; j++)
278 printf("%x ", f[j]);
279 printf("\n");
280 }
281 }
282
283 copy->vert_cache[slot].in = elt;
284 copy->vert_cache[slot].out = copy->dstbuf_nr++;
285 copy->dstptr += copy->vertex_size;
286
287 assert(csr == copy->dstptr);
288 assert(copy->dstptr == (copy->dstbuf +
289 copy->dstbuf_nr * copy->vertex_size));
290 }
291
292 copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
293 return check_flush(copy);
294 }
295
296
297 /**
298 * Called at end of each primitive during replay.
299 */
300 static void
301 end(struct copy_context *copy, GLboolean end_flag)
302 {
303 struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
304
305 prim->end = end_flag;
306 prim->count = copy->dstelt_nr - prim->start;
307
308 if (++copy->dstprim_nr == MAX_PRIM || check_flush(copy)) {
309 flush(copy);
310 }
311 }
312
313
314 static void
315 replay_elts(struct copy_context *copy)
316 {
317 GLuint i, j, k;
318 GLboolean split;
319
320 for (i = 0; i < copy->nr_prims; i++) {
321 const struct _mesa_prim *prim = &copy->prim[i];
322 const GLuint start = prim->start;
323 GLuint first, incr;
324
325 switch (prim->mode) {
326 case GL_LINE_LOOP:
327 /* Convert to linestrip and emit the final vertex explicitly,
328 * but only in the resultant strip that requires it.
329 */
330 j = 0;
331 while (j != prim->count) {
332 begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
333
334 for (split = GL_FALSE; j != prim->count && !split; j++)
335 split = elt(copy, start + j);
336
337 if (j == prim->count) {
338 /* Done, emit final line. Split doesn't matter as
339 * it is always raised a bit early so we can emit
340 * the last verts if necessary!
341 */
342 if (prim->end)
343 (void)elt(copy, start + 0);
344
345 end(copy, prim->end);
346 }
347 else {
348 /* Wrap
349 */
350 assert(split);
351 end(copy, 0);
352 j--;
353 }
354 }
355 break;
356
357 case GL_TRIANGLE_FAN:
358 case GL_POLYGON:
359 j = 2;
360 while (j != prim->count) {
361 begin(copy, prim->mode, prim->begin && j == 0);
362
363 split = elt(copy, start+0);
364 assert(!split);
365
366 split = elt(copy, start+j-1);
367 assert(!split);
368
369 for (; j != prim->count && !split; j++)
370 split = elt(copy, start+j);
371
372 end(copy, prim->end && j == prim->count);
373
374 if (j != prim->count) {
375 /* Wrapped the primitive, need to repeat some vertices:
376 */
377 j -= 1;
378 }
379 }
380 break;
381
382 default:
383 (void)split_prim_inplace(prim->mode, &first, &incr);
384
385 j = 0;
386 while (j != prim->count) {
387
388 begin(copy, prim->mode, prim->begin && j == 0);
389
390 split = 0;
391 for (k = 0; k < first; k++, j++)
392 split |= elt(copy, start+j);
393
394 assert(!split);
395
396 for (; j != prim->count && !split;)
397 for (k = 0; k < incr; k++, j++)
398 split |= elt(copy, start+j);
399
400 end(copy, prim->end && j == prim->count);
401
402 if (j != prim->count) {
403 /* Wrapped the primitive, need to repeat some vertices:
404 */
405 assert(j > first - incr);
406 j -= (first - incr);
407 }
408 }
409 break;
410 }
411 }
412
413 if (copy->dstprim_nr)
414 flush(copy);
415 }
416
417
418 static void
419 replay_init(struct copy_context *copy)
420 {
421 struct gl_context *ctx = copy->ctx;
422 GLuint i;
423 GLuint offset;
424 const GLvoid *srcptr;
425
426 /* Make a list of varying attributes and their vbo's. Also
427 * calculate vertex size.
428 */
429 copy->vertex_size = 0;
430 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
431 struct gl_buffer_object *vbo = copy->array[i]->BufferObj;
432
433 if (copy->array[i]->StrideB == 0) {
434 copy->dstarray_ptr[i] = copy->array[i];
435 }
436 else {
437 GLuint j = copy->nr_varying++;
438
439 copy->varying[j].attr = i;
440 copy->varying[j].array = copy->array[i];
441 copy->varying[j].size = attr_size(copy->array[i]);
442 copy->vertex_size += attr_size(copy->array[i]);
443
444 if (_mesa_is_bufferobj(vbo) &&
445 !_mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
446 ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo,
447 MAP_INTERNAL);
448
449 copy->varying[j].src_ptr =
450 ADD_POINTERS(vbo->Mappings[MAP_INTERNAL].Pointer,
451 copy->array[i]->Ptr);
452
453 copy->dstarray_ptr[i] = &copy->varying[j].dstarray;
454 }
455 }
456
457 /* There must always be an index buffer. Currently require the
458 * caller convert non-indexed prims to indexed. Could alternately
459 * do it internally.
460 */
461 if (_mesa_is_bufferobj(copy->ib->obj) &&
462 !_mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL))
463 ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
464 copy->ib->obj, MAP_INTERNAL);
465
466 srcptr = (const GLubyte *)
467 ADD_POINTERS(copy->ib->obj->Mappings[MAP_INTERNAL].Pointer,
468 copy->ib->ptr);
469
470 switch (copy->ib->index_size) {
471 case 1:
472 copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
473 copy->srcelt = copy->translated_elt_buf;
474
475 for (i = 0; i < copy->ib->count; i++)
476 copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
477 break;
478
479 case 2:
480 copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
481 copy->srcelt = copy->translated_elt_buf;
482
483 for (i = 0; i < copy->ib->count; i++)
484 copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
485 break;
486
487 case 4:
488 copy->translated_elt_buf = NULL;
489 copy->srcelt = (const GLuint *)srcptr;
490 break;
491 }
492
493 /* Figure out the maximum allowed vertex buffer size:
494 */
495 if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
496 copy->dstbuf_size = copy->limits->max_verts;
497 }
498 else {
499 copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
500 }
501
502 /* Allocate an output vertex buffer:
503 *
504 * XXX: This should be a VBO!
505 */
506 copy->dstbuf = malloc(copy->dstbuf_size * copy->vertex_size);
507 copy->dstptr = copy->dstbuf;
508
509 /* Setup new vertex arrays to point into the output buffer:
510 */
511 for (offset = 0, i = 0; i < copy->nr_varying; i++) {
512 const struct gl_vertex_array *src = copy->varying[i].array;
513 struct gl_vertex_array *dst = &copy->varying[i].dstarray;
514
515 dst->Size = src->Size;
516 dst->Type = src->Type;
517 dst->Format = GL_RGBA;
518 dst->StrideB = copy->vertex_size;
519 dst->Ptr = copy->dstbuf + offset;
520 dst->Normalized = src->Normalized;
521 dst->Integer = src->Integer;
522 dst->Doubles = src->Doubles;
523 dst->BufferObj = ctx->Shared->NullBufferObj;
524 dst->_ElementSize = src->_ElementSize;
525
526 offset += copy->varying[i].size;
527 }
528
529 /* Allocate an output element list:
530 */
531 copy->dstelt_size = MIN2(65536, copy->ib->count * 2 + 3);
532 copy->dstelt_size = MIN2(copy->dstelt_size, copy->limits->max_indices);
533 copy->dstelt = malloc(sizeof(GLuint) * copy->dstelt_size);
534 copy->dstelt_nr = 0;
535
536 /* Setup the new index buffer to point to the allocated element
537 * list:
538 */
539 copy->dstib.count = 0; /* duplicates dstelt_nr */
540 copy->dstib.index_size = 4;
541 copy->dstib.obj = ctx->Shared->NullBufferObj;
542 copy->dstib.ptr = copy->dstelt;
543 }
544
545
546 /**
547 * Free up everything allocated during split/replay.
548 */
549 static void
550 replay_finish(struct copy_context *copy)
551 {
552 struct gl_context *ctx = copy->ctx;
553 GLuint i;
554
555 /* Free our vertex and index buffers */
556 free(copy->translated_elt_buf);
557 free(copy->dstbuf);
558 free(copy->dstelt);
559
560 /* Unmap VBO's */
561 for (i = 0; i < copy->nr_varying; i++) {
562 struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
563 if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
564 ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL);
565 }
566
567 /* Unmap index buffer */
568 if (_mesa_is_bufferobj(copy->ib->obj) &&
569 _mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL)) {
570 ctx->Driver.UnmapBuffer(ctx, copy->ib->obj, MAP_INTERNAL);
571 }
572 }
573
574
575 /**
576 * Split VBO into smaller pieces, draw the pieces.
577 */
578 void
579 vbo_split_copy(struct gl_context *ctx,
580 const struct gl_vertex_array *arrays[],
581 const struct _mesa_prim *prim,
582 GLuint nr_prims,
583 const struct _mesa_index_buffer *ib,
584 vbo_draw_func draw,
585 const struct split_limits *limits)
586 {
587 struct copy_context copy;
588 GLuint i, this_nr_prims;
589
590 for (i = 0; i < nr_prims;) {
591 /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
592 * will rebase the elements to the basevertex, and we'll only
593 * emit strings of prims with the same basevertex in one draw call.
594 */
595 for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
596 this_nr_prims++) {
597 if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
598 break;
599 }
600
601 memset(&copy, 0, sizeof(copy));
602
603 /* Require indexed primitives:
604 */
605 assert(ib);
606
607 copy.ctx = ctx;
608 copy.array = arrays;
609 copy.prim = &prim[i];
610 copy.nr_prims = this_nr_prims;
611 copy.ib = ib;
612 copy.draw = draw;
613 copy.limits = limits;
614
615 /* Clear the vertex cache:
616 */
617 for (i = 0; i < ELT_TABLE_SIZE; i++)
618 copy.vert_cache[i].in = ~0;
619
620 replay_init(&copy);
621 replay_elts(&copy);
622 replay_finish(&copy);
623 }
624 }