f20a97755cba3f469d4ba65d0a60e9057120d195
[mesa.git] / src / mesa / tnl / t_split_copy.c
1
2 /*
3 * Mesa 3-D graphics library
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 *
25 * Authors:
26 * Keith Whitwell <keithw@vmware.com>
27 */
28
29 /* Split indexed primitives with per-vertex copying.
30 */
31
32 #include <stdio.h>
33
34 #include "main/glheader.h"
35 #include "main/bufferobj.h"
36 #include "main/imports.h"
37 #include "main/glformats.h"
38 #include "main/macros.h"
39 #include "main/mtypes.h"
40 #include "main/varray.h"
41 #include "vbo/vbo.h"
42
43 #include "t_split.h"
44 #include "tnl.h"
45
46
47 #define ELT_TABLE_SIZE 16
48
49 /**
50 * Used for vertex-level splitting of indexed buffers. Note that
51 * non-indexed primitives may be converted to indexed in some cases
52 * (eg loops, fans) in order to use this splitting path.
53 */
54 struct copy_context {
55 struct gl_context *ctx;
56 const struct tnl_vertex_array *array;
57 const struct _mesa_prim *prim;
58 GLuint nr_prims;
59 const struct _mesa_index_buffer *ib;
60 tnl_draw_func draw;
61
62 const struct split_limits *limits;
63
64 struct {
65 GLuint attr;
66 GLuint size;
67 const struct tnl_vertex_array *array;
68 const GLubyte *src_ptr;
69
70 struct gl_vertex_buffer_binding dstbinding;
71 struct gl_array_attributes dstattribs;
72
73 } varying[VERT_ATTRIB_MAX];
74 GLuint nr_varying;
75
76 struct tnl_vertex_array dstarray[VERT_ATTRIB_MAX];
77 struct _mesa_index_buffer dstib;
78
79 GLuint *translated_elt_buf;
80 const GLuint *srcelt;
81
82 /** A baby hash table to avoid re-emitting (some) duplicate
83 * vertices when splitting indexed primitives.
84 */
85 struct {
86 GLuint in;
87 GLuint out;
88 } vert_cache[ELT_TABLE_SIZE];
89
90 GLuint vertex_size;
91 GLubyte *dstbuf;
92 GLubyte *dstptr; /**< dstptr == dstbuf + dstelt_max * vertsize */
93 GLuint dstbuf_size; /**< in vertices */
94 GLuint dstbuf_nr; /**< count of emitted vertices, also the largest value
95 * in dstelt. Our MaxIndex.
96 */
97
98 GLuint *dstelt;
99 GLuint dstelt_nr;
100 GLuint dstelt_size;
101
102 #define MAX_PRIM 32
103 struct _mesa_prim dstprim[MAX_PRIM];
104 GLuint dstprim_nr;
105 };
106
107
108 /**
109 * Shallow copy one vertex array to another.
110 */
111 static inline void
112 copy_vertex_array(struct tnl_vertex_array *dst,
113 const struct tnl_vertex_array *src)
114 {
115 dst->VertexAttrib = src->VertexAttrib;
116 dst->BufferBinding = src->BufferBinding;
117 }
118
119
120 /**
121 * Starts returning true slightly before the buffer fills, to ensure
122 * that there is sufficient room for any remaining vertices to finish
123 * off the prim:
124 */
125 static GLboolean
126 check_flush(struct copy_context *copy)
127 {
128 GLenum mode = copy->dstprim[copy->dstprim_nr].mode;
129
130 if (GL_TRIANGLE_STRIP == mode &&
131 copy->dstelt_nr & 1) { /* see bug9962 */
132 return GL_FALSE;
133 }
134
135 if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
136 return GL_TRUE;
137
138 if (copy->dstelt_nr + 4 > copy->dstelt_size)
139 return GL_TRUE;
140
141 return GL_FALSE;
142 }
143
144
145 /**
146 * Dump the parameters/info for a vbo->draw() call.
147 */
148 static void
149 dump_draw_info(struct gl_context *ctx,
150 const struct tnl_vertex_array *arrays,
151 const struct _mesa_prim *prims,
152 GLuint nr_prims,
153 const struct _mesa_index_buffer *ib,
154 GLuint min_index,
155 GLuint max_index)
156 {
157 GLuint i, j;
158
159 printf("VBO Draw:\n");
160 for (i = 0; i < nr_prims; i++) {
161 printf("Prim %u of %u\n", i, nr_prims);
162 printf(" Prim mode 0x%x\n", prims[i].mode);
163 printf(" IB: %p\n", (void*) ib);
164 for (j = 0; j < VERT_ATTRIB_MAX; j++) {
165 const struct tnl_vertex_array *array = &arrays[j];
166 const struct gl_vertex_buffer_binding *binding
167 = array->BufferBinding;
168 const struct gl_array_attributes *attrib = array->VertexAttrib;
169 const GLubyte *ptr = _mesa_vertex_attrib_address(attrib, binding);
170 printf(" array %d at %p:\n", j, (void*) &arrays[j]);
171 printf(" ptr %p, size %d, type 0x%x, stride %d\n",
172 ptr, attrib->Format.Size, attrib->Format.Type, binding->Stride);
173 if (0) {
174 GLint k = prims[i].start + prims[i].count - 1;
175 GLfloat *last = (GLfloat *) (ptr + binding->Stride * k);
176 printf(" last: %f %f %f\n",
177 last[0], last[1], last[2]);
178 }
179 }
180 }
181 }
182
183
184 static void
185 flush(struct copy_context *copy)
186 {
187 struct gl_context *ctx = copy->ctx;
188 GLuint i;
189
190 /* Set some counters:
191 */
192 copy->dstib.count = copy->dstelt_nr;
193
194 #if 0
195 dump_draw_info(copy->ctx,
196 copy->dstarray,
197 copy->dstprim,
198 copy->dstprim_nr,
199 &copy->dstib,
200 0,
201 copy->dstbuf_nr);
202 #else
203 (void) dump_draw_info;
204 #endif
205
206 copy->draw(ctx,
207 copy->dstarray,
208 copy->dstprim,
209 copy->dstprim_nr,
210 &copy->dstib,
211 GL_TRUE,
212 0,
213 copy->dstbuf_nr - 1,
214 1,
215 0,
216 NULL, 0);
217
218 /* Reset all pointers:
219 */
220 copy->dstprim_nr = 0;
221 copy->dstelt_nr = 0;
222 copy->dstbuf_nr = 0;
223 copy->dstptr = copy->dstbuf;
224
225 /* Clear the vertex cache:
226 */
227 for (i = 0; i < ELT_TABLE_SIZE; i++)
228 copy->vert_cache[i].in = ~0;
229 }
230
231
232 /**
233 * Called at begin of each primitive during replay.
234 */
235 static void
236 begin(struct copy_context *copy, GLenum mode, GLboolean begin_flag)
237 {
238 struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
239
240 prim->mode = mode;
241 prim->begin = begin_flag;
242 }
243
244
245 /**
246 * Use a hashtable to attempt to identify recently-emitted vertices
247 * and avoid re-emitting them.
248 */
249 static GLuint
250 elt(struct copy_context *copy, GLuint elt_idx)
251 {
252 GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
253 GLuint slot = elt & (ELT_TABLE_SIZE-1);
254
255 /* Look up the incoming element in the vertex cache. Re-emit if
256 * necessary.
257 */
258 if (copy->vert_cache[slot].in != elt) {
259 GLubyte *csr = copy->dstptr;
260 GLuint i;
261
262 for (i = 0; i < copy->nr_varying; i++) {
263 const struct tnl_vertex_array *srcarray = copy->varying[i].array;
264 const struct gl_vertex_buffer_binding* srcbinding
265 = srcarray->BufferBinding;
266 const GLubyte *srcptr
267 = copy->varying[i].src_ptr + elt * srcbinding->Stride;
268
269 memcpy(csr, srcptr, copy->varying[i].size);
270 csr += copy->varying[i].size;
271
272 #ifdef NAN_CHECK
273 if (srcarray->Format.Type == GL_FLOAT) {
274 GLuint k;
275 GLfloat *f = (GLfloat *) srcptr;
276 for (k = 0; k < srcarray->Size; k++) {
277 assert(!IS_INF_OR_NAN(f[k]));
278 assert(f[k] <= 1.0e20 && f[k] >= -1.0e20);
279 }
280 }
281 #endif
282
283 if (0) {
284 const GLuint *f = (const GLuint *)srcptr;
285 GLuint j;
286 printf(" varying %d: ", i);
287 for (j = 0; j < copy->varying[i].size / 4; j++)
288 printf("%x ", f[j]);
289 printf("\n");
290 }
291 }
292
293 copy->vert_cache[slot].in = elt;
294 copy->vert_cache[slot].out = copy->dstbuf_nr++;
295 copy->dstptr += copy->vertex_size;
296
297 assert(csr == copy->dstptr);
298 assert(copy->dstptr == (copy->dstbuf +
299 copy->dstbuf_nr * copy->vertex_size));
300 }
301
302 copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
303 return check_flush(copy);
304 }
305
306
307 /**
308 * Called at end of each primitive during replay.
309 */
310 static void
311 end(struct copy_context *copy, GLboolean end_flag)
312 {
313 struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
314
315 prim->end = end_flag;
316 prim->count = copy->dstelt_nr - prim->start;
317
318 if (++copy->dstprim_nr == MAX_PRIM || check_flush(copy)) {
319 flush(copy);
320 }
321 }
322
323
324 static void
325 replay_elts(struct copy_context *copy)
326 {
327 GLuint i, j, k;
328 GLboolean split;
329
330 for (i = 0; i < copy->nr_prims; i++) {
331 const struct _mesa_prim *prim = &copy->prim[i];
332 const GLuint start = prim->start;
333 GLuint first, incr;
334
335 switch (prim->mode) {
336 case GL_LINE_LOOP:
337 /* Convert to linestrip and emit the final vertex explicitly,
338 * but only in the resultant strip that requires it.
339 */
340 j = 0;
341 while (j != prim->count) {
342 begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
343
344 for (split = GL_FALSE; j != prim->count && !split; j++)
345 split = elt(copy, start + j);
346
347 if (j == prim->count) {
348 /* Done, emit final line. Split doesn't matter as
349 * it is always raised a bit early so we can emit
350 * the last verts if necessary!
351 */
352 if (prim->end)
353 (void)elt(copy, start + 0);
354
355 end(copy, prim->end);
356 }
357 else {
358 /* Wrap
359 */
360 assert(split);
361 end(copy, 0);
362 j--;
363 }
364 }
365 break;
366
367 case GL_TRIANGLE_FAN:
368 case GL_POLYGON:
369 j = 2;
370 while (j != prim->count) {
371 begin(copy, prim->mode, prim->begin && j == 0);
372
373 split = elt(copy, start+0);
374 assert(!split);
375
376 split = elt(copy, start+j-1);
377 assert(!split);
378
379 for (; j != prim->count && !split; j++)
380 split = elt(copy, start+j);
381
382 end(copy, prim->end && j == prim->count);
383
384 if (j != prim->count) {
385 /* Wrapped the primitive, need to repeat some vertices:
386 */
387 j -= 1;
388 }
389 }
390 break;
391
392 default:
393 (void)_tnl_split_prim_inplace(prim->mode, &first, &incr);
394
395 j = 0;
396 while (j != prim->count) {
397
398 begin(copy, prim->mode, prim->begin && j == 0);
399
400 split = 0;
401 for (k = 0; k < first; k++, j++)
402 split |= elt(copy, start+j);
403
404 assert(!split);
405
406 for (; j != prim->count && !split;)
407 for (k = 0; k < incr; k++, j++)
408 split |= elt(copy, start+j);
409
410 end(copy, prim->end && j == prim->count);
411
412 if (j != prim->count) {
413 /* Wrapped the primitive, need to repeat some vertices:
414 */
415 assert(j > first - incr);
416 j -= (first - incr);
417 }
418 }
419 break;
420 }
421 }
422
423 if (copy->dstprim_nr)
424 flush(copy);
425 }
426
427
428 static void
429 replay_init(struct copy_context *copy)
430 {
431 struct gl_context *ctx = copy->ctx;
432 GLuint i;
433 GLuint offset;
434 const GLvoid *srcptr;
435
436 /* Make a list of varying attributes and their vbo's. Also
437 * calculate vertex size.
438 */
439 copy->vertex_size = 0;
440 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
441 const struct tnl_vertex_array *array = &copy->array[i];
442 const struct gl_vertex_buffer_binding *binding = array->BufferBinding;
443
444 if (binding->Stride == 0) {
445 copy_vertex_array(&copy->dstarray[i], array);
446 }
447 else {
448 const struct gl_array_attributes *attrib = array->VertexAttrib;
449 struct gl_buffer_object *vbo = binding->BufferObj;
450 const GLubyte *ptr = _mesa_vertex_attrib_address(attrib, binding);
451 GLuint j = copy->nr_varying++;
452
453 copy->varying[j].attr = i;
454 copy->varying[j].array = &copy->array[i];
455 copy->varying[j].size = attrib->Format._ElementSize;
456 copy->vertex_size += attrib->Format._ElementSize;
457
458 if (_mesa_is_bufferobj(vbo) &&
459 !_mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
460 ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo,
461 MAP_INTERNAL);
462
463 copy->varying[j].src_ptr =
464 ADD_POINTERS(vbo->Mappings[MAP_INTERNAL].Pointer, ptr);
465
466 copy->dstarray[i].VertexAttrib = &copy->varying[j].dstattribs;
467 copy->dstarray[i].BufferBinding = &copy->varying[j].dstbinding;
468 }
469 }
470
471 /* There must always be an index buffer. Currently require the
472 * caller convert non-indexed prims to indexed. Could alternately
473 * do it internally.
474 */
475 if (_mesa_is_bufferobj(copy->ib->obj) &&
476 !_mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL))
477 ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
478 copy->ib->obj, MAP_INTERNAL);
479
480 srcptr = (const GLubyte *)
481 ADD_POINTERS(copy->ib->obj->Mappings[MAP_INTERNAL].Pointer,
482 copy->ib->ptr);
483
484 switch (copy->ib->index_size) {
485 case 1:
486 copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
487 copy->srcelt = copy->translated_elt_buf;
488
489 for (i = 0; i < copy->ib->count; i++)
490 copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
491 break;
492
493 case 2:
494 copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
495 copy->srcelt = copy->translated_elt_buf;
496
497 for (i = 0; i < copy->ib->count; i++)
498 copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
499 break;
500
501 case 4:
502 copy->translated_elt_buf = NULL;
503 copy->srcelt = (const GLuint *)srcptr;
504 break;
505 }
506
507 /* Figure out the maximum allowed vertex buffer size:
508 */
509 if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
510 copy->dstbuf_size = copy->limits->max_verts;
511 }
512 else {
513 copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
514 }
515
516 /* Allocate an output vertex buffer:
517 *
518 * XXX: This should be a VBO!
519 */
520 copy->dstbuf = malloc(copy->dstbuf_size * copy->vertex_size);
521 copy->dstptr = copy->dstbuf;
522
523 /* Setup new vertex arrays to point into the output buffer:
524 */
525 for (offset = 0, i = 0; i < copy->nr_varying; i++) {
526 const struct tnl_vertex_array *src = copy->varying[i].array;
527 const struct gl_array_attributes *srcattr = src->VertexAttrib;
528 struct tnl_vertex_array *dst = &copy->dstarray[copy->varying[i].attr];
529 struct gl_vertex_buffer_binding *dstbind = &copy->varying[i].dstbinding;
530 struct gl_array_attributes *dstattr = &copy->varying[i].dstattribs;
531
532 dstattr->Format = srcattr->Format;
533 dstattr->Ptr = copy->dstbuf + offset;
534 dstbind->Stride = copy->vertex_size;
535 dstbind->BufferObj = ctx->Shared->NullBufferObj;
536 dst->BufferBinding = dstbind;
537 dst->VertexAttrib = dstattr;
538
539 offset += copy->varying[i].size;
540 }
541
542 /* Allocate an output element list:
543 */
544 copy->dstelt_size = MIN2(65536, copy->ib->count * 2 + 3);
545 copy->dstelt_size = MIN2(copy->dstelt_size, copy->limits->max_indices);
546 copy->dstelt = malloc(sizeof(GLuint) * copy->dstelt_size);
547 copy->dstelt_nr = 0;
548
549 /* Setup the new index buffer to point to the allocated element
550 * list:
551 */
552 copy->dstib.count = 0; /* duplicates dstelt_nr */
553 copy->dstib.index_size = 4;
554 copy->dstib.index_size_shift = 2;
555 copy->dstib.obj = ctx->Shared->NullBufferObj;
556 copy->dstib.ptr = copy->dstelt;
557 }
558
559
560 /**
561 * Free up everything allocated during split/replay.
562 */
563 static void
564 replay_finish(struct copy_context *copy)
565 {
566 struct gl_context *ctx = copy->ctx;
567 GLuint i;
568
569 /* Free our vertex and index buffers */
570 free(copy->translated_elt_buf);
571 free(copy->dstbuf);
572 free(copy->dstelt);
573
574 /* Unmap VBO's */
575 for (i = 0; i < copy->nr_varying; i++) {
576 struct gl_buffer_object *vbo =
577 copy->varying[i].array->BufferBinding->BufferObj;
578 if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
579 ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL);
580 }
581
582 /* Unmap index buffer */
583 if (_mesa_is_bufferobj(copy->ib->obj) &&
584 _mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL)) {
585 ctx->Driver.UnmapBuffer(ctx, copy->ib->obj, MAP_INTERNAL);
586 }
587 }
588
589
590 /**
591 * Split VBO into smaller pieces, draw the pieces.
592 */
593 void
594 _tnl_split_copy(struct gl_context *ctx,
595 const struct tnl_vertex_array *arrays,
596 const struct _mesa_prim *prim,
597 GLuint nr_prims,
598 const struct _mesa_index_buffer *ib,
599 tnl_draw_func draw,
600 const struct split_limits *limits)
601 {
602 struct copy_context copy;
603 GLuint i, this_nr_prims;
604
605 for (i = 0; i < nr_prims;) {
606 /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
607 * will rebase the elements to the basevertex, and we'll only
608 * emit strings of prims with the same basevertex in one draw call.
609 */
610 for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
611 this_nr_prims++) {
612 if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
613 break;
614 }
615
616 memset(&copy, 0, sizeof(copy));
617
618 /* Require indexed primitives:
619 */
620 assert(ib);
621
622 copy.ctx = ctx;
623 copy.array = arrays;
624 copy.prim = &prim[i];
625 copy.nr_prims = this_nr_prims;
626 copy.ib = ib;
627 copy.draw = draw;
628 copy.limits = limits;
629
630 /* Clear the vertex cache:
631 */
632 for (i = 0; i < ELT_TABLE_SIZE; i++)
633 copy.vert_cache[i].in = ~0;
634
635 replay_init(&copy);
636 replay_elts(&copy);
637 replay_finish(&copy);
638 }
639 }