cbb7eb409f9bbd87190a39ca733b826b70a29c50
[mesa.git] / src / mesa / tnl / t_split_copy.c
1
2 /*
3 * Mesa 3-D graphics library
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 *
25 * Authors:
26 * Keith Whitwell <keithw@vmware.com>
27 */
28
29 /* Split indexed primitives with per-vertex copying.
30 */
31
32 #include <stdio.h>
33
34 #include "main/glheader.h"
35 #include "main/bufferobj.h"
36 #include "main/imports.h"
37 #include "main/glformats.h"
38 #include "main/macros.h"
39 #include "main/mtypes.h"
40 #include "main/varray.h"
41 #include "vbo/vbo.h"
42
43 #include "t_split.h"
44 #include "tnl.h"
45
46
47 #define ELT_TABLE_SIZE 16
48
49 /**
50 * Used for vertex-level splitting of indexed buffers. Note that
51 * non-indexed primitives may be converted to indexed in some cases
52 * (eg loops, fans) in order to use this splitting path.
53 */
54 struct copy_context {
55 struct gl_context *ctx;
56 const struct tnl_vertex_array *array;
57 const struct _mesa_prim *prim;
58 GLuint nr_prims;
59 const struct _mesa_index_buffer *ib;
60 tnl_draw_func draw;
61
62 const struct split_limits *limits;
63
64 struct {
65 GLuint attr;
66 GLuint size;
67 const struct tnl_vertex_array *array;
68 const GLubyte *src_ptr;
69
70 struct gl_vertex_buffer_binding dstbinding;
71 struct gl_array_attributes dstattribs;
72
73 } varying[VERT_ATTRIB_MAX];
74 GLuint nr_varying;
75
76 struct tnl_vertex_array dstarray[VERT_ATTRIB_MAX];
77 struct _mesa_index_buffer dstib;
78
79 GLuint *translated_elt_buf;
80 const GLuint *srcelt;
81
82 /** A baby hash table to avoid re-emitting (some) duplicate
83 * vertices when splitting indexed primitives.
84 */
85 struct {
86 GLuint in;
87 GLuint out;
88 } vert_cache[ELT_TABLE_SIZE];
89
90 GLuint vertex_size;
91 GLubyte *dstbuf;
92 GLubyte *dstptr; /**< dstptr == dstbuf + dstelt_max * vertsize */
93 GLuint dstbuf_size; /**< in vertices */
94 GLuint dstbuf_nr; /**< count of emitted vertices, also the largest value
95 * in dstelt. Our MaxIndex.
96 */
97
98 GLuint *dstelt;
99 GLuint dstelt_nr;
100 GLuint dstelt_size;
101
102 #define MAX_PRIM 32
103 struct _mesa_prim dstprim[MAX_PRIM];
104 GLuint dstprim_nr;
105 };
106
107
108 static GLuint
109 attr_size(const struct gl_array_attributes *attrib)
110 {
111 return attrib->Size * _mesa_sizeof_type(attrib->Type);
112 }
113
114
115 /**
116 * Shallow copy one vertex array to another.
117 */
118 static inline void
119 copy_vertex_array(struct tnl_vertex_array *dst,
120 const struct tnl_vertex_array *src)
121 {
122 dst->VertexAttrib = src->VertexAttrib;
123 dst->BufferBinding = src->BufferBinding;
124 }
125
126
127 /**
128 * Starts returning true slightly before the buffer fills, to ensure
129 * that there is sufficient room for any remaining vertices to finish
130 * off the prim:
131 */
132 static GLboolean
133 check_flush(struct copy_context *copy)
134 {
135 GLenum mode = copy->dstprim[copy->dstprim_nr].mode;
136
137 if (GL_TRIANGLE_STRIP == mode &&
138 copy->dstelt_nr & 1) { /* see bug9962 */
139 return GL_FALSE;
140 }
141
142 if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
143 return GL_TRUE;
144
145 if (copy->dstelt_nr + 4 > copy->dstelt_size)
146 return GL_TRUE;
147
148 return GL_FALSE;
149 }
150
151
152 /**
153 * Dump the parameters/info for a vbo->draw() call.
154 */
155 static void
156 dump_draw_info(struct gl_context *ctx,
157 const struct tnl_vertex_array *arrays,
158 const struct _mesa_prim *prims,
159 GLuint nr_prims,
160 const struct _mesa_index_buffer *ib,
161 GLuint min_index,
162 GLuint max_index)
163 {
164 GLuint i, j;
165
166 printf("VBO Draw:\n");
167 for (i = 0; i < nr_prims; i++) {
168 printf("Prim %u of %u\n", i, nr_prims);
169 printf(" Prim mode 0x%x\n", prims[i].mode);
170 printf(" IB: %p\n", (void*) ib);
171 for (j = 0; j < VERT_ATTRIB_MAX; j++) {
172 const struct tnl_vertex_array *array = &arrays[j];
173 const struct gl_vertex_buffer_binding *binding
174 = array->BufferBinding;
175 const struct gl_array_attributes *attrib = array->VertexAttrib;
176 const GLubyte *ptr = _mesa_vertex_attrib_address(attrib, binding);
177 printf(" array %d at %p:\n", j, (void*) &arrays[j]);
178 printf(" ptr %p, size %d, type 0x%x, stride %d\n",
179 ptr, attrib->Size, attrib->Type, binding->Stride);
180 if (0) {
181 GLint k = prims[i].start + prims[i].count - 1;
182 GLfloat *last = (GLfloat *) (ptr + binding->Stride * k);
183 printf(" last: %f %f %f\n",
184 last[0], last[1], last[2]);
185 }
186 }
187 }
188 }
189
190
191 static void
192 flush(struct copy_context *copy)
193 {
194 struct gl_context *ctx = copy->ctx;
195 GLuint i;
196
197 /* Set some counters:
198 */
199 copy->dstib.count = copy->dstelt_nr;
200
201 #if 0
202 dump_draw_info(copy->ctx,
203 copy->dstarray,
204 copy->dstprim,
205 copy->dstprim_nr,
206 &copy->dstib,
207 0,
208 copy->dstbuf_nr);
209 #else
210 (void) dump_draw_info;
211 #endif
212
213 copy->draw(ctx,
214 copy->dstarray,
215 copy->dstprim,
216 copy->dstprim_nr,
217 &copy->dstib,
218 GL_TRUE,
219 0,
220 copy->dstbuf_nr - 1,
221 NULL, 0, NULL);
222
223 /* Reset all pointers:
224 */
225 copy->dstprim_nr = 0;
226 copy->dstelt_nr = 0;
227 copy->dstbuf_nr = 0;
228 copy->dstptr = copy->dstbuf;
229
230 /* Clear the vertex cache:
231 */
232 for (i = 0; i < ELT_TABLE_SIZE; i++)
233 copy->vert_cache[i].in = ~0;
234 }
235
236
237 /**
238 * Called at begin of each primitive during replay.
239 */
240 static void
241 begin(struct copy_context *copy, GLenum mode, GLboolean begin_flag)
242 {
243 struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
244
245 prim->mode = mode;
246 prim->begin = begin_flag;
247 prim->num_instances = 1;
248 }
249
250
251 /**
252 * Use a hashtable to attempt to identify recently-emitted vertices
253 * and avoid re-emitting them.
254 */
255 static GLuint
256 elt(struct copy_context *copy, GLuint elt_idx)
257 {
258 GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
259 GLuint slot = elt & (ELT_TABLE_SIZE-1);
260
261 /* Look up the incoming element in the vertex cache. Re-emit if
262 * necessary.
263 */
264 if (copy->vert_cache[slot].in != elt) {
265 GLubyte *csr = copy->dstptr;
266 GLuint i;
267
268 for (i = 0; i < copy->nr_varying; i++) {
269 const struct tnl_vertex_array *srcarray = copy->varying[i].array;
270 const struct gl_vertex_buffer_binding* srcbinding
271 = srcarray->BufferBinding;
272 const GLubyte *srcptr
273 = copy->varying[i].src_ptr + elt * srcbinding->Stride;
274
275 memcpy(csr, srcptr, copy->varying[i].size);
276 csr += copy->varying[i].size;
277
278 #ifdef NAN_CHECK
279 if (srcarray->Type == GL_FLOAT) {
280 GLuint k;
281 GLfloat *f = (GLfloat *) srcptr;
282 for (k = 0; k < srcarray->Size; k++) {
283 assert(!IS_INF_OR_NAN(f[k]));
284 assert(f[k] <= 1.0e20 && f[k] >= -1.0e20);
285 }
286 }
287 #endif
288
289 if (0) {
290 const GLuint *f = (const GLuint *)srcptr;
291 GLuint j;
292 printf(" varying %d: ", i);
293 for (j = 0; j < copy->varying[i].size / 4; j++)
294 printf("%x ", f[j]);
295 printf("\n");
296 }
297 }
298
299 copy->vert_cache[slot].in = elt;
300 copy->vert_cache[slot].out = copy->dstbuf_nr++;
301 copy->dstptr += copy->vertex_size;
302
303 assert(csr == copy->dstptr);
304 assert(copy->dstptr == (copy->dstbuf +
305 copy->dstbuf_nr * copy->vertex_size));
306 }
307
308 copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
309 return check_flush(copy);
310 }
311
312
313 /**
314 * Called at end of each primitive during replay.
315 */
316 static void
317 end(struct copy_context *copy, GLboolean end_flag)
318 {
319 struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
320
321 prim->end = end_flag;
322 prim->count = copy->dstelt_nr - prim->start;
323
324 if (++copy->dstprim_nr == MAX_PRIM || check_flush(copy)) {
325 flush(copy);
326 }
327 }
328
329
330 static void
331 replay_elts(struct copy_context *copy)
332 {
333 GLuint i, j, k;
334 GLboolean split;
335
336 for (i = 0; i < copy->nr_prims; i++) {
337 const struct _mesa_prim *prim = &copy->prim[i];
338 const GLuint start = prim->start;
339 GLuint first, incr;
340
341 switch (prim->mode) {
342 case GL_LINE_LOOP:
343 /* Convert to linestrip and emit the final vertex explicitly,
344 * but only in the resultant strip that requires it.
345 */
346 j = 0;
347 while (j != prim->count) {
348 begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
349
350 for (split = GL_FALSE; j != prim->count && !split; j++)
351 split = elt(copy, start + j);
352
353 if (j == prim->count) {
354 /* Done, emit final line. Split doesn't matter as
355 * it is always raised a bit early so we can emit
356 * the last verts if necessary!
357 */
358 if (prim->end)
359 (void)elt(copy, start + 0);
360
361 end(copy, prim->end);
362 }
363 else {
364 /* Wrap
365 */
366 assert(split);
367 end(copy, 0);
368 j--;
369 }
370 }
371 break;
372
373 case GL_TRIANGLE_FAN:
374 case GL_POLYGON:
375 j = 2;
376 while (j != prim->count) {
377 begin(copy, prim->mode, prim->begin && j == 0);
378
379 split = elt(copy, start+0);
380 assert(!split);
381
382 split = elt(copy, start+j-1);
383 assert(!split);
384
385 for (; j != prim->count && !split; j++)
386 split = elt(copy, start+j);
387
388 end(copy, prim->end && j == prim->count);
389
390 if (j != prim->count) {
391 /* Wrapped the primitive, need to repeat some vertices:
392 */
393 j -= 1;
394 }
395 }
396 break;
397
398 default:
399 (void)_tnl_split_prim_inplace(prim->mode, &first, &incr);
400
401 j = 0;
402 while (j != prim->count) {
403
404 begin(copy, prim->mode, prim->begin && j == 0);
405
406 split = 0;
407 for (k = 0; k < first; k++, j++)
408 split |= elt(copy, start+j);
409
410 assert(!split);
411
412 for (; j != prim->count && !split;)
413 for (k = 0; k < incr; k++, j++)
414 split |= elt(copy, start+j);
415
416 end(copy, prim->end && j == prim->count);
417
418 if (j != prim->count) {
419 /* Wrapped the primitive, need to repeat some vertices:
420 */
421 assert(j > first - incr);
422 j -= (first - incr);
423 }
424 }
425 break;
426 }
427 }
428
429 if (copy->dstprim_nr)
430 flush(copy);
431 }
432
433
434 static void
435 replay_init(struct copy_context *copy)
436 {
437 struct gl_context *ctx = copy->ctx;
438 GLuint i;
439 GLuint offset;
440 const GLvoid *srcptr;
441
442 /* Make a list of varying attributes and their vbo's. Also
443 * calculate vertex size.
444 */
445 copy->vertex_size = 0;
446 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
447 const struct tnl_vertex_array *array = &copy->array[i];
448 const struct gl_vertex_buffer_binding *binding = array->BufferBinding;
449
450 if (binding->Stride == 0) {
451 copy_vertex_array(&copy->dstarray[i], array);
452 }
453 else {
454 const struct gl_array_attributes *attrib = array->VertexAttrib;
455 struct gl_buffer_object *vbo = binding->BufferObj;
456 const GLubyte *ptr = _mesa_vertex_attrib_address(attrib, binding);
457 GLuint j = copy->nr_varying++;
458
459 copy->varying[j].attr = i;
460 copy->varying[j].array = &copy->array[i];
461 copy->varying[j].size = attr_size(attrib);
462 copy->vertex_size += attr_size(attrib);
463
464 if (_mesa_is_bufferobj(vbo) &&
465 !_mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
466 ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo,
467 MAP_INTERNAL);
468
469 copy->varying[j].src_ptr =
470 ADD_POINTERS(vbo->Mappings[MAP_INTERNAL].Pointer, ptr);
471
472 copy->dstarray[i].VertexAttrib = &copy->varying[j].dstattribs;
473 copy->dstarray[i].BufferBinding = &copy->varying[j].dstbinding;
474 }
475 }
476
477 /* There must always be an index buffer. Currently require the
478 * caller convert non-indexed prims to indexed. Could alternately
479 * do it internally.
480 */
481 if (_mesa_is_bufferobj(copy->ib->obj) &&
482 !_mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL))
483 ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
484 copy->ib->obj, MAP_INTERNAL);
485
486 srcptr = (const GLubyte *)
487 ADD_POINTERS(copy->ib->obj->Mappings[MAP_INTERNAL].Pointer,
488 copy->ib->ptr);
489
490 switch (copy->ib->index_size) {
491 case 1:
492 copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
493 copy->srcelt = copy->translated_elt_buf;
494
495 for (i = 0; i < copy->ib->count; i++)
496 copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
497 break;
498
499 case 2:
500 copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
501 copy->srcelt = copy->translated_elt_buf;
502
503 for (i = 0; i < copy->ib->count; i++)
504 copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
505 break;
506
507 case 4:
508 copy->translated_elt_buf = NULL;
509 copy->srcelt = (const GLuint *)srcptr;
510 break;
511 }
512
513 /* Figure out the maximum allowed vertex buffer size:
514 */
515 if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
516 copy->dstbuf_size = copy->limits->max_verts;
517 }
518 else {
519 copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
520 }
521
522 /* Allocate an output vertex buffer:
523 *
524 * XXX: This should be a VBO!
525 */
526 copy->dstbuf = malloc(copy->dstbuf_size * copy->vertex_size);
527 copy->dstptr = copy->dstbuf;
528
529 /* Setup new vertex arrays to point into the output buffer:
530 */
531 for (offset = 0, i = 0; i < copy->nr_varying; i++) {
532 const struct tnl_vertex_array *src = copy->varying[i].array;
533 const struct gl_array_attributes *srcattr = src->VertexAttrib;
534 struct tnl_vertex_array *dst = &copy->dstarray[i];
535 struct gl_vertex_buffer_binding *dstbind = &copy->varying[i].dstbinding;
536 struct gl_array_attributes *dstattr = &copy->varying[i].dstattribs;
537
538 dstattr->Size = srcattr->Size;
539 dstattr->Type = srcattr->Type;
540 dstattr->Format = GL_RGBA;
541 dstbind->Stride = copy->vertex_size;
542 dstattr->Ptr = copy->dstbuf + offset;
543 dstattr->Normalized = srcattr->Normalized;
544 dstattr->Integer = srcattr->Integer;
545 dstattr->Doubles = srcattr->Doubles;
546 dstbind->BufferObj = ctx->Shared->NullBufferObj;
547 dstattr->_ElementSize = srcattr->_ElementSize;
548 dst->BufferBinding = dstbind;
549 dst->VertexAttrib = dstattr;
550
551 offset += copy->varying[i].size;
552 }
553
554 /* Allocate an output element list:
555 */
556 copy->dstelt_size = MIN2(65536, copy->ib->count * 2 + 3);
557 copy->dstelt_size = MIN2(copy->dstelt_size, copy->limits->max_indices);
558 copy->dstelt = malloc(sizeof(GLuint) * copy->dstelt_size);
559 copy->dstelt_nr = 0;
560
561 /* Setup the new index buffer to point to the allocated element
562 * list:
563 */
564 copy->dstib.count = 0; /* duplicates dstelt_nr */
565 copy->dstib.index_size = 4;
566 copy->dstib.obj = ctx->Shared->NullBufferObj;
567 copy->dstib.ptr = copy->dstelt;
568 }
569
570
571 /**
572 * Free up everything allocated during split/replay.
573 */
574 static void
575 replay_finish(struct copy_context *copy)
576 {
577 struct gl_context *ctx = copy->ctx;
578 GLuint i;
579
580 /* Free our vertex and index buffers */
581 free(copy->translated_elt_buf);
582 free(copy->dstbuf);
583 free(copy->dstelt);
584
585 /* Unmap VBO's */
586 for (i = 0; i < copy->nr_varying; i++) {
587 struct gl_buffer_object *vbo =
588 copy->varying[i].array->BufferBinding->BufferObj;
589 if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
590 ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL);
591 }
592
593 /* Unmap index buffer */
594 if (_mesa_is_bufferobj(copy->ib->obj) &&
595 _mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL)) {
596 ctx->Driver.UnmapBuffer(ctx, copy->ib->obj, MAP_INTERNAL);
597 }
598 }
599
600
601 /**
602 * Split VBO into smaller pieces, draw the pieces.
603 */
604 void
605 _tnl_split_copy(struct gl_context *ctx,
606 const struct tnl_vertex_array *arrays,
607 const struct _mesa_prim *prim,
608 GLuint nr_prims,
609 const struct _mesa_index_buffer *ib,
610 tnl_draw_func draw,
611 const struct split_limits *limits)
612 {
613 struct copy_context copy;
614 GLuint i, this_nr_prims;
615
616 for (i = 0; i < nr_prims;) {
617 /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
618 * will rebase the elements to the basevertex, and we'll only
619 * emit strings of prims with the same basevertex in one draw call.
620 */
621 for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
622 this_nr_prims++) {
623 if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
624 break;
625 }
626
627 memset(&copy, 0, sizeof(copy));
628
629 /* Require indexed primitives:
630 */
631 assert(ib);
632
633 copy.ctx = ctx;
634 copy.array = arrays;
635 copy.prim = &prim[i];
636 copy.nr_prims = this_nr_prims;
637 copy.ib = ib;
638 copy.draw = draw;
639 copy.limits = limits;
640
641 /* Clear the vertex cache:
642 */
643 for (i = 0; i < ELT_TABLE_SIZE; i++)
644 copy.vert_cache[i].in = ~0;
645
646 replay_init(&copy);
647 replay_elts(&copy);
648 replay_finish(&copy);
649 }
650 }