tnl: Use gl_array_attribute::_ElementSize.
[mesa.git] / src / mesa / tnl / t_split_copy.c
1
2 /*
3 * Mesa 3-D graphics library
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 *
25 * Authors:
26 * Keith Whitwell <keithw@vmware.com>
27 */
28
29 /* Split indexed primitives with per-vertex copying.
30 */
31
32 #include <stdio.h>
33
34 #include "main/glheader.h"
35 #include "main/bufferobj.h"
36 #include "main/imports.h"
37 #include "main/glformats.h"
38 #include "main/macros.h"
39 #include "main/mtypes.h"
40 #include "main/varray.h"
41 #include "vbo/vbo.h"
42
43 #include "t_split.h"
44 #include "tnl.h"
45
46
47 #define ELT_TABLE_SIZE 16
48
49 /**
50 * Used for vertex-level splitting of indexed buffers. Note that
51 * non-indexed primitives may be converted to indexed in some cases
52 * (eg loops, fans) in order to use this splitting path.
53 */
54 struct copy_context {
55 struct gl_context *ctx;
56 const struct tnl_vertex_array *array;
57 const struct _mesa_prim *prim;
58 GLuint nr_prims;
59 const struct _mesa_index_buffer *ib;
60 tnl_draw_func draw;
61
62 const struct split_limits *limits;
63
64 struct {
65 GLuint attr;
66 GLuint size;
67 const struct tnl_vertex_array *array;
68 const GLubyte *src_ptr;
69
70 struct gl_vertex_buffer_binding dstbinding;
71 struct gl_array_attributes dstattribs;
72
73 } varying[VERT_ATTRIB_MAX];
74 GLuint nr_varying;
75
76 struct tnl_vertex_array dstarray[VERT_ATTRIB_MAX];
77 struct _mesa_index_buffer dstib;
78
79 GLuint *translated_elt_buf;
80 const GLuint *srcelt;
81
82 /** A baby hash table to avoid re-emitting (some) duplicate
83 * vertices when splitting indexed primitives.
84 */
85 struct {
86 GLuint in;
87 GLuint out;
88 } vert_cache[ELT_TABLE_SIZE];
89
90 GLuint vertex_size;
91 GLubyte *dstbuf;
92 GLubyte *dstptr; /**< dstptr == dstbuf + dstelt_max * vertsize */
93 GLuint dstbuf_size; /**< in vertices */
94 GLuint dstbuf_nr; /**< count of emitted vertices, also the largest value
95 * in dstelt. Our MaxIndex.
96 */
97
98 GLuint *dstelt;
99 GLuint dstelt_nr;
100 GLuint dstelt_size;
101
102 #define MAX_PRIM 32
103 struct _mesa_prim dstprim[MAX_PRIM];
104 GLuint dstprim_nr;
105 };
106
107
108 /**
109 * Shallow copy one vertex array to another.
110 */
111 static inline void
112 copy_vertex_array(struct tnl_vertex_array *dst,
113 const struct tnl_vertex_array *src)
114 {
115 dst->VertexAttrib = src->VertexAttrib;
116 dst->BufferBinding = src->BufferBinding;
117 }
118
119
120 /**
121 * Starts returning true slightly before the buffer fills, to ensure
122 * that there is sufficient room for any remaining vertices to finish
123 * off the prim:
124 */
125 static GLboolean
126 check_flush(struct copy_context *copy)
127 {
128 GLenum mode = copy->dstprim[copy->dstprim_nr].mode;
129
130 if (GL_TRIANGLE_STRIP == mode &&
131 copy->dstelt_nr & 1) { /* see bug9962 */
132 return GL_FALSE;
133 }
134
135 if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
136 return GL_TRUE;
137
138 if (copy->dstelt_nr + 4 > copy->dstelt_size)
139 return GL_TRUE;
140
141 return GL_FALSE;
142 }
143
144
145 /**
146 * Dump the parameters/info for a vbo->draw() call.
147 */
148 static void
149 dump_draw_info(struct gl_context *ctx,
150 const struct tnl_vertex_array *arrays,
151 const struct _mesa_prim *prims,
152 GLuint nr_prims,
153 const struct _mesa_index_buffer *ib,
154 GLuint min_index,
155 GLuint max_index)
156 {
157 GLuint i, j;
158
159 printf("VBO Draw:\n");
160 for (i = 0; i < nr_prims; i++) {
161 printf("Prim %u of %u\n", i, nr_prims);
162 printf(" Prim mode 0x%x\n", prims[i].mode);
163 printf(" IB: %p\n", (void*) ib);
164 for (j = 0; j < VERT_ATTRIB_MAX; j++) {
165 const struct tnl_vertex_array *array = &arrays[j];
166 const struct gl_vertex_buffer_binding *binding
167 = array->BufferBinding;
168 const struct gl_array_attributes *attrib = array->VertexAttrib;
169 const GLubyte *ptr = _mesa_vertex_attrib_address(attrib, binding);
170 printf(" array %d at %p:\n", j, (void*) &arrays[j]);
171 printf(" ptr %p, size %d, type 0x%x, stride %d\n",
172 ptr, attrib->Size, attrib->Type, binding->Stride);
173 if (0) {
174 GLint k = prims[i].start + prims[i].count - 1;
175 GLfloat *last = (GLfloat *) (ptr + binding->Stride * k);
176 printf(" last: %f %f %f\n",
177 last[0], last[1], last[2]);
178 }
179 }
180 }
181 }
182
183
184 static void
185 flush(struct copy_context *copy)
186 {
187 struct gl_context *ctx = copy->ctx;
188 GLuint i;
189
190 /* Set some counters:
191 */
192 copy->dstib.count = copy->dstelt_nr;
193
194 #if 0
195 dump_draw_info(copy->ctx,
196 copy->dstarray,
197 copy->dstprim,
198 copy->dstprim_nr,
199 &copy->dstib,
200 0,
201 copy->dstbuf_nr);
202 #else
203 (void) dump_draw_info;
204 #endif
205
206 copy->draw(ctx,
207 copy->dstarray,
208 copy->dstprim,
209 copy->dstprim_nr,
210 &copy->dstib,
211 GL_TRUE,
212 0,
213 copy->dstbuf_nr - 1,
214 NULL, 0, NULL);
215
216 /* Reset all pointers:
217 */
218 copy->dstprim_nr = 0;
219 copy->dstelt_nr = 0;
220 copy->dstbuf_nr = 0;
221 copy->dstptr = copy->dstbuf;
222
223 /* Clear the vertex cache:
224 */
225 for (i = 0; i < ELT_TABLE_SIZE; i++)
226 copy->vert_cache[i].in = ~0;
227 }
228
229
230 /**
231 * Called at begin of each primitive during replay.
232 */
233 static void
234 begin(struct copy_context *copy, GLenum mode, GLboolean begin_flag)
235 {
236 struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
237
238 prim->mode = mode;
239 prim->begin = begin_flag;
240 prim->num_instances = 1;
241 }
242
243
244 /**
245 * Use a hashtable to attempt to identify recently-emitted vertices
246 * and avoid re-emitting them.
247 */
248 static GLuint
249 elt(struct copy_context *copy, GLuint elt_idx)
250 {
251 GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
252 GLuint slot = elt & (ELT_TABLE_SIZE-1);
253
254 /* Look up the incoming element in the vertex cache. Re-emit if
255 * necessary.
256 */
257 if (copy->vert_cache[slot].in != elt) {
258 GLubyte *csr = copy->dstptr;
259 GLuint i;
260
261 for (i = 0; i < copy->nr_varying; i++) {
262 const struct tnl_vertex_array *srcarray = copy->varying[i].array;
263 const struct gl_vertex_buffer_binding* srcbinding
264 = srcarray->BufferBinding;
265 const GLubyte *srcptr
266 = copy->varying[i].src_ptr + elt * srcbinding->Stride;
267
268 memcpy(csr, srcptr, copy->varying[i].size);
269 csr += copy->varying[i].size;
270
271 #ifdef NAN_CHECK
272 if (srcarray->Type == GL_FLOAT) {
273 GLuint k;
274 GLfloat *f = (GLfloat *) srcptr;
275 for (k = 0; k < srcarray->Size; k++) {
276 assert(!IS_INF_OR_NAN(f[k]));
277 assert(f[k] <= 1.0e20 && f[k] >= -1.0e20);
278 }
279 }
280 #endif
281
282 if (0) {
283 const GLuint *f = (const GLuint *)srcptr;
284 GLuint j;
285 printf(" varying %d: ", i);
286 for (j = 0; j < copy->varying[i].size / 4; j++)
287 printf("%x ", f[j]);
288 printf("\n");
289 }
290 }
291
292 copy->vert_cache[slot].in = elt;
293 copy->vert_cache[slot].out = copy->dstbuf_nr++;
294 copy->dstptr += copy->vertex_size;
295
296 assert(csr == copy->dstptr);
297 assert(copy->dstptr == (copy->dstbuf +
298 copy->dstbuf_nr * copy->vertex_size));
299 }
300
301 copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
302 return check_flush(copy);
303 }
304
305
306 /**
307 * Called at end of each primitive during replay.
308 */
309 static void
310 end(struct copy_context *copy, GLboolean end_flag)
311 {
312 struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
313
314 prim->end = end_flag;
315 prim->count = copy->dstelt_nr - prim->start;
316
317 if (++copy->dstprim_nr == MAX_PRIM || check_flush(copy)) {
318 flush(copy);
319 }
320 }
321
322
323 static void
324 replay_elts(struct copy_context *copy)
325 {
326 GLuint i, j, k;
327 GLboolean split;
328
329 for (i = 0; i < copy->nr_prims; i++) {
330 const struct _mesa_prim *prim = &copy->prim[i];
331 const GLuint start = prim->start;
332 GLuint first, incr;
333
334 switch (prim->mode) {
335 case GL_LINE_LOOP:
336 /* Convert to linestrip and emit the final vertex explicitly,
337 * but only in the resultant strip that requires it.
338 */
339 j = 0;
340 while (j != prim->count) {
341 begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
342
343 for (split = GL_FALSE; j != prim->count && !split; j++)
344 split = elt(copy, start + j);
345
346 if (j == prim->count) {
347 /* Done, emit final line. Split doesn't matter as
348 * it is always raised a bit early so we can emit
349 * the last verts if necessary!
350 */
351 if (prim->end)
352 (void)elt(copy, start + 0);
353
354 end(copy, prim->end);
355 }
356 else {
357 /* Wrap
358 */
359 assert(split);
360 end(copy, 0);
361 j--;
362 }
363 }
364 break;
365
366 case GL_TRIANGLE_FAN:
367 case GL_POLYGON:
368 j = 2;
369 while (j != prim->count) {
370 begin(copy, prim->mode, prim->begin && j == 0);
371
372 split = elt(copy, start+0);
373 assert(!split);
374
375 split = elt(copy, start+j-1);
376 assert(!split);
377
378 for (; j != prim->count && !split; j++)
379 split = elt(copy, start+j);
380
381 end(copy, prim->end && j == prim->count);
382
383 if (j != prim->count) {
384 /* Wrapped the primitive, need to repeat some vertices:
385 */
386 j -= 1;
387 }
388 }
389 break;
390
391 default:
392 (void)_tnl_split_prim_inplace(prim->mode, &first, &incr);
393
394 j = 0;
395 while (j != prim->count) {
396
397 begin(copy, prim->mode, prim->begin && j == 0);
398
399 split = 0;
400 for (k = 0; k < first; k++, j++)
401 split |= elt(copy, start+j);
402
403 assert(!split);
404
405 for (; j != prim->count && !split;)
406 for (k = 0; k < incr; k++, j++)
407 split |= elt(copy, start+j);
408
409 end(copy, prim->end && j == prim->count);
410
411 if (j != prim->count) {
412 /* Wrapped the primitive, need to repeat some vertices:
413 */
414 assert(j > first - incr);
415 j -= (first - incr);
416 }
417 }
418 break;
419 }
420 }
421
422 if (copy->dstprim_nr)
423 flush(copy);
424 }
425
426
427 static void
428 replay_init(struct copy_context *copy)
429 {
430 struct gl_context *ctx = copy->ctx;
431 GLuint i;
432 GLuint offset;
433 const GLvoid *srcptr;
434
435 /* Make a list of varying attributes and their vbo's. Also
436 * calculate vertex size.
437 */
438 copy->vertex_size = 0;
439 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
440 const struct tnl_vertex_array *array = &copy->array[i];
441 const struct gl_vertex_buffer_binding *binding = array->BufferBinding;
442
443 if (binding->Stride == 0) {
444 copy_vertex_array(&copy->dstarray[i], array);
445 }
446 else {
447 const struct gl_array_attributes *attrib = array->VertexAttrib;
448 struct gl_buffer_object *vbo = binding->BufferObj;
449 const GLubyte *ptr = _mesa_vertex_attrib_address(attrib, binding);
450 GLuint j = copy->nr_varying++;
451
452 copy->varying[j].attr = i;
453 copy->varying[j].array = &copy->array[i];
454 copy->varying[j].size = attrib->_ElementSize;
455 copy->vertex_size += attrib->_ElementSize;
456
457 if (_mesa_is_bufferobj(vbo) &&
458 !_mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
459 ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo,
460 MAP_INTERNAL);
461
462 copy->varying[j].src_ptr =
463 ADD_POINTERS(vbo->Mappings[MAP_INTERNAL].Pointer, ptr);
464
465 copy->dstarray[i].VertexAttrib = &copy->varying[j].dstattribs;
466 copy->dstarray[i].BufferBinding = &copy->varying[j].dstbinding;
467 }
468 }
469
470 /* There must always be an index buffer. Currently require the
471 * caller convert non-indexed prims to indexed. Could alternately
472 * do it internally.
473 */
474 if (_mesa_is_bufferobj(copy->ib->obj) &&
475 !_mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL))
476 ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
477 copy->ib->obj, MAP_INTERNAL);
478
479 srcptr = (const GLubyte *)
480 ADD_POINTERS(copy->ib->obj->Mappings[MAP_INTERNAL].Pointer,
481 copy->ib->ptr);
482
483 switch (copy->ib->index_size) {
484 case 1:
485 copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
486 copy->srcelt = copy->translated_elt_buf;
487
488 for (i = 0; i < copy->ib->count; i++)
489 copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
490 break;
491
492 case 2:
493 copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
494 copy->srcelt = copy->translated_elt_buf;
495
496 for (i = 0; i < copy->ib->count; i++)
497 copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
498 break;
499
500 case 4:
501 copy->translated_elt_buf = NULL;
502 copy->srcelt = (const GLuint *)srcptr;
503 break;
504 }
505
506 /* Figure out the maximum allowed vertex buffer size:
507 */
508 if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
509 copy->dstbuf_size = copy->limits->max_verts;
510 }
511 else {
512 copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
513 }
514
515 /* Allocate an output vertex buffer:
516 *
517 * XXX: This should be a VBO!
518 */
519 copy->dstbuf = malloc(copy->dstbuf_size * copy->vertex_size);
520 copy->dstptr = copy->dstbuf;
521
522 /* Setup new vertex arrays to point into the output buffer:
523 */
524 for (offset = 0, i = 0; i < copy->nr_varying; i++) {
525 const struct tnl_vertex_array *src = copy->varying[i].array;
526 const struct gl_array_attributes *srcattr = src->VertexAttrib;
527 struct tnl_vertex_array *dst = &copy->dstarray[copy->varying[i].attr];
528 struct gl_vertex_buffer_binding *dstbind = &copy->varying[i].dstbinding;
529 struct gl_array_attributes *dstattr = &copy->varying[i].dstattribs;
530
531 dstattr->Size = srcattr->Size;
532 dstattr->Type = srcattr->Type;
533 dstattr->Format = GL_RGBA;
534 dstbind->Stride = copy->vertex_size;
535 dstattr->Ptr = copy->dstbuf + offset;
536 dstattr->Normalized = srcattr->Normalized;
537 dstattr->Integer = srcattr->Integer;
538 dstattr->Doubles = srcattr->Doubles;
539 dstbind->BufferObj = ctx->Shared->NullBufferObj;
540 dstattr->_ElementSize = srcattr->_ElementSize;
541 dst->BufferBinding = dstbind;
542 dst->VertexAttrib = dstattr;
543
544 offset += copy->varying[i].size;
545 }
546
547 /* Allocate an output element list:
548 */
549 copy->dstelt_size = MIN2(65536, copy->ib->count * 2 + 3);
550 copy->dstelt_size = MIN2(copy->dstelt_size, copy->limits->max_indices);
551 copy->dstelt = malloc(sizeof(GLuint) * copy->dstelt_size);
552 copy->dstelt_nr = 0;
553
554 /* Setup the new index buffer to point to the allocated element
555 * list:
556 */
557 copy->dstib.count = 0; /* duplicates dstelt_nr */
558 copy->dstib.index_size = 4;
559 copy->dstib.obj = ctx->Shared->NullBufferObj;
560 copy->dstib.ptr = copy->dstelt;
561 }
562
563
564 /**
565 * Free up everything allocated during split/replay.
566 */
567 static void
568 replay_finish(struct copy_context *copy)
569 {
570 struct gl_context *ctx = copy->ctx;
571 GLuint i;
572
573 /* Free our vertex and index buffers */
574 free(copy->translated_elt_buf);
575 free(copy->dstbuf);
576 free(copy->dstelt);
577
578 /* Unmap VBO's */
579 for (i = 0; i < copy->nr_varying; i++) {
580 struct gl_buffer_object *vbo =
581 copy->varying[i].array->BufferBinding->BufferObj;
582 if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
583 ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL);
584 }
585
586 /* Unmap index buffer */
587 if (_mesa_is_bufferobj(copy->ib->obj) &&
588 _mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL)) {
589 ctx->Driver.UnmapBuffer(ctx, copy->ib->obj, MAP_INTERNAL);
590 }
591 }
592
593
594 /**
595 * Split VBO into smaller pieces, draw the pieces.
596 */
597 void
598 _tnl_split_copy(struct gl_context *ctx,
599 const struct tnl_vertex_array *arrays,
600 const struct _mesa_prim *prim,
601 GLuint nr_prims,
602 const struct _mesa_index_buffer *ib,
603 tnl_draw_func draw,
604 const struct split_limits *limits)
605 {
606 struct copy_context copy;
607 GLuint i, this_nr_prims;
608
609 for (i = 0; i < nr_prims;) {
610 /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
611 * will rebase the elements to the basevertex, and we'll only
612 * emit strings of prims with the same basevertex in one draw call.
613 */
614 for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
615 this_nr_prims++) {
616 if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
617 break;
618 }
619
620 memset(&copy, 0, sizeof(copy));
621
622 /* Require indexed primitives:
623 */
624 assert(ib);
625
626 copy.ctx = ctx;
627 copy.array = arrays;
628 copy.prim = &prim[i];
629 copy.nr_prims = this_nr_prims;
630 copy.ib = ib;
631 copy.draw = draw;
632 copy.limits = limits;
633
634 /* Clear the vertex cache:
635 */
636 for (i = 0; i < ELT_TABLE_SIZE; i++)
637 copy.vert_cache[i].in = ~0;
638
639 replay_init(&copy);
640 replay_elts(&copy);
641 replay_finish(&copy);
642 }
643 }