/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Keith Whitwell <keithw@vmware.com>
 */
/* Split indexed primitives with per-vertex copying. */
34 #include "main/glheader.h"
35 #include "main/bufferobj.h"
37 #include "main/glformats.h"
38 #include "main/macros.h"
39 #include "main/mtypes.h"
40 #include "main/varray.h"
/* Number of slots in the small vertex cache used while splitting.
 * Must stay a power of two: the slot is picked by masking the element
 * index with (ELT_TABLE_SIZE - 1).
 */
#define ELT_TABLE_SIZE 16
50 * Used for vertex-level splitting of indexed buffers. Note that
51 * non-indexed primitives may be converted to indexed in some cases
52 * (eg loops, fans) in order to use this splitting path.
55 struct gl_context
*ctx
;
56 const struct tnl_vertex_array
*array
;
57 const struct _mesa_prim
*prim
;
59 const struct _mesa_index_buffer
*ib
;
62 const struct split_limits
*limits
;
67 const struct tnl_vertex_array
*array
;
68 const GLubyte
*src_ptr
;
70 struct gl_vertex_buffer_binding dstbinding
;
71 struct gl_array_attributes dstattribs
;
73 } varying
[VERT_ATTRIB_MAX
];
76 struct tnl_vertex_array dstarray
[VERT_ATTRIB_MAX
];
77 struct _mesa_index_buffer dstib
;
79 GLuint
*translated_elt_buf
;
82 /** A baby hash table to avoid re-emitting (some) duplicate
83 * vertices when splitting indexed primitives.
88 } vert_cache
[ELT_TABLE_SIZE
];
92 GLubyte
*dstptr
; /**< dstptr == dstbuf + dstelt_max * vertsize */
93 GLuint dstbuf_size
; /**< in vertices */
94 GLuint dstbuf_nr
; /**< count of emitted vertices, also the largest value
95 * in dstelt. Our MaxIndex.
103 struct _mesa_prim dstprim
[MAX_PRIM
];
109 * Shallow copy one vertex array to another.
112 copy_vertex_array(struct tnl_vertex_array
*dst
,
113 const struct tnl_vertex_array
*src
)
115 dst
->VertexAttrib
= src
->VertexAttrib
;
116 dst
->BufferBinding
= src
->BufferBinding
;
121 * Starts returning true slightly before the buffer fills, to ensure
122 * that there is sufficient room for any remaining vertices to finish
126 check_flush(struct copy_context
*copy
)
128 GLenum mode
= copy
->dstprim
[copy
->dstprim_nr
].mode
;
130 if (GL_TRIANGLE_STRIP
== mode
&&
131 copy
->dstelt_nr
& 1) { /* see bug9962 */
135 if (copy
->dstbuf_nr
+ 4 > copy
->dstbuf_size
)
138 if (copy
->dstelt_nr
+ 4 > copy
->dstelt_size
)
146 * Dump the parameters/info for a vbo->draw() call.
149 dump_draw_info(const struct tnl_vertex_array
*arrays
,
150 const struct _mesa_prim
*prims
,
152 const struct _mesa_index_buffer
*ib
)
156 printf("VBO Draw:\n");
157 for (i
= 0; i
< nr_prims
; i
++) {
158 printf("Prim %u of %u\n", i
, nr_prims
);
159 printf(" Prim mode 0x%x\n", prims
[i
].mode
);
160 printf(" IB: %p\n", (void*) ib
);
161 for (j
= 0; j
< VERT_ATTRIB_MAX
; j
++) {
162 const struct tnl_vertex_array
*array
= &arrays
[j
];
163 const struct gl_vertex_buffer_binding
*binding
164 = array
->BufferBinding
;
165 const struct gl_array_attributes
*attrib
= array
->VertexAttrib
;
166 const GLubyte
*ptr
= _mesa_vertex_attrib_address(attrib
, binding
);
167 printf(" array %d at %p:\n", j
, (void*) &arrays
[j
]);
168 printf(" ptr %p, size %d, type 0x%x, stride %d\n",
169 ptr
, attrib
->Format
.Size
, attrib
->Format
.Type
, binding
->Stride
);
171 GLint k
= prims
[i
].start
+ prims
[i
].count
- 1;
172 GLfloat
*last
= (GLfloat
*) (ptr
+ binding
->Stride
* k
);
173 printf(" last: %f %f %f\n",
174 last
[0], last
[1], last
[2]);
182 flush(struct copy_context
*copy
)
184 struct gl_context
*ctx
= copy
->ctx
;
187 /* Set some counters:
189 copy
->dstib
.count
= copy
->dstelt_nr
;
192 dump_draw_info(copy
->dstarray
,
197 (void) dump_draw_info
;
211 /* Reset all pointers:
213 copy
->dstprim_nr
= 0;
216 copy
->dstptr
= copy
->dstbuf
;
218 /* Clear the vertex cache:
220 for (i
= 0; i
< ELT_TABLE_SIZE
; i
++)
221 copy
->vert_cache
[i
].in
= ~0;
226 * Called at begin of each primitive during replay.
229 begin(struct copy_context
*copy
, GLenum mode
, GLboolean begin_flag
)
231 struct _mesa_prim
*prim
= ©
->dstprim
[copy
->dstprim_nr
];
234 prim
->begin
= begin_flag
;
239 * Use a hashtable to attempt to identify recently-emitted vertices
240 * and avoid re-emitting them.
243 elt(struct copy_context
*copy
, GLuint elt_idx
)
245 GLuint elt
= copy
->srcelt
[elt_idx
] + copy
->prim
->basevertex
;
246 GLuint slot
= elt
& (ELT_TABLE_SIZE
-1);
248 /* Look up the incoming element in the vertex cache. Re-emit if
251 if (copy
->vert_cache
[slot
].in
!= elt
) {
252 GLubyte
*csr
= copy
->dstptr
;
255 for (i
= 0; i
< copy
->nr_varying
; i
++) {
256 const struct tnl_vertex_array
*srcarray
= copy
->varying
[i
].array
;
257 const struct gl_vertex_buffer_binding
* srcbinding
258 = srcarray
->BufferBinding
;
259 const GLubyte
*srcptr
260 = copy
->varying
[i
].src_ptr
+ elt
* srcbinding
->Stride
;
262 memcpy(csr
, srcptr
, copy
->varying
[i
].size
);
263 csr
+= copy
->varying
[i
].size
;
266 if (srcarray
->Format
.Type
== GL_FLOAT
) {
268 GLfloat
*f
= (GLfloat
*) srcptr
;
269 for (k
= 0; k
< srcarray
->Size
; k
++) {
270 assert(!util_is_inf_or_nan(f
[k
]));
271 assert(f
[k
] <= 1.0e20
&& f
[k
] >= -1.0e20
);
277 const GLuint
*f
= (const GLuint
*)srcptr
;
279 printf(" varying %d: ", i
);
280 for (j
= 0; j
< copy
->varying
[i
].size
/ 4; j
++)
286 copy
->vert_cache
[slot
].in
= elt
;
287 copy
->vert_cache
[slot
].out
= copy
->dstbuf_nr
++;
288 copy
->dstptr
+= copy
->vertex_size
;
290 assert(csr
== copy
->dstptr
);
291 assert(copy
->dstptr
== (copy
->dstbuf
+
292 copy
->dstbuf_nr
* copy
->vertex_size
));
295 copy
->dstelt
[copy
->dstelt_nr
++] = copy
->vert_cache
[slot
].out
;
296 return check_flush(copy
);
301 * Called at end of each primitive during replay.
304 end(struct copy_context
*copy
, GLboolean end_flag
)
306 struct _mesa_prim
*prim
= ©
->dstprim
[copy
->dstprim_nr
];
308 prim
->end
= end_flag
;
309 prim
->count
= copy
->dstelt_nr
- prim
->start
;
311 if (++copy
->dstprim_nr
== MAX_PRIM
|| check_flush(copy
)) {
318 replay_elts(struct copy_context
*copy
)
323 for (i
= 0; i
< copy
->nr_prims
; i
++) {
324 const struct _mesa_prim
*prim
= ©
->prim
[i
];
325 const GLuint start
= prim
->start
;
328 switch (prim
->mode
) {
330 /* Convert to linestrip and emit the final vertex explicitly,
331 * but only in the resultant strip that requires it.
334 while (j
!= prim
->count
) {
335 begin(copy
, GL_LINE_STRIP
, prim
->begin
&& j
== 0);
337 for (split
= GL_FALSE
; j
!= prim
->count
&& !split
; j
++)
338 split
= elt(copy
, start
+ j
);
340 if (j
== prim
->count
) {
341 /* Done, emit final line. Split doesn't matter as
342 * it is always raised a bit early so we can emit
343 * the last verts if necessary!
346 (void)elt(copy
, start
+ 0);
348 end(copy
, prim
->end
);
360 case GL_TRIANGLE_FAN
:
363 while (j
!= prim
->count
) {
364 begin(copy
, prim
->mode
, prim
->begin
&& j
== 0);
366 split
= elt(copy
, start
+0);
369 split
= elt(copy
, start
+j
-1);
372 for (; j
!= prim
->count
&& !split
; j
++)
373 split
= elt(copy
, start
+j
);
375 end(copy
, prim
->end
&& j
== prim
->count
);
377 if (j
!= prim
->count
) {
378 /* Wrapped the primitive, need to repeat some vertices:
386 (void)_tnl_split_prim_inplace(prim
->mode
, &first
, &incr
);
389 while (j
!= prim
->count
) {
391 begin(copy
, prim
->mode
, prim
->begin
&& j
== 0);
394 for (k
= 0; k
< first
; k
++, j
++)
395 split
|= elt(copy
, start
+j
);
399 for (; j
!= prim
->count
&& !split
;)
400 for (k
= 0; k
< incr
; k
++, j
++)
401 split
|= elt(copy
, start
+j
);
403 end(copy
, prim
->end
&& j
== prim
->count
);
405 if (j
!= prim
->count
) {
406 /* Wrapped the primitive, need to repeat some vertices:
408 assert(j
> first
- incr
);
416 if (copy
->dstprim_nr
)
422 replay_init(struct copy_context
*copy
)
424 struct gl_context
*ctx
= copy
->ctx
;
427 const GLvoid
*srcptr
;
429 /* Make a list of varying attributes and their vbo's. Also
430 * calculate vertex size.
432 copy
->vertex_size
= 0;
433 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
434 const struct tnl_vertex_array
*array
= ©
->array
[i
];
435 const struct gl_vertex_buffer_binding
*binding
= array
->BufferBinding
;
437 if (binding
->Stride
== 0) {
438 copy_vertex_array(©
->dstarray
[i
], array
);
441 const struct gl_array_attributes
*attrib
= array
->VertexAttrib
;
442 struct gl_buffer_object
*vbo
= binding
->BufferObj
;
443 const GLubyte
*ptr
= _mesa_vertex_attrib_address(attrib
, binding
);
444 GLuint j
= copy
->nr_varying
++;
446 copy
->varying
[j
].attr
= i
;
447 copy
->varying
[j
].array
= ©
->array
[i
];
448 copy
->varying
[j
].size
= attrib
->Format
._ElementSize
;
449 copy
->vertex_size
+= attrib
->Format
._ElementSize
;
452 if (!_mesa_bufferobj_mapped(vbo
, MAP_INTERNAL
)) {
453 ctx
->Driver
.MapBufferRange(ctx
, 0, vbo
->Size
, GL_MAP_READ_BIT
, vbo
,
457 copy
->varying
[j
].src_ptr
=
458 ADD_POINTERS(vbo
->Mappings
[MAP_INTERNAL
].Pointer
, ptr
);
460 copy
->varying
[j
].src_ptr
= ptr
;
463 copy
->dstarray
[i
].VertexAttrib
= ©
->varying
[j
].dstattribs
;
464 copy
->dstarray
[i
].BufferBinding
= ©
->varying
[j
].dstbinding
;
468 /* There must always be an index buffer. Currently require the
469 * caller convert non-indexed prims to indexed. Could alternately
473 if (!_mesa_bufferobj_mapped(copy
->ib
->obj
, MAP_INTERNAL
))
474 ctx
->Driver
.MapBufferRange(ctx
, 0, copy
->ib
->obj
->Size
, GL_MAP_READ_BIT
,
475 copy
->ib
->obj
, MAP_INTERNAL
);
477 srcptr
= (const GLubyte
*)
478 ADD_POINTERS(copy
->ib
->obj
->Mappings
[MAP_INTERNAL
].Pointer
,
481 srcptr
= copy
->ib
->ptr
;
483 switch (copy
->ib
->index_size_shift
) {
485 copy
->translated_elt_buf
= malloc(sizeof(GLuint
) * copy
->ib
->count
);
486 copy
->srcelt
= copy
->translated_elt_buf
;
488 for (i
= 0; i
< copy
->ib
->count
; i
++)
489 copy
->translated_elt_buf
[i
] = ((const GLubyte
*)srcptr
)[i
];
493 copy
->translated_elt_buf
= malloc(sizeof(GLuint
) * copy
->ib
->count
);
494 copy
->srcelt
= copy
->translated_elt_buf
;
496 for (i
= 0; i
< copy
->ib
->count
; i
++)
497 copy
->translated_elt_buf
[i
] = ((const GLushort
*)srcptr
)[i
];
501 copy
->translated_elt_buf
= NULL
;
502 copy
->srcelt
= (const GLuint
*)srcptr
;
506 /* Figure out the maximum allowed vertex buffer size:
508 if (copy
->vertex_size
* copy
->limits
->max_verts
<= copy
->limits
->max_vb_size
) {
509 copy
->dstbuf_size
= copy
->limits
->max_verts
;
512 copy
->dstbuf_size
= copy
->limits
->max_vb_size
/ copy
->vertex_size
;
515 /* Allocate an output vertex buffer:
517 * XXX: This should be a VBO!
519 copy
->dstbuf
= malloc(copy
->dstbuf_size
* copy
->vertex_size
);
520 copy
->dstptr
= copy
->dstbuf
;
522 /* Setup new vertex arrays to point into the output buffer:
524 for (offset
= 0, i
= 0; i
< copy
->nr_varying
; i
++) {
525 const struct tnl_vertex_array
*src
= copy
->varying
[i
].array
;
526 const struct gl_array_attributes
*srcattr
= src
->VertexAttrib
;
527 struct tnl_vertex_array
*dst
= ©
->dstarray
[copy
->varying
[i
].attr
];
528 struct gl_vertex_buffer_binding
*dstbind
= ©
->varying
[i
].dstbinding
;
529 struct gl_array_attributes
*dstattr
= ©
->varying
[i
].dstattribs
;
531 dstattr
->Format
= srcattr
->Format
;
532 dstattr
->Ptr
= copy
->dstbuf
+ offset
;
533 dstbind
->Stride
= copy
->vertex_size
;
534 dstbind
->BufferObj
= NULL
;
535 dst
->BufferBinding
= dstbind
;
536 dst
->VertexAttrib
= dstattr
;
538 offset
+= copy
->varying
[i
].size
;
541 /* Allocate an output element list:
543 copy
->dstelt_size
= MIN2(65536, copy
->ib
->count
* 2 + 3);
544 copy
->dstelt_size
= MIN2(copy
->dstelt_size
, copy
->limits
->max_indices
);
545 copy
->dstelt
= malloc(sizeof(GLuint
) * copy
->dstelt_size
);
548 /* Setup the new index buffer to point to the allocated element
551 copy
->dstib
.count
= 0; /* duplicates dstelt_nr */
552 copy
->dstib
.index_size_shift
= 2;
553 copy
->dstib
.obj
= NULL
;
554 copy
->dstib
.ptr
= copy
->dstelt
;
559 * Free up everything allocated during split/replay.
562 replay_finish(struct copy_context
*copy
)
564 struct gl_context
*ctx
= copy
->ctx
;
567 /* Free our vertex and index buffers */
568 free(copy
->translated_elt_buf
);
573 for (i
= 0; i
< copy
->nr_varying
; i
++) {
574 struct gl_buffer_object
*vbo
=
575 copy
->varying
[i
].array
->BufferBinding
->BufferObj
;
576 if (vbo
&& _mesa_bufferobj_mapped(vbo
, MAP_INTERNAL
))
577 ctx
->Driver
.UnmapBuffer(ctx
, vbo
, MAP_INTERNAL
);
580 /* Unmap index buffer */
582 _mesa_bufferobj_mapped(copy
->ib
->obj
, MAP_INTERNAL
)) {
583 ctx
->Driver
.UnmapBuffer(ctx
, copy
->ib
->obj
, MAP_INTERNAL
);
589 * Split VBO into smaller pieces, draw the pieces.
592 _tnl_split_copy(struct gl_context
*ctx
,
593 const struct tnl_vertex_array
*arrays
,
594 const struct _mesa_prim
*prim
,
596 const struct _mesa_index_buffer
*ib
,
598 const struct split_limits
*limits
)
600 struct copy_context copy
;
601 GLuint i
, this_nr_prims
;
603 for (i
= 0; i
< nr_prims
;) {
604 /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
605 * will rebase the elements to the basevertex, and we'll only
606 * emit strings of prims with the same basevertex in one draw call.
608 for (this_nr_prims
= 1; i
+ this_nr_prims
< nr_prims
;
610 if (prim
[i
].basevertex
!= prim
[i
+ this_nr_prims
].basevertex
)
614 memset(©
, 0, sizeof(copy
));
616 /* Require indexed primitives:
622 copy
.prim
= &prim
[i
];
623 copy
.nr_prims
= this_nr_prims
;
626 copy
.limits
= limits
;
628 /* Clear the vertex cache:
630 for (i
= 0; i
< ELT_TABLE_SIZE
; i
++)
631 copy
.vert_cache
[i
].in
= ~0;
635 replay_finish(©
);