1 /**************************************************************************
3 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "util/u_vbuf.h"
30 #include "util/u_dump.h"
31 #include "util/u_format.h"
32 #include "util/u_inlines.h"
33 #include "util/u_memory.h"
34 #include "util/u_upload_mgr.h"
35 #include "translate/translate.h"
36 #include "translate/translate_cache.h"
37 #include "cso_cache/cso_cache.h"
38 #include "cso_cache/cso_hash.h"
40 struct u_vbuf_elements
{
42 struct pipe_vertex_element ve
[PIPE_MAX_ATTRIBS
];
44 unsigned src_format_size
[PIPE_MAX_ATTRIBS
];
46 /* If (velem[i].src_format != native_format[i]), the vertex buffer
47 * referenced by the vertex element cannot be used for rendering and
48 * its vertex data must be translated to native_format[i]. */
49 enum pipe_format native_format
[PIPE_MAX_ATTRIBS
];
50 unsigned native_format_size
[PIPE_MAX_ATTRIBS
];
52 /* This might mean two things:
53 * - src_format != native_format, as discussed above.
54 * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
55 uint32_t incompatible_elem_mask
; /* each bit describes a corresp. attrib */
56 /* Which buffer has at least one vertex element referencing it
58 uint32_t incompatible_vb_mask_any
;
59 /* Which buffer has all vertex elements referencing it incompatible. */
60 uint32_t incompatible_vb_mask_all
;
61 /* Which buffer has at least one vertex element referencing it
63 uint32_t compatible_vb_mask_any
;
64 /* Which buffer has all vertex elements referencing it compatible. */
65 uint32_t compatible_vb_mask_all
;
67 /* Which buffer has at least one vertex element referencing it
69 uint32_t noninstance_vb_mask_any
;
82 struct u_vbuf_caps caps
;
84 struct pipe_context
*pipe
;
85 struct translate_cache
*translate_cache
;
86 struct cso_cache
*cso_cache
;
87 struct u_upload_mgr
*uploader
;
89 /* This is what was set in set_vertex_buffers.
90 * May contain user buffers. */
91 struct pipe_vertex_buffer vertex_buffer
[PIPE_MAX_ATTRIBS
];
92 unsigned nr_vertex_buffers
;
94 /* Saved vertex buffers. */
95 struct pipe_vertex_buffer vertex_buffer_saved
[PIPE_MAX_ATTRIBS
];
96 unsigned nr_vertex_buffers_saved
;
98 /* Vertex buffers for the driver.
99 * There are no user buffers. */
100 struct pipe_vertex_buffer real_vertex_buffer
[PIPE_MAX_ATTRIBS
];
101 int nr_real_vertex_buffers
;
102 boolean vertex_buffers_dirty
;
104 /* The index buffer. */
105 struct pipe_index_buffer index_buffer
;
107 /* Vertex elements. */
108 struct u_vbuf_elements
*ve
, *ve_saved
;
110 /* Vertex elements used for the translate fallback. */
111 struct pipe_vertex_element fallback_velems
[PIPE_MAX_ATTRIBS
];
112 /* If non-NULL, this is a vertex element state used for the translate
113 * fallback and therefore used for rendering too. */
114 boolean using_translate
;
115 /* The vertex buffer slot index where translated vertices have been
117 unsigned fallback_vbs
[VB_NUM
];
119 /* Which buffer is a user buffer. */
120 uint32_t user_vb_mask
; /* each bit describes a corresp. buffer */
121 /* Which buffer is incompatible (unaligned). */
122 uint32_t incompatible_vb_mask
; /* each bit describes a corresp. buffer */
123 /* Which buffer has a non-zero stride. */
124 uint32_t nonzero_stride_vb_mask
; /* each bit describes a corresp. buffer */
/* Callbacks that create and destroy the u_vbuf_elements stored in the
 * vertex element CSO cache; defined further below. */
static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs);

static void
u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso);
133 void u_vbuf_get_caps(struct pipe_screen
*screen
, struct u_vbuf_caps
*caps
)
135 caps
->format_fixed32
=
136 screen
->is_format_supported(screen
, PIPE_FORMAT_R32_FIXED
, PIPE_BUFFER
,
137 0, PIPE_BIND_VERTEX_BUFFER
);
139 caps
->format_float16
=
140 screen
->is_format_supported(screen
, PIPE_FORMAT_R16_FLOAT
, PIPE_BUFFER
,
141 0, PIPE_BIND_VERTEX_BUFFER
);
143 caps
->format_float64
=
144 screen
->is_format_supported(screen
, PIPE_FORMAT_R64_FLOAT
, PIPE_BUFFER
,
145 0, PIPE_BIND_VERTEX_BUFFER
);
147 caps
->format_norm32
=
148 screen
->is_format_supported(screen
, PIPE_FORMAT_R32_UNORM
, PIPE_BUFFER
,
149 0, PIPE_BIND_VERTEX_BUFFER
) &&
150 screen
->is_format_supported(screen
, PIPE_FORMAT_R32_SNORM
, PIPE_BUFFER
,
151 0, PIPE_BIND_VERTEX_BUFFER
);
153 caps
->format_scaled32
=
154 screen
->is_format_supported(screen
, PIPE_FORMAT_R32_USCALED
, PIPE_BUFFER
,
155 0, PIPE_BIND_VERTEX_BUFFER
) &&
156 screen
->is_format_supported(screen
, PIPE_FORMAT_R32_SSCALED
, PIPE_BUFFER
,
157 0, PIPE_BIND_VERTEX_BUFFER
);
159 caps
->buffer_offset_unaligned
=
160 !screen
->get_param(screen
,
161 PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY
);
163 caps
->buffer_stride_unaligned
=
164 !screen
->get_param(screen
,
165 PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY
);
167 caps
->velem_src_offset_unaligned
=
168 !screen
->get_param(screen
,
169 PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY
);
171 caps
->user_vertex_buffers
=
172 screen
->get_param(screen
, PIPE_CAP_USER_VERTEX_BUFFERS
);
176 u_vbuf_create(struct pipe_context
*pipe
,
177 struct u_vbuf_caps
*caps
)
179 struct u_vbuf
*mgr
= CALLOC_STRUCT(u_vbuf
);
183 mgr
->cso_cache
= cso_cache_create();
184 mgr
->translate_cache
= translate_cache_create();
185 memset(mgr
->fallback_vbs
, ~0, sizeof(mgr
->fallback_vbs
));
187 mgr
->uploader
= u_upload_create(pipe
, 1024 * 1024, 4,
188 PIPE_BIND_VERTEX_BUFFER
);
193 /* u_vbuf uses its own caching for vertex elements, because it needs to keep
194 * its own preprocessed state per vertex element CSO. */
195 static struct u_vbuf_elements
*
196 u_vbuf_set_vertex_elements_internal(struct u_vbuf
*mgr
, unsigned count
,
197 const struct pipe_vertex_element
*states
)
199 struct pipe_context
*pipe
= mgr
->pipe
;
200 unsigned key_size
, hash_key
;
201 struct cso_hash_iter iter
;
202 struct u_vbuf_elements
*ve
;
203 struct cso_velems_state velems_state
;
205 /* need to include the count into the stored state data too. */
206 key_size
= sizeof(struct pipe_vertex_element
) * count
+ sizeof(unsigned);
207 velems_state
.count
= count
;
208 memcpy(velems_state
.velems
, states
,
209 sizeof(struct pipe_vertex_element
) * count
);
210 hash_key
= cso_construct_key((void*)&velems_state
, key_size
);
211 iter
= cso_find_state_template(mgr
->cso_cache
, hash_key
, CSO_VELEMENTS
,
212 (void*)&velems_state
, key_size
);
214 if (cso_hash_iter_is_null(iter
)) {
215 struct cso_velements
*cso
= MALLOC_STRUCT(cso_velements
);
216 memcpy(&cso
->state
, &velems_state
, key_size
);
217 cso
->data
= u_vbuf_create_vertex_elements(mgr
, count
, states
);
218 cso
->delete_state
= (cso_state_callback
)u_vbuf_delete_vertex_elements
;
219 cso
->context
= (void*)mgr
;
221 iter
= cso_insert_state(mgr
->cso_cache
, hash_key
, CSO_VELEMENTS
, cso
);
224 ve
= ((struct cso_velements
*)cso_hash_iter_data(iter
))->data
;
228 pipe
->bind_vertex_elements_state(pipe
, ve
->driver_cso
);
232 void u_vbuf_set_vertex_elements(struct u_vbuf
*mgr
, unsigned count
,
233 const struct pipe_vertex_element
*states
)
235 mgr
->ve
= u_vbuf_set_vertex_elements_internal(mgr
, count
, states
);
238 void u_vbuf_destroy(struct u_vbuf
*mgr
)
242 mgr
->pipe
->set_vertex_buffers(mgr
->pipe
, 0, NULL
);
244 for (i
= 0; i
< mgr
->nr_vertex_buffers
; i
++) {
245 pipe_resource_reference(&mgr
->vertex_buffer
[i
].buffer
, NULL
);
247 for (i
= 0; i
< mgr
->nr_real_vertex_buffers
; i
++) {
248 pipe_resource_reference(&mgr
->real_vertex_buffer
[i
].buffer
, NULL
);
251 translate_cache_destroy(mgr
->translate_cache
);
252 u_upload_destroy(mgr
->uploader
);
253 cso_cache_delete(mgr
->cso_cache
);
258 u_vbuf_translate_buffers(struct u_vbuf
*mgr
, struct translate_key
*key
,
259 unsigned vb_mask
, unsigned out_vb
,
260 int start_vertex
, unsigned num_vertices
,
261 int start_index
, unsigned num_indices
, int min_index
,
262 boolean unroll_indices
)
264 struct translate
*tr
;
265 struct pipe_transfer
*vb_transfer
[PIPE_MAX_ATTRIBS
] = {0};
266 struct pipe_resource
*out_buffer
= NULL
;
268 unsigned i
, out_offset
;
270 /* Get a translate object. */
271 tr
= translate_cache_find(mgr
->translate_cache
, key
);
273 /* Map buffers we want to translate. */
274 for (i
= 0; i
< mgr
->nr_vertex_buffers
; i
++) {
275 if (vb_mask
& (1 << i
)) {
276 struct pipe_vertex_buffer
*vb
= &mgr
->vertex_buffer
[i
];
277 unsigned offset
= vb
->buffer_offset
+ vb
->stride
* start_vertex
;
280 if (vb
->user_buffer
) {
281 map
= (uint8_t*)vb
->user_buffer
+ offset
;
283 unsigned size
= vb
->stride
? num_vertices
* vb
->stride
286 if (offset
+size
> vb
->buffer
->width0
) {
287 size
= vb
->buffer
->width0
- offset
;
290 map
= pipe_buffer_map_range(mgr
->pipe
, vb
->buffer
, offset
, size
,
291 PIPE_TRANSFER_READ
, &vb_transfer
[i
]);
294 /* Subtract min_index so that indexing with the index buffer works. */
295 if (unroll_indices
) {
296 map
-= vb
->stride
* min_index
;
299 tr
->set_buffer(tr
, i
, map
, vb
->stride
, ~0);
304 if (unroll_indices
) {
305 struct pipe_index_buffer
*ib
= &mgr
->index_buffer
;
306 struct pipe_transfer
*transfer
= NULL
;
307 unsigned offset
= ib
->offset
+ start_index
* ib
->index_size
;
310 assert(ib
->buffer
&& ib
->index_size
);
312 if (ib
->buffer
->user_ptr
) {
313 map
= ib
->buffer
->user_ptr
+ offset
;
315 map
= pipe_buffer_map_range(mgr
->pipe
, ib
->buffer
, offset
,
316 num_indices
* ib
->index_size
,
317 PIPE_TRANSFER_READ
, &transfer
);
320 /* Create and map the output buffer. */
321 u_upload_alloc(mgr
->uploader
, 0,
322 key
->output_stride
* num_indices
,
323 &out_offset
, &out_buffer
,
326 switch (ib
->index_size
) {
328 tr
->run_elts(tr
, (unsigned*)map
, num_indices
, 0, out_map
);
331 tr
->run_elts16(tr
, (uint16_t*)map
, num_indices
, 0, out_map
);
334 tr
->run_elts8(tr
, map
, num_indices
, 0, out_map
);
339 pipe_buffer_unmap(mgr
->pipe
, transfer
);
342 /* Create and map the output buffer. */
343 u_upload_alloc(mgr
->uploader
,
344 key
->output_stride
* start_vertex
,
345 key
->output_stride
* num_vertices
,
346 &out_offset
, &out_buffer
,
349 out_offset
-= key
->output_stride
* start_vertex
;
351 tr
->run(tr
, 0, num_vertices
, 0, out_map
);
354 /* Unmap all buffers. */
355 for (i
= 0; i
< mgr
->nr_vertex_buffers
; i
++) {
356 if (vb_transfer
[i
]) {
357 pipe_buffer_unmap(mgr
->pipe
, vb_transfer
[i
]);
361 /* Setup the new vertex buffer. */
362 mgr
->real_vertex_buffer
[out_vb
].buffer_offset
= out_offset
;
363 mgr
->real_vertex_buffer
[out_vb
].stride
= key
->output_stride
;
365 /* Move the buffer reference. */
366 pipe_resource_reference(
367 &mgr
->real_vertex_buffer
[out_vb
].buffer
, NULL
);
368 mgr
->real_vertex_buffer
[out_vb
].buffer
= out_buffer
;
372 u_vbuf_translate_find_free_vb_slots(struct u_vbuf
*mgr
,
373 unsigned mask
[VB_NUM
])
376 unsigned fallback_vbs
[VB_NUM
];
377 /* Set the bit for each buffer which is incompatible, or isn't set. */
378 uint32_t unused_vb_mask
=
379 mgr
->ve
->incompatible_vb_mask_all
| mgr
->incompatible_vb_mask
|
380 ~((1 << mgr
->nr_vertex_buffers
) - 1);
382 memset(fallback_vbs
, ~0, sizeof(fallback_vbs
));
384 /* Find free slots for each type if needed. */
385 for (type
= 0; type
< VB_NUM
; type
++) {
389 if (!unused_vb_mask
) {
390 /* fail, reset the number to its original value */
391 mgr
->nr_real_vertex_buffers
= mgr
->nr_vertex_buffers
;
395 index
= ffs(unused_vb_mask
) - 1;
396 fallback_vbs
[type
] = index
;
397 if (index
>= mgr
->nr_real_vertex_buffers
) {
398 mgr
->nr_real_vertex_buffers
= index
+ 1;
400 /*printf("found slot=%i for type=%i\n", index, type);*/
404 memcpy(mgr
->fallback_vbs
, fallback_vbs
, sizeof(fallback_vbs
));
409 u_vbuf_translate_begin(struct u_vbuf
*mgr
,
410 int start_vertex
, unsigned num_vertices
,
411 int start_instance
, unsigned num_instances
,
412 int start_index
, unsigned num_indices
, int min_index
,
413 boolean unroll_indices
)
415 unsigned mask
[VB_NUM
] = {0};
416 struct translate_key key
[VB_NUM
];
417 unsigned elem_index
[VB_NUM
][PIPE_MAX_ATTRIBS
]; /* ... into key.elements */
420 int start
[VB_NUM
] = {
421 start_vertex
, /* VERTEX */
422 start_instance
, /* INSTANCE */
426 unsigned num
[VB_NUM
] = {
427 num_vertices
, /* VERTEX */
428 num_instances
, /* INSTANCE */
432 memset(key
, 0, sizeof(key
));
433 memset(elem_index
, ~0, sizeof(elem_index
));
435 /* See if there are vertex attribs of each type to translate and
437 for (i
= 0; i
< mgr
->ve
->count
; i
++) {
438 unsigned vb_index
= mgr
->ve
->ve
[i
].vertex_buffer_index
;
440 if (!mgr
->vertex_buffer
[vb_index
].stride
) {
441 if (!(mgr
->ve
->incompatible_elem_mask
& (1 << i
)) &&
442 !(mgr
->incompatible_vb_mask
& (1 << vb_index
))) {
445 mask
[VB_CONST
] |= 1 << vb_index
;
446 } else if (mgr
->ve
->ve
[i
].instance_divisor
) {
447 if (!(mgr
->ve
->incompatible_elem_mask
& (1 << i
)) &&
448 !(mgr
->incompatible_vb_mask
& (1 << vb_index
))) {
451 mask
[VB_INSTANCE
] |= 1 << vb_index
;
453 if (!unroll_indices
&&
454 !(mgr
->ve
->incompatible_elem_mask
& (1 << i
)) &&
455 !(mgr
->incompatible_vb_mask
& (1 << vb_index
))) {
458 mask
[VB_VERTEX
] |= 1 << vb_index
;
462 assert(mask
[VB_VERTEX
] || mask
[VB_INSTANCE
] || mask
[VB_CONST
]);
464 /* Find free vertex buffer slots. */
465 if (!u_vbuf_translate_find_free_vb_slots(mgr
, mask
)) {
469 /* Initialize the translate keys. */
470 for (i
= 0; i
< mgr
->ve
->count
; i
++) {
471 struct translate_key
*k
;
472 struct translate_element
*te
;
473 unsigned bit
, vb_index
= mgr
->ve
->ve
[i
].vertex_buffer_index
;
476 if (!(mgr
->ve
->incompatible_elem_mask
& (1 << i
)) &&
477 !(mgr
->incompatible_vb_mask
& (1 << vb_index
)) &&
478 (!unroll_indices
|| !(mask
[VB_VERTEX
] & bit
))) {
482 /* Set type to what we will translate.
483 * Whether vertex, instance, or constant attribs. */
484 for (type
= 0; type
< VB_NUM
; type
++) {
485 if (mask
[type
] & bit
) {
489 assert(type
< VB_NUM
);
490 assert(translate_is_output_format_supported(mgr
->ve
->native_format
[i
]));
491 /*printf("velem=%i type=%i\n", i, type);*/
493 /* Add the vertex element. */
495 elem_index
[type
][i
] = k
->nr_elements
;
497 te
= &k
->element
[k
->nr_elements
];
498 te
->type
= TRANSLATE_ELEMENT_NORMAL
;
499 te
->instance_divisor
= 0;
500 te
->input_buffer
= vb_index
;
501 te
->input_format
= mgr
->ve
->ve
[i
].src_format
;
502 te
->input_offset
= mgr
->ve
->ve
[i
].src_offset
;
503 te
->output_format
= mgr
->ve
->native_format
[i
];
504 te
->output_offset
= k
->output_stride
;
506 k
->output_stride
+= mgr
->ve
->native_format_size
[i
];
510 /* Translate buffers. */
511 for (type
= 0; type
< VB_NUM
; type
++) {
512 if (key
[type
].nr_elements
) {
513 u_vbuf_translate_buffers(mgr
, &key
[type
], mask
[type
],
514 mgr
->fallback_vbs
[type
],
515 start
[type
], num
[type
],
516 start_index
, num_indices
, min_index
,
517 unroll_indices
&& type
== VB_VERTEX
);
519 /* Fixup the stride for constant attribs. */
520 if (type
== VB_CONST
) {
521 mgr
->real_vertex_buffer
[mgr
->fallback_vbs
[VB_CONST
]].stride
= 0;
526 /* Setup new vertex elements. */
527 for (i
= 0; i
< mgr
->ve
->count
; i
++) {
528 for (type
= 0; type
< VB_NUM
; type
++) {
529 if (elem_index
[type
][i
] < key
[type
].nr_elements
) {
530 struct translate_element
*te
= &key
[type
].element
[elem_index
[type
][i
]];
531 mgr
->fallback_velems
[i
].instance_divisor
= mgr
->ve
->ve
[i
].instance_divisor
;
532 mgr
->fallback_velems
[i
].src_format
= te
->output_format
;
533 mgr
->fallback_velems
[i
].src_offset
= te
->output_offset
;
534 mgr
->fallback_velems
[i
].vertex_buffer_index
= mgr
->fallback_vbs
[type
];
536 /* elem_index[type][i] can only be set for one type. */
537 assert(type
> VB_INSTANCE
|| elem_index
[type
+1][i
] == ~0);
538 assert(type
> VB_VERTEX
|| elem_index
[type
+2][i
] == ~0);
542 /* No translating, just copy the original vertex element over. */
543 if (type
== VB_NUM
) {
544 memcpy(&mgr
->fallback_velems
[i
], &mgr
->ve
->ve
[i
],
545 sizeof(struct pipe_vertex_element
));
549 u_vbuf_set_vertex_elements_internal(mgr
, mgr
->ve
->count
,
550 mgr
->fallback_velems
);
551 mgr
->using_translate
= TRUE
;
555 static void u_vbuf_translate_end(struct u_vbuf
*mgr
)
559 /* Restore vertex elements. */
560 mgr
->pipe
->bind_vertex_elements_state(mgr
->pipe
, mgr
->ve
->driver_cso
);
561 mgr
->using_translate
= FALSE
;
563 /* Unreference the now-unused VBOs. */
564 for (i
= 0; i
< VB_NUM
; i
++) {
565 unsigned vb
= mgr
->fallback_vbs
[i
];
567 pipe_resource_reference(&mgr
->real_vertex_buffer
[vb
].buffer
, NULL
);
568 mgr
->fallback_vbs
[i
] = ~0;
571 mgr
->nr_real_vertex_buffers
= mgr
->nr_vertex_buffers
;
574 #define FORMAT_REPLACE(what, withwhat) \
575 case PIPE_FORMAT_##what: format = PIPE_FORMAT_##withwhat; break
578 u_vbuf_create_vertex_elements(struct u_vbuf
*mgr
, unsigned count
,
579 const struct pipe_vertex_element
*attribs
)
581 struct pipe_context
*pipe
= mgr
->pipe
;
583 struct pipe_vertex_element driver_attribs
[PIPE_MAX_ATTRIBS
];
584 struct u_vbuf_elements
*ve
= CALLOC_STRUCT(u_vbuf_elements
);
585 uint32_t used_buffers
= 0;
589 memcpy(ve
->ve
, attribs
, sizeof(struct pipe_vertex_element
) * count
);
590 memcpy(driver_attribs
, attribs
, sizeof(struct pipe_vertex_element
) * count
);
592 /* Set the best native format in case the original format is not
594 for (i
= 0; i
< count
; i
++) {
595 enum pipe_format format
= ve
->ve
[i
].src_format
;
597 ve
->src_format_size
[i
] = util_format_get_blocksize(format
);
599 used_buffers
|= 1 << ve
->ve
[i
].vertex_buffer_index
;
601 if (!ve
->ve
[i
].instance_divisor
) {
602 ve
->noninstance_vb_mask_any
|= 1 << ve
->ve
[i
].vertex_buffer_index
;
605 /* Choose a native format.
606 * For now we don't care about the alignment, that's going to
607 * be sorted out later. */
608 if (!mgr
->caps
.format_fixed32
) {
610 FORMAT_REPLACE(R32_FIXED
, R32_FLOAT
);
611 FORMAT_REPLACE(R32G32_FIXED
, R32G32_FLOAT
);
612 FORMAT_REPLACE(R32G32B32_FIXED
, R32G32B32_FLOAT
);
613 FORMAT_REPLACE(R32G32B32A32_FIXED
, R32G32B32A32_FLOAT
);
617 if (!mgr
->caps
.format_float16
) {
619 FORMAT_REPLACE(R16_FLOAT
, R32_FLOAT
);
620 FORMAT_REPLACE(R16G16_FLOAT
, R32G32_FLOAT
);
621 FORMAT_REPLACE(R16G16B16_FLOAT
, R32G32B32_FLOAT
);
622 FORMAT_REPLACE(R16G16B16A16_FLOAT
, R32G32B32A32_FLOAT
);
626 if (!mgr
->caps
.format_float64
) {
628 FORMAT_REPLACE(R64_FLOAT
, R32_FLOAT
);
629 FORMAT_REPLACE(R64G64_FLOAT
, R32G32_FLOAT
);
630 FORMAT_REPLACE(R64G64B64_FLOAT
, R32G32B32_FLOAT
);
631 FORMAT_REPLACE(R64G64B64A64_FLOAT
, R32G32B32A32_FLOAT
);
635 if (!mgr
->caps
.format_norm32
) {
637 FORMAT_REPLACE(R32_UNORM
, R32_FLOAT
);
638 FORMAT_REPLACE(R32G32_UNORM
, R32G32_FLOAT
);
639 FORMAT_REPLACE(R32G32B32_UNORM
, R32G32B32_FLOAT
);
640 FORMAT_REPLACE(R32G32B32A32_UNORM
, R32G32B32A32_FLOAT
);
641 FORMAT_REPLACE(R32_SNORM
, R32_FLOAT
);
642 FORMAT_REPLACE(R32G32_SNORM
, R32G32_FLOAT
);
643 FORMAT_REPLACE(R32G32B32_SNORM
, R32G32B32_FLOAT
);
644 FORMAT_REPLACE(R32G32B32A32_SNORM
, R32G32B32A32_FLOAT
);
648 if (!mgr
->caps
.format_scaled32
) {
650 FORMAT_REPLACE(R32_USCALED
, R32_FLOAT
);
651 FORMAT_REPLACE(R32G32_USCALED
, R32G32_FLOAT
);
652 FORMAT_REPLACE(R32G32B32_USCALED
, R32G32B32_FLOAT
);
653 FORMAT_REPLACE(R32G32B32A32_USCALED
,R32G32B32A32_FLOAT
);
654 FORMAT_REPLACE(R32_SSCALED
, R32_FLOAT
);
655 FORMAT_REPLACE(R32G32_SSCALED
, R32G32_FLOAT
);
656 FORMAT_REPLACE(R32G32B32_SSCALED
, R32G32B32_FLOAT
);
657 FORMAT_REPLACE(R32G32B32A32_SSCALED
,R32G32B32A32_FLOAT
);
662 driver_attribs
[i
].src_format
= format
;
663 ve
->native_format
[i
] = format
;
664 ve
->native_format_size
[i
] =
665 util_format_get_blocksize(ve
->native_format
[i
]);
667 if (ve
->ve
[i
].src_format
!= format
||
668 (!mgr
->caps
.velem_src_offset_unaligned
&&
669 ve
->ve
[i
].src_offset
% 4 != 0)) {
670 ve
->incompatible_elem_mask
|= 1 << i
;
671 ve
->incompatible_vb_mask_any
|= 1 << ve
->ve
[i
].vertex_buffer_index
;
673 ve
->compatible_vb_mask_any
|= 1 << ve
->ve
[i
].vertex_buffer_index
;
677 ve
->compatible_vb_mask_all
= ~ve
->incompatible_vb_mask_any
& used_buffers
;
678 ve
->incompatible_vb_mask_all
= ~ve
->compatible_vb_mask_any
& used_buffers
;
680 /* Align the formats to the size of DWORD if needed. */
681 if (!mgr
->caps
.velem_src_offset_unaligned
) {
682 for (i
= 0; i
< count
; i
++) {
683 ve
->native_format_size
[i
] = align(ve
->native_format_size
[i
], 4);
688 pipe
->create_vertex_elements_state(pipe
, count
, driver_attribs
);
692 static void u_vbuf_delete_vertex_elements(struct u_vbuf
*mgr
, void *cso
)
694 struct pipe_context
*pipe
= mgr
->pipe
;
695 struct u_vbuf_elements
*ve
= cso
;
697 pipe
->delete_vertex_elements_state(pipe
, ve
->driver_cso
);
701 void u_vbuf_set_vertex_buffers(struct u_vbuf
*mgr
, unsigned count
,
702 const struct pipe_vertex_buffer
*bufs
)
706 mgr
->user_vb_mask
= 0;
707 mgr
->incompatible_vb_mask
= 0;
708 mgr
->nonzero_stride_vb_mask
= 0;
710 for (i
= 0; i
< count
; i
++) {
711 const struct pipe_vertex_buffer
*vb
= &bufs
[i
];
712 struct pipe_vertex_buffer
*orig_vb
= &mgr
->vertex_buffer
[i
];
713 struct pipe_vertex_buffer
*real_vb
= &mgr
->real_vertex_buffer
[i
];
715 pipe_resource_reference(&orig_vb
->buffer
, vb
->buffer
);
716 orig_vb
->user_buffer
= vb
->user_buffer
;
718 real_vb
->buffer_offset
= orig_vb
->buffer_offset
= vb
->buffer_offset
;
719 real_vb
->stride
= orig_vb
->stride
= vb
->stride
;
722 mgr
->nonzero_stride_vb_mask
|= 1 << i
;
725 if (!vb
->buffer
&& !vb
->user_buffer
) {
726 pipe_resource_reference(&real_vb
->buffer
, NULL
);
730 if ((!mgr
->caps
.buffer_offset_unaligned
&& vb
->buffer_offset
% 4 != 0) ||
731 (!mgr
->caps
.buffer_stride_unaligned
&& vb
->stride
% 4 != 0)) {
732 mgr
->incompatible_vb_mask
|= 1 << i
;
733 pipe_resource_reference(&real_vb
->buffer
, NULL
);
737 if (!mgr
->caps
.user_vertex_buffers
&& vb
->user_buffer
) {
738 mgr
->user_vb_mask
|= 1 << i
;
739 pipe_resource_reference(&real_vb
->buffer
, NULL
);
743 pipe_resource_reference(&real_vb
->buffer
, vb
->buffer
);
746 for (i
= count
; i
< mgr
->nr_vertex_buffers
; i
++) {
747 pipe_resource_reference(&mgr
->vertex_buffer
[i
].buffer
, NULL
);
749 for (i
= count
; i
< mgr
->nr_real_vertex_buffers
; i
++) {
750 pipe_resource_reference(&mgr
->real_vertex_buffer
[i
].buffer
, NULL
);
753 mgr
->nr_vertex_buffers
= count
;
754 mgr
->nr_real_vertex_buffers
= count
;
755 mgr
->vertex_buffers_dirty
= TRUE
;
758 void u_vbuf_set_index_buffer(struct u_vbuf
*mgr
,
759 const struct pipe_index_buffer
*ib
)
761 struct pipe_context
*pipe
= mgr
->pipe
;
763 if (ib
&& ib
->buffer
) {
764 assert(ib
->offset
% ib
->index_size
== 0);
765 pipe_resource_reference(&mgr
->index_buffer
.buffer
, ib
->buffer
);
766 mgr
->index_buffer
.offset
= ib
->offset
;
767 mgr
->index_buffer
.index_size
= ib
->index_size
;
769 pipe_resource_reference(&mgr
->index_buffer
.buffer
, NULL
);
772 pipe
->set_index_buffer(pipe
, ib
);
776 u_vbuf_upload_buffers(struct u_vbuf
*mgr
,
777 int start_vertex
, unsigned num_vertices
,
778 int start_instance
, unsigned num_instances
)
781 unsigned nr_velems
= mgr
->ve
->count
;
782 unsigned nr_vbufs
= mgr
->nr_vertex_buffers
;
783 struct pipe_vertex_element
*velems
=
784 mgr
->using_translate
? mgr
->fallback_velems
: mgr
->ve
->ve
;
785 unsigned start_offset
[PIPE_MAX_ATTRIBS
];
786 unsigned end_offset
[PIPE_MAX_ATTRIBS
] = {0};
788 /* Determine how much data needs to be uploaded. */
789 for (i
= 0; i
< nr_velems
; i
++) {
790 struct pipe_vertex_element
*velem
= &velems
[i
];
791 unsigned index
= velem
->vertex_buffer_index
;
792 struct pipe_vertex_buffer
*vb
= &mgr
->vertex_buffer
[index
];
793 unsigned instance_div
, first
, size
;
795 /* Skip the buffers generated by translate. */
796 if (index
== mgr
->fallback_vbs
[VB_VERTEX
] ||
797 index
== mgr
->fallback_vbs
[VB_INSTANCE
] ||
798 index
== mgr
->fallback_vbs
[VB_CONST
]) {
802 if (!vb
->user_buffer
) {
806 instance_div
= velem
->instance_divisor
;
807 first
= vb
->buffer_offset
+ velem
->src_offset
;
810 /* Constant attrib. */
811 size
= mgr
->ve
->src_format_size
[i
];
812 } else if (instance_div
) {
813 /* Per-instance attrib. */
814 unsigned count
= (num_instances
+ instance_div
- 1) / instance_div
;
815 first
+= vb
->stride
* start_instance
;
816 size
= vb
->stride
* (count
- 1) + mgr
->ve
->src_format_size
[i
];
818 /* Per-vertex attrib. */
819 first
+= vb
->stride
* start_vertex
;
820 size
= vb
->stride
* (num_vertices
- 1) + mgr
->ve
->src_format_size
[i
];
823 /* Update offsets. */
824 if (!end_offset
[index
]) {
825 start_offset
[index
] = first
;
826 end_offset
[index
] = first
+ size
;
828 if (first
< start_offset
[index
])
829 start_offset
[index
] = first
;
830 if (first
+ size
> end_offset
[index
])
831 end_offset
[index
] = first
+ size
;
835 /* Upload buffers. */
836 for (i
= 0; i
< nr_vbufs
; i
++) {
837 unsigned start
, end
= end_offset
[i
];
838 struct pipe_vertex_buffer
*real_vb
;
845 start
= start_offset
[i
];
848 real_vb
= &mgr
->real_vertex_buffer
[i
];
849 ptr
= mgr
->vertex_buffer
[i
].user_buffer
;
851 u_upload_data(mgr
->uploader
, start
, end
- start
, ptr
+ start
,
852 &real_vb
->buffer_offset
, &real_vb
->buffer
);
854 real_vb
->buffer_offset
-= start
;
858 static boolean
u_vbuf_need_minmax_index(struct u_vbuf
*mgr
)
860 /* See if there are any per-vertex attribs which will be uploaded or
861 * translated. Use bitmasks to get the info instead of looping over vertex
863 return ((mgr
->user_vb_mask
| mgr
->incompatible_vb_mask
|
864 mgr
->ve
->incompatible_vb_mask_any
) &
865 mgr
->ve
->noninstance_vb_mask_any
& mgr
->nonzero_stride_vb_mask
) != 0;
868 static boolean
u_vbuf_mapping_vertex_buffer_blocks(struct u_vbuf
*mgr
)
870 /* Return true if there are hw buffers which don't need to be translated.
872 * We could query whether each buffer is busy, but that would
873 * be way more costly than this. */
874 return (~mgr
->user_vb_mask
& ~mgr
->incompatible_vb_mask
&
875 mgr
->ve
->compatible_vb_mask_all
& mgr
->ve
->noninstance_vb_mask_any
&
876 mgr
->nonzero_stride_vb_mask
) != 0;
879 static void u_vbuf_get_minmax_index(struct pipe_context
*pipe
,
880 struct pipe_index_buffer
*ib
,
881 const struct pipe_draw_info
*info
,
885 struct pipe_transfer
*transfer
= NULL
;
888 unsigned restart_index
= info
->restart_index
;
890 if (ib
->buffer
->user_ptr
) {
891 indices
= ib
->buffer
->user_ptr
+
892 ib
->offset
+ info
->start
* ib
->index_size
;
894 indices
= pipe_buffer_map_range(pipe
, ib
->buffer
,
895 ib
->offset
+ info
->start
* ib
->index_size
,
896 info
->count
* ib
->index_size
,
897 PIPE_TRANSFER_READ
, &transfer
);
900 switch (ib
->index_size
) {
902 const unsigned *ui_indices
= (const unsigned*)indices
;
904 unsigned min_ui
= ~0U;
905 if (info
->primitive_restart
) {
906 for (i
= 0; i
< info
->count
; i
++) {
907 if (ui_indices
[i
] != restart_index
) {
908 if (ui_indices
[i
] > max_ui
) max_ui
= ui_indices
[i
];
909 if (ui_indices
[i
] < min_ui
) min_ui
= ui_indices
[i
];
914 for (i
= 0; i
< info
->count
; i
++) {
915 if (ui_indices
[i
] > max_ui
) max_ui
= ui_indices
[i
];
916 if (ui_indices
[i
] < min_ui
) min_ui
= ui_indices
[i
];
919 *out_min_index
= min_ui
;
920 *out_max_index
= max_ui
;
924 const unsigned short *us_indices
= (const unsigned short*)indices
;
926 unsigned min_us
= ~0U;
927 if (info
->primitive_restart
) {
928 for (i
= 0; i
< info
->count
; i
++) {
929 if (us_indices
[i
] != restart_index
) {
930 if (us_indices
[i
] > max_us
) max_us
= us_indices
[i
];
931 if (us_indices
[i
] < min_us
) min_us
= us_indices
[i
];
936 for (i
= 0; i
< info
->count
; i
++) {
937 if (us_indices
[i
] > max_us
) max_us
= us_indices
[i
];
938 if (us_indices
[i
] < min_us
) min_us
= us_indices
[i
];
941 *out_min_index
= min_us
;
942 *out_max_index
= max_us
;
946 const unsigned char *ub_indices
= (const unsigned char*)indices
;
948 unsigned min_ub
= ~0U;
949 if (info
->primitive_restart
) {
950 for (i
= 0; i
< info
->count
; i
++) {
951 if (ub_indices
[i
] != restart_index
) {
952 if (ub_indices
[i
] > max_ub
) max_ub
= ub_indices
[i
];
953 if (ub_indices
[i
] < min_ub
) min_ub
= ub_indices
[i
];
958 for (i
= 0; i
< info
->count
; i
++) {
959 if (ub_indices
[i
] > max_ub
) max_ub
= ub_indices
[i
];
960 if (ub_indices
[i
] < min_ub
) min_ub
= ub_indices
[i
];
963 *out_min_index
= min_ub
;
964 *out_max_index
= max_ub
;
974 pipe_buffer_unmap(pipe
, transfer
);
978 void u_vbuf_draw_vbo(struct u_vbuf
*mgr
, const struct pipe_draw_info
*info
)
980 struct pipe_context
*pipe
= mgr
->pipe
;
981 int start_vertex
, min_index
;
982 unsigned num_vertices
;
983 boolean unroll_indices
= FALSE
;
984 uint32_t user_vb_mask
= mgr
->user_vb_mask
;
986 /* Normal draw. No fallback and no user buffers. */
987 if (!mgr
->incompatible_vb_mask
&&
988 !mgr
->ve
->incompatible_elem_mask
&&
990 /* Set vertex buffers if needed. */
991 if (mgr
->vertex_buffers_dirty
) {
992 pipe
->set_vertex_buffers(pipe
, mgr
->nr_real_vertex_buffers
,
993 mgr
->real_vertex_buffer
);
994 mgr
->vertex_buffers_dirty
= FALSE
;
997 pipe
->draw_vbo(pipe
, info
);
1001 if (info
->indexed
) {
1002 /* See if anything needs to be done for per-vertex attribs. */
1003 if (u_vbuf_need_minmax_index(mgr
)) {
1006 if (info
->max_index
!= ~0) {
1007 min_index
= info
->min_index
;
1008 max_index
= info
->max_index
;
1010 u_vbuf_get_minmax_index(mgr
->pipe
, &mgr
->index_buffer
, info
,
1011 &min_index
, &max_index
);
1014 assert(min_index
<= max_index
);
1016 start_vertex
= min_index
+ info
->index_bias
;
1017 num_vertices
= max_index
+ 1 - min_index
;
1019 /* Primitive restart doesn't work when unrolling indices.
1020 * We would have to break this drawing operation into several ones. */
1021 /* Use some heuristic to see if unrolling indices improves
1023 if (!info
->primitive_restart
&&
1024 num_vertices
> info
->count
*2 &&
1025 num_vertices
-info
->count
> 32 &&
1026 !u_vbuf_mapping_vertex_buffer_blocks(mgr
)) {
1027 /*printf("num_vertices=%i count=%i\n", num_vertices, info->count);*/
1028 unroll_indices
= TRUE
;
1029 user_vb_mask
&= ~(mgr
->nonzero_stride_vb_mask
&
1030 mgr
->ve
->noninstance_vb_mask_any
);
1033 /* Nothing to do for per-vertex attribs. */
1039 start_vertex
= info
->start
;
1040 num_vertices
= info
->count
;
1044 /* Translate vertices with non-native layouts or formats. */
1045 if (unroll_indices
||
1046 mgr
->incompatible_vb_mask
||
1047 mgr
->ve
->incompatible_elem_mask
) {
1048 /* XXX check the return value */
1049 u_vbuf_translate_begin(mgr
, start_vertex
, num_vertices
,
1050 info
->start_instance
, info
->instance_count
,
1051 info
->start
, info
->count
, min_index
,
1054 user_vb_mask
&= ~(mgr
->incompatible_vb_mask
|
1055 mgr
->ve
->incompatible_vb_mask_all
);
1058 /* Upload user buffers. */
1060 u_vbuf_upload_buffers(mgr
, start_vertex
, num_vertices
,
1061 info
->start_instance
, info
->instance_count
);
1065 if (unroll_indices) {
1066 printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
1067 start_vertex, num_vertices);
1068 util_dump_draw_info(stdout, info);
1073 for (i = 0; i < mgr->nr_vertex_buffers; i++) {
1074 printf("input %i: ", i);
1075 util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
1078 for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
1079 printf("real %i: ", i);
1080 util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
1085 u_upload_unmap(mgr
->uploader
);
1086 pipe
->set_vertex_buffers(pipe
, mgr
->nr_real_vertex_buffers
,
1087 mgr
->real_vertex_buffer
);
1089 if (unlikely(unroll_indices
)) {
1090 struct pipe_draw_info new_info
= *info
;
1091 new_info
.indexed
= FALSE
;
1092 new_info
.index_bias
= 0;
1093 new_info
.min_index
= 0;
1094 new_info
.max_index
= info
->count
- 1;
1097 pipe
->draw_vbo(pipe
, &new_info
);
1099 pipe
->draw_vbo(pipe
, info
);
1102 if (mgr
->using_translate
) {
1103 u_vbuf_translate_end(mgr
);
1105 mgr
->vertex_buffers_dirty
= TRUE
;
1108 void u_vbuf_save_vertex_elements(struct u_vbuf
*mgr
)
1110 assert(!mgr
->ve_saved
);
1111 mgr
->ve_saved
= mgr
->ve
;
1114 void u_vbuf_restore_vertex_elements(struct u_vbuf
*mgr
)
1116 if (mgr
->ve
!= mgr
->ve_saved
) {
1117 struct pipe_context
*pipe
= mgr
->pipe
;
1119 mgr
->ve
= mgr
->ve_saved
;
1120 pipe
->bind_vertex_elements_state(pipe
,
1121 mgr
->ve
? mgr
->ve
->driver_cso
: NULL
);
1123 mgr
->ve_saved
= NULL
;
1126 void u_vbuf_save_vertex_buffers(struct u_vbuf
*mgr
)
1128 util_copy_vertex_buffers(mgr
->vertex_buffer_saved
,
1129 &mgr
->nr_vertex_buffers_saved
,
1131 mgr
->nr_vertex_buffers
);
1134 void u_vbuf_restore_vertex_buffers(struct u_vbuf
*mgr
)
1138 u_vbuf_set_vertex_buffers(mgr
, mgr
->nr_vertex_buffers_saved
,
1139 mgr
->vertex_buffer_saved
);
1140 for (i
= 0; i
< mgr
->nr_vertex_buffers_saved
; i
++) {
1141 pipe_resource_reference(&mgr
->vertex_buffer_saved
[i
].buffer
, NULL
);
1143 mgr
->nr_vertex_buffers_saved
= 0;