/**************************************************************************
 *
 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/* This module uploads user buffers and translates the vertex buffers which
 * contain incompatible vertices (i.e. not supported by the driver/hardware)
 * into compatible ones, based on the Gallium CAPs.
 *
 * It does not upload index buffers.
 *
 * The module heavily uses bitmasks to represent per-buffer and
 * per-vertex-element flags to avoid looping over the list of buffers just
 * to see if there's a non-zero stride, or user buffer, or unsupported
 * format, etc.
 *
 * There are 3 categories of vertex elements, which are processed separately:
 * - per-vertex attribs (stride != 0, instance_divisor == 0)
 * - instanced attribs (stride != 0, instance_divisor > 0)
 * - constant attribs (stride == 0)
 *
 * All needed uploads and translations are performed every draw command, but
 * only the subset of vertices needed for that draw command is uploaded or
 * translated. (the module never translates whole buffers)
 *
 *
 * The module consists of two main parts:
 *
 *
 * 1) Translate (u_vbuf_translate_begin/end)
 *
 * This is pretty much a vertex fetch fallback. It translates vertices from
 * one vertex buffer to another in an unused vertex buffer slot. It does
 * whatever is needed to make the vertices readable by the hardware (changes
 * vertex formats and aligns offsets and strides). The translate module is
 * used here.
 *
 * Each of the 3 categories is translated to a separate buffer.
 * Only the [min_index, max_index] range is translated. For instanced attribs,
 * the range is [start_instance, start_instance+instance_count]. For constant
 * attribs, the range is [0, 1].
 *
 *
 * 2) User buffer uploading (u_vbuf_upload_buffers)
 *
 * Only the [min_index, max_index] range is uploaded (just like Translate)
 * with a single memcpy.
 *
 * This method works best for non-indexed draw operations or indexed draw
 * operations where the [min_index, max_index] range is not much bigger
 * than the vertex count.
 *
 * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
 * the per-vertex attribs are uploaded via the translate module, all packed
 * into one vertex buffer, and the indexed draw call is turned into
 * a non-indexed one in the process. This adds additional complexity
 * to the translate part, but it prevents bad apps from bringing your frame
 * rate down.
 *
 *
 * If there is nothing to do, it forwards every command to the driver.
 * The module also has its own CSO cache of vertex element states.
 */
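
/* A minimal sketch of how a driver or state tracker typically wires this
 * module up; the ctx->vbuf field is hypothetical, only the u_vbuf_* calls
 * are real entry points of this file:
 *
 *    struct u_vbuf_caps caps;
 *    u_vbuf_get_caps(screen, &caps, needs64b);
 *    if (caps.fallback_always || caps.fallback_only_for_user_vbuffers)
 *       ctx->vbuf = u_vbuf_create(pipe, &caps);
 *
 *    // In the draw path, forward the draw through the manager:
 *    if (ctx->vbuf)
 *       u_vbuf_draw_vbo(ctx->vbuf, info);
 *    else
 *       pipe->draw_vbo(pipe, info);
 */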
#include "util/u_vbuf.h"

#include "util/u_dump.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_screen.h"
#include "util/u_upload_mgr.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_hash.h"
struct u_vbuf_elements {
   unsigned count;
   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

   unsigned src_format_size[PIPE_MAX_ATTRIBS];

   /* If (velem[i].src_format != native_format[i]), the vertex buffer
    * referenced by the vertex element cannot be used for rendering and
    * its vertex data must be translated to native_format[i]. */
   enum pipe_format native_format[PIPE_MAX_ATTRIBS];
   unsigned native_format_size[PIPE_MAX_ATTRIBS];

   /* Which buffers are used by the vertex element state. */
   uint32_t used_vb_mask;
   /* This might mean two things:
    * - src_format != native_format, as discussed above.
    * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
   uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */
   /* Which buffer has at least one vertex element referencing it
    * incompatible. */
   uint32_t incompatible_vb_mask_any;
   /* Which buffer has all vertex elements referencing it incompatible. */
   uint32_t incompatible_vb_mask_all;
   /* Which buffer has at least one vertex element referencing it
    * compatible. */
   uint32_t compatible_vb_mask_any;
   /* Which buffer has all vertex elements referencing it compatible. */
   uint32_t compatible_vb_mask_all;

   /* Which buffer has at least one vertex element referencing it
    * non-instanced. */
   uint32_t noninstance_vb_mask_any;

   void *driver_cso;
};

enum {
   VB_VERTEX = 0,
   VB_INSTANCE = 1,
   VB_CONST = 2,
   VB_NUM = 3
};

struct u_vbuf {
   struct u_vbuf_caps caps;
   bool has_signed_vb_offset;

   struct pipe_context *pipe;
   struct translate_cache *translate_cache;
   struct cso_cache *cso_cache;

   /* This is what was set in set_vertex_buffers.
    * May contain user buffers. */
   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t enabled_vb_mask;

   /* Saved vertex buffer. */
   struct pipe_vertex_buffer vertex_buffer0_saved;

   /* Vertex buffers for the driver.
    * There are usually no user buffers. */
   struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
                                   call of set_vertex_buffers */

   /* Vertex elements. */
   struct u_vbuf_elements *ve, *ve_saved;

   /* Vertex elements used for the translate fallback. */
   struct cso_velems_state fallback_velems;
   /* If true, a vertex element state for the translate fallback is bound
    * and therefore used for rendering too. */
   boolean using_translate;
   /* The vertex buffer slot index where translated vertices have been
    * stored in. */
   unsigned fallback_vbs[VB_NUM];
   unsigned fallback_vbs_mask;

   /* Which buffer is a user buffer. */
   uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffer is incompatible (unaligned). */
   uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffer has a non-zero stride. */
   uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffers are allowed (supported by hardware). */
   uint32_t allowed_vb_mask;
};
static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs);
static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso);
static const struct {
   enum pipe_format from, to;
} vbuf_format_fallbacks[] = {
   { PIPE_FORMAT_R32_FIXED,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_FIXED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_FIXED,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_FIXED,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R64_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R64G64_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R64G64B64_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R64G64B64A64_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_UNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_UNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_UNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_UNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_USCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_USCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_USCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_USCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SSCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SSCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SSCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SSCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
};
void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
                     bool needs64b)
{
   unsigned i;

   memset(caps, 0, sizeof(*caps));

   /* I'd rather have a bitfield of which formats are supported and a static
    * table of the translations indexed by format, but since we don't have C99
    * we can't easily make a sparsely-populated table indexed by format. So,
    * we construct the sparse table here.
    */
   for (i = 0; i < PIPE_FORMAT_COUNT; i++)
      caps->format_translation[i] = i;

   for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) {
      enum pipe_format format = vbuf_format_fallbacks[i].from;
      unsigned comp_bits = util_format_get_component_bits(format, 0, 0);

      if ((comp_bits > 32) && !needs64b)
         continue;

      if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0, 0,
                                       PIPE_BIND_VERTEX_BUFFER)) {
         caps->format_translation[format] = vbuf_format_fallbacks[i].to;
         caps->fallback_always = true;
      }
   }

   caps->buffer_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->buffer_stride_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
   caps->velem_src_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->user_vertex_buffers =
      screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
   caps->max_vertex_buffers =
      screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);

   /* OpenGL 2.0 requires a minimum of 16 vertex buffers */
   if (caps->max_vertex_buffers < 16)
      caps->fallback_always = true;

   if (!caps->buffer_offset_unaligned ||
       !caps->buffer_stride_unaligned ||
       !caps->velem_src_offset_unaligned)
      caps->fallback_always = true;

   if (!caps->fallback_always && !caps->user_vertex_buffers)
      caps->fallback_only_for_user_vbuffers = true;
}
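
/* For illustration: on a hypothetical screen that rejects R16G16B16_FLOAT
 * vertex buffers, the loop above leaves
 *
 *    caps->format_translation[PIPE_FORMAT_R16G16B16_FLOAT]
 *       == PIPE_FORMAT_R32G32B32_FLOAT
 *
 * and sets caps->fallback_always, so draws go through u_vbuf and attribs of
 * that format are expanded to 32-bit floats by the translate fallback.
 * Formats the screen does support simply map to themselves. */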
struct u_vbuf *
u_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps)
{
   struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);

   mgr->caps = *caps;
   mgr->pipe = pipe;
   mgr->cso_cache = cso_cache_create();
   mgr->translate_cache = translate_cache_create();
   memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
   mgr->allowed_vb_mask = u_bit_consecutive(0, mgr->caps.max_vertex_buffers);

   mgr->has_signed_vb_offset =
      pipe->screen->get_param(pipe->screen,
                              PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET);

   return mgr;
}
/* u_vbuf uses its own caching for vertex elements, because it needs to keep
 * its own preprocessed state per vertex element CSO. */
static struct u_vbuf_elements *
u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr,
                                    const struct cso_velems_state *velems)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned key_size, hash_key;
   struct cso_hash_iter iter;
   struct u_vbuf_elements *ve;

   /* need to include the count into the stored state data too. */
   key_size = sizeof(struct pipe_vertex_element) * velems->count +
              sizeof(unsigned);
   hash_key = cso_construct_key((void*)velems, key_size);
   iter = cso_find_state_template(mgr->cso_cache, hash_key, CSO_VELEMENTS,
                                  (void*)velems, key_size);

   if (cso_hash_iter_is_null(iter)) {
      struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
      memcpy(&cso->state, velems, key_size);
      cso->data = u_vbuf_create_vertex_elements(mgr, velems->count,
                                                velems->velems);
      cso->delete_state = (cso_state_callback)u_vbuf_delete_vertex_elements;
      cso->context = (void*)mgr;

      iter = cso_insert_state(mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
      ve = cso->data;
   } else {
      ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
   }

   assert(ve);

   if (ve->driver_cso)
      pipe->bind_vertex_elements_state(pipe, ve->driver_cso);

   return ve;
}
void u_vbuf_set_vertex_elements(struct u_vbuf *mgr,
                                const struct cso_velems_state *velems)
{
   mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, velems);
}

void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr)
{
   mgr->ve = NULL;
}
void u_vbuf_destroy(struct u_vbuf *mgr)
{
   struct pipe_screen *screen = mgr->pipe->screen;
   unsigned i;
   const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
                                                    PIPE_SHADER_CAP_MAX_INPUTS);

   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL);

   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
      pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]);
   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
      pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]);

   pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved);

   translate_cache_destroy(mgr->translate_cache);
   cso_cache_delete(mgr->cso_cache);
   FREE(mgr);
}
static enum pipe_error
u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
                         const struct pipe_draw_info *info,
                         unsigned vb_mask, unsigned out_vb,
                         int start_vertex, unsigned num_vertices,
                         int min_index, boolean unroll_indices)
{
   struct translate *tr;
   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
   struct pipe_resource *out_buffer = NULL;
   uint8_t *out_map;
   unsigned out_offset, mask;

   /* Get a translate object. */
   tr = translate_cache_find(mgr->translate_cache, key);

   /* Map buffers we want to translate. */
   mask = vb_mask;
   while (mask) {
      struct pipe_vertex_buffer *vb;
      unsigned offset;
      uint8_t *map;
      unsigned i = u_bit_scan(&mask);

      vb = &mgr->vertex_buffer[i];
      offset = vb->buffer_offset + vb->stride * start_vertex;

      if (vb->is_user_buffer) {
         map = (uint8_t*)vb->buffer.user + offset;
      } else {
         unsigned size = vb->stride ? num_vertices * vb->stride
                                    : sizeof(double)*4;

         if (!vb->buffer.resource)
            continue;

         if (offset + size > vb->buffer.resource->width0) {
            /* Don't try to map past end of buffer. This often happens when
             * we're translating an attribute that's at offset > 0 from the
             * start of the vertex. If we'd subtract attrib's offset from
             * the size, this probably wouldn't happen.
             */
            size = vb->buffer.resource->width0 - offset;

            /* Also adjust num_vertices. A common user error is to call
             * glDrawRangeElements() with an incorrect 'end' argument. The
             * 'end' value should be the max index value, but people often
             * accidentally add one to this value. This adjustment avoids
             * crashing (by reading past the end of a hardware buffer mapping)
             * when people do that.
             */
            num_vertices = (size + vb->stride - 1) / vb->stride;
         }

         map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset,
                                     size, PIPE_TRANSFER_READ,
                                     &vb_transfer[i]);
      }

      /* Subtract min_index so that indexing with the index buffer works. */
      if (unroll_indices) {
         map -= (ptrdiff_t)vb->stride * min_index;
      }

      tr->set_buffer(tr, i, map, vb->stride, info->max_index);
   }

   /* Translate. */
   if (unroll_indices) {
      struct pipe_transfer *transfer = NULL;
      const unsigned offset = info->start * info->index_size;
      uint8_t *map;

      /* Create and map the output buffer. */
      u_upload_alloc(mgr->pipe->stream_uploader, 0,
                     key->output_stride * info->count, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      if (info->has_user_indices) {
         map = (uint8_t*)info->index.user + offset;
      } else {
         map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset,
                                     info->count * info->index_size,
                                     PIPE_TRANSFER_READ, &transfer);
      }

      switch (info->index_size) {
      case 4:
         tr->run_elts(tr, (unsigned*)map, info->count, 0, 0, out_map);
         break;
      case 2:
         tr->run_elts16(tr, (uint16_t*)map, info->count, 0, 0, out_map);
         break;
      case 1:
         tr->run_elts8(tr, map, info->count, 0, 0, out_map);
         break;
      }

      if (transfer) {
         pipe_buffer_unmap(mgr->pipe, transfer);
      }
   } else {
      /* Create and map the output buffer. */
      u_upload_alloc(mgr->pipe->stream_uploader,
                     mgr->has_signed_vb_offset ?
                        0 : key->output_stride * start_vertex,
                     key->output_stride * num_vertices, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      out_offset -= key->output_stride * start_vertex;

      tr->run(tr, 0, num_vertices, 0, 0, out_map);
   }

   /* Unmap all buffers. */
   mask = vb_mask;
   while (mask) {
      unsigned i = u_bit_scan(&mask);

      if (vb_transfer[i]) {
         pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
      }
   }

   /* Setup the new vertex buffer. */
   mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
   mgr->real_vertex_buffer[out_vb].stride = key->output_stride;

   /* Move the buffer reference. */
   pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]);
   mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer;
   mgr->real_vertex_buffer[out_vb].is_user_buffer = false;

   return PIPE_OK;
}
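
/* Worked example of the unroll_indices path above: an indexed triangle with
 * indices {0, 1, 10000} has count = 3 but a [min_index, max_index] range of
 * [0, 10000]. Instead of uploading ~10001 vertices, run_elts() walks the
 * 3 indices and writes exactly 3 output vertices, output_stride bytes apart,
 * after which the draw can proceed as a non-indexed draw of 3 vertices. */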
static boolean
u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
                                    unsigned mask[VB_NUM])
{
   unsigned type;
   unsigned fallback_vbs[VB_NUM];
   /* Set the bit for each buffer which is incompatible, or isn't set. */
   uint32_t unused_vb_mask =
      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
      ~mgr->enabled_vb_mask;
   uint32_t unused_vb_mask_orig;
   boolean insufficient_buffers = false;

   /* No vertex buffers available at all */
   if (!unused_vb_mask)
      return FALSE;

   memset(fallback_vbs, ~0, sizeof(fallback_vbs));
   mgr->fallback_vbs_mask = 0;

   /* Find free slots for each type if needed. */
   unused_vb_mask_orig = unused_vb_mask;
   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         uint32_t index;

         if (!unused_vb_mask) {
            insufficient_buffers = true;
            break;
         }

         index = ffs(unused_vb_mask) - 1;
         fallback_vbs[type] = index;
         mgr->fallback_vbs_mask |= 1 << index;
         unused_vb_mask &= ~(1 << index);
         /*printf("found slot=%i for type=%i\n", index, type);*/
      }
   }

   if (insufficient_buffers) {
      /* not enough vbs for all types supported by the hardware, they will
       * have to share one buffer */
      uint32_t index = ffs(unused_vb_mask_orig) - 1;
      /* When sharing one vertex buffer use per-vertex frequency for everything. */
      fallback_vbs[VB_VERTEX] = index;
      mgr->fallback_vbs_mask = 1 << index;
      mask[VB_VERTEX] = mask[VB_VERTEX] | mask[VB_CONST] | mask[VB_INSTANCE];
      mask[VB_CONST] = 0;
      mask[VB_INSTANCE] = 0;
   }

   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
      }
   }

   memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
   return TRUE;
}
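
/* For illustration, assuming 8 buffer slots where only slots 0-1 are enabled
 * and compatible: unused_vb_mask starts as 0b11111100, so VB_VERTEX grabs
 * slot 2, VB_INSTANCE slot 3, and VB_CONST slot 4 (ffs() returns the lowest
 * set bit plus one). If only one slot were free, all three categories would
 * share it and be translated with per-vertex frequency, as handled above. */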
static boolean
u_vbuf_translate_begin(struct u_vbuf *mgr,
                       const struct pipe_draw_info *info,
                       int start_vertex, unsigned num_vertices,
                       int min_index, boolean unroll_indices)
{
   unsigned mask[VB_NUM] = {0};
   struct translate_key key[VB_NUM];
   unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
   unsigned i, type;
   const unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
                                         mgr->ve->used_vb_mask;

   const int start[VB_NUM] = {
      start_vertex,           /* VERTEX */
      info->start_instance,   /* INSTANCE */
      0                       /* CONST */
   };

   const unsigned num[VB_NUM] = {
      num_vertices,           /* VERTEX */
      info->instance_count,   /* INSTANCE */
      1                       /* CONST */
   };

   memset(key, 0, sizeof(key));
   memset(elem_index, ~0, sizeof(elem_index));

   /* See if there are vertex attribs of each type to translate and
    * which ones. */
   for (i = 0; i < mgr->ve->count; i++) {
      unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;

      if (!mgr->vertex_buffer[vb_index].stride) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_CONST] |= 1 << vb_index;
      } else if (mgr->ve->ve[i].instance_divisor) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_INSTANCE] |= 1 << vb_index;
      } else {
         if (!unroll_indices &&
             !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_VERTEX] |= 1 << vb_index;
      }
   }

   assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);

   /* Find free vertex buffer slots. */
   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
      return FALSE;
   }

   /* Initialize the translate keys. */
   for (i = 0; i < mgr->ve->count; i++) {
      struct translate_key *k;
      struct translate_element *te;
      enum pipe_format output_format = mgr->ve->native_format[i];
      unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
      bit = 1 << vb_index;

      if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
          !(incompatible_vb_mask & (1 << vb_index)) &&
          (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
         continue;
      }

      /* Set type to what we will translate.
       * Whether vertex, instance, or constant attribs. */
      for (type = 0; type < VB_NUM; type++) {
         if (mask[type] & bit) {
            break;
         }
      }
      assert(type < VB_NUM);
      if (mgr->ve->ve[i].src_format != output_format)
         assert(translate_is_output_format_supported(output_format));
      /*printf("velem=%i type=%i\n", i, type);*/

      /* Add the vertex element. */
      k = &key[type];
      elem_index[type][i] = k->nr_elements;

      te = &k->element[k->nr_elements];
      te->type = TRANSLATE_ELEMENT_NORMAL;
      te->instance_divisor = 0;
      te->input_buffer = vb_index;
      te->input_format = mgr->ve->ve[i].src_format;
      te->input_offset = mgr->ve->ve[i].src_offset;
      te->output_format = output_format;
      te->output_offset = k->output_stride;

      k->output_stride += mgr->ve->native_format_size[i];
      k->nr_elements++;
   }

   /* Translate buffers. */
   for (type = 0; type < VB_NUM; type++) {
      if (key[type].nr_elements) {
         enum pipe_error err;
         err = u_vbuf_translate_buffers(mgr, &key[type], info, mask[type],
                                        mgr->fallback_vbs[type],
                                        start[type], num[type], min_index,
                                        unroll_indices && type == VB_VERTEX);
         if (err != PIPE_OK)
            return FALSE;

         /* Fixup the stride for constant attribs. */
         if (type == VB_CONST) {
            mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
         }
      }
   }

   /* Setup new vertex elements. */
   for (i = 0; i < mgr->ve->count; i++) {
      for (type = 0; type < VB_NUM; type++) {
         if (elem_index[type][i] < key[type].nr_elements) {
            struct translate_element *te = &key[type].element[elem_index[type][i]];
            mgr->fallback_velems.velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
            mgr->fallback_velems.velems[i].src_format = te->output_format;
            mgr->fallback_velems.velems[i].src_offset = te->output_offset;
            mgr->fallback_velems.velems[i].vertex_buffer_index = mgr->fallback_vbs[type];

            /* elem_index[type][i] can only be set for one type. */
            assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u);
            assert(type > VB_VERTEX || elem_index[type+2][i] == ~0u);
            break;
         }
      }
      /* No translating, just copy the original vertex element over. */
      if (type == VB_NUM) {
         memcpy(&mgr->fallback_velems.velems[i], &mgr->ve->ve[i],
                sizeof(struct pipe_vertex_element));
      }
   }

   mgr->fallback_velems.count = mgr->ve->count;

   u_vbuf_set_vertex_elements_internal(mgr, &mgr->fallback_velems);
   mgr->using_translate = TRUE;
   return TRUE;
}
static void u_vbuf_translate_end(struct u_vbuf *mgr)
{
   unsigned i;

   /* Restore vertex elements. */
   mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
   mgr->using_translate = FALSE;

   /* Unreference the now-unused VBOs. */
   for (i = 0; i < VB_NUM; i++) {
      unsigned vb = mgr->fallback_vbs[i];
      if (vb != ~0u) {
         pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL);
         mgr->fallback_vbs[i] = ~0;
      }
   }
   /* This will cause the buffer to be unbound in the driver later. */
   mgr->dirty_real_vb_mask |= mgr->fallback_vbs_mask;
   mgr->fallback_vbs_mask = 0;
}
static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned i;
   struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
   uint32_t used_buffers = 0;

   ve->count = count;

   memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
   memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);

   /* Set the best native format in case the original format is not
    * supported. */
   for (i = 0; i < count; i++) {
      enum pipe_format format = ve->ve[i].src_format;

      ve->src_format_size[i] = util_format_get_blocksize(format);

      used_buffers |= 1 << ve->ve[i].vertex_buffer_index;

      if (!ve->ve[i].instance_divisor) {
         ve->noninstance_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      }

      format = mgr->caps.format_translation[format];

      driver_attribs[i].src_format = format;
      ve->native_format[i] = format;
      ve->native_format_size[i] =
         util_format_get_blocksize(ve->native_format[i]);

      if (ve->ve[i].src_format != format ||
          (!mgr->caps.velem_src_offset_unaligned &&
           ve->ve[i].src_offset % 4 != 0)) {
         ve->incompatible_elem_mask |= 1 << i;
         ve->incompatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      } else {
         ve->compatible_vb_mask_any |= 1 << ve->ve[i].vertex_buffer_index;
      }
   }

   if (used_buffers & ~mgr->allowed_vb_mask) {
      /* More vertex buffers are used than the hardware supports. In
       * principle, we only need to make sure that less vertex buffers are
       * used, and mark some of the latter vertex buffers as incompatible.
       * For now, mark all vertex buffers as incompatible.
       */
      ve->incompatible_vb_mask_any = used_buffers;
      ve->compatible_vb_mask_any = 0;
      ve->incompatible_elem_mask = u_bit_consecutive(0, count);
   }

   ve->used_vb_mask = used_buffers;
   ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
   ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;

   /* Align the formats and offsets to the size of DWORD if needed. */
   if (!mgr->caps.velem_src_offset_unaligned) {
      for (i = 0; i < count; i++) {
         ve->native_format_size[i] = align(ve->native_format_size[i], 4);
         driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4);
      }
   }

   /* Only create driver CSO if no incompatible elements */
   if (!ve->incompatible_elem_mask) {
      ve->driver_cso =
         pipe->create_vertex_elements_state(pipe, count, driver_attribs);
   }

   return ve;
}
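
/* For illustration: with velem_src_offset_unaligned == false, an element
 * using src_offset = 2 sets bit i in incompatible_elem_mask above even when
 * its format is natively supported, so no driver CSO is created and the
 * translate fallback repacks that attrib at a 4-byte-aligned offset. */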
static void u_vbuf_delete_vertex_elements(struct u_vbuf *mgr, void *cso)
{
   struct pipe_context *pipe = mgr->pipe;
   struct u_vbuf_elements *ve = cso;

   if (ve->driver_cso)
      pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
   FREE(ve);
}
void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
                               unsigned start_slot, unsigned count,
                               const struct pipe_vertex_buffer *bufs)
{
   unsigned i;
   /* which buffers are enabled */
   uint32_t enabled_vb_mask = 0;
   /* which buffers are in user memory */
   uint32_t user_vb_mask = 0;
   /* which buffers are incompatible with the driver */
   uint32_t incompatible_vb_mask = 0;
   /* which buffers have a non-zero stride */
   uint32_t nonzero_stride_vb_mask = 0;
   const uint32_t mask = ~(((1ull << count) - 1) << start_slot);

   /* Zero out the bits we are going to rewrite completely. */
   mgr->user_vb_mask &= mask;
   mgr->incompatible_vb_mask &= mask;
   mgr->nonzero_stride_vb_mask &= mask;
   mgr->enabled_vb_mask &= mask;

   if (!bufs) {
      struct pipe_context *pipe = mgr->pipe;
      /* Unbind. */
      mgr->dirty_real_vb_mask &= mask;

      for (i = 0; i < count; i++) {
         unsigned dst_index = start_slot + i;

         pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
         pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
      }

      pipe->set_vertex_buffers(pipe, start_slot, count, NULL);
      return;
   }

   for (i = 0; i < count; i++) {
      unsigned dst_index = start_slot + i;
      const struct pipe_vertex_buffer *vb = &bufs[i];
      struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index];
      struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index];

      if (!vb->buffer.resource) {
         pipe_vertex_buffer_unreference(orig_vb);
         pipe_vertex_buffer_unreference(real_vb);
         continue;
      }

      pipe_vertex_buffer_reference(orig_vb, vb);

      if (vb->stride) {
         nonzero_stride_vb_mask |= 1 << dst_index;
      }
      enabled_vb_mask |= 1 << dst_index;

      if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
          (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
         incompatible_vb_mask |= 1 << dst_index;
         real_vb->buffer_offset = vb->buffer_offset;
         real_vb->stride = vb->stride;
         pipe_vertex_buffer_unreference(real_vb);
         real_vb->is_user_buffer = false;
         continue;
      }

      if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) {
         user_vb_mask |= 1 << dst_index;
         real_vb->buffer_offset = vb->buffer_offset;
         real_vb->stride = vb->stride;
         pipe_vertex_buffer_unreference(real_vb);
         real_vb->is_user_buffer = false;
         continue;
      }

      pipe_vertex_buffer_reference(real_vb, vb);
   }

   mgr->user_vb_mask |= user_vb_mask;
   mgr->incompatible_vb_mask |= incompatible_vb_mask;
   mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
   mgr->enabled_vb_mask |= enabled_vb_mask;

   /* All changed buffers are marked as dirty, even the NULL ones,
    * which will cause the NULL buffers to be unbound in the driver later. */
   mgr->dirty_real_vb_mask |= ~mask;
}
static enum pipe_error
u_vbuf_upload_buffers(struct u_vbuf *mgr,
                      int start_vertex, unsigned num_vertices,
                      int start_instance, unsigned num_instances)
{
   unsigned i;
   unsigned nr_velems = mgr->ve->count;
   const struct pipe_vertex_element *velems =
      mgr->using_translate ? mgr->fallback_velems.velems : mgr->ve->ve;
   unsigned start_offset[PIPE_MAX_ATTRIBS];
   unsigned end_offset[PIPE_MAX_ATTRIBS];
   uint32_t buffer_mask = 0;

   /* Determine how much data needs to be uploaded. */
   for (i = 0; i < nr_velems; i++) {
      const struct pipe_vertex_element *velem = &velems[i];
      unsigned index = velem->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
      unsigned instance_div, first, size, index_bit;

      /* Skip the buffers generated by translate. */
      if ((1 << index) & mgr->fallback_vbs_mask) {
         continue;
      }

      if (!vb->is_user_buffer) {
         continue;
      }

      instance_div = velem->instance_divisor;
      first = vb->buffer_offset + velem->src_offset;

      if (!vb->stride) {
         /* Constant attrib. */
         size = mgr->ve->src_format_size[i];
      } else if (instance_div) {
         /* Per-instance attrib. */

         /* Figure out how many instances we'll render given instance_div. We
          * can't use the typical div_round_up() pattern because the CTS uses
          * instance_div = ~0 for a test, which overflows div_round_up()'s
          * num_instances + instance_div - 1 computation.
          */
         unsigned count = num_instances / instance_div;
         if (count * instance_div != num_instances)
            count++;

         first += vb->stride * start_instance;
         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
      } else {
         /* Per-vertex attrib. */
         first += vb->stride * start_vertex;
         size = vb->stride * (num_vertices - 1) + mgr->ve->src_format_size[i];
      }

      index_bit = 1 << index;

      /* Update offsets. */
      if (!(buffer_mask & index_bit)) {
         start_offset[index] = first;
         end_offset[index] = first + size;
      } else {
         if (first < start_offset[index])
            start_offset[index] = first;
         if (first + size > end_offset[index])
            end_offset[index] = first + size;
      }

      buffer_mask |= index_bit;
   }

   /* Upload buffers. */
   while (buffer_mask) {
      unsigned start, end;
      struct pipe_vertex_buffer *real_vb;
      const uint8_t *ptr;

      i = u_bit_scan(&buffer_mask);

      start = start_offset[i];
      end = end_offset[i];
      assert(start < end);

      real_vb = &mgr->real_vertex_buffer[i];
      ptr = mgr->vertex_buffer[i].buffer.user;

      u_upload_data(mgr->pipe->stream_uploader,
                    mgr->has_signed_vb_offset ? 0 : start,
                    end - start, 4,
                    ptr + start, &real_vb->buffer_offset, &real_vb->buffer.resource);
      if (!real_vb->buffer.resource)
         return PIPE_ERROR_OUT_OF_MEMORY;

      real_vb->buffer_offset -= start;
   }

   return PIPE_OK;
}
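
/* A quick sanity check of the range math above: a per-vertex attrib with
 * buffer_offset = 0, src_offset = 12, stride = 32, start_vertex = 10 and
 * num_vertices = 5 yields first = 12 + 32*10 = 332 and
 * size = 32*4 + src_format_size (144 for a 16-byte format), so only bytes
 * [332, 476) of the user buffer are copied, never the whole buffer. */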
static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
{
   /* See if there are any per-vertex attribs which will be uploaded or
    * translated. Use bitmasks to get the info instead of looping over vertex
    * elements. */
   return (mgr->ve->used_vb_mask &
           ((mgr->user_vb_mask |
             mgr->incompatible_vb_mask |
             mgr->ve->incompatible_vb_mask_any) &
            mgr->ve->noninstance_vb_mask_any &
            mgr->nonzero_stride_vb_mask)) != 0;
}
static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
{
   /* Return true if there are hw buffers which don't need to be translated.
    *
    * We could query whether each buffer is busy, but that would
    * be way more costly than this. */
   return (mgr->ve->used_vb_mask &
           (~mgr->user_vb_mask &
            ~mgr->incompatible_vb_mask &
            mgr->ve->compatible_vb_mask_all &
            mgr->ve->noninstance_vb_mask_any &
            mgr->nonzero_stride_vb_mask)) != 0;
}
static void
u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info,
                               const void *indices, unsigned *out_min_index,
                               unsigned *out_max_index)
{
   if (!info->count) {
      *out_min_index = 0;
      *out_max_index = 0;
      return;
   }

   switch (info->index_size) {
   case 4: {
      const unsigned *ui_indices = (const unsigned*)indices;
      unsigned max = 0;
      unsigned min = ~0u;
      if (info->primitive_restart) {
         for (unsigned i = 0; i < info->count; i++) {
            if (ui_indices[i] != info->restart_index) {
               if (ui_indices[i] > max) max = ui_indices[i];
               if (ui_indices[i] < min) min = ui_indices[i];
            }
         }
      } else {
         for (unsigned i = 0; i < info->count; i++) {
            if (ui_indices[i] > max) max = ui_indices[i];
            if (ui_indices[i] < min) min = ui_indices[i];
         }
      }
      *out_min_index = min;
      *out_max_index = max;
      break;
   }
   case 2: {
      const unsigned short *us_indices = (const unsigned short*)indices;
      unsigned short max = 0;
      unsigned short min = ~((unsigned short)0);
      if (info->primitive_restart) {
         for (unsigned i = 0; i < info->count; i++) {
            if (us_indices[i] != info->restart_index) {
               if (us_indices[i] > max) max = us_indices[i];
               if (us_indices[i] < min) min = us_indices[i];
            }
         }
      } else {
         for (unsigned i = 0; i < info->count; i++) {
            if (us_indices[i] > max) max = us_indices[i];
            if (us_indices[i] < min) min = us_indices[i];
         }
      }
      *out_min_index = min;
      *out_max_index = max;
      break;
   }
   case 1: {
      const unsigned char *ub_indices = (const unsigned char*)indices;
      unsigned char max = 0;
      unsigned char min = ~((unsigned char)0);
      if (info->primitive_restart) {
         for (unsigned i = 0; i < info->count; i++) {
            if (ub_indices[i] != info->restart_index) {
               if (ub_indices[i] > max) max = ub_indices[i];
               if (ub_indices[i] < min) min = ub_indices[i];
            }
         }
      } else {
         for (unsigned i = 0; i < info->count; i++) {
            if (ub_indices[i] > max) max = ub_indices[i];
            if (ub_indices[i] < min) min = ub_indices[i];
         }
      }
      *out_min_index = min;
      *out_max_index = max;
      break;
   }
   default:
      unreachable("bad index size");
   }
}
void u_vbuf_get_minmax_index(struct pipe_context *pipe,
                             const struct pipe_draw_info *info,
                             unsigned *out_min_index, unsigned *out_max_index)
{
   struct pipe_transfer *transfer = NULL;
   const void *indices;

   if (info->has_user_indices) {
      indices = (uint8_t*)info->index.user +
                info->start * info->index_size;
   } else {
      indices = pipe_buffer_map_range(pipe, info->index.resource,
                                      info->start * info->index_size,
                                      info->count * info->index_size,
                                      PIPE_TRANSFER_READ, &transfer);
   }

   u_vbuf_get_minmax_index_mapped(info, indices, out_min_index, out_max_index);

   if (transfer) {
      pipe_buffer_unmap(pipe, transfer);
   }
}
static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned start_slot, count;

   start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
   count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);

   pipe->set_vertex_buffers(pipe, start_slot, count,
                            mgr->real_vertex_buffer + start_slot);
   mgr->dirty_real_vb_mask = 0;
}
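
/* For illustration: if dirty_real_vb_mask == 0b011000, then
 * start_slot = ffs(0b011000) - 1 = 3 and
 * count = util_last_bit(0b011000 >> 3) = 2, so a single
 * set_vertex_buffers() call covers slots 3-4, including any clean slots
 * in between. */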
static void
u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info,
                               unsigned *indirect_data, unsigned stride,
                               unsigned draw_count)
{
   assert(info->index_size);
   info->indirect = NULL;

   for (unsigned i = 0; i < draw_count; i++) {
      unsigned offset = i * stride / 4;

      info->count = indirect_data[offset + 0];
      info->instance_count = indirect_data[offset + 1];

      if (!info->count || !info->instance_count)
         continue;

      info->start = indirect_data[offset + 2];
      info->index_bias = indirect_data[offset + 3];
      info->start_instance = indirect_data[offset + 4];

      u_vbuf_draw_vbo(mgr, info);
   }
}
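
/* The indirect_data layout assumed above matches the GL/Gallium indexed
 * indirect record of five uints per draw:
 *
 *    { count, instance_count, start, index_bias, start_instance }
 *
 * so with stride = 20, draw i's index_bias lives at dword i*5 + 3, which is
 * exactly what u_vbuf_draw_vbo() reads when checking whether the biases of
 * a multidraw are all equal. */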
void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
{
   struct pipe_context *pipe = mgr->pipe;
   int start_vertex;
   unsigned min_index;
   unsigned num_vertices;
   boolean unroll_indices = FALSE;
   const uint32_t used_vb_mask = mgr->ve->used_vb_mask;
   uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
   const uint32_t incompatible_vb_mask =
      mgr->incompatible_vb_mask & used_vb_mask;
   struct pipe_draw_info new_info;

   /* Normal draw. No fallback and no user buffers. */
   if (!incompatible_vb_mask &&
       !mgr->ve->incompatible_elem_mask &&
       !user_vb_mask) {

      /* Set vertex buffers if needed. */
      if (mgr->dirty_real_vb_mask & used_vb_mask) {
         u_vbuf_set_driver_vertex_buffers(mgr);
      }

      pipe->draw_vbo(pipe, info);
      return;
   }

   new_info = *info;

   /* Handle indirect (multi)draws. */
   if (new_info.indirect) {
      const struct pipe_draw_indirect_info *indirect = new_info.indirect;
      unsigned draw_count = 0;

      /* Get the number of draws. */
      if (indirect->indirect_draw_count) {
         pipe_buffer_read(pipe, indirect->indirect_draw_count,
                          indirect->indirect_draw_count_offset,
                          4, &draw_count);
      } else {
         draw_count = indirect->draw_count;
      }

      if (!draw_count)
         return;

      unsigned data_size = (draw_count - 1) * indirect->stride +
                           (new_info.index_size ? 20 : 16);
      unsigned *data = malloc(data_size);
      if (!data)
         return; /* report an error? */

      /* Read the used buffer range only once, because the read can be
       * uncached.
       */
      pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size,
                       data);

      if (info->index_size) {
         /* Indexed multidraw. */
         unsigned index_bias0 = data[3];
         bool index_bias_same = true;

         /* If we invoke the translate path, we have to split the multidraw. */
         if (incompatible_vb_mask ||
             mgr->ve->incompatible_elem_mask) {
            u_vbuf_split_indexed_multidraw(mgr, &new_info, data,
                                           indirect->stride, draw_count);
            free(data);
            return;
         }

         /* See if index_bias is the same for all draws. */
         for (unsigned i = 1; i < draw_count; i++) {
            if (data[i * indirect->stride / 4 + 3] != index_bias0) {
               index_bias_same = false;
               break;
            }
         }

         /* Split the multidraw if index_bias is different. */
         if (!index_bias_same) {
            u_vbuf_split_indexed_multidraw(mgr, &new_info, data,
                                           indirect->stride, draw_count);
            free(data);
            return;
         }

         /* If we don't need to use the translate path and index_bias is
          * the same, we can process the multidraw with the time complexity
          * equal to 1 draw call (except for the index range computation).
          * We only need to compute the index range covering all draw calls
          * of the multidraw.
          *
          * The driver will not look at these values because indirect != NULL.
          * These values determine the user buffer bounds to upload.
          */
         new_info.index_bias = index_bias0;
         new_info.min_index = ~0u;
         new_info.max_index = 0;
         new_info.start_instance = ~0u;
         unsigned end_instance = 0;

         struct pipe_transfer *transfer = NULL;
         const uint8_t *indices;

         if (info->has_user_indices) {
            indices = (uint8_t*)info->index.user;
         } else {
            indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource,
                                                PIPE_TRANSFER_READ, &transfer);
         }

         for (unsigned i = 0; i < draw_count; i++) {
            unsigned offset = i * indirect->stride / 4;
            unsigned start = data[offset + 2];
            unsigned count = data[offset + 0];
            unsigned start_instance = data[offset + 4];
            unsigned instance_count = data[offset + 1];

            if (!count || !instance_count)
               continue;

            /* Update the ranges of instances. */
            new_info.start_instance = MIN2(new_info.start_instance,
                                           start_instance);
            end_instance = MAX2(end_instance, start_instance + instance_count);

            /* Update the index range. */
            unsigned min, max;
            new_info.count = count; /* only used by get_minmax_index */
            u_vbuf_get_minmax_index_mapped(&new_info,
                                           indices +
                                           new_info.index_size * start,
                                           &min, &max);

            new_info.min_index = MIN2(new_info.min_index, min);
            new_info.max_index = MAX2(new_info.max_index, max);
         }
         free(data);

         if (transfer)
            pipe_buffer_unmap(pipe, transfer);

         /* Set the final instance count. */
         new_info.instance_count = end_instance - new_info.start_instance;

         if (new_info.start_instance == ~0u || !new_info.instance_count)
            return;
      } else {
         /* Non-indexed multidraw.
          *
          * Keep the draw call indirect and compute minimums & maximums,
          * which will determine the user buffer bounds to upload, but
          * the driver will not look at these values because indirect != NULL.
          *
          * This efficiently processes the multidraw with the time complexity
          * equal to 1 draw call.
          */
         new_info.start = ~0u;
         new_info.start_instance = ~0u;
         unsigned end_vertex = 0;
         unsigned end_instance = 0;

         for (unsigned i = 0; i < draw_count; i++) {
            unsigned offset = i * indirect->stride / 4;
            unsigned start = data[offset + 2];
            unsigned count = data[offset + 0];
            unsigned start_instance = data[offset + 3];
            unsigned instance_count = data[offset + 1];

            new_info.start = MIN2(new_info.start, start);
            new_info.start_instance = MIN2(new_info.start_instance,
                                           start_instance);

            end_vertex = MAX2(end_vertex, start + count);
            end_instance = MAX2(end_instance, start_instance + instance_count);
         }
         free(data);

         /* Set the final counts. */
         new_info.count = end_vertex - new_info.start;
         new_info.instance_count = end_instance - new_info.start_instance;

         if (new_info.start == ~0u || !new_info.count || !new_info.instance_count)
            return;
      }
   }

   if (new_info.index_size) {
      /* See if anything needs to be done for per-vertex attribs. */
      if (u_vbuf_need_minmax_index(mgr)) {
         unsigned max_index;

         if (new_info.max_index != ~0u) {
            min_index = new_info.min_index;
            max_index = new_info.max_index;
         } else {
            u_vbuf_get_minmax_index(mgr->pipe, &new_info,
                                    &min_index, &max_index);
         }

         assert(min_index <= max_index);

         start_vertex = min_index + new_info.index_bias;
         num_vertices = max_index + 1 - min_index;

         /* Primitive restart doesn't work when unrolling indices.
          * We would have to break this drawing operation into several ones. */
         /* Use some heuristic to see if unrolling indices improves
          * performance. */
         if (!info->indirect &&
             !new_info.primitive_restart &&
             util_is_vbo_upload_ratio_too_large(new_info.count, num_vertices) &&
             !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
            unroll_indices = TRUE;
            user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
                              mgr->ve->noninstance_vb_mask_any);
         }
      } else {
         /* Nothing to do for per-vertex attribs. */
         start_vertex = 0;
         num_vertices = 0;
         min_index = 0;
      }
   } else {
      start_vertex = new_info.start;
      num_vertices = new_info.count;
      min_index = 0;
   }

   /* Translate vertices with non-native layouts or formats. */
   if (unroll_indices ||
       incompatible_vb_mask ||
       mgr->ve->incompatible_elem_mask) {
      if (!u_vbuf_translate_begin(mgr, &new_info, start_vertex, num_vertices,
                                  min_index, unroll_indices)) {
         debug_warn_once("u_vbuf_translate_begin() failed");
         return;
      }

      if (unroll_indices) {
         new_info.index_size = 0;
         new_info.index_bias = 0;
         new_info.min_index = 0;
         new_info.max_index = new_info.count - 1;
         new_info.start = 0;
      }

      user_vb_mask &= ~(incompatible_vb_mask |
                        mgr->ve->incompatible_vb_mask_all);
   }

   /* Upload user buffers. */
   if (user_vb_mask) {
      if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
                                new_info.start_instance,
                                new_info.instance_count) != PIPE_OK) {
         debug_warn_once("u_vbuf_upload_buffers() failed");
         return;
      }

      mgr->dirty_real_vb_mask |= user_vb_mask;
   }

   /*
   if (unroll_indices) {
      printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
             start_vertex, num_vertices);
      util_dump_draw_info(stdout, info);
      printf("\n");
   }

   unsigned i;
   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
      printf("input %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
      printf("\n");
   }
   for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
      printf("real %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
      printf("\n");
   }
   */

   u_upload_unmap(pipe->stream_uploader);
   u_vbuf_set_driver_vertex_buffers(mgr);

   pipe->draw_vbo(pipe, &new_info);

   if (mgr->using_translate) {
      u_vbuf_translate_end(mgr);
   }
}
void u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
{
   assert(!mgr->ve_saved);
   mgr->ve_saved = mgr->ve;
}

void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
{
   if (mgr->ve != mgr->ve_saved) {
      struct pipe_context *pipe = mgr->pipe;

      mgr->ve = mgr->ve_saved;
      pipe->bind_vertex_elements_state(pipe,
                                       mgr->ve ? mgr->ve->driver_cso : NULL);
   }
   mgr->ve_saved = NULL;
}

void u_vbuf_save_vertex_buffer0(struct u_vbuf *mgr)
{
   pipe_vertex_buffer_reference(&mgr->vertex_buffer0_saved,
                                &mgr->vertex_buffer[0]);
}

void u_vbuf_restore_vertex_buffer0(struct u_vbuf *mgr)
{
   u_vbuf_set_vertex_buffers(mgr, 0, 1, &mgr->vertex_buffer0_saved);
   pipe_vertex_buffer_unreference(&mgr->vertex_buffer0_saved);
}