info->instance_count,
info->start,
info->start_instance);
+
+ /* On a large client-buffer draw, the client buffer was used directly,
+ * without a copy. Block until the draw is finished.
+ * VMD is an example application that benefits from this. */
+ if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) {
+ struct swr_screen *screen = swr_screen(pipe->screen);
+ swr_fence_submit(ctx, screen->flush_fence);
+ swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
+ }
}
#define SWR_MAX_TEXTURE_CUBE_LEVELS 14 /* 8K x 8K for now */
#define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */
+/* Default client_copy_limit: user-buffer data of at least this size is
+ * accessed in place by the draw instead of being copied to scratch space. */
+#define SWR_CLIENT_COPY_LIMIT 32768
+
/* Flag indicates creation of alternate surface, to prevent recursive loop
* in resource creation when msaa_force_enable is set. */
#define SWR_RESOURCE_FLAG_ALT_SURFACE (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
static void
swr_validate_env_options(struct swr_screen *screen)
{
+ /* The client_copy_limit sets a maximum on the amount of user-buffer memory
+ * copied to scratch space on a draw. At or above this size, the draw will
+ * access the user-buffer directly and then block. This is faster than
+ * queuing many large client draws. */
+ screen->client_copy_limit = SWR_CLIENT_COPY_LIMIT;
+ int client_copy_limit =
+ debug_get_num_option("SWR_CLIENT_COPY_LIMIT", SWR_CLIENT_COPY_LIMIT);
+ if (client_copy_limit > 0)
+ screen->client_copy_limit = client_copy_limit;
+
/* XXX msaa under development, disable by default for now */
screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */
partial_inbounds = 0;
min_vertex_index = info.min_index;
- /* Copy only needed vertices to scratch space */
size = AlignUp(size, 4);
- const void *ptr = (const uint8_t *) vb->buffer.user + base;
- ptr = (uint8_t *)swr_copy_to_scratch_space(
- ctx, &ctx->scratch->vertex_buffer, ptr, size);
- p_data = (const uint8_t *)ptr - base;
+ /* If size of client memory copy is too large, don't copy. The
+ * draw will access user-buffer directly and then block. This is
+ * faster than queuing many large client draws. */
+ if (size >= screen->client_copy_limit) {
+ post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW;
+ p_data = (const uint8_t *) vb->buffer.user;
+ } else {
+ /* Copy only needed vertices to scratch space */
+ const void *ptr = (const uint8_t *) vb->buffer.user + base;
+ ptr = (uint8_t *)swr_copy_to_scratch_space(
+ ctx, &ctx->scratch->vertex_buffer, ptr, size);
+ p_data = (const uint8_t *)ptr - base;
+ }
}
swrVertexBuffers[i] = {0};
size = info.count * pitch;
size = AlignUp(size, 4);
-
- /* Copy indices to scratch space */
- const void *ptr = info.index.user;
- ptr = swr_copy_to_scratch_space(
- ctx, &ctx->scratch->index_buffer, ptr, size);
- p_data = (const uint8_t *)ptr;
+ /* If size of client memory copy is too large, don't copy. The
+ * draw will access user-buffer directly and then block. This is
+ * faster than queuing many large client draws. */
+ if (size >= screen->client_copy_limit) {
+ post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW;
+ p_data = (const uint8_t *) info.index.user;
+ } else {
+ /* Copy indices to scratch space */
+ const void *ptr = info.index.user;
+ ptr = swr_copy_to_scratch_space(
+ ctx, &ctx->scratch->index_buffer, ptr, size);
+ p_data = (const uint8_t *)ptr;
+ }
}
SWR_INDEX_BUFFER_STATE swrIndexBuffer;