X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fsvga%2Fsvga_resource_buffer.c;h=e9d31de6166919398f4097b93e89d26bfe26ec78;hb=ce785f5ffd7dbed14a3909164e55a975a023ee97;hp=198d40133284158c18465621467e45e1bbdc16ec;hpb=695cc370a280a637f411f5ff3877b3fd1c05e424;p=mesa.git diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index 198d4013328..e9d31de6166 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -31,6 +31,7 @@ #include "os/os_thread.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_resource.h" #include "svga_context.h" #include "svga_screen.h" @@ -44,72 +45,202 @@ * Vertex and index buffers need hardware backing. Constant buffers * do not. No other types of buffers currently supported. */ -static INLINE boolean +static inline boolean svga_buffer_needs_hw_storage(unsigned usage) { - return usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER); + return (usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER | + PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT)) != 0; } -static unsigned int -svga_buffer_is_referenced( struct pipe_context *pipe, - struct pipe_resource *buf, - unsigned face, unsigned level) +/** + * Create a buffer transfer. + * + * Unlike texture DMAs (which are written immediately to the command buffer and + * therefore inherently serialized with other context operations), for buffers + * we try to coalesce multiple range mappings (i.e, multiple calls to this + * function) into a single DMA command, for better efficiency in command + * processing. This means we need to exercise extra care here to ensure that + * the end result is exactly the same as if one DMA was used for every mapped + * range. 
+ */ +static void * +svga_buffer_transfer_map(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer) { + struct svga_context *svga = svga_context(pipe); struct svga_screen *ss = svga_screen(pipe->screen); - struct svga_buffer *sbuf = svga_buffer(buf); + struct svga_buffer *sbuf = svga_buffer(resource); + struct pipe_transfer *transfer; + uint8_t *map = NULL; + int64_t begin = svga_get_time(svga); - /** - * XXX: Check this. - * The screen may cache buffer writes, but when we map, we map out - * of those cached writes, so we don't need to set a - * PIPE_REFERENCED_FOR_WRITE flag for cached buffers. - */ + SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_BUFFERTRANSFERMAP); - if (!sbuf->handle || ss->sws->surface_is_flushed(ss->sws, sbuf->handle)) - return PIPE_UNREFERENCED; + assert(box->y == 0); + assert(box->z == 0); + assert(box->height == 1); + assert(box->depth == 1); - /** - * sws->surface_is_flushed() does not distinguish between read references - * and write references. So assume a reference is both, - * however, we make an exception for index- and vertex buffers, to avoid - * a flush in st_bufferobj_get_subdata, during display list replay. - */ + transfer = MALLOC_STRUCT(pipe_transfer); + if (!transfer) { + goto done; + } - if (sbuf->b.b.bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) - return PIPE_REFERENCED_FOR_READ; + transfer->resource = resource; + transfer->level = level; + transfer->usage = usage; + transfer->box = *box; + transfer->stride = 0; + transfer->layer_stride = 0; + + if (usage & PIPE_TRANSFER_WRITE) { + /* If we write to the buffer for any reason, free any saved translated + * indices. 
+ */ + pipe_resource_reference(&sbuf->translated_indices.buffer, NULL); + } - return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE; -} + if ((usage & PIPE_TRANSFER_READ) && sbuf->dirty) { + enum pipe_error ret; + /* Host-side buffers can only be dirtied with vgpu10 features + * (streamout and buffer copy). + */ + assert(svga_have_vgpu10(svga)); + if (!sbuf->user) { + (void) svga_buffer_handle(svga, resource, sbuf->bind_flags); + } + if (sbuf->dma.pending > 0) { + svga_buffer_upload_flush(svga, sbuf); + svga_context_finish(svga); + } + assert(sbuf->handle); + ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0); + if (ret != PIPE_OK) { + svga_context_flush(svga, NULL); + ret = SVGA3D_vgpu10_ReadbackSubResource(svga->swc, sbuf->handle, 0); + assert(ret == PIPE_OK); + } -static void * -svga_buffer_map_range( struct pipe_screen *screen, - struct pipe_resource *buf, - unsigned offset, - unsigned length, - unsigned usage ) -{ - struct svga_screen *ss = svga_screen(screen); - struct svga_winsys_screen *sws = ss->sws; - struct svga_buffer *sbuf = svga_buffer( buf ); - void *map; + svga->hud.num_readbacks++; + + svga_context_finish(svga); + + sbuf->dirty = FALSE; + } + + if (usage & PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { + /* + * Flush any pending primitives, finish writing any pending DMA + * commands, and tell the host to discard the buffer contents on + * the next DMA operation. + */ + + svga_hwtnl_flush_buffer(svga, resource); + + if (sbuf->dma.pending) { + svga_buffer_upload_flush(svga, sbuf); + + /* + * Instead of flushing the context command buffer, simply discard + * the current hwbuf, and start a new one. + * With GB objects, the map operation takes care of this + * if passed the PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE flag, + * and the old backing store is busy. 
+ */ + + if (!svga_have_gb_objects(svga)) + svga_buffer_destroy_hw_storage(ss, sbuf); + } + + sbuf->map.num_ranges = 0; + sbuf->dma.flags.discard = TRUE; + } + + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { + if (!sbuf->map.num_ranges) { + /* + * No pending ranges to upload so far, so we can tell the host to + * not synchronize on the next DMA command. + */ - if (!sbuf->swbuf && !sbuf->hwbuf) { - if (svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) { + sbuf->dma.flags.unsynchronized = TRUE; + } + } else { + /* + * Synchronizing, so flush any pending primitives, finish writing any + * pending DMA command, and ensure the next DMA will be done in order. + */ + + svga_hwtnl_flush_buffer(svga, resource); + + if (sbuf->dma.pending) { + svga_buffer_upload_flush(svga, sbuf); + + if (svga_buffer_has_hw_storage(sbuf)) { + /* + * We have a pending DMA upload from a hardware buffer, therefore + * we need to ensure that the host finishes processing that DMA + * command before the state tracker can start overwriting the + * hardware buffer. + * + * XXX: This could be avoided by tying the hardware buffer to + * the transfer (just as done with textures), which would allow + * overlapping DMAs commands to be queued on the same context + * buffer. However, due to the likelihood of software vertex + * processing, it is more convenient to hold on to the hardware + * buffer, allowing to quickly access the contents from the CPU + * without having to do a DMA download from the host. + */ + + if (usage & PIPE_TRANSFER_DONTBLOCK) { + /* + * Flushing the command buffer here will most likely cause + * the map of the hwbuf below to block, so preemptively + * return NULL here if DONTBLOCK is set to prevent unnecessary + * command buffer flushes. 
+ */ + + FREE(transfer); + goto done; + } + + svga_context_flush(svga, NULL); + } + } + + sbuf->dma.flags.unsynchronized = FALSE; + } + } + + if (!sbuf->swbuf && !svga_buffer_has_hw_storage(sbuf)) { + if (svga_buffer_create_hw_storage(ss, sbuf, sbuf->bind_flags) != PIPE_OK) { /* * We can't create a hardware buffer big enough, so create a malloc * buffer instead. */ - debug_printf("%s: failed to allocate %u KB of DMA, splitting DMA transfers\n", - __FUNCTION__, - (sbuf->b.b.width0 + 1023)/1024); + if (0) { + debug_printf("%s: failed to allocate %u KB of DMA, " + "splitting DMA transfers\n", + __FUNCTION__, + (sbuf->b.b.width0 + 1023)/1024); + } sbuf->swbuf = align_malloc(sbuf->b.b.width0, 16); + if (!sbuf->swbuf) { + FREE(transfer); + goto done; + } } } @@ -117,80 +248,106 @@ svga_buffer_map_range( struct pipe_screen *screen, /* User/malloc buffer */ map = sbuf->swbuf; } - else if (sbuf->hwbuf) { - map = sws->buffer_map(sws, sbuf->hwbuf, usage); + else if (svga_buffer_has_hw_storage(sbuf)) { + boolean retry; + + map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry); + if (map == NULL && retry) { + /* + * At this point, svga_buffer_get_transfer() has already + * hit the DISCARD_WHOLE_RESOURCE path and flushed HWTNL + * for this buffer. 
+ */ + svga_context_flush(svga, NULL); + map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry); + } } else { map = NULL; } - if(map) { + if (map) { ++sbuf->map.count; - - if (usage & PIPE_TRANSFER_WRITE) { - assert(sbuf->map.count <= 1); - sbuf->map.writing = TRUE; - if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT) - sbuf->map.flush_explicit = TRUE; - } + map += transfer->box.x; + *ptransfer = transfer; + } else { + FREE(transfer); } - + + svga->hud.map_buffer_time += (svga_get_time(svga) - begin); + +done: + SVGA_STATS_TIME_POP(svga_sws(svga)); return map; } - -static void -svga_buffer_flush_mapped_range( struct pipe_screen *screen, - struct pipe_resource *buf, - unsigned offset, unsigned length) +static void +svga_buffer_transfer_flush_region( struct pipe_context *pipe, + struct pipe_transfer *transfer, + const struct pipe_box *box) { - struct svga_buffer *sbuf = svga_buffer( buf ); - struct svga_screen *ss = svga_screen(screen); - - pipe_mutex_lock(ss->swc_mutex); - assert(sbuf->map.writing); - if(sbuf->map.writing) { - assert(sbuf->map.flush_explicit); - svga_buffer_add_range(sbuf, offset, offset + length); - } - pipe_mutex_unlock(ss->swc_mutex); + struct svga_screen *ss = svga_screen(pipe->screen); + struct svga_buffer *sbuf = svga_buffer(transfer->resource); + + unsigned offset = transfer->box.x + box->x; + unsigned length = box->width; + + assert(transfer->usage & PIPE_TRANSFER_WRITE); + assert(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT); + + mtx_lock(&ss->swc_mutex); + svga_buffer_add_range(sbuf, offset, offset + length); + mtx_unlock(&ss->swc_mutex); } -static void -svga_buffer_unmap( struct pipe_screen *screen, - struct pipe_resource *buf) + +static void +svga_buffer_transfer_unmap( struct pipe_context *pipe, + struct pipe_transfer *transfer ) { - struct svga_screen *ss = svga_screen(screen); - struct svga_winsys_screen *sws = ss->sws; - struct svga_buffer *sbuf = svga_buffer( buf ); - - pipe_mutex_lock(ss->swc_mutex); - + struct svga_screen 
*ss = svga_screen(pipe->screen); + struct svga_context *svga = svga_context(pipe); + struct svga_buffer *sbuf = svga_buffer(transfer->resource); + + SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_BUFFERTRANSFERUNMAP); + + mtx_lock(&ss->swc_mutex); + assert(sbuf->map.count); - if(sbuf->map.count) + if (sbuf->map.count) { --sbuf->map.count; + } - if(sbuf->hwbuf) - sws->buffer_unmap(sws, sbuf->hwbuf); + if (svga_buffer_has_hw_storage(sbuf)) { + /* Note: we may wind up flushing here and unmapping other buffers + * which leads to recursively locking ss->swc_mutex. + */ + svga_buffer_hw_storage_unmap(svga, sbuf); + } + + if (transfer->usage & PIPE_TRANSFER_WRITE) { + if (!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) { + /* + * Mapped range not flushed explicitly, so flush the whole buffer, + * and tell the host to discard the contents when processing the DMA + * command. + */ - if(sbuf->map.writing) { - if(!sbuf->map.flush_explicit) { - /* No mapped range was flushed -- flush the whole buffer */ SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n"); - + + sbuf->dma.flags.discard = TRUE; + svga_buffer_add_range(sbuf, 0, sbuf->b.b.width0); } - - sbuf->map.writing = FALSE; - sbuf->map.flush_explicit = FALSE; } - pipe_mutex_unlock(ss->swc_mutex); + mtx_unlock(&ss->swc_mutex); + FREE(transfer); + SVGA_STATS_TIME_POP(svga_sws(svga)); } - static void svga_buffer_destroy( struct pipe_screen *screen, struct pipe_resource *buf ) @@ -199,85 +356,39 @@ svga_buffer_destroy( struct pipe_screen *screen, struct svga_buffer *sbuf = svga_buffer( buf ); assert(!p_atomic_read(&buf->reference.count)); - + assert(!sbuf->dma.pending); - if(sbuf->handle) + if (sbuf->handle) svga_buffer_destroy_host_surface(ss, sbuf); - - if(sbuf->uploaded.buffer) + + if (sbuf->uploaded.buffer) pipe_resource_reference(&sbuf->uploaded.buffer, NULL); - if(sbuf->hwbuf) + if (sbuf->hwbuf) svga_buffer_destroy_hw_storage(ss, sbuf); - - if(sbuf->swbuf && !sbuf->user) - align_free(sbuf->swbuf); - - FREE(sbuf); 
-} - - -/* Keep the original code more or less intact, implement transfers in - * terms of the old functions. - */ -static void * -svga_buffer_transfer_map( struct pipe_context *pipe, - struct pipe_transfer *transfer ) -{ - uint8_t *map = svga_buffer_map_range( pipe->screen, - transfer->resource, - transfer->box.x, - transfer->box.width, - transfer->usage ); - if (map == NULL) - return NULL; - - /* map_buffer() returned a pointer to the beginning of the buffer, - * but transfers are expected to return a pointer to just the - * region specified in the box. - */ - return map + transfer->box.x; -} + if (sbuf->swbuf && !sbuf->user) + align_free(sbuf->swbuf); + pipe_resource_reference(&sbuf->translated_indices.buffer, NULL); -static void svga_buffer_transfer_flush_region( struct pipe_context *pipe, - struct pipe_transfer *transfer, - const struct pipe_box *box) -{ - assert(box->x + box->width <= transfer->box.width); + ss->hud.total_resource_bytes -= sbuf->size; + assert(ss->hud.num_resources > 0); + if (ss->hud.num_resources > 0) + ss->hud.num_resources--; - svga_buffer_flush_mapped_range(pipe->screen, - transfer->resource, - transfer->box.x + box->x, - box->width); -} - -static void svga_buffer_transfer_unmap( struct pipe_context *pipe, - struct pipe_transfer *transfer ) -{ - svga_buffer_unmap(pipe->screen, - transfer->resource); + FREE(sbuf); } - - - - - -struct u_resource_vtbl svga_buffer_vtbl = +struct u_resource_vtbl svga_buffer_vtbl = { u_default_resource_get_handle, /* get_handle */ svga_buffer_destroy, /* resource_destroy */ - svga_buffer_is_referenced, /* is_resource_referenced */ - u_default_get_transfer, /* get_transfer */ - u_default_transfer_destroy, /* transfer_destroy */ svga_buffer_transfer_map, /* transfer_map */ svga_buffer_transfer_flush_region, /* transfer_flush_region */ svga_buffer_transfer_unmap, /* transfer_unmap */ - u_default_transfer_inline_write /* transfer_inline_write */ }; @@ -288,34 +399,86 @@ svga_buffer_create(struct pipe_screen 
*screen, { struct svga_screen *ss = svga_screen(screen); struct svga_buffer *sbuf; - + unsigned bind_flags; + + SVGA_STATS_TIME_PUSH(ss->sws, SVGA_STATS_TIME_CREATEBUFFER); + sbuf = CALLOC_STRUCT(svga_buffer); - if(!sbuf) + if (!sbuf) goto error1; - + sbuf->b.b = *template; sbuf->b.vtbl = &svga_buffer_vtbl; pipe_reference_init(&sbuf->b.b.reference, 1); sbuf->b.b.screen = screen; + bind_flags = template->bind; + + LIST_INITHEAD(&sbuf->surfaces); + + if (bind_flags & PIPE_BIND_CONSTANT_BUFFER) { + /* Constant buffers can only have the PIPE_BIND_CONSTANT_BUFFER + * flag set. + */ + if (ss->sws->have_vgpu10) { + bind_flags = PIPE_BIND_CONSTANT_BUFFER; + } + } + + /* Although svga device only requires constant buffer size to be + * in multiples of 16, in order to allow bind_flags promotion, + * we are mandating all buffer size to be in multiples of 16. + */ + sbuf->b.b.width0 = align(sbuf->b.b.width0, 16); + + if (svga_buffer_needs_hw_storage(bind_flags)) { + + /* If the buffer is not used for constant buffer, set + * the vertex/index bind flags as well so that the buffer will be + * accepted for those uses. + * Note that the PIPE_BIND_ flags we get from the state tracker are + * just a hint about how the buffer may be used. An OpenGL buffer + * object may be used for many different things. + * Also note that we do not unconditionally set the streamout + * bind flag since streamout buffer is an output buffer and + * might have performance implications. + */ + if (!(template->bind & PIPE_BIND_CONSTANT_BUFFER)) { + /* Not a constant buffer. The buffer may be used for vertex data + * or indexes. 
+ */ + bind_flags |= (PIPE_BIND_VERTEX_BUFFER | + PIPE_BIND_INDEX_BUFFER); + } - if(svga_buffer_needs_hw_storage(template->bind)) { - if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK) + if (svga_buffer_create_host_surface(ss, sbuf, bind_flags) != PIPE_OK) goto error2; } else { - sbuf->swbuf = align_malloc(template->width0, 64); - if(!sbuf->swbuf) + sbuf->swbuf = align_malloc(sbuf->b.b.width0, 64); + if (!sbuf->swbuf) goto error2; } - - return &sbuf->b.b; + + debug_reference(&sbuf->b.b.reference, + (debug_reference_descriptor)debug_describe_resource, 0); + + sbuf->bind_flags = bind_flags; + sbuf->size = util_resource_size(&sbuf->b.b); + ss->hud.total_resource_bytes += sbuf->size; + + ss->hud.num_resources++; + SVGA_STATS_TIME_POP(ss->sws); + + return &sbuf->b.b; error2: FREE(sbuf); error1: + SVGA_STATS_TIME_POP(ss->sws); return NULL; } + struct pipe_resource * svga_user_buffer_create(struct pipe_screen *screen, void *ptr, @@ -323,11 +486,12 @@ svga_user_buffer_create(struct pipe_screen *screen, unsigned bind) { struct svga_buffer *sbuf; - + struct svga_screen *ss = svga_screen(screen); + sbuf = CALLOC_STRUCT(svga_buffer); - if(!sbuf) + if (!sbuf) goto no_sbuf; - + pipe_reference_init(&sbuf->b.b.reference, 1); sbuf->b.vtbl = &svga_buffer_vtbl; sbuf->b.b.screen = screen; @@ -337,11 +501,18 @@ svga_user_buffer_create(struct pipe_screen *screen, sbuf->b.b.width0 = bytes; sbuf->b.b.height0 = 1; sbuf->b.b.depth0 = 1; + sbuf->b.b.array_size = 1; + sbuf->bind_flags = bind; sbuf->swbuf = ptr; sbuf->user = TRUE; - - return &sbuf->b.b; + + debug_reference(&sbuf->b.b.reference, + (debug_reference_descriptor)debug_describe_resource, 0); + + ss->hud.num_resources++; + + return &sbuf->b.b; no_sbuf: return NULL;