X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fstate_trackers%2Fnine%2Fbuffer9.c;h=ca4e4380277a06b46353bfb36437fdf921e7ad39;hb=dbc24835d75466951a44b391b42e39461a6ac5a2;hp=b4b91ec2a02fe82fed05b9c35eb5c9759915a588;hpb=ea3f504f7caf9900f71a52f1711baf8a50fec490;p=mesa.git diff --git a/src/gallium/state_trackers/nine/buffer9.c b/src/gallium/state_trackers/nine/buffer9.c index b4b91ec2a02..ca4e4380277 100644 --- a/src/gallium/state_trackers/nine/buffer9.c +++ b/src/gallium/state_trackers/nine/buffer9.c @@ -23,6 +23,7 @@ #include "buffer9.h" #include "device9.h" +#include "nine_buffer_upload.h" #include "nine_helpers.h" #include "nine_pipe.h" @@ -32,6 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_format.h" #include "util/u_box.h" +#include "util/u_inlines.h" #define DBG_CHANNEL (DBG_INDEXBUFFER|DBG_VERTEXBUFFER) @@ -50,39 +52,68 @@ NineBuffer9_ctor( struct NineBuffer9 *This, user_assert(Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL); - This->maps = MALLOC(sizeof(struct pipe_transfer *)); + This->maps = MALLOC(sizeof(struct NineTransfer)); if (!This->maps) return E_OUTOFMEMORY; This->nmaps = 0; This->maxmaps = 1; This->size = Size; - This->pipe = pParams->device->pipe; - info->screen = pParams->device->screen; info->target = PIPE_BUFFER; info->format = PIPE_FORMAT_R8_UNORM; info->width0 = Size; info->flags = 0; - info->bind = PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_TRANSFER_WRITE; - if (!(Usage & D3DUSAGE_WRITEONLY)) - info->bind |= PIPE_BIND_TRANSFER_READ; + /* Note: WRITEONLY is just tip for resource placement, the resource + * can still be read (but slower). */ + info->bind = PIPE_BIND_VERTEX_BUFFER; - info->usage = PIPE_USAGE_DEFAULT; - if (Usage & D3DUSAGE_DYNAMIC) - info->usage = PIPE_USAGE_STREAM; - else if (Pool == D3DPOOL_SYSTEMMEM) + /* It is hard to find clear information on where to place the buffer in + * memory depending on the flag. + * MSDN: resources are static, except for those with DYNAMIC, thus why you + * can only use DISCARD on them. + * ATI doc: The driver has the liberty it wants for having things static + * or not. + * MANAGED: Ram + uploads to Vram copy at unlock (msdn and nvidia doc say + * at first draw call using the buffer) + * DEFAULT + Usage = 0 => System memory backing for easy read access + * (That doc is very unclear on the details, like whether some copies to + * vram copy are involved or not). + * DEFAULT + WRITEONLY => Vram + * DEFAULT + WRITEONLY + DYNAMIC => Either Vram buffer or GTT_WC, depending on what the driver wants. + */ + if (Pool == D3DPOOL_SYSTEMMEM) info->usage = PIPE_USAGE_STAGING; + else if (Pool == D3DPOOL_MANAGED) + info->usage = PIPE_USAGE_DEFAULT; + else if (Usage & D3DUSAGE_DYNAMIC && Usage & D3DUSAGE_WRITEONLY) + info->usage = PIPE_USAGE_STREAM; + else if (Usage & D3DUSAGE_WRITEONLY) + info->usage = PIPE_USAGE_DEFAULT; + /* For the remaining two, PIPE_USAGE_STAGING would probably be + * a good fit according to the doc. However it seems rather a mistake + * from apps to use these (mistakes that do really happen). Try + * to put the flags that are the best compromise between the real + * behaviour and what buggy apps should get for better performance. */ + else if (Usage & D3DUSAGE_DYNAMIC) + info->usage = PIPE_USAGE_STREAM; + else + info->usage = PIPE_USAGE_DYNAMIC; + /* When Writeonly is not set, we don't want to enable the + * optimizations */ + This->discard_nooverwrite_only = !!(Usage & D3DUSAGE_WRITEONLY) && + pParams->device->buffer_upload; /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */ /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */ /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */ /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */ /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */ + /* The buffer must be usable with both sw and hw + * vertex processing. It is expected to be slower with hw. */ if (Usage & D3DUSAGE_SOFTWAREPROCESSING) - DBG("Application asked for Software Vertex Processing, " - "but this is unimplemented\n"); + info->usage = PIPE_USAGE_STAGING; /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */ info->height0 = 1; @@ -93,12 +124,32 @@ NineBuffer9_ctor( struct NineBuffer9 *This, hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE, Type, Pool, Usage); - return hr; + + if (FAILED(hr)) + return hr; + + if (Pool == D3DPOOL_MANAGED) { + This->managed.data = align_calloc( + nine_format_get_level_alloc_size(This->base.info.format, + Size, 1, 0), 32); + if (!This->managed.data) + return E_OUTOFMEMORY; + memset(This->managed.data, 0, Size); + This->managed.dirty = TRUE; + u_box_1d(0, Size, &This->managed.dirty_box); + list_inithead(&This->managed.list); + list_inithead(&This->managed.list2); + list_add(&This->managed.list2, &pParams->device->managed_buffers); + } + + return D3D_OK; } void NineBuffer9_dtor( struct NineBuffer9 *This ) { + DBG("This=%p\n", This); + if (This->maps) { while (This->nmaps) { NineBuffer9_Unlock(This); @@ -106,25 +157,61 @@ NineBuffer9_dtor( struct NineBuffer9 *This ) FREE(This->maps); } + if (This->base.pool == D3DPOOL_MANAGED) { + if (This->managed.data) + align_free(This->managed.data); + if (This->managed.list.prev != NULL && This->managed.list.next != NULL) + list_del(&This->managed.list); + if (This->managed.list2.prev != NULL && This->managed.list2.next != NULL) + list_del(&This->managed.list2); + } + + if (This->buf) + nine_upload_release_buffer(This->base.base.device->buffer_upload, This->buf); + NineResource9_dtor(&This->base); } struct pipe_resource * -NineBuffer9_GetResource( struct NineBuffer9 *This ) +NineBuffer9_GetResource( struct NineBuffer9 *This, unsigned *offset ) { + if (This->buf) + return nine_upload_buffer_resource_and_offset(This->buf, offset); + *offset = 0; return NineResource9_GetResource(&This->base); } -HRESULT WINAPI +static void +NineBuffer9_RebindIfRequired( struct NineBuffer9 *This, + struct NineDevice9 *device ) +{ + int i; + + if (!This->bind_count) + return; + for (i = 0; i < device->caps.MaxStreams; i++) { + if (device->state.stream[i] == (struct NineVertexBuffer9 *)This) + nine_context_set_stream_source(device, i, + (struct NineVertexBuffer9 *)This, + device->state.vtxbuf[i].buffer_offset, + device->state.vtxbuf[i].stride); + } + if (device->state.idxbuf == (struct NineIndexBuffer9 *)This) + nine_context_set_indices(device, (struct NineIndexBuffer9 *)This); +} + +HRESULT NINE_WINAPI NineBuffer9_Lock( struct NineBuffer9 *This, UINT OffsetToLock, UINT SizeToLock, void **ppbData, DWORD Flags ) { + struct NineDevice9 *device = This->base.base.device; struct pipe_box box; + struct pipe_context *pipe; void *data; - unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags); + unsigned usage; DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n", This, This->base.resource, @@ -138,10 +225,65 @@ NineBuffer9_Lock( struct NineBuffer9 *This, D3DLOCK_READONLY | D3DLOCK_NOOVERWRITE)), D3DERR_INVALIDCALL); + if (SizeToLock == 0) { + SizeToLock = This->size - OffsetToLock; + user_warn(OffsetToLock != 0); + } + + u_box_1d(OffsetToLock, SizeToLock, &box); + + if (This->base.pool == D3DPOOL_MANAGED) { + /* READONLY doesn't dirty the buffer */ + /* Tests on Win: READONLY doesn't wait for the upload */ + if (!(Flags & D3DLOCK_READONLY)) { + if (!This->managed.dirty) { + assert(LIST_IS_EMPTY(&This->managed.list)); + This->managed.dirty = TRUE; + This->managed.dirty_box = box; + if (p_atomic_read(&This->managed.pending_upload)) + nine_csmt_process(This->base.base.device); + } else + u_box_union_2d(&This->managed.dirty_box, &This->managed.dirty_box, &box); + /* Tests trying to draw while the buffer is locked show that + * MANAGED buffers are made dirty at Lock time */ + BASEBUF_REGISTER_UPDATE(This); + } + *ppbData = (char *)This->managed.data + OffsetToLock; + DBG("returning pointer %p\n", *ppbData); + This->nmaps++; + return D3D_OK; + } + + /* Driver ddi doc: READONLY is never passed to the device. So it can only + * have effect on things handled by the driver (MANAGED pool for example). + * Msdn doc: DISCARD and NOOVERWRITE are only for DYNAMIC. + * ATI doc: You can use DISCARD and NOOVERWRITE without DYNAMIC. + * Msdn doc: D3DLOCK_DONOTWAIT is not among the valid flags for buffers. + * Our tests: On win 7 nvidia, D3DLOCK_DONOTWAIT does return + * D3DERR_WASSTILLDRAWING if the resource is in use, except for DYNAMIC. + * Our tests: some apps do use both DISCARD and NOOVERWRITE at the same + * time. On windows it seems to return different pointer, thus indicating + * DISCARD is taken into account. + * Our tests: SYSTEMMEM doesn't DISCARD */ + + if (This->base.pool == D3DPOOL_SYSTEMMEM) + Flags &= ~D3DLOCK_DISCARD; + + if (Flags & D3DLOCK_DISCARD) + usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE; + else if (Flags & D3DLOCK_NOOVERWRITE) + usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED; + else + usage = PIPE_TRANSFER_READ_WRITE; + if (Flags & D3DLOCK_DONOTWAIT && !(This->base.usage & D3DUSAGE_DYNAMIC)) + usage |= PIPE_TRANSFER_DONTBLOCK; + + This->discard_nooverwrite_only &= !!(Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE)); + if (This->nmaps == This->maxmaps) { - struct pipe_transfer **newmaps = - REALLOC(This->maps, sizeof(struct pipe_transfer *)*This->maxmaps, - sizeof(struct pipe_transfer *)*(This->maxmaps << 1)); + struct NineTransfer *newmaps = + REALLOC(This->maps, sizeof(struct NineTransfer)*This->maxmaps, + sizeof(struct NineTransfer)*(This->maxmaps << 1)); if (newmaps == NULL) return E_OUTOFMEMORY; @@ -149,15 +291,88 @@ NineBuffer9_Lock( struct NineBuffer9 *This, This->maps = newmaps; } - if (SizeToLock == 0) { - SizeToLock = This->size - OffsetToLock; - user_warn(OffsetToLock != 0); + if (This->buf && !This->discard_nooverwrite_only) { + struct pipe_box src_box; + unsigned offset; + struct pipe_resource *src_res; + DBG("Disabling nine_subbuffer for a buffer having" + "used a nine_subbuffer buffer\n"); + /* Copy buffer content to the buffer resource, which + * we will now use. + * Note: The behaviour may be different from what is expected + * with double lock. However applications can't really make expectations + * about double locks, and don't really use them, so that's ok. */ + src_res = nine_upload_buffer_resource_and_offset(This->buf, &offset); + u_box_1d(offset, This->size, &src_box); + + pipe = NineDevice9_GetPipe(device); + pipe->resource_copy_region(pipe, This->base.resource, 0, 0, 0, 0, + src_res, 0, &src_box); + /* Release previous resource */ + if (This->nmaps >= 1) + This->maps[This->nmaps-1].should_destroy_buf = true; + else + nine_upload_release_buffer(device->buffer_upload, This->buf); + This->buf = NULL; + /* Rebind buffer */ + NineBuffer9_RebindIfRequired(This, device); } - u_box_1d(OffsetToLock, SizeToLock, &box); + This->maps[This->nmaps].transfer = NULL; + This->maps[This->nmaps].is_pipe_secondary = false; + This->maps[This->nmaps].buf = NULL; + This->maps[This->nmaps].should_destroy_buf = false; + + if (This->discard_nooverwrite_only) { + if (This->buf && (Flags & D3DLOCK_DISCARD)) { + /* Release previous buffer */ + if (This->nmaps >= 1) + This->maps[This->nmaps-1].should_destroy_buf = true; + else + nine_upload_release_buffer(device->buffer_upload, This->buf); + This->buf = NULL; + } + + if (!This->buf) { + This->buf = nine_upload_create_buffer(device->buffer_upload, This->base.info.width0); + NineBuffer9_RebindIfRequired(This, device); + } + + if (This->buf) { + This->maps[This->nmaps].buf = This->buf; + This->nmaps++; + *ppbData = nine_upload_buffer_get_map(This->buf) + OffsetToLock; + return D3D_OK; + } else { + /* Fallback to normal path, and don't try again */ + This->discard_nooverwrite_only = false; + } + } + + /* When csmt is active, we want to avoid stalls as much as possible, + * and thus we want to create a new resource on discard and map it + * with the secondary pipe, instead of waiting on the main pipe. */ + if (Flags & D3DLOCK_DISCARD && device->csmt_active) { + struct pipe_screen *screen = NineDevice9_GetScreen(device); + struct pipe_resource *new_res = screen->resource_create(screen, &This->base.info); + if (new_res) { + /* Use the new resource */ + pipe_resource_reference(&This->base.resource, new_res); + pipe_resource_reference(&new_res, NULL); + usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED; + NineBuffer9_RebindIfRequired(This, device); + This->maps[This->nmaps].is_pipe_secondary = TRUE; + } + } else if (Flags & D3DLOCK_NOOVERWRITE && device->csmt_active) + This->maps[This->nmaps].is_pipe_secondary = TRUE; + + if (This->maps[This->nmaps].is_pipe_secondary) + pipe = device->pipe_secondary; + else + pipe = NineDevice9_GetPipe(device); - data = This->pipe->transfer_map(This->pipe, This->base.resource, 0, - usage, &box, &This->maps[This->nmaps]); + data = pipe->transfer_map(pipe, This->base.resource, 0, + usage, &box, &This->maps[This->nmaps].transfer); if (!data) { DBG("pipe::transfer_map failed\n" @@ -165,7 +380,7 @@ NineBuffer9_Lock( struct NineBuffer9 *This, " box.x = %u\n" " box.width = %u\n", usage, box.x, box.width); - /* not sure what to return, msdn suggests this */ + if (Flags & D3DLOCK_DONOTWAIT) return D3DERR_WASSTILLDRAWING; return D3DERR_INVALIDCALL; @@ -178,12 +393,38 @@ NineBuffer9_Lock( struct NineBuffer9 *This, return D3D_OK; } -HRESULT WINAPI +HRESULT NINE_WINAPI NineBuffer9_Unlock( struct NineBuffer9 *This ) { + struct NineDevice9 *device = This->base.base.device; + struct pipe_context *pipe; DBG("This=%p\n", This); user_assert(This->nmaps > 0, D3DERR_INVALIDCALL); - This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]); + This->nmaps--; + if (This->base.pool != D3DPOOL_MANAGED) { + if (!This->maps[This->nmaps].buf) { + pipe = This->maps[This->nmaps].is_pipe_secondary ? + device->pipe_secondary : + nine_context_get_pipe_acquire(device); + pipe->transfer_unmap(pipe, This->maps[This->nmaps].transfer); + /* We need to flush in case the driver does implicit copies */ + if (This->maps[This->nmaps].is_pipe_secondary) + pipe->flush(pipe, NULL, 0); + else + nine_context_get_pipe_release(device); + } else if (This->maps[This->nmaps].should_destroy_buf) + nine_upload_release_buffer(device->buffer_upload, This->maps[This->nmaps].buf); + } return D3D_OK; } + +void +NineBuffer9_SetDirty( struct NineBuffer9 *This ) +{ + assert(This->base.pool == D3DPOOL_MANAGED); + + This->managed.dirty = TRUE; + u_box_1d(0, This->size, &This->managed.dirty_box); + BASEBUF_REGISTER_UPDATE(This); +}