st/nine: Fix non inversible matrix check
[mesa.git] / src / gallium / state_trackers / nine / buffer9.c
index b4b91ec2a02fe82fed05b9c35eb5c9759915a588..ca4e4380277a06b46353bfb36437fdf921e7ad39 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "buffer9.h"
 #include "device9.h"
+#include "nine_buffer_upload.h"
 #include "nine_helpers.h"
 #include "nine_pipe.h"
 
@@ -32,6 +33,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_format.h"
 #include "util/u_box.h"
+#include "util/u_inlines.h"
 
 #define DBG_CHANNEL (DBG_INDEXBUFFER|DBG_VERTEXBUFFER)
 
@@ -50,39 +52,68 @@ NineBuffer9_ctor( struct NineBuffer9 *This,
 
     user_assert(Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL);
 
-    This->maps = MALLOC(sizeof(struct pipe_transfer *));
+    This->maps = MALLOC(sizeof(struct NineTransfer));
     if (!This->maps)
         return E_OUTOFMEMORY;
     This->nmaps = 0;
     This->maxmaps = 1;
     This->size = Size;
 
-    This->pipe = pParams->device->pipe;
-
     info->screen = pParams->device->screen;
     info->target = PIPE_BUFFER;
     info->format = PIPE_FORMAT_R8_UNORM;
     info->width0 = Size;
     info->flags = 0;
 
-    info->bind = PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_TRANSFER_WRITE;
-    if (!(Usage & D3DUSAGE_WRITEONLY))
-        info->bind |= PIPE_BIND_TRANSFER_READ;
+    /* Note: WRITEONLY is just a hint for resource placement; the resource
+     * can still be read (but slower). */
+    info->bind = PIPE_BIND_VERTEX_BUFFER;
 
-    info->usage = PIPE_USAGE_DEFAULT;
-    if (Usage & D3DUSAGE_DYNAMIC)
-        info->usage = PIPE_USAGE_STREAM;
-    else if (Pool == D3DPOOL_SYSTEMMEM)
+    /* It is hard to find clear information on where to place the buffer in
+     * memory depending on the flag.
+     * MSDN: resources are static, except for those with DYNAMIC, which is why
+     *   DISCARD can only be used on DYNAMIC resources.
+     * ATI doc: The driver has the liberty it wants for having things static
+     *   or not.
+     *   MANAGED: Ram + uploads to Vram copy at unlock (msdn and nvidia doc say
+     *   at first draw call using the buffer)
+     *   DEFAULT + Usage = 0 => System memory backing for easy read access
+     *   (That doc is very unclear on the details, like whether some copies to
+     *   vram copy are involved or not).
+     *   DEFAULT + WRITEONLY => Vram
+     *   DEFAULT + WRITEONLY + DYNAMIC => Either Vram buffer or GTT_WC, depending on what the driver wants.
+     */
+    if (Pool == D3DPOOL_SYSTEMMEM)
         info->usage = PIPE_USAGE_STAGING;
+    else if (Pool == D3DPOOL_MANAGED)
+        info->usage = PIPE_USAGE_DEFAULT;
+    else if (Usage & D3DUSAGE_DYNAMIC && Usage & D3DUSAGE_WRITEONLY)
+        info->usage = PIPE_USAGE_STREAM;
+    else if (Usage & D3DUSAGE_WRITEONLY)
+        info->usage = PIPE_USAGE_DEFAULT;
+    /* For the remaining two, PIPE_USAGE_STAGING would probably be
+     * a good fit according to the doc. However it seems rather a mistake
+     * from apps to use these (mistakes that do really happen). Try
+     * to put the flags that are the best compromise between the real
+     * behaviour and what buggy apps should get for better performance. */
+    else if (Usage & D3DUSAGE_DYNAMIC)
+        info->usage = PIPE_USAGE_STREAM;
+    else
+        info->usage = PIPE_USAGE_DYNAMIC;
 
+    /* When Writeonly is not set, we don't want to enable the
+     * optimizations */
+    This->discard_nooverwrite_only = !!(Usage & D3DUSAGE_WRITEONLY) &&
+                                     pParams->device->buffer_upload;
     /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
     /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
     /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
     /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
     /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
+    /* The buffer must be usable with both sw and hw
+     * vertex processing. It is expected to be slower with hw. */
     if (Usage & D3DUSAGE_SOFTWAREPROCESSING)
-        DBG("Application asked for Software Vertex Processing, "
-            "but this is unimplemented\n");
+        info->usage = PIPE_USAGE_STAGING;
     /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */
 
     info->height0 = 1;
@@ -93,12 +124,32 @@ NineBuffer9_ctor( struct NineBuffer9 *This,
 
     hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE,
                             Type, Pool, Usage);
-    return hr;
+
+    if (FAILED(hr))
+        return hr;
+
+    if (Pool == D3DPOOL_MANAGED) {
+        This->managed.data = align_calloc(
+            nine_format_get_level_alloc_size(This->base.info.format,
+                                             Size, 1, 0), 32);
+        if (!This->managed.data)
+            return E_OUTOFMEMORY;
+        memset(This->managed.data, 0, Size);
+        This->managed.dirty = TRUE;
+        u_box_1d(0, Size, &This->managed.dirty_box);
+        list_inithead(&This->managed.list);
+        list_inithead(&This->managed.list2);
+        list_add(&This->managed.list2, &pParams->device->managed_buffers);
+    }
+
+    return D3D_OK;
 }
 
 void
 NineBuffer9_dtor( struct NineBuffer9 *This )
 {
+    DBG("This=%p\n", This);
+
     if (This->maps) {
         while (This->nmaps) {
             NineBuffer9_Unlock(This);
@@ -106,25 +157,61 @@ NineBuffer9_dtor( struct NineBuffer9 *This )
         FREE(This->maps);
     }
 
+    if (This->base.pool == D3DPOOL_MANAGED) {
+        if (This->managed.data)
+            align_free(This->managed.data);
+        if (This->managed.list.prev != NULL && This->managed.list.next != NULL)
+            list_del(&This->managed.list);
+        if (This->managed.list2.prev != NULL && This->managed.list2.next != NULL)
+            list_del(&This->managed.list2);
+    }
+
+    if (This->buf)
+        nine_upload_release_buffer(This->base.base.device->buffer_upload, This->buf);
+
     NineResource9_dtor(&This->base);
 }
 
 struct pipe_resource *
-NineBuffer9_GetResource( struct NineBuffer9 *This )
+NineBuffer9_GetResource( struct NineBuffer9 *This, unsigned *offset )
 {
+    if (This->buf) /* a nine_upload buffer is attached: return its resource; the helper is handed `offset` to fill */
+        return nine_upload_buffer_resource_and_offset(This->buf, offset);
+    *offset = 0; /* no nine_upload buffer: the base resource is returned and the offset is reported as 0 */
     return NineResource9_GetResource(&This->base);
 }
 
-HRESULT WINAPI
+static void /* Re-issues the stream-source / index-buffer state for every device slot that currently points at This. */
+NineBuffer9_RebindIfRequired( struct NineBuffer9 *This,
+                              struct NineDevice9 *device )
+{
+    int i;
+
+    if (!This->bind_count) /* bind_count is zero: skip the scan entirely */
+        return;
+    for (i = 0; i < device->caps.MaxStreams; i++) { /* visit every vertex stream slot */
+        if (device->state.stream[i] == (struct NineVertexBuffer9 *)This)
+            nine_context_set_stream_source(device, i,
+                                           (struct NineVertexBuffer9 *)This,
+                                           device->state.vtxbuf[i].buffer_offset, /* offset already recorded for the slot */
+                                           device->state.vtxbuf[i].stride); /* stride already recorded for the slot */
+    }
+    if (device->state.idxbuf == (struct NineIndexBuffer9 *)This) /* same check for the index buffer slot */
+        nine_context_set_indices(device, (struct NineIndexBuffer9 *)This);
+}
+
+HRESULT NINE_WINAPI
 NineBuffer9_Lock( struct NineBuffer9 *This,
                         UINT OffsetToLock,
                         UINT SizeToLock,
                         void **ppbData,
                         DWORD Flags )
 {
+    struct NineDevice9 *device = This->base.base.device;
     struct pipe_box box;
+    struct pipe_context *pipe;
     void *data;
-    unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags);
+    unsigned usage;
 
     DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n",
         This, This->base.resource,
@@ -138,10 +225,65 @@ NineBuffer9_Lock( struct NineBuffer9 *This,
                             D3DLOCK_READONLY |
                             D3DLOCK_NOOVERWRITE)), D3DERR_INVALIDCALL);
 
+    if (SizeToLock == 0) {
+        SizeToLock = This->size - OffsetToLock;
+        user_warn(OffsetToLock != 0);
+    }
+
+    u_box_1d(OffsetToLock, SizeToLock, &box);
+
+    if (This->base.pool == D3DPOOL_MANAGED) {
+        /* READONLY doesn't dirty the buffer */
+        /* Tests on Win: READONLY doesn't wait for the upload */
+        if (!(Flags & D3DLOCK_READONLY)) {
+            if (!This->managed.dirty) {
+                assert(LIST_IS_EMPTY(&This->managed.list));
+                This->managed.dirty = TRUE;
+                This->managed.dirty_box = box;
+                if (p_atomic_read(&This->managed.pending_upload))
+                    nine_csmt_process(This->base.base.device);
+            } else
+                u_box_union_2d(&This->managed.dirty_box, &This->managed.dirty_box, &box);
+            /* Tests trying to draw while the buffer is locked show that
+             * MANAGED buffers are made dirty at Lock time */
+            BASEBUF_REGISTER_UPDATE(This);
+        }
+        *ppbData = (char *)This->managed.data + OffsetToLock;
+        DBG("returning pointer %p\n", *ppbData);
+        This->nmaps++;
+        return D3D_OK;
+    }
+
+    /* Driver ddi doc: READONLY is never passed to the device. So it can only
+     * have effect on things handled by the driver (MANAGED pool for example).
+     * Msdn doc: DISCARD and NOOVERWRITE are only for DYNAMIC.
+     * ATI doc: You can use DISCARD and NOOVERWRITE without DYNAMIC.
+     * Msdn doc: D3DLOCK_DONOTWAIT is not among the valid flags for buffers.
+     * Our tests: On win 7 nvidia, D3DLOCK_DONOTWAIT does return
+     * D3DERR_WASSTILLDRAWING if the resource is in use, except for DYNAMIC.
+     * Our tests: some apps do use both DISCARD and NOOVERWRITE at the same
+     * time. On Windows this seems to return a different pointer, indicating
+     * DISCARD is taken into account.
+     * Our tests: SYSTEMMEM doesn't DISCARD */
+
+    if (This->base.pool == D3DPOOL_SYSTEMMEM)
+        Flags &= ~D3DLOCK_DISCARD;
+
+    if (Flags & D3DLOCK_DISCARD)
+        usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
+    else if (Flags & D3DLOCK_NOOVERWRITE)
+        usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED;
+    else
+        usage = PIPE_TRANSFER_READ_WRITE;
+    if (Flags & D3DLOCK_DONOTWAIT && !(This->base.usage & D3DUSAGE_DYNAMIC))
+        usage |= PIPE_TRANSFER_DONTBLOCK;
+
+    This->discard_nooverwrite_only &= !!(Flags & (D3DLOCK_DISCARD | D3DLOCK_NOOVERWRITE));
+
     if (This->nmaps == This->maxmaps) {
-        struct pipe_transfer **newmaps =
-            REALLOC(This->maps, sizeof(struct pipe_transfer *)*This->maxmaps,
-                    sizeof(struct pipe_transfer *)*(This->maxmaps << 1));
+        struct NineTransfer *newmaps =
+            REALLOC(This->maps, sizeof(struct NineTransfer)*This->maxmaps,
+                    sizeof(struct NineTransfer)*(This->maxmaps << 1));
         if (newmaps == NULL)
             return E_OUTOFMEMORY;
 
@@ -149,15 +291,88 @@ NineBuffer9_Lock( struct NineBuffer9 *This,
         This->maps = newmaps;
     }
 
-    if (SizeToLock == 0) {
-        SizeToLock = This->size - OffsetToLock;
-        user_warn(OffsetToLock != 0);
+    if (This->buf && !This->discard_nooverwrite_only) {
+        struct pipe_box src_box;
+        unsigned offset;
+        struct pipe_resource *src_res;
+        DBG("Disabling nine_subbuffer for a buffer having"
+            "used a nine_subbuffer buffer\n");
+        /* Copy buffer content to the buffer resource, which
+         * we will now use.
+         * Note: The behaviour may be different from what is expected
+         * with double lock. However applications can't really rely on any
+         * behaviour for double locks, and don't really use them, so that's ok. */
+        src_res = nine_upload_buffer_resource_and_offset(This->buf, &offset);
+        u_box_1d(offset, This->size, &src_box);
+
+        pipe = NineDevice9_GetPipe(device);
+        pipe->resource_copy_region(pipe, This->base.resource, 0, 0, 0, 0,
+                                   src_res, 0, &src_box);
+        /* Release previous resource */
+        if (This->nmaps >= 1)
+            This->maps[This->nmaps-1].should_destroy_buf = true;
+        else
+            nine_upload_release_buffer(device->buffer_upload, This->buf);
+        This->buf = NULL;
+        /* Rebind buffer */
+        NineBuffer9_RebindIfRequired(This, device);
     }
 
-    u_box_1d(OffsetToLock, SizeToLock, &box);
+    This->maps[This->nmaps].transfer = NULL;
+    This->maps[This->nmaps].is_pipe_secondary = false;
+    This->maps[This->nmaps].buf = NULL;
+    This->maps[This->nmaps].should_destroy_buf = false;
+
+    if (This->discard_nooverwrite_only) {
+        if (This->buf && (Flags & D3DLOCK_DISCARD)) {
+            /* Release previous buffer */
+            if (This->nmaps >= 1)
+                This->maps[This->nmaps-1].should_destroy_buf = true;
+            else
+                nine_upload_release_buffer(device->buffer_upload, This->buf);
+            This->buf = NULL;
+        }
+
+        if (!This->buf) {
+            This->buf = nine_upload_create_buffer(device->buffer_upload, This->base.info.width0);
+            NineBuffer9_RebindIfRequired(This, device);
+        }
+
+        if (This->buf) {
+            This->maps[This->nmaps].buf = This->buf;
+            This->nmaps++;
+            *ppbData = nine_upload_buffer_get_map(This->buf) + OffsetToLock;
+            return D3D_OK;
+        } else {
+            /* Fallback to normal path, and don't try again */
+            This->discard_nooverwrite_only = false;
+        }
+    }
+
+    /* When csmt is active, we want to avoid stalls as much as possible,
+     * and thus we want to create a new resource on discard and map it
+     * with the secondary pipe, instead of waiting on the main pipe. */
+    if (Flags & D3DLOCK_DISCARD && device->csmt_active) {
+        struct pipe_screen *screen = NineDevice9_GetScreen(device);
+        struct pipe_resource *new_res = screen->resource_create(screen, &This->base.info);
+        if (new_res) {
+            /* Use the new resource */
+            pipe_resource_reference(&This->base.resource, new_res);
+            pipe_resource_reference(&new_res, NULL);
+            usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED;
+            NineBuffer9_RebindIfRequired(This, device);
+            This->maps[This->nmaps].is_pipe_secondary = TRUE;
+        }
+    } else if (Flags & D3DLOCK_NOOVERWRITE && device->csmt_active)
+        This->maps[This->nmaps].is_pipe_secondary = TRUE;
+
+    if (This->maps[This->nmaps].is_pipe_secondary)
+        pipe = device->pipe_secondary;
+    else
+        pipe = NineDevice9_GetPipe(device);
 
-    data = This->pipe->transfer_map(This->pipe, This->base.resource, 0,
-                                    usage, &box, &This->maps[This->nmaps]);
+    data = pipe->transfer_map(pipe, This->base.resource, 0,
+                              usage, &box, &This->maps[This->nmaps].transfer);
 
     if (!data) {
         DBG("pipe::transfer_map failed\n"
@@ -165,7 +380,7 @@ NineBuffer9_Lock( struct NineBuffer9 *This,
             " box.x = %u\n"
             " box.width = %u\n",
             usage, box.x, box.width);
-        /* not sure what to return, msdn suggests this */
+
         if (Flags & D3DLOCK_DONOTWAIT)
             return D3DERR_WASSTILLDRAWING;
         return D3DERR_INVALIDCALL;
@@ -178,12 +393,38 @@ NineBuffer9_Lock( struct NineBuffer9 *This,
     return D3D_OK;
 }
 
-HRESULT WINAPI
+HRESULT NINE_WINAPI
 NineBuffer9_Unlock( struct NineBuffer9 *This )
 {
+    struct NineDevice9 *device = This->base.base.device;
+    struct pipe_context *pipe;
     DBG("This=%p\n", This);
 
     user_assert(This->nmaps > 0, D3DERR_INVALIDCALL);
-    This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]);
+    This->nmaps--; /* pop the most recent lock; maps[nmaps] is now its record */
+    if (This->base.pool != D3DPOOL_MANAGED) { /* the MANAGED path in Lock only bumps nmaps and fills no maps[] record */
+        if (!This->maps[This->nmaps].buf) { /* no nine_upload buffer recorded: this lock went through transfer_map */
+            pipe = This->maps[This->nmaps].is_pipe_secondary ?
+                device->pipe_secondary :
+                nine_context_get_pipe_acquire(device);
+            pipe->transfer_unmap(pipe, This->maps[This->nmaps].transfer);
+            /* We need to flush in case the driver does implicit copies */
+            if (This->maps[This->nmaps].is_pipe_secondary)
+                pipe->flush(pipe, NULL, 0);
+            else
+                nine_context_get_pipe_release(device); /* pairs with the acquire above */
+        } else if (This->maps[This->nmaps].should_destroy_buf) /* flag set by a later Lock that replaced This->buf while this lock was outstanding */
+            nine_upload_release_buffer(device->buffer_upload, This->maps[This->nmaps].buf);
+    }
     return D3D_OK;
 }
+
+void /* Marks the whole range [0, size) of a MANAGED buffer as dirty and registers it for update. */
+NineBuffer9_SetDirty( struct NineBuffer9 *This )
+{
+    assert(This->base.pool == D3DPOOL_MANAGED); /* the constructor sets up the managed.* fields only for MANAGED buffers */
+
+    This->managed.dirty = TRUE;
+    u_box_1d(0, This->size, &This->managed.dirty_box); /* dirty box covers the entire buffer */
+    BASEBUF_REGISTER_UPDATE(This); /* macro defined outside this view; Lock uses it the same way after dirtying */
+}