radeon/winsys: add dma ring support to winsys v3
authorJerome Glisse <jglisse@redhat.com>
Mon, 7 Jan 2013 16:49:23 +0000 (11:49 -0500)
committerJerome Glisse <jglisse@redhat.com>
Mon, 28 Jan 2013 16:30:35 +0000 (11:30 -0500)
Add ring support, so that you can create a cs for each ring. The DMA ring
is a bit special regarding relocations, as you must emit as many relocations
as there are uses of the buffer.

v2: - Improved comment on relocation changes
    - Use a single thread to queue cs submissions; this simplifies driver
      code while not impacting performance. The rationale for this is that
      you have to wait for all previous submissions to have completed,
      so there was never a case where we could have 2 different threads
      submitting a command stream at the same time. This code just
      consolidates submission into one single thread per winsys.
v3: - Do not use a semaphore for empty queue signaling, instead use a
      cond var. This is because it's tricky to maintain an even number
      of calls to semaphore wait and semaphore signal (the number of
      cs in the stack would for instance make that number vary).

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
src/gallium/drivers/r300/r300_context.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/radeonsi/radeonsi_pipe.c
src/gallium/winsys/radeon/drm/radeon_drm_bo.c
src/gallium/winsys/radeon/drm/radeon_drm_cs.c
src/gallium/winsys/radeon/drm/radeon_drm_cs.h
src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
src/gallium/winsys/radeon/drm/radeon_winsys.h

index d8af13f99548285a6ac4f713b9feb15256ed9504..340a7f04cbc1256828899e7b9b347606e7fef897 100644 (file)
@@ -379,7 +379,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
                      sizeof(struct pipe_transfer), 64,
                      UTIL_SLAB_SINGLETHREADED);
 
-    r300->cs = rws->cs_create(rws);
+    r300->cs = rws->cs_create(rws, RING_GFX);
     if (r300->cs == NULL)
         goto fail;
 
index fda507454cc7d00f05e45ab95b68f68bd0a4621e..e4a35cfe06c0d6189800f808577d523799e0d24e 100644 (file)
@@ -289,7 +289,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
                goto fail;
        }
 
-       rctx->cs = rctx->ws->cs_create(rctx->ws);
+       rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
        rctx->ws->cs_set_flush_callback(rctx->cs, r600_flush_from_winsys, rctx);
 
        rctx->uploader = u_upload_create(&rctx->context, 1024 * 1024, 256,
index 2f976094e0f72fc9532b0cb2e5eb4fbebbc66874..471dd48b7ec04d483d4f20e29108c926757587e1 100644 (file)
@@ -225,7 +225,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
        case TAHITI:
                si_init_state_functions(rctx);
                LIST_INITHEAD(&rctx->active_query_list);
-               rctx->cs = rctx->ws->cs_create(rctx->ws);
+               rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX);
                rctx->max_db = 8;
                si_init_config(rctx);
                break;
index 897e9627dbc16cef01b7e0bc5d5ce9bc538c8e1e..6daafc3caedc369197007ca05b2d640291b09064 100644 (file)
@@ -453,7 +453,7 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
                 } else {
                     /* Try to avoid busy-waiting in radeon_bo_wait. */
                     if (p_atomic_read(&bo->num_active_ioctls))
-                        radeon_drm_cs_sync_flush(cs);
+                        radeon_drm_cs_sync_flush(rcs);
                 }
 
                 radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
index c5e7f1e44c2935537a6146aea27aef9d2ae9a6e0..cab27040bba8b5b138b1f0b0dbed447fe961f214 100644 (file)
 #define RADEON_CS_RING_COMPUTE      1
 #endif
 
+#ifndef RADEON_CS_RING_DMA
+#define RADEON_CS_RING_DMA          2
+#endif
+
 #ifndef RADEON_CS_END_OF_FRAME
 #define RADEON_CS_END_OF_FRAME      0x04
 #endif
@@ -158,10 +162,8 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
     FREE(csc->relocs);
 }
 
-DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE)
-static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param);
 
-static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
+static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws, enum ring_type ring_type)
 {
     struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
     struct radeon_drm_cs *cs;
@@ -170,7 +172,6 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
     if (!cs) {
         return NULL;
     }
-    pipe_semaphore_init(&cs->flush_queued, 0);
     pipe_semaphore_init(&cs->flush_completed, 0);
 
     cs->ws = ws;
@@ -189,10 +190,9 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
     cs->csc = &cs->csc1;
     cs->cst = &cs->csc2;
     cs->base.buf = cs->csc->buf;
+    cs->base.ring_type = ring_type;
 
     p_atomic_inc(&ws->num_cs);
-    if (cs->ws->num_cpus > 1 && debug_get_option_thread())
-        cs->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, cs);
     return &cs->base;
 }
 
@@ -246,35 +246,49 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
     return -1;
 }
 
-static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
+static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                  struct radeon_bo *bo,
                                  enum radeon_bo_usage usage,
                                  enum radeon_bo_domain domains,
                                  enum radeon_bo_domain *added_domains)
 {
+    struct radeon_cs_context *csc = cs->csc;
     struct drm_radeon_cs_reloc *reloc;
-    unsigned i;
     unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
     enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
     enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
+    bool update_hash = TRUE;
+    int i;
 
+    *added_domains = 0;
     if (csc->is_handle_added[hash]) {
         i = csc->reloc_indices_hashlist[hash];
         reloc = &csc->relocs[i];
-        if (reloc->handle == bo->handle) {
-            update_reloc_domains(reloc, rd, wd, added_domains);
-            return i;
+        if (reloc->handle != bo->handle) {
+            /* Hash collision, look for the BO in the list of relocs linearly. */
+            for (i = csc->crelocs - 1; i >= 0; i--) {
+                reloc = &csc->relocs[i];
+                if (reloc->handle == bo->handle) {
+                    /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
+                    break;
+                }
+            }
         }
 
-        /* Hash collision, look for the BO in the list of relocs linearly. */
-        for (i = csc->crelocs; i != 0;) {
-            --i;
-            reloc = &csc->relocs[i];
-            if (reloc->handle == bo->handle) {
-                update_reloc_domains(reloc, rd, wd, added_domains);
-
+        if (i >= 0) {
+            /* On the DMA ring we need to emit as many relocations as there are uses of
+             * the bo, thus each time this function is called we should add the bo to
+             * the relocation buffer again.
+             *
+             * Do not update the hash table if it's the DMA ring, so that the first hash always
+             * points to the first bo relocation, which will be the one used by the kernel. The
+             * following relocations will be ignored by the kernel memory placement (but still
+             * used by the kernel to update the cmd stream with the proper buffer offset).
+             */
+            update_hash = FALSE;
+            update_reloc_domains(reloc, rd, wd, added_domains);
+            if (cs->base.ring_type != RING_DMA) {
                 csc->reloc_indices_hashlist[hash] = i;
-                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                 return i;
             }
         }
@@ -305,7 +319,9 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
     reloc->flags = 0;
 
     csc->is_handle_added[hash] = TRUE;
-    csc->reloc_indices_hashlist[hash] = csc->crelocs;
+    if (update_hash) {
+        csc->reloc_indices_hashlist[hash] = csc->crelocs;
+    }
 
     csc->chunks[1].length_dw += RELOC_DWORDS;
 
@@ -321,8 +337,7 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
     struct radeon_bo *bo = (struct radeon_bo*)buf;
     enum radeon_bo_domain added_domains;
-
-    unsigned index = radeon_add_reloc(cs->csc, bo, usage, domains, &added_domains);
+    unsigned index = radeon_add_reloc(cs, bo, usage, domains, &added_domains);
 
     if (added_domains & RADEON_DOMAIN_GTT)
         cs->csc->used_gart += bo->base.size;
@@ -373,7 +388,6 @@ static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
 {
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
     struct radeon_bo *bo = (struct radeon_bo*)buf;
-
     unsigned index = radeon_get_reloc(cs->csc, bo);
 
     if (index == -1) {
@@ -385,7 +399,7 @@ static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
     OUT_CS(&cs->base, index * RELOC_DWORDS);
 }
 
-static void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc)
+void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc)
 {
     unsigned i;
 
@@ -410,25 +424,15 @@ static void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc)
     radeon_cs_context_cleanup(csc);
 }
 
-static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param)
+/*
+ * Make sure previous submissions of this cs have completed
+ */
+void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
 {
-    struct radeon_drm_cs *cs = (struct radeon_drm_cs*)param;
-
-    while (1) {
-        pipe_semaphore_wait(&cs->flush_queued);
-        if (cs->kill_thread)
-            break;
-        radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
-        pipe_semaphore_signal(&cs->flush_completed);
-    }
-    pipe_semaphore_signal(&cs->flush_completed);
-    return NULL;
-}
+    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
 
-void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs)
-{
     /* Wait for any pending ioctl to complete. */
-    if (cs->thread && cs->flush_started) {
+    if (cs->ws->thread && cs->flush_started) {
         pipe_semaphore_wait(&cs->flush_completed);
         cs->flush_started = 0;
     }
@@ -445,7 +449,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
        fprintf(stderr, "radeon: command stream overflowed\n");
     }
 
-    radeon_drm_cs_sync_flush(cs);
+    radeon_drm_cs_sync_flush(rcs);
 
     /* Flip command streams. */
     tmp = cs->csc;
@@ -453,8 +457,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
     cs->cst = tmp;
 
     /* If the CS is not empty or overflowed, emit it in a separate thread. */
-    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS &&
-       !debug_get_option_noop()) {
+    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
         unsigned i, crelocs = cs->cst->crelocs;
 
         cs->cst->chunks[0].length_dw = cs->base.cdw;
@@ -464,31 +467,50 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
             p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
         }
 
-        cs->cst->flags[0] = 0;
-        cs->cst->flags[1] = RADEON_CS_RING_GFX;
-        cs->cst->cs.num_chunks = 2;
-        if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
-            cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
-            cs->cst->cs.num_chunks = 3;
-        }
-        if (cs->ws->info.r600_virtual_address) {
-            cs->cst->flags[0] |= RADEON_CS_USE_VM;
-            cs->cst->cs.num_chunks = 3;
-        }
-        if (flags & RADEON_FLUSH_END_OF_FRAME) {
-            cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
-            cs->cst->cs.num_chunks = 3;
-        }
-        if (flags & RADEON_FLUSH_COMPUTE) {
-            cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
+        switch (cs->base.ring_type) {
+        case RING_DMA:
+            cs->cst->flags[0] = 0;
+            cs->cst->flags[1] = RADEON_CS_RING_DMA;
             cs->cst->cs.num_chunks = 3;
+            if (cs->ws->info.r600_virtual_address) {
+                cs->cst->flags[0] |= RADEON_CS_USE_VM;
+            }
+            break;
+        default:
+        case RING_GFX:
+            cs->cst->flags[0] = 0;
+            cs->cst->flags[1] = RADEON_CS_RING_GFX;
+            cs->cst->cs.num_chunks = 2;
+            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
+                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
+                cs->cst->cs.num_chunks = 3;
+            }
+            if (cs->ws->info.r600_virtual_address) {
+                cs->cst->flags[0] |= RADEON_CS_USE_VM;
+                cs->cst->cs.num_chunks = 3;
+            }
+            if (flags & RADEON_FLUSH_END_OF_FRAME) {
+                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
+                cs->cst->cs.num_chunks = 3;
+            }
+            if (flags & RADEON_FLUSH_COMPUTE) {
+                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
+                cs->cst->cs.num_chunks = 3;
+            }
+            break;
         }
 
-        if (cs->thread &&
-            (flags & RADEON_FLUSH_ASYNC)) {
+        if (cs->ws->thread && (flags & RADEON_FLUSH_ASYNC)) {
             cs->flush_started = 1;
-            pipe_semaphore_signal(&cs->flush_queued);
+            radeon_drm_ws_queue_cs(cs->ws, cs);
         } else {
+            pipe_mutex_lock(cs->ws->cs_stack_lock);
+            if (cs->ws->thread) {
+                while (p_atomic_read(&cs->ws->ncs)) {
+                    pipe_condvar_wait(cs->ws->cs_queue_empty, cs->ws->cs_stack_lock);
+                }
+            }
+            pipe_mutex_unlock(cs->ws->cs_stack_lock);
             radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
         }
     } else {
@@ -503,14 +525,8 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
 static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
 {
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
-    radeon_drm_cs_sync_flush(cs);
-    if (cs->thread) {
-        cs->kill_thread = 1;
-        pipe_semaphore_signal(&cs->flush_queued);
-        pipe_semaphore_wait(&cs->flush_completed);
-        pipe_thread_wait(cs->thread);
-    }
-    pipe_semaphore_destroy(&cs->flush_queued);
+
+    radeon_drm_cs_sync_flush(rcs);
     pipe_semaphore_destroy(&cs->flush_completed);
     radeon_cs_context_cleanup(&cs->csc1);
     radeon_cs_context_cleanup(&cs->csc2);
@@ -525,6 +541,7 @@ static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs,
                                     void *user)
 {
     struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+
     cs->flush_cs = flush;
     cs->flush_data = user;
 }
@@ -562,4 +579,5 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
     ws->base.cs_flush = radeon_drm_cs_flush;
     ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
     ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
+    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
 }
index a88fba5b40eb775fd91efac4f8a8d82605ed21a2..570842dc51c47c58e5ff11d76b97e4280a1be097 100644 (file)
@@ -74,9 +74,8 @@ struct radeon_drm_cs {
     void (*flush_cs)(void *ctx, unsigned flags);
     void *flush_data;
 
-    pipe_thread thread;
-    int flush_started, kill_thread;
-    pipe_semaphore flush_queued, flush_completed;
+    int flush_started;
+    pipe_semaphore flush_completed;
 };
 
 int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo);
@@ -118,7 +117,8 @@ radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
     return bo->num_cs_references != 0;
 }
 
-void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs);
+void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs);
 void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws);
+void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc);
 
 #endif
index b8a876c166241bba90211a6a9647da0693953c43..d23220df8a81ae00869247594d77cfcf19df589b 100644 (file)
@@ -316,6 +316,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
         break;
     }
 
+    /* Check for dma */
+    ws->info.r600_has_dma = FALSE;
+    if (ws->info.chip_class >= R700 && ws->info.drm_minor >= 27) {
+        ws->info.r600_has_dma = TRUE;
+    }
+
     /* Get GEM info. */
     retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO,
             &gem_info, sizeof(gem_info));
@@ -389,12 +395,21 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
 {
     struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
 
+    if (ws->thread) {
+        ws->kill_thread = 1;
+        pipe_semaphore_signal(&ws->cs_queued);
+        pipe_thread_wait(ws->thread);
+    }
+    pipe_semaphore_destroy(&ws->cs_queued);
+    pipe_condvar_destroy(ws->cs_queue_empty);
+
     if (!pipe_reference(&ws->base.reference, NULL)) {
         return;
     }
 
     pipe_mutex_destroy(ws->hyperz_owner_mutex);
     pipe_mutex_destroy(ws->cmask_owner_mutex);
+    pipe_mutex_destroy(ws->cs_stack_lock);
 
     ws->cman->destroy(ws->cman);
     ws->kman->destroy(ws->kman);
@@ -477,6 +492,71 @@ static int compare_fd(void *key1, void *key2)
     return pointer_to_intptr(key1) != pointer_to_intptr(key2);
 }
 
+void radeon_drm_ws_queue_cs(struct radeon_drm_winsys *ws, struct radeon_drm_cs *cs)
+{
+retry:
+    pipe_mutex_lock(ws->cs_stack_lock);
+    if (p_atomic_read(&ws->ncs) >= RING_LAST) {
+        /* no room left for a flush */
+        pipe_mutex_unlock(ws->cs_stack_lock);
+        goto retry;
+    }
+    ws->cs_stack[p_atomic_read(&ws->ncs)] = cs;
+    p_atomic_inc(&ws->ncs);
+    pipe_mutex_unlock(ws->cs_stack_lock);
+    pipe_semaphore_signal(&ws->cs_queued);
+}
+
+static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param)
+{
+    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)param;
+    struct radeon_drm_cs *cs;
+    unsigned i, empty_stack;
+
+    while (1) {
+        pipe_semaphore_wait(&ws->cs_queued);
+        if (ws->kill_thread)
+            break;
+next:
+        pipe_mutex_lock(ws->cs_stack_lock);
+        cs = ws->cs_stack[0];
+        pipe_mutex_unlock(ws->cs_stack_lock);
+
+        if (cs) {
+            radeon_drm_cs_emit_ioctl_oneshot(cs->cst);
+
+            pipe_mutex_lock(ws->cs_stack_lock);
+            for (i = 1; i < p_atomic_read(&ws->ncs); i++) {
+                ws->cs_stack[i - 1] = ws->cs_stack[i];
+            }
+            ws->cs_stack[p_atomic_read(&ws->ncs) - 1] = NULL;
+            empty_stack = p_atomic_dec_zero(&ws->ncs);
+            if (empty_stack) {
+                pipe_condvar_signal(ws->cs_queue_empty);
+            }
+            pipe_mutex_unlock(ws->cs_stack_lock);
+
+            pipe_semaphore_signal(&cs->flush_completed);
+
+            if (!empty_stack) {
+                goto next;
+            }
+        }
+    }
+    pipe_mutex_lock(ws->cs_stack_lock);
+    for (i = 0; i < p_atomic_read(&ws->ncs); i++) {
+        pipe_semaphore_signal(&ws->cs_stack[i]->flush_completed);
+        ws->cs_stack[i] = NULL;
+    }
+    p_atomic_set(&ws->ncs, 0);
+    pipe_condvar_signal(ws->cs_queue_empty);
+    pipe_mutex_unlock(ws->cs_stack_lock);
+    return NULL;
+}
+
+DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE)
+static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param);
+
 struct radeon_winsys *radeon_drm_winsys_create(int fd)
 {
     struct radeon_drm_winsys *ws;
@@ -531,6 +611,13 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd)
 
     pipe_mutex_init(ws->hyperz_owner_mutex);
     pipe_mutex_init(ws->cmask_owner_mutex);
+    pipe_mutex_init(ws->cs_stack_lock);
+
+    p_atomic_set(&ws->ncs, 0);
+    pipe_semaphore_init(&ws->cs_queued, 0);
+    pipe_condvar_init(ws->cs_queue_empty);
+    if (ws->num_cpus > 1 && debug_get_option_thread())
+        ws->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, ws);
 
     return &ws->base;
 
index e714127730fee7669eed299ba0ba98efca9cd4f9..74eb408151b109619056c74fce6ad23fa19851b1 100644 (file)
@@ -33,6 +33,8 @@
 #include "radeon_winsys.h"
 #include "os/os_thread.h"
 
+struct radeon_drm_cs;
+
 enum radeon_generation {
     DRV_R300,
     DRV_R600,
@@ -58,6 +60,19 @@ struct radeon_drm_winsys {
     pipe_mutex hyperz_owner_mutex;
     struct radeon_drm_cs *cmask_owner;
     pipe_mutex cmask_owner_mutex;
+
+    /* rings submission thread */
+    pipe_mutex cs_stack_lock;
+    pipe_semaphore cs_queued;
+    /* we cannot use a semaphore for the empty queue because maintaining an
+     * even number of calls to semaphore_wait and semaphore_signal is, to say
+     * the least, tricky
+     */
+    pipe_condvar cs_queue_empty;
+    pipe_thread thread;
+    int kill_thread;
+    int ncs;
+    struct radeon_drm_cs *cs_stack[RING_LAST];
 };
 
 static INLINE struct radeon_drm_winsys *
@@ -66,4 +81,6 @@ radeon_drm_winsys(struct radeon_winsys *base)
     return (struct radeon_drm_winsys*)base;
 }
 
+void radeon_drm_ws_queue_cs(struct radeon_drm_winsys *ws, struct radeon_drm_cs *cs);
+
 #endif
index d0c48224cc50f6a2214450219e23bacccca4da85..7fdef3fad87531eb4ef21d76892442d3cb485153 100644 (file)
@@ -138,12 +138,19 @@ enum chip_class {
     TAHITI,
 };
 
+enum ring_type {
+    RING_GFX = 0,
+    RING_DMA,
+    RING_LAST,
+};
+
 struct winsys_handle;
 struct radeon_winsys_cs_handle;
 
 struct radeon_winsys_cs {
-    unsigned cdw;  /* Number of used dwords. */
-    uint32_t *buf; /* The command buffer. */
+    unsigned                    cdw;  /* Number of used dwords. */
+    uint32_t                    *buf; /* The command buffer. */
+    enum ring_type              ring_type;
 };
 
 struct radeon_info {
@@ -170,6 +177,7 @@ struct radeon_info {
     uint32_t                    r600_max_pipes;
     boolean                     r600_backend_map_valid;
     boolean                     r600_virtual_address;
+    boolean                     r600_has_dma;
 };
 
 enum radeon_feature_id {
@@ -350,7 +358,7 @@ struct radeon_winsys {
      *
      * \param ws        The winsys this function is called from.
      */
-    struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws);
+    struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws, enum ring_type ring_type);
 
     /**
      * Destroy a command stream.
@@ -433,6 +441,12 @@ struct radeon_winsys {
     boolean (*cs_request_feature)(struct radeon_winsys_cs *cs,
                                   enum radeon_feature_id fid,
                                   boolean enable);
+     /**
+      * Make sure all asynchronous flushes of the cs have completed
+      *
+      * \param cs        A command stream.
+      */
+    void (*cs_sync_flush)(struct radeon_winsys_cs *cs);
 
     /**
      * Initialize surface