r300g: do not wait for a busy BO if neither GPU nor CPU is changing it
author Marek Olšák <maraeo@gmail.com>
Sun, 3 Apr 2011 19:25:40 +0000 (21:25 +0200)
committer Marek Olšák <maraeo@gmail.com>
Sun, 3 Apr 2011 20:49:22 +0000 (22:49 +0200)
Improves frame rate in apps with at least one user vertex buffer and
a hw index buffer.

src/gallium/winsys/radeon/drm/radeon_drm_bo.c
src/gallium/winsys/radeon/drm/radeon_drm_bo.h
src/gallium/winsys/radeon/drm/radeon_drm_cs.c
src/gallium/winsys/radeon/drm/radeon_drm_cs.h

index d90903a91a52e18e42f537a480d31a7b9c7a9c4a..3d0fcea1052d37a10d5793608983e069ec1727ab 100644 (file)
@@ -99,20 +99,27 @@ static void radeon_bo_wait(struct r300_winsys_bo *_buf)
     args.handle = bo->handle;
     while (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
                                &args, sizeof(args)) == -EBUSY);
+
+    bo->busy_for_write = FALSE;
 }
 
 static boolean radeon_bo_is_busy(struct r300_winsys_bo *_buf)
 {
     struct radeon_bo *bo = get_radeon_bo(pb_buffer(_buf));
     struct drm_radeon_gem_busy args = {};
+    boolean busy;
 
     if (p_atomic_read(&bo->num_active_ioctls)) {
         return TRUE;
     }
 
     args.handle = bo->handle;
-    return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
+    busy = drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
                                &args, sizeof(args)) != 0;
+
+    if (!busy)
+        bo->busy_for_write = FALSE;
+    return busy;
 }
 
 static void radeon_bo_destroy(struct pb_buffer *_buf)
@@ -141,6 +148,9 @@ static unsigned get_pb_usage_from_transfer_flags(enum pipe_transfer_usage usage)
 {
     unsigned res = 0;
 
+    if (usage & PIPE_TRANSFER_WRITE)
+        res |= PB_USAGE_CPU_WRITE;
+
     if (usage & PIPE_TRANSFER_DONTBLOCK)
         res |= PB_USAGE_DONTBLOCK;
 
@@ -171,15 +181,36 @@ static void *radeon_bo_map_internal(struct pb_buffer *_buf,
                 return NULL;
             }
         } else {
-            if (radeon_bo_is_referenced_by_cs(cs, bo)) {
-                cs->flush_cs(cs->flush_data, 0);
+            if (!(flags & PB_USAGE_CPU_WRITE)) {
+                /* Mapping for read.
+                 *
+                 * Since we are mapping for read, we don't need to wait
+                 * if the GPU is using the buffer for read too
+                 * (neither one is changing it).
+                 *
+                 * Only check whether the buffer is being used for write. */
+                if (radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
+                    cs->flush_cs(cs->flush_data, 0);
+                    radeon_bo_wait((struct r300_winsys_bo*)bo);
+                } else if (bo->busy_for_write) {
+                    /* Update the busy_for_write field (done by radeon_bo_is_busy)
+                     * and wait if needed. */
+                    if (radeon_bo_is_busy((struct r300_winsys_bo*)bo)) {
+                        radeon_bo_wait((struct r300_winsys_bo*)bo);
+                    }
+                }
             } else {
-                /* Try to avoid busy-waiting in radeon_bo_wait. */
-                if (p_atomic_read(&bo->num_active_ioctls))
-                    radeon_drm_cs_sync_flush(cs);
+                /* Mapping for write. */
+                if (radeon_bo_is_referenced_by_cs(cs, bo)) {
+                    cs->flush_cs(cs->flush_data, 0);
+                } else {
+                    /* Try to avoid busy-waiting in radeon_bo_wait. */
+                    if (p_atomic_read(&bo->num_active_ioctls))
+                        radeon_drm_cs_sync_flush(cs);
+                }
+
+                radeon_bo_wait((struct r300_winsys_bo*)bo);
             }
-
-            radeon_bo_wait((struct r300_winsys_bo*)bo);
         }
     }
 
index e0247f2dfc3f1c0635efc82653ff8f4b03fae09a..b20a0996c93d1a9b07d03b2985faac69ba023824 100644 (file)
@@ -60,6 +60,13 @@ struct radeon_bo {
      * thread, is this bo referenced in? */
     int num_active_ioctls;
 
+    /* Whether the buffer has been relocated for write and is busy since then.
+     * This field is updated in:
+     * - radeon_drm_cs_flush (to TRUE if it's relocated for write)
+     * - radeon_bo_is_busy (to FALSE if it's not busy)
+     * - radeon_bo_wait (to FALSE) */
+    boolean busy_for_write;
+
     boolean flinked;
     uint32_t flink;
 };
index 4adf4ade770ac59440f3018680574af4e9fe4ce9..a506bdc06133cc5e6c8e884f804c870f0499110e 100644 (file)
@@ -377,9 +377,16 @@ static void radeon_drm_cs_flush(struct r300_winsys_cs *rcs, unsigned flags)
 
         cs->csc->chunks[0].length_dw = cs->base.cdw;
 
-        for (i = 0; i < crelocs; i++)
+        for (i = 0; i < crelocs; i++) {
+            /* Update the number of active asynchronous CS ioctls for the buffer. */
             p_atomic_inc(&cs->csc->relocs_bo[i]->num_active_ioctls);
 
+            /* Update whether the buffer is busy for write. */
+            if (cs->csc->relocs[i].write_domain) {
+                cs->csc->relocs_bo[i]->busy_for_write = TRUE;
+            }
+        }
+
         if (cs->ws->num_cpus > 1 && debug_get_option_thread() &&
             (flags & R300_FLUSH_ASYNC)) {
             cs->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, cs->csc);
index dfaa161c3189eaf70fc1bb4575f9dddb6bdd0c5b..dc2050a27eb8723a8004e96d85d0488277bef8bf 100644 (file)
@@ -84,14 +84,32 @@ radeon_drm_cs(struct r300_winsys_cs *base)
     return (struct radeon_drm_cs*)base;
 }
 
-static INLINE boolean radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
-                                                    struct radeon_bo *bo)
+static INLINE boolean
+radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
+                              struct radeon_bo *bo)
 {
     return bo->num_cs_references == bo->rws->num_cs ||
            (bo->num_cs_references && radeon_get_reloc(cs->csc, bo) != -1);
 }
 
-static INLINE boolean radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
+static INLINE boolean
+radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs,
+                                        struct radeon_bo *bo)
+{
+    int index;
+
+    if (!bo->num_cs_references)
+        return FALSE;
+
+    index = radeon_get_reloc(cs->csc, bo);
+    if (index == -1)
+        return FALSE;
+
+    return cs->csc->relocs[index].write_domain != 0;
+}
+
+static INLINE boolean
+radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
 {
     return bo->num_cs_references;
 }