radeon: Reduce number of radeon_bo_map calls that require kernel entry.
author    Pauli Nieminen <suokkos@gmail.com>
          Mon, 1 Mar 2010 21:47:35 +0000 (23:47 +0200)
committer Pauli Nieminen <suokkos@gmail.com>
          Mon, 1 Mar 2010 21:56:35 +0000 (23:56 +0200)
DMA buffers are often remapped many times for a single cs. To reduce the
number of mapping calls that end up in the kernel, DMA buffers are now mapped
when allocated and unmapped when full or at cs flush. Individual mapping
calls elsewhere in the code then become simple reference count
increments/decrements, which is much faster than entering the kernel.

This improves Anholt's openarena benchmark from 36 fps to 44 fps.
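
The win relies on the bo layer caching the CPU mapping behind a reference
count, so only the first map and the last unmap touch the kernel. Below is a
minimal sketch of that assumed pattern, not libdrm_radeon's actual code; all
demo_* names are hypothetical stand-ins:

    #include <stdio.h>
    #include <stdlib.h>

    struct demo_bo {
        void  *ptr;        /* CPU mapping; NULL while unmapped */
        size_t size;
        int    map_count;  /* nested map references */
    };

    /* Stand-ins for the real mmap/munmap kernel entries. */
    static void *demo_kernel_mmap(size_t size) { return malloc(size); }
    static void  demo_kernel_munmap(void *ptr) { free(ptr); }

    static void *demo_bo_map(struct demo_bo *bo)
    {
        if (bo->map_count++ == 0)            /* only the first map is expensive */
            bo->ptr = demo_kernel_mmap(bo->size);
        return bo->ptr;
    }

    static void demo_bo_unmap(struct demo_bo *bo)
    {
        if (--bo->map_count == 0) {          /* last unmap releases the mapping */
            demo_kernel_munmap(bo->ptr);
            bo->ptr = NULL;
        }
    }

    int main(void)
    {
        struct demo_bo bo = { NULL, 4096, 0 };

        demo_bo_map(&bo);    /* "allocation time" map: enters the kernel once */
        demo_bo_map(&bo);    /* per-use map: just bumps the count */
        demo_bo_unmap(&bo);  /* per-use unmap: just drops the count */
        demo_bo_unmap(&bo);  /* "cs flush" unmap: really unmaps */

        printf("mapped: %s\n", bo.ptr ? "yes" : "no");
        return 0;
    }

With this shape, radeonRefillCurrentDmaRegion() pays the kernel-entry cost
once per DMA buffer, and the frequent map/unmap pairs scattered through the
driver stay in userspace.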

src/mesa/drivers/dri/radeon/radeon_dma.c

index d31e4e47ddbbae1401bd5e14b7c373a76fc0eb42..22499bc38d1b8fb981f59626f0c7cc9b3b86daf8 100644
@@ -184,6 +184,8 @@ void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
        radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
                        __FUNCTION__, size, rmesa->dma.minimum_size);
 
+       if (!is_empty_list(&rmesa->dma.reserved))
+               radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
 
        if (is_empty_list(&rmesa->dma.free)
              || last_elem(&rmesa->dma.free)->bo->size < size) {
@@ -211,7 +213,7 @@ again_alloc:
 
        rmesa->dma.current_used = 0;
        rmesa->dma.current_vertexptr = 0;
-       
+
        if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
                                          first_elem(&rmesa->dma.reserved)->bo,
                                          RADEON_GEM_DOMAIN_GTT, 0))
@@ -221,6 +223,7 @@ again_alloc:
         /* Cmd buff have been flushed in radeon_revalidate_bos */
                goto again_alloc;
        }
+       radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
 }
 
 /* Allocates a region from rmesa->dma.current.  If there isn't enough
@@ -332,6 +335,10 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa)
                /* request updated cs processing information from kernel */
                legacy_track_pending(rmesa->radeonScreen->bom, 0);
        }
+
+       if (!is_empty_list(&rmesa->dma.reserved))
+               radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
+
        /* move waiting bos to free list.
           wait list provides gpu time to handle data before reuse */
        foreach_s(dma_bo, temp, &rmesa->dma.wait) {
@@ -349,8 +356,11 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa)
                   FREE(dma_bo);
                   continue;
                }
-               if (!radeon_bo_is_idle(dma_bo->bo))
+               if (!radeon_bo_is_idle(dma_bo->bo)) {
+                       if (rmesa->radeonScreen->driScreen->dri2.enabled)
+                               break;
                        continue;
+               }
                remove_from_list(dma_bo);
                dma_bo->expire_counter = expire_at;
                insert_at_tail(&rmesa->dma.free, dma_bo);
@@ -388,7 +398,7 @@ void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
 {
        radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
        struct radeon_dma *dma = &rmesa->dma;
-               
+
        if (RADEON_DEBUG & RADEON_IOCTL)
                fprintf(stderr, "%s\n", __FUNCTION__);
        dma->flush = NULL;
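
A note on the wait-list hunk above, which turns a continue into a break under
DRI2: the likely rationale (an assumption, not stated in the commit) is that
with kernel-managed submission the buffers on the wait list retire in order,
so the first still-busy bo means everything after it is busy too. A sketch of
that scan with hypothetical types:

    struct demo_dma_bo { struct demo_dma_bo *next; int busy; };

    static void demo_scan_wait_list(struct demo_dma_bo *head, int dri2_enabled)
    {
        struct demo_dma_bo *bo;
        for (bo = head; bo; bo = bo->next) {
            if (bo->busy) {
                if (dri2_enabled)
                    break;   /* list is in completion order: stop early */
                continue;    /* legacy path: later bos may still be idle */
            }
            /* idle: the real code moves bo to the free list here */
        }
    }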