- the slab buffer size increased from 128 KB to 2 MB (PTE fragment size)
- the max suballocated buffer size increased from 64 KB to 256 KB,
which wastes memory and therefore increases memory usage
- the number of suballocators increased from 1 to 3 and they are layered
on top of each other to minimize unused space in slabs
The final increase in memory usage is:
DeusEx:MD: 1.8%
DOTA 2: 1.75%
DiRT Rally: 0.2%
The kernel driver will also receive fewer buffers.
if (entry_size <= max_entry_size) {
/* The slab size is twice the size of the largest possible entry. */
slab_size = max_entry_size * 2;
+
+ /* The largest slab should have the same size as the PTE fragment
+ * size to get faster address translation.
+ */
+ if (i == NUM_SLAB_ALLOCATORS - 1 &&
+ slab_size < ws->info.pte_fragment_size)
+ slab_size = ws->info.pte_fragment_size;
+ break;
}
}
assert(slab_size != 0);
amdgpu_bo_destroy, amdgpu_bo_can_reclaim);
unsigned min_slab_order = 9; /* 512 bytes */
- unsigned max_slab_order = 16; /* 64 KB - higher numbers increase memory usage */
+ unsigned max_slab_order = 18; /* 256 KB - higher numbers increase memory usage */
unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
NUM_SLAB_ALLOCATORS;
struct amdgpu_cs;
-#define NUM_SLAB_ALLOCATORS 1
+#define NUM_SLAB_ALLOCATORS 3
struct amdgpu_winsys {
struct radeon_winsys base;