#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
-#include <values.h>
+#include <limits.h>
#include <assert.h>
#include <linux/futex.h>
#include <linux/memfd.h>
static uint32_t
anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state);
-void
+VkResult
anv_block_pool_init(struct anv_block_pool *pool,
struct anv_device *device, uint32_t block_size)
{
+ VkResult result;
+
assert(util_is_power_of_two(block_size));
pool->device = device;
- pool->bo.gem_handle = 0;
- pool->bo.offset = 0;
- pool->bo.size = 0;
- pool->bo.is_winsys_bo = false;
+ anv_bo_init(&pool->bo, 0, 0);
pool->block_size = block_size;
pool->free_list = ANV_FREE_LIST_EMPTY;
pool->back_free_list = ANV_FREE_LIST_EMPTY;
pool->fd = memfd_create("block pool", MFD_CLOEXEC);
if (pool->fd == -1)
- return;
+ return vk_error(VK_ERROR_INITIALIZATION_FAILED);
/* Just make it 2GB up-front. The Linux kernel won't actually back it
* with pages until we either map and fault on one of them or we use
* userptr and send a chunk of it off to the GPU.
*/
- if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1)
- return;
+ if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) {
+ result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+ goto fail_fd;
+ }
- anv_vector_init(&pool->mmap_cleanups,
- round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128);
+ if (!u_vector_init(&pool->mmap_cleanups,
+ round_to_power_of_two(sizeof(struct anv_mmap_cleanup)),
+ 128)) {
+ result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+ goto fail_fd;
+ }
pool->state.next = 0;
pool->state.end = 0;
/* Immediately grow the pool so we'll have a backing bo. */
pool->state.end = anv_block_pool_grow(pool, &pool->state);
+
+ return VK_SUCCESS;
+
+ fail_fd:
+ close(pool->fd);
+
+ return result;
}
void
{
struct anv_mmap_cleanup *cleanup;
- anv_vector_foreach(cleanup, &pool->mmap_cleanups) {
+ u_vector_foreach(cleanup, &pool->mmap_cleanups) {
if (cleanup->map)
munmap(cleanup->map, cleanup->size);
if (cleanup->gem_handle)
anv_gem_close(pool->device, cleanup->gem_handle);
}
- anv_vector_finish(&pool->mmap_cleanups);
+ u_vector_finish(&pool->mmap_cleanups);
close(pool->fd);
}
assert(center_bo_offset >= pool->back_state.end);
assert(size - center_bo_offset >= pool->state.end);
- cleanup = anv_vector_add(&pool->mmap_cleanups);
+ cleanup = u_vector_add(&pool->mmap_cleanups);
if (!cleanup)
goto fail;
*cleanup = ANV_MMAP_CLEANUP_INIT;
* values back into pool. */
pool->map = map + center_bo_offset;
pool->center_bo_offset = center_bo_offset;
- pool->bo.gem_handle = gem_handle;
- pool->bo.size = size;
+ anv_bo_init(&pool->bo, gem_handle, size);
pool->bo.map = map;
- pool->bo.index = 0;
done:
pthread_mutex_unlock(&pool->device->mutex);
if (anv_ptr_free_list_pop(&pool->free_list[bucket], &next_free_void)) {
struct bo_pool_bo_link *next_free = next_free_void;
*bo = VG_NOACCESS_READ(&next_free->bo);
+ assert(bo->gem_handle);
assert(bo->map == next_free);
assert(size <= bo->size);
{
/* Make a copy in case the anv_bo happens to be storred in the BO */
struct anv_bo bo = *bo_in;
+
+ VG(VALGRIND_MEMPOOL_FREE(pool, bo.map));
+
struct bo_pool_bo_link *link = bo.map;
- link->bo = bo;
+ VG_NOACCESS_WRITE(&link->bo, bo);
assert(util_is_power_of_two(bo.size));
const unsigned size_log2 = ilog2_round_up(bo.size);
const unsigned bucket = size_log2 - 12;
assert(bucket < ARRAY_SIZE(pool->free_list));
- VG(VALGRIND_MEMPOOL_FREE(pool, bo.map));
anv_ptr_free_list_push(&pool->free_list[bucket], link);
}
{
for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
for (unsigned i = 0; i < 16; i++) {
- struct anv_bo *bo = &pool->bos[i][s];
- if (bo->size > 0)
- anv_gem_close(device, bo->gem_handle);
+ struct anv_scratch_bo *bo = &pool->bos[i][s];
+ if (bo->exists > 0)
+ anv_gem_close(device, bo->bo.gem_handle);
}
}
}
unsigned scratch_size_log2 = ffs(per_thread_scratch / 2048);
assert(scratch_size_log2 < 16);
- struct anv_bo *bo = &pool->bos[scratch_size_log2][stage];
+ struct anv_scratch_bo *bo = &pool->bos[scratch_size_log2][stage];
- /* From now on, we go into a critical section. In order to remain
- * thread-safe, we use the bo size as a lock. A value of 0 means we don't
- * have a valid BO yet. A value of 1 means locked. A value greater than 1
- * means we have a bo of the given size.
- */
+ /* We can use "exists" to shortcut and ignore the critical section */
+ if (bo->exists)
+ return &bo->bo;
- if (bo->size > 1)
- return bo;
+ pthread_mutex_lock(&device->mutex);
- uint64_t size = __sync_val_compare_and_swap(&bo->size, 0, 1);
- if (size == 0) {
- /* We own the lock. Allocate a buffer */
+ __sync_synchronize();
+ if (bo->exists)
+ return &bo->bo;
- struct gen_device_info *devinfo = &device->info;
- uint32_t max_threads[] = {
- [MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
- [MESA_SHADER_TESS_CTRL] = devinfo->max_hs_threads,
- [MESA_SHADER_TESS_EVAL] = devinfo->max_ds_threads,
- [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
- [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
- [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads,
- };
+ const struct anv_physical_device *physical_device =
+ &device->instance->physicalDevice;
+ const struct gen_device_info *devinfo = &physical_device->info;
- size = per_thread_scratch * max_threads[stage];
+ /* WaCSScratchSize:hsw
+ *
+ * Haswell's scratch space address calculation appears to be sparse
+ * rather than tightly packed. The Thread ID has bits indicating which
+ * subslice, EU within a subslice, and thread within an EU it is.
+ * There's a maximum of two slices and two subslices, so these can be
+ * stored with a single bit. Even though there are only 10 EUs per
+ * subslice, this is stored in 4 bits, so there's an effective maximum
+ * value of 16 EUs. Similarly, although there are only 7 threads per EU,
+ * this is stored in a 3 bit number, giving an effective maximum value
+ * of 8 threads per EU.
+ *
+ * This means that we need to use 16 * 8 instead of 10 * 7 for the
+ * number of threads per subslice.
+ */
+ const unsigned subslices = MAX2(physical_device->subslice_total, 1);
+ const unsigned scratch_ids_per_subslice =
+ device->info.is_haswell ? 16 * 8 : devinfo->max_cs_threads;
- struct anv_bo new_bo;
- anv_bo_init_new(&new_bo, device, size);
+ uint32_t max_threads[] = {
+ [MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
+ [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
+ [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
+ [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
+ [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
+ [MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslices,
+ };
- bo->gem_handle = new_bo.gem_handle;
+ uint32_t size = per_thread_scratch * max_threads[stage];
- /* Set the size last because we use it as a lock */
- __sync_synchronize();
- bo->size = size;
+ anv_bo_init_new(&bo->bo, device, size);
- futex_wake((uint32_t *)&bo->size, INT_MAX);
- } else {
- /* Someone else got here first */
- while (bo->size == 1)
- futex_wait((uint32_t *)&bo->size, 1);
- }
+ /* Set the exists last because it may be read by other threads */
+ __sync_synchronize();
+ bo->exists = true;
+
+ pthread_mutex_unlock(&device->mutex);
- return bo;
+ return &bo->bo;
}