X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fwinsys%2Famdgpu%2Fdrm%2Famdgpu_winsys.c;h=79d2c1345efc61a3d8104fc89b1803edb51ddcb1;hb=ec22dd34c88f8b24a1b4f5b25f4de2aa89f0cad4;hp=b80a988d6b65f60a0cb7ff49d3c13d730e77eddf;hpb=f03b7c9ad92c1656a221297819fbc6d065cc0af7;p=mesa.git diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index b80a988d6b6..79d2c1345ef 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -26,20 +26,19 @@ * next paragraph) shall be included in all copies or substantial portions * of the Software. */ -/* - * Authors: - * Marek Olšák - */ #include "amdgpu_cs.h" #include "amdgpu_public.h" +#include "util/u_cpu_detect.h" #include "util/u_hash_table.h" +#include "util/hash_table.h" +#include "util/xmlconfig.h" #include #include #include #include -#include "amd/common/amdgpu_id.h" +#include "amd/common/ac_llvm_util.h" #include "amd/common/sid.h" #include "amd/common/gfx9d.h" @@ -48,22 +47,52 @@ #endif static struct util_hash_table *dev_tab = NULL; -static mtx_t dev_tab_mutex = _MTX_INITIALIZER_NP; +static simple_mtx_t dev_tab_mutex = _SIMPLE_MTX_INITIALIZER_NP; DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false) +static void handle_env_var_force_family(struct amdgpu_winsys *ws) +{ + const char *family = debug_get_option("SI_FORCE_FAMILY", NULL); + unsigned i; + + if (!family) + return; + + for (i = CHIP_TAHITI; i < CHIP_LAST; i++) { + if (!strcmp(family, ac_get_llvm_processor_name(i))) { + /* Override family and chip_class. */ + ws->info.family = i; + ws->info.name = "GCN-NOOP"; + + if (i >= CHIP_VEGA10) + ws->info.chip_class = GFX9; + else if (i >= CHIP_TONGA) + ws->info.chip_class = VI; + else if (i >= CHIP_BONAIRE) + ws->info.chip_class = CIK; + else + ws->info.chip_class = SI; + + /* Don't submit any IBs. */ + setenv("RADEON_NOOP", "1", 1); + return; + } + } + + fprintf(stderr, "radeonsi: Unknown family: %s\n", family); + exit(1); +} + /* Helper function to do the ioctls needed for setup and init. */ -static bool do_winsys_init(struct amdgpu_winsys *ws, int fd) +static bool do_winsys_init(struct amdgpu_winsys *ws, + const struct pipe_screen_config *config, + int fd) { if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo)) goto fail; - /* LLVM 5.0 is required for GFX9. */ - if (ws->info.chip_class >= GFX9 && HAVE_LLVM < 0x0500) { - fprintf(stderr, "amdgpu: LLVM 5.0 is required, got LLVM %i.%i\n", - HAVE_LLVM >> 8, HAVE_LLVM & 255); - goto fail; - } + handle_env_var_force_family(ws); ws->addrlib = amdgpu_addr_create(&ws->info, &ws->amdinfo, &ws->info.max_alignment); if (!ws->addrlib) { @@ -74,6 +103,8 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd) ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL; ws->debug_all_bos = debug_get_option_all_bos(); ws->reserve_vmid = strstr(debug_get_option("R600_DEBUG", ""), "reserve_vmid") != NULL; + ws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL || + driQueryOptionb(config->options, "radeonsi_zerovram"); return true; @@ -93,13 +124,21 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws) { struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws; + if (ws->reserve_vmid) + amdgpu_vm_unreserve_vmid(ws->dev, 0); + if (util_queue_is_initialized(&ws->cs_queue)) util_queue_destroy(&ws->cs_queue); - mtx_destroy(&ws->bo_fence_lock); - pb_slabs_deinit(&ws->bo_slabs); + simple_mtx_destroy(&ws->bo_fence_lock); + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + if (ws->bo_slabs[i].groups) + pb_slabs_deinit(&ws->bo_slabs[i]); + } pb_cache_deinit(&ws->bo_cache); - mtx_destroy(&ws->global_bo_list_lock); + util_hash_table_destroy(ws->bo_export_table); + simple_mtx_destroy(&ws->global_bo_list_lock); + simple_mtx_destroy(&ws->bo_export_table_lock); do_winsys_deinit(ws); FREE(rws); } @@ -110,7 +149,7 @@ static void amdgpu_winsys_query_info(struct radeon_winsys *rws, *info = ((struct amdgpu_winsys *)rws)->info; } -static bool amdgpu_cs_request_feature(struct radeon_winsys_cs *rcs, +static bool amdgpu_cs_request_feature(struct radeon_cmdbuf *rcs, enum radeon_feature_id fid, bool enable) { @@ -195,16 +234,12 @@ static bool amdgpu_read_registers(struct radeon_winsys *rws, 0xffffffff, 0, out) == 0; } -static unsigned hash_dev(void *key) +static unsigned hash_pointer(void *key) { -#if defined(PIPE_ARCH_X86_64) - return pointer_to_intptr(key) ^ (pointer_to_intptr(key) >> 32); -#else - return pointer_to_intptr(key); -#endif + return _mesa_hash_pointer(key); } -static int compare_dev(void *key1, void *key2) +static int compare_pointers(void *key1, void *key2) { return key1 != key2; } @@ -219,13 +254,18 @@ static bool amdgpu_winsys_unref(struct radeon_winsys *rws) * This must happen while the mutex is locked, so that * amdgpu_winsys_create in another thread doesn't get the winsys * from the table when the counter drops to 0. */ - mtx_lock(&dev_tab_mutex); + simple_mtx_lock(&dev_tab_mutex); destroy = pipe_reference(&ws->reference, NULL); - if (destroy && dev_tab) + if (destroy && dev_tab) { util_hash_table_remove(dev_tab, ws->dev); + if (util_hash_table_count(dev_tab) == 0) { + util_hash_table_destroy(dev_tab); + dev_tab = NULL; + } + } - mtx_unlock(&dev_tab_mutex); + simple_mtx_unlock(&dev_tab_mutex); return destroy; } @@ -235,6 +275,14 @@ static const char* amdgpu_get_chip_name(struct radeon_winsys *ws) return amdgpu_get_marketing_name(dev); } +static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws, + unsigned cache) +{ + struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws; + + util_pin_thread_to_L3(ws->cs_queue.threads[0], cache, + util_cpu_caps.cores_per_L3); +} PUBLIC struct radeon_winsys * amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, @@ -253,15 +301,15 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, drmFreeVersion(version); /* Look up the winsys from the dev table. */ - mtx_lock(&dev_tab_mutex); + simple_mtx_lock(&dev_tab_mutex); if (!dev_tab) - dev_tab = util_hash_table_create(hash_dev, compare_dev); + dev_tab = util_hash_table_create(hash_pointer, compare_pointers); /* Initialize the amdgpu device. This should always return the same pointer * for the same fd. */ r = amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev); if (r) { - mtx_unlock(&dev_tab_mutex); + simple_mtx_unlock(&dev_tab_mutex); fprintf(stderr, "amdgpu: amdgpu_device_initialize failed.\n"); return NULL; } @@ -270,7 +318,13 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, ws = util_hash_table_get(dev_tab, dev); if (ws) { pipe_reference(NULL, &ws->reference); - mtx_unlock(&dev_tab_mutex); + simple_mtx_unlock(&dev_tab_mutex); + + /* Release the device handle, because we don't need it anymore. + * This function is returning an existing winsys instance, which + * has its own device handle. + */ + amdgpu_device_deinitialize(dev); return &ws->base; } @@ -283,24 +337,42 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, ws->info.drm_major = drm_major; ws->info.drm_minor = drm_minor; - if (!do_winsys_init(ws, fd)) + if (!do_winsys_init(ws, config, fd)) goto fail_alloc; /* Create managers. */ - pb_cache_init(&ws->bo_cache, 500000, ws->check_vm ? 1.0f : 2.0f, 0, + pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS, + 500000, ws->check_vm ? 1.0f : 2.0f, 0, (ws->info.vram_size + ws->info.gart_size) / 8, amdgpu_bo_destroy, amdgpu_bo_can_reclaim); - if (!pb_slabs_init(&ws->bo_slabs, - AMDGPU_SLAB_MIN_SIZE_LOG2, AMDGPU_SLAB_MAX_SIZE_LOG2, - RADEON_MAX_SLAB_HEAPS, - ws, - amdgpu_bo_can_reclaim_slab, - amdgpu_bo_slab_alloc, - amdgpu_bo_slab_free)) - goto fail_cache; + unsigned min_slab_order = 9; /* 512 bytes */ + unsigned max_slab_order = 18; /* 256 KB - higher numbers increase memory usage */ + unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) / + NUM_SLAB_ALLOCATORS; + + /* Divide the size order range among slab managers. */ + for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { + unsigned min_order = min_slab_order; + unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator, + max_slab_order); + + if (!pb_slabs_init(&ws->bo_slabs[i], + min_order, max_order, + RADEON_MAX_SLAB_HEAPS, + ws, + amdgpu_bo_can_reclaim_slab, + amdgpu_bo_slab_alloc, + amdgpu_bo_slab_free)) { + amdgpu_winsys_destroy(&ws->base); + simple_mtx_unlock(&dev_tab_mutex); + return NULL; + } + + min_slab_order = max_order + 1; + } - ws->info.min_alloc_size = 1 << AMDGPU_SLAB_MIN_SIZE_LOG2; + ws->info.min_alloc_size = 1 << ws->bo_slabs[0].min_order; /* init reference */ pipe_reference_init(&ws->reference, 1); @@ -313,19 +385,23 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, ws->base.query_value = amdgpu_query_value; ws->base.read_registers = amdgpu_read_registers; ws->base.get_chip_name = amdgpu_get_chip_name; + ws->base.pin_threads_to_L3_cache = amdgpu_pin_threads_to_L3_cache; amdgpu_bo_init_functions(ws); amdgpu_cs_init_functions(ws); amdgpu_surface_init_functions(ws); LIST_INITHEAD(&ws->global_bo_list); - (void) mtx_init(&ws->global_bo_list_lock, mtx_plain); - (void) mtx_init(&ws->bo_fence_lock, mtx_plain); + ws->bo_export_table = util_hash_table_create(hash_pointer, compare_pointers); + + (void) simple_mtx_init(&ws->global_bo_list_lock, mtx_plain); + (void) simple_mtx_init(&ws->bo_fence_lock, mtx_plain); + (void) simple_mtx_init(&ws->bo_export_table_lock, mtx_plain); - if (!util_queue_init(&ws->cs_queue, "amdgpu_cs", 8, 1, + if (!util_queue_init(&ws->cs_queue, "cs", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL)) { amdgpu_winsys_destroy(&ws->base); - mtx_unlock(&dev_tab_mutex); + simple_mtx_unlock(&dev_tab_mutex); return NULL; } @@ -337,16 +413,24 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, ws->base.screen = screen_create(&ws->base, config); if (!ws->base.screen) { amdgpu_winsys_destroy(&ws->base); - mtx_unlock(&dev_tab_mutex); + simple_mtx_unlock(&dev_tab_mutex); return NULL; } util_hash_table_set(dev_tab, dev, ws); + if (ws->reserve_vmid) { + r = amdgpu_vm_reserve_vmid(dev, 0); + if (r) { + fprintf(stderr, "amdgpu: amdgpu_vm_reserve_vmid failed. (%i)\n", r); + goto fail_cache; + } + } + /* We must unlock the mutex once the winsys is fully initialized, so that * other threads attempting to create the winsys from the same fd will * get a fully initialized winsys and not just half-way initialized. */ - mtx_unlock(&dev_tab_mutex); + simple_mtx_unlock(&dev_tab_mutex); return &ws->base; @@ -356,6 +440,6 @@ fail_cache: fail_alloc: FREE(ws); fail: - mtx_unlock(&dev_tab_mutex); + simple_mtx_unlock(&dev_tab_mutex); return NULL; }