util: remove LIST_ADDTAIL macro

[mesa.git] / src / gallium / winsys / radeon / drm / radeon_drm_winsys.c
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c

index 4fe36dc3f2578e237a4295ce62589fea17268992..36d506b4928099512be65e5bfcd7dbc31fe1db6f 100644 (file)
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -29,6 +29,7 @@
  #include "radeon_drm_cs.h"
  #include "radeon_drm_public.h"
  
+#include "util/u_cpu_detect.h"
  #include "util/u_memory.h"
  #include "util/u_hash_table.h"
  
@@ -165,6 +166,7 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
      ws->info.drm_major = version->version_major;
      ws->info.drm_minor = version->version_minor;
      ws->info.drm_patchlevel = version->version_patchlevel;
+    ws->info.is_amdgpu = false;
      drmFreeVersion(version);
  
      /* Get PCI ID. */
@@ -182,7 +184,12 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
  #include "pci_ids/r600_pci_ids.h"
  #undef CHIPSET
  
-#define CHIPSET(pci_id, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; ws->gen = DRV_SI; break;
+#define CHIPSET(pci_id, cfamily) \
+    case pci_id: \
+        ws->info.family = CHIP_##cfamily; \
+        ws->info.name = #cfamily; \
+        ws->gen = DRV_SI; \
+        break;
  #include "pci_ids/radeonsi_pci_ids.h"
  #undef CHIPSET
  
@@ -263,14 +270,13 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
      case CHIP_VERDE:
      case CHIP_OLAND:
      case CHIP_HAINAN:
-        ws->info.chip_class = SI;
+        ws->info.chip_class = GFX6;
          break;
      case CHIP_BONAIRE:
      case CHIP_KAVERI:
      case CHIP_KABINI:
      case CHIP_HAWAII:
-    case CHIP_MULLINS:
-        ws->info.chip_class = CIK;
+        ws->info.chip_class = GFX7;
          break;
      }
  
@@ -290,7 +296,6 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
      case CHIP_ARUBA:
      case CHIP_KAVERI:
      case CHIP_KABINI:
-    case CHIP_MULLINS:
         ws->info.has_dedicated_vram = false;
         break;
  
@@ -356,13 +361,17 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
      if (ws->info.drm_minor < 49)
          ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024);
  
-    /* Radeon allocates all buffers as contigous, which makes large allocations
+    /* Radeon allocates all buffers contiguously, which makes large allocations
       * unlikely to succeed. */
-    ws->info.max_alloc_size = MAX2(ws->info.vram_size, ws->info.gart_size) * 0.7;
      if (ws->info.has_dedicated_vram)
-        ws->info.max_alloc_size = MIN2(ws->info.vram_size * 0.7, ws->info.max_alloc_size);
+           ws->info.max_alloc_size = ws->info.vram_size * 0.7;
+    else
+           ws->info.max_alloc_size = ws->info.gart_size * 0.7;
+
      if (ws->info.drm_minor < 40)
          ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024);
+    /* Both 32-bit and 64-bit address spaces only have 4GB. */
+    ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024);
  
      /* Get max clock frequency info and convert it to MHz */
      radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
@@ -437,22 +446,22 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
              radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL,
                                   &ws->info.enabled_rb_mask);
  
-        ws->info.has_virtual_memory = false;
+        ws->info.r600_has_virtual_memory = false;
          if (ws->info.drm_minor >= 13) {
              uint32_t ib_vm_max_size;
  
-            ws->info.has_virtual_memory = true;
+            ws->info.r600_has_virtual_memory = true;
              if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
                                        &ws->va_start))
-                ws->info.has_virtual_memory = false;
+                ws->info.r600_has_virtual_memory = false;
              if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
                                        &ib_vm_max_size))
-                ws->info.has_virtual_memory = false;
+                ws->info.r600_has_virtual_memory = false;
              radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
                                   &ws->va_unmap_working);
          }
         if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", false))
-               ws->info.has_virtual_memory = false;
+               ws->info.r600_has_virtual_memory = false;
      }
  
      /* Get max pipes, this is only needed for compute shaders.  All evergreen+
@@ -469,6 +478,31 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
      radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL,
                           &ws->info.max_se);
  
+    switch (ws->info.family) {
+    case CHIP_HAINAN:
+    case CHIP_KABINI:
+        ws->info.num_tcc_blocks = 2;
+        break;
+    case CHIP_VERDE:
+    case CHIP_OLAND:
+    case CHIP_BONAIRE:
+    case CHIP_KAVERI:
+        ws->info.num_tcc_blocks = 4;
+        break;
+    case CHIP_PITCAIRN:
+        ws->info.num_tcc_blocks = 8;
+        break;
+    case CHIP_TAHITI:
+        ws->info.num_tcc_blocks = 12;
+        break;
+    case CHIP_HAWAII:
+        ws->info.num_tcc_blocks = 16;
+        break;
+    default:
+        ws->info.num_tcc_blocks = 0;
+        break;
+    }
+
      if (!ws->info.max_se) {
          switch (ws->info.family) {
          default:
@@ -491,6 +525,10 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
  
      radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL,
                           &ws->info.max_sh_per_se);
+    if (ws->gen == DRV_SI) {
+        ws->info.num_good_cu_per_sh = ws->info.num_good_compute_units /
+                                      (ws->info.max_se * ws->info.max_sh_per_se);
+    }
  
      radeon_get_drm_value(ws->fd, RADEON_INFO_ACCEL_WORKING2, NULL,
                           &ws->accel_working2);
@@ -502,18 +540,18 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
          return false;
      }
  
-    if (ws->info.chip_class == CIK) {
+    if (ws->info.chip_class == GFX7) {
          if (!radeon_get_drm_value(ws->fd, RADEON_INFO_CIK_MACROTILE_MODE_ARRAY, NULL,
                                    ws->info.cik_macrotile_mode_array)) {
-            fprintf(stderr, "radeon: Kernel 3.13 is required for CIK support.\n");
+            fprintf(stderr, "radeon: Kernel 3.13 is required for Sea Islands support.\n");
              return false;
          }
      }
  
-    if (ws->info.chip_class >= SI) {
+    if (ws->info.chip_class >= GFX6) {
          if (!radeon_get_drm_value(ws->fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL,
                                    ws->info.si_tile_mode_array)) {
-            fprintf(stderr, "radeon: Kernel 3.10 is required for SI support.\n");
+            fprintf(stderr, "radeon: Kernel 3.10 is required for Southern Islands support.\n");
              return false;
          }
      }
@@ -521,12 +559,42 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
      /* Hawaii with old firmware needs type2 nop packet.
       * accel_working2 with value 3 indicates the new firmware.
       */
-    ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= SI ||
+    ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= GFX6 ||
                                      (ws->info.family == CHIP_HAWAII &&
                                       ws->accel_working2 < 3);
      ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
-
-    ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL;
+    ws->info.ib_start_alignment = 4096;
+    ws->info.kernel_flushes_hdp_before_ib = ws->info.drm_minor >= 40;
+    /* HTILE is broken with 1D tiling on old kernels and GFX7. */
+    ws->info.htile_cmask_support_1d_tiling = ws->info.chip_class != GFX7 ||
+                                             ws->info.drm_minor >= 38;
+    ws->info.si_TA_CS_BC_BASE_ADDR_allowed = ws->info.drm_minor >= 48;
+    ws->info.has_bo_metadata = false;
+    ws->info.has_gpu_reset_status_query = ws->info.drm_minor >= 43;
+    ws->info.has_eqaa_surface_allocator = false;
+    ws->info.has_format_bc1_through_bc7 = ws->info.drm_minor >= 31;
+    ws->info.kernel_flushes_tc_l2_after_ib = true;
+    /* Old kernels disallowed register writes via COPY_DATA
+     * that are used for indirect compute dispatches. */
+    ws->info.has_indirect_compute_dispatch = ws->info.chip_class == GFX7 ||
+                                             (ws->info.chip_class == GFX6 &&
+                                              ws->info.drm_minor >= 45);
+    /* GFX6 doesn't support unaligned loads. */
+    ws->info.has_unaligned_shader_loads = ws->info.chip_class == GFX7 &&
+                                          ws->info.drm_minor >= 50;
+    ws->info.has_sparse_vm_mappings = false;
+    /* 2D tiling on GFX7 is supported since DRM 2.35.0 */
+    ws->info.has_2d_tiling = ws->info.chip_class <= GFX6 || ws->info.drm_minor >= 35;
+    ws->info.has_read_registers_query = ws->info.drm_minor >= 42;
+    ws->info.max_alignment = 1024*1024;
+    ws->info.has_graphics = true;
+    ws->info.cpdma_prefetch_writes_memory = true;
+    ws->info.max_wave64_per_simd = 10;
+    ws->info.num_physical_sgprs_per_simd = 512;
+    ws->info.num_physical_wave64_vgprs_per_simd = 256;
+
+    ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL ||
+                   strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL;
  
      return true;
  }
@@ -541,7 +609,7 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
      mtx_destroy(&ws->hyperz_owner_mutex);
      mtx_destroy(&ws->cmask_owner_mutex);
  
-    if (ws->info.has_virtual_memory)
+    if (ws->info.r600_has_virtual_memory)
          pb_slabs_deinit(&ws->bo_slabs);
      pb_cache_deinit(&ws->bo_cache);
  
@@ -553,6 +621,7 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
      util_hash_table_destroy(ws->bo_handles);
      util_hash_table_destroy(ws->bo_vas);
      mtx_destroy(&ws->bo_handles_mutex);
+    mtx_destroy(&ws->vm32.mutex);
      mtx_destroy(&ws->vm64.mutex);
      mtx_destroy(&ws->bo_fence_lock);
  
@@ -568,7 +637,7 @@ static void radeon_query_info(struct radeon_winsys *rws,
      *info = ((struct radeon_drm_winsys *)rws)->info;
  }
  
-static bool radeon_cs_request_feature(struct radeon_winsys_cs *rcs,
+static bool radeon_cs_request_feature(struct radeon_cmdbuf *rcs,
                                        enum radeon_feature_id fid,
                                        bool enable)
  {
@@ -590,6 +659,18 @@ static bool radeon_cs_request_feature(struct radeon_winsys_cs *rcs,
      return false;
  }
  
+/* Query the GPU reset counter through radeon_get_drm_value().
+ *
+ * ws: winsys whose DRM file descriptor (ws->fd) is queried.
+ *
+ * Returns 0 when ws->info.has_gpu_reset_status_query is false. Otherwise
+ * returns whatever the RADEON_INFO_GPU_RESET_COUNTER query stored, which
+ * stays 0 if the query leaves the output untouched.
+ */
+uint32_t radeon_drm_get_gpu_reset_counter(struct radeon_drm_winsys *ws)
+{
+    /* Keep the local the same width as the 32-bit out-parameter. Passing
+     * (uint32_t*)&some_uint64 makes the callee fill only half of the
+     * object; on big-endian hosts that is the upper half, so the value
+     * truncated on return would always be 0. */
+    uint32_t reset_counter = 0;
+
+    if (!ws->info.has_gpu_reset_status_query)
+        return 0;
+
+    radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER,
+                         "gpu-reset-counter", &reset_counter);
+    return reset_counter;
+}
+
  static uint64_t radeon_query_value(struct radeon_winsys *rws,
                                     enum radeon_value_id value)
  {
@@ -652,10 +733,6 @@ static uint64_t radeon_query_value(struct radeon_winsys *rws,
          radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK,
                               "current-gpu-mclk", (uint32_t*)&retval);
          return retval;
-    case RADEON_GPU_RESET_COUNTER:
-        radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER,
-                             "gpu-reset-counter", (uint32_t*)&retval);
-        return retval;
      case RADEON_CS_THREAD_TIME:
          return util_queue_get_thread_time_nano(&ws->cs_queue, 0);
      }
@@ -715,8 +792,13 @@ static bool radeon_winsys_unref(struct radeon_winsys *ws)
      mtx_lock(&fd_tab_mutex);
  
      destroy = pipe_reference(&rws->reference, NULL);
-    if (destroy && fd_tab)
+    if (destroy && fd_tab) {
          util_hash_table_remove(fd_tab, intptr_to_pointer(rws->fd));
+        if (util_hash_table_count(fd_tab) == 0) {
+           util_hash_table_destroy(fd_tab);
+           fd_tab = NULL;
+        }
+    }
  
      mtx_unlock(&fd_tab_mutex);
      return destroy;
@@ -734,6 +816,17 @@ static int handle_compare(void *key1, void *key2)
      return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
  }
  
+/* Winsys hook: hand thread 0 of the winsys cs_queue to
+ * util_pin_thread_to_L3() together with the requested cache index and
+ * util_cpu_caps.cores_per_L3. Does nothing while the queue is not
+ * initialized.
+ */
+static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws,
+                                           unsigned cache)
+{
+    struct radeon_drm_winsys *drm_ws = (struct radeon_drm_winsys*)ws;
+
+    if (!util_queue_is_initialized(&drm_ws->cs_queue))
+        return;
+
+    util_pin_thread_to_L3(drm_ws->cs_queue.threads[0], cache,
+                          util_cpu_caps.cores_per_L3);
+}
+
  PUBLIC struct radeon_winsys *
  radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
                          radeon_screen_create_t screen_create)
@@ -769,7 +862,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
                    radeon_bo_destroy,
                    radeon_bo_can_reclaim);
  
-    if (ws->info.has_virtual_memory) {
+    if (ws->info.r600_has_virtual_memory) {
          /* There is no fundamental obstacle to using slab buffer allocation
           * without GPUVM, but enabling it requires making sure that the drivers
           * honor the address offset.
@@ -801,6 +894,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
      ws->base.unref = radeon_winsys_unref;
      ws->base.destroy = radeon_winsys_destroy;
      ws->base.query_info = radeon_query_info;
+    ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache;
      ws->base.cs_request_feature = radeon_cs_request_feature;
      ws->base.query_value = radeon_query_value;
      ws->base.read_registers = radeon_read_registers;
@@ -816,17 +910,40 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
      ws->bo_handles = util_hash_table_create(handle_hash, handle_compare);
      ws->bo_vas = util_hash_table_create(handle_hash, handle_compare);
      (void) mtx_init(&ws->bo_handles_mutex, mtx_plain);
+    (void) mtx_init(&ws->vm32.mutex, mtx_plain);
      (void) mtx_init(&ws->vm64.mutex, mtx_plain);
      (void) mtx_init(&ws->bo_fence_lock, mtx_plain);
+    list_inithead(&ws->vm32.holes);
      list_inithead(&ws->vm64.holes);
  
-    ws->vm64.start = ws->va_start;
+    /* The kernel currently returns 8MB. Make sure this doesn't change. */
+    if (ws->va_start > 8 * 1024 * 1024) {
+        /* Not enough 32-bit address space. */
+        radeon_winsys_destroy(&ws->base);
+        mtx_unlock(&fd_tab_mutex);
+        return NULL;
+    }
+
+    ws->vm32.start = ws->va_start;
+    ws->vm32.end = 1ull << 32;
+
+    /* The maximum is 8GB of virtual address space limited by the kernel.
+     * It's obviously not enough for bigger cards, like Hawaiis with 4GB
+     * and 8GB of physical memory and 4GB of GART.
+     *
+     * Older kernels set the limit to 4GB, which is even worse, so they only
+     * have 32-bit address space.
+     */
+    if (ws->info.drm_minor >= 41) {
+        ws->vm64.start = 1ull << 32;
+        ws->vm64.end = 1ull << 33;
+    }
  
      /* TTM aligns the BO size to the CPU page size */
      ws->info.gart_page_size = sysconf(_SC_PAGESIZE);
  
      if (ws->num_cpus > 1 && debug_get_option_thread())
-        util_queue_init(&ws->cs_queue, "radeon_cs", 8, 1, 0);
+        util_queue_init(&ws->cs_queue, "rcs", 8, 1, 0);
  
      /* Create the screen at the end. The winsys must be initialized
       * completely.
@@ -850,7 +967,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
      return &ws->base;
  
  fail_slab:
-    if (ws->info.has_virtual_memory)
+    if (ws->info.r600_has_virtual_memory)
          pb_slabs_deinit(&ws->bo_slabs);
  fail_cache:
      pb_cache_deinit(&ws->bo_cache);