nv50/ir/nir: fix smem size
[mesa.git] / src / gallium / drivers / nouveau / nouveau_screen.c
index 99546a213198538b0e8331ebb9b2641bc240c955..e725f37f0e262af29582871ca47178aaf54834f8 100644 (file)
 
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
-#include "util/u_format.h"
-#include "util/u_format_s3tc.h"
+#include "util/format/u_format.h"
+#include "util/format/u_format_s3tc.h"
 #include "util/u_string.h"
 
+#include "os/os_mman.h"
+#include "util/os_time.h"
+
 #include <stdio.h>
 #include <errno.h>
 #include <stdlib.h>
 
-#include "nouveau/nouveau_bo.h"
-#include "nouveau/nouveau_mm.h"
+#include <nouveau_drm.h>
+#include <xf86drm.h>
+
 #include "nouveau_winsys.h"
 #include "nouveau_screen.h"
+#include "nouveau_context.h"
 #include "nouveau_fence.h"
+#include "nouveau_mm.h"
+#include "nouveau_buffer.h"
+
+#include <compiler/glsl_types.h>
 
 /* XXX this should go away */
-#include "state_tracker/drm_driver.h"
-#include "util/u_simple_screen.h"
+#include "frontend/drm_driver.h"
 
-#include "nouveau_drmif.h"
+/* Even though GPUs might allow addresses with more bits, some engines do not.
+ * Stick with 40 for compatibility.
+ */
+#define NV_GENERIC_VM_LIMIT_SHIFT 39
 
 int nouveau_mesa_debug = 0;
 
 /* Return the screen name: "NV" followed by the device chipset id printed
  * as upper-case hex with at least two digits.
  *
  * The string lives in a function-local static buffer, so each call
  * overwrites the result handed out by the previous one.
  */
 static const char *
 nouveau_screen_get_name(struct pipe_screen *pscreen)
 {
-       struct nouveau_device *dev = nouveau_screen(pscreen)->device;
-       static char buffer[128];
+   struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+   static char buffer[128];
 
-       util_snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
-       return buffer;
+   snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset);
+   return buffer;
 }
 
 /* Return the constant driver vendor string "nouveau"; pscreen is unused. */
 static const char *
 nouveau_screen_get_vendor(struct pipe_screen *pscreen)
 {
-       return "nouveau";
+   return "nouveau";
 }
 
-
-
-struct nouveau_bo *
-nouveau_screen_bo_new(struct pipe_screen *pscreen, unsigned alignment,
-                     unsigned usage, unsigned bind, unsigned size)
-{
-       struct nouveau_device *dev = nouveau_screen(pscreen)->device;
-       struct nouveau_bo *bo = NULL;
-       uint32_t flags = NOUVEAU_BO_MAP, tile_mode = 0, tile_flags = 0;
-       int ret;
-
-       if (bind & PIPE_BIND_VERTEX_BUFFER)
-               flags |= nouveau_screen(pscreen)->vertex_buffer_flags;
-       else if (bind & PIPE_BIND_INDEX_BUFFER)
-               flags |= nouveau_screen(pscreen)->index_buffer_flags;
-
-       if (bind & (PIPE_BIND_RENDER_TARGET |
-                       PIPE_BIND_DEPTH_STENCIL |
-                       PIPE_BIND_SCANOUT |
-                       PIPE_BIND_DISPLAY_TARGET |
-                       PIPE_BIND_SAMPLER_VIEW))
-       {
-               /* TODO: this may be incorrect or suboptimal */
-               if (!(bind & PIPE_BIND_SCANOUT))
-                       flags |= NOUVEAU_BO_GART;
-               if (usage != PIPE_USAGE_DYNAMIC)
-                       flags |= NOUVEAU_BO_VRAM;
-
-               if (dev->chipset == 0x50 || dev->chipset >= 0x80) {
-                       if (bind & PIPE_BIND_DEPTH_STENCIL)
-                               tile_flags = 0x2800;
-                       else
-                               tile_flags = 0x7000;
-               }
-       }
-
-       ret = nouveau_bo_new_tile(dev, flags, alignment, size,
-                                 tile_mode, tile_flags, &bo);
-       if (ret)
-               return NULL;
-
-       return bo;
-}
-
-void *
-nouveau_screen_bo_map(struct pipe_screen *pscreen,
-                     struct nouveau_bo *bo,
-                     unsigned map_flags)
+static const char *
+nouveau_screen_get_device_vendor(struct pipe_screen *pscreen)
 {
-       int ret;
-
-       ret = nouveau_bo_map(bo, map_flags);
-       if (ret) {
-               debug_printf("map failed: %d\n", ret);
-               return NULL;
-       }
-
-       return bo->map;
    /* The device vendor is the constant string "NVIDIA"; pscreen is unused. */
+   return "NVIDIA";
 }
 
-void *
-nouveau_screen_bo_map_range(struct pipe_screen *pscreen, struct nouveau_bo *bo,
-                           unsigned offset, unsigned length, unsigned flags)
+static uint64_t
+nouveau_screen_get_timestamp(struct pipe_screen *pscreen)
 {
-       int ret;
-
-       ret = nouveau_bo_map_range(bo, offset, length, flags);
-       if (ret) {
-               nouveau_bo_unmap(bo);
-               if (!(flags & NOUVEAU_BO_NOWAIT) || ret != -EBUSY)
-                       debug_printf("map_range failed: %d\n", ret);
-               return NULL;
-       }
    /* Derive a GPU timestamp from the CPU clock instead of querying the
     * kernel on every call: the CPU time is scaled by 1000 (the same factor
     * nouveau_screen_init uses when it computes cpu_gpu_time_delta;
     * presumably microseconds to nanoseconds - confirm against
     * os_time_get()) and then offset by the stored CPU/GPU delta.
     */
+   int64_t cpu_time = os_time_get() * 1000;
 
-       return (char *)bo->map - offset; /* why gallium? why? */
-}
+   /* getparam of PTIMER_TIME takes about x10 as long (several usecs) */
 
-void
-nouveau_screen_bo_map_flush_range(struct pipe_screen *pscreen, struct nouveau_bo *bo,
-                                 unsigned offset, unsigned length)
-{
-       nouveau_bo_map_flush(bo, offset, length);
+   return cpu_time + nouveau_screen(pscreen)->cpu_gpu_time_delta;
 }
 
-void
-nouveau_screen_bo_unmap(struct pipe_screen *pscreen, struct nouveau_bo *bo)
+static struct disk_cache *
+nouveau_screen_get_disk_shader_cache(struct pipe_screen *pscreen)
 {
-       nouveau_bo_unmap(bo);
-}
-
-void
-nouveau_screen_bo_release(struct pipe_screen *pscreen, struct nouveau_bo *bo)
-{
-       nouveau_bo_ref(NULL, &bo);
    /* Plain accessor for the cache pointer stored on the nouveau screen by
     * nouveau_disk_cache_create().
     */
+   return nouveau_screen(pscreen)->disk_shader_cache;
 }
 
 /* Fence reference hook installed as pipe_screen::fence_reference.
  *
  * Forwards to nouveau_fence_ref(), passing pfence as the fence and ptr
  * (cast to the driver's fence type) as the reference slot to update.
  * pscreen is unused.
  */
 static void
 nouveau_screen_fence_ref(struct pipe_screen *pscreen,
-                        struct pipe_fence_handle **ptr,
-                        struct pipe_fence_handle *pfence)
-{
-       nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr);
-}
-
-static boolean
-nouveau_screen_fence_signalled(struct pipe_screen *screen,
-                               struct pipe_fence_handle *pfence)
+                         struct pipe_fence_handle **ptr,
+                         struct pipe_fence_handle *pfence)
 {
-        return nouveau_fence_signalled(nouveau_fence(pfence));
+   nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr);
 }
 
-static boolean
+static bool
 nouveau_screen_fence_finish(struct pipe_screen *screen,
-                           struct pipe_fence_handle *pfence,
+                            struct pipe_context *ctx,
+                            struct pipe_fence_handle *pfence,
                             uint64_t timeout)
 {
    /* Fence finish hook installed as pipe_screen::fence_finish.
     *
     * A zero timeout only polls the fence via nouveau_fence_signalled().
     * Any non-zero timeout goes to nouveau_fence_wait(); the timeout value
     * itself is not passed on. screen and ctx are unused.
     */
-        return nouveau_fence_wait(nouveau_fence(pfence));
+   if (!timeout)
+      return nouveau_fence_signalled(nouveau_fence(pfence));
+
+   return nouveau_fence_wait(nouveau_fence(pfence), NULL);
 }
 
 
 /* Import a buffer object described by a winsys handle.
  *
  * Only handles with a zero offset and of type WINSYS_HANDLE_TYPE_SHARED or
  * WINSYS_HANDLE_TYPE_FD are accepted. On success the bo is returned and
  * *out_stride is set to whandle->stride. On any failure a debug message is
  * printed, NULL is returned and *out_stride is left untouched.
  */
 struct nouveau_bo *
 nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
-                             struct winsys_handle *whandle,
-                             unsigned *out_stride)
+                              struct winsys_handle *whandle,
+                              unsigned *out_stride)
 {
-       struct nouveau_device *dev = nouveau_screen(pscreen)->device;
-       struct nouveau_bo *bo = 0;
-       int ret;
-       ret = nouveau_bo_handle_ref(dev, whandle->handle, &bo);
-       if (ret) {
-               debug_printf("%s: ref name 0x%08x failed with %d\n",
-                            __FUNCTION__, whandle->handle, ret);
-               return NULL;
-       }
-
-       *out_stride = whandle->stride;
-       return bo;
+   struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+   struct nouveau_bo *bo = 0;
+   int ret;
+
    /* Imports at a non-zero offset are rejected outright. */
+   if (whandle->offset != 0) {
+      debug_printf("%s: attempt to import unsupported winsys offset %d\n",
+                   __FUNCTION__, whandle->offset);
+      return NULL;
+   }
+
+   if (whandle->type != WINSYS_HANDLE_TYPE_SHARED &&
+       whandle->type != WINSYS_HANDLE_TYPE_FD) {
+      debug_printf("%s: attempt to import unsupported handle type %d\n",
+                   __FUNCTION__, whandle->type);
+      return NULL;
+   }
+
    /* SHARED handles are resolved with nouveau_bo_name_ref(); the only other
     * type that reaches this point (FD) uses nouveau_bo_prime_handle_ref().
     */
+   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED)
+      ret = nouveau_bo_name_ref(dev, whandle->handle, &bo);
+   else
+      ret = nouveau_bo_prime_handle_ref(dev, whandle->handle, &bo);
+
    /* The same "ref name" message is printed for both import paths. */
+   if (ret) {
+      debug_printf("%s: ref name 0x%08x failed with %d\n",
+                   __FUNCTION__, whandle->handle, ret);
+      return NULL;
+   }
+
+   *out_stride = whandle->stride;
+   return bo;
 }
 
 
-boolean
+bool
 nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
-                            struct nouveau_bo *bo,
-                            unsigned stride,
-                            struct winsys_handle *whandle)
+                             struct nouveau_bo *bo,
+                             unsigned stride,
+                             struct winsys_handle *whandle)
 {
-       whandle->stride = stride;
-
-       if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { 
-               return nouveau_bo_handle_get(bo, &whandle->handle) == 0;
-       } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
-               whandle->handle = bo->handle;
-               return TRUE;
-       } else {
-               return FALSE;
-       }
    /* Export bo through whandle. The stride is always stored, even when the
     * export itself fails. The handle field is filled according to
     * whandle->type: SHARED uses nouveau_bo_name_get(), KMS copies
     * bo->handle directly and FD uses nouveau_bo_set_prime().
     *
     * Returns true on success, false when the underlying call reports an
     * error or the handle type is none of the three above. pscreen is
     * unused.
     */
+   whandle->stride = stride;
+
+   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
+      return nouveau_bo_name_get(bo, &whandle->handle) == 0;
+   } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
+      whandle->handle = bo->handle;
+      return true;
+   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
+      return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0;
+   } else {
+      return false;
+   }
+}
+
+static void
+nouveau_disk_cache_create(struct nouveau_screen *screen)
+{
    /* Create screen->disk_shader_cache.
     *
     * The cache id is the hex form of a SHA-1 that is fed through
     * disk_cache_get_function_identifier() using this function as the
     * identifier source. The driver flags record which shader IR (NIR or
     * TGSI) the screen prefers.
     */
+   struct mesa_sha1 ctx;
+   unsigned char sha1[20];
+   char cache_id[20 * 2 + 1];
+   uint64_t driver_flags = 0;
+
+   _mesa_sha1_init(&ctx);
    /* Without an identifier no cache is created and disk_shader_cache is
     * left as it was.
     */
+   if (!disk_cache_get_function_identifier(nouveau_disk_cache_create,
+                                           &ctx))
+      return;
+
+   _mesa_sha1_final(&ctx, sha1);
+   disk_cache_format_hex_id(cache_id, sha1, 20 * 2);
+
+   if (screen->prefer_nir)
+      driver_flags |= NOUVEAU_SHADER_CACHE_FLAGS_IR_NIR;
+   else
+      driver_flags |= NOUVEAU_SHADER_CACHE_FLAGS_IR_TGSI;
+
    /* The cache is named after the screen, i.e. the "NVxx" chipset string. */
+   screen->disk_shader_cache =
+      disk_cache_create(nouveau_screen_get_name(&screen->base),
+                        cache_id, driver_flags);
+}
+
+static void*
+reserve_vma(uintptr_t start, uint64_t reserved_size)
+{
    /* Map reserved_size bytes of inaccessible (PROT_NONE) anonymous private
     * memory, passing start as the requested address. MAP_FIXED is not set,
     * so presumably start is only a hint and the mapping may land elsewhere
     * - confirm against os_mmap().
     *
     * Returns the mapping address, or NULL when the mapping fails.
     */
+   void *reserved = os_mmap((void*)start, reserved_size, PROT_NONE,
+                            MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+   if (reserved == MAP_FAILED)
+      return NULL;
+   return reserved;
 }
 
 int
 nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
 {
-       struct pipe_screen *pscreen = &screen->base;
-       int ret;
+   struct pipe_screen *pscreen = &screen->base;
+   struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 };
+   struct nvc0_fifo nvc0_data = { };
+   uint64_t time;
+   int size, ret;
+   void *data;
+   union nouveau_bo_config mm_config;
+
+   char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
+   if (nv_dbg)
+      nouveau_mesa_debug = atoi(nv_dbg);
+
+   if (dev->chipset < 0x140)
+      screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false);
+   else
+      screen->prefer_nir = true;
+
+   screen->force_enable_cl = debug_get_bool_option("NOUVEAU_ENABLE_CL", false);
+   if (screen->force_enable_cl)
+      glsl_type_singleton_init_or_ref();
+
+   /* These must be set before any failure is possible, as the cleanup
+    * paths assume they're responsible for deleting them.
+    */
+   screen->drm = nouveau_drm(&dev->object);
+   screen->device = dev;
+
+   /*
+    * this is initialized to 1 in nouveau_drm_screen_create after screen
+    * is fully constructed and added to the global screen list.
+    */
+   screen->refcount = -1;
+
+   if (dev->chipset < 0xc0) {
+      data = &nv04_data;
+      size = sizeof(nv04_data);
+   } else {
+      data = &nvc0_data;
+      size = sizeof(nvc0_data);
+   }
+
+   screen->has_svm = false;
+   /* we only care about HMM with OpenCL enabled */
+   if (dev->chipset > 0x130 && screen->force_enable_cl) {
+      /* Before being able to enable SVM we need to carve out some memory for
+       * driver bo allocations. Let's just base the size on the available VRAM.
+       *
+       * 40 bit is the biggest we care about and for 32 bit systems we don't
+       * want to allocate all of the available memory either.
+       *
+       * Also we align the size we want to reserve to the next POT to make use
+       * of hugepages.
+       */
+      const int vram_shift = util_logbase2_ceil64(dev->vram_size);
+      const int limit_bit =
+         MIN2(sizeof(void*) * 8 - 1, NV_GENERIC_VM_LIMIT_SHIFT);
+      screen->svm_cutout_size =
+         BITFIELD64_BIT(MIN2(sizeof(void*) == 4 ? 26 : NV_GENERIC_VM_LIMIT_SHIFT, vram_shift));
+
+      size_t start = screen->svm_cutout_size;
+      do {
+         screen->svm_cutout = reserve_vma(start, screen->svm_cutout_size);
+         if (!screen->svm_cutout) {
+            start += screen->svm_cutout_size;
+            continue;
+         }
+
+         struct drm_nouveau_svm_init svm_args = {
+            .unmanaged_addr = (uint64_t)screen->svm_cutout,
+            .unmanaged_size = screen->svm_cutout_size,
+         };
+
+         ret = drmCommandWrite(screen->drm->fd, DRM_NOUVEAU_SVM_INIT,
+                               &svm_args, sizeof(svm_args));
+         screen->has_svm = !ret;
+         if (!screen->has_svm)
+            os_munmap(screen->svm_cutout, screen->svm_cutout_size);
+         break;
+      } while ((start + screen->svm_cutout_size) < BITFIELD64_MASK(limit_bit));
+   }
+
+   /*
+    * Set default VRAM domain if not overridden
+    */
+   if (!screen->vram_domain) {
+      if (dev->vram_size > 0)
+         screen->vram_domain = NOUVEAU_BO_VRAM;
+      else
+         screen->vram_domain = NOUVEAU_BO_GART;
+   }
+
+   ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS,
+                            data, size, &screen->channel);
+   if (ret)
+      goto err;
+
+   ret = nouveau_client_new(screen->device, &screen->client);
+   if (ret)
+      goto err;
+   ret = nouveau_pushbuf_new(screen->client, screen->channel,
+                             4, 512 * 1024, 1,
+                             &screen->pushbuf);
+   if (ret)
+      goto err;
+
+   /* getting CPU time first appears to be more accurate */
+   screen->cpu_gpu_time_delta = os_time_get();
+
+   ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_PTIMER_TIME, &time);
+   if (!ret)
+      screen->cpu_gpu_time_delta = time - screen->cpu_gpu_time_delta * 1000;
+
+   pscreen->get_name = nouveau_screen_get_name;
+   pscreen->get_vendor = nouveau_screen_get_vendor;
+   pscreen->get_device_vendor = nouveau_screen_get_device_vendor;
+   pscreen->get_disk_shader_cache = nouveau_screen_get_disk_shader_cache;
+
+   pscreen->get_timestamp = nouveau_screen_get_timestamp;
+
+   pscreen->fence_reference = nouveau_screen_fence_ref;
+   pscreen->fence_finish = nouveau_screen_fence_finish;
+
+   nouveau_disk_cache_create(screen);
+
+   screen->transfer_pushbuf_threshold = 192;
+   screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */
+   screen->vidmem_bindings =
+      PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL |
+      PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
+      PIPE_BIND_CURSOR |
+      PIPE_BIND_SAMPLER_VIEW |
+      PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE |
+      PIPE_BIND_COMPUTE_RESOURCE |
+      PIPE_BIND_GLOBAL;
+   screen->sysmem_bindings =
+      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
+      PIPE_BIND_COMMAND_ARGS_BUFFER;
+
+   memset(&mm_config, 0, sizeof(mm_config));
+
+   screen->mm_GART = nouveau_mm_create(dev,
+                                       NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+                                       &mm_config);
+   screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config);
+   return 0;
+
+err:
+   if (screen->svm_cutout)
+      os_munmap(screen->svm_cutout, screen->svm_cutout_size);
+   return ret;
+}
+
+void
+nouveau_screen_fini(struct nouveau_screen *screen)
+{
    /* Tear down what nouveau_screen_init set up. The DRM fd is read up front
     * because screen->drm is deleted before the fd is closed.
     */
+   int fd = screen->drm->fd;
 
-       char *nv_dbg = getenv("NOUVEAU_MESA_DEBUG");
-       if (nv_dbg)
-          nouveau_mesa_debug = atoi(nv_dbg);
    /* Drop the glsl type singleton reference that init takes only when
     * force_enable_cl is set, and release the SVM range if SVM was enabled.
     */
+   if (screen->force_enable_cl)
+      glsl_type_singleton_decref();
+   if (screen->has_svm)
+      os_munmap(screen->svm_cutout, screen->svm_cutout_size);
 
-       ret = nouveau_channel_alloc(dev, 0xbeef0201, 0xbeef0202,
-                                   512*1024, &screen->channel);
-       if (ret)
-               return ret;
-       screen->device = dev;
+   nouveau_mm_destroy(screen->mm_GART);
+   nouveau_mm_destroy(screen->mm_VRAM);
 
-       pscreen->get_name = nouveau_screen_get_name;
-       pscreen->get_vendor = nouveau_screen_get_vendor;
+   nouveau_pushbuf_del(&screen->pushbuf);
 
-       pscreen->fence_reference = nouveau_screen_fence_ref;
-       pscreen->fence_signalled = nouveau_screen_fence_signalled;
-       pscreen->fence_finish = nouveau_screen_fence_finish;
+   nouveau_client_del(&screen->client);
+   nouveau_object_del(&screen->channel);
 
-       util_format_s3tc_init();
    /* The device and drm objects go last; the fd saved at the top is closed
     * only after both are deleted.
     */
+   nouveau_device_del(&screen->device);
+   nouveau_drm_del(&screen->drm);
+   close(fd);
 
-       screen->mm_GART = nouveau_mm_create(dev,
-                                           NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
-                                           0x000);
-       screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, 0x000);
-       return 0;
+   disk_cache_destroy(screen->disk_shader_cache);
 }
 
-void
-nouveau_screen_fini(struct nouveau_screen *screen)
+static void
+nouveau_set_debug_callback(struct pipe_context *pipe,
+                           const struct pipe_debug_callback *cb)
 {
    /* Store a copy of *cb in the nouveau context, or zero the stored
     * callback when cb is NULL.
     */
-       nouveau_mm_destroy(screen->mm_GART);
-       nouveau_mm_destroy(screen->mm_VRAM);
-
-       nouveau_channel_free(&screen->channel);
+   struct nouveau_context *context = nouveau_context(pipe);
 
-       nouveau_device_close(&screen->device);
+   if (cb)
+      context->debug = *cb;
+   else
+      memset(&context->debug, 0, sizeof(context->debug));
 }
 
+void
+nouveau_context_init(struct nouveau_context *context)
+{
    /* Install the shared debug-callback setter on the context's pipe; nothing
     * else is initialised here.
     */
+   context->pipe.set_debug_callback = nouveau_set_debug_callback;
+}