llvmpipe: add grid launch
[mesa.git] / src / gallium / drivers / panfrost / pan_drm.c
index b21005feaebb4e74e2aa4bab73180e8600ea508f..4e69282480975dd5dc0bc6099de1ab358e82bc49 100644 (file)
@@ -38,7 +38,7 @@
 #include "pan_util.h"
 #include "pandecode/decode.h"
 
-static void
+void
 panfrost_drm_mmap_bo(struct panfrost_screen *screen, struct panfrost_bo *bo)
 {
         struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
@@ -49,14 +49,14 @@ panfrost_drm_mmap_bo(struct panfrost_screen *screen, struct panfrost_bo *bo)
 
         ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
         if (ret) {
-                fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %d\n", ret);
+                fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
                 assert(0);
         }
 
         bo->cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                           screen->fd, mmap_bo.offset);
         if (bo->cpu == MAP_FAILED) {
-                fprintf(stderr, "mmap failed: %p\n", bo->cpu);
+                fprintf(stderr, "mmap failed: %p %m\n", bo->cpu);
                 assert(0);
         }
 
@@ -65,97 +65,163 @@ panfrost_drm_mmap_bo(struct panfrost_screen *screen, struct panfrost_bo *bo)
                 pandecode_inject_mmap(bo->gpu, bo->cpu, bo->size, NULL);
 }
 
-void
-panfrost_drm_allocate_slab(struct panfrost_screen *screen,
-                          struct panfrost_memory *mem,
-                          size_t pages,
-                          bool same_va,
-                          int extra_flags,
-                          int commit_count,
-                          int extent)
+static void
+panfrost_drm_munmap_bo(struct panfrost_screen *screen, struct panfrost_bo *bo)
 {
-       struct drm_panfrost_create_bo create_bo = {
-                       .size = pages * 4096,
-                       .flags = 0,  // TODO figure out proper flags..
-       };
-       struct drm_panfrost_mmap_bo mmap_bo = {0,};
-       int ret;
-
-       // TODO cache allocations
-       // TODO properly handle errors
-       // TODO take into account extra_flags
-
-       ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
-       if (ret) {
-                fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %d\n", ret);
-               assert(0);
-       }
-
-       mem->gpu = create_bo.offset;
-       mem->gem_handle = create_bo.handle;
-        mem->stack_bottom = 0;
-        mem->size = create_bo.size;
-
-       // TODO map and unmap on demand?
-       mmap_bo.handle = create_bo.handle;
-       ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
-       if (ret) {
-                fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %d\n", ret);
-               assert(0);
-       }
-
-        mem->cpu = os_mmap(NULL, mem->size, PROT_READ | PROT_WRITE, MAP_SHARED,
-                       screen->fd, mmap_bo.offset);
-        if (mem->cpu == MAP_FAILED) {
-                fprintf(stderr, "mmap failed: %p\n", mem->cpu);
-               assert(0);
-       }
+        if (!bo->cpu)
+                return;
 
-        /* Record the mmap if we're tracing */
-        if (pan_debug & PAN_DBG_TRACE)
-                pandecode_inject_mmap(mem->gpu, mem->cpu, mem->size, NULL);
+        if (os_munmap((void *) (uintptr_t)bo->cpu, bo->size)) {
+                perror("munmap");
+                abort();
+        }
+
+        bo->cpu = NULL;
+}
+
+struct panfrost_bo *
+panfrost_drm_create_bo(struct panfrost_screen *screen, size_t size,
+                       uint32_t flags)
+{
+        struct panfrost_bo *bo;
+
+        /* Kernel will fail (confusingly) with EPERM otherwise */
+        assert(size > 0);
+
+        /* To maximize BO cache usage, don't allocate tiny BOs */
+        size = MAX2(size, 4096);
+
+        /* GROWABLE BOs cannot be mmapped */
+        if (flags & PAN_ALLOCATE_GROWABLE)
+                assert(flags & PAN_ALLOCATE_INVISIBLE);
+
+        unsigned translated_flags = 0;
+
+        if (screen->kernel_version->version_major > 1 ||
+            screen->kernel_version->version_minor >= 1) {
+                if (flags & PAN_ALLOCATE_GROWABLE)
+                        translated_flags |= PANFROST_BO_HEAP;
+                if (!(flags & PAN_ALLOCATE_EXECUTE))
+                        translated_flags |= PANFROST_BO_NOEXEC;
+        }
+
+        struct drm_panfrost_create_bo create_bo = {
+                .size = size,
+                .flags = translated_flags,
+        };
+
+        /* Before creating a BO, we first want to check the cache */
+
+        bo = panfrost_bo_cache_fetch(screen, size, flags);
+
+        if (bo == NULL) {
+                /* Otherwise, the cache misses and we need to allocate a BO fresh from
+                 * the kernel */
+
+                int ret;
+
+                ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
+                if (ret) {
+                        fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
+                        assert(0);
+                }
+
+                /* We have a BO allocated from the kernel; fill in the userspace
+                 * version */
+
+                bo = rzalloc(screen, struct panfrost_bo);
+                bo->size = create_bo.size;
+                bo->gpu = create_bo.offset;
+                bo->gem_handle = create_bo.handle;
+                bo->flags = flags;
+        }
+
+        /* Only mmap now if we know we need to. For CPU-invisible buffers, we
+         * never map since we don't care about their contents; they're purely
+         * for GPU-internal use. But we do trace them anyway. */
+
+        if (!(flags & (PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_DELAY_MMAP)))
+                panfrost_drm_mmap_bo(screen, bo);
+        else if (flags & PAN_ALLOCATE_INVISIBLE) {
+                if (pan_debug & PAN_DBG_TRACE)
+                        pandecode_inject_mmap(bo->gpu, NULL, bo->size, NULL);
+        }
+
+        pipe_reference_init(&bo->reference, 1);
+        return bo;
 }
 
 void
-panfrost_drm_free_slab(struct panfrost_screen *screen, struct panfrost_memory *mem)
+panfrost_drm_release_bo(struct panfrost_screen *screen, struct panfrost_bo *bo, bool cacheable)
 {
-       struct drm_gem_close gem_close = {
-               .handle = mem->gem_handle,
-       };
-       int ret;
+        if (!bo)
+                return;
 
-        if (os_munmap((void *) (uintptr_t) mem->cpu, mem->size)) {
-                perror("munmap");
-                abort();
+        struct drm_gem_close gem_close = { .handle = bo->gem_handle };
+        int ret;
+
+        /* Rather than freeing the BO now, we'll cache the BO for later
+         * allocations if we're allowed to */
+
+        panfrost_drm_munmap_bo(screen, bo);
+
+        if (cacheable) {
+                bool cached = panfrost_bo_cache_put(screen, bo);
+
+                if (cached)
+                        return;
         }
 
-       mem->cpu = NULL;
+        /* Otherwise, if the BO wasn't cached, we'll legitimately free the BO */
 
-       ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
-       if (ret) {
-                fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %d\n", ret);
-               assert(0);
-       }
+        ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
+        if (ret) {
+                fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
+                assert(0);
+        }
+
+        ralloc_free(bo);
+}
+
+void
+panfrost_drm_allocate_slab(struct panfrost_screen *screen,
+                           struct panfrost_memory *mem,
+                           size_t pages,
+                           bool same_va,
+                           int extra_flags,
+                           int commit_count,
+                           int extent)
+{
+        // TODO cache allocations
+        // TODO properly handle errors
+        // TODO take into account extra_flags
+        mem->bo = panfrost_drm_create_bo(screen, pages * 4096, extra_flags);
+        mem->stack_bottom = 0;
+}
 
-       mem->gem_handle = -1;
+void
+panfrost_drm_free_slab(struct panfrost_screen *screen, struct panfrost_memory *mem)
+{
+        panfrost_bo_unreference(&screen->base, mem->bo);
+        mem->bo = NULL;
 }
 
 struct panfrost_bo *
 panfrost_drm_import_bo(struct panfrost_screen *screen, int fd)
 {
-       struct panfrost_bo *bo = rzalloc(screen, struct panfrost_bo);
+        struct panfrost_bo *bo = rzalloc(screen, struct panfrost_bo);
         struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
-        int ret;
+        ASSERTED int ret;
         unsigned gem_handle;
 
-       ret = drmPrimeFDToHandle(screen->fd, fd, &gem_handle);
-       assert(!ret);
+        ret = drmPrimeFDToHandle(screen->fd, fd, &gem_handle);
+        assert(!ret);
 
-       get_bo_offset.handle = gem_handle;
+        get_bo_offset.handle = gem_handle;
         ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
         assert(!ret);
 
-       bo->gem_handle = gem_handle;
+        bo->gem_handle = gem_handle;
         bo->gpu = (mali_ptr) get_bo_offset.offset;
         bo->size = lseek(fd, 0, SEEK_END);
         assert(bo->size > 0);
@@ -182,66 +248,67 @@ panfrost_drm_export_bo(struct panfrost_screen *screen, const struct panfrost_bo
 }
 
 static int
-panfrost_drm_submit_job(struct panfrost_context *ctx, u64 job_desc, int reqs, struct pipe_surface *surf)
+panfrost_drm_submit_job(struct panfrost_context *ctx, u64 job_desc, int reqs)
 {
         struct pipe_context *gallium = (struct pipe_context *) ctx;
         struct panfrost_screen *screen = pan_screen(gallium->screen);
+        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
         struct drm_panfrost_submit submit = {0,};
-        int bo_handles[7];
+        int *bo_handles, ret;
 
         submit.in_syncs = (u64) (uintptr_t) &ctx->out_sync;
         submit.in_sync_count = 1;
 
         submit.out_sync = ctx->out_sync;
 
-       submit.jc = job_desc;
-       submit.requirements = reqs;
-
-       if (surf) {
-               struct panfrost_resource *res = pan_resource(surf->texture);
-               assert(res->bo->gem_handle > 0);
-               bo_handles[submit.bo_handle_count++] = res->bo->gem_handle;
-       }
-
-       /* TODO: Add here the transient pools */
-        /* TODO: Add here the BOs listed in the panfrost_job */
-       bo_handles[submit.bo_handle_count++] = ctx->shaders.gem_handle;
-       bo_handles[submit.bo_handle_count++] = ctx->scratchpad.gem_handle;
-       bo_handles[submit.bo_handle_count++] = ctx->tiler_heap.gem_handle;
-       bo_handles[submit.bo_handle_count++] = ctx->varying_mem.gem_handle;
-       bo_handles[submit.bo_handle_count++] = ctx->tiler_polygon_list.gem_handle;
-       submit.bo_handles = (u64) (uintptr_t) bo_handles;
-
-       if (drmIoctl(screen->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit)) {
-               fprintf(stderr, "Error submitting: %m\n");
-               return errno;
-       }
+        submit.jc = job_desc;
+        submit.requirements = reqs;
+
+        bo_handles = calloc(job->bos->entries, sizeof(*bo_handles));
+        assert(bo_handles);
+
+        set_foreach(job->bos, entry) {
+                struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
+                assert(bo->gem_handle > 0);
+                bo_handles[submit.bo_handle_count++] = bo->gem_handle;
+        }
+
+        submit.bo_handles = (u64) (uintptr_t) bo_handles;
+        ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
+        free(bo_handles);
+        if (ret) {
+                fprintf(stderr, "Error submitting: %m\n");
+                return errno;
+        }
 
         /* Trace the job if we're doing that */
         if (pan_debug & PAN_DBG_TRACE) {
                 /* Wait so we can get errors reported back */
                 drmSyncobjWait(screen->fd, &ctx->out_sync, 1, INT64_MAX, 0, NULL);
-                pandecode_replay_jc(submit.jc, FALSE);
+                pandecode_jc(submit.jc, FALSE);
         }
 
-       return 0;
+        return 0;
 }
 
 int
-panfrost_drm_submit_vs_fs_job(struct panfrost_context *ctx, bool has_draws, bool is_scanout)
+panfrost_drm_submit_vs_fs_job(struct panfrost_context *ctx, bool has_draws)
 {
-        struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[0];
-       int ret;
+        int ret = 0;
 
         struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
 
+        panfrost_job_add_bo(job, ctx->scratchpad.bo);
+        panfrost_job_add_bo(job, ctx->tiler_heap.bo);
+        panfrost_job_add_bo(job, job->polygon_list);
+
         if (job->first_job.gpu) {
-               ret = panfrost_drm_submit_job(ctx, job->first_job.gpu, 0, NULL);
-               assert(!ret);
-       }
+                ret = panfrost_drm_submit_job(ctx, job->first_job.gpu, 0);
+                assert(!ret);
+        }
 
         if (job->first_tiler.gpu || job->clear) {
-                ret = panfrost_drm_submit_job(ctx, panfrost_fragment_job(ctx, has_draws), PANFROST_JD_REQ_FS, surf);
+                ret = panfrost_drm_submit_job(ctx, panfrost_fragment_job(ctx, has_draws), PANFROST_JD_REQ_FS);
                 assert(!ret);
         }
 
@@ -264,7 +331,7 @@ panfrost_fence_create(struct panfrost_context *ctx)
          */
         drmSyncobjExportSyncFile(screen->fd, ctx->out_sync, &f->fd);
         if (f->fd == -1) {
-                fprintf(stderr, "export failed\n");
+                fprintf(stderr, "export failed: %m\n");
                 free(f);
                 return NULL;
         }
@@ -276,18 +343,18 @@ panfrost_fence_create(struct panfrost_context *ctx)
 
 void
 panfrost_drm_force_flush_fragment(struct panfrost_context *ctx,
-                                 struct pipe_fence_handle **fence)
+                                  struct pipe_fence_handle **fence)
 {
         struct pipe_context *gallium = (struct pipe_context *) ctx;
         struct panfrost_screen *screen = pan_screen(gallium->screen);
 
-        if (!screen->last_fragment_flushed) {
-               drmSyncobjWait(screen->fd, &ctx->out_sync, 1, INT64_MAX, 0, NULL);
-                screen->last_fragment_flushed = true;
+        if (!ctx->last_fragment_flushed) {
+                drmSyncobjWait(screen->fd, &ctx->out_sync, 1, INT64_MAX, 0, NULL);
+                ctx->last_fragment_flushed = true;
 
                 /* The job finished up, so we're safe to clean it up now */
-                panfrost_free_job(ctx, screen->last_job);
-       }
+                panfrost_free_job(ctx, ctx->last_job);
+        }
 
         if (fence) {
                 struct panfrost_fence *f = panfrost_fence_create(ctx);
@@ -300,13 +367,13 @@ unsigned
 panfrost_drm_query_gpu_version(struct panfrost_screen *screen)
 {
         struct drm_panfrost_get_param get_param = {0,};
-        int ret;
+        ASSERTED int ret;
 
-       get_param.param = DRM_PANFROST_PARAM_GPU_PROD_ID;
+        get_param.param = DRM_PANFROST_PARAM_GPU_PROD_ID;
         ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_GET_PARAM, &get_param);
         assert(!ret);
 
-       return get_param.value;
+        return get_param.value;
 }
 
 int
@@ -321,8 +388,8 @@ panfrost_drm_init_context(struct panfrost_context *ctx)
 
 void
 panfrost_drm_fence_reference(struct pipe_screen *screen,
-                         struct pipe_fence_handle **ptr,
-                         struct pipe_fence_handle *fence)
+                             struct pipe_fence_handle **ptr,
+                             struct pipe_fence_handle *fence)
 {
         struct panfrost_fence **p = (struct panfrost_fence **)ptr;
         struct panfrost_fence *f = (struct panfrost_fence *)fence;
@@ -337,9 +404,9 @@ panfrost_drm_fence_reference(struct pipe_screen *screen,
 
 boolean
 panfrost_drm_fence_finish(struct pipe_screen *pscreen,
-                      struct pipe_context *ctx,
-                      struct pipe_fence_handle *fence,
-                      uint64_t timeout)
+                          struct pipe_context *ctx,
+                          struct pipe_fence_handle *fence,
+                          uint64_t timeout)
 {
         struct panfrost_screen *screen = pan_screen(pscreen);
         struct panfrost_fence *f = (struct panfrost_fence *)fence;
@@ -352,7 +419,7 @@ panfrost_drm_fence_finish(struct pipe_screen *pscreen,
                 return false;
         }
 
-        drmSyncobjImportSyncFile(screen->fd, syncobj, f->fd);
+        ret = drmSyncobjImportSyncFile(screen->fd, syncobj, f->fd);
         if (ret) {
                 fprintf(stderr, "Failed to import fence to syncobj: %m\n");
                 return false;