From 756f7b99895404bc7d7ce0cfcd84044cc21a799f Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Fri, 8 Mar 2019 10:27:07 +0100 Subject: [PATCH] panfrost: Add backend targeting the DRM driver This backend interacts with the new DRM driver for Midgard GPUs which is currently in development. When using this backend, Panfrost has roughly the same functionality as when using the non-DRM driver from Arm. Alyssa Rosenzweig: To do so, we implement additional routines for runtime GPU version detection and fencing. We clean up some duplicate code interfering with the new driver. We fix a long-standing memory leak which is aggravated on the new driver. Finally, we implement BO import/export in a way compatible with the new driver. These changes are squashed to preserve bisectability given the hard-to-track ABI shifts in the nondrm module. Signed-off-by: Tomeu Vizoso Reviewed-by: Alyssa Rosenzweig --- include/drm-uapi/panfrost_drm.h | 141 +++++++ src/gallium/drivers/panfrost/pan_allocate.c | 4 +- src/gallium/drivers/panfrost/pan_context.c | 85 +++-- src/gallium/drivers/panfrost/pan_context.h | 17 + src/gallium/drivers/panfrost/pan_drm.c | 363 +++++++++++++++++++ src/gallium/drivers/panfrost/pan_resource.c | 39 +- src/gallium/drivers/panfrost/pan_screen.c | 11 +- src/gallium/drivers/panfrost/pan_screen.h | 19 +- src/gallium/drivers/panfrost/pan_wallpaper.c | 2 +- 9 files changed, 607 insertions(+), 74 deletions(-) create mode 100644 include/drm-uapi/panfrost_drm.h diff --git a/include/drm-uapi/panfrost_drm.h b/include/drm-uapi/panfrost_drm.h new file mode 100644 index 00000000000..7618f14f9e2 --- /dev/null +++ b/include/drm-uapi/panfrost_drm.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2018 Broadcom + * Copyright © 2019 Collabora ltd. 
+ */ +#ifndef _PANFROST_DRM_H_ +#define _PANFROST_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_PANFROST_SUBMIT 0x00 +#define DRM_PANFROST_WAIT_BO 0x01 +#define DRM_PANFROST_CREATE_BO 0x02 +#define DRM_PANFROST_MMAP_BO 0x03 +#define DRM_PANFROST_GET_PARAM 0x04 +#define DRM_PANFROST_GET_BO_OFFSET 0x05 + +#define DRM_IOCTL_PANFROST_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) +#define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) +#define DRM_IOCTL_PANFROST_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_CREATE_BO, struct drm_panfrost_create_bo) +#define DRM_IOCTL_PANFROST_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MMAP_BO, struct drm_panfrost_mmap_bo) +#define DRM_IOCTL_PANFROST_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_PARAM, struct drm_panfrost_get_param) +#define DRM_IOCTL_PANFROST_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_BO_OFFSET, struct drm_panfrost_get_bo_offset) + +#define PANFROST_JD_REQ_FS (1 << 0) + +/** + * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D + * engine. + * + * This asks the kernel to have the GPU execute a render command list. + */ +struct drm_panfrost_submit { + + /** Address to GPU mapping of job descriptor */ + __u64 jc; + + /** An optional array of sync objects to wait on before starting this job. */ + __u64 in_syncs; + + /** Number of sync objects to wait on before starting this job. */ + __u32 in_sync_count; + + /** An optional sync object to place the completion fence in. */ + __u32 out_sync; + + /** Pointer to a u32 array of the BOs that are referenced by the job. */ + __u64 bo_handles; + + /** Number of BO handles passed in (size is that times 4). 
*/ + __u32 bo_handle_count; + + /** A combination of PANFROST_JD_REQ_* */ + __u32 requirements; +}; + +/** + * struct drm_panfrost_wait_bo - ioctl argument for waiting for + * completion of the last DRM_PANFROST_SUBMIT on a BO. + * + * This is useful for cases where multiple processes might be + * rendering to a BO and you want to wait for all rendering to be + * completed. + */ +struct drm_panfrost_wait_bo { + __u32 handle; + __u32 pad; + __s64 timeout_ns; /* absolute */ +}; + +/** + * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_panfrost_create_bo { + __u32 size; + __u32 flags; + /** Returned GEM handle for the BO. */ + __u32 handle; + /** + * Returned offset for the BO in the GPU address space. This offset + * is private to the DRM fd and is valid for the lifetime of the GEM + * handle. + * + * This offset value will always be nonzero, since various HW + * units treat 0 specially. + */ + __u64 offset; +}; + +/** + * struct drm_panfrost_mmap_bo - ioctl argument for mapping Panfrost BOs. + * + * This doesn't actually perform an mmap. Instead, it returns the + * offset you need to use in an mmap on the DRM device node. This + * means that tools like valgrind end up knowing about the mapped + * memory. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_panfrost_mmap_bo { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 flags; + /** offset into the drm node to use for subsequent mmap call. */ + __u64 offset; +}; + +enum drm_panfrost_param { + DRM_PANFROST_PARAM_GPU_ID, +}; + +struct drm_panfrost_get_param { + __u32 param; + __u32 pad; + __u64 value; +}; + +/** + * Returns the offset for the BO in the GPU address space for this DRM fd. 
+ * This is the same value returned by drm_panfrost_create_bo, if that was called + * from this DRM fd. + */ +struct drm_panfrost_get_bo_offset { + __u32 handle; + __u32 pad; + __u64 offset; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _PANFROST_DRM_H_ */ diff --git a/src/gallium/drivers/panfrost/pan_allocate.c b/src/gallium/drivers/panfrost/pan_allocate.c index 3732d253978..91ace74d0e4 100644 --- a/src/gallium/drivers/panfrost/pan_allocate.c +++ b/src/gallium/drivers/panfrost/pan_allocate.c @@ -41,7 +41,7 @@ panfrost_allocate_chunk(struct panfrost_context *ctx, size_t size, unsigned heap size = ALIGN(size, ALIGNMENT); struct pipe_context *gallium = (struct pipe_context *) ctx; - struct panfrost_screen *screen = panfrost_screen(gallium->screen); + struct panfrost_screen *screen = pan_screen(gallium->screen); struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, size, heap_id); struct panfrost_memory_entry *p_entry = (struct panfrost_memory_entry *) entry; @@ -81,7 +81,7 @@ panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz) if (pool->entry_index >= pool->entry_count) { /* Don't overflow the pool -- allocate a new one */ struct pipe_context *gallium = (struct pipe_context *) ctx; - struct panfrost_screen *screen = panfrost_screen(gallium->screen); + struct panfrost_screen *screen = pan_screen(gallium->screen); struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, pool->entry_size, HEAP_TRANSIENT); pool->entry_count++; diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index 4c41969fd05..3c8a483b8f5 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -57,26 +57,13 @@ extern const char *pan_counters_base; /* TODO: Sample size, etc */ -/* True for t6XX, false for t8xx. TODO: Run-time settable for automatic - * hardware configuration. 
*/ - -static bool is_t6xx = false; - -/* If set, we'll require the use of single render-target framebuffer - * descriptors (SFBD), for older hardware -- specifically, fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled); SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled); - if (require_sfbd) { + if (ctx->require_sfbd) { SET_BIT(ctx->fragment_sfbd.format, MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B, enabled); } else { SET_BIT(ctx->fragment_rts[0].format.flags, MALI_MFBD_FORMAT_MSAA, enabled); @@ -97,7 +84,7 @@ panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled) static void panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds) { - if (require_sfbd) { + if (ctx->require_sfbd) { printf("AFBC not supported yet on SFBD\n"); assert(0); } @@ -120,6 +107,7 @@ panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsr true, 0, 0, 0); rsrc->bo->has_afbc = true; + rsrc->bo->gem_handle = rsrc->bo->afbc_slab.gem_handle; /* Compressed textured reads use a tagged pointer to the metadata */ @@ -156,7 +144,7 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx) if (!rsrc->bo->has_afbc) continue; - if (require_sfbd) { + if (ctx->require_sfbd) { fprintf(stderr, "Color AFBC not supported on SFBD\n"); assert(0); } @@ -180,7 +168,7 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx) struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture; if (rsrc->bo->has_afbc) { - if (require_sfbd) { + if (ctx->require_sfbd) { fprintf(stderr, "Depth AFBC not supported on SFBD\n"); assert(0); } @@ -204,7 +192,7 @@ panfrost_set_fragment_afbc(struct panfrost_context *ctx) /* For the special case of a depth-only FBO, we need to attach a dummy render target */ if (ctx->pipe_framebuffer.nr_cbufs == 0) { - if (require_sfbd) { + if (ctx->require_sfbd) { fprintf(stderr, "Depth-only FBO not supported on SFBD\n"); assert(0); } @@ -364,7 +352,7 @@ 
panfrost_new_frag_framebuffer(struct panfrost_context *ctx) stride = -stride; } - if (require_sfbd) { + if (ctx->require_sfbd) { struct mali_single_framebuffer fb = panfrost_emit_sfbd(ctx); fb.framebuffer = framebuffer; @@ -562,7 +550,7 @@ panfrost_attach_vt_sfbd(struct panfrost_context *ctx) static void panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) { - mali_ptr framebuffer = require_sfbd ? + mali_ptr framebuffer = ctx->require_sfbd ? panfrost_attach_vt_sfbd(ctx) : panfrost_attach_vt_mfbd(ctx); @@ -616,7 +604,7 @@ panfrost_invalidate_frame(struct panfrost_context *ctx) if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0]))) ctx->cmdstream_i = 0; - if (require_sfbd) + if (ctx->require_sfbd) ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); else ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); @@ -655,7 +643,7 @@ panfrost_emit_vertex_payload(struct panfrost_context *ctx) .workgroups_x_shift_2 = 0x2, .workgroups_x_shift_3 = 0x5, }, - .gl_enables = 0x4 | (is_t6xx ? 0 : 0x2), + .gl_enables = 0x4 | (ctx->is_t6xx ? 
0 : 0x2), }; memcpy(&ctx->payload_vertex, &payload, sizeof(payload)); @@ -847,7 +835,7 @@ panfrost_default_shader_backend(struct panfrost_context *ctx) .unknown2_4 = MALI_NO_MSAA | 0x4e0, }; - if (is_t6xx) { + if (ctx->is_t6xx) { shader.unknown2_4 |= 0x10; } @@ -957,7 +945,7 @@ panfrost_fragment_job(struct panfrost_context *ctx) /* Actualize the clear late; TODO: Fix order dependency between clear * and afbc */ - if (require_sfbd) { + if (ctx->require_sfbd) { panfrost_clear_sfbd(job); } else { panfrost_clear_mfbd(job); @@ -969,7 +957,7 @@ panfrost_fragment_job(struct panfrost_context *ctx) struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture; if (rsrc->bo->has_checksum) { - if (require_sfbd) { + if (ctx->require_sfbd) { fprintf(stderr, "Checksumming not supported on SFBD\n"); assert(0); } @@ -986,11 +974,11 @@ panfrost_fragment_job(struct panfrost_context *ctx) /* The frame is complete and therefore the framebuffer descriptor is * ready for linkage and upload */ - size_t sz = require_sfbd ? sizeof(struct mali_single_framebuffer) : (sizeof(struct bifrost_framebuffer) + sizeof(struct bifrost_fb_extra) + sizeof(struct bifrost_render_target) * 1); + size_t sz = ctx->require_sfbd ? 
sizeof(struct mali_single_framebuffer) : (sizeof(struct bifrost_framebuffer) + sizeof(struct bifrost_fb_extra) + sizeof(struct bifrost_render_target) * 1); struct panfrost_transfer fbd_t = panfrost_allocate_transient(ctx, sz); off_t offset = 0; - if (require_sfbd) { + if (ctx->require_sfbd) { /* Upload just the SFBD all at once */ memcpy(fbd_t.cpu, &ctx->fragment_sfbd, sizeof(ctx->fragment_sfbd)); offset += sizeof(ctx->fragment_sfbd); @@ -1022,10 +1010,10 @@ panfrost_fragment_job(struct panfrost_context *ctx) struct mali_payload_fragment payload = { .min_tile_coord = MALI_COORDINATE_TO_TILE_MIN(0, 0), .max_tile_coord = MALI_COORDINATE_TO_TILE_MAX(ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height), - .framebuffer = fbd_t.gpu | (require_sfbd ? MALI_SFBD : MALI_MFBD), + .framebuffer = fbd_t.gpu | (ctx->require_sfbd ? MALI_SFBD : MALI_MFBD), }; - if (!require_sfbd && ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) { + if (!ctx->require_sfbd && ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) { /* Signal that there is an extra portion of the framebuffer * descriptor */ @@ -1247,7 +1235,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) (ctx->blend->equation.alpha_mode == 0x122) && (ctx->blend->equation.color_mask == 0xf); - if (require_sfbd) { + if (ctx->require_sfbd) { /* When only a single render target platform is used, the blend * information is inside the shader meta itself. 
We * additionally need to signal CAN_DISCARD for nontrivial blend @@ -1270,7 +1258,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ctx->payload_tiler.postfix._shader_upper = (transfer.gpu) >> 4; - if (!require_sfbd) { + if (!ctx->require_sfbd) { /* Additional blend descriptor tacked on for jobs using MFBD */ unsigned blend_count = 0; @@ -1360,7 +1348,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) int s = ctx->sampler_views[t][i]->hw.nr_mipmap_levels; if (!rsrc->bo->is_mipmap) { - if (is_t6xx) { + if (ctx->is_t6xx) { /* HW ERRATA, not needed after t6XX */ ctx->sampler_views[t][i]->hw.swizzled_bitmaps[1] = rsrc->bo->gpu[0]; @@ -1375,7 +1363,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) /* Restore */ ctx->sampler_views[t][i]->hw.nr_mipmap_levels = s; - if (is_t6xx) { + if (ctx->is_t6xx) { ctx->sampler_views[t][i]->hw.unknown3A = 0; } } @@ -1534,7 +1522,8 @@ panfrost_link_jobs(struct panfrost_context *ctx) /* The entire frame is in memory -- send it off to the kernel! 
*/ static void -panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate) +panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate, + struct pipe_fence_handle **fence) { struct pipe_context *gallium = (struct pipe_context *) ctx; struct panfrost_screen *screen = pan_screen(gallium->screen); @@ -1561,14 +1550,14 @@ panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate) /* If visual, we can stall a frame */ if (!flush_immediate) - screen->driver->force_flush_fragment(ctx); + screen->driver->force_flush_fragment(ctx, fence); screen->last_fragment_id = fragment_id; screen->last_fragment_flushed = false; /* If readback, flush now (hurts the pipelined performance) */ if (flush_immediate) - screen->driver->force_flush_fragment(ctx); + screen->driver->force_flush_fragment(ctx, fence); if (screen->driver->dump_counters && pan_counters_base) { screen->driver->dump_counters(screen); @@ -1599,7 +1588,7 @@ panfrost_flush( bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME; /* Submit the frame itself */ - panfrost_submit_frame(ctx, flush_immediate); + panfrost_submit_frame(ctx, flush_immediate, fence); /* Prepare for the next frame */ panfrost_invalidate_frame(ctx); @@ -1827,12 +1816,13 @@ panfrost_create_rasterizer_state( struct pipe_context *pctx, const struct pipe_rasterizer_state *cso) { + struct panfrost_context *ctx = pan_context(pctx); struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer); so->base = *cso; /* Bitmask, unknown meaning of the start value */ - so->tiler_gl_enables = is_t6xx ? 0x105 : 0x7; + so->tiler_gl_enables = ctx->is_t6xx ? 0x105 : 0x7; so->tiler_gl_enables |= MALI_FRONT_FACE( cso->front_ccw ? 
MALI_CCW : MALI_CW); @@ -2341,7 +2331,7 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, if (!cb) continue; - if (require_sfbd) + if (ctx->require_sfbd) ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); else ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); @@ -2376,7 +2366,7 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, if (zb) { /* FBO has depth */ - if (require_sfbd) + if (ctx->require_sfbd) ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); else ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); @@ -2596,9 +2586,16 @@ static void panfrost_destroy(struct pipe_context *pipe) { struct panfrost_context *panfrost = pan_context(pipe); + struct panfrost_screen *screen = pan_screen(pipe->screen); if (panfrost->blitter) util_blitter_destroy(panfrost->blitter); + + screen->driver->free_slab(screen, &panfrost->scratchpad); + screen->driver->free_slab(screen, &panfrost->varying_mem); + screen->driver->free_slab(screen, &panfrost->shaders); + screen->driver->free_slab(screen, &panfrost->tiler_heap); + screen->driver->free_slab(screen, &panfrost->misc_0); } static struct pipe_query * @@ -2724,8 +2721,14 @@ struct pipe_context * panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) { struct panfrost_context *ctx = CALLOC_STRUCT(panfrost_context); + struct panfrost_screen *pscreen = pan_screen(screen); memset(ctx, 0, sizeof(*ctx)); struct pipe_context *gallium = (struct pipe_context *) ctx; + unsigned gpu_id; + + gpu_id = pscreen->driver->query_gpu_version(pscreen); + ctx->is_t6xx = gpu_id <= 0x0750; /* For now, this flag means t76x or less */ + ctx->require_sfbd = gpu_id < 0x0750; /* t76x is the first to support MFD */ gallium->screen = screen; @@ -2792,6 +2795,8 @@ panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) panfrost_resource_context_init(gallium); + pscreen->driver->init_context(ctx); + panfrost_setup_hardware(ctx); /* XXX: leaks */ diff --git 
a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index b1a0a09146d..091d9988698 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -74,6 +74,11 @@ struct panfrost_query { struct panfrost_transfer transfer; }; +struct panfrost_fence { + struct pipe_reference reference; + int fd; +}; + #define PANFROST_MAX_TRANSIENT_ENTRIES 64 struct panfrost_transient_pool { @@ -208,6 +213,18 @@ struct panfrost_context { struct pipe_blend_color blend_color; struct pipe_depth_stencil_alpha_state *depth_stencil; struct pipe_stencil_ref stencil_ref; + + /* True for t6XX, false for t8xx. */ + bool is_t6xx; + + /* If set, we'll require the use of single render-target framebuffer + * descriptors (SFBD), for older hardware -- specifically, +#include + +#include "drm-uapi/panfrost_drm.h" + #include "util/u_memory.h" +#include "util/os_time.h" #include "pan_screen.h" +#include "pan_resource.h" +#include "pan_context.h" #include "pan_drm.h" +#include "pan_trace.h" struct panfrost_drm { struct panfrost_driver base; int fd; }; +static void +panfrost_drm_allocate_slab(struct panfrost_screen *screen, + struct panfrost_memory *mem, + size_t pages, + bool same_va, + int extra_flags, + int commit_count, + int extent) +{ + struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver; + struct drm_panfrost_create_bo create_bo = { + .size = pages * 4096, + .flags = 0, // TODO figure out proper flags.. + }; + struct drm_panfrost_mmap_bo mmap_bo = {0,}; + int ret; + + // TODO cache allocations + // TODO properly handle errors + // TODO take into account extra_flags + + ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo); + if (ret) { + fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %d\n", ret); + assert(0); + } + + mem->gpu = create_bo.offset; + mem->gem_handle = create_bo.handle; + mem->stack_bottom = 0; + mem->size = create_bo.size; + + // TODO map and unmap on demand? 
+ mmap_bo.handle = create_bo.handle; + ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo); + if (ret) { + fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %d\n", ret); + assert(0); + } + + mem->cpu = mmap(NULL, mem->size, PROT_READ | PROT_WRITE, MAP_SHARED, + drm->fd, mmap_bo.offset); + if (mem->cpu == MAP_FAILED) { + fprintf(stderr, "mmap failed: %p\n", mem->cpu); + assert(0); + } + + /* Record the mmap if we're tracing */ + if (!(extra_flags & PAN_ALLOCATE_GROWABLE)) + pantrace_mmap(mem->gpu, mem->cpu, mem->size, NULL); +} + +static void +panfrost_drm_free_slab(struct panfrost_screen *screen, struct panfrost_memory *mem) +{ + struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver; + struct drm_gem_close gem_close = { + .handle = mem->gem_handle, + }; + int ret; + + if (munmap((void *) (uintptr_t) mem->cpu, mem->size)) { + perror("munmap"); + abort(); + } + + mem->cpu = NULL; + + ret = drmIoctl(drm->fd, DRM_IOCTL_GEM_CLOSE, &gem_close); + if (ret) { + fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %d\n", ret); + assert(0); + } + + mem->gem_handle = -1; +} + +static struct panfrost_bo * +panfrost_drm_import_bo(struct panfrost_screen *screen, struct winsys_handle *whandle) +{ + struct panfrost_bo *bo = CALLOC_STRUCT(panfrost_bo); + struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver; + struct drm_panfrost_get_bo_offset get_bo_offset = {0,}; + struct drm_panfrost_mmap_bo mmap_bo = {0,}; + int ret, size; + unsigned gem_handle; + + ret = drmPrimeFDToHandle(drm->fd, whandle->handle, &gem_handle); + assert(!ret); + + get_bo_offset.handle = gem_handle; + ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset); + assert(!ret); + + bo->gem_handle = gem_handle; + bo->gpu[0] = (mali_ptr) get_bo_offset.offset; + + // TODO map and unmap on demand? 
+ mmap_bo.handle = gem_handle; + ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo); + if (ret) { + fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %d\n", ret); + assert(0); + } + + size = lseek(whandle->handle, 0, SEEK_END); + assert(size > 0); + bo->cpu[0] = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + drm->fd, mmap_bo.offset); + if (bo->cpu[0] == MAP_FAILED) { + fprintf(stderr, "mmap failed: %p\n", bo->cpu[0]); + assert(0); + } + + /* Record the mmap if we're tracing */ + pantrace_mmap(bo->gpu[0], bo->cpu[0], size, NULL); + + return bo; +} + +static int +panfrost_drm_export_bo(struct panfrost_screen *screen, int gem_handle, struct winsys_handle *whandle) +{ + struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver; + struct drm_prime_handle args = { + .handle = gem_handle, + .flags = DRM_CLOEXEC, + }; + + int ret = drmIoctl(drm->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + if (ret == -1) + return FALSE; + + whandle->handle = args.fd; + + return TRUE; +} + +static void +panfrost_drm_free_imported_bo(struct panfrost_screen *screen, struct panfrost_bo *bo) +{ + struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver; + struct drm_gem_close gem_close = { + .handle = bo->gem_handle, + }; + int ret; + + ret = drmIoctl(drm->fd, DRM_IOCTL_GEM_CLOSE, &gem_close); + if (ret) { + fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %d\n", ret); + assert(0); + } + + bo->gem_handle = -1; + bo->gpu[0] = (mali_ptr)NULL; +} + +static int +panfrost_drm_submit_job(struct panfrost_context *ctx, u64 job_desc, int reqs, struct pipe_surface *surf) +{ + struct pipe_context *gallium = (struct pipe_context *) ctx; + struct panfrost_screen *screen = pan_screen(gallium->screen); + struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver; + struct drm_panfrost_submit submit = {0,}; + + submit.in_syncs = &ctx->out_sync; + submit.in_sync_count = 1; + + submit.out_sync = ctx->out_sync; + + submit.jc = job_desc; + submit.requirements = reqs; + + if 
(surf) { + struct panfrost_resource *res = pan_resource(surf->texture); + submit.bo_handles = (u64) &res->bo->gem_handle; + submit.bo_handle_count = 1; + } + + /* Dump memory _before_ submitting so we're not corrupted with actual GPU results */ + pantrace_dump_memory(); + + if (drmIoctl(drm->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit)) { + fprintf(stderr, "Error submitting: %m\n"); + return errno; + } + + /* Trace the job if we're doing that and do a memory dump. We may + * want to adjust this logic once we're ready to trace FBOs */ + pantrace_submit_job(submit.jc, submit.requirements, FALSE); + + return 0; +} + +static int +panfrost_drm_submit_vs_fs_job(struct panfrost_context *ctx, bool has_draws, bool is_scanout) +{ + struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[0]; + int ret; + + if (has_draws) { + ret = panfrost_drm_submit_job(ctx, ctx->set_value_job, 0, NULL); + assert(!ret); + } + + ret = panfrost_drm_submit_job(ctx, panfrost_fragment_job(ctx), PANFROST_JD_REQ_FS, surf); + assert(!ret); + + return 0; +} + +static struct panfrost_fence * +panfrost_fence_create(struct panfrost_context *ctx) +{ + struct pipe_context *gallium = (struct pipe_context *) ctx; + struct panfrost_screen *screen = pan_screen(gallium->screen); + struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver; + struct panfrost_fence *f = calloc(1, sizeof(*f)); + if (!f) + return NULL; + + /* Snapshot the last Panfrost's rendering's out fence. We'd rather have + * another syncobj instead of a sync file, but this is all we get. + * (HandleToFD/FDToHandle just gives you another syncobj ID for the + * same syncobj). 
+ */ + drmSyncobjExportSyncFile(drm->fd, ctx->out_sync, &f->fd); + if (f->fd == -1) { + fprintf(stderr, "export failed\n"); + free(f); + return NULL; + } + + pipe_reference_init(&f->reference, 1); + + return f; +} + +static void +panfrost_drm_force_flush_fragment(struct panfrost_context *ctx, + struct pipe_fence_handle **fence) +{ + struct pipe_context *gallium = (struct pipe_context *) ctx; + + if (fence) { + struct panfrost_fence *f = panfrost_fence_create(ctx); + gallium->screen->fence_reference(gallium->screen, fence, NULL); + *fence = (struct pipe_fence_handle *)f; + } +} + +static void +panfrost_drm_enable_counters(struct panfrost_screen *screen) +{ + fprintf(stderr, "unimplemented: %s\n", __func__); +} + +static void +panfrost_drm_dump_counters(struct panfrost_screen *screen) +{ + fprintf(stderr, "unimplemented: %s\n", __func__); +} + +static unsigned +panfrost_drm_query_gpu_version(struct panfrost_screen *screen) +{ + struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver; + struct drm_panfrost_get_param get_param = {0,}; + int ret; + + get_param.param = DRM_PANFROST_PARAM_GPU_ID; + ret = drmIoctl(drm->fd, DRM_IOCTL_PANFROST_GET_PARAM, &get_param); + assert(!ret); + + return get_param.value; +} + +static int +panfrost_drm_init_context(struct panfrost_context *ctx) +{ + struct pipe_context *gallium = (struct pipe_context *) ctx; + struct panfrost_screen *screen = pan_screen(gallium->screen); + struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver; + + return drmSyncobjCreate(drm->fd, DRM_SYNCOBJ_CREATE_SIGNALED, + &ctx->out_sync); +} + +static void +panfrost_drm_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct panfrost_fence **p = (struct panfrost_fence **)ptr; + struct panfrost_fence *f = (struct panfrost_fence *)fence; + struct panfrost_fence *old = *p; + + if (pipe_reference(&(*p)->reference, &f->reference)) { + close(old->fd); + free(old); + } + *p = f; +} + 
+static boolean
+panfrost_drm_fence_finish(struct pipe_screen *pscreen,
+                          struct pipe_context *ctx,
+                          struct pipe_fence_handle *fence,
+                          uint64_t timeout)
+{
+        struct panfrost_screen *screen = pan_screen(pscreen);
+        struct panfrost_drm *drm = (struct panfrost_drm *)screen->driver;
+        struct panfrost_fence *f = (struct panfrost_fence *)fence;
+        int ret;
+        unsigned syncobj;
+        /* Wait on the fence's sync file through a temporary syncobj; returns true when the wait does not fail */
+        ret = drmSyncobjCreate(drm->fd, 0, &syncobj);
+        if (ret) {
+                fprintf(stderr, "Failed to create syncobj to wait on: %m\n");
+                return false;
+        }
+        /* Keep the import result: otherwise the check below only re-tests the 0 left by the create call */
+        ret = drmSyncobjImportSyncFile(drm->fd, syncobj, f->fd);
+        if (ret) {
+                fprintf(stderr, "Failed to import fence to syncobj: %m\n");
+                drmSyncobjDestroy(drm->fd, syncobj); /* do not leak the temporary syncobj */
+                return false;
+        }
+
+        uint64_t abs_timeout = os_time_get_absolute_timeout(timeout);
+        if (abs_timeout == OS_TIMEOUT_INFINITE)
+                abs_timeout = INT64_MAX;
+
+        ret = drmSyncobjWait(drm->fd, &syncobj, 1, abs_timeout, 0, NULL);
+        drmSyncobjDestroy(drm->fd, syncobj);
+
+        return ret >= 0;
+}
+
 struct panfrost_driver *
 panfrost_create_drm_driver(int fd)
 {
@@ -38,5 +387,19 @@ panfrost_create_drm_driver(int fd)
 
         driver->fd = fd;
 
+        driver->base.import_bo = panfrost_drm_import_bo;
+        driver->base.export_bo = panfrost_drm_export_bo;
+        driver->base.free_imported_bo = panfrost_drm_free_imported_bo;
+        driver->base.submit_vs_fs_job = panfrost_drm_submit_vs_fs_job;
+        driver->base.force_flush_fragment = panfrost_drm_force_flush_fragment;
+        driver->base.allocate_slab = panfrost_drm_allocate_slab;
+        driver->base.free_slab = panfrost_drm_free_slab;
+        driver->base.enable_counters = panfrost_drm_enable_counters;
+        driver->base.query_gpu_version = panfrost_drm_query_gpu_version;
+        driver->base.init_context = panfrost_drm_init_context;
+        driver->base.fence_reference = panfrost_drm_fence_reference;
+        driver->base.fence_finish = panfrost_drm_fence_finish;
+        driver->base.dump_counters = panfrost_drm_dump_counters;
+
         return &driver->base;
 }
diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c
index 
f26f33db96b..abef927ada5 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -68,6 +68,12 @@ panfrost_resource_from_handle(struct pipe_screen *pscreen, rsc->bo = screen->driver->import_bo(screen, whandle); + if (screen->ro) { + rsc->scanout = + renderonly_create_gpu_import_for_resource(prsc, screen->ro, NULL); + /* failure is expected in some cases.. */ + } + return prsc; } @@ -87,17 +93,15 @@ panfrost_resource_get_handle(struct pipe_screen *pscreen, handle->stride = stride; handle->modifier = DRM_FORMAT_MOD_INVALID; - if (handle->type == WINSYS_HANDLE_TYPE_SHARED) { - printf("Missed shared handle\n"); - return FALSE; - } else if (handle->type == WINSYS_HANDLE_TYPE_KMS) { - if (renderonly_get_handle(scanout, handle)) { - return TRUE; - } else { - printf("Missed nonrenderonly KMS handle for resource %p with scanout %p\n", pt, scanout); - return FALSE; - } - } else if (handle->type == WINSYS_HANDLE_TYPE_FD) { + if (handle->type == WINSYS_HANDLE_TYPE_SHARED) { + return FALSE; + } else if (handle->type == WINSYS_HANDLE_TYPE_KMS) { + if (renderonly_get_handle(scanout, handle)) + return TRUE; + + handle->handle = rsrc->bo->gem_handle; + return TRUE; + } else if (handle->type == WINSYS_HANDLE_TYPE_FD) { if (scanout) { struct drm_prime_handle args = { .handle = scanout->handle, @@ -111,14 +115,11 @@ panfrost_resource_get_handle(struct pipe_screen *pscreen, handle->handle = args.fd; return TRUE; - } else { - printf("Missed nonscanout FD handle\n"); - assert(0); - return FALSE; - } - } + } else + return screen->driver->export_bo(screen, rsrc->bo->gem_handle, handle); + } - return FALSE; + return FALSE; } static void @@ -322,7 +323,7 @@ static void panfrost_resource_destroy(struct pipe_screen *screen, struct pipe_resource *pt) { - struct panfrost_screen *pscreen = panfrost_screen(screen); + struct panfrost_screen *pscreen = pan_screen(screen); struct panfrost_resource *rsrc = (struct panfrost_resource *) pt; if 
(rsrc->scanout) diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c index b8a119fd343..b7f2e4bb2e9 100644 --- a/src/gallium/drivers/panfrost/pan_screen.c +++ b/src/gallium/drivers/panfrost/pan_screen.c @@ -500,21 +500,22 @@ panfrost_get_timestamp(struct pipe_screen *_screen) } static void -panfrost_fence_reference(struct pipe_screen *screen, +panfrost_fence_reference(struct pipe_screen *pscreen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - *ptr = fence; + struct panfrost_screen *screen = pan_screen(pscreen); + screen->driver->fence_reference(pscreen, ptr, fence); } static boolean -panfrost_fence_finish(struct pipe_screen *screen, +panfrost_fence_finish(struct pipe_screen *pscreen, struct pipe_context *ctx, struct pipe_fence_handle *fence, uint64_t timeout) { - assert(fence); - return TRUE; + struct panfrost_screen *screen = pan_screen(pscreen); + return screen->driver->fence_finish(pscreen, ctx, fence, timeout); } static const void * diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h index 0005b2feb15..882611e93e3 100644 --- a/src/gallium/drivers/panfrost/pan_screen.h +++ b/src/gallium/drivers/panfrost/pan_screen.h @@ -49,9 +49,11 @@ struct panfrost_screen; struct panfrost_driver { struct panfrost_bo * (*import_bo) (struct panfrost_screen *screen, struct winsys_handle *whandle); + int (*export_bo) (struct panfrost_screen *screen, int gem_handle, struct winsys_handle *whandle); int (*submit_vs_fs_job) (struct panfrost_context *ctx, bool has_draws, bool is_scanout); - void (*force_flush_fragment) (struct panfrost_context *ctx); + void (*force_flush_fragment) (struct panfrost_context *ctx, + struct pipe_fence_handle **fence); void (*allocate_slab) (struct panfrost_screen *screen, struct panfrost_memory *mem, size_t pages, @@ -65,6 +67,15 @@ struct panfrost_driver { struct panfrost_bo *bo); void (*enable_counters) (struct panfrost_screen *screen); void 
(*dump_counters) (struct panfrost_screen *screen); + unsigned (*query_gpu_version) (struct panfrost_screen *screen); + int (*init_context) (struct panfrost_context *ctx); + void (*fence_reference) (struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence); + boolean (*fence_finish) (struct pipe_screen *screen, + struct pipe_context *ctx, + struct pipe_fence_handle *fence, + uint64_t timeout); }; struct panfrost_screen { @@ -85,10 +96,4 @@ struct panfrost_screen { int last_fragment_flushed; }; -static inline struct panfrost_screen * -panfrost_screen( struct pipe_screen *pipe ) -{ - return (struct panfrost_screen *)pipe; -} - #endif /* PAN_SCREEN_H */ diff --git a/src/gallium/drivers/panfrost/pan_wallpaper.c b/src/gallium/drivers/panfrost/pan_wallpaper.c index 735524a49c9..48418012fe8 100644 --- a/src/gallium/drivers/panfrost/pan_wallpaper.c +++ b/src/gallium/drivers/panfrost/pan_wallpaper.c @@ -181,7 +181,7 @@ panfrost_draw_wallpaper(struct pipe_context *pipe) .normalized_coords = 1 }; - struct pipe_resource *rsrc = panfrost_screen(pipe->screen)->display_target; + struct pipe_resource *rsrc = pan_screen(pipe->screen)->display_target; struct pipe_sampler_state *sampler_state = pipe->create_sampler_state(pipe, &state); struct pipe_sampler_view *sampler_view = pipe->create_sampler_view(pipe, rsrc, &tmpl); -- 2.30.2