From: Jerome Glisse
Date: Wed, 27 Mar 2013 15:04:29 +0000 (-0400)
Subject: winsys/radeon: add command stream replay dump for faulty lockup v3
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b8998f976ee11e5bdffa78cd78278deeed2789c1;p=mesa.git

winsys/radeon: add command stream replay dump for faulty lockup v3

Build time option, set RADEON_CS_DUMP_ON_LOCKUP to 1 in radeon_drm_cs.h to enable it.

When enabled, after each cs submission the code will try to detect a lockup by
waiting on one of the buffers of the cs to become idle; after a timeout it will
consider that the cs triggered a lockup and will write a radeon_lockup.c file
in the current directory that has all the information for replaying the cs.
To build this file :

gcc -O0 -g radeon_lockup.c -ldrm -o radeon_lockup -I/usr/include/libdrm

v2: Add radeon_ctx.h file to mesa git tree
v3: Slightly improve dumped file for easier editing, only dump first faulty cs

Signed-off-by: Jerome Glisse
---

diff --git a/src/gallium/winsys/radeon/drm/Makefile.sources b/src/gallium/winsys/radeon/drm/Makefile.sources
index 1d18d6164d5..4ca5ebb1b34 100644
--- a/src/gallium/winsys/radeon/drm/Makefile.sources
+++ b/src/gallium/winsys/radeon/drm/Makefile.sources
@@ -1,4 +1,5 @@
 C_SOURCES := \
 	radeon_drm_bo.c \
 	radeon_drm_cs.c \
+	radeon_drm_cs_dump.c \
 	radeon_drm_winsys.c
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 61570d0ee76..9e45dcc031c 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -396,14 +396,54 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
     FREE(bo);
 }
 
+void *radeon_bo_do_map(struct radeon_bo *bo)
+{
+    struct drm_radeon_gem_mmap args = {0};
+    void *ptr;
+
+    /* Fast path: return the existing mapping without taking the lock. */
+    if (bo->ptr)
+        return bo->ptr;
+
+    /* Slow path: create the mapping while holding map_mutex. */
+    pipe_mutex_lock(bo->map_mutex);
+    /* Re-check under the lock in case another thread mapped it first. */
+    if (bo->ptr) {
+        pipe_mutex_unlock(bo->map_mutex);
+        return bo->ptr;
+    }
+    args.handle = bo->handle;
+    args.offset = 0;
+    args.size = (uint64_t)bo->base.size;
+    if (drmCommandWriteRead(bo->rws->fd,
+                            DRM_RADEON_GEM_MMAP,
+                            &args,
+                            sizeof(args))) {
+        pipe_mutex_unlock(bo->map_mutex);
+        fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
+                bo, bo->handle);
+        return NULL;
+    }
+
+    ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
+               bo->rws->fd, args.addr_ptr);
+    if (ptr == MAP_FAILED) {
+        pipe_mutex_unlock(bo->map_mutex);
+        fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
+        return NULL;
+    }
+    bo->ptr = ptr;
+    pipe_mutex_unlock(bo->map_mutex);
+
+    return bo->ptr;
+}
+
 static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
                            struct radeon_winsys_cs *rcs,
                            enum pipe_transfer_usage usage)
 {
     struct radeon_bo *bo = (struct radeon_bo*)buf;
     struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;
-    struct drm_radeon_gem_mmap args = {0};
-    void *ptr;
 
     /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
     if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
@@ -466,41 +506,7 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
         }
     }
 
-    /* Return the pointer if it's already mapped. */
-    if (bo->ptr)
-        return bo->ptr;
-
-    /* Map the buffer. */
-    pipe_mutex_lock(bo->map_mutex);
-    /* Return the pointer if it's already mapped (in case of a race). */
-    if (bo->ptr) {
-        pipe_mutex_unlock(bo->map_mutex);
-        return bo->ptr;
-    }
-    args.handle = bo->handle;
-    args.offset = 0;
-    args.size = (uint64_t)bo->base.size;
-    if (drmCommandWriteRead(bo->rws->fd,
-                            DRM_RADEON_GEM_MMAP,
-                            &args,
-                            sizeof(args))) {
-        pipe_mutex_unlock(bo->map_mutex);
-        fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
-                bo, bo->handle);
-        return NULL;
-    }
-
-    ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
-               bo->rws->fd, args.addr_ptr);
-    if (ptr == MAP_FAILED) {
-        pipe_mutex_unlock(bo->map_mutex);
-        fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
-        return NULL;
-    }
-    bo->ptr = ptr;
-    pipe_mutex_unlock(bo->map_mutex);
-
-    return bo->ptr;
+    return radeon_bo_do_map(bo);
 }
 
 static void radeon_bo_unmap(struct radeon_winsys_cs_handle *_buf)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index 710a04cdc3d..ee8919b7ff4 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -79,4 +79,6 @@ void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src)
     pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
 }
 
+void *radeon_bo_do_map(struct radeon_bo *bo);
+
 #endif
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 6a7115ba76b..aa7e295e21c 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -428,6 +428,10 @@ void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc)
         }
     }
 
+#if RADEON_CS_DUMP_ON_LOCKUP
+    radeon_dump_cs_on_lockup(csc);
+#endif
+
     for (i = 0; i < csc->crelocs; i++)
         p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index 570842dc51c..66aee55dae0 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -30,6 +30,8 @@
 #include "radeon_drm_bo.h"
 #include
 
+#define RADEON_CS_DUMP_ON_LOCKUP 0
+
 struct radeon_cs_context {
     uint32_t buf[RADEON_MAX_CMDBUF_DWORDS];
 
@@ -121,4 +123,8 @@ void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs);
 void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws);
 void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc);
 
+#if RADEON_CS_DUMP_ON_LOCKUP
+void radeon_dump_cs_on_lockup(struct radeon_cs_context *csc);
+#endif
+
 #endif
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c
new file mode 100644
index 00000000000..a3634bf2566
--- /dev/null
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2013 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Jérôme Glisse
+ */
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <xf86drm.h>
+#include "radeon_drm_cs.h"
+#include "radeon_drm_bo.h"
+
+#if RADEON_CS_DUMP_ON_LOCKUP
+static bool dumped = false;
+
+/* If the submitted cs appears to hang the GPU, write radeon_lockup.c to replay it. */
+void radeon_dump_cs_on_lockup(struct radeon_cs_context *csc)
+{
+    struct drm_radeon_gem_busy args;
+    FILE *dump;
+    unsigned i, lockup;
+    uint32_t *ptr;
+
+    /* only the first cs that triggers a lockup is dumped */
+    if (!csc->crelocs || dumped) {
+        /* without any bo in the cs there is nothing to poll for idleness,
+         * so a lockup can not be detected (and most likely did not occur)
+         */
+        return;
+    }
+
+    memset(&args, 0, sizeof(args));
+    args.handle = csc->relocs_bo[0]->handle;
+    for (i = 0; i < 10; i++) {
+        usleep(5);
+        lockup = drmCommandWriteRead(csc->fd, DRM_RADEON_GEM_BUSY, &args, sizeof(args));
+        if (!lockup) {
+            break;
+        }
+    }
+    if (!lockup || i < 10) {
+        return;
+    }
+
+    /* the busy ioctl never returned 0: assume a lockup and write the standalone replay file */
+    dump = fopen("radeon_lockup.c", "w");
+    if (dump == NULL) {
+        return;
+    }
+    fprintf(dump, "/* To build this file you will need to copy radeon_ctx.h\n");
+    fprintf(dump, " * in same directory. You can find radeon_ctx.h in mesa tree :\n");
+    fprintf(dump, " * mesa/src/gallium/winsys/radeon/tools/radeon_ctx.h\n");
+    fprintf(dump, " * Build with :\n");
+    fprintf(dump, " * gcc -O0 -g radeon_lockup.c -ldrm -o radeon_lockup -I/usr/include/libdrm\n");
+    fprintf(dump, " */\n");
+    fprintf(dump, "#include <stdio.h>\n");
+    fprintf(dump, "#include <stdint.h>\n");
+    fprintf(dump, "#include \"radeon_ctx.h\"\n");
+    fprintf(dump, "\n");
+    fprintf(dump, "#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))\n");
+    fprintf(dump, "\n");
+
+    for (i = 0; i < csc->crelocs; i++) {
+        unsigned j, ndw = (csc->relocs_bo[i]->base.size + 3) >> 2;
+
+        ptr = radeon_bo_do_map(csc->relocs_bo[i]);
+        if (ptr) {
+            fprintf(dump, "static uint32_t bo_%04u_data[%u] = {\n ", i, ndw);
+            for (j = 0; j < ndw; j++) {
+                if (j && !(j % 8)) {
+                    uint32_t offset = (j - 8) << 2;
+                    fprintf(dump, " /* [0x%08x] va[0x%016llx] */\n ", offset, (unsigned long long)(offset + csc->relocs_bo[i]->va));
+                }
+                fprintf(dump, " 0x%08x,", ptr[j]);
+            }
+            fprintf(dump, "};\n\n");
+        }
+    }
+
+    fprintf(dump, "static uint32_t bo_relocs[%d] = {\n", csc->crelocs * 4);
+    for (i = 0; i < csc->crelocs; i++) {
+        fprintf(dump, " 0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
+                0, csc->relocs[i].read_domains, csc->relocs[i].write_domain, csc->relocs[i].flags);
+    }
+    fprintf(dump, "};\n\n");
+
+    fprintf(dump, "static uint32_t cs[] = {\n");
+    ptr = csc->buf;
+    for (i = 0; i < csc->chunks[0].length_dw; i++) {
+        fprintf(dump, " 0x%08x,\n", ptr[i]);
+    }
+    fprintf(dump, "};\n\n");
+
+    fprintf(dump, "static uint32_t cs_flags[2] = {\n");
+    fprintf(dump, " 0x%08x,\n", csc->flags[0]);
+    fprintf(dump, " 0x%08x,\n", csc->flags[1]);
+    fprintf(dump, "};\n\n");
+
+    fprintf(dump, "int main(int argc, char *argv[])\n");
+    fprintf(dump, "{\n");
+    fprintf(dump, " struct bo *bo[%d];\n", csc->crelocs);
+    fprintf(dump, " struct ctx ctx;\n");
+    fprintf(dump, "\n");
+    fprintf(dump, " ctx_init(&ctx);\n");
+    fprintf(dump, "\n");
+
+    for (i = 0; i < csc->crelocs; i++) {
+        unsigned ndw = (csc->relocs_bo[i]->base.size + 3) >> 2;
+
+        ptr = radeon_bo_do_map(csc->relocs_bo[i]);
+        if (ptr) {
+            fprintf(dump, " bo[%u] = bo_new(&ctx, %u, bo_%04u_data, 0x%016llx, 0x%08x);\n",
+                    i, ndw, i, (unsigned long long)csc->relocs_bo[i]->va, csc->relocs_bo[i]->base.alignment);
+        } else {
+            fprintf(dump, " bo[%u] = bo_new(&ctx, %u, NULL, 0x%016llx, 0x%08x);\n",
+                    i, ndw, (unsigned long long)csc->relocs_bo[i]->va, csc->relocs_bo[i]->base.alignment);
+        }
+    }
+    fprintf(dump, "\n");
+    fprintf(dump, " ctx_cs(&ctx, cs, cs_flags, ARRAY_SIZE(cs), bo, bo_relocs, %d);\n", csc->crelocs);
+    fprintf(dump, "\n");
+    fprintf(dump, " fprintf(stderr, \"waiting for cs execution to end ....\\n\");\n");
+    fprintf(dump, " bo_wait(&ctx, bo[0]);\n");
+    fprintf(dump, "}\n");
+    fclose(dump);
+    dumped = true;
+}
+#endif
diff --git a/src/gallium/winsys/radeon/tools/radeon_ctx.h b/src/gallium/winsys/radeon/tools/radeon_ctx.h
new file mode 100644
index 00000000000..c2967ff0ff8
--- /dev/null
+++ b/src/gallium/winsys/radeon/tools/radeon_ctx.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2011 Jerome Glisse
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Jérôme Glisse
+ */
+#ifndef RADEON_CTX_H
+#define RADEON_CTX_H
+
+#define _FILE_OFFSET_BITS 64
+#include <sys/mman.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include "xf86drm.h"
+#include "radeon_drm.h"
+
+#ifndef RADEON_CHUNK_ID_FLAGS
+#define RADEON_CHUNK_ID_FLAGS 0x03
+/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
+#define RADEON_CS_KEEP_TILING_FLAGS 0x01
+#endif
+
+#ifndef RADEON_VA_MAP
+#define RADEON_VA_MAP 1
+#define RADEON_VA_UNMAP 2
+#define RADEON_VA_RESULT_OK 0
+#define RADEON_VA_RESULT_ERROR 1
+#define RADEON_VA_RESULT_VA_EXIST 2
+#define RADEON_VM_PAGE_VALID (1 << 0)
+#define RADEON_VM_PAGE_READABLE (1 << 1)
+#define RADEON_VM_PAGE_WRITEABLE (1 << 2)
+#define RADEON_VM_PAGE_SYSTEM (1 << 3)
+#define RADEON_VM_PAGE_SNOOPED (1 << 4)
+struct drm_radeon_gem_va {
+    uint32_t handle;
+    uint32_t operation;
+    uint32_t vm_id;
+    uint32_t flags;
+    uint64_t offset;
+};
+#define DRM_RADEON_GEM_VA 0x2b
+#endif
+
+struct ctx {
+    int fd;
+};
+
+struct bo {
+    uint32_t handle;
+    uint32_t alignment;
+    uint64_t size;
+    uint64_t va;
+    void *ptr;
+};
+
+/* Open the "radeon" DRM device into ctx->fd; exits the process on failure. */
+static void ctx_init(struct ctx *ctx)
+{
+    ctx->fd = drmOpen("radeon", NULL);
+    if (ctx->fd < 0) {
+        fprintf(stderr, "failed to open radeon drm device file\n");
+        exit(-1);
+    }
+}
+
+/* Repeat the DRM_RADEON_GEM_WAIT_IDLE ioctl on bo until it stops returning -EBUSY. */
+static void bo_wait(struct ctx *ctx, struct bo *bo)
+{
+    struct drm_radeon_gem_wait_idle args;
+    int r;
+
+    /* Zero out args to make valgrind happy */
+    memset(&args, 0, sizeof(args));
+    args.handle = bo->handle;
+    do {
+        r = drmCommandWriteRead(ctx->fd, DRM_RADEON_GEM_WAIT_IDLE, &args, sizeof(args));
+    } while (r == -EBUSY);
+}
+
+/* Patch the bo handles into bo_relocs, then submit cs (ndw dwords) via DRM_RADEON_CS. */
+static void ctx_cs(struct ctx *ctx, uint32_t *cs, uint32_t cs_flags[2], unsigned ndw,
+                   struct bo **bo, uint32_t *bo_relocs, unsigned nbo)
+{
+    struct drm_radeon_cs args;
+    struct drm_radeon_cs_chunk chunks[3];
+    uint64_t chunk_array[3];
+    unsigned i;
+    int r;
+
+    memset(&args, 0, sizeof(args)); /* only num_chunks and chunks are set below */
+    /* update handle */
+    for (i = 0; i < nbo; i++) {
+        bo_relocs[i*4+0] = bo[i]->handle;
+    }
+
+    args.num_chunks = 2;
+    if (cs_flags[0] || cs_flags[1]) {
+        /* enable RADEON_CHUNK_ID_FLAGS */
+        args.num_chunks = 3;
+    }
+    args.chunks = (uint64_t)(uintptr_t)chunk_array;
+    chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
+    chunks[0].length_dw = ndw;
+    chunks[0].chunk_data = (uintptr_t)cs;
+    chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
+    chunks[1].length_dw = nbo * 4;
+    chunks[1].chunk_data = (uintptr_t)bo_relocs;
+    chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
+    chunks[2].length_dw = 2;
+    chunks[2].chunk_data = (uintptr_t)cs_flags;
+    chunk_array[0] = (uintptr_t)&chunks[0];
+    chunk_array[1] = (uintptr_t)&chunks[1];
+    chunk_array[2] = (uintptr_t)&chunks[2];
+
+    fprintf(stderr, "emiting cs %udw with %u bo\n", ndw, nbo);
+    r = drmCommandWriteRead(ctx->fd, DRM_RADEON_CS, &args, sizeof(args));
+    if (r) {
+        fprintf(stderr, "cs submission failed with %d\n", r);
+    }
+}
+
+/* mmap the whole bo for CPU access and store the pointer in bo->ptr; exits on error. */
+static void bo_map(struct ctx *ctx, struct bo *bo)
+{
+    struct drm_radeon_gem_mmap args;
+    void *ptr;
+    int r;
+
+    /* Zero out args to make valgrind happy */
+    memset(&args, 0, sizeof(args));
+    args.handle = bo->handle;
+    args.offset = 0;
+    args.size = (uint64_t)bo->size;
+    r = drmCommandWriteRead(ctx->fd, DRM_RADEON_GEM_MMAP, &args, sizeof(args));
+    if (r) {
+        fprintf(stderr, "error mapping %p 0x%08X (error = %d)\n", (void *)bo, bo->handle, r);
+        exit(-1);
+    }
+    ptr = mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED, ctx->fd, args.addr_ptr);
+    if (ptr == MAP_FAILED) {
+        fprintf(stderr, "%s failed to map bo\n", __func__);
+        exit(-1);
+    }
+    bo->ptr = ptr;
+}
+
+/* Map bo at GPU virtual address bo->va (vm_id 0) through DRM_RADEON_GEM_VA. */
+static void bo_va(struct ctx *ctx, struct bo *bo)
+{
+    struct drm_radeon_gem_va args;
+    int r;
+
+    args.handle = bo->handle;
+    args.vm_id = 0;
+    args.operation = RADEON_VA_MAP;
+    args.flags = RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_WRITEABLE | RADEON_VM_PAGE_SNOOPED;
+    args.offset = bo->va;
+    r = drmCommandWriteRead(ctx->fd, DRM_RADEON_GEM_VA, &args, sizeof(args));
+    if (r && args.operation == RADEON_VA_RESULT_ERROR) {
+        fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
+        fprintf(stderr, "radeon: size : %llu bytes\n", (unsigned long long)bo->size);
+        fprintf(stderr, "radeon: alignment : %u bytes\n", bo->alignment);
+        fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
+        exit(-1);
+    }
+}
+
+/* Create a GTT bo of ndw dwords, filled from data if non-NULL and bound at va if non-zero. */
+static struct bo *bo_new(struct ctx *ctx, unsigned ndw, uint32_t *data, uint64_t va, uint32_t alignment)
+{
+    struct drm_radeon_gem_create args;
+    struct bo *bo;
+    int r;
+
+    bo = calloc(1, sizeof(*bo));
+    if (bo == NULL) {
+        fprintf(stderr, "failed to malloc bo struct\n");
+        exit(-1);
+    }
+    bo->size = ndw * 4ULL;
+    bo->va = va;
+    bo->alignment = alignment;
+
+    args.size = bo->size;
+    args.alignment = bo->alignment;
+    args.initial_domain = RADEON_GEM_DOMAIN_GTT;
+    args.flags = 0;
+    args.handle = 0;
+
+    r = drmCommandWriteRead(ctx->fd, DRM_RADEON_GEM_CREATE, &args, sizeof(args));
+    bo->handle = args.handle;
+    if (r) {
+        fprintf(stderr, "Failed to allocate :\n");
+        fprintf(stderr, " size : %llu bytes\n", (unsigned long long)bo->size);
+        fprintf(stderr, " alignment : %u bytes\n", bo->alignment);
+        free(bo);
+        exit(-1);
+    }
+
+    if (data) {
+        bo_map(ctx, bo);
+        memcpy(bo->ptr, data, bo->size);
+    }
+
+    if (va) {
+        bo_va(ctx, bo);
+    }
+
+    return bo;
+}
+
+#endif