From 12cbd9a13f97f67b62657728bc244b96de7f7b83 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 31 Aug 2017 11:43:59 +0200 Subject: [PATCH] radeonsi: move si_vm_fault_occured() to AMD common code MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit For radv, in order to report VM faults when detected. Signed-off-by: Samuel Pitoiset Reviewed-by: Marek Olšák Reviewed-by: Bas Nieuwenhuizen --- src/amd/common/ac_debug.c | 109 ++++++++++++++++++++++++ src/amd/common/ac_debug.h | 4 + src/gallium/drivers/radeonsi/si_debug.c | 106 +---------------------- 3 files changed, 117 insertions(+), 102 deletions(-) diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c index d46fc27a9e1..0de00e27e75 100644 --- a/src/amd/common/ac_debug.c +++ b/src/amd/common/ac_debug.c @@ -34,6 +34,8 @@ #define VG(x) #endif +#include <inttypes.h> + #include "sid.h" #include "gfx9d.h" #include "sid_tables.h" @@ -597,3 +599,110 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids, fprintf(f, "------------------- %s end -------------------\n\n", name); } + +/** + * Parse dmesg and return TRUE if a VM fault has been detected. + * + * \param chip_class chip class + * \param old_dmesg_timestamp previous dmesg timestamp parsed at init time + * \param out_addr detected VM fault addr + */ +bool ac_vm_fault_occured(enum chip_class chip_class, + uint64_t *old_dmesg_timestamp, uint64_t *out_addr) +{ + char line[2000]; + unsigned sec, usec; + int progress = 0; + uint64_t dmesg_timestamp = 0; + bool fault = false; + + FILE *p = popen("dmesg", "r"); + if (!p) + return false; + + while (fgets(line, sizeof(line), p)) { + char *msg; + + if (!line[0] || line[0] == '\n') + continue; + + /* Get the timestamp. 
*/ + if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) { + static bool hit = false; + if (!hit) { + fprintf(stderr, "%s: failed to parse line '%s'\n", + __func__, line); + hit = true; + } + continue; + } + dmesg_timestamp = sec * 1000000ull + usec; + + /* If just updating the timestamp. */ + if (!out_addr) + continue; + + /* Process messages only if the timestamp is newer. */ + if (dmesg_timestamp <= *old_dmesg_timestamp) + continue; + + /* Only process the first VM fault. */ + if (fault) + continue; + + /* Remove trailing \n. len is size_t because a char cannot hold lengths up to sizeof(line). */ + size_t len = strlen(line); + if (len && line[len-1] == '\n') + line[len-1] = 0; + + /* Get the message part. */ + msg = strchr(line, ']'); + if (!msg) + continue; + msg++; + + const char *header_line, *addr_line_prefix, *addr_line_format; + + if (chip_class >= GFX9) { + /* Match this: + * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0) + * ..: at page 0x0000000219f8f000 from 27 + * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C + */ + header_line = "VMC page fault"; + addr_line_prefix = " at page"; + addr_line_format = "%"PRIx64; + } else { + header_line = "GPU fault detected:"; + addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR"; + addr_line_format = "%"PRIX64; + } + + switch (progress) { + case 0: + if (strstr(msg, header_line)) + progress = 1; + break; + case 1: + msg = strstr(msg, addr_line_prefix); + if (msg) { + msg = strstr(msg, "0x"); + if (msg) { + msg += 2; + if (sscanf(msg, addr_line_format, out_addr) == 1) + fault = true; + } + } + progress = 0; + break; + default: + progress = 0; + } + } + pclose(p); + + if (dmesg_timestamp > *old_dmesg_timestamp) + *old_dmesg_timestamp = dmesg_timestamp; + + return fault; +} diff --git a/src/amd/common/ac_debug.h b/src/amd/common/ac_debug.h index a37acd20296..277025d8b66 100644 --- a/src/amd/common/ac_debug.h +++ b/src/amd/common/ac_debug.h @@ -28,6 +28,7 @@ #include <stdint.h> #include <stdio.h> +#include <stdbool.h> #include "amd_family.h" @@ -46,4 +47,7 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, 
const int *trace_ids, unsigned trace_id_count, const char *name, enum chip_class chip_class, ac_debug_addr_callback addr_callback, void *addr_callback_data); +bool ac_vm_fault_occured(enum chip_class chip_class, + uint64_t *old_dmesg_timestamp, uint64_t *out_addr); + #endif diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 1b97ea40970..4e8d861019a 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -1092,106 +1092,6 @@ static void si_dump_dma(struct si_context *sctx, fprintf(f, "SDMA Dump Done.\n"); } -static bool si_vm_fault_occured(struct si_context *sctx, uint64_t *out_addr) -{ - char line[2000]; - unsigned sec, usec; - int progress = 0; - uint64_t timestamp = 0; - bool fault = false; - - FILE *p = popen("dmesg", "r"); - if (!p) - return false; - - while (fgets(line, sizeof(line), p)) { - char *msg, len; - - if (!line[0] || line[0] == '\n') - continue; - - /* Get the timestamp. */ - if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) { - static bool hit = false; - if (!hit) { - fprintf(stderr, "%s: failed to parse line '%s'\n", - __func__, line); - hit = true; - } - continue; - } - timestamp = sec * 1000000ull + usec; - - /* If just updating the timestamp. */ - if (!out_addr) - continue; - - /* Process messages only if the timestamp is newer. */ - if (timestamp <= sctx->dmesg_timestamp) - continue; - - /* Only process the first VM fault. */ - if (fault) - continue; - - /* Remove trailing \n */ - len = strlen(line); - if (len && line[len-1] == '\n') - line[len-1] = 0; - - /* Get the message part. 
*/ - msg = strchr(line, ']'); - if (!msg) { - assert(0); - continue; - } - msg++; - - const char *header_line, *addr_line_prefix, *addr_line_format; - - if (sctx->b.chip_class >= GFX9) { - /* Match this: - * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0) - * ..: at page 0x0000000219f8f000 from 27 - * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C - */ - header_line = "VMC page fault"; - addr_line_prefix = " at page"; - addr_line_format = "%"PRIx64; - } else { - header_line = "GPU fault detected:"; - addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR"; - addr_line_format = "%"PRIX64; - } - - switch (progress) { - case 0: - if (strstr(msg, header_line)) - progress = 1; - break; - case 1: - msg = strstr(msg, addr_line_prefix); - if (msg) { - msg = strstr(msg, "0x"); - if (msg) { - msg += 2; - if (sscanf(msg, addr_line_format, out_addr) == 1) - fault = true; - } - } - progress = 0; - break; - default: - progress = 0; - } - } - pclose(p); - - if (timestamp > sctx->dmesg_timestamp) - sctx->dmesg_timestamp = timestamp; - return fault; -} - void si_check_vm_faults(struct r600_common_context *ctx, struct radeon_saved_cs *saved, enum ring_type ring) { @@ -1201,7 +1101,8 @@ void si_check_vm_faults(struct r600_common_context *ctx, uint64_t addr; char cmd_line[4096]; - if (!si_vm_fault_occured(sctx, &addr)) + if (!ac_vm_fault_occured(sctx->b.chip_class, + &sctx->dmesg_timestamp, &addr)) return; f = dd_get_debug_file(false); @@ -1255,5 +1156,6 @@ void si_init_debug_functions(struct si_context *sctx) * only new messages will be checked for VM faults. */ if (sctx->screen->b.debug_flags & DBG_CHECK_VM) - si_vm_fault_occured(sctx, NULL); + ac_vm_fault_occured(sctx->b.chip_class, + &sctx->dmesg_timestamp, NULL); } -- 2.30.2