radv: dump some status MMIO registers when a hang occurred
[mesa.git] / src / amd / vulkan / radv_debug.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>

#include "sid.h"
#include "gfx9d.h"
#include "ac_debug.h"
#include "radv_debug.h"
#include "radv_shader.h"
35 #include "radv_shader.h"
36
/* Size in bytes of the trace buffer created and cleared by radv_init_trace(). */
#define TRACE_BO_SIZE 4096
38
/* Trace BO layout (offsets are in units of 4 bytes):
 *
 * [0]: primary trace ID
 * [1]: secondary trace ID
 * [2-3]: 64-bit GFX pipeline pointer
 * [4-5]: 64-bit COMPUTE pipeline pointer
 */
46
47 bool
48 radv_init_trace(struct radv_device *device)
49 {
50 struct radeon_winsys *ws = device->ws;
51
52 device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8,
53 RADEON_DOMAIN_VRAM,
54 RADEON_FLAG_CPU_ACCESS);
55 if (!device->trace_bo)
56 return false;
57
58 device->trace_id_ptr = ws->buffer_map(device->trace_bo);
59 if (!device->trace_id_ptr)
60 return false;
61
62 memset(device->trace_id_ptr, 0, TRACE_BO_SIZE);
63
64 ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
65 &device->dmesg_timestamp, NULL);
66
67 return true;
68 }
69
70 static void
71 radv_dump_trace(struct radv_device *device, struct radeon_winsys_cs *cs)
72 {
73 const char *filename = getenv("RADV_TRACE_FILE");
74 FILE *f = fopen(filename, "w");
75
76 if (!f) {
77 fprintf(stderr, "Failed to write trace dump to %s\n", filename);
78 return;
79 }
80
81 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
82 device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2);
83 fclose(f);
84 }
85
86 static void
87 radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
88 {
89 struct radeon_winsys *ws = device->ws;
90 uint32_t value;
91
92 if (ws->read_registers(ws, offset, 1, &value))
93 ac_dump_reg(f, device->physical_device->rad_info.chip_class,
94 offset, value, ~0);
95 }
96
97 static void
98 radv_dump_debug_registers(struct radv_device *device, FILE *f)
99 {
100 struct radeon_info *info = &device->physical_device->rad_info;
101
102 if (info->drm_major == 2 && info->drm_minor < 42)
103 return; /* no radeon support */
104
105 fprintf(f, "Memory-mapped registers:\n");
106 radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
107
108 /* No other registers can be read on DRM < 3.1.0. */
109 if (info->drm_major < 3 || info->drm_minor < 1) {
110 fprintf(f, "\n");
111 return;
112 }
113
114 radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
115 radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
116 radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
117 radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
118 radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
119 radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
120 radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
121 if (info->chip_class <= VI) {
122 radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
123 radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
124 radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
125 }
126 radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
127 radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
128 radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
129 radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
130 radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
131 radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
132 radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
133 radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
134 radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
135 radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
136 fprintf(f, "\n");
137 }
138
139 static void
140 radv_dump_shader(struct radv_pipeline *pipeline,
141 struct radv_shader_variant *shader, gl_shader_stage stage,
142 FILE *f)
143 {
144 if (!shader)
145 return;
146
147 fprintf(f, "%s:\n%s\n\n", radv_get_shader_name(shader, stage),
148 shader->disasm_string);
149
150 radv_shader_dump_stats(pipeline->device, shader, stage, f);
151 }
152
153 static void
154 radv_dump_shaders(struct radv_pipeline *pipeline,
155 struct radv_shader_variant *compute_shader, FILE *f)
156 {
157 unsigned mask;
158
159 /* Dump active graphics shaders. */
160 mask = pipeline->active_stages;
161 while (mask) {
162 int stage = u_bit_scan(&mask);
163
164 radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f);
165 }
166
167 radv_dump_shader(pipeline, compute_shader, MESA_SHADER_COMPUTE, f);
168 }
169
170 static void
171 radv_dump_graphics_state(struct radv_pipeline *graphics_pipeline,
172 struct radv_pipeline *compute_pipeline, FILE *f)
173 {
174 struct radv_shader_variant *compute_shader =
175 compute_pipeline ? compute_pipeline->shaders[MESA_SHADER_COMPUTE] : NULL;
176
177 if (!graphics_pipeline)
178 return;
179
180 radv_dump_shaders(graphics_pipeline, compute_shader, f);
181 }
182
183 static void
184 radv_dump_compute_state(struct radv_pipeline *compute_pipeline, FILE *f)
185 {
186 if (!compute_pipeline)
187 return;
188
189 radv_dump_shaders(compute_pipeline,
190 compute_pipeline->shaders[MESA_SHADER_COMPUTE], f);
191 }
192
193 static struct radv_pipeline *
194 radv_get_saved_graphics_pipeline(struct radv_device *device)
195 {
196 uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
197
198 return (struct radv_pipeline *)ptr[1];
199 }
200
201 static struct radv_pipeline *
202 radv_get_saved_compute_pipeline(struct radv_device *device)
203 {
204 uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
205
206 return (struct radv_pipeline *)ptr[2];
207 }
208
209 static bool
210 radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
211 {
212 struct radeon_winsys *ws = queue->device->ws;
213
214 if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx))
215 return true;
216
217 return false;
218 }
219
220 void
221 radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_winsys_cs *cs)
222 {
223 struct radv_pipeline *graphics_pipeline, *compute_pipeline;
224 struct radv_device *device = queue->device;
225 enum ring_type ring;
226 uint64_t addr;
227
228 ring = radv_queue_family_to_ring(queue->queue_family_index);
229
230 bool hang_occurred = radv_gpu_hang_occured(queue, ring);
231 bool vm_fault_occurred = false;
232 if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
233 vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
234 &device->dmesg_timestamp, &addr);
235 if (!hang_occurred && !vm_fault_occurred)
236 return;
237
238 graphics_pipeline = radv_get_saved_graphics_pipeline(device);
239 compute_pipeline = radv_get_saved_compute_pipeline(device);
240
241 if (vm_fault_occurred) {
242 fprintf(stderr, "VM fault report.\n\n");
243 fprintf(stderr, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
244 }
245
246 radv_dump_debug_registers(device, stderr);
247
248 switch (ring) {
249 case RING_GFX:
250 radv_dump_graphics_state(graphics_pipeline, compute_pipeline,
251 stderr);
252 break;
253 case RING_COMPUTE:
254 radv_dump_compute_state(compute_pipeline, stderr);
255 break;
256 default:
257 assert(0);
258 break;
259 }
260
261 radv_dump_trace(queue->device, cs);
262 abort();
263 }
264
265 void
266 radv_print_spirv(struct radv_shader_module *module, FILE *fp)
267 {
268 char path[] = "/tmp/fileXXXXXX";
269 char line[2048], command[128];
270 FILE *p;
271 int fd;
272
273 /* Dump the binary into a temporary file. */
274 fd = mkstemp(path);
275 if (fd < 0)
276 return;
277
278 if (write(fd, module->data, module->size) == -1)
279 goto fail;
280
281 sprintf(command, "spirv-dis %s", path);
282
283 /* Disassemble using spirv-dis if installed. */
284 p = popen(command, "r");
285 if (p) {
286 while (fgets(line, sizeof(line), p))
287 fprintf(fp, "%s", line);
288 pclose(p);
289 }
290
291 fail:
292 close(fd);
293 unlink(path);
294 }