intel/tools/aub_dump: move aub file initialization to maybe_init()
[mesa.git] / src / intel / tools / intel_dump_gpu.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <stdint.h>
28 #include <stdbool.h>
29 #include <signal.h>
30 #include <stdarg.h>
31 #include <fcntl.h>
32 #include <sys/types.h>
33 #include <sys/sysmacros.h>
34 #include <sys/stat.h>
35 #include <sys/ioctl.h>
36 #include <unistd.h>
37 #include <errno.h>
38 #include <sys/mman.h>
39 #include <dlfcn.h>
40 #include "drm-uapi/i915_drm.h"
41 #include <inttypes.h>
42
43 #include "intel_aub.h"
44 #include "aub_write.h"
45
46 #include "dev/gen_device_info.h"
47 #include "util/macros.h"
48
/* Bootstrap stubs, defined near the end of the file.  They are the initial
 * targets of libc_close/libc_ioctl below.
 */
static int close_init_helper(int fd);
static int ioctl_init_helper(int fd, unsigned long request, ...);

/* Entry points used to reach the underlying close()/ioctl().  They start
 * out pointing at the bootstrap stubs.
 */
static int (*libc_ioctl)(int fd, unsigned long request, ...) = ioctl_init_helper;
static int (*libc_close)(int fd) = close_init_helper;

/* fd of the DRM device whose ioctls are being intercepted, or -1. */
static int drm_fd = -1;
/* Output path taken from the "file" config key, and the stream opened on it. */
static char *output_filename;
static FILE *output_file;
/* Verbosity from the "verbose" config key (0, 1 or 2). */
static int verbose;
/* True when the config supplied a "device" or "platform" override. */
static bool device_override = false;
60
/* Dimensions of the bo lookup table: one row of MAX_BO_COUNT handles for
 * each of MAX_FD_COUNT file descriptors.  The expansions are parenthesized
 * so they stay a single operand next to any operator (e.g. x / MAX_BO_COUNT).
 */
#define MAX_FD_COUNT 64
#define MAX_BO_COUNT (64 * 1024)
63
/* Book-keeping for one GEM buffer object seen on an intercepted fd. */
struct bo {
   /* Size in bytes; 0 marks an unused/invalid slot. */
   uint32_t size;
   /* GPU address assigned (or pinned) while dumping an execbuffer. */
   uint64_t offset;
   /* CPU mapping, NULL until mapped.  Bit 0 is set for userptr BOs. */
   void *map;
};

/* Flat table of bos, indexed by handle + fd * MAX_BO_COUNT. */
static struct bo *bos;
71
/* Character-device major number checked to recognise DRM fds. */
#define DRM_MAJOR 226

/* We set bit 0 in the map pointer for userptr BOs so we know not to
 * munmap them on DRM_IOCTL_GEM_CLOSE.
 *
 * IS_USERPTR(p) is non-zero when the tag bit is set; GET_PTR(p) strips the
 * tag and yields the usable pointer.  The macro argument is parenthesized
 * so that expressions such as GET_PTR(base + 1) are converted as a whole.
 */
#define USERPTR_FLAG 1
#define IS_USERPTR(p) ((uintptr_t) (p) & USERPTR_FLAG)
#define GET_PTR(p) ( (void *) ((uintptr_t) (p) & ~(uintptr_t) USERPTR_FLAG) )
80
/* Report a failed check.
 *
 * When cond is non-zero, prints "intel_dump_gpu: " followed by the
 * printf-style message to stderr and raises SIGTRAP.  When cond is zero the
 * call does nothing.
 */
static void __attribute__ ((format(__printf__, 2, 3)))
fail_if(int cond, const char *format, ...)
{
   if (cond) {
      va_list ap;

      va_start(ap, format);
      fprintf(stderr, "intel_dump_gpu: ");
      vfprintf(stderr, format, ap);
      va_end(ap);

      raise(SIGTRAP);
   }
}
96
97 static struct bo *
98 get_bo(unsigned fd, uint32_t handle)
99 {
100 struct bo *bo;
101
102 fail_if(handle >= MAX_BO_COUNT, "bo handle too large\n");
103 fail_if(fd >= MAX_FD_COUNT, "bo fd too large\n");
104 bo = &bos[handle + fd * MAX_BO_COUNT];
105
106 return bo;
107 }
108
/* Round v up to the next multiple of a.  The mask arithmetic only yields a
 * multiple when a is a power of two.
 */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   const uint32_t mask = a - 1;

   return (v + mask) & ~mask;
}
114
115 static struct gen_device_info devinfo = {0};
116 static int device = 0;
117 static struct aub_file aub_file;
118
119 static void *
120 relocate_bo(int fd, struct bo *bo, const struct drm_i915_gem_execbuffer2 *execbuffer2,
121 const struct drm_i915_gem_exec_object2 *obj)
122 {
123 const struct drm_i915_gem_exec_object2 *exec_objects =
124 (struct drm_i915_gem_exec_object2 *) (uintptr_t) execbuffer2->buffers_ptr;
125 const struct drm_i915_gem_relocation_entry *relocs =
126 (const struct drm_i915_gem_relocation_entry *) (uintptr_t) obj->relocs_ptr;
127 void *relocated;
128 int handle;
129
130 relocated = malloc(bo->size);
131 fail_if(relocated == NULL, "out of memory\n");
132 memcpy(relocated, GET_PTR(bo->map), bo->size);
133 for (size_t i = 0; i < obj->relocation_count; i++) {
134 fail_if(relocs[i].offset >= bo->size, "reloc outside bo\n");
135
136 if (execbuffer2->flags & I915_EXEC_HANDLE_LUT)
137 handle = exec_objects[relocs[i].target_handle].handle;
138 else
139 handle = relocs[i].target_handle;
140
141 aub_write_reloc(&devinfo, ((char *)relocated) + relocs[i].offset,
142 get_bo(fd, handle)->offset + relocs[i].delta);
143 }
144
145 return relocated;
146 }
147
/* Issue an ioctl through libc_ioctl, retrying for as long as it fails with
 * EINTR or EAGAIN.  Returns the result of the last attempt.
 */
static int
gem_ioctl(int fd, unsigned long request, void *argp)
{
   for (;;) {
      const int ret = libc_ioctl(fd, request, argp);

      if (ret != -1 || (errno != EINTR && errno != EAGAIN))
         return ret;
   }
}
159
160 static void *
161 gem_mmap(int fd, uint32_t handle, uint64_t offset, uint64_t size)
162 {
163 struct drm_i915_gem_mmap mmap = {
164 .handle = handle,
165 .offset = offset,
166 .size = size
167 };
168
169 if (gem_ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap) == -1)
170 return MAP_FAILED;
171
172 return (void *)(uintptr_t) mmap.addr_ptr;
173 }
174
175 static enum drm_i915_gem_engine_class
176 engine_class_from_ring_flag(uint32_t ring_flag)
177 {
178 switch (ring_flag) {
179 case I915_EXEC_DEFAULT:
180 case I915_EXEC_RENDER:
181 return I915_ENGINE_CLASS_RENDER;
182 case I915_EXEC_BSD:
183 return I915_ENGINE_CLASS_VIDEO;
184 case I915_EXEC_BLT:
185 return I915_ENGINE_CLASS_COPY;
186 case I915_EXEC_VEBOX:
187 return I915_ENGINE_CLASS_VIDEO_ENHANCE;
188 default:
189 return I915_ENGINE_CLASS_INVALID;
190 }
191 }
192
193 static void
194 dump_execbuffer2(int fd, struct drm_i915_gem_execbuffer2 *execbuffer2)
195 {
196 struct drm_i915_gem_exec_object2 *exec_objects =
197 (struct drm_i915_gem_exec_object2 *) (uintptr_t) execbuffer2->buffers_ptr;
198 uint32_t ring_flag = execbuffer2->flags & I915_EXEC_RING_MASK;
199 uint32_t offset;
200 struct drm_i915_gem_exec_object2 *obj;
201 struct bo *bo, *batch_bo;
202 int batch_index;
203 void *data;
204
205 /* We can't do this at open time as we're not yet authenticated. */
206 if (device == 0) {
207 fail_if(!gen_get_device_info_from_fd(fd, &devinfo),
208 "failed to identify chipset.\n");
209 device = devinfo.chipset_id;
210 } else if (devinfo.gen == 0) {
211 fail_if(!gen_get_device_info_from_pci_id(device, &devinfo),
212 "failed to identify chipset.\n");
213 }
214
215 if (!aub_file.file) {
216 aub_file_init(&aub_file, output_file,
217 verbose == 2 ? stdout : NULL,
218 device, program_invocation_short_name);
219 aub_write_default_setup(&aub_file);
220
221 if (verbose)
222 printf("[running, output file %s, chipset id 0x%04x, gen %d]\n",
223 output_filename, device, devinfo.gen);
224 }
225
226 if (aub_use_execlists(&aub_file))
227 offset = 0x1000;
228 else
229 offset = aub_gtt_size(&aub_file);
230
231 if (verbose)
232 printf("Dumping execbuffer2:\n");
233
234 for (uint32_t i = 0; i < execbuffer2->buffer_count; i++) {
235 obj = &exec_objects[i];
236 bo = get_bo(fd, obj->handle);
237
238 /* If bo->size == 0, this means they passed us an invalid
239 * buffer. The kernel will reject it and so should we.
240 */
241 if (bo->size == 0) {
242 if (verbose)
243 printf("BO #%d is invalid!\n", obj->handle);
244 return;
245 }
246
247 if (obj->flags & EXEC_OBJECT_PINNED) {
248 bo->offset = obj->offset;
249 if (verbose)
250 printf("BO #%d (%dB) pinned @ 0x%" PRIx64 "\n",
251 obj->handle, bo->size, bo->offset);
252 } else {
253 if (obj->alignment != 0)
254 offset = align_u32(offset, obj->alignment);
255 bo->offset = offset;
256 if (verbose)
257 printf("BO #%d (%dB) @ 0x%" PRIx64 "\n", obj->handle,
258 bo->size, bo->offset);
259 offset = align_u32(offset + bo->size + 4095, 4096);
260 }
261
262 if (bo->map == NULL && bo->size > 0)
263 bo->map = gem_mmap(fd, obj->handle, 0, bo->size);
264 fail_if(bo->map == MAP_FAILED, "bo mmap failed\n");
265
266 if (aub_use_execlists(&aub_file))
267 aub_map_ppgtt(&aub_file, bo->offset, bo->size);
268 }
269
270 batch_index = (execbuffer2->flags & I915_EXEC_BATCH_FIRST) ? 0 :
271 execbuffer2->buffer_count - 1;
272 batch_bo = get_bo(fd, exec_objects[batch_index].handle);
273 for (uint32_t i = 0; i < execbuffer2->buffer_count; i++) {
274 obj = &exec_objects[i];
275 bo = get_bo(fd, obj->handle);
276
277 if (obj->relocation_count > 0)
278 data = relocate_bo(fd, bo, execbuffer2, obj);
279 else
280 data = bo->map;
281
282 if (bo == batch_bo) {
283 aub_write_trace_block(&aub_file, AUB_TRACE_TYPE_BATCH,
284 GET_PTR(data), bo->size, bo->offset);
285 } else {
286 aub_write_trace_block(&aub_file, AUB_TRACE_TYPE_NOTYPE,
287 GET_PTR(data), bo->size, bo->offset);
288 }
289
290 if (data != bo->map)
291 free(data);
292 }
293
294 uint32_t ctx_id = execbuffer2->rsvd1;
295
296 aub_write_exec(&aub_file, ctx_id,
297 batch_bo->offset + execbuffer2->batch_start_offset,
298 offset, engine_class_from_ring_flag(ring_flag));
299
300 if (device_override &&
301 (execbuffer2->flags & I915_EXEC_FENCE_ARRAY) != 0) {
302 struct drm_i915_gem_exec_fence *fences =
303 (void*)(uintptr_t)execbuffer2->cliprects_ptr;
304 for (uint32_t i = 0; i < execbuffer2->num_cliprects; i++) {
305 if ((fences[i].flags & I915_EXEC_FENCE_SIGNAL) != 0) {
306 struct drm_syncobj_array arg = {
307 .handles = (uintptr_t)&fences[i].handle,
308 .count_handles = 1,
309 .pad = 0,
310 };
311 libc_ioctl(fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &arg);
312 }
313 }
314 }
315 }
316
317 static void
318 add_new_bo(unsigned fd, int handle, uint64_t size, void *map)
319 {
320 struct bo *bo = &bos[handle + fd * MAX_BO_COUNT];
321
322 fail_if(handle >= MAX_BO_COUNT, "bo handle out of range\n");
323 fail_if(fd >= MAX_FD_COUNT, "bo fd out of range\n");
324 fail_if(size == 0, "bo size is invalid\n");
325
326 bo->size = size;
327 bo->map = map;
328 }
329
330 static void
331 remove_bo(int fd, int handle)
332 {
333 struct bo *bo = get_bo(fd, handle);
334
335 if (bo->map && !IS_USERPTR(bo->map))
336 munmap(bo->map, bo->size);
337 bo->size = 0;
338 bo->map = NULL;
339 }
340
341 __attribute__ ((visibility ("default"))) int
342 close(int fd)
343 {
344 if (fd == drm_fd)
345 drm_fd = -1;
346
347 return libc_close(fd);
348 }
349
350 static int
351 get_pci_id(int fd, int *pci_id)
352 {
353 struct drm_i915_getparam gparam;
354
355 if (device_override) {
356 *pci_id = device;
357 return 0;
358 }
359
360 gparam.param = I915_PARAM_CHIPSET_ID;
361 gparam.value = pci_id;
362 return libc_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gparam);
363 }
364
365 static void
366 maybe_init(int fd)
367 {
368 static bool initialized = false;
369 FILE *config;
370 char *key, *value;
371
372 if (initialized)
373 return;
374
375 initialized = true;
376
377 config = fopen(getenv("INTEL_DUMP_GPU_CONFIG"), "r");
378 while (fscanf(config, "%m[^=]=%m[^\n]\n", &key, &value) != EOF) {
379 if (!strcmp(key, "verbose")) {
380 if (!strcmp(value, "1")) {
381 verbose = 1;
382 } else if (!strcmp(value, "2")) {
383 verbose = 2;
384 }
385 } else if (!strcmp(key, "device")) {
386 fail_if(device != 0, "Device/Platform override specified multiple times.");
387 fail_if(sscanf(value, "%i", &device) != 1,
388 "failed to parse device id '%s'",
389 value);
390 device_override = true;
391 } else if (!strcmp(key, "platform")) {
392 fail_if(device != 0, "Device/Platform override specified multiple times.");
393 device = gen_device_name_to_pci_device_id(value);
394 fail_if(device == -1, "Unknown platform '%s'", value);
395 device_override = true;
396 } else if (!strcmp(key, "file")) {
397 output_filename = strdup(value);
398 output_file = fopen(output_filename, "w+");
399 fail_if(output_file == NULL,
400 "failed to open file '%s'\n",
401 output_filename);
402 } else {
403 fprintf(stderr, "unknown option '%s'\n", key);
404 }
405
406 free(key);
407 free(value);
408 }
409 fclose(config);
410
411 bos = calloc(MAX_FD_COUNT * MAX_BO_COUNT, sizeof(bos[0]));
412 fail_if(bos == NULL, "out of memory\n");
413
414 int ret = get_pci_id(fd, &device);
415 assert(ret == 0);
416
417 aub_file_init(&aub_file, output_file,
418 verbose == 2 ? stdout : NULL,
419 device, program_invocation_short_name);
420 aub_write_default_setup(&aub_file);
421
422 if (verbose)
423 printf("[running, output file %s, chipset id 0x%04x, gen %d]\n",
424 output_filename, device, devinfo.gen);
425 }
426
427 __attribute__ ((visibility ("default"))) int
428 ioctl(int fd, unsigned long request, ...)
429 {
430 va_list args;
431 void *argp;
432 int ret;
433 struct stat buf;
434
435 va_start(args, request);
436 argp = va_arg(args, void *);
437 va_end(args);
438
439 if (_IOC_TYPE(request) == DRM_IOCTL_BASE &&
440 drm_fd != fd && fstat(fd, &buf) == 0 &&
441 (buf.st_mode & S_IFMT) == S_IFCHR && major(buf.st_rdev) == DRM_MAJOR) {
442 drm_fd = fd;
443 if (verbose)
444 printf("[intercept drm ioctl on fd %d]\n", fd);
445 }
446
447 if (fd == drm_fd) {
448 maybe_init(fd);
449
450 switch (request) {
451 case DRM_IOCTL_I915_GETPARAM: {
452 struct drm_i915_getparam *getparam = argp;
453 return get_pci_id(fd, getparam->value);
454 }
455
456 case DRM_IOCTL_I915_GEM_EXECBUFFER: {
457 static bool once;
458 if (!once) {
459 fprintf(stderr,
460 "application uses DRM_IOCTL_I915_GEM_EXECBUFFER, not handled\n");
461 once = true;
462 }
463 return libc_ioctl(fd, request, argp);
464 }
465
466 case DRM_IOCTL_I915_GEM_EXECBUFFER2:
467 case DRM_IOCTL_I915_GEM_EXECBUFFER2_WR: {
468 dump_execbuffer2(fd, argp);
469 if (device_override)
470 return 0;
471
472 return libc_ioctl(fd, request, argp);
473 }
474
475 case DRM_IOCTL_I915_GEM_CONTEXT_CREATE: {
476 uint32_t *ctx_id = NULL;
477 struct drm_i915_gem_context_create *create = argp;
478 ret = 0;
479 if (!device_override) {
480 ret = libc_ioctl(fd, request, argp);
481 ctx_id = &create->ctx_id;
482 }
483
484 if (ret == 0)
485 create->ctx_id = aub_write_context_create(&aub_file, ctx_id);
486
487 return ret;
488 }
489
490 case DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT: {
491 uint32_t *ctx_id = NULL;
492 struct drm_i915_gem_context_create_ext *create = argp;
493 ret = 0;
494 if (!device_override) {
495 ret = libc_ioctl(fd, request, argp);
496 ctx_id = &create->ctx_id;
497 }
498
499 if (ret == 0)
500 create->ctx_id = aub_write_context_create(&aub_file, ctx_id);
501
502 return ret;
503 }
504
505 case DRM_IOCTL_I915_GEM_CREATE: {
506 struct drm_i915_gem_create *create = argp;
507
508 ret = libc_ioctl(fd, request, argp);
509 if (ret == 0)
510 add_new_bo(fd, create->handle, create->size, NULL);
511
512 return ret;
513 }
514
515 case DRM_IOCTL_I915_GEM_USERPTR: {
516 struct drm_i915_gem_userptr *userptr = argp;
517
518 ret = libc_ioctl(fd, request, argp);
519 if (ret == 0)
520 add_new_bo(fd, userptr->handle, userptr->user_size,
521 (void *) (uintptr_t) (userptr->user_ptr | USERPTR_FLAG));
522
523 return ret;
524 }
525
526 case DRM_IOCTL_GEM_CLOSE: {
527 struct drm_gem_close *close = argp;
528
529 remove_bo(fd, close->handle);
530
531 return libc_ioctl(fd, request, argp);
532 }
533
534 case DRM_IOCTL_GEM_OPEN: {
535 struct drm_gem_open *open = argp;
536
537 ret = libc_ioctl(fd, request, argp);
538 if (ret == 0)
539 add_new_bo(fd, open->handle, open->size, NULL);
540
541 return ret;
542 }
543
544 case DRM_IOCTL_PRIME_FD_TO_HANDLE: {
545 struct drm_prime_handle *prime = argp;
546
547 ret = libc_ioctl(fd, request, argp);
548 if (ret == 0) {
549 off_t size;
550
551 size = lseek(prime->fd, 0, SEEK_END);
552 fail_if(size == -1, "failed to get prime bo size\n");
553 add_new_bo(fd, prime->handle, size, NULL);
554
555 }
556
557 return ret;
558 }
559
560 default:
561 return libc_ioctl(fd, request, argp);
562 }
563 } else {
564 return libc_ioctl(fd, request, argp);
565 }
566 }
567
568 static void
569 init(void)
570 {
571 libc_close = dlsym(RTLD_NEXT, "close");
572 libc_ioctl = dlsym(RTLD_NEXT, "ioctl");
573 fail_if(libc_close == NULL || libc_ioctl == NULL,
574 "failed to get libc ioctl or close\n");
575 }
576
/* Initial target of libc_close: runs init() to resolve the real symbols,
 * then forwards this first call through the updated pointer.
 */
static int
close_init_helper(int fd)
{
   int ret;

   init();
   ret = libc_close(fd);

   return ret;
}
583
/* Initial target of libc_ioctl: runs init() to resolve the real symbols,
 * then forwards this first call, with its single pointer argument, through
 * the updated pointer.
 */
static int
ioctl_init_helper(int fd, unsigned long request, ...)
{
   va_list ap;
   void *arg;

   init();

   va_start(ap, request);
   arg = va_arg(ap, void *);
   va_end(ap);

   return libc_ioctl(fd, request, arg);
}
597
598 static void __attribute__ ((destructor))
599 fini(void)
600 {
601 if (devinfo.gen != 0) {
602 free(output_filename);
603 aub_file_finish(&aub_file);
604 free(bos);
605 }
606 }