2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>

#include "util/list.h"
#include "util/macros.h"
#include "util/rb_tree.h"

#include "common/gen_decoder.h"
#include "intel_aub.h"
#include "aub_read.h"
#ifndef HAVE_MEMFD_CREATE
#include <sys/syscall.h>

/* Fallback for libc versions without a memfd_create() wrapper: invoke the
 * system call directly.
 */
static inline int
memfd_create(const char *name, unsigned int flags)
{
   return syscall(SYS_memfd_create, name, flags);
}
#endif
/* MI_BATCH_BUFFER_END is the only command missing from intel_aub.h in
 * libdrm, so we reuse libdrm's intel_aub.h and define
 * AUB_MI_BATCH_BUFFER_END ourselves below.
 */
#define AUB_MI_BATCH_BUFFER_END (0x0500 << 16)

/* ANSI SGR escape sequences used to colorize section headers in the
 * output.  "\x1b" (ESC) is spelled out rather than the GNU-only "\e"
 * escape for portability.
 */
#define CSI "\x1b["
#define BLUE_HEADER  CSI "0;44m"
#define GREEN_HEADER CSI "1;42m"
#define NORMAL       CSI "0m"
/* Decoder options; overridden from the command line in main(). */
static int option_full_decode = true;    /* decode full packet contents, not only headers */
static int option_print_offsets = true;  /* prefix each instruction with its offset */
static int max_vbo_lines = -1;           /* cap on decoded VBO lines; -1 = unlimited */
static enum { COLOR_AUTO, COLOR_ALWAYS, COLOR_NEVER } option_color;
80 char *input_file
= NULL
, *xml_path
= NULL
;
81 struct gen_device_info devinfo
;
82 struct gen_batch_decode_ctx batch_ctx
;
85 struct list_head link
;
86 struct gen_batch_decode_bo bo
;
103 static struct list_head maps
;
104 static struct rb_tree ggtt
= {NULL
};
105 static struct rb_tree mem
= {NULL
};
107 off_t mem_fd_len
= 0;
111 struct brw_instruction
;
114 add_gtt_bo_map(struct gen_batch_decode_bo bo
, bool unmap_after_use
)
116 struct bo_map
*m
= calloc(1, sizeof(*m
));
119 m
->unmap_after_use
= unmap_after_use
;
120 list_add(&m
->link
, &maps
);
126 list_for_each_entry_safe(struct bo_map
, i
, &maps
, link
) {
127 if (i
->unmap_after_use
)
128 munmap((void *)i
->bo
.map
, i
->bo
.size
);
134 static inline struct ggtt_entry
*
135 ggtt_entry_next(struct ggtt_entry
*entry
)
139 struct rb_node
*node
= rb_node_next(&entry
->node
);
142 return rb_node_data(struct ggtt_entry
, node
, node
);
/* Three-way compare for uint64_t keys: negative when a < b, positive when
 * a > b, zero when equal.  Written out explicitly because subtracting and
 * truncating to int could overflow/wrap for 64-bit values.
 */
static inline int
cmp_uint64(uint64_t a, uint64_t b)
{
   if (a < b)
      return -1;
   if (a > b)
      return 1;
   return 0;
}
156 cmp_ggtt_entry(const struct rb_node
*node
, const void *addr
)
158 struct ggtt_entry
*entry
= rb_node_data(struct ggtt_entry
, node
, node
);
159 return cmp_uint64(entry
->virt_addr
, *(const uint64_t *)addr
);
162 static struct ggtt_entry
*
163 ensure_ggtt_entry(struct rb_tree
*tree
, uint64_t virt_addr
)
165 struct rb_node
*node
= rb_tree_search_sloppy(&ggtt
, &virt_addr
,
168 if (!node
|| (cmp
= cmp_ggtt_entry(node
, &virt_addr
))) {
169 struct ggtt_entry
*new_entry
= calloc(1, sizeof(*new_entry
));
170 new_entry
->virt_addr
= virt_addr
;
171 rb_tree_insert_at(&ggtt
, node
, &new_entry
->node
, cmp
> 0);
172 node
= &new_entry
->node
;
175 return rb_node_data(struct ggtt_entry
, node
, node
);
178 static struct ggtt_entry
*
179 search_ggtt_entry(uint64_t virt_addr
)
183 struct rb_node
*node
= rb_tree_search(&ggtt
, &virt_addr
, cmp_ggtt_entry
);
188 return rb_node_data(struct ggtt_entry
, node
, node
);
192 cmp_phys_mem(const struct rb_node
*node
, const void *addr
)
194 struct phys_mem
*mem
= rb_node_data(struct phys_mem
, node
, node
);
195 return cmp_uint64(mem
->phys_addr
, *(uint64_t *)addr
);
198 static struct phys_mem
*
199 ensure_phys_mem(uint64_t phys_addr
)
201 struct rb_node
*node
= rb_tree_search_sloppy(&mem
, &phys_addr
, cmp_phys_mem
);
203 if (!node
|| (cmp
= cmp_phys_mem(node
, &phys_addr
))) {
204 struct phys_mem
*new_mem
= calloc(1, sizeof(*new_mem
));
205 new_mem
->phys_addr
= phys_addr
;
206 new_mem
->fd_offset
= mem_fd_len
;
208 MAYBE_UNUSED
int ftruncate_res
= ftruncate(mem_fd
, mem_fd_len
+= 4096);
209 assert(ftruncate_res
== 0);
211 new_mem
->data
= mmap(NULL
, 4096, PROT_READ
| PROT_WRITE
, MAP_SHARED
,
212 mem_fd
, new_mem
->fd_offset
);
213 assert(new_mem
->data
!= MAP_FAILED
);
215 rb_tree_insert_at(&mem
, node
, &new_mem
->node
, cmp
> 0);
216 node
= &new_mem
->node
;
219 return rb_node_data(struct phys_mem
, node
, node
);
222 static struct phys_mem
*
223 search_phys_mem(uint64_t phys_addr
)
227 struct rb_node
*node
= rb_tree_search(&mem
, &phys_addr
, cmp_phys_mem
);
232 return rb_node_data(struct phys_mem
, node
, node
);
236 handle_local_write(void *user_data
, uint64_t address
, const void *data
, uint32_t size
)
238 struct gen_batch_decode_bo bo
= {
243 add_gtt_bo_map(bo
, false);
247 handle_ggtt_entry_write(void *user_data
, uint64_t address
, const void *_data
, uint32_t _size
)
249 uint64_t virt_addr
= (address
/ sizeof(uint64_t)) << 12;
250 const uint64_t *data
= _data
;
251 size_t size
= _size
/ sizeof(*data
);
252 for (const uint64_t *entry
= data
;
254 entry
++, virt_addr
+= 4096) {
255 struct ggtt_entry
*pt
= ensure_ggtt_entry(&ggtt
, virt_addr
);
256 pt
->phys_addr
= *entry
;
261 handle_physical_write(void *user_data
, uint64_t phys_address
, const void *data
, uint32_t size
)
263 uint32_t to_write
= size
;
264 for (uint64_t page
= phys_address
& ~0xfff; page
< phys_address
+ size
; page
+= 4096) {
265 struct phys_mem
*mem
= ensure_phys_mem(page
);
266 uint64_t offset
= MAX2(page
, phys_address
) - page
;
267 uint32_t size_this_page
= MIN2(to_write
, 4096 - offset
);
268 to_write
-= size_this_page
;
269 memcpy(mem
->data
+ offset
, data
, size_this_page
);
270 data
= (const uint8_t *)data
+ size_this_page
;
275 handle_ggtt_write(void *user_data
, uint64_t virt_address
, const void *data
, uint32_t size
)
277 uint32_t to_write
= size
;
278 for (uint64_t page
= virt_address
& ~0xfff; page
< virt_address
+ size
; page
+= 4096) {
279 struct ggtt_entry
*entry
= search_ggtt_entry(page
);
280 assert(entry
&& entry
->phys_addr
& 0x1);
282 uint64_t offset
= MAX2(page
, virt_address
) - page
;
283 uint32_t size_this_page
= MIN2(to_write
, 4096 - offset
);
284 to_write
-= size_this_page
;
286 uint64_t phys_page
= entry
->phys_addr
& ~0xfff; /* Clear the validity bits. */
287 handle_physical_write(user_data
, phys_page
+ offset
, data
, size_this_page
);
288 data
= (const uint8_t *)data
+ size_this_page
;
292 static struct gen_batch_decode_bo
293 get_ggtt_batch_bo(void *user_data
, uint64_t address
)
295 struct gen_batch_decode_bo bo
= {0};
297 list_for_each_entry(struct bo_map
, i
, &maps
, link
)
298 if (i
->bo
.addr
<= address
&& i
->bo
.addr
+ i
->bo
.size
> address
)
303 struct ggtt_entry
*start
=
304 (struct ggtt_entry
*)rb_tree_search_sloppy(&ggtt
, &address
,
306 if (start
&& start
->virt_addr
< address
)
307 start
= ggtt_entry_next(start
);
311 struct ggtt_entry
*last
= start
;
312 for (struct ggtt_entry
*i
= ggtt_entry_next(last
);
313 i
&& last
->virt_addr
+ 4096 == i
->virt_addr
;
314 last
= i
, i
= ggtt_entry_next(last
))
317 bo
.addr
= MIN2(address
, start
->virt_addr
);
318 bo
.size
= last
->virt_addr
- bo
.addr
+ 4096;
319 bo
.map
= mmap(NULL
, bo
.size
, PROT_READ
, MAP_SHARED
| MAP_ANONYMOUS
, -1, 0);
320 assert(bo
.map
!= MAP_FAILED
);
322 for (struct ggtt_entry
*i
= start
;
324 i
= i
== last
? NULL
: ggtt_entry_next(i
)) {
325 uint64_t phys_addr
= i
->phys_addr
& ~0xfff;
326 struct phys_mem
*phys_mem
= search_phys_mem(phys_addr
);
331 uint32_t map_offset
= i
->virt_addr
- address
;
332 void *res
= mmap((uint8_t *)bo
.map
+ map_offset
, 4096, PROT_READ
,
333 MAP_SHARED
| MAP_FIXED
, mem_fd
, phys_mem
->fd_offset
);
334 assert(res
!= MAP_FAILED
);
337 add_gtt_bo_map(bo
, true);
342 static struct phys_mem
*
343 ppgtt_walk(uint64_t pml4
, uint64_t address
)
346 uint64_t addr
= pml4
;
347 for (int level
= 4; level
> 0; level
--) {
348 struct phys_mem
*table
= search_phys_mem(addr
);
351 int index
= (address
>> shift
) & 0x1ff;
352 uint64_t entry
= ((uint64_t *)table
->data
)[index
];
355 addr
= entry
& ~0xfff;
358 return search_phys_mem(addr
);
/* True when `address` resolves through the PPGTT rooted at pml4. */
static bool
ppgtt_mapped(uint64_t pml4, uint64_t address)
{
   return ppgtt_walk(pml4, address) != NULL;
}
367 static struct gen_batch_decode_bo
368 get_ppgtt_batch_bo(void *user_data
, uint64_t address
)
370 struct gen_batch_decode_bo bo
= {0};
371 uint64_t pml4
= *(uint64_t *)user_data
;
375 if (!ppgtt_mapped(pml4
, address
))
378 /* Map everything until the first gap since we don't know how much the
379 * decoder actually needs.
381 uint64_t end
= address
;
382 while (ppgtt_mapped(pml4
, end
))
386 bo
.size
= end
- address
;
387 bo
.map
= mmap(NULL
, bo
.size
, PROT_READ
, MAP_SHARED
| MAP_ANONYMOUS
, -1, 0);
388 assert(bo
.map
!= MAP_FAILED
);
390 for (uint64_t page
= address
; page
< end
; page
+= 4096) {
391 struct phys_mem
*phys_mem
= ppgtt_walk(pml4
, page
);
393 void *res
= mmap((uint8_t *)bo
.map
+ (page
- bo
.addr
), 4096, PROT_READ
,
394 MAP_SHARED
| MAP_FIXED
, mem_fd
, phys_mem
->fd_offset
);
395 assert(res
!= MAP_FAILED
);
398 add_gtt_bo_map(bo
, true);
/* AUB callback: report a parse error.  The message is printed via "%s"
 * rather than as the format string itself — passing externally supplied
 * text as a printf format is a format-string vulnerability and breaks on
 * messages containing '%'.
 */
static void
aubinator_error(void *user_data, const void *aub_data, const char *msg)
{
   fprintf(stderr, "%s", msg);
}
410 aubinator_init(void *user_data
, int aub_pci_id
, const char *app_name
)
414 if (!gen_get_device_info(pci_id
, &devinfo
)) {
415 fprintf(stderr
, "can't find device information: pci_id=0x%x\n", pci_id
);
419 enum gen_batch_decode_flags batch_flags
= 0;
420 if (option_color
== COLOR_ALWAYS
)
421 batch_flags
|= GEN_BATCH_DECODE_IN_COLOR
;
422 if (option_full_decode
)
423 batch_flags
|= GEN_BATCH_DECODE_FULL
;
424 if (option_print_offsets
)
425 batch_flags
|= GEN_BATCH_DECODE_OFFSETS
;
426 batch_flags
|= GEN_BATCH_DECODE_FLOATS
;
428 gen_batch_decode_ctx_init(&batch_ctx
, &devinfo
, outfile
, batch_flags
,
429 xml_path
, NULL
, NULL
, NULL
);
430 batch_ctx
.max_vbo_decoded_lines
= max_vbo_lines
;
432 char *color
= GREEN_HEADER
, *reset_color
= NORMAL
;
433 if (option_color
== COLOR_NEVER
)
434 color
= reset_color
= "";
436 fprintf(outfile
, "%sAubinator: Intel AUB file decoder.%-80s%s\n",
437 color
, "", reset_color
);
440 fprintf(outfile
, "File name: %s\n", input_file
);
443 fprintf(outfile
, "PCI ID: 0x%x\n", aub_pci_id
);
445 fprintf(outfile
, "Application name: %s\n", app_name
);
447 fprintf(outfile
, "Decoding as: %s\n", gen_get_device_name(pci_id
));
449 /* Throw in a new line before the first batch */
450 fprintf(outfile
, "\n");
454 handle_execlist_write(void *user_data
, enum gen_engine engine
, uint64_t context_descriptor
)
456 const uint32_t pphwsp_size
= 4096;
457 uint32_t pphwsp_addr
= context_descriptor
& 0xfffff000;
458 struct gen_batch_decode_bo pphwsp_bo
= get_ggtt_batch_bo(NULL
, pphwsp_addr
);
459 uint32_t *context
= (uint32_t *)((uint8_t *)pphwsp_bo
.map
+
460 (pphwsp_addr
- pphwsp_bo
.addr
) +
463 uint32_t ring_buffer_head
= context
[5];
464 uint32_t ring_buffer_tail
= context
[7];
465 uint32_t ring_buffer_start
= context
[9];
466 uint64_t pml4
= (uint64_t)context
[49] << 32 | context
[51];
468 struct gen_batch_decode_bo ring_bo
= get_ggtt_batch_bo(NULL
,
470 assert(ring_bo
.size
> 0);
471 void *commands
= (uint8_t *)ring_bo
.map
+ (ring_buffer_start
- ring_bo
.addr
);
473 if (context_descriptor
& 0x100 /* ppgtt */) {
474 batch_ctx
.get_bo
= get_ppgtt_batch_bo
;
475 batch_ctx
.user_data
= &pml4
;
477 batch_ctx
.get_bo
= get_ggtt_batch_bo
;
480 (void)engine
; /* TODO */
481 gen_print_batch(&batch_ctx
, commands
, ring_buffer_tail
- ring_buffer_head
,
487 handle_ring_write(void *user_data
, enum gen_engine engine
,
488 const void *data
, uint32_t data_len
)
490 batch_ctx
.get_bo
= get_ggtt_batch_bo
;
492 gen_print_batch(&batch_ctx
, data
, data_len
, 0);
500 void *map
, *end
, *cursor
;
503 static struct aub_file
*
504 aub_file_open(const char *filename
)
506 struct aub_file
*file
;
510 file
= calloc(1, sizeof *file
);
511 fd
= open(filename
, O_RDONLY
);
513 fprintf(stderr
, "open %s failed: %s\n", filename
, strerror(errno
));
517 if (fstat(fd
, &sb
) == -1) {
518 fprintf(stderr
, "stat failed: %s\n", strerror(errno
));
522 file
->map
= mmap(NULL
, sb
.st_size
,
523 PROT_READ
, MAP_SHARED
, fd
, 0);
524 if (file
->map
== MAP_FAILED
) {
525 fprintf(stderr
, "mmap failed: %s\n", strerror(errno
));
531 file
->cursor
= file
->map
;
532 file
->end
= file
->map
+ sb
.st_size
;
538 aub_file_more_stuff(struct aub_file
*file
)
540 return file
->cursor
< file
->end
|| (file
->stream
&& !feof(file
->stream
));
/* Pipe our stdout through `less` so long decodes are scrollable.
 * Best-effort: on any failure we simply keep writing to the original
 * stdout.  (Reconstructed — only the execlp call was visible.)
 */
static void
setup_pager(void)
{
   int fds[2];
   pid_t pid;

   if (pipe(fds) == -1)
      return;

   pid = fork();
   if (pid == -1)
      return;

   if (pid == 0) {
      /* Child: read the pipe on stdin and become the pager. */
      close(fds[1]);
      dup2(fds[0], 0);
      execlp("less", "less", "-FRSi", NULL);
   }

   /* Parent: route stdout into the pipe. */
   close(fds[0]);
   dup2(fds[1], 1);
   close(fds[1]);
}
/* Print usage text for `progname` to `file`. */
static void
print_help(const char *progname, FILE *file)
{
   fprintf(file,
           "Usage: %s [OPTION]... FILE\n"
           "Decode aub file contents from FILE.\n\n"
           " --help display this help and exit\n"
           " --gen=platform decode for given platform (3 letter platform name)\n"
           " --headers decode only command headers\n"
           " --color[=WHEN] colorize the output; WHEN can be 'auto' (default\n"
           " if omitted), 'always', or 'never'\n"
           " --max-vbo-lines=N limit the number of decoded VBO lines\n"
           " --no-pager don't launch pager\n"
           " --no-offsets don't print instruction offsets\n"
           " --xml=DIR load hardware xml description from directory DIR\n",
           progname);
}
588 int main(int argc
, char *argv
[])
590 struct aub_file
*file
;
592 bool help
= false, pager
= true;
593 const struct option aubinator_opts
[] = {
594 { "help", no_argument
, (int *) &help
, true },
595 { "no-pager", no_argument
, (int *) &pager
, false },
596 { "no-offsets", no_argument
, (int *) &option_print_offsets
, false },
597 { "gen", required_argument
, NULL
, 'g' },
598 { "headers", no_argument
, (int *) &option_full_decode
, false },
599 { "color", required_argument
, NULL
, 'c' },
600 { "xml", required_argument
, NULL
, 'x' },
601 { "max-vbo-lines", required_argument
, NULL
, 'v' },
608 while ((c
= getopt_long(argc
, argv
, "", aubinator_opts
, &i
)) != -1) {
611 const int id
= gen_device_name_to_pci_device_id(optarg
);
613 fprintf(stderr
, "can't parse gen: '%s', expected ivb, byt, hsw, "
614 "bdw, chv, skl, kbl or bxt\n", optarg
);
622 if (optarg
== NULL
|| strcmp(optarg
, "always") == 0)
623 option_color
= COLOR_ALWAYS
;
624 else if (strcmp(optarg
, "never") == 0)
625 option_color
= COLOR_NEVER
;
626 else if (strcmp(optarg
, "auto") == 0)
627 option_color
= COLOR_AUTO
;
629 fprintf(stderr
, "invalid value for --color: %s", optarg
);
634 xml_path
= strdup(optarg
);
637 max_vbo_lines
= atoi(optarg
);
645 input_file
= argv
[optind
];
647 if (help
|| !input_file
) {
648 print_help(argv
[0], stderr
);
652 /* Do this before we redirect stdout to pager. */
653 if (option_color
== COLOR_AUTO
)
654 option_color
= isatty(1) ? COLOR_ALWAYS
: COLOR_NEVER
;
656 if (isatty(1) && pager
)
659 mem_fd
= memfd_create("phys memory", 0);
661 list_inithead(&maps
);
663 file
= aub_file_open(input_file
);
665 struct aub_read aub_read
= {
667 .error
= aubinator_error
,
668 .info
= aubinator_init
,
669 .local_write
= handle_local_write
,
670 .phys_write
= handle_physical_write
,
671 .ggtt_write
= handle_ggtt_write
,
672 .ggtt_entry_write
= handle_ggtt_entry_write
,
673 .execlist_write
= handle_execlist_write
,
674 .ring_write
= handle_ring_write
,
677 while (aub_file_more_stuff(file
) &&
678 (consumed
= aub_read_command(&aub_read
, file
->cursor
,
679 file
->end
- file
->cursor
)) > 0) {
680 file
->cursor
+= consumed
;
684 /* close the stdout which is opened to write the output */