aubinator: Add a new tool called Aubinator to the src/intel/tools folder.
[mesa.git] / src / intel / tools / aubinator.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>

#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <errno.h>
#include <error.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/mman.h>

#include "decoder.h"
#include "intel_aub.h"
#include "gen_disasm.h"
42
/* AUB_MI_BATCH_BUFFER_END is the only command this tool needs that libdrm's
 * intel_aub.h does not provide, so that header is reused as-is and the
 * missing opcode is defined here.
 */
#define AUB_MI_BATCH_BUFFER_END (0x0500 << 16)

/* Terminal escape sequences used to colorize the output.  The escape
 * character is written as octal \033 because "\e" is a compiler extension
 * rather than standard C.
 */
#define CSI "\033["
#define HEADER CSI "37;44m"
#define NORMAL CSI "0m"
#define CLEAR_TO_EOL CSI "0K"
53
/* Command-line options; the initial values below are the defaults that
 * main() may override.
 */

/* When true, every field of each command is printed, not just its header. */
static bool option_full_decode = true;

/* When true, each command header line shows the command's offset. */
static bool option_print_offsets = true;

/* Colorization policy; COLOR_AUTO is resolved in main() based on isatty(). */
static enum {
   COLOR_AUTO = 0,
   COLOR_ALWAYS,
   COLOR_NEVER
} option_color = COLOR_AUTO;
59
/* Decoder state shared by the handlers in this file. */

/* Shader disassembler, created in main(). */
struct gen_disasm *disasm;

/* Size in bytes of the mapping that backs the GTT address space. */
uint64_t gtt_size;

/* One past the highest GTT byte written by data blocks since the last
 * command write; offsets at or above it are reported as not valid.
 */
uint64_t gtt_end;

/* Start of the GTT mapping; GTT offsets are added to this pointer. */
void *gtt;

/* Base addresses captured by handle_state_base_address(). */
uint64_t general_state_base;
uint64_t surface_state_base;
uint64_t dynamic_state_base;
uint64_t instruction_base;
uint64_t instruction_bound;
71
/* Return bits [start, end] (inclusive, bit 0 is the LSB) of value,
 * shifted down so that bit `start` becomes bit 0.
 */
static inline uint32_t
field(uint32_t value, int start, int end)
{
   /* Number of high bits to discard so only (end - start + 1) bits remain. */
   const int discarded_bits = 31 - end + start;

   return (value >> start) & (~0U >> discarded_bits);
}
81
82 struct brw_instruction;
83
84 static inline int
85 valid_offset(uint32_t offset)
86 {
87 return offset < gtt_end;
88 }
89
90 static void
91 decode_structure(struct gen_spec *spec, struct gen_group *strct, const uint32_t *p)
92 {
93 struct gen_field_iterator iter;
94
95 gen_field_iterator_init(&iter, strct, p);
96 while (gen_field_iterator_next(&iter)) {
97 printf(" %s: %s\n", iter.name, iter.value);
98 }
99 }
100
101 static void
102 dump_binding_table(struct gen_spec *spec, uint32_t offset)
103 {
104 uint32_t *pointers, i;
105 uint64_t start;
106 struct gen_group *surface_state;
107
108 surface_state = gen_spec_find_struct(spec, "RENDER_SURFACE_STATE");
109 if (surface_state == NULL) {
110 printf("did not find RENDER_SURFACE_STATE info\n");
111 return;
112 }
113
114 start = surface_state_base + offset;
115 pointers = gtt + start;
116 for (i = 0; i < 16; i++) {
117 if (pointers[i] == 0)
118 continue;
119 start = pointers[i] + surface_state_base;
120 if (!valid_offset(start)) {
121 printf("pointer %u: %08x <not valid>\n",
122 i, pointers[i]);
123 continue;
124 } else {
125 printf("pointer %u: %08x\n", i, pointers[i]);
126 }
127
128 decode_structure(spec, surface_state, gtt + start);
129 }
130 }
131
132 static void
133 handle_3dstate_index_buffer(struct gen_spec *spec, uint32_t *p)
134 {
135 void *start;
136 uint32_t length, i, type, size;
137
138 start = gtt + p[2];
139 type = (p[1] >> 8) & 3;
140 size = 1 << type;
141 length = p[4] / size;
142 if (length > 10)
143 length = 10;
144
145 printf("\t");
146
147 for (i = 0; i < length; i++) {
148 switch (type) {
149 case 0:
150 printf("%3d ", ((uint8_t *)start)[i]);
151 break;
152 case 1:
153 printf("%3d ", ((uint16_t *)start)[i]);
154 break;
155 case 2:
156 printf("%3d ", ((uint32_t *)start)[i]);
157 break;
158 }
159 }
160 if (length < p[4] / size)
161 printf("...\n");
162 else
163 printf("\n");
164 }
165
/* Assemble a 64-bit value from two consecutive dwords: p[0] supplies the
 * low 32 bits and p[1] the high 32 bits.
 */
static inline uint64_t
get_qword(uint32_t *p)
{
   const uint64_t low = p[0];
   const uint64_t high = p[1];

   return (high << 32) | low;
}
171
172 static void
173 handle_state_base_address(struct gen_spec *spec, uint32_t *p)
174 {
175 uint64_t mask = ~((1 << 12) - 1);
176
177 if (gen_spec_get_gen(spec) >= gen_make_gen(8,0)) {
178 if (p[1] & 1)
179 general_state_base = get_qword(&p[1]) & mask;
180 if (p[4] & 1)
181 surface_state_base = get_qword(&p[4]) & mask;
182 if (p[6] & 1)
183 dynamic_state_base = get_qword(&p[6]) & mask;
184 if (p[10] & 1)
185 instruction_base = get_qword(&p[10]) & mask;
186 if (p[15] & 1)
187 instruction_bound = p[15] & mask;
188 } else {
189 if (p[2] & 1)
190 surface_state_base = p[2] & mask;
191 if (p[3] & 1)
192 dynamic_state_base = p[3] & mask;
193 if (p[5] & 1)
194 instruction_base = p[5] & mask;
195 if (p[9] & 1)
196 instruction_bound = p[9] & mask;
197 }
198 }
199
200 static void
201 dump_samplers(struct gen_spec *spec, uint32_t offset)
202 {
203 uint32_t i;
204 uint64_t start;
205 struct gen_group *sampler_state;
206
207 sampler_state = gen_spec_find_struct(spec, "SAMPLER_STATE");
208
209 start = dynamic_state_base + offset;
210 for (i = 0; i < 4; i++) {
211 printf("sampler state %d\n", i);
212 decode_structure(spec, sampler_state, gtt + start + i * 16);
213 }
214 }
215
216 static void
217 handle_media_interface_descriptor_load(struct gen_spec *spec, uint32_t *p)
218 {
219 int i, length = p[2] / 32;
220 struct gen_group *descriptor_structure;
221 uint32_t *descriptors;
222 uint64_t start;
223 struct brw_instruction *insns;
224
225 descriptor_structure =
226 gen_spec_find_struct(spec, "INTERFACE_DESCRIPTOR_DATA");
227 if (descriptor_structure == NULL) {
228 printf("did not find INTERFACE_DESCRIPTOR_DATA info\n");
229 return;
230 }
231
232 start = dynamic_state_base + p[3];
233 descriptors = gtt + start;
234 for (i = 0; i < length; i++, descriptors += 8) {
235 printf("descriptor %u: %08x\n", i, *descriptors);
236 decode_structure(spec, descriptor_structure, descriptors);
237
238 start = instruction_base + descriptors[0];
239 if (!valid_offset(start)) {
240 printf("kernel: %08lx <not valid>\n", start);
241 continue;
242 } else {
243 printf("kernel: %08lx\n", start);
244 }
245
246 insns = (struct brw_instruction *) (gtt + start);
247 gen_disasm_disassemble(disasm, insns, 0, 8192, stdout);
248
249 dump_samplers(spec, descriptors[3] & ~0x1f);
250 dump_binding_table(spec, descriptors[4] & ~0x1f);
251 }
252 }
253
/* Guess whether a 32-bit pattern is more likely an IEEE-754 single than an
 * integer (heuristic from http://stackoverflow.com/a/2953466).
 *
 * The pattern is accepted when it is +/-0.0, when its unbiased exponent is
 * within [-30, 30], or when the low 16 mantissa bits are all zero.
 */
static bool
probably_float(uint32_t bits)
{
   const int exponent = ((bits & 0x7f800000U) >> 23) - 127;
   const uint32_t mantissa = bits & 0x007fffff;

   const bool is_zero = exponent == -127 && mantissa == 0;
   const bool moderate_magnitude = exponent >= -30 && exponent <= 30;
   const bool few_mantissa_digits = (mantissa & 0x0000ffff) == 0;

   return is_zero || moderate_magnitude || few_mantissa_digits;
}
278
279 static void
280 handle_3dstate_vertex_buffers(struct gen_spec *spec, uint32_t *p)
281 {
282 uint32_t *end, *s, *dw, *dwend;
283 uint64_t offset;
284 int n, i, count, stride;
285
286 end = (p[0] & 0xff) + p + 2;
287 for (s = &p[1], n = 0; s < end; s += 4, n++) {
288 if (gen_spec_get_gen(spec) >= gen_make_gen(8, 0)) {
289 offset = *(uint64_t *) &s[1];
290 dwend = gtt + offset + s[3];
291 } else {
292 offset = s[1];
293 dwend = gtt + s[2] + 1;
294 }
295
296 stride = field(s[0], 0, 11);
297 count = 0;
298 printf("vertex buffer %d, size %d\n", n, s[3]);
299 for (dw = gtt + offset, i = 0; dw < dwend && i < 256; dw++) {
300 if (count == 0 && count % (8 * 4) == 0)
301 printf(" ");
302
303 if (probably_float(*dw))
304 printf(" %8.2f", *(float *) dw);
305 else
306 printf(" 0x%08x", *dw);
307
308 i++;
309 count += 4;
310
311 if (count == stride) {
312 printf("\n");
313 count = 0;
314 } else if (count % (8 * 4) == 0) {
315 printf("\n");
316 } else {
317 printf(" ");
318 }
319 }
320 if (count > 0 && count % (8 * 4) != 0)
321 printf("\n");
322 }
323 }
324
325 static void
326 handle_3dstate_vs(struct gen_spec *spec, uint32_t *p)
327 {
328 uint64_t start;
329 struct brw_instruction *insns;
330 int vs_enable;
331
332 if (gen_spec_get_gen(spec) >= gen_make_gen(8, 0)) {
333 start = get_qword(&p[1]);
334 vs_enable = p[7] & 1;
335 } else {
336 start = p[1];
337 vs_enable = p[5] & 1;
338 }
339
340 if (vs_enable) {
341 printf("instruction_base %08lx, start %08lx\n",
342 instruction_base, start);
343
344 insns = (struct brw_instruction *) (gtt + instruction_base + start);
345 gen_disasm_disassemble(disasm, insns, 0, 8192, stdout);
346 }
347 }
348
349 static void
350 handle_3dstate_constant(struct gen_spec *spec, uint32_t *p)
351 {
352 int i, j, length;
353 uint32_t *dw;
354 float *f;
355
356 for (i = 0; i < 4; i++) {
357 length = (p[1 + i / 2] >> (i & 1) * 16) & 0xffff;
358 f = (float *) (gtt + p[3 + i * 2] + dynamic_state_base);
359 dw = (uint32_t *) f;
360 for (j = 0; j < length * 8; j++) {
361 if (probably_float(dw[j]))
362 printf(" %04.3f", f[j]);
363 else
364 printf(" 0x%08x", dw[j]);
365
366 if ((j & 7) == 7)
367 printf("\n");
368 }
369 }
370 }
371
372 static void
373 handle_3dstate_ps(struct gen_spec *spec, uint32_t *p)
374 {
375 uint32_t mask = ~((1 << 6) - 1);
376 uint64_t start;
377 struct brw_instruction *insns;
378 static const char unused[] = "unused";
379 static const char *pixel_type[3] = {"8 pixel", "16 pixel", "32 pixel"};
380 const char *k0, *k1, *k2;
381 uint32_t k_mask, k1_offset, k2_offset;
382
383 if (gen_spec_get_gen(spec) >= gen_make_gen(8, 0)) {
384 k_mask = p[6] & 7;
385 k1_offset = 8;
386 k2_offset = 10;
387 } else {
388 k_mask = p[4] & 7;
389 k1_offset = 6;
390 k2_offset = 7;
391 }
392
393 #define DISPATCH_8 1
394 #define DISPATCH_16 2
395 #define DISPATCH_32 4
396
397 switch (k_mask) {
398 case DISPATCH_8:
399 k0 = pixel_type[0];
400 k1 = unused;
401 k2 = unused;
402 break;
403 case DISPATCH_16:
404 k0 = pixel_type[1];
405 k1 = unused;
406 k2 = unused;
407 break;
408 case DISPATCH_8 | DISPATCH_16:
409 k0 = pixel_type[0];
410 k1 = unused;
411 k2 = pixel_type[1];
412 break;
413 case DISPATCH_32:
414 k0 = pixel_type[2];
415 k1 = unused;
416 k2 = unused;
417 break;
418 case DISPATCH_16 | DISPATCH_32:
419 k0 = unused;
420 k1 = pixel_type[2];
421 k2 = pixel_type[1];
422 break;
423 case DISPATCH_8 | DISPATCH_16 | DISPATCH_32:
424 k0 = pixel_type[0];
425 k1 = pixel_type[2];
426 k2 = pixel_type[1];
427 break;
428 default:
429 k0 = unused;
430 k1 = unused;
431 k2 = unused;
432 break;
433 }
434
435 start = instruction_base + (p[1] & mask);
436 printf(" Kernel[0] %s\n", k0);
437 if (k0 != unused) {
438 insns = (struct brw_instruction *) (gtt + start);
439 gen_disasm_disassemble(disasm, insns, 0, 8192, stdout);
440 }
441
442 start = instruction_base + (p[k1_offset] & mask);
443 printf(" Kernel[1] %s\n", k1);
444 if (k1 != unused) {
445 insns = (struct brw_instruction *) (gtt + start);
446 gen_disasm_disassemble(disasm, insns, 0, 8192, stdout);
447 }
448
449 start = instruction_base + (p[k2_offset] & mask);
450 printf(" Kernel[2] %s\n", k2);
451 if (k2 != unused) {
452 insns = (struct brw_instruction *) (gtt + start);
453 gen_disasm_disassemble(disasm, insns, 0, 8192, stdout);
454 }
455 }
456
/* Dump the binding table whose offset is carried in dword 1 of the command. */
static void
handle_3dstate_binding_table_pointers(struct gen_spec *spec, uint32_t *p)
{
   const uint32_t table_offset = p[1];

   dump_binding_table(spec, table_offset);
}
462
/* Dump the sampler states whose offset is carried in dword 1 of the command. */
static void
handle_3dstate_sampler_state_pointers(struct gen_spec *spec, uint32_t *p)
{
   const uint32_t sampler_offset = p[1];

   dump_samplers(spec, sampler_offset);
}
468
469 static void
470 handle_3dstate_viewport_state_pointers_cc(struct gen_spec *spec, uint32_t *p)
471 {
472 uint64_t start;
473 struct gen_group *cc_viewport;
474
475 cc_viewport = gen_spec_find_struct(spec, "CC_VIEWPORT");
476
477 start = dynamic_state_base + (p[1] & ~0x1fu);
478 for (uint32_t i = 0; i < 4; i++) {
479 printf("viewport %d\n", i);
480 decode_structure(spec, cc_viewport, gtt + start + i * 8);
481 }
482 }
483
484 static void
485 handle_3dstate_viewport_state_pointers_sf_clip(struct gen_spec *spec, uint32_t *p)
486 {
487 uint64_t start;
488 struct gen_group *sf_clip_viewport;
489
490 sf_clip_viewport = gen_spec_find_struct(spec, "SF_CLIP_VIEWPORT");
491
492 start = dynamic_state_base + (p[1] & ~0x3fu);
493 for (uint32_t i = 0; i < 4; i++) {
494 printf("viewport %d\n", i);
495 decode_structure(spec, sf_clip_viewport, gtt + start + i * 64);
496 }
497 }
498
499 static void
500 handle_3dstate_blend_state_pointers(struct gen_spec *spec, uint32_t *p)
501 {
502 uint64_t start;
503 struct gen_group *blend_state;
504
505 blend_state = gen_spec_find_struct(spec, "BLEND_STATE");
506
507 start = dynamic_state_base + (p[1] & ~0x3fu);
508 decode_structure(spec, blend_state, gtt + start);
509 }
510
511 static void
512 handle_3dstate_cc_state_pointers(struct gen_spec *spec, uint32_t *p)
513 {
514 uint64_t start;
515 struct gen_group *cc_state;
516
517 cc_state = gen_spec_find_struct(spec, "COLOR_CALC_STATE");
518
519 start = dynamic_state_base + (p[1] & ~0x3fu);
520 decode_structure(spec, cc_state, gtt + start);
521 }
522
523 static void
524 handle_3dstate_scissor_state_pointers(struct gen_spec *spec, uint32_t *p)
525 {
526 uint64_t start;
527 struct gen_group *scissor_rect;
528
529 scissor_rect = gen_spec_find_struct(spec, "SCISSOR_RECT");
530
531 start = dynamic_state_base + (p[1] & ~0x1fu);
532 decode_structure(spec, scissor_rect, gtt + start);
533 }
534
/* Element count of an array object (must not be used on a pointer). */
#define ARRAY_LENGTH(a) (sizeof (a) / sizeof (a)[0])

/* Opcodes of the commands that have an entry in custom_handlers, listed in
 * ascending numeric order.
 */
#define STATE_BASE_ADDRESS 0x61010000

#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x70020000

#define _3DSTATE_VERTEX_BUFFERS 0x78080000
#define _3DSTATE_INDEX_BUFFER 0x780a0000
#define _3DSTATE_CC_STATE_POINTERS 0x780e0000
#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f0000

#define _3DSTATE_VS 0x78100000
#define _3DSTATE_GS 0x78110000

#define _3DSTATE_CONSTANT_VS 0x78150000
#define _3DSTATE_CONSTANT_GS 0x78160000
#define _3DSTATE_CONSTANT_PS 0x78170000
#define _3DSTATE_CONSTANT_HS 0x78190000
#define _3DSTATE_CONSTANT_DS 0x781A0000

#define _3DSTATE_PS 0x78200000
#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP 0x78210000
#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x78230000
#define _3DSTATE_BLEND_STATE_POINTERS 0x78240000

#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x78260000
#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x78270000
#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x78280000
#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x78290000
#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782a0000

#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782b0000
#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782e0000
#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782f0000
570
571 struct custom_handler {
572 uint32_t opcode;
573 void (*handle)(struct gen_spec *spec, uint32_t *p);
574 } custom_handlers[] = {
575 { STATE_BASE_ADDRESS, handle_state_base_address },
576 { MEDIA_INTERFACE_DESCRIPTOR_LOAD, handle_media_interface_descriptor_load },
577 { _3DSTATE_VERTEX_BUFFERS, handle_3dstate_vertex_buffers },
578 { _3DSTATE_INDEX_BUFFER, handle_3dstate_index_buffer },
579 { _3DSTATE_VS, handle_3dstate_vs },
580 { _3DSTATE_GS, handle_3dstate_vs },
581 /* FIXME: Handle disassmbing for 3DSTATE_HS and 3DSTATE_DS. */
582 { _3DSTATE_CONSTANT_VS, handle_3dstate_constant },
583 { _3DSTATE_CONSTANT_GS, handle_3dstate_constant },
584 { _3DSTATE_CONSTANT_PS, handle_3dstate_constant },
585 { _3DSTATE_CONSTANT_HS, handle_3dstate_constant },
586 { _3DSTATE_CONSTANT_DS, handle_3dstate_constant },
587 { _3DSTATE_PS, handle_3dstate_ps },
588
589 { _3DSTATE_BINDING_TABLE_POINTERS_VS, handle_3dstate_binding_table_pointers },
590 { _3DSTATE_BINDING_TABLE_POINTERS_HS, handle_3dstate_binding_table_pointers },
591 { _3DSTATE_BINDING_TABLE_POINTERS_DS, handle_3dstate_binding_table_pointers },
592 { _3DSTATE_BINDING_TABLE_POINTERS_GS, handle_3dstate_binding_table_pointers },
593 { _3DSTATE_BINDING_TABLE_POINTERS_PS, handle_3dstate_binding_table_pointers },
594
595 { _3DSTATE_SAMPLER_STATE_POINTERS_VS, handle_3dstate_sampler_state_pointers },
596 { _3DSTATE_SAMPLER_STATE_POINTERS_GS, handle_3dstate_sampler_state_pointers },
597 { _3DSTATE_SAMPLER_STATE_POINTERS_PS, handle_3dstate_sampler_state_pointers },
598
599 { _3DSTATE_VIEWPORT_STATE_POINTERS_CC, handle_3dstate_viewport_state_pointers_cc },
600 { _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, handle_3dstate_viewport_state_pointers_sf_clip },
601 { _3DSTATE_BLEND_STATE_POINTERS, handle_3dstate_blend_state_pointers },
602 { _3DSTATE_CC_STATE_POINTERS, handle_3dstate_cc_state_pointers },
603 { _3DSTATE_SCISSOR_STATE_POINTERS, handle_3dstate_scissor_state_pointers }
604 };
605
606 static void
607 parse_commands(struct gen_spec *spec, uint32_t *cmds, int size, int engine)
608 {
609 uint32_t *p, *end = cmds + size / 4;
610 unsigned int length, i;
611 struct gen_group *inst;
612
613 for (p = cmds; p < end; p += length) {
614 inst = gen_spec_find_instruction(spec, p);
615 if (inst == NULL) {
616 printf("unknown instruction %08x\n", p[0]);
617 length = (p[0] & 0xff) + 2;
618 continue;
619 }
620 length = gen_group_get_length(inst, p);
621
622 const char *color, *reset_color = CLEAR_TO_EOL NORMAL;
623 uint64_t offset;
624
625 if (option_full_decode)
626 color = HEADER;
627 else
628 color = NORMAL;
629
630 if (option_color == COLOR_NEVER) {
631 color = "";
632 reset_color = "";
633 }
634
635 if (option_print_offsets)
636 offset = (void *) p - gtt;
637 else
638 offset = 0;
639
640 printf("%s0x%08lx: 0x%08x: %s%s\n",
641 color, offset, p[0],
642 gen_group_get_name(inst), reset_color);
643
644 if (option_full_decode) {
645 struct gen_field_iterator iter;
646 gen_field_iterator_init(&iter, inst, p);
647 while (gen_field_iterator_next(&iter)) {
648 printf(" %s: %s\n", iter.name, iter.value);
649 }
650
651 for (i = 0; i < ARRAY_LENGTH(custom_handlers); i++) {
652 if (gen_group_get_opcode(inst) ==
653 custom_handlers[i].opcode)
654 custom_handlers[i].handle(spec, p);
655 }
656 }
657
658 if ((p[0] & 0xffff0000) == AUB_MI_BATCH_BUFFER_START) {
659 uint64_t start;
660 if (gen_spec_get_gen(spec) >= gen_make_gen(8,0))
661 start = get_qword(&p[1]);
662 else
663 start = p[1];
664
665 parse_commands(spec, gtt + start, 1 << 20, engine);
666 } else if ((p[0] & 0xffff0000) == AUB_MI_BATCH_BUFFER_END) {
667 break;
668 }
669 }
670 }
671
672 #define GEN_ENGINE_RENDER 1
673 #define GEN_ENGINE_BLITTER 2
674
675 static void
676 handle_trace_block(struct gen_spec *spec, uint32_t *p)
677 {
678 int operation = p[1] & AUB_TRACE_OPERATION_MASK;
679 int type = p[1] & AUB_TRACE_TYPE_MASK;
680 int address_space = p[1] & AUB_TRACE_ADDRESS_SPACE_MASK;
681 uint64_t offset = p[3];
682 uint32_t size = p[4];
683 int header_length = p[0] & 0xffff;
684 uint32_t *data = p + header_length + 2;
685 int engine = GEN_ENGINE_RENDER;
686
687 if (gen_spec_get_gen(spec) >= gen_make_gen(8,0))
688 offset += (uint64_t) p[5] << 32;
689
690 switch (operation) {
691 case AUB_TRACE_OP_DATA_WRITE:
692 if (address_space != AUB_TRACE_MEMTYPE_GTT)
693 break;
694 if (gtt_size < offset + size)
695 error(EXIT_FAILURE, errno, "overflow gtt space");
696 memcpy((char *) gtt + offset, data, size);
697 if (gtt_end < offset + size)
698 gtt_end = offset + size;
699 break;
700 case AUB_TRACE_OP_COMMAND_WRITE:
701 switch (type) {
702 case AUB_TRACE_TYPE_RING_PRB0:
703 engine = GEN_ENGINE_RENDER;
704 break;
705 case AUB_TRACE_TYPE_RING_PRB2:
706 engine = GEN_ENGINE_BLITTER;
707 break;
708 default:
709 printf("command write to unknown ring %d\n", type);
710 break;
711 }
712
713 parse_commands(spec, data, size, engine);
714 gtt_end = 0;
715 break;
716 }
717 }
718
/* An AUB trace file mapped into memory. */
struct aub_file {
   /* Copy of the path the file was opened with. */
   char *filename;
   /* Descriptor returned by open(). */
   int fd;
   /* Result of fstat() on fd; st_size gives the mapping length. */
   struct stat sb;
   /* First dword of the mapping. */
   uint32_t *map;
   /* One past the last whole dword of the mapping. */
   uint32_t *end;
   /* Next block to decode. */
   uint32_t *cursor;
};
725
726 static struct aub_file *
727 aub_file_open(const char *filename)
728 {
729 struct aub_file *file;
730
731 file = malloc(sizeof *file);
732 file->filename = strdup(filename);
733 file->fd = open(file->filename, O_RDONLY);
734 if (file->fd == -1)
735 error(EXIT_FAILURE, errno, "open %s failed", file->filename);
736
737 if (fstat(file->fd, &file->sb) == -1)
738 error(EXIT_FAILURE, errno, "stat failed");
739
740 file->map = mmap(NULL, file->sb.st_size,
741 PROT_READ, MAP_SHARED, file->fd, 0);
742 if (file->map == MAP_FAILED)
743 error(EXIT_FAILURE, errno, "mmap failed");
744
745 file->cursor = file->map;
746 file->end = file->map + file->sb.st_size / 4;
747
748 /* mmap a terabyte for our gtt space. */
749 gtt_size = 1ul << 40;
750 gtt = mmap(NULL, gtt_size, PROT_READ | PROT_WRITE,
751 MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
752 if (gtt == MAP_FAILED)
753 error(EXIT_FAILURE, errno, "failed to alloc gtt space");
754
755 return file;
756 }
757
/* Accessors for the type (bits 31:29), opcode (bits 28:23) and subopcode
 * (bits 22:16) fields of a block header dword.
 */
#define TYPE(dw) (((dw) >> 29) & 7)
#define OPCODE(dw) (((dw) >> 23) & 0x3f)
#define SUBOPCODE(dw) (((dw) >> 16) & 0x7f)

/* Build the upper 16 bits of a block header.  The arithmetic is done in
 * uint32_t because shifting a type of 7 left by 29 does not fit in a signed
 * int, which would be undefined behavior.
 */
#define MAKE_HEADER(type, opcode, subopcode) \
   (((uint32_t) (type) << 29) | ((uint32_t) (opcode) << 23) | \
    ((uint32_t) (subopcode) << 16))

#define TYPE_AUB 0x7

/* Classic AUB opcodes */
#define OPCODE_AUB 0x01
#define SUBOPCODE_HEADER 0x05
#define SUBOPCODE_BLOCK 0x41
#define SUBOPCODE_BMP 0x1e

/* Newer version AUB opcode */
#define OPCODE_NEW_AUB 0x2e
#define SUBOPCODE_VERSION 0x00
#define SUBOPCODE_REG_WRITE 0x03
#define SUBOPCODE_MEM_POLL 0x05
#define SUBOPCODE_MEM_WRITE 0x06
779
/* Pack a major/minor hardware generation into one integer. */
#define MAKE_GEN(major, minor) ( ((major) << 8) | (minor) )

/* Device names and generations; aub_file_decode_batch() indexes this table
 * with the device byte of a version block.
 */
struct {
   const char *name;
   uint32_t gen;
} device_map[] = {
   [0]  = { .name = "bwr",  .gen = MAKE_GEN(4, 0) },
   [1]  = { .name = "cln",  .gen = MAKE_GEN(4, 0) },
   [2]  = { .name = "blc",  .gen = MAKE_GEN(4, 0) },
   [3]  = { .name = "ctg",  .gen = MAKE_GEN(4, 0) },
   [4]  = { .name = "el",   .gen = MAKE_GEN(4, 0) },
   [5]  = { .name = "il",   .gen = MAKE_GEN(4, 0) },
   [6]  = { .name = "sbr",  .gen = MAKE_GEN(6, 0) },
   [7]  = { .name = "ivb",  .gen = MAKE_GEN(7, 0) },
   [8]  = { .name = "lrb2", .gen = MAKE_GEN(0, 0) },
   [9]  = { .name = "hsw",  .gen = MAKE_GEN(7, 5) },
   [10] = { .name = "vlv",  .gen = MAKE_GEN(7, 0) },
   [11] = { .name = "bdw",  .gen = MAKE_GEN(8, 0) },
   [12] = { .name = "skl",  .gen = MAKE_GEN(9, 0) },
   [13] = { .name = "chv",  .gen = MAKE_GEN(8, 0) },
   [14] = { .name = "bxt",  .gen = MAKE_GEN(9, 0) }
};
802
803 static void
804 aub_file_decode_batch(struct aub_file *file, struct gen_spec *spec)
805 {
806 uint32_t *p, h, device, data_type;
807 int header_length, payload_size, bias;
808
809 p = file->cursor;
810 h = *p;
811 header_length = h & 0xffff;
812
813 switch (OPCODE(h)) {
814 case OPCODE_AUB:
815 bias = 2;
816 break;
817 case OPCODE_NEW_AUB:
818 bias = 1;
819 break;
820 default:
821 printf("unknown opcode %d at %ld/%ld\n",
822 OPCODE(h), file->cursor - file->map,
823 file->end - file->map);
824 file->cursor = file->end;
825 return;
826 }
827
828 payload_size = 0;
829 switch (h & 0xffff0000) {
830 case MAKE_HEADER(TYPE_AUB, OPCODE_AUB, SUBOPCODE_HEADER):
831 payload_size = p[12];
832 break;
833 case MAKE_HEADER(TYPE_AUB, OPCODE_AUB, SUBOPCODE_BLOCK):
834 payload_size = p[4];
835 handle_trace_block(spec, p);
836 break;
837 case MAKE_HEADER(TYPE_AUB, OPCODE_AUB, SUBOPCODE_BMP):
838 break;
839
840 case MAKE_HEADER(TYPE_AUB, OPCODE_NEW_AUB, SUBOPCODE_VERSION):
841 printf("version block: dw1 %08x\n", p[1]);
842 device = (p[1] >> 8) & 0xff;
843 printf(" device %s\n", device_map[device].name);
844 break;
845 case MAKE_HEADER(TYPE_AUB, OPCODE_NEW_AUB, SUBOPCODE_REG_WRITE):
846 printf("register write block: (dwords %d)\n", h & 0xffff);
847 printf(" reg 0x%x, data 0x%x\n", p[1], p[5]);
848 break;
849 case MAKE_HEADER(TYPE_AUB, OPCODE_NEW_AUB, SUBOPCODE_MEM_WRITE):
850 printf("memory write block (dwords %d):\n", h & 0xffff);
851 printf(" address 0x%lx\n", *(uint64_t *) &p[1]);
852 data_type = (p[3] >> 20) & 0xff;
853 if (data_type != 0)
854 printf(" data type 0x%x\n", data_type);
855 printf(" address space 0x%x\n", (p[3] >> 28) & 0xf);
856 break;
857 case MAKE_HEADER(TYPE_AUB, OPCODE_NEW_AUB, SUBOPCODE_MEM_POLL):
858 printf("memory poll block (dwords %d):\n", h & 0xffff);
859 break;
860 default:
861 printf("unknown block type=0x%x, opcode=0x%x, "
862 "subopcode=0x%x (%08x)\n", TYPE(h), OPCODE(h), SUBOPCODE(h), h);
863 break;
864 }
865 file->cursor = p + header_length + bias + payload_size / 4;
866 }
867
868 static int
869 aub_file_more_stuff(struct aub_file *file)
870 {
871 return file->cursor < file->end;
872 }
873
/* When stdout is a terminal, spawn "less -rFi" and redirect this process's
 * stdout into it through a pipe.
 *
 * Does nothing when stdout is not a terminal or when pipe()/fork() fail;
 * in those cases output keeps going to the original stdout.
 */
static void
setup_pager(void)
{
   int fds[2];
   pid_t pid;

   if (!isatty(1))
      return;

   if (pipe(fds) == -1)
      return;

   pid = fork();
   if (pid == -1) {
      /* No pager was started: do not leak the pipe descriptors. */
      close(fds[0]);
      close(fds[1]);
      return;
   }

   if (pid == 0) {
      close(fds[1]);
      dup2(fds[0], 0);
      close(fds[0]);
      execlp("less", "less", "-rFi", (char *) NULL);
      /* Only reached when exec failed.  The child must stop here; falling
       * through would let it run the rest of main() as a second decoder.
       */
      perror("failed to launch pager");
      _exit(EXIT_FAILURE);
   }

   close(fds[0]);
   dup2(fds[1], 1);
   close(fds[1]);
}
900
901 static void
902 print_help(FILE *file)
903 {
904 fprintf(file,
905 "Usage: %s [OPTION]... FILE\n"
906 "Decode aub file contents.\n\n"
907 "A valid --gen option must be provided.\n\n"
908 " --help display this help and exit\n"
909 " --gen=platform decode for given platform (ivb, byt, hsw, bdw, chv, skl, kbl or bxt)\n"
910 " --headers decode only command headers\n"
911 " --color[=WHEN] colorize the output; WHEN can be 'auto' (default\n"
912 " if omitted), 'always', or 'never'\n"
913 " --no-pager don't launch pager\n"
914 " --no-offsets don't print instruction offsets\n",
915 basename(program_invocation_name));
916 }
917
/* Test whether arg is the option `prefix`, either bare or followed by
 * "=VALUE".
 *
 * On a match returns true and sets *value to the text after '=' or to NULL
 * when there is no '='.  On a mismatch returns false and leaves *value
 * untouched.
 */
static bool
is_prefix(const char *arg, const char *prefix, const char **value)
{
   const int prefix_len = strlen(prefix);

   if (strncmp(arg, prefix, prefix_len) != 0)
      return false;

   switch (arg[prefix_len]) {
   case '=':
      *value = arg + prefix_len + 1;
      return true;
   case '\0':
      *value = NULL;
      return true;
   default:
      return false;
   }
}
934
935 int main(int argc, char *argv[])
936 {
937 struct gen_spec *spec;
938 struct aub_file *file;
939 int i, pci_id = 0;
940 bool found_arg_gen = false, pager = true;
941 int gen_major, gen_minor;
942 const char *value;
943 char gen_file[256], gen_val[24];
944
945 if (argc == 1) {
946 print_help(stderr);
947 exit(EXIT_FAILURE);
948 }
949
950 for (i = 1; i < argc; ++i) {
951 if (strcmp(argv[i], "--no-pager") == 0) {
952 pager = false;
953 } else if (strcmp(argv[i], "--no-offsets") == 0) {
954 option_print_offsets = false;
955 } else if (is_prefix(argv[i], "--gen", &value)) {
956 if (value == NULL)
957 error(EXIT_FAILURE, 0, "option '--gen' requires an argument\n");
958 found_arg_gen = true;
959 gen_major = 0;
960 gen_minor = 0;
961 snprintf(gen_val, sizeof(gen_val), "%s", value);
962 } else if (strcmp(argv[i], "--headers") == 0) {
963 option_full_decode = false;
964 } else if (is_prefix(argv[i], "--color", &value)) {
965 if (value == NULL || strcmp(value, "always") == 0)
966 option_color = COLOR_ALWAYS;
967 else if (strcmp(value, "never") == 0)
968 option_color = COLOR_NEVER;
969 else if (strcmp(value, "auto") == 0)
970 option_color = COLOR_AUTO;
971 else
972 error(EXIT_FAILURE, 0, "invalid value for --color: %s", value);
973 } else if (strcmp(argv[i], "--help") == 0) {
974 print_help(stdout);
975 exit(EXIT_SUCCESS);
976 } else {
977 if (argv[i][0] == '-') {
978 fprintf(stderr, "unknown option %s\n", argv[i]);
979 exit(EXIT_FAILURE);
980 }
981 break;
982 }
983 }
984
985 if (!found_arg_gen) {
986 fprintf(stderr, "argument --gen is required\n");
987 exit(EXIT_FAILURE);
988 }
989
990 if (strstr(gen_val,"ivb") != NULL) {
991 /* Intel(R) Ivybridge Mobile GT2 */
992 pci_id = 0x0166;
993 gen_major = 7;
994 gen_minor = 0;
995 } else if (strstr(gen_val,"hsw") != NULL) {
996 /* Intel(R) Haswell Mobile GT2 */
997 pci_id = 0x0416;
998 gen_major = 7;
999 gen_minor = 5;
1000 } else if (strstr(gen_val,"byt") != NULL) {
1001 /* Intel(R) Bay Trail */
1002 pci_id = 0x0155;
1003 gen_major = 7;
1004 gen_minor = 5;
1005 } else if (strstr(gen_val,"bdw") != NULL) {
1006 /* Intel(R) HD Graphics 5500 (Broadwell GT2) */
1007 pci_id = 0x1616;
1008 gen_major = 8;
1009 gen_minor = 0;
1010 } else if (strstr(gen_val,"chv") != NULL) {
1011 /* Intel(R) HD Graphics (Cherryview) */
1012 pci_id = 0x22B3;
1013 gen_major = 8;
1014 gen_minor = 0;
1015 } else if (strstr(gen_val,"skl") != NULL) {
1016 /* Intel(R) HD Graphics 530 (Skylake GT2) */
1017 pci_id = 0x1912;
1018 gen_major = 9;
1019 gen_minor = 0;
1020 } else if (strstr(gen_val,"kbl") != NULL) {
1021 /* Intel(R) Kabylake GT2 */
1022 pci_id = 0x591D;
1023 gen_major = 9;
1024 gen_minor = 0;
1025 } else if (strstr(gen_val,"bxt") != NULL) {
1026 /* Intel(R) HD Graphics (Broxton) */
1027 pci_id = 0x0A84;
1028 gen_major = 9;
1029 gen_minor = 0;
1030 } else {
1031 error(EXIT_FAILURE, 0, "can't parse gen: %s, expected ivb, byt, hsw, bdw, chv, skl, kbl or bxt\n", gen_val);
1032 }
1033
1034 /* Do this before we redirect stdout to pager. */
1035 if (option_color == COLOR_AUTO)
1036 option_color = isatty(1) ? COLOR_ALWAYS : COLOR_NEVER;
1037
1038 if (isatty(1) && pager)
1039 setup_pager();
1040
1041 if (gen_minor > 0)
1042 snprintf(gen_file, sizeof(gen_file), "../genxml/gen%d%d.xml", gen_major, gen_minor);
1043 else
1044 snprintf(gen_file, sizeof(gen_file), "../genxml/gen%d.xml", gen_major);
1045
1046 spec = gen_spec_load(gen_file);
1047 disasm = gen_disasm_create(pci_id);
1048
1049 if (argv[i] == NULL) {
1050 print_help(stderr);
1051 exit(EXIT_FAILURE);
1052 } else {
1053 file = aub_file_open(argv[i]);
1054 }
1055
1056 while (aub_file_more_stuff(file))
1057 aub_file_decode_batch(file, spec);
1058
1059 fflush(stdout);
1060 /* close the stdout which is opened to write the output */
1061 close(1);
1062
1063 wait(NULL);
1064
1065 return EXIT_SUCCESS;
1066 }