/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "common/gen_decoder.h"
25 #include "gen_disasm.h"
26 #include "util/macros.h"
31 gen_batch_decode_ctx_init(struct gen_batch_decode_ctx
*ctx
,
32 const struct gen_device_info
*devinfo
,
33 FILE *fp
, enum gen_batch_decode_flags flags
,
35 struct gen_batch_decode_bo (*get_bo
)(void *,
37 unsigned (*get_state_size
)(void *, uint32_t),
40 memset(ctx
, 0, sizeof(*ctx
));
43 ctx
->get_state_size
= get_state_size
;
44 ctx
->user_data
= user_data
;
47 ctx
->max_vbo_decoded_lines
= -1; /* No limit! */
50 ctx
->spec
= gen_spec_load(devinfo
);
52 ctx
->spec
= gen_spec_load_from_path(devinfo
, xml_path
);
53 ctx
->disasm
= gen_disasm_create(devinfo
);
57 gen_batch_decode_ctx_finish(struct gen_batch_decode_ctx
*ctx
)
59 gen_spec_destroy(ctx
->spec
);
60 gen_disasm_destroy(ctx
->disasm
);
/* ANSI "Control Sequence Introducer".  Defined here only if nothing earlier
 * in the file already provides it; "\033" is the portable spelling of ESC.
 */
#ifndef CSI
#define CSI          "\033["
#endif

/* SGR colour sequences used when GEN_BATCH_DECODE_IN_COLOR is set. */
#define RED_COLOR    CSI "31m"
#define BLUE_HEADER  CSI "0;44m"
#define GREEN_HEADER CSI "1;42m"
#define NORMAL       CSI "0m"
70 ctx_print_group(struct gen_batch_decode_ctx
*ctx
,
71 struct gen_group
*group
,
72 uint64_t address
, const void *map
)
74 gen_print_group(ctx
->fp
, group
, address
, map
, 0,
75 (ctx
->flags
& GEN_BATCH_DECODE_IN_COLOR
) != 0);
78 static struct gen_batch_decode_bo
79 ctx_get_bo(struct gen_batch_decode_ctx
*ctx
, uint64_t addr
)
81 if (gen_spec_get_gen(ctx
->spec
) >= gen_make_gen(8,0)) {
82 /* On Broadwell and above, we have 48-bit addresses which consume two
83 * dwords. Some packets require that these get stored in a "canonical
84 * form" which means that bit 47 is sign-extended through the upper
85 * bits. In order to correctly handle those aub dumps, we need to mask
86 * off the top 16 bits.
88 addr
&= (~0ull >> 16);
91 struct gen_batch_decode_bo bo
= ctx
->get_bo(ctx
->user_data
, addr
);
93 if (gen_spec_get_gen(ctx
->spec
) >= gen_make_gen(8,0))
94 bo
.addr
&= (~0ull >> 16);
96 /* We may actually have an offset into the bo */
98 assert(bo
.addr
<= addr
);
99 uint64_t offset
= addr
- bo
.addr
;
109 update_count(struct gen_batch_decode_ctx
*ctx
,
110 uint32_t offset_from_dsba
,
111 unsigned element_dwords
,
116 if (ctx
->get_state_size
)
117 size
= ctx
->get_state_size(ctx
->user_data
, offset_from_dsba
);
120 return size
/ (sizeof(uint32_t) * element_dwords
);
122 /* In the absence of any information, just guess arbitrarily. */
127 ctx_disassemble_program(struct gen_batch_decode_ctx
*ctx
,
128 uint32_t ksp
, const char *type
)
130 uint64_t addr
= ctx
->instruction_base
+ ksp
;
131 struct gen_batch_decode_bo bo
= ctx_get_bo(ctx
, addr
);
135 fprintf(ctx
->fp
, "\nReferenced %s:\n", type
);
136 gen_disasm_disassemble(ctx
->disasm
, bo
.map
, 0, ctx
->fp
);
/* Heuristic to determine whether a uint32_t is probably actually a float
 * (http://stackoverflow.com/a/2953466)
 *
 * The value is treated as an IEEE-754 single: it is accepted when it is
 * +/-0.0, when its unbiased exponent lies in [-30, 30], or when the low 16
 * mantissa bits are all clear.
 */
static bool
probably_float(uint32_t bits)
{
   const int exponent = (int)((bits & 0x7f800000U) >> 23) - 127;
   const uint32_t mantissa = bits & 0x007fffff;

   const bool is_zero = (exponent == -127) && (mantissa == 0);

   /* +- 1 billionth to 1 billion */
   const bool sane_magnitude = (exponent >= -30) && (exponent <= 30);

   /* some value with only a few binary digits */
   const bool few_digits = (mantissa & 0x0000ffff) == 0;

   return is_zero || sane_magnitude || few_digits;
}
165 ctx_print_buffer(struct gen_batch_decode_ctx
*ctx
,
166 struct gen_batch_decode_bo bo
,
167 uint32_t read_length
,
171 const uint32_t *dw_end
= bo
.map
+ MIN2(bo
.size
, read_length
);
173 int column_count
= 0, line_count
= -1;
174 for (const uint32_t *dw
= bo
.map
; dw
< dw_end
; dw
++) {
175 if (column_count
* 4 == pitch
|| column_count
== 8) {
176 fprintf(ctx
->fp
, "\n");
180 if (max_lines
>= 0 && line_count
>= max_lines
)
183 fprintf(ctx
->fp
, column_count
== 0 ? " " : " ");
185 if ((ctx
->flags
& GEN_BATCH_DECODE_FLOATS
) && probably_float(*dw
))
186 fprintf(ctx
->fp
, " %8.2f", *(float *) dw
);
188 fprintf(ctx
->fp
, " 0x%08x", *dw
);
192 fprintf(ctx
->fp
, "\n");
196 handle_state_base_address(struct gen_batch_decode_ctx
*ctx
, const uint32_t *p
)
198 struct gen_group
*inst
= gen_spec_find_instruction(ctx
->spec
, p
);
200 struct gen_field_iterator iter
;
201 gen_field_iterator_init(&iter
, inst
, p
, 0, false);
203 uint64_t surface_base
= 0, dynamic_base
= 0, instruction_base
= 0;
204 bool surface_modify
= 0, dynamic_modify
= 0, instruction_modify
= 0;
206 while (gen_field_iterator_next(&iter
)) {
207 if (strcmp(iter
.name
, "Surface State Base Address") == 0) {
208 surface_base
= iter
.raw_value
;
209 } else if (strcmp(iter
.name
, "Dynamic State Base Address") == 0) {
210 dynamic_base
= iter
.raw_value
;
211 } else if (strcmp(iter
.name
, "Instruction Base Address") == 0) {
212 instruction_base
= iter
.raw_value
;
213 } else if (strcmp(iter
.name
, "Surface State Base Address Modify Enable") == 0) {
214 surface_modify
= iter
.raw_value
;
215 } else if (strcmp(iter
.name
, "Dynamic State Base Address Modify Enable") == 0) {
216 dynamic_modify
= iter
.raw_value
;
217 } else if (strcmp(iter
.name
, "Insntruction Base Address Modify Enable") == 0) {
218 instruction_modify
= iter
.raw_value
;
223 ctx
->dynamic_base
= dynamic_base
;
226 ctx
->surface_base
= surface_base
;
228 if (instruction_modify
)
229 ctx
->instruction_base
= instruction_base
;
233 dump_binding_table(struct gen_batch_decode_ctx
*ctx
, uint32_t offset
, int count
)
235 struct gen_group
*strct
=
236 gen_spec_find_struct(ctx
->spec
, "RENDER_SURFACE_STATE");
238 fprintf(ctx
->fp
, "did not find RENDER_SURFACE_STATE info\n");
243 count
= update_count(ctx
, offset
, 1, 8);
245 if (offset
% 32 != 0 || offset
>= UINT16_MAX
) {
246 fprintf(ctx
->fp
, " invalid binding table pointer\n");
250 struct gen_batch_decode_bo bind_bo
=
251 ctx_get_bo(ctx
, ctx
->surface_base
+ offset
);
253 if (bind_bo
.map
== NULL
) {
254 fprintf(ctx
->fp
, " binding table unavailable\n");
258 const uint32_t *pointers
= bind_bo
.map
;
259 for (int i
= 0; i
< count
; i
++) {
260 if (pointers
[i
] == 0)
263 uint64_t addr
= ctx
->surface_base
+ pointers
[i
];
264 struct gen_batch_decode_bo bo
= ctx_get_bo(ctx
, addr
);
265 uint32_t size
= strct
->dw_length
* 4;
267 if (pointers
[i
] % 32 != 0 ||
268 addr
< bo
.addr
|| addr
+ size
>= bo
.addr
+ bo
.size
) {
269 fprintf(ctx
->fp
, "pointer %u: 0x%08x <not valid>\n", i
, pointers
[i
]);
273 fprintf(ctx
->fp
, "pointer %u: 0x%08x\n", i
, pointers
[i
]);
274 ctx_print_group(ctx
, strct
, addr
, bo
.map
+ (addr
- bo
.addr
));
279 dump_samplers(struct gen_batch_decode_ctx
*ctx
, uint32_t offset
, int count
)
281 struct gen_group
*strct
= gen_spec_find_struct(ctx
->spec
, "SAMPLER_STATE");
284 count
= update_count(ctx
, offset
, strct
->dw_length
, 4);
286 uint64_t state_addr
= ctx
->dynamic_base
+ offset
;
287 struct gen_batch_decode_bo bo
= ctx_get_bo(ctx
, state_addr
);
288 const void *state_map
= bo
.map
;
290 if (state_map
== NULL
) {
291 fprintf(ctx
->fp
, " samplers unavailable\n");
295 if (offset
% 32 != 0 || state_addr
- bo
.addr
>= bo
.size
) {
296 fprintf(ctx
->fp
, " invalid sampler state pointer\n");
300 for (int i
= 0; i
< count
; i
++) {
301 fprintf(ctx
->fp
, "sampler state %d\n", i
);
302 ctx_print_group(ctx
, strct
, state_addr
, state_map
);
309 handle_media_interface_descriptor_load(struct gen_batch_decode_ctx
*ctx
,
312 struct gen_group
*inst
= gen_spec_find_instruction(ctx
->spec
, p
);
313 struct gen_group
*desc
=
314 gen_spec_find_struct(ctx
->spec
, "INTERFACE_DESCRIPTOR_DATA");
316 struct gen_field_iterator iter
;
317 gen_field_iterator_init(&iter
, inst
, p
, 0, false);
318 uint32_t descriptor_offset
= 0;
319 int descriptor_count
= 0;
320 while (gen_field_iterator_next(&iter
)) {
321 if (strcmp(iter
.name
, "Interface Descriptor Data Start Address") == 0) {
322 descriptor_offset
= strtol(iter
.value
, NULL
, 16);
323 } else if (strcmp(iter
.name
, "Interface Descriptor Total Length") == 0) {
325 strtol(iter
.value
, NULL
, 16) / (desc
->dw_length
* 4);
329 uint64_t desc_addr
= ctx
->dynamic_base
+ descriptor_offset
;
330 struct gen_batch_decode_bo bo
= ctx_get_bo(ctx
, desc_addr
);
331 const void *desc_map
= bo
.map
;
333 if (desc_map
== NULL
) {
334 fprintf(ctx
->fp
, " interface descriptors unavailable\n");
338 for (int i
= 0; i
< descriptor_count
; i
++) {
339 fprintf(ctx
->fp
, "descriptor %d: %08x\n", i
, descriptor_offset
);
341 ctx_print_group(ctx
, desc
, desc_addr
, desc_map
);
343 gen_field_iterator_init(&iter
, desc
, desc_map
, 0, false);
345 uint32_t sampler_offset
= 0, sampler_count
= 0;
346 uint32_t binding_table_offset
= 0, binding_entry_count
= 0;
347 while (gen_field_iterator_next(&iter
)) {
348 if (strcmp(iter
.name
, "Kernel Start Pointer") == 0) {
349 ksp
= strtoll(iter
.value
, NULL
, 16);
350 } else if (strcmp(iter
.name
, "Sampler State Pointer") == 0) {
351 sampler_offset
= strtol(iter
.value
, NULL
, 16);
352 } else if (strcmp(iter
.name
, "Sampler Count") == 0) {
353 sampler_count
= strtol(iter
.value
, NULL
, 10);
354 } else if (strcmp(iter
.name
, "Binding Table Pointer") == 0) {
355 binding_table_offset
= strtol(iter
.value
, NULL
, 16);
356 } else if (strcmp(iter
.name
, "Binding Table Entry Count") == 0) {
357 binding_entry_count
= strtol(iter
.value
, NULL
, 10);
361 ctx_disassemble_program(ctx
, ksp
, "compute shader");
364 dump_samplers(ctx
, sampler_offset
, sampler_count
);
365 dump_binding_table(ctx
, binding_table_offset
, binding_entry_count
);
367 desc_map
+= desc
->dw_length
;
368 desc_addr
+= desc
->dw_length
* 4;
373 handle_3dstate_vertex_buffers(struct gen_batch_decode_ctx
*ctx
,
376 struct gen_group
*inst
= gen_spec_find_instruction(ctx
->spec
, p
);
377 struct gen_group
*vbs
= gen_spec_find_struct(ctx
->spec
, "VERTEX_BUFFER_STATE");
379 struct gen_batch_decode_bo vb
= {};
380 uint32_t vb_size
= 0;
385 struct gen_field_iterator iter
;
386 gen_field_iterator_init(&iter
, inst
, p
, 0, false);
387 while (gen_field_iterator_next(&iter
)) {
388 if (iter
.struct_desc
!= vbs
)
391 struct gen_field_iterator vbs_iter
;
392 gen_field_iterator_init(&vbs_iter
, vbs
, &iter
.p
[iter
.start_bit
/ 32], 0, false);
393 while (gen_field_iterator_next(&vbs_iter
)) {
394 if (strcmp(vbs_iter
.name
, "Vertex Buffer Index") == 0) {
395 index
= vbs_iter
.raw_value
;
396 } else if (strcmp(vbs_iter
.name
, "Buffer Pitch") == 0) {
397 pitch
= vbs_iter
.raw_value
;
398 } else if (strcmp(vbs_iter
.name
, "Buffer Starting Address") == 0) {
399 vb
= ctx_get_bo(ctx
, vbs_iter
.raw_value
);
400 } else if (strcmp(vbs_iter
.name
, "Buffer Size") == 0) {
401 vb_size
= vbs_iter
.raw_value
;
403 } else if (strcmp(vbs_iter
.name
, "End Address") == 0) {
404 if (vb
.map
&& vbs_iter
.raw_value
>= vb
.addr
)
405 vb_size
= vbs_iter
.raw_value
- vb
.addr
;
414 fprintf(ctx
->fp
, "vertex buffer %d, size %d\n", index
, vb_size
);
416 if (vb
.map
== NULL
) {
417 fprintf(ctx
->fp
, " buffer contents unavailable\n");
421 if (vb
.map
== 0 || vb_size
== 0)
424 ctx_print_buffer(ctx
, vb
, vb_size
, pitch
, ctx
->max_vbo_decoded_lines
);
436 handle_3dstate_index_buffer(struct gen_batch_decode_ctx
*ctx
,
439 struct gen_group
*inst
= gen_spec_find_instruction(ctx
->spec
, p
);
441 struct gen_batch_decode_bo ib
= {};
442 uint32_t ib_size
= 0;
445 struct gen_field_iterator iter
;
446 gen_field_iterator_init(&iter
, inst
, p
, 0, false);
447 while (gen_field_iterator_next(&iter
)) {
448 if (strcmp(iter
.name
, "Index Format") == 0) {
449 format
= iter
.raw_value
;
450 } else if (strcmp(iter
.name
, "Buffer Starting Address") == 0) {
451 ib
= ctx_get_bo(ctx
, iter
.raw_value
);
452 } else if (strcmp(iter
.name
, "Buffer Size") == 0) {
453 ib_size
= iter
.raw_value
;
457 if (ib
.map
== NULL
) {
458 fprintf(ctx
->fp
, " buffer contents unavailable\n");
462 const void *m
= ib
.map
;
463 const void *ib_end
= ib
.map
+ MIN2(ib
.size
, ib_size
);
464 for (int i
= 0; m
< ib_end
&& i
< 10; i
++) {
467 fprintf(ctx
->fp
, "%3d ", *(uint8_t *)m
);
471 fprintf(ctx
->fp
, "%3d ", *(uint16_t *)m
);
475 fprintf(ctx
->fp
, "%3d ", *(uint32_t *)m
);
482 fprintf(ctx
->fp
, "...");
483 fprintf(ctx
->fp
, "\n");
487 decode_single_ksp(struct gen_batch_decode_ctx
*ctx
, const uint32_t *p
)
489 struct gen_group
*inst
= gen_spec_find_instruction(ctx
->spec
, p
);
492 bool is_simd8
= false; /* vertex shaders on Gen8+ only */
493 bool is_enabled
= true;
495 struct gen_field_iterator iter
;
496 gen_field_iterator_init(&iter
, inst
, p
, 0, false);
497 while (gen_field_iterator_next(&iter
)) {
498 if (strcmp(iter
.name
, "Kernel Start Pointer") == 0) {
499 ksp
= iter
.raw_value
;
500 } else if (strcmp(iter
.name
, "SIMD8 Dispatch Enable") == 0) {
501 is_simd8
= iter
.raw_value
;
502 } else if (strcmp(iter
.name
, "Dispatch Mode") == 0) {
503 is_simd8
= strcmp(iter
.value
, "SIMD8") == 0;
504 } else if (strcmp(iter
.name
, "Dispatch Enable") == 0) {
505 is_simd8
= strcmp(iter
.value
, "SIMD8") == 0;
506 } else if (strcmp(iter
.name
, "Enable") == 0) {
507 is_enabled
= iter
.raw_value
;
512 strcmp(inst
->name
, "VS_STATE") == 0 ? "vertex shader" :
513 strcmp(inst
->name
, "GS_STATE") == 0 ? "geometry shader" :
514 strcmp(inst
->name
, "SF_STATE") == 0 ? "strips and fans shader" :
515 strcmp(inst
->name
, "CLIP_STATE") == 0 ? "clip shader" :
516 strcmp(inst
->name
, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" :
517 strcmp(inst
->name
, "3DSTATE_HS") == 0 ? "tessellation control shader" :
518 strcmp(inst
->name
, "3DSTATE_VS") == 0 ? (is_simd8
? "SIMD8 vertex shader" : "vec4 vertex shader") :
519 strcmp(inst
->name
, "3DSTATE_GS") == 0 ? (is_simd8
? "SIMD8 geometry shader" : "vec4 geometry shader") :
523 ctx_disassemble_program(ctx
, ksp
, type
);
529 decode_ps_kernels(struct gen_batch_decode_ctx
*ctx
, const uint32_t *p
)
531 struct gen_group
*inst
= gen_spec_find_instruction(ctx
->spec
, p
);
533 uint64_t ksp
[3] = {0, 0, 0};
534 bool enabled
[3] = {false, false, false};
536 struct gen_field_iterator iter
;
537 gen_field_iterator_init(&iter
, inst
, p
, 0, false);
538 while (gen_field_iterator_next(&iter
)) {
539 if (strncmp(iter
.name
, "Kernel Start Pointer ",
540 strlen("Kernel Start Pointer ")) == 0) {
541 int idx
= iter
.name
[strlen("Kernel Start Pointer ")] - '0';
542 ksp
[idx
] = strtol(iter
.value
, NULL
, 16);
543 } else if (strcmp(iter
.name
, "8 Pixel Dispatch Enable") == 0) {
544 enabled
[0] = strcmp(iter
.value
, "true") == 0;
545 } else if (strcmp(iter
.name
, "16 Pixel Dispatch Enable") == 0) {
546 enabled
[1] = strcmp(iter
.value
, "true") == 0;
547 } else if (strcmp(iter
.name
, "32 Pixel Dispatch Enable") == 0) {
548 enabled
[2] = strcmp(iter
.value
, "true") == 0;
552 /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */
553 if (enabled
[0] + enabled
[1] + enabled
[2] == 1) {
557 } else if (enabled
[2]) {
562 uint64_t tmp
= ksp
[1];
568 ctx_disassemble_program(ctx
, ksp
[0], "SIMD8 fragment shader");
570 ctx_disassemble_program(ctx
, ksp
[1], "SIMD16 fragment shader");
572 ctx_disassemble_program(ctx
, ksp
[2], "SIMD32 fragment shader");
573 fprintf(ctx
->fp
, "\n");
577 decode_3dstate_constant(struct gen_batch_decode_ctx
*ctx
, const uint32_t *p
)
579 struct gen_group
*inst
= gen_spec_find_instruction(ctx
->spec
, p
);
580 struct gen_group
*body
=
581 gen_spec_find_struct(ctx
->spec
, "3DSTATE_CONSTANT_BODY");
583 uint32_t read_length
[4] = {0};
584 uint64_t read_addr
[4];
586 struct gen_field_iterator outer
;
587 gen_field_iterator_init(&outer
, inst
, p
, 0, false);
588 while (gen_field_iterator_next(&outer
)) {
589 if (outer
.struct_desc
!= body
)
592 struct gen_field_iterator iter
;
593 gen_field_iterator_init(&iter
, body
, &outer
.p
[outer
.start_bit
/ 32],
596 while (gen_field_iterator_next(&iter
)) {
598 if (sscanf(iter
.name
, "Read Length[%d]", &idx
) == 1) {
599 read_length
[idx
] = iter
.raw_value
;
600 } else if (sscanf(iter
.name
, "Buffer[%d]", &idx
) == 1) {
601 read_addr
[idx
] = iter
.raw_value
;
605 for (int i
= 0; i
< 4; i
++) {
606 if (read_length
[i
] == 0)
609 struct gen_batch_decode_bo buffer
= ctx_get_bo(ctx
, read_addr
[i
]);
611 fprintf(ctx
->fp
, "constant buffer %d unavailable\n", i
);
615 unsigned size
= read_length
[i
] * 32;
616 fprintf(ctx
->fp
, "constant buffer %d, size %u\n", i
, size
);
618 ctx_print_buffer(ctx
, buffer
, size
, 0, -1);
/* Dump the binding table whose offset is in dword 1 of the packet; the
 * entry count is unknown (-1), so dump_binding_table() works it out.
 */
static void
decode_3dstate_binding_table_pointers(struct gen_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   const uint32_t table_offset = p[1];

   dump_binding_table(ctx, table_offset, -1);
}
/* Dump the sampler states whose offset is in dword 1 of the packet; the
 * count is unknown (-1), so dump_samplers() works it out.
 */
static void
decode_3dstate_sampler_state_pointers(struct gen_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   const uint32_t sampler_offset = p[1];

   dump_samplers(ctx, sampler_offset, -1);
}
/* Variant of the sampler-state-pointers packet that carries three offsets,
 * in dwords 1..3; each sampler array is dumped in turn with an unknown
 * (-1) count.
 */
static void
decode_3dstate_sampler_state_pointers_gen6(struct gen_batch_decode_ctx *ctx,
                                           const uint32_t *p)
{
   for (int dw = 1; dw <= 3; dw++)
      dump_samplers(ctx, p[dw], -1);
}
/* Return true when `str` ends with the suffix `end`.
 *
 * An empty suffix matches every string.  A suffix longer than the string
 * never matches; that case is rejected before any pointer arithmetic so no
 * comparison starts before the beginning of `str`.
 */
static inline bool
str_ends_with(const char *str, const char *end)
{
   const size_t str_len = strlen(str);
   const size_t end_len = strlen(end);

   if (end_len > str_len)
      return false;

   return strcmp(str + (str_len - end_len), end) == 0;
}
657 decode_dynamic_state_pointers(struct gen_batch_decode_ctx
*ctx
,
658 const char *struct_type
, const uint32_t *p
,
661 struct gen_group
*inst
= gen_spec_find_instruction(ctx
->spec
, p
);
663 uint32_t state_offset
= 0;
665 struct gen_field_iterator iter
;
666 gen_field_iterator_init(&iter
, inst
, p
, 0, false);
667 while (gen_field_iterator_next(&iter
)) {
668 if (str_ends_with(iter
.name
, "Pointer")) {
669 state_offset
= iter
.raw_value
;
674 uint64_t state_addr
= ctx
->dynamic_base
+ state_offset
;
675 struct gen_batch_decode_bo bo
= ctx_get_bo(ctx
, state_addr
);
676 const void *state_map
= bo
.map
;
678 if (state_map
== NULL
) {
679 fprintf(ctx
->fp
, " dynamic %s state unavailable\n", struct_type
);
683 struct gen_group
*state
= gen_spec_find_struct(ctx
->spec
, struct_type
);
684 if (strcmp(struct_type
, "BLEND_STATE") == 0) {
685 /* Blend states are different from the others because they have a header
686 * struct called BLEND_STATE which is followed by a variable number of
687 * BLEND_STATE_ENTRY structs.
689 fprintf(ctx
->fp
, "%s\n", struct_type
);
690 ctx_print_group(ctx
, state
, state_addr
, state_map
);
692 state_addr
+= state
->dw_length
* 4;
693 state_map
+= state
->dw_length
* 4;
695 struct_type
= "BLEND_STATE_ENTRY";
696 state
= gen_spec_find_struct(ctx
->spec
, struct_type
);
699 for (int i
= 0; i
< count
; i
++) {
700 fprintf(ctx
->fp
, "%s %d\n", struct_type
, i
);
701 ctx_print_group(ctx
, state
, state_addr
, state_map
);
703 state_addr
+= state
->dw_length
* 4;
704 state_map
+= state
->dw_length
* 4;
/* Dump the 4 CC_VIEWPORT structures referenced by the packet. */
static void
decode_3dstate_viewport_state_pointers_cc(struct gen_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   static const char state_name[] = "CC_VIEWPORT";

   decode_dynamic_state_pointers(ctx, state_name, p, 4);
}
/* Dump the 4 SF_CLIP_VIEWPORT structures referenced by the packet. */
static void
decode_3dstate_viewport_state_pointers_sf_clip(struct gen_batch_decode_ctx *ctx,
                                               const uint32_t *p)
{
   static const char state_name[] = "SF_CLIP_VIEWPORT";

   decode_dynamic_state_pointers(ctx, state_name, p, 4);
}
/* Dump the BLEND_STATE header and one entry referenced by the packet. */
static void
decode_3dstate_blend_state_pointers(struct gen_batch_decode_ctx *ctx,
                                    const uint32_t *p)
{
   static const char state_name[] = "BLEND_STATE";

   decode_dynamic_state_pointers(ctx, state_name, p, 1);
}
/* Dump the single COLOR_CALC_STATE referenced by the packet. */
static void
decode_3dstate_cc_state_pointers(struct gen_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   static const char state_name[] = "COLOR_CALC_STATE";

   decode_dynamic_state_pointers(ctx, state_name, p, 1);
}
/* Dump the single SCISSOR_RECT referenced by the packet. */
static void
decode_3dstate_scissor_state_pointers(struct gen_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   static const char state_name[] = "SCISSOR_RECT";

   decode_dynamic_state_pointers(ctx, state_name, p, 1);
}
744 decode_load_register_imm(struct gen_batch_decode_ctx
*ctx
, const uint32_t *p
)
746 struct gen_group
*reg
= gen_spec_find_register(ctx
->spec
, p
[1]);
749 fprintf(ctx
->fp
, "register %s (0x%x): 0x%x\n",
750 reg
->name
, reg
->register_offset
, p
[2]);
751 ctx_print_group(ctx
, reg
, reg
->register_offset
, &p
[2]);
755 struct custom_decoder
{
756 const char *cmd_name
;
757 void (*decode
)(struct gen_batch_decode_ctx
*ctx
, const uint32_t *p
);
758 } custom_decoders
[] = {
759 { "STATE_BASE_ADDRESS", handle_state_base_address
},
760 { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load
},
761 { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers
},
762 { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer
},
763 { "3DSTATE_VS", decode_single_ksp
},
764 { "3DSTATE_GS", decode_single_ksp
},
765 { "3DSTATE_DS", decode_single_ksp
},
766 { "3DSTATE_HS", decode_single_ksp
},
767 { "3DSTATE_PS", decode_ps_kernels
},
768 { "3DSTATE_CONSTANT_VS", decode_3dstate_constant
},
769 { "3DSTATE_CONSTANT_GS", decode_3dstate_constant
},
770 { "3DSTATE_CONSTANT_PS", decode_3dstate_constant
},
771 { "3DSTATE_CONSTANT_HS", decode_3dstate_constant
},
772 { "3DSTATE_CONSTANT_DS", decode_3dstate_constant
},
774 { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers
},
775 { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers
},
776 { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers
},
777 { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers
},
778 { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers
},
780 { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers
},
781 { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers
},
782 { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers
},
783 { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers
},
784 { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers
},
785 { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gen6
},
787 { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc
},
788 { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip
},
789 { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers
},
790 { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers
},
791 { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers
},
792 { "MI_LOAD_REGISTER_IMM", decode_load_register_imm
}
/* Assemble the address stored at p[0] (and, on gen8+, p[1]).
 *
 * The low 12 bits of p[0] are discarded; on gen8 and later the low 16 bits
 * of p[1] supply address bits 32..47.
 */
static inline uint64_t
get_address(struct gen_spec *spec, const uint32_t *p)
{
   /* Addresses are always guaranteed to be page-aligned and sometimes
    * hardware packets have extra stuff stuffed in the bottom 12 bits.
    */
   const uint64_t low = p[0] & ~0xfffu;

   if (gen_spec_get_gen(spec) < gen_make_gen(8,0))
      return low;

   /* On Broadwell and above, we have 48-bit addresses which consume two
    * dwords.  Some packets require that these get stored in a "canonical
    * form" which means that bit 47 is sign-extended through the upper
    * bits.  In order to correctly handle those aub dumps, we need to mask
    * off the top 16 bits.
    */
   const uint64_t high = ((uint64_t)p[1] & 0xffff) << 32;

   return low | high;
}
817 gen_print_batch(struct gen_batch_decode_ctx
*ctx
,
818 const uint32_t *batch
, uint32_t batch_size
,
821 const uint32_t *p
, *end
= batch
+ batch_size
/ sizeof(uint32_t);
823 struct gen_group
*inst
;
825 for (p
= batch
; p
< end
; p
+= length
) {
826 inst
= gen_spec_find_instruction(ctx
->spec
, p
);
827 length
= gen_group_get_length(inst
, p
);
828 assert(inst
== NULL
|| length
> 0);
829 length
= MAX2(1, length
);
831 const char *reset_color
= ctx
->flags
& GEN_BATCH_DECODE_IN_COLOR
? NORMAL
: "";
834 if (ctx
->flags
& GEN_BATCH_DECODE_OFFSETS
)
835 offset
= batch_addr
+ ((char *)p
- (char *)batch
);
840 fprintf(ctx
->fp
, "%s0x%08"PRIx64
": unknown instruction %08x%s\n",
841 (ctx
->flags
& GEN_BATCH_DECODE_IN_COLOR
) ? RED_COLOR
: "",
842 offset
, p
[0], reset_color
);
847 const char *inst_name
= gen_group_get_name(inst
);
848 if (ctx
->flags
& GEN_BATCH_DECODE_IN_COLOR
) {
849 reset_color
= NORMAL
;
850 if (ctx
->flags
& GEN_BATCH_DECODE_FULL
) {
851 if (strcmp(inst_name
, "MI_BATCH_BUFFER_START") == 0 ||
852 strcmp(inst_name
, "MI_BATCH_BUFFER_END") == 0)
853 color
= GREEN_HEADER
;
864 fprintf(ctx
->fp
, "%s0x%08"PRIx64
": 0x%08x: %-80s%s\n",
865 color
, offset
, p
[0], inst_name
, reset_color
);
867 if (ctx
->flags
& GEN_BATCH_DECODE_FULL
) {
868 ctx_print_group(ctx
, inst
, offset
, p
);
870 for (int i
= 0; i
< ARRAY_SIZE(custom_decoders
); i
++) {
871 if (strcmp(inst_name
, custom_decoders
[i
].cmd_name
) == 0) {
872 custom_decoders
[i
].decode(ctx
, p
);
878 if (strcmp(inst_name
, "MI_BATCH_BUFFER_START") == 0) {
879 struct gen_batch_decode_bo next_batch
= {};
881 struct gen_field_iterator iter
;
882 gen_field_iterator_init(&iter
, inst
, p
, 0, false);
883 while (gen_field_iterator_next(&iter
)) {
884 if (strcmp(iter
.name
, "Batch Buffer Start Address") == 0) {
885 next_batch
= ctx_get_bo(ctx
, iter
.raw_value
);
886 } else if (strcmp(iter
.name
, "Second Level Batch Buffer") == 0) {
887 second_level
= iter
.raw_value
;
891 if (next_batch
.map
== NULL
) {
892 fprintf(ctx
->fp
, "Secondary batch at 0x%08"PRIx64
" unavailable\n",
895 gen_print_batch(ctx
, next_batch
.map
, next_batch
.size
,
899 /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
900 * like a subroutine call. Commands that come afterwards get
901 * processed once the 2nd level batch buffer returns with
902 * MI_BATCH_BUFFER_END.
906 /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
907 * like a goto. Nothing after it will ever get processed. In
908 * order to prevent the recursion from growing, we just reset the
913 } else if (strcmp(inst_name
, "MI_BATCH_BUFFER_END") == 0) {