/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
30 #include <sys/types.h>
/* State for an open AUB trace file.  One writer is created lazily per
 * device (cached in device->aub_writer) and shared by all dumps.
 *
 * NOTE(review): the field list was lost in extraction; it is reconstructed
 * from the visible uses (fopen/fwrite/fclose/fflush on ->file, ->offset
 * assigned from gtt_size and used as an allocation cursor, ->gen compared
 * against 8) — confirm against the original header.
 */
struct anv_aub_writer {
   FILE *file;       /* open "intel.aub" stream all packets are written to */
   uint32_t offset;  /* next free GTT offset for object placement */
   int gen;          /* hardware generation, from device->info.gen */
};
46 aub_out(struct anv_aub_writer
*writer
, uint32_t data
)
48 fwrite(&data
, 1, 4, writer
->file
);
52 aub_out_data(struct anv_aub_writer
*writer
, const void *data
, size_t size
)
54 fwrite(data
, 1, size
, writer
->file
);
57 static struct anv_aub_writer
*
58 get_anv_aub_writer(struct anv_device
*device
)
60 struct anv_aub_writer
*writer
= device
->aub_writer
;
63 int gtt_size
= 0x10000;
66 if (geteuid() != getuid())
72 writer
= malloc(sizeof(*writer
));
76 filename
= "intel.aub";
77 writer
->gen
= device
->info
.gen
;
78 writer
->file
= fopen(filename
, "w+");
84 /* Start allocating objects from just after the GTT. */
85 writer
->offset
= gtt_size
;
87 /* Start with a (required) version packet. */
88 aub_out(writer
, CMD_AUB_HEADER
| (13 - 2));
90 (4 << AUB_HEADER_MAJOR_SHIFT
) |
91 (0 << AUB_HEADER_MINOR_SHIFT
));
92 for (i
= 0; i
< 8; i
++) {
93 aub_out(writer
, 0); /* app name */
95 aub_out(writer
, 0); /* timestamp */
96 aub_out(writer
, 0); /* timestamp */
97 aub_out(writer
, 0); /* comment len */
99 /* Set up the GTT. The max we can handle is 256M */
100 aub_out(writer
, CMD_AUB_TRACE_HEADER_BLOCK
| ((writer
->gen
>= 8 ? 6 : 5) - 2));
102 AUB_TRACE_MEMTYPE_GTT_ENTRY
|
103 AUB_TRACE_TYPE_NOTYPE
| AUB_TRACE_OP_DATA_WRITE
);
104 aub_out(writer
, 0); /* subtype */
105 aub_out(writer
, 0); /* offset */
106 aub_out(writer
, gtt_size
); /* size */
107 if (writer
->gen
>= 8)
109 for (i
= 0x000; i
< gtt_size
; i
+= 4, entry
+= 0x1000) {
110 aub_out(writer
, entry
);
113 return device
->aub_writer
= writer
;
117 anv_aub_writer_destroy(struct anv_aub_writer
*writer
)
119 fclose(writer
->file
);
125 * Break up large objects into multiple writes. Otherwise a 128kb VBO
126 * would overflow the 16 bits of size field in the packet header and
127 * everything goes badly after that.
130 aub_write_trace_block(struct anv_aub_writer
*writer
, uint32_t type
,
131 void *virtual, uint32_t size
, uint32_t gtt_offset
)
135 uint32_t subtype
= 0;
136 static const char null_block
[8 * 4096];
138 for (offset
= 0; offset
< size
; offset
+= block_size
) {
139 block_size
= size
- offset
;
141 if (block_size
> 8 * 4096)
142 block_size
= 8 * 4096;
145 CMD_AUB_TRACE_HEADER_BLOCK
|
146 ((writer
->gen
>= 8 ? 6 : 5) - 2));
148 AUB_TRACE_MEMTYPE_GTT
|
149 type
| AUB_TRACE_OP_DATA_WRITE
);
150 aub_out(writer
, subtype
);
151 aub_out(writer
, gtt_offset
+ offset
);
152 aub_out(writer
, align_u32(block_size
, 4));
153 if (writer
->gen
>= 8)
157 aub_out_data(writer
, (char *) virtual + offset
, block_size
);
159 aub_out_data(writer
, null_block
, block_size
);
161 /* Pad to a multiple of 4 bytes. */
162 aub_out_data(writer
, null_block
, -block_size
& 3);
167 * Make a ringbuffer on fly and dump it
170 aub_build_dump_ringbuffer(struct anv_aub_writer
*writer
,
171 uint32_t batch_offset
, uint32_t offset
,
174 uint32_t ringbuffer
[4096];
175 int ring
= AUB_TRACE_TYPE_RING_PRB0
; /* The default ring */
178 if (ring_flag
== I915_EXEC_BSD
)
179 ring
= AUB_TRACE_TYPE_RING_PRB1
;
180 else if (ring_flag
== I915_EXEC_BLT
)
181 ring
= AUB_TRACE_TYPE_RING_PRB2
;
183 /* Make a ring buffer to execute our batchbuffer. */
184 memset(ringbuffer
, 0, sizeof(ringbuffer
));
185 if (writer
->gen
>= 8) {
186 ringbuffer
[ring_count
++] = AUB_MI_BATCH_BUFFER_START
| (3 - 2);
187 ringbuffer
[ring_count
++] = batch_offset
;
188 ringbuffer
[ring_count
++] = 0;
190 ringbuffer
[ring_count
++] = AUB_MI_BATCH_BUFFER_START
;
191 ringbuffer
[ring_count
++] = batch_offset
;
194 /* Write out the ring. This appears to trigger execution of
195 * the ring in the simulator.
198 CMD_AUB_TRACE_HEADER_BLOCK
|
199 ((writer
->gen
>= 8 ? 6 : 5) - 2));
201 AUB_TRACE_MEMTYPE_GTT
| ring
| AUB_TRACE_OP_COMMAND_WRITE
);
202 aub_out(writer
, 0); /* general/surface subtype */
203 aub_out(writer
, offset
);
204 aub_out(writer
, ring_count
* 4);
205 if (writer
->gen
>= 8)
208 /* FIXME: Need some flush operations here? */
209 aub_out_data(writer
, ringbuffer
, ring_count
* 4);
219 relocate_bo(struct anv_bo
*bo
, struct drm_i915_gem_relocation_entry
*relocs
,
220 size_t num_relocs
, struct aub_bo
*bos
)
222 struct aub_bo
*aub_bo
= &bos
[bo
->index
];
223 struct drm_i915_gem_relocation_entry
*reloc
;
226 aub_bo
->relocated
= malloc(bo
->size
);
227 memcpy(aub_bo
->relocated
, aub_bo
->map
, bo
->size
);
228 for (size_t i
= 0; i
< num_relocs
; i
++) {
230 assert(reloc
->offset
< bo
->size
);
231 dw
= aub_bo
->relocated
+ reloc
->offset
;
232 *dw
= bos
[reloc
->target_handle
].offset
+ reloc
->delta
;
237 anv_cmd_buffer_dump(struct anv_cmd_buffer
*cmd_buffer
)
239 struct anv_device
*device
= cmd_buffer
->device
;
240 struct anv_batch
*batch
= &cmd_buffer
->batch
;
241 struct anv_aub_writer
*writer
;
243 uint32_t ring_flag
= 0;
245 struct aub_bo
*aub_bos
;
247 writer
= get_anv_aub_writer(device
);
251 aub_bos
= malloc(cmd_buffer
->exec2_bo_count
* sizeof(aub_bos
[0]));
252 offset
= writer
->offset
;
253 for (uint32_t i
= 0; i
< cmd_buffer
->exec2_bo_count
; i
++) {
254 bo
= cmd_buffer
->exec2_bos
[i
];
256 aub_bos
[i
].map
= bo
->map
;
258 aub_bos
[i
].map
= anv_gem_mmap(device
, bo
->gem_handle
, 0, bo
->size
);
259 aub_bos
[i
].relocated
= aub_bos
[i
].map
;
260 aub_bos
[i
].offset
= offset
;
261 offset
= align_u32(offset
+ bo
->size
+ 4095, 4096);
264 struct anv_batch_bo
*first_bbo
;
265 for (struct anv_batch_bo
*bbo
= cmd_buffer
->last_batch_bo
;
266 bbo
!= NULL
; bbo
= bbo
->prev_batch_bo
) {
267 /* Keep stashing the current BO until we get to the beginning */
270 /* Handle relocations for this batch BO */
271 relocate_bo(&bbo
->bo
, &batch
->relocs
.relocs
[bbo
->first_reloc
],
272 bbo
->num_relocs
, aub_bos
);
274 assert(first_bbo
->prev_batch_bo
== NULL
);
276 for (struct anv_batch_bo
*bbo
= cmd_buffer
->surface_batch_bo
;
277 bbo
!= NULL
; bbo
= bbo
->prev_batch_bo
) {
279 /* Handle relocations for this surface state BO */
280 relocate_bo(&bbo
->bo
,
281 &cmd_buffer
->surface_relocs
.relocs
[bbo
->first_reloc
],
282 bbo
->num_relocs
, aub_bos
);
285 for (uint32_t i
= 0; i
< cmd_buffer
->exec2_bo_count
; i
++) {
286 bo
= cmd_buffer
->exec2_bos
[i
];
287 if (i
== cmd_buffer
->exec2_bo_count
- 1) {
288 assert(bo
== &first_bbo
->bo
);
289 aub_write_trace_block(writer
, AUB_TRACE_TYPE_BATCH
,
290 aub_bos
[i
].relocated
,
291 first_bbo
->length
, aub_bos
[i
].offset
);
293 aub_write_trace_block(writer
, AUB_TRACE_TYPE_NOTYPE
,
294 aub_bos
[i
].relocated
,
295 bo
->size
, aub_bos
[i
].offset
);
297 if (aub_bos
[i
].relocated
!= aub_bos
[i
].map
)
298 free(aub_bos
[i
].relocated
);
299 if (aub_bos
[i
].map
!= bo
->map
)
300 anv_gem_munmap(aub_bos
[i
].map
, bo
->size
);
303 /* Dump ring buffer */
304 aub_build_dump_ringbuffer(writer
, aub_bos
[first_bbo
->bo
.index
].offset
,
309 fflush(writer
->file
);