intel/aub_write: store the physical page allocator in struct
mesa.git: src/intel/tools/aub_write.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "aub_write.h"

#include <inttypes.h>
#include <signal.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>

#include "drm-uapi/i915_drm.h"
#include "intel_aub.h"
#include "gen_context.h"

#ifndef ALIGN
#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
#endif

#define MI_BATCH_NON_SECURE_I965 (1 << 8)

#define min(a, b) ({                            \
   __typeof(a) _a = (a);                        \
   __typeof(b) _b = (b);                        \
   _a < _b ? _a : _b;                           \
})

#define max(a, b) ({                            \
   __typeof(a) _a = (a);                        \
   __typeof(b) _b = (b);                        \
   _a > _b ? _a : _b;                           \
})

static void
mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
                                  uint32_t len, uint32_t addr_space,
                                  const char *desc);

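/* Default logical ring context images (from gen_context.h) used to seed the
 * per-engine contexts written by write_execlists_default_setup().  Gen8-10
 * parts share the gen8 images; newer parts use the gen10 ones.
 */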
static const uint32_t *
get_context_init(const struct gen_device_info *devinfo,
                 enum drm_i915_gem_engine_class engine_class)
{
   static const uint32_t *gen8_contexts[] = {
      [I915_ENGINE_CLASS_RENDER] = gen8_render_context_init,
      [I915_ENGINE_CLASS_COPY] = gen8_blitter_context_init,
      [I915_ENGINE_CLASS_VIDEO] = gen8_video_context_init,
   };
   static const uint32_t *gen10_contexts[] = {
      [I915_ENGINE_CLASS_RENDER] = gen10_render_context_init,
      [I915_ENGINE_CLASS_COPY] = gen10_blitter_context_init,
      [I915_ENGINE_CLASS_VIDEO] = gen10_video_context_init,
   };

   assert(devinfo->gen >= 8);

   if (devinfo->gen <= 10)
      return gen8_contexts[engine_class];
   return gen10_contexts[engine_class];
}

static void __attribute__ ((format(__printf__, 2, 3)))
fail_if(int cond, const char *format, ...)
{
   va_list args;

   if (!cond)
      return;

   va_start(args, format);
   vfprintf(stderr, format, args);
   va_end(args);

   raise(SIGTRAP);
}

static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   return (v + a - 1) & ~(a - 1);
}

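/* Recursively free a PPGTT directory.  Level 1 entries are not heap
 * allocations: populate_ppgtt_table() stores raw physical page addresses in
 * subtables[] at that level, so recursion (and freeing) stops above it.
 */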
static void
aub_ppgtt_table_finish(struct aub_ppgtt_table *table, int level)
{
   if (level == 1)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(table->subtables); i++) {
      if (table->subtables[i]) {
         aub_ppgtt_table_finish(table->subtables[i], level - 1);
         free(table->subtables[i]);
      }
   }
}

static void
data_out(struct aub_file *aub, const void *data, size_t size)
{
   if (size == 0)
      return;

   fail_if(fwrite(data, 1, size, aub->file) == 0,
           "Writing to output failed\n");
}

static void
dword_out(struct aub_file *aub, uint32_t data)
{
   data_out(aub, &data, sizeof(data));
}

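/* Memtrace-style file header used in execlist mode: a version packet carrying
 * the simulator/device id, followed by an application name string that also
 * encodes the PCI id.
 */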
static void
write_execlists_header(struct aub_file *aub, const char *name)
{
   char app_name[8 * 4];
   int app_name_len, dwords;

   app_name_len =
      snprintf(app_name, sizeof(app_name), "PCI-ID=0x%X %s",
               aub->pci_id, name);
   app_name_len = ALIGN(app_name_len, sizeof(uint32_t));

   dwords = 5 + app_name_len / sizeof(uint32_t);
   dword_out(aub, CMD_MEM_TRACE_VERSION | (dwords - 1));
   dword_out(aub, AUB_MEM_TRACE_VERSION_FILE_VERSION);
   dword_out(aub, aub->devinfo.simulator_id << AUB_MEM_TRACE_VERSION_DEVICE_SHIFT);
   dword_out(aub, 0); /* version */
   dword_out(aub, 0); /* version */
   data_out(aub, app_name, app_name_len);
}

static void
write_legacy_header(struct aub_file *aub, const char *name)
{
   char app_name[8 * 4];
   char comment[16];
   int comment_len, comment_dwords, dwords;

   comment_len = snprintf(comment, sizeof(comment), "PCI-ID=0x%x", aub->pci_id);
   comment_dwords = ((comment_len + 3) / 4);

   /* Start with a (required) version packet. */
   dwords = 13 + comment_dwords;
   dword_out(aub, CMD_AUB_HEADER | (dwords - 2));
   dword_out(aub, (4 << AUB_HEADER_MAJOR_SHIFT) |
                  (0 << AUB_HEADER_MINOR_SHIFT));

   /* Next comes a 32-byte application name. */
   strncpy(app_name, name, sizeof(app_name));
   app_name[sizeof(app_name) - 1] = 0;
   data_out(aub, app_name, sizeof(app_name));

   dword_out(aub, 0); /* timestamp */
   dword_out(aub, 0); /* timestamp */
   dword_out(aub, comment_len);
   data_out(aub, comment, comment_dwords * 4);
}


static void
aub_write_header(struct aub_file *aub, const char *app_name)
{
   if (aub_use_execlists(aub))
      write_execlists_header(aub, app_name);
   else
      write_legacy_header(aub, app_name);
}

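/* Initialize the AUB writer: identify the device from its PCI id, emit the
 * file header, reserve physical page 0 from the allocator for the PPGTT PML4,
 * and write a single GGTT PTE (marked present, pointing at physical page 0).
 */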
void
aub_file_init(struct aub_file *aub, FILE *file, FILE *debug, uint16_t pci_id, const char *app_name)
{
   memset(aub, 0, sizeof(*aub));

   aub->verbose_log_file = debug;
   aub->file = file;
   aub->pci_id = pci_id;
   fail_if(!gen_get_device_info(pci_id, &aub->devinfo),
           "failed to identify chipset=0x%x\n", pci_id);
   aub->addr_bits = aub->devinfo.gen >= 8 ? 48 : 32;

   aub_write_header(aub, app_name);

   aub->phys_addrs_allocator = 0;
   aub->pml4.phys_addr = aub->phys_addrs_allocator++ << 12;

   mem_trace_memory_write_header_out(aub, 0,
                                     GEN8_PTE_SIZE,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY,
                                     "GGTT PT");
   dword_out(aub, 1);
   dword_out(aub, 0);
}

void
aub_file_finish(struct aub_file *aub)
{
   aub_ppgtt_table_finish(&aub->pml4, 4);
   fclose(aub->file);
}

uint32_t
aub_gtt_size(struct aub_file *aub)
{
   return NUM_PT_ENTRIES * (aub->addr_bits > 32 ? GEN8_PTE_SIZE : PTE_SIZE);
}

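/* Emit the header of a CMD_MEM_TRACE_MEMORY_WRITE packet.  The caller must
 * follow up with `len` bytes of payload (padded to a dword boundary) for the
 * given address space.
 */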
static void
mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
                                  uint32_t len, uint32_t addr_space,
                                  const char *desc)
{
   uint32_t dwords = ALIGN(len, sizeof(uint32_t)) / sizeof(uint32_t);

   if (aub->verbose_log_file) {
      fprintf(aub->verbose_log_file,
              " MEM WRITE (0x%016" PRIx64 "-0x%016" PRIx64 ") %s\n",
              addr, addr + len, desc);
   }

   dword_out(aub, CMD_MEM_TRACE_MEMORY_WRITE | (5 + dwords - 1));
   dword_out(aub, addr & 0xFFFFFFFF); /* addr lo */
   dword_out(aub, addr >> 32); /* addr hi */
   dword_out(aub, addr_space); /* gtt */
   dword_out(aub, len);
}

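/* Emit a CMD_MEM_TRACE_REGISTER_WRITE packet performing a full-dword MMIO
 * write of `value` to register offset `addr`.
 */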
static void
register_write_out(struct aub_file *aub, uint32_t addr, uint32_t value)
{
   uint32_t dwords = 1;

   if (aub->verbose_log_file) {
      fprintf(aub->verbose_log_file,
              " MMIO WRITE (0x%08x = 0x%08x)\n", addr, value);
   }

   dword_out(aub, CMD_MEM_TRACE_REGISTER_WRITE | (5 + dwords - 1));
   dword_out(aub, addr);
   dword_out(aub, AUB_MEM_TRACE_REGISTER_SIZE_DWORD |
                  AUB_MEM_TRACE_REGISTER_SPACE_MMIO);
   dword_out(aub, 0xFFFFFFFF); /* mask lo */
   dword_out(aub, 0x00000000); /* mask hi */
   dword_out(aub, value);
}

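/* Fill entries [start, end] of a PPGTT directory at the given level,
 * allocating backing pages from aub->phys_addrs_allocator as needed, and emit
 * a physical memory write covering only the entries that changed.  At level 1
 * the new entry is the data page itself, stored directly in subtables[] as a
 * physical address; at higher levels it is a newly allocated child directory.
 */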
static void
populate_ppgtt_table(struct aub_file *aub, struct aub_ppgtt_table *table,
                     int start, int end, int level)
{
   uint64_t entries[512] = {0};
   int dirty_start = 512, dirty_end = 0;

   if (aub->verbose_log_file) {
      fprintf(aub->verbose_log_file,
              " PPGTT (0x%016" PRIx64 "), lvl %d, start: %x, end: %x\n",
              table->phys_addr, level, start, end);
   }

   for (int i = start; i <= end; i++) {
      if (!table->subtables[i]) {
         dirty_start = min(dirty_start, i);
         dirty_end = max(dirty_end, i);
         if (level == 1) {
            table->subtables[i] =
               (void *)(aub->phys_addrs_allocator++ << 12);
            if (aub->verbose_log_file) {
               fprintf(aub->verbose_log_file,
                       " Adding entry: %x, phys_addr: 0x%016" PRIx64 "\n",
                       i, (uint64_t)table->subtables[i]);
            }
         } else {
            table->subtables[i] =
               calloc(1, sizeof(struct aub_ppgtt_table));
            table->subtables[i]->phys_addr =
               aub->phys_addrs_allocator++ << 12;
            if (aub->verbose_log_file) {
               fprintf(aub->verbose_log_file,
                       " Adding entry: %x, phys_addr: 0x%016" PRIx64 "\n",
                       i, table->subtables[i]->phys_addr);
            }
         }
      }
      entries[i] = 3 /* read/write | present */ |
         (level == 1 ? (uint64_t)table->subtables[i] :
          table->subtables[i]->phys_addr);
   }

   if (dirty_start <= dirty_end) {
      uint64_t write_addr = table->phys_addr + dirty_start *
         sizeof(uint64_t);
      uint64_t write_size = (dirty_end - dirty_start + 1) *
         sizeof(uint64_t);
      mem_trace_memory_write_header_out(aub, write_addr, write_size,
                                        AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL,
                                        "PPGTT update");
      data_out(aub, entries + dirty_start, write_size);
   }
}

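/* Map the range [start, start + size) in the 48-bit PPGTT by walking the four
 * paging levels.  Each level consumes 9 bits of the virtual address:
 * L4 = bits 47:39, L3 = bits 38:30, L2 = bits 29:21, L1 = bits 20:12, with the
 * low 12 bits being the offset within a 4KB page.
 */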
void
aub_map_ppgtt(struct aub_file *aub, uint64_t start, uint64_t size)
{
   uint64_t l4_start = start & 0xff8000000000;
   uint64_t l4_end = ((start + size - 1) | 0x007fffffffff) & 0xffffffffffff;

#define L4_index(addr) (((addr) >> 39) & 0x1ff)
#define L3_index(addr) (((addr) >> 30) & 0x1ff)
#define L2_index(addr) (((addr) >> 21) & 0x1ff)
#define L1_index(addr) (((addr) >> 12) & 0x1ff)

#define L3_table(addr) (aub->pml4.subtables[L4_index(addr)])
#define L2_table(addr) (L3_table(addr)->subtables[L3_index(addr)])
#define L1_table(addr) (L2_table(addr)->subtables[L2_index(addr)])

   if (aub->verbose_log_file) {
      fprintf(aub->verbose_log_file,
              " Mapping PPGTT address: 0x%" PRIx64 ", size: %" PRIu64"\n",
              start, size);
   }

   populate_ppgtt_table(aub, &aub->pml4, L4_index(l4_start), L4_index(l4_end), 4);

   for (uint64_t l4 = l4_start; l4 < l4_end; l4 += (1ULL << 39)) {
      uint64_t l3_start = max(l4, start & 0xffffc0000000);
      uint64_t l3_end = min(l4 + (1ULL << 39) - 1,
                            ((start + size - 1) | 0x00003fffffff) & 0xffffffffffff);
      uint64_t l3_start_idx = L3_index(l3_start);
      uint64_t l3_end_idx = L3_index(l3_end);

      populate_ppgtt_table(aub, L3_table(l4), l3_start_idx, l3_end_idx, 3);

      for (uint64_t l3 = l3_start; l3 < l3_end; l3 += (1ULL << 30)) {
         uint64_t l2_start = max(l3, start & 0xffffffe00000);
         uint64_t l2_end = min(l3 + (1ULL << 30) - 1,
                               ((start + size - 1) | 0x0000001fffff) & 0xffffffffffff);
         uint64_t l2_start_idx = L2_index(l2_start);
         uint64_t l2_end_idx = L2_index(l2_end);

         populate_ppgtt_table(aub, L2_table(l3), l2_start_idx, l2_end_idx, 2);

         for (uint64_t l2 = l2_start; l2 < l2_end; l2 += (1ULL << 21)) {
            uint64_t l1_start = max(l2, start & 0xfffffffff000);
            uint64_t l1_end = min(l2 + (1ULL << 21) - 1,
                                  ((start + size - 1) | 0x000000000fff) & 0xffffffffffff);
            uint64_t l1_start_idx = L1_index(l1_start);
            uint64_t l1_end_idx = L1_index(l1_end);

            populate_ppgtt_table(aub, L1_table(l2), l1_start_idx, l1_end_idx, 1);
         }
      }
   }
}

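/* Translate a PPGTT virtual address into the physical page address assigned
 * by populate_ppgtt_table(), using the L*_table/L*_index macros defined
 * above.  The caller is expected to have mapped the address beforehand.
 */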
static uint64_t
ppgtt_lookup(struct aub_file *aub, uint64_t ppgtt_addr)
{
   return (uint64_t)L1_table(ppgtt_addr)->subtables[L1_index(ppgtt_addr)];
}

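/* Build the static GGTT layout used for execlist submission: map
 * STATIC_GGTT_MAP_SIZE of GGTT one-to-one onto freshly allocated physical
 * pages, then write a ring buffer, a PPHWSP and a default context image for
 * the render, blitter and video engines, point each engine's hardware status
 * page register at its context and enable execlist mode on each engine.
 */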
static void
write_execlists_default_setup(struct aub_file *aub)
{
   /* Allocate a contiguous chunk of physical memory (in this setup GGTT
    * addresses match physical addresses).
    */
   uint32_t ggtt_ptes = STATIC_GGTT_MAP_SIZE >> 12;
   uint64_t phys_addr = aub->phys_addrs_allocator << 12;

   aub->phys_addrs_allocator += ggtt_ptes;

   /* GGTT PT */
   mem_trace_memory_write_header_out(aub,
                                     sizeof(uint64_t) * (phys_addr >> 12),
                                     ggtt_ptes * GEN8_PTE_SIZE,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY,
                                     "GGTT PT");
   for (uint32_t i = 0; i < ggtt_ptes; i++) {
      dword_out(aub, 1 + 0x1000 * i + phys_addr);
      dword_out(aub, 0);
   }

   /* RENDER_RING */
   mem_trace_memory_write_header_out(aub, phys_addr + RENDER_RING_ADDR, RING_SIZE,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     "RENDER RING");
   for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
      dword_out(aub, 0);

   /* RENDER_PPHWSP */
   mem_trace_memory_write_header_out(aub, phys_addr + RENDER_CONTEXT_ADDR,
                                     PPHWSP_SIZE +
                                     CONTEXT_RENDER_SIZE,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     "RENDER PPHWSP");
   for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t))
      dword_out(aub, 0);

   /* RENDER_CONTEXT */
   data_out(aub, get_context_init(&aub->devinfo, I915_ENGINE_CLASS_RENDER), CONTEXT_RENDER_SIZE);

   /* BLITTER_RING */
   mem_trace_memory_write_header_out(aub, phys_addr + BLITTER_RING_ADDR, RING_SIZE,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     "BLITTER RING");
   for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
      dword_out(aub, 0);

   /* BLITTER_PPHWSP */
   mem_trace_memory_write_header_out(aub, phys_addr + BLITTER_CONTEXT_ADDR,
                                     PPHWSP_SIZE +
                                     CONTEXT_OTHER_SIZE,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     "BLITTER PPHWSP");
   for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t))
      dword_out(aub, 0);

   /* BLITTER_CONTEXT */
   data_out(aub, get_context_init(&aub->devinfo, I915_ENGINE_CLASS_COPY), CONTEXT_OTHER_SIZE);

   /* VIDEO_RING */
   mem_trace_memory_write_header_out(aub, phys_addr + VIDEO_RING_ADDR, RING_SIZE,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     "VIDEO RING");
   for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
      dword_out(aub, 0);

   /* VIDEO_PPHWSP */
   mem_trace_memory_write_header_out(aub, phys_addr + VIDEO_CONTEXT_ADDR,
                                     PPHWSP_SIZE +
                                     CONTEXT_OTHER_SIZE,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     "VIDEO PPHWSP");
   for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t))
      dword_out(aub, 0);

   /* VIDEO_CONTEXT */
   data_out(aub, get_context_init(&aub->devinfo, I915_ENGINE_CLASS_VIDEO), CONTEXT_OTHER_SIZE);

   register_write_out(aub, HWS_PGA_RCSUNIT, RENDER_CONTEXT_ADDR);
   register_write_out(aub, HWS_PGA_VCSUNIT0, VIDEO_CONTEXT_ADDR);
   register_write_out(aub, HWS_PGA_BCSUNIT, BLITTER_CONTEXT_ADDR);

   register_write_out(aub, GFX_MODE_RCSUNIT, 0x80008000 /* execlist enable */);
   register_write_out(aub, GFX_MODE_VCSUNIT0, 0x80008000 /* execlist enable */);
   register_write_out(aub, GFX_MODE_BCSUNIT, 0x80008000 /* execlist enable */);
}

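/* Legacy (pre-execlist) setup: write a single linear GTT mapping of
 * NUM_PT_ENTRIES page table entries, which is what limits this path to the
 * 64MB mentioned in the comment below.
 */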
static void write_legacy_default_setup(struct aub_file *aub)
{
   uint32_t entry = 0x200003;

   /* Set up the GTT. The max we can handle is 64M */
   dword_out(aub, CMD_AUB_TRACE_HEADER_BLOCK |
                  ((aub->addr_bits > 32 ? 6 : 5) - 2));
   dword_out(aub, AUB_TRACE_MEMTYPE_GTT_ENTRY |
                  AUB_TRACE_TYPE_NOTYPE | AUB_TRACE_OP_DATA_WRITE);
   dword_out(aub, 0); /* subtype */
   dword_out(aub, 0); /* offset */
   dword_out(aub, aub_gtt_size(aub)); /* size */
   if (aub->addr_bits > 32)
      dword_out(aub, 0);
   for (uint32_t i = 0; i < NUM_PT_ENTRIES; i++) {
      dword_out(aub, entry + 0x1000 * i);
      if (aub->addr_bits > 32)
         dword_out(aub, 0);
   }
}

/**
 * Sets up a default GGTT/PPGTT address space and execlists context (when
 * supported).
 */
void
aub_write_default_setup(struct aub_file *aub)
{
   if (aub_use_execlists(aub))
      write_execlists_default_setup(aub);
   else
      write_legacy_default_setup(aub);
}

/**
 * Break up large objects into multiple writes.  Otherwise a 128kb VBO
 * would overflow the 16-bit size field in the packet header and
 * everything goes badly after that.
 */
void
aub_write_trace_block(struct aub_file *aub,
                      uint32_t type, void *virtual,
                      uint32_t size, uint64_t gtt_offset)
{
   uint32_t block_size;
   uint32_t subtype = 0;
   static const char null_block[8 * 4096];

   for (uint32_t offset = 0; offset < size; offset += block_size) {
      block_size = min(8 * 4096, size - offset);

      if (aub_use_execlists(aub)) {
         block_size = min(4096, block_size);
         mem_trace_memory_write_header_out(aub,
                                           ppgtt_lookup(aub, gtt_offset + offset),
                                           block_size,
                                           AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL,
                                           "Trace Block");
      } else {
         dword_out(aub, CMD_AUB_TRACE_HEADER_BLOCK |
                        ((aub->addr_bits > 32 ? 6 : 5) - 2));
         dword_out(aub, AUB_TRACE_MEMTYPE_GTT |
                        type | AUB_TRACE_OP_DATA_WRITE);
         dword_out(aub, subtype);
         dword_out(aub, gtt_offset + offset);
         dword_out(aub, align_u32(block_size, 4));
         if (aub->addr_bits > 32)
            dword_out(aub, (gtt_offset + offset) >> 32);
      }

      if (virtual)
         data_out(aub, ((char *) virtual) + offset, block_size);
      else
         data_out(aub, null_block, block_size);

      /* Pad to a multiple of 4 bytes. */
      data_out(aub, null_block, -block_size & 3);
   }
}

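/* Per-engine MMIO registers used for execlist submission: the ELSP submit
 * port (gen8-10), the ELSQ and control registers (gen11+), and the execlist
 * status register that gets polled after submission.
 */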
static const struct engine {
   uint32_t elsp_reg;
   uint32_t elsq_reg;
   uint32_t status_reg;
   uint32_t control_reg;

   /* These are only to be used with the default context setup. */
   uint32_t default_ring_addr;
   uint64_t default_descriptor;
} engines[] = {
   [I915_ENGINE_CLASS_RENDER] = {
      .elsp_reg = EXECLIST_SUBMITPORT_RCSUNIT,
      .elsq_reg = EXECLIST_SQ_CONTENTS0_RCSUNIT,
      .status_reg = EXECLIST_STATUS_RCSUNIT,
      .control_reg = EXECLIST_CONTROL_RCSUNIT,

      .default_ring_addr = RENDER_RING_ADDR,
      .default_descriptor = RENDER_CONTEXT_DESCRIPTOR,
   },
   [I915_ENGINE_CLASS_VIDEO] = {
      .elsp_reg = EXECLIST_SUBMITPORT_VCSUNIT0,
      .elsq_reg = EXECLIST_SQ_CONTENTS0_VCSUNIT0,
      .status_reg = EXECLIST_STATUS_VCSUNIT0,
      .control_reg = EXECLIST_CONTROL_VCSUNIT0,

      .default_ring_addr = VIDEO_RING_ADDR,
      .default_descriptor = VIDEO_CONTEXT_DESCRIPTOR,
   },
   [I915_ENGINE_CLASS_COPY] = {
      .elsp_reg = EXECLIST_SUBMITPORT_BCSUNIT,
      .elsq_reg = EXECLIST_SQ_CONTENTS0_BCSUNIT,
      .status_reg = EXECLIST_STATUS_BCSUNIT,
      .control_reg = EXECLIST_CONTROL_BCSUNIT,

      .default_ring_addr = BLITTER_RING_ADDR,
      .default_descriptor = BLITTER_CONTEXT_DESCRIPTOR,
   },
};

static const struct engine *
engine_from_engine_class(enum drm_i915_gem_engine_class engine_class)
{
   switch (engine_class) {
   case I915_ENGINE_CLASS_RENDER:
   case I915_ENGINE_CLASS_COPY:
   case I915_ENGINE_CLASS_VIDEO:
      return &engines[engine_class];
   default:
      unreachable("unknown ring");
   }
}

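/* Write an MI_BATCH_BUFFER_START pointing at the user batch into the engine's
 * default ring, then set the associated ring head to 0 and the tail to 16 so
 * the submitted context executes exactly the four dwords just written.
 */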
static void
aub_dump_ring_buffer_execlist(struct aub_file *aub,
                              const struct engine *cs,
                              uint64_t batch_offset)
{
   mem_trace_memory_write_header_out(aub, cs->default_ring_addr, 16,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     "RING MI_BATCH_BUFFER_START user");
   dword_out(aub, AUB_MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965 | (3 - 2));
   dword_out(aub, batch_offset & 0xFFFFFFFF);
   dword_out(aub, batch_offset >> 32);
   dword_out(aub, 0 /* MI_NOOP */);

   mem_trace_memory_write_header_out(aub, cs->default_ring_addr + 8192 + 20, 4,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     "RING BUFFER HEAD");
   dword_out(aub, 0); /* RING_BUFFER_HEAD */
   mem_trace_memory_write_header_out(aub, cs->default_ring_addr + 8192 + 28, 4,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     "RING BUFFER TAIL");
   dword_out(aub, 16); /* RING_BUFFER_TAIL */
}

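/* Submit a context descriptor to an engine.  Gen11+ goes through the ELSQ
 * registers followed by a control register write, while older gens write the
 * descriptor pair directly to the ELSP submit port; both paths then emit a
 * poll on the execlist status register, presumably so the consumer waits for
 * the submission before processing further packets.
 */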
static void
aub_dump_execlist(struct aub_file *aub, const struct engine *cs, uint64_t descriptor)
{
   if (aub->devinfo.gen >= 11) {
      register_write_out(aub, cs->elsq_reg, descriptor & 0xFFFFFFFF);
      register_write_out(aub, cs->elsq_reg + sizeof(uint32_t), descriptor >> 32);
      register_write_out(aub, cs->control_reg, 1);
   } else {
      register_write_out(aub, cs->elsp_reg, 0);
      register_write_out(aub, cs->elsp_reg, 0);
      register_write_out(aub, cs->elsp_reg, descriptor >> 32);
      register_write_out(aub, cs->elsp_reg, descriptor & 0xFFFFFFFF);
   }

   dword_out(aub, CMD_MEM_TRACE_REGISTER_POLL | (5 + 1 - 1));
   dword_out(aub, cs->status_reg);
   dword_out(aub, AUB_MEM_TRACE_REGISTER_SIZE_DWORD |
                  AUB_MEM_TRACE_REGISTER_SPACE_MMIO);
   if (aub->devinfo.gen >= 11) {
      dword_out(aub, 0x00000001); /* mask lo */
      dword_out(aub, 0x00000000); /* mask hi */
      dword_out(aub, 0x00000001);
   } else {
      dword_out(aub, 0x00000010); /* mask lo */
      dword_out(aub, 0x00000000); /* mask hi */
      dword_out(aub, 0x00000000);
   }
}

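/* Legacy (ring buffer) submission: build a small ring containing a single
 * MI_BATCH_BUFFER_START pointing at the batch and write it into the ring
 * matching the requested engine class (PRB0/1/2).
 */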
static void
aub_dump_ring_buffer_legacy(struct aub_file *aub,
                            uint64_t batch_offset,
                            uint64_t offset,
                            enum drm_i915_gem_engine_class engine_class)
{
   uint32_t ringbuffer[4096];
   unsigned aub_mi_bbs_len;
   int ring_count = 0;
   static const int engine_class_to_ring[] = {
      [I915_ENGINE_CLASS_RENDER] = AUB_TRACE_TYPE_RING_PRB0,
      [I915_ENGINE_CLASS_VIDEO] = AUB_TRACE_TYPE_RING_PRB1,
      [I915_ENGINE_CLASS_COPY] = AUB_TRACE_TYPE_RING_PRB2,
   };
   int ring = engine_class_to_ring[engine_class];

   /* Make a ring buffer to execute our batchbuffer. */
   memset(ringbuffer, 0, sizeof(ringbuffer));

   aub_mi_bbs_len = aub->addr_bits > 32 ? 3 : 2;
   ringbuffer[ring_count] = AUB_MI_BATCH_BUFFER_START | (aub_mi_bbs_len - 2);
   aub_write_reloc(&aub->devinfo, &ringbuffer[ring_count + 1], batch_offset);
   ring_count += aub_mi_bbs_len;

   /* Write out the ring.  This appears to trigger execution of
    * the ring in the simulator.
    */
   dword_out(aub, CMD_AUB_TRACE_HEADER_BLOCK |
                  ((aub->addr_bits > 32 ? 6 : 5) - 2));
   dword_out(aub, AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
   dword_out(aub, 0); /* general/surface subtype */
   dword_out(aub, offset);
   dword_out(aub, ring_count * 4);
   if (aub->addr_bits > 32)
      dword_out(aub, offset >> 32);

   data_out(aub, ringbuffer, ring_count * 4);
}

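/* Entry point used to execute a batch buffer at batch_addr on the given
 * engine: execlist submission of the engine's default context when available,
 * otherwise the legacy ring buffer path.  `offset` is only used by the legacy
 * path as the GTT location where the ring commands are written.
 */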
void
aub_write_exec(struct aub_file *aub, uint64_t batch_addr,
               uint64_t offset, enum drm_i915_gem_engine_class engine_class)
{
   const struct engine *cs = engine_from_engine_class(engine_class);

   if (aub_use_execlists(aub)) {
      aub_dump_ring_buffer_execlist(aub, cs, batch_addr);
      aub_dump_execlist(aub, cs, cs->default_descriptor);
   } else {
      /* Dump ring buffer */
      aub_dump_ring_buffer_legacy(aub, batch_addr, offset, engine_class);
   }
   fflush(aub->file);
}