_a > _b ? _a : _b; \
})
-static void
-mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
- uint32_t len, uint32_t addr_space,
- const char *desc);
+static struct aub_context *aub_context_new(struct aub_file *aub, uint32_t new_id);
+static void mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
+ uint32_t len, uint32_t addr_space,
+ const char *desc);
static void __attribute__ ((format(__printf__, 2, 3)))
fail_if(int cond, const char *format, ...)
aub_write_header(aub, app_name);
aub->phys_addrs_allocator = 0;
+ aub->ggtt_addrs_allocator = 0;
aub->pml4.phys_addr = aub->phys_addrs_allocator++ << 12;
- mem_trace_memory_write_header_out(aub, 0,
+ mem_trace_memory_write_header_out(aub, aub->ggtt_addrs_allocator++,
GEN8_PTE_SIZE,
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY,
"GGTT PT");
dword_out(aub, 1);
dword_out(aub, 0);
+
+ aub->next_context_handle = 1;
+ aub_context_new(aub, 0); /* Default context */
}
void
},
};
+static void
+aub_map_ggtt(struct aub_file *aub, uint64_t virt_addr, uint64_t size)
+{
+ /* Makes the code below a bit simpler. In practice all of the write we
+ * receive from error2aub are page aligned.
+ */
+ assert(virt_addr % 4096 == 0);
+ assert((aub->phys_addrs_allocator + size) < (1UL << 32));
+
+ /* GGTT PT */
+ uint32_t ggtt_ptes = DIV_ROUND_UP(size, 4096);
+ uint64_t phys_addr = aub->phys_addrs_allocator << 12;
+ aub->phys_addrs_allocator += ggtt_ptes;
+
+ if (aub->verbose_log_file) {
+ fprintf(aub->verbose_log_file,
+ " Mapping GGTT address: 0x%" PRIx64 ", size: %" PRIu64" phys_addr=0x%" PRIx64 " entries=%u\n",
+ virt_addr, size, phys_addr, ggtt_ptes);
+ }
+
+ mem_trace_memory_write_header_out(aub,
+ (virt_addr >> 12) * GEN8_PTE_SIZE,
+ ggtt_ptes * GEN8_PTE_SIZE,
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY,
+ "GGTT PT");
+ for (uint32_t i = 0; i < ggtt_ptes; i++) {
+ dword_out(aub, 1 + phys_addr + i * 4096);
+ dword_out(aub, 0);
+ }
+}
+
+void
+aub_write_ggtt(struct aub_file *aub, uint64_t virt_addr, uint64_t size, const void *data)
+{
+ /* Default setup assumes a 1 to 1 mapping between physical and virtual GGTT
+ * addresses. This is somewhat incompatible with the aub_write_ggtt()
+ * function. In practice it doesn't matter as the GGTT writes are used to
+ * replace the default setup and we've taken care to setup the PML4 as the
+ * top of the GGTT.
+ */
+ assert(!aub->has_default_setup);
+
+ aub_map_ggtt(aub, virt_addr, size);
+
+ /* We write the GGTT buffer through the GGTT aub command rather than the
+ * PHYSICAL aub command. This is because the Gen9 simulator seems to have 2
+ * different set of memory pools for GGTT and physical (probably someone
+ * didn't really understand the concept?).
+ */
+ static const char null_block[8 * 4096];
+ for (uint64_t offset = 0; offset < size; offset += 4096) {
+ uint32_t block_size = min(4096, size - offset);
+
+ mem_trace_memory_write_header_out(aub, virt_addr + offset, block_size,
+ AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
+ "GGTT buffer");
+ data_out(aub, (char *) data + offset, block_size);
+
+ /* Pad to a multiple of 4 bytes. */
+ data_out(aub, null_block, -block_size & 3);
+ }
+}
+
static const struct engine *
engine_from_engine_class(enum drm_i915_gem_engine_class engine_class)
{
gen10_contexts[engine_class](params, data, size);
}
+static uint64_t
+alloc_ggtt_address(struct aub_file *aub, uint64_t size)
+{
+ uint32_t ggtt_ptes = DIV_ROUND_UP(size, 4096);
+ uint64_t addr = aub->ggtt_addrs_allocator << 12;
+
+ aub->ggtt_addrs_allocator += ggtt_ptes;
+ aub_map_ggtt(aub, addr, size);
+
+ return addr;
+}
+
+static void
+write_hwsp(struct aub_file *aub,
+ enum drm_i915_gem_engine_class engine_class)
+{
+ uint32_t reg = 0;
+ switch (engine_class) {
+ case I915_ENGINE_CLASS_RENDER: reg = HWS_PGA_RCSUNIT; break;
+ case I915_ENGINE_CLASS_COPY: reg = HWS_PGA_BCSUNIT; break;
+ case I915_ENGINE_CLASS_VIDEO: reg = HWS_PGA_VCSUNIT0; break;
+ default:
+ unreachable("unknown ring");
+ }
+
+ register_write_out(aub, reg, aub->engine_setup[engine_class].hwsp_addr);
+}
+
static uint32_t
write_engine_execlist_setup(struct aub_file *aub,
+ uint32_t ctx_id,
+ struct aub_hw_context *hw_ctx,
enum drm_i915_gem_engine_class engine_class)
{
const struct engine *cs = engine_from_engine_class(engine_class);
get_context_init(&aub->devinfo, NULL, engine_class, NULL, &context_size);
/* GGTT PT */
- uint64_t phys_addr = aub->phys_addrs_allocator << 12;
uint32_t total_size = RING_SIZE + PPHWSP_SIZE + context_size;
- uint32_t ggtt_ptes = DIV_ROUND_UP(total_size, 4096);
char name[80];
+ uint64_t ggtt_addr = alloc_ggtt_address(aub, total_size);
- aub->phys_addrs_allocator += ggtt_ptes;
-
- snprintf(name, sizeof(name), "%s GGTT PT", cs->name);
- mem_trace_memory_write_header_out(aub,
- sizeof(uint64_t) * (phys_addr >> 12),
- ggtt_ptes * GEN8_PTE_SIZE,
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY,
- name);
- for (uint32_t i = 0; i < ggtt_ptes; i++) {
- dword_out(aub, 1 + 0x1000 * i + phys_addr);
- dword_out(aub, 0);
- }
+ snprintf(name, sizeof(name), "%s (ctx id: %d) GGTT PT", cs->name, ctx_id);
/* RING */
- aub->engine_setup[engine_class].ring_addr = phys_addr;
+ hw_ctx->ring_addr = ggtt_addr;
snprintf(name, sizeof(name), "%s RING", cs->name);
- mem_trace_memory_write_header_out(aub, phys_addr, RING_SIZE,
+ mem_trace_memory_write_header_out(aub, ggtt_addr, RING_SIZE,
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
name);
for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
dword_out(aub, 0);
- phys_addr += RING_SIZE;
+ ggtt_addr += RING_SIZE;
/* PPHWSP */
- aub->engine_setup[engine_class].pphwsp_addr = phys_addr;
- aub->engine_setup[engine_class].descriptor = cs->hw_class | phys_addr | CONTEXT_FLAGS;
+ hw_ctx->pphwsp_addr = ggtt_addr;
snprintf(name, sizeof(name), "%s PPHWSP", cs->name);
- mem_trace_memory_write_header_out(aub, phys_addr,
+ mem_trace_memory_write_header_out(aub, ggtt_addr,
PPHWSP_SIZE + context_size,
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
name);
/* CONTEXT */
struct gen_context_parameters params = {
- .ring_addr = aub->engine_setup[engine_class].ring_addr,
+ .ring_addr = hw_ctx->ring_addr,
.ring_size = RING_SIZE,
.pml4_addr = aub->pml4.phys_addr,
};
data_out(aub, context_data, context_size);
free(context_data);
+ hw_ctx->initialized = true;
+
return total_size;
}
static void
write_execlists_default_setup(struct aub_file *aub)
{
- write_engine_execlist_setup(aub, I915_ENGINE_CLASS_RENDER);
- write_engine_execlist_setup(aub, I915_ENGINE_CLASS_COPY);
- write_engine_execlist_setup(aub, I915_ENGINE_CLASS_VIDEO);
-
- register_write_out(aub, HWS_PGA_RCSUNIT, aub->engine_setup[I915_ENGINE_CLASS_RENDER].pphwsp_addr);
- register_write_out(aub, HWS_PGA_VCSUNIT0, aub->engine_setup[I915_ENGINE_CLASS_VIDEO].pphwsp_addr);
- register_write_out(aub, HWS_PGA_BCSUNIT, aub->engine_setup[I915_ENGINE_CLASS_COPY].pphwsp_addr);
-
register_write_out(aub, GFX_MODE_RCSUNIT, 0x80008000 /* execlist enable */);
register_write_out(aub, GFX_MODE_VCSUNIT0, 0x80008000 /* execlist enable */);
register_write_out(aub, GFX_MODE_BCSUNIT, 0x80008000 /* execlist enable */);
aub->has_default_setup = true;
}
-void
-aub_write_ggtt(struct aub_file *aub, uint64_t virt_addr, uint64_t size, const void *data)
+static struct aub_context *
+aub_context_new(struct aub_file *aub, uint32_t new_id)
{
- if (aub->verbose_log_file) {
- fprintf(aub->verbose_log_file,
- " Writting GGTT address: 0x%" PRIx64 ", size: %" PRIu64"\n",
- virt_addr, size);
- }
+ assert(aub->num_contexts < MAX_CONTEXT_COUNT);
- /* Default setup assumes a 1 to 1 mapping between physical and virtual GGTT
- * addresses. This is somewhat incompatible with the aub_write_ggtt()
- * function. In practice it doesn't matter as the GGTT writes are used to
- * replace the default setup and we've taken care to setup the PML4 as the
- * top of the GGTT.
- */
- assert(!aub->has_default_setup);
+ struct aub_context *ctx = &aub->contexts[aub->num_contexts++];
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->id = new_id;
- /* Makes the code below a bit simpler. In practice all of the write we
- * receive from error2aub are page aligned.
- */
- assert(virt_addr % 4096 == 0);
- assert((aub->phys_addrs_allocator + size) < (1UL << 32));
+ return ctx;
+}
- /* GGTT PT */
- uint32_t ggtt_ptes = DIV_ROUND_UP(size, 4096);
- uint64_t phys_addr = aub->phys_addrs_allocator << 12;
- aub->phys_addrs_allocator += ggtt_ptes;
+uint32_t
+aub_write_context_create(struct aub_file *aub, uint32_t *ctx_id)
+{
+ uint32_t new_id = ctx_id ? *ctx_id : aub->next_context_handle;
- if (aub->verbose_log_file) {
- fprintf(aub->verbose_log_file,
- " Writting GGTT address: 0x%" PRIx64 ", size: %" PRIu64" phys_addr=0x%" PRIx64 " entries=%u\n",
- virt_addr, size, phys_addr, ggtt_ptes);
- }
+ aub_context_new(aub, new_id);
- mem_trace_memory_write_header_out(aub,
- (virt_addr >> 12) * GEN8_PTE_SIZE,
- ggtt_ptes * GEN8_PTE_SIZE,
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY,
- "GGTT PT");
- for (uint32_t i = 0; i < ggtt_ptes; i++) {
- dword_out(aub, 1 + phys_addr + i * 4096);
- dword_out(aub, 0);
+ if (!ctx_id)
+ aub->next_context_handle++;
+
+ return new_id;
+}
+
+static struct aub_context *
+aub_context_find(struct aub_file *aub, uint32_t id)
+{
+ for (int i = 0; i < aub->num_contexts; i++) {
+ if (aub->contexts[i].id == id)
+ return &aub->contexts[i];
}
- /* We write the GGTT buffer through the GGTT aub command rather than the
- * PHYSICAL aub command. This is because the Gen9 simulator seems to have 2
- * different set of memory pools for GGTT and physical (probably someone
- * didn't really understand the concept?).
- */
- static const char null_block[8 * 4096];
- for (uint64_t offset = 0; offset < size; offset += 4096) {
- uint32_t block_size = min(4096, size - offset);
+ return NULL;
+}
- mem_trace_memory_write_header_out(aub, virt_addr + offset, block_size,
- AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
- "GGTT buffer");
- data_out(aub, (char *) data + offset, block_size);
+static struct aub_hw_context *
+aub_write_ensure_context(struct aub_file *aub, uint32_t ctx_id,
+ enum drm_i915_gem_engine_class engine_class)
+{
+ struct aub_context *ctx = aub_context_find(aub, ctx_id);
+ assert(ctx != NULL);
- /* Pad to a multiple of 4 bytes. */
- data_out(aub, null_block, -block_size & 3);
- }
+ struct aub_hw_context *hw_ctx = &ctx->hw_contexts[engine_class];
+ if (!hw_ctx->initialized)
+ write_engine_execlist_setup(aub, ctx->id, hw_ctx, engine_class);
+
+ return hw_ctx;
+}
+
+static uint64_t
+get_context_descriptor(struct aub_file *aub,
+ const struct engine *cs,
+ struct aub_hw_context *hw_ctx)
+{
+ return cs->hw_class | hw_ctx->pphwsp_addr | CONTEXT_FLAGS;
}
/**
static void
aub_dump_ring_buffer_execlist(struct aub_file *aub,
+ struct aub_hw_context *hw_ctx,
const struct engine *cs,
uint64_t batch_offset)
{
- mem_trace_memory_write_header_out(aub, aub->engine_setup[cs->engine_class].ring_addr, 16,
+ mem_trace_memory_write_header_out(aub, hw_ctx->ring_addr, 16,
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
"RING MI_BATCH_BUFFER_START user");
dword_out(aub, AUB_MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965 | (3 - 2));
dword_out(aub, batch_offset >> 32);
dword_out(aub, 0 /* MI_NOOP */);
- mem_trace_memory_write_header_out(aub, aub->engine_setup[cs->engine_class].ring_addr + 8192 + 20, 4,
+ mem_trace_memory_write_header_out(aub, hw_ctx->ring_addr + 8192 + 20, 4,
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
"RING BUFFER HEAD");
dword_out(aub, 0); /* RING_BUFFER_HEAD */
- mem_trace_memory_write_header_out(aub, aub->engine_setup[cs->engine_class].ring_addr + 8192 + 28, 4,
+ mem_trace_memory_write_header_out(aub, hw_ctx->ring_addr + 8192 + 28, 4,
AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
"RING BUFFER TAIL");
dword_out(aub, 16); /* RING_BUFFER_TAIL */
data_out(aub, ringbuffer, ring_count * 4);
}
+static void
+aub_write_ensure_hwsp(struct aub_file *aub,
+ enum drm_i915_gem_engine_class engine_class)
+{
+ uint64_t *hwsp_addr = &aub->engine_setup[engine_class].hwsp_addr;
+
+ if (*hwsp_addr != 0)
+ return;
+
+ *hwsp_addr = alloc_ggtt_address(aub, 4096);
+ write_hwsp(aub, engine_class);
+}
+
void
-aub_write_exec(struct aub_file *aub, uint64_t batch_addr,
+aub_write_exec(struct aub_file *aub, uint32_t ctx_id, uint64_t batch_addr,
uint64_t offset, enum drm_i915_gem_engine_class engine_class)
{
const struct engine *cs = engine_from_engine_class(engine_class);
if (aub_use_execlists(aub)) {
- aub_dump_ring_buffer_execlist(aub, cs, batch_addr);
- aub_dump_execlist(aub, cs, aub->engine_setup[engine_class].descriptor);
+ struct aub_hw_context *hw_ctx =
+ aub_write_ensure_context(aub, ctx_id, engine_class);
+ uint64_t descriptor = get_context_descriptor(aub, cs, hw_ctx);
+ aub_write_ensure_hwsp(aub, engine_class);
+ aub_dump_ring_buffer_execlist(aub, hw_ctx, cs, batch_addr);
+ aub_dump_execlist(aub, cs, descriptor);
} else {
/* Dump ring buffer */
aub_dump_ring_buffer_legacy(aub, batch_addr, offset, engine_class);