vc4: Add support for dumping executed commands to a file.
author Eric Anholt <eric@anholt.net>
Tue, 14 Oct 2014 11:35:47 +0000 (12:35 +0100)
committer Eric Anholt <eric@anholt.net>
Tue, 15 Dec 2015 20:05:48 +0000 (12:05 -0800)
The VC4_DEBUG=cl,qpu is nice and all, but I want to be able to get more
detailed dumps, and to replay the same exact commands in simulation.  For
that I need a dump with all of the VBOs, shaders, shader recs, etc.  This
dump can be parsed by vc4-gpu-tools.

For now this is only doable from simulator mode, because otherwise we
don't have access to the RCL contents generated by the kernel.

src/gallium/drivers/vc4/vc4_screen.c
src/gallium/drivers/vc4/vc4_screen.h
src/gallium/drivers/vc4/vc4_simulator.c

index 090579c2c76ed25d11ec78b4872f7bcd42a86c8c..8ddf0865d21f390c59afce11ce6489299469fe88 100644 (file)
@@ -57,6 +57,10 @@ static const struct debug_named_value debug_options[] = {
           "Flush after each draw call" },
         { "always_sync", VC4_DEBUG_ALWAYS_SYNC,
           "Wait for finish after each flush" },
+#if USE_VC4_SIMULATOR
+        { "dump", VC4_DEBUG_DUMP,
+          "Write a GPU command stream trace file" },
+#endif
         { NULL }
 };
 
index 5992e37109380e4590ba140c01451fe315ef5498..03f76b257e3578f7327b0c0b9767f2ae713a3eef 100644 (file)
@@ -41,6 +41,7 @@ struct vc4_bo;
 #define VC4_DEBUG_ALWAYS_FLUSH 0x0080
 #define VC4_DEBUG_ALWAYS_SYNC  0x0100
 #define VC4_DEBUG_NIR       0x0200
+#define VC4_DEBUG_DUMP      0x0400 /* "dump": write a GPU command stream trace file */
 
 #define VC4_MAX_MIP_LEVELS 12
 #define VC4_MAX_TEXTURE_SAMPLERS 16
index 4b1df9234b6ff982073cebc12eda6e6fd3b6cbfd..521ef50f8140b84482484eb307db19a45cccbc82 100644 (file)
@@ -131,6 +131,93 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
         return 0;
 }
 
+/**
+ * Writes a snapshot of the job described by \p exec to a new file named
+ * "vc4-dri-<N>.dump" in the current directory, where N counts up from 0
+ * for each dump attempted by this process.
+ *
+ * Does nothing unless VC4_DEBUG_DUMP is set in vc4_debug.
+ *
+ * File layout, in order:
+ *   1. unsigned int dump version (currently 0)
+ *   2. struct drm_vc4_get_hang_state header
+ *   3. header.bo_count entries of struct drm_vc4_get_hang_state_bo
+ *   4. the raw contents of every BO, in the same order as the entries:
+ *      exec->bo[], then exec->unref_list, then OVERFLOW_SIZE zero bytes
+ *      standing in for the overflow area.
+ *
+ * Failures (allocation, open, write, close) are reported on stderr and the
+ * dump is abandoned; the caller is never affected.
+ */
+static void
+vc4_dump_to_file(struct vc4_exec_info *exec)
+{
+        static int dumpno = 0;
+        struct drm_vc4_get_hang_state *state = NULL;
+        struct drm_vc4_get_hang_state_bo *bo_state = NULL;
+        unsigned int dump_version = 0;
+        unsigned int unref_count = 0;
+        unsigned int total_bos;
+        unsigned int i;
+        char *filename = NULL;
+        void *overflow = NULL;
+        FILE *f = NULL;
+
+        if (!(vc4_debug & VC4_DEBUG_DUMP))
+                return;
+
+        list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list,
+                                 unref_head) {
+                unref_count++;
+        }
+
+        /* Add one more for the overflow area that isn't wrapped in a BO. */
+        total_bos = exec->bo_count + unref_count + 1;
+
+        state = calloc(1, sizeof(*state));
+        bo_state = calloc(total_bos, sizeof(*bo_state));
+        overflow = calloc(1, OVERFLOW_SIZE);
+        if (!state || !bo_state || !overflow) {
+                fprintf(stderr, "Out of memory preparing vc4 dump\n");
+                goto out;
+        }
+        state->bo_count = total_bos;
+
+        if (asprintf(&filename, "vc4-dri-%d.dump", dumpno++) < 0) {
+                /* The pointer's contents are undefined after a failure. */
+                filename = NULL;
+                fprintf(stderr, "Couldn't allocate vc4 dump filename\n");
+                goto out;
+        }
+
+        /* The dump is raw binary data and is never read back here. */
+        f = fopen(filename, "wb");
+        if (!f) {
+                fprintf(stderr, "Couldn't open %s: %s\n", filename,
+                        strerror(errno));
+                goto out;
+        }
+
+        fwrite(&dump_version, sizeof(dump_version), 1, f);
+
+        state->ct0ca = exec->ct0ca;
+        state->ct0ea = exec->ct0ea;
+        state->ct1ca = exec->ct1ca;
+        state->ct1ea = exec->ct1ea;
+        state->start_bin = exec->ct0ca;
+        state->start_render = exec->ct1ca;
+        fwrite(state, sizeof(*state), 1, f);
+
+        /* BO table: referenced BOs first... */
+        for (i = 0; i < exec->bo_count; i++) {
+                struct drm_gem_cma_object *cma_bo = exec->bo[i];
+                bo_state[i].handle = i; /* Not used by the parser. */
+                bo_state[i].paddr = cma_bo->paddr;
+                bo_state[i].size = cma_bo->base.size;
+        }
+
+        /* ...then the BOs on the unref list... */
+        list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list,
+                                 unref_head) {
+                struct drm_gem_cma_object *cma_bo = &bo->base;
+                bo_state[i].handle = 0;
+                bo_state[i].paddr = cma_bo->paddr;
+                bo_state[i].size = cma_bo->base.size;
+                i++;
+        }
+
+        /* ...and finally the static overflow memory area. */
+        bo_state[i].handle = exec->bo_count;
+        bo_state[i].paddr = 0;
+        bo_state[i].size = OVERFLOW_SIZE;
+
+        fwrite(bo_state, sizeof(*bo_state), state->bo_count, f);
+
+        /* BO contents, in the same order as the table above. */
+        for (i = 0; i < exec->bo_count; i++) {
+                struct drm_gem_cma_object *cma_bo = exec->bo[i];
+                fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+        }
+
+        list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list,
+                                 unref_head) {
+                struct drm_gem_cma_object *cma_bo = &bo->base;
+                fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+        }
+
+        /* The overflow area is dumped as zeroes. */
+        fwrite(overflow, 1, OVERFLOW_SIZE, f);
+
+        /* A zero-sized BO makes fwrite() return 0 without failing, so the
+         * stream error flag is checked once instead of each return value.
+         */
+        if (ferror(f))
+                fprintf(stderr, "Error while writing %s\n", filename);
+
+out:
+        if (f && fclose(f) != 0) {
+                fprintf(stderr, "Couldn't close %s: %s\n", filename,
+                        strerror(errno));
+        }
+        free(overflow);
+        free(bo_state);
+        free(state);
+        free(filename);
+}
+
 int
 vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
 {
@@ -183,6 +270,8 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
                             exec.ct1ea - exec.ct1ca, true);
         }
 
+        vc4_dump_to_file(&exec);
+
         if (exec.ct0ca != exec.ct0ea) {
                 int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
                 if (bfc != 1) {