vc4: Emit semaphore instructions for new kernel ABI.
author: Eric Anholt <eric@anholt.net>
Tue, 18 Nov 2014 20:16:55 +0000 (12:16 -0800)
committer: Eric Anholt <eric@anholt.net>
Tue, 18 Nov 2014 20:46:55 +0000 (12:46 -0800)
Previously, the kernel would dispatch thread 0, wait, then dispatch thread
1.  By insisting that the thread contents use semaphores in the right
place, the kernel can sleep for longer by dispatching both threads at
once.

src/gallium/drivers/vc4/kernel/vc4_drv.h
src/gallium/drivers/vc4/kernel/vc4_validate.c
src/gallium/drivers/vc4/vc4_context.c

index b0eb3f031c5078ce46ef7ecf55c0153908ec1e4a..81ffa03a6fa01d21b2ffffcb4a4b2486c2ca7868 100644 (file)
@@ -82,6 +82,8 @@ struct exec_info {
        bool found_tile_binning_mode_config_packet;
        bool found_tile_rendering_mode_config_packet;
        bool found_start_tile_binning_packet;
+       bool found_increment_semaphore_packet;
+       bool found_wait_on_semaphore_packet;
        uint8_t bin_tiles_x, bin_tiles_y;
        uint32_t fb_width, fb_height;
        uint32_t tile_alloc_init_block_size;
index 8b04eb9919586b5649052bc0c3097dd57f863363..ba6e46f204118b9a98180f629e44aab686e44b04 100644 (file)
@@ -202,6 +202,18 @@ check_tex_size(struct exec_info *exec, struct drm_gem_cma_object *fbo,
        return true;
 }
 
+static int
+validate_flush_all(VALIDATE_ARGS)
+{
+       if (exec->found_increment_semaphore_packet) {
+               DRM_ERROR("VC4_PACKET_FLUSH_ALL after "
+                         "VC4_PACKET_INCREMENT_SEMAPHORE\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int
 validate_start_tile_binning(VALIDATE_ARGS)
 {
@@ -219,6 +231,41 @@ validate_start_tile_binning(VALIDATE_ARGS)
        return 0;
 }
 
+static int
+validate_increment_semaphore(VALIDATE_ARGS)
+{
+       if (exec->found_increment_semaphore_packet) {
+               DRM_ERROR("Duplicate VC4_PACKET_INCREMENT_SEMAPHORE\n");
+               return -EINVAL;
+       }
+       exec->found_increment_semaphore_packet = true;
+
+       /* Once we've found the semaphore increment, there should be one FLUSH
+        * then the end of the command list.  The FLUSH actually triggers the
+        * increment, so we only need to reject drawing or FLUSH_ALL after it.
+        */
+
+       return 0;
+}
+
+static int
+validate_wait_on_semaphore(VALIDATE_ARGS)
+{
+       if (exec->found_wait_on_semaphore_packet) {
+               DRM_ERROR("Duplicate VC4_PACKET_WAIT_ON_SEMAPHORE\n");
+               return -EINVAL;
+       }
+       exec->found_wait_on_semaphore_packet = true;
+
+       if (!exec->found_increment_semaphore_packet) {
+               DRM_ERROR("VC4_PACKET_WAIT_ON_SEMAPHORE without "
+                         "VC4_PACKET_INCREMENT_SEMAPHORE\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int
 validate_branch_to_sublist(VALIDATE_ARGS)
 {
@@ -233,6 +280,11 @@ validate_branch_to_sublist(VALIDATE_ARGS)
                return -EINVAL;
        }
 
+       if (!exec->found_wait_on_semaphore_packet) {
+               DRM_ERROR("Jumping to tile alloc before binning finished.\n");
+               return -EINVAL;
+       }
+
        offset = *(uint32_t *)(untrusted + 0);
        if (offset % exec->tile_alloc_init_block_size ||
            offset / exec->tile_alloc_init_block_size >
@@ -322,6 +374,11 @@ validate_indexed_prim_list(VALIDATE_ARGS)
        uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
        struct vc4_shader_state *shader_state;
 
+       if (exec->found_increment_semaphore_packet) {
+               DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
+               return -EINVAL;
+       }
+
        /* Check overflow condition */
        if (exec->shader_state_count == 0) {
                DRM_ERROR("shader state must precede primitives\n");
@@ -355,6 +412,11 @@ validate_gl_array_primitive(VALIDATE_ARGS)
        uint32_t max_index;
        struct vc4_shader_state *shader_state;
 
+       if (exec->found_increment_semaphore_packet) {
+               DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
+               return -EINVAL;
+       }
+
        /* Check overflow condition */
        if (exec->shader_state_count == 0) {
                DRM_ERROR("shader state must precede primitives\n");
@@ -600,10 +662,10 @@ static const struct cmd_info {
        [VC4_PACKET_HALT] = { 1, 1, 1, "halt", NULL },
        [VC4_PACKET_NOP] = { 1, 1, 1, "nop", NULL },
        [VC4_PACKET_FLUSH] = { 1, 1, 1, "flush", NULL },
-       [VC4_PACKET_FLUSH_ALL] = { 1, 0, 1, "flush all state", NULL },
+       [VC4_PACKET_FLUSH_ALL] = { 1, 0, 1, "flush all state", validate_flush_all },
        [VC4_PACKET_START_TILE_BINNING] = { 1, 0, 1, "start tile binning", validate_start_tile_binning },
-       [VC4_PACKET_INCREMENT_SEMAPHORE] = { 1, 0, 1, "increment semaphore", NULL },
-       [VC4_PACKET_WAIT_ON_SEMAPHORE] = { 1, 1, 1, "wait on semaphore", NULL },
+       [VC4_PACKET_INCREMENT_SEMAPHORE] = { 1, 0, 1, "increment semaphore", validate_increment_semaphore },
+       [VC4_PACKET_WAIT_ON_SEMAPHORE] = { 0, 1, 1, "wait on semaphore", validate_wait_on_semaphore },
        /* BRANCH_TO_SUB_LIST is actually supported in the binner as well, but
         * we only use it from the render CL in order to jump into the tile
         * allocation BO.
@@ -737,6 +799,15 @@ vc4_validate_cl(struct drm_device *dev,
                        DRM_ERROR("Render CL missing VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n");
                        return -EINVAL;
                }
+
+               /* Make sure that they actually consumed the semaphore
+                * increment from the bin CL.  Otherwise a later submit would
+                * have render execute immediately.
+                */
+               if (!exec->found_wait_on_semaphore_packet) {
+                       DRM_ERROR("Render CL missing VC4_PACKET_WAIT_ON_SEMAPHORE\n");
+                       return -EINVAL;
+               }
                exec->ct1ea = exec->ct1ca + dst_offset;
        }
 
index b1f0f353fcce61423f4deac3a2305171ae72ccf4..a6becaf73fccd2f3c5cfdda04d3a0f48ac75bab5 100644 (file)
@@ -203,6 +203,12 @@ vc4_setup_rcl(struct vc4_context *vc4)
                          */
                         vc4_tile_coordinates(vc4, x, y, &coords_emitted);
 
+                        /* Wait for the binner before jumping to the first
+                         * tile's lists.
+                         */
+                        if (x == 0 && y == 0)
+                                cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);
+
                         cl_start_reloc(&vc4->rcl, 1);
                         cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
                         cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc,
@@ -269,12 +275,14 @@ vc4_flush(struct pipe_context *pctx)
         if (!vc4->needs_flush)
                 return;
 
+        /* Increment the semaphore indicating that binning is done and
+         * unblocking the render thread.  Note that this doesn't act until the
+         * FLUSH completes.
+         */
+        cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
         /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
         cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
 
-        cl_u8(&vc4->bcl, VC4_PACKET_NOP);
-        cl_u8(&vc4->bcl, VC4_PACKET_HALT);
-
         vc4_setup_rcl(vc4);
 
         if (vc4_debug & VC4_DEBUG_CL) {