From 82e919d33bbe508b3e1ba883a01ef2512dbc8f72 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 18 Nov 2014 12:16:55 -0800 Subject: [PATCH] vc4: Emit semaphore instructions for new kernel ABI. Previously, the kernel would dispatch thread 0, wait, then dispatch thread 1. By insisting that the thread contents use semaphores in the right place, the kernel can sleep for longer by dispatching both threads at once. --- src/gallium/drivers/vc4/kernel/vc4_drv.h | 2 + src/gallium/drivers/vc4/kernel/vc4_validate.c | 77 ++++++++++++++++++- src/gallium/drivers/vc4/vc4_context.c | 14 +++- 3 files changed, 87 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h index b0eb3f031c5..81ffa03a6fa 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_drv.h +++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h @@ -82,6 +82,8 @@ struct exec_info { bool found_tile_binning_mode_config_packet; bool found_tile_rendering_mode_config_packet; bool found_start_tile_binning_packet; + bool found_increment_semaphore_packet; + bool found_wait_on_semaphore_packet; uint8_t bin_tiles_x, bin_tiles_y; uint32_t fb_width, fb_height; uint32_t tile_alloc_init_block_size; diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index 8b04eb99195..ba6e46f2041 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -202,6 +202,18 @@ check_tex_size(struct exec_info *exec, struct drm_gem_cma_object *fbo, return true; } +static int +validate_flush_all(VALIDATE_ARGS) +{ + if (exec->found_increment_semaphore_packet) { + DRM_ERROR("VC4_PACKET_FLUSH_ALL after " + "VC4_PACKET_INCREMENT_SEMAPHORE\n"); + return -EINVAL; + } + + return 0; +} + static int validate_start_tile_binning(VALIDATE_ARGS) { @@ -219,6 +231,41 @@ validate_start_tile_binning(VALIDATE_ARGS) return 0; } +static int +validate_increment_semaphore(VALIDATE_ARGS) +{ + if 
(exec->found_increment_semaphore_packet) { + DRM_ERROR("Duplicate VC4_PACKET_INCREMENT_SEMAPHORE\n"); + return -EINVAL; + } + exec->found_increment_semaphore_packet = true; + + /* Once we've found the semaphore increment, there should be one FLUSH + * then the end of the command list. The FLUSH actually triggers the + * increment, so we only need to make sure there is no drawing after it. + */ + + return 0; +} + +static int +validate_wait_on_semaphore(VALIDATE_ARGS) +{ + if (exec->found_wait_on_semaphore_packet) { + DRM_ERROR("Duplicate VC4_PACKET_WAIT_ON_SEMAPHORE\n"); + return -EINVAL; + } + exec->found_wait_on_semaphore_packet = true; + + if (!exec->found_increment_semaphore_packet) { + DRM_ERROR("VC4_PACKET_WAIT_ON_SEMAPHORE without " + "VC4_PACKET_INCREMENT_SEMAPHORE\n"); + return -EINVAL; + } + + return 0; +} + static int validate_branch_to_sublist(VALIDATE_ARGS) { @@ -233,6 +280,11 @@ validate_branch_to_sublist(VALIDATE_ARGS) return -EINVAL; } + if (!exec->found_wait_on_semaphore_packet) { + DRM_ERROR("Jumping to tile alloc before binning finished.\n"); + return -EINVAL; + } + offset = *(uint32_t *)(untrusted + 0); if (offset % exec->tile_alloc_init_block_size || offset / exec->tile_alloc_init_block_size > @@ -322,6 +374,11 @@ validate_indexed_prim_list(VALIDATE_ARGS) uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 
2 : 1; struct vc4_shader_state *shader_state; + if (exec->found_increment_semaphore_packet) { + DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n"); + return -EINVAL; + } + /* Check overflow condition */ if (exec->shader_state_count == 0) { DRM_ERROR("shader state must precede primitives\n"); @@ -355,6 +412,11 @@ validate_gl_array_primitive(VALIDATE_ARGS) uint32_t max_index; struct vc4_shader_state *shader_state; + if (exec->found_increment_semaphore_packet) { + DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n"); + return -EINVAL; + } + /* Check overflow condition */ if (exec->shader_state_count == 0) { DRM_ERROR("shader state must precede primitives\n"); @@ -600,10 +662,10 @@ static const struct cmd_info { [VC4_PACKET_HALT] = { 1, 1, 1, "halt", NULL }, [VC4_PACKET_NOP] = { 1, 1, 1, "nop", NULL }, [VC4_PACKET_FLUSH] = { 1, 1, 1, "flush", NULL }, - [VC4_PACKET_FLUSH_ALL] = { 1, 0, 1, "flush all state", NULL }, + [VC4_PACKET_FLUSH_ALL] = { 1, 0, 1, "flush all state", validate_flush_all }, [VC4_PACKET_START_TILE_BINNING] = { 1, 0, 1, "start tile binning", validate_start_tile_binning }, - [VC4_PACKET_INCREMENT_SEMAPHORE] = { 1, 0, 1, "increment semaphore", NULL }, - [VC4_PACKET_WAIT_ON_SEMAPHORE] = { 1, 1, 1, "wait on semaphore", NULL }, + [VC4_PACKET_INCREMENT_SEMAPHORE] = { 1, 0, 1, "increment semaphore", validate_increment_semaphore }, + [VC4_PACKET_WAIT_ON_SEMAPHORE] = { 0, 1, 1, "wait on semaphore", validate_wait_on_semaphore }, /* BRANCH_TO_SUB_LIST is actually supported in the binner as well, but * we only use it from the render CL in order to jump into the tile * allocation BO. @@ -737,6 +799,15 @@ vc4_validate_cl(struct drm_device *dev, DRM_ERROR("Render CL missing VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n"); return -EINVAL; } + + /* Make sure that they actually consumed the semaphore + * increment from the bin CL. Otherwise a later submit would + * have render execute immediately. 
+ */ + if (!exec->found_wait_on_semaphore_packet) { + DRM_ERROR("Render CL missing VC4_PACKET_WAIT_ON_SEMAPHORE\n"); + return -EINVAL; + } exec->ct1ea = exec->ct1ca + dst_offset; } diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index b1f0f353fcc..a6becaf73fc 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -203,6 +203,12 @@ vc4_setup_rcl(struct vc4_context *vc4) */ vc4_tile_coordinates(vc4, x, y, &coords_emitted); + /* Wait for the binner before jumping to the first + * tile's lists. + */ + if (x == 0 && y == 0) + cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE); + cl_start_reloc(&vc4->rcl, 1); cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST); cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc, @@ -269,12 +275,14 @@ vc4_flush(struct pipe_context *pctx) if (!vc4->needs_flush) return; + /* Increment the semaphore indicating that binning is done and + * unblocking the render thread. Note that this doesn't act until the + * FLUSH completes. + */ + cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE); /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */ cl_u8(&vc4->bcl, VC4_PACKET_FLUSH); - cl_u8(&vc4->bcl, VC4_PACKET_NOP); - cl_u8(&vc4->bcl, VC4_PACKET_HALT); - vc4_setup_rcl(vc4); if (vc4_debug & VC4_DEBUG_CL) { -- 2.30.2