From: Iago Toral Quiroga Date: Thu, 4 Jul 2019 10:22:40 +0000 (+0200) Subject: v3d: acquire scoreboard lock before first tlb read X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=7c1d70891150c9960b1bb2464b53a95f4645037c v3d: acquire scoreboard lock before first tlb read Until now we have always been emitting our scoreboard locks on the last thread switch to improve parallelism. We did this by emitting our last thread switch right before our tlb writes at the very end of the program, where we know that we are outside control flow. Unfortunately, this strategy is not valid when we have tlb color reads too, as these will happen before this point in the program and can happen inside control flow. To fix this we always emit a thread switch before the first tlb load and if we see additional thread switches after that point, we change the strategy to lock on the first thread switch. v2: change the solution so it is expected to work in more scenarios (Eric). Reviewed-by: Eric Anholt --- diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 75622338aa2..4f12110ded1 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -122,6 +122,13 @@ vir_emit_thrsw(struct v3d_compile *c) c->last_thrsw = vir_NOP(c); c->last_thrsw->qpu.sig.thrsw = true; c->last_thrsw_at_top_level = !c->in_control_flow; + + /* We need to lock the scoreboard before any tlb access happens. If this + * thread switch comes after we have emitted a tlb load, then it means + * that we can't lock on the last thread switch any more. + */ + if (c->emitted_tlb_load) + c->lock_scoreboard_on_first_thrsw = true; } static uint32_t @@ -1646,6 +1653,27 @@ vir_emit_tlb_color_read(struct v3d_compile *c, nir_intrinsic_instr *instr) int component = nir_intrinsic_component(instr); assert(component < 4); + /* We need to emit our TLB reads after we have acquired the scoreboard + * lock, or the GPU will hang. 
Usually, we do our scoreboard locking on + * the last thread switch to improve parallelism, however, that is only + * guaranteed to happen before the tlb color writes. + * + * To fix that, we make sure we always emit a thread switch before the + * first tlb color read. If that happens to be the last thread switch + * we emit, then everything is fine, but otherwise, if any code after + * this point needs to emit additional thread switches, then we will + * switch the strategy to locking the scoreboard on the first thread + * switch instead -- see vir_emit_thrsw(). + */ + if (!c->emitted_tlb_load) { + if (!c->last_thrsw_at_top_level) { + assert(c->devinfo->ver >= 41); + vir_emit_thrsw(c); + } + + c->emitted_tlb_load = true; + } + struct qreg *color_reads = &c->color_reads[(rt * V3D_MAX_SAMPLES + sample_index) * 4]; diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 4cb37d770cf..67c7dd48d8c 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -634,6 +634,9 @@ struct v3d_compile { struct qinst *last_thrsw; bool last_thrsw_at_top_level; + bool emitted_tlb_load; + bool lock_scoreboard_on_first_thrsw; + bool failed; }; @@ -700,6 +703,7 @@ struct v3d_fs_prog_data { bool disable_ez; bool uses_center_w; bool uses_implicit_point_line_varyings; + bool lock_scoreboard_on_first_thrsw; }; struct v3d_compute_prog_data { diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 8de582792b7..4f1ee605214 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -692,6 +692,8 @@ v3d_fs_set_prog_data(struct v3d_compile *c, prog_data->uses_center_w = c->uses_center_w; prog_data->uses_implicit_point_line_varyings = c->uses_implicit_point_line_varyings; + prog_data->lock_scoreboard_on_first_thrsw = + c->lock_scoreboard_on_first_thrsw; } static void diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c index 0d23ac6bb2d..744d0c9c211 
100644 --- a/src/gallium/drivers/v3d/v3dx_draw.c +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -373,6 +373,8 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, v3d->prog.fs->prog_data.fs->uses_center_w; #if V3D_VERSION >= 40 + shader.do_scoreboard_wait_on_first_thread_switch = + v3d->prog.fs->prog_data.fs->lock_scoreboard_on_first_thrsw; shader.disable_implicit_point_line_varyings = !v3d->prog.fs->prog_data.fs->uses_implicit_point_line_varyings; #endif