X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fgen7_l3_state.c;h=49b59040b1cdae1a2b0d843a81387da6f39e79be;hb=29eb8059fd7906d2595ea99bc65a27691b9fbe53;hp=b63e61ca8f0939f1cbd4450b6cd31b7dec4cebbe;hpb=d5c9955d3eaa7311e2b2350b6964bae516c7b7b2;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
index b63e61ca8f0..49b59040b1c 100644
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -298,7 +298,12 @@ static struct brw_l3_weights
 get_pipeline_state_l3_weights(const struct brw_context *brw)
 {
    const struct brw_stage_state *stage_states[] = {
-      &brw->vs.base, &brw->gs.base, &brw->wm.base, &brw->cs.base
+      [MESA_SHADER_VERTEX] = &brw->vs.base,
+      [MESA_SHADER_TESS_CTRL] = &brw->tcs.base,
+      [MESA_SHADER_TESS_EVAL] = &brw->tes.base,
+      [MESA_SHADER_GEOMETRY] = &brw->gs.base,
+      [MESA_SHADER_FRAGMENT] = &brw->wm.base,
+      [MESA_SHADER_COMPUTE] = &brw->cs.base
    };
    bool needs_dc = false, needs_slm = false;
 
@@ -330,23 +335,39 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
 
    /* According to the hardware docs, the L3 partitioning can only be changed
     * while the pipeline is completely drained and the caches are flushed,
-    * which involves a first PIPE_CONTROL flush which stalls the pipeline and
-    * initiates invalidation of the relevant caches...
+    * which involves a first PIPE_CONTROL flush which stalls the pipeline...
+    */
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_DATA_CACHE_FLUSH |
+                               PIPE_CONTROL_NO_WRITE |
+                               PIPE_CONTROL_CS_STALL);
+
+   /* ...followed by a second pipelined PIPE_CONTROL that initiates
+    * invalidation of the relevant caches. Note that because RO invalidation
+    * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
+    * command is processed by the CS) we cannot combine it with the previous
+    * stalling flush as the hardware documentation suggests, because that
+    * would cause the CS to stall on previous rendering *after* RO
+    * invalidation and wouldn't prevent the RO caches from being polluted by
+    * concurrent rendering before the stall completes. This intentionally
+    * doesn't implement the SKL+ hardware workaround suggesting to enable CS
+    * stall on PIPE_CONTROLs with the texture cache invalidation bit set for
+    * GPGPU workloads because the previous and subsequent PIPE_CONTROLs
+    * already guarantee that there is no concurrent GPGPU kernel execution
+    * (see SKL HSD 2132585).
     */
    brw_emit_pipe_control_flush(brw,
                                PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                                PIPE_CONTROL_INSTRUCTION_INVALIDATE |
-                               PIPE_CONTROL_DATA_CACHE_INVALIDATE |
-                               PIPE_CONTROL_NO_WRITE |
-                               PIPE_CONTROL_CS_STALL);
+                               PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+                               PIPE_CONTROL_NO_WRITE);
 
-   /* ...followed by a second stalling flush which guarantees that
-    * invalidation is complete when the L3 configuration registers are
-    * modified.
+   /* Now send a third stalling flush to make sure that invalidation is
+    * complete when the L3 configuration registers are modified.
     */
    brw_emit_pipe_control_flush(brw,
-                               PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+                               PIPE_CONTROL_DATA_CACHE_FLUSH |
                                PIPE_CONTROL_NO_WRITE |
                                PIPE_CONTROL_CS_STALL);
 
@@ -515,6 +536,7 @@ const struct brw_tracked_state gen7_l3_state = {
    .dirty = {
       .mesa = 0,
       .brw = BRW_NEW_BATCH |
+             BRW_NEW_BLORP |
              BRW_NEW_CS_PROG_DATA |
              BRW_NEW_FS_PROG_DATA |
              BRW_NEW_GS_PROG_DATA |
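
Note on the get_pipeline_state_l3_weights hunk: the stage_states array is now written with C99 designated initializers keyed by the gl_shader_stage enum (MESA_SHADER_VERTEX, MESA_SHADER_TESS_CTRL, ...), which both adds the tessellation stages and makes the stage-to-element mapping explicit instead of relying on declaration order. A minimal standalone sketch of the idiom follows; the enum, struct and names in it are simplified stand-ins for illustration, not the actual Mesa types:

#include <stdio.h>

/* Simplified stand-ins for gl_shader_stage and brw_stage_state;
 * illustration only, not Mesa code. */
enum stage {
   STAGE_VERTEX,
   STAGE_TESS_CTRL,
   STAGE_TESS_EVAL,
   STAGE_GEOMETRY,
   STAGE_FRAGMENT,
   STAGE_COMPUTE,
   STAGE_COUNT
};

struct stage_state {
   const char *name;
};

int main(void)
{
   static const struct stage_state vs = { "vs" }, tcs = { "tcs" },
      tes = { "tes" }, gs = { "gs" }, wm = { "wm" }, cs = { "cs" };

   /* Designated initializers document the stage -> slot mapping and stay
    * correct even if the initializer list is reordered. */
   const struct stage_state *const stage_states[STAGE_COUNT] = {
      [STAGE_VERTEX] = &vs,
      [STAGE_TESS_CTRL] = &tcs,
      [STAGE_TESS_EVAL] = &tes,
      [STAGE_GEOMETRY] = &gs,
      [STAGE_FRAGMENT] = &wm,
      [STAGE_COMPUTE] = &cs,
   };

   for (int i = 0; i < STAGE_COUNT; i++)
      printf("stage %d -> %s\n", i, stage_states[i]->name);

   return 0;
}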
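
For readability, here is how the flush/invalidate/flush sequence in setup_l3_config reads once the second hunk is applied, condensed from the added and context lines of this diff. This is an excerpt of the function, not a standalone program; the surrounding function body is unchanged and omitted:

   /* 1) Stalling flush of the data cache so the pipeline is drained
    *    before the L3 partitioning is touched... */
   brw_emit_pipe_control_flush(brw,
                               PIPE_CONTROL_DATA_CACHE_FLUSH |
                               PIPE_CONTROL_NO_WRITE |
                               PIPE_CONTROL_CS_STALL);

   /* 2) ...a pipelined invalidation of the read-only caches, deliberately
    *    issued without a CS stall (see the comment in the hunk above)... */
   brw_emit_pipe_control_flush(brw,
                               PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                               PIPE_CONTROL_CONST_CACHE_INVALIDATE |
                               PIPE_CONTROL_INSTRUCTION_INVALIDATE |
                               PIPE_CONTROL_STATE_CACHE_INVALIDATE |
                               PIPE_CONTROL_NO_WRITE);

   /* 3) ...and a final stalling flush so invalidation has completed before
    *    the L3 configuration registers are written. */
   brw_emit_pipe_control_flush(brw,
                               PIPE_CONTROL_DATA_CACHE_FLUSH |
                               PIPE_CONTROL_NO_WRITE |
                               PIPE_CONTROL_CS_STALL);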