}
}
-static void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info)
+#define SI_CS_IMAGE_OP (1 << 0)
+#define SI_CS_WAIT_FOR_IDLE (1 << 1)
+
+static void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,
+ void *restore_cs, unsigned flags)
{
+ /* Wait for previous shaders to finish. */
+ sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH;
+ /* Invalidate L0-L1 caches. */
+ /* sL0 is never invalidated, because src resources don't use it. */
+ sctx->flags |= SI_CONTEXT_INV_VCACHE;
+
/* Set settings for driver-internal compute dispatches. */
sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
sctx->render_cond_force_off = false;
if (sctx->blitter)
sctx->blitter->running = false;
+
+ /* Restore the original compute shader. */
+ sctx->b.bind_compute_state(&sctx->b, restore_cs);
+
+ if (flags & SI_CS_WAIT_FOR_IDLE) {
+ sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
+
+ if (flags & SI_CS_IMAGE_OP) {
+ /* Make sure image stores are visible to CB, which doesn't use L2 on GFX6-8. */
+ sctx->flags |= sctx->chip_class <= GFX8 ? SI_CONTEXT_WB_L2 : 0;
+ /* Make sure image stores are visible to all CUs. */
+ sctx->flags |= SI_CONTEXT_INV_VCACHE;
+ } else {
+ /* Make sure buffer stores are visible to all CUs. */
+ sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;
+ }
+ } else {
+ assert(!(flags & SI_CS_IMAGE_OP));
+ }
}
static void si_compute_clear_12bytes_buffer(struct si_context *sctx, struct pipe_resource *dst,
unsigned data[4] = {0};
memcpy(data, clear_value, 12);
- sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
- si_get_flush_flags(sctx, coher, SI_COMPUTE_DST_CACHE_POLICY);
+ sctx->flags |= si_get_flush_flags(sctx, coher, SI_COMPUTE_DST_CACHE_POLICY);
struct pipe_shader_buffer saved_sb = {0};
si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_sb);
info.grid[1] = 1;
info.grid[2] = 1;
- si_launch_grid_internal(sctx, &info);
+ si_launch_grid_internal(sctx, &info, saved_cs, SI_CS_WAIT_FOR_IDLE);
- ctx->bind_compute_state(ctx, saved_cs);
ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_sb, saved_writable_mask);
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
assert(dst->target != PIPE_BUFFER || dst_offset + size <= dst->width0);
assert(!src || src_offset + size <= src->width0);
- sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
- si_get_flush_flags(sctx, coher, SI_COMPUTE_DST_CACHE_POLICY);
+ sctx->flags |= si_get_flush_flags(sctx, coher, SI_COMPUTE_DST_CACHE_POLICY);
/* Save states. */
void *saved_cs = sctx->cs_shader_state.program;
ctx->bind_compute_state(ctx, sctx->cs_clear_buffer);
}
- si_launch_grid_internal(sctx, &info);
+ si_launch_grid_internal(sctx, &info, saved_cs, SI_CS_WAIT_FOR_IDLE);
enum si_cache_policy cache_policy = get_cache_policy(sctx, coher, size);
- sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | (cache_policy == L2_BYPASS ? SI_CONTEXT_WB_L2 : 0);
+ sctx->flags |= cache_policy == L2_BYPASS ? SI_CONTEXT_WB_L2 : 0;
if (cache_policy != L2_BYPASS)
si_resource(dst)->TC_L2_dirty = true;
/* Restore states. */
- ctx->bind_compute_state(ctx, saved_cs);
ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0, src ? 2 : 1, saved_sb, saved_writable_mask);
for (int i = 0; i < 2; i++)
pipe_resource_reference(&saved_sb[i].buffer, NULL);
if (width == 0 || height == 0)
return;
- sctx->flags |=
- SI_CONTEXT_CS_PARTIAL_FLUSH | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
-
/* The driver doesn't decompress resources automatically here. */
si_decompress_subresource(ctx, dst, PIPE_MASK_RGBAZS, dst_level, dstz,
dstz + src_box->depth - 1);
info.grid[2] = depth;
}
- si_launch_grid_internal(sctx, &info);
+ si_launch_grid_internal(sctx, &info, saved_cs,
+ SI_CS_WAIT_FOR_IDLE | SI_CS_IMAGE_OP);
- sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | (sctx->chip_class <= GFX8 ? SI_CONTEXT_WB_L2 : 0) |
- si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
- ctx->bind_compute_state(ctx, saved_cs);
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image);
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
for (int i = 0; i < 2; i++)
{
struct pipe_context *ctx = &sctx->b;
- sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
- si_get_flush_flags(sctx, SI_COHERENCY_CB_META, L2_LRU) |
- si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_LRU);
- sctx->emit_cache_flush(sctx);
+ sctx->flags |= si_get_flush_flags(sctx, SI_COHERENCY_CB_META, L2_LRU);
/* Save states. */
void *saved_cs = sctx->cs_shader_state.program;
info.grid[2] = 1;
info.last_block[0] = num_threads % 64;
- si_launch_grid_internal(sctx, &info);
+ si_launch_grid_internal(sctx, &info, saved_cs, 0);
/* Don't flush caches or wait. The driver will wait at the end of this IB,
* and L2 will be flushed by the kernel fence.
*/
/* Restore states. */
- ctx->bind_compute_state(ctx, saved_cs);
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 3, saved_img);
for (unsigned i = 0; i < 3; i++) {
if (tex->nr_samples != tex->nr_storage_samples)
return;
- /* Flush caches and sync engines. */
- sctx->flags |=
- SI_CONTEXT_CS_PARTIAL_FLUSH | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
si_make_CB_shader_coherent(sctx, tex->nr_samples, true,
true /* DCC is not possible with image stores */);
info.grid[1] = DIV_ROUND_UP(tex->height0, 8);
info.grid[2] = is_array ? tex->array_size : 1;
- si_launch_grid_internal(sctx, &info);
-
- /* Flush caches and sync engines. */
- sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | (sctx->chip_class <= GFX8 ? SI_CONTEXT_WB_L2 : 0) |
- si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
+ si_launch_grid_internal(sctx, &info, saved_cs,
+ SI_CS_WAIT_FOR_IDLE | SI_CS_IMAGE_OP);
/* Restore previous states. */
- ctx->bind_compute_state(ctx, saved_cs);
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_image);
pipe_resource_reference(&saved_image.resource, NULL);
sctx->render_cond_force_off = !render_condition_enabled;
- sctx->flags |=
- SI_CONTEXT_CS_PARTIAL_FLUSH | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
si_make_CB_shader_coherent(sctx, dstsurf->texture->nr_samples, true,
true /* DCC is not possible with image stores */);
info.grid[2] = 1;
}
- si_launch_grid_internal(sctx, &info);
+ si_launch_grid_internal(sctx, &info, saved_cs,
+ SI_CS_WAIT_FOR_IDLE | SI_CS_IMAGE_OP);
- sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | (sctx->chip_class <= GFX8 ? SI_CONTEXT_WB_L2 : 0) |
- si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
- ctx->bind_compute_state(ctx, saved_cs);
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_image);
ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
pipe_resource_reference(&saved_image.resource, NULL);