    if (info->mask & main_mask) {
       for (int slice = 0; slice < info->dst.box.depth; slice++) {
          iris_batch_maybe_flush(batch, 1500);
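+
+         /* Sync regions bracket every span of batch commands that accesses
+          * memory: iris_batch_sync_region_start() keeps a new
+          * synchronization boundary from being created until the matching
+          * iris_batch_sync_region_end(), so every BO referenced in between
+          * is attributed to a single synchronization section.
+          */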
+         iris_batch_sync_region_start(batch);
 
          blorp_blit(&blorp_batch,
                     &src_surf, info->src.level, info->src.box.z + slice,
                     src_x0, src_y0, src_x1, src_y1,
                     dst_x0, dst_y0, dst_x1, dst_y1,
                     filter, mirror_x, mirror_y);
+
+         iris_batch_sync_region_end(batch);
       }
    }
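
The bracketing helpers themselves are tiny. A minimal sketch of their shape
(simplified: sync_region_depth and the screen-wide last_seqno are the names
this series uses in iris_batch.h, and the bookkeeping shown here is an
approximation):

   static inline void
   iris_batch_sync_region_start(struct iris_batch *batch)
   {
      /* While the depth is non-zero, no new synchronization boundary can
       * be created, so everything inside counts as one section.
       */
      batch->sync_region_depth++;
   }

   static inline void
   iris_batch_sync_region_end(struct iris_batch *batch)
   {
      assert(batch->sync_region_depth);
      batch->sync_region_depth--;
   }

   static inline void
   iris_batch_sync_boundary(struct iris_batch *batch)
   {
      /* Advance the batch's sequence number so BOs referenced after this
       * point are stamped as belonging to a new section.
       */
      if (!batch->sync_region_depth)
         batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
   }

Each BO referenced inside a region is then stamped with batch->next_seqno
for its access domain (roughly what iris_use_pinned_bo() does once it learns
about domains), which is what lets the rest of the series decide whether a
given flush or dependency is already satisfied.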
 
 
       iris_batch_maybe_flush(batch, 1500);
 
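+      /* One region covers BLORP batch setup, the buffer copy, and finish,
+       * so the source and destination BOs are stamped at a single point in
+       * the batch.
+       */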
+      iris_batch_sync_region_start(batch);
       blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
       blorp_buffer_copy(&blorp_batch, src_addr, dst_addr, src_box->width);
       blorp_batch_finish(&blorp_batch);
+      iris_batch_sync_region_end(batch);
    } else {
       // XXX: what about one surface being a buffer and not the other?
 
       for (int slice = 0; slice < src_box->depth; slice++) {
          iris_batch_maybe_flush(batch, 1500);
 
+         iris_batch_sync_region_start(batch);
          blorp_copy(&blorp_batch, &src_surf, src_level, src_box->z + slice,
                     &dst_surf, dst_level, dstz + slice,
                     src_box->x, src_box->y, dstx, dsty,
                     src_box->width, src_box->height);
+         iris_batch_sync_region_end(batch);
       }
       blorp_batch_finish(&blorp_batch);
 
 
                               "fast clear: pre-flush",
                               PIPE_CONTROL_RENDER_TARGET_FLUSH);
 
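+   /* The fast clear and the post-flush end-of-pipe sync both access
+    * memory (the latter through its post-sync write), so they share one
+    * sync region.
+    */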
+   iris_batch_sync_region_start(batch);
+
    /* If we reach this point, we need to fast clear to change the state to
     * ISL_AUX_STATE_CLEAR, or to update the fast clear color (or both).
     */
    iris_emit_end_of_pipe_sync(batch,
                               "fast clear: post flush",
                               PIPE_CONTROL_RENDER_TARGET_FLUSH);
+   iris_batch_sync_region_end(batch);
 
    iris_resource_set_aux_state(ice, res, level, box->z,
                                box->depth, ISL_AUX_STATE_CLEAR);
    iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf,
                                 p_res, aux_usage, level, true);
 
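+   /* Bracket the BLORP clear: it writes the render target, and its aux
+    * surface when one is present.
+    */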
+   iris_batch_sync_region_start(batch);
+
    struct blorp_batch blorp_batch;
    blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
 
                color, color_write_disable);
 
    blorp_batch_finish(&blorp_batch);
+   iris_batch_sync_region_end(batch);
+
    iris_flush_and_dirty_for_history(ice, batch, res,
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH,
                                     "cache history: post color clear");
                                    level, true);
    }
 
-   struct blorp_batch blorp_batch;
-   blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
-
    uint8_t stencil_mask = clear_stencil && stencil_res ? 0xff : 0;
    if (stencil_mask) {
       iris_resource_prepare_access(ice, batch, stencil_res, level, 1, box->z,
                                    box->depth, stencil_res->aux.usage, false);
    }
 
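+   /* A single region covers the combined depth/stencil clear, so both
+    * surfaces land in the same synchronization section.
+    */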
+   iris_batch_sync_region_start(batch);
+
+   struct blorp_batch blorp_batch;
+   blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
+
    blorp_clear_depth_stencil(&blorp_batch, &z_surf, &stencil_surf,
                              level, box->z, box->depth,
                              box->x, box->y,
                              stencil_mask, stencil);
 
    blorp_batch_finish(&blorp_batch);
+   iris_batch_sync_region_end(batch);
+
    iris_flush_and_dirty_for_history(ice, batch, res, 0,
                                     "cache history: post slow ZS clear");
 
 
    struct gen_mi_builder b;
    gen_mi_builder_init(&b, batch);
 
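+   /* The MI commands built below read the query snapshots and write the
+    * result to dst_bo; the whole computation is one memory-access region.
+    */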
+   iris_batch_sync_region_start(batch);
+
    struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
    struct gen_mi_value dst =
       result_type <= PIPE_QUERY_TYPE_U32 ? gen_mi_mem32(rw_bo(dst_bo, offset))
    } else {
       gen_mi_store(&b, dst, result);
    }
+
+   iris_batch_sync_region_end(batch);
 }
 
 static void
    struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
    struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
 
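+   /* Computing the predicate reads the query snapshots and writes
+    * predicate_result, so it is bracketed as well.
+    */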
+   iris_batch_sync_region_start(batch);
+
    /* The CPU doesn't have the query result yet; use hardware predication */
    ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;
 
    gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
                                             predicate_result)), result);
    ice->state.compute_predicate = bo;
+
+   iris_batch_sync_region_end(batch);
 }
 
 static void
 
    iris_emit_end_of_pipe_sync(batch, "color resolve: pre-flush",
                               PIPE_CONTROL_RENDER_TARGET_FLUSH);
 
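+   /* As with fast clears, the resolve and the trailing end-of-pipe sync
+    * form a single memory-access region.
+    */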
+   iris_batch_sync_region_start(batch);
    struct blorp_batch blorp_batch;
    blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
    /* On Gen >= 12, Stencil buffer with lossless compression needs to be
    /* See comment above */
    iris_emit_end_of_pipe_sync(batch, "color resolve: post-flush",
                               PIPE_CONTROL_RENDER_TARGET_FLUSH);
+   iris_batch_sync_region_end(batch);
 }
 
 static void
                                 &res->base, res->aux.usage, 0, true);
 
    struct blorp_batch blorp_batch;
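+   /* Same pattern for the MCS partial resolve. */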
+   iris_batch_sync_region_start(batch);
    blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
    blorp_mcs_partial_resolve(&blorp_batch, &surf,
                              isl_format_srgb_to_linear(res->surf.format),
                              start_layer, num_layers);
    blorp_batch_finish(&blorp_batch);
+   iris_batch_sync_region_end(batch);
 }
 
 
 
    iris_batch_maybe_flush(batch, 1500);
 
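+   /* The HiZ op writes the depth surface and its aux buffer and ends with
+    * a depth cache flush, so the whole sequence is one region.
+    */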
+   iris_batch_sync_region_start(batch);
+
    struct blorp_surf surf;
    iris_blorp_surf_for_resource(&batch->screen->isl_dev, &surf,
                                 &res->base, res->aux.usage, level, true);
                                 "hiz op: post flush",
                                 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                 PIPE_CONTROL_DEPTH_STALL);
+
+   iris_batch_sync_region_end(batch);
 }
 
 static bool
 
 iris_load_register_mem32(struct iris_batch *batch, uint32_t reg,
                          struct iris_bo *bo, uint32_t offset)
 {
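+   /* MI_LOAD_REGISTER_MEM reads from a BO, so even this one-command
+    * helper needs a region; the MI store helpers below are bracketed the
+    * same way.
+    */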
+   iris_batch_sync_region_start(batch);
    iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
       lrm.RegisterAddress = reg;
       lrm.MemoryAddress = ro_bo(bo, offset);
    }
+   iris_batch_sync_region_end(batch);
 }
 
 /**
                           struct iris_bo *bo, uint32_t offset,
                           bool predicated)
 {
+   iris_batch_sync_region_start(batch);
    iris_emit_cmd(batch, GENX(MI_STORE_REGISTER_MEM), srm) {
       srm.RegisterAddress = reg;
       srm.MemoryAddress = rw_bo(bo, offset);
       srm.PredicateEnable = predicated;
    }
+   iris_batch_sync_region_end(batch);
 }
 
 static void
                       struct iris_bo *bo, uint32_t offset,
                       uint32_t imm)
 {
+   iris_batch_sync_region_start(batch);
    iris_emit_cmd(batch, GENX(MI_STORE_DATA_IMM), sdi) {
       sdi.Address = rw_bo(bo, offset);
       sdi.ImmediateData = imm;
    }
+   iris_batch_sync_region_end(batch);
 }
 
 static void
     * 2 in genxml but it's actually variable length and we need 5 DWords.
     */
    void *map = iris_get_command_space(batch, 4 * 5);
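+   /* The command space is reserved before the region opens; only the
+    * packing of the memory-writing MI_STORE_DATA_IMM happens inside it.
+    */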
+   iris_batch_sync_region_start(batch);
    _iris_pack_command(batch, GENX(MI_STORE_DATA_IMM), map, sdi) {
       sdi.DWordLength = 5 - 2;
       sdi.Address = rw_bo(bo, offset);
       sdi.ImmediateData = imm;
    }
+   iris_batch_sync_region_end(batch);
 }
 
 static void
    assert(bytes % 4 == 0);
    assert(dst_offset % 4 == 0);
    assert(src_offset % 4 == 0);
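+
+   /* A single region spans the whole DWord copy loop: every
+    * MI_COPY_MEM_MEM emitted here belongs to the same synchronization
+    * section.
+    */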
+   iris_batch_sync_region_start(batch);
 
    for (unsigned i = 0; i < bytes; i += 4) {
       iris_emit_cmd(batch, GENX(MI_COPY_MEM_MEM), cp) {
          cp.DestinationMemoryAddress = rw_bo(dst_bo, dst_offset + i);
          cp.SourceMemoryAddress = ro_bo(src_bo, src_offset + i);
       }
    }
+
+   iris_batch_sync_region_end(batch);
 }
 
 static void
    UNUSED const struct gen_device_info *devinfo = &batch->screen->devinfo;
    uint32_t reg_val;
 
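+   /* Bracket the one-time context initialization too; on Gen12+ the aux
+    * map setup below references the aux table BO.
+    */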
+   iris_batch_sync_region_start(batch);
+
    emit_pipeline_select(batch, _3D);
 
    iris_emit_l3_config(batch, batch->screen->l3_config_3d);
 
    iris_alloc_push_constants(batch);
 
+
 #if GEN_GEN >= 12
    init_aux_map_state(batch);
 #endif
+
+   iris_batch_sync_region_end(batch);
 }
 
 static void
 {
    UNUSED const struct gen_device_info *devinfo = &batch->screen->devinfo;
 
+   iris_batch_sync_region_start(batch);
+
    /* GEN:BUG:1607854226:
     *
     *  Start with pipeline in 3D mode to set the STATE_BASE_ADDRESS.
    init_aux_map_state(batch);
 #endif
 
+   iris_batch_sync_region_end(batch);
 }
 
 struct iris_vertex_buffer_state {
 
    uint32_t mocs = batch->screen->isl_dev.mocs.internal;
 
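+   /* The STATE_BASE_ADDRESS update and the flushes around it form a
+    * single region.
+    */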
+   iris_batch_sync_region_start(batch);
+
    flush_before_state_base_change(batch);
 
 #if GEN_GEN == 12
 #endif
 
    flush_after_state_base_change(batch);
+   iris_batch_sync_region_end(batch);
 
    batch->last_surface_base_address = binder->bo->gtt_offset;
 }
 {
    bool use_predicate = ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT;
 
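+   /* The full render state upload, from pinning the binder through the
+    * draw itself, executes inside a single sync region.
+    */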
+   iris_batch_sync_region_start(batch);
+
    /* Always pin the binder.  If we're emitting new binding table pointers,
     * we need it.  If not, we're probably inheriting old tables via the
     * context, and need it anyway.  Since true zero-bindings cases are
          }
       }
    }
+
+   iris_batch_sync_region_end(batch);
 }
 
 static void
       brw_cs_simd_size_for_group_size(devinfo, cs_prog_data, group_size);
    const unsigned threads = DIV_ROUND_UP(group_size, simd_size);
 
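+   /* Compute dispatch gets the same treatment as draws: one region spans
+    * the state upload and the dispatch.
+    */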
+   iris_batch_sync_region_start(batch);
+
    /* Always pin the binder.  If we're emitting new binding table pointers,
     * we need it.  If not, we're probably inheriting old tables via the
     * context, and need it anyway.  Since true zero-bindings cases are
       iris_restore_compute_saved_bos(ice, batch, grid);
       batch->contains_draw = true;
    }
+
+   iris_batch_sync_region_end(batch);
 }
 
 /**
               imm, reason);
    }
 
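+   /* A PIPE_CONTROL post-sync operation may write the BO passed in, so
+    * the command is bracketed like any other memory access.
+    */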
+   iris_batch_sync_region_start(batch);
+
    iris_emit_cmd(batch, GENX(PIPE_CONTROL), pc) {
 #if GEN_GEN >= 12
       pc.TileCacheFlushEnable = flags & PIPE_CONTROL_TILE_CACHE_FLUSH;
       pc.Address = rw_bo(bo, offset);
       pc.ImmediateData = imm;
    }
+
+   iris_batch_sync_region_end(batch);
 }
 
 #if GEN_GEN == 9
                                uint32_t offset_in_bytes,
                                uint32_t report_id)
 {
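+   /* MI_REPORT_PERF_COUNT writes a counter snapshot to the target BO,
+    * hence the bracketing.
+    */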
+   iris_batch_sync_region_start(batch);
    iris_emit_cmd(batch, GENX(MI_REPORT_PERF_COUNT), mi_rpc) {
       mi_rpc.MemoryAddress = rw_bo(bo, offset_in_bytes);
       mi_rpc.ReportID = report_id;
    }
+   iris_batch_sync_region_end(batch);
 }
 
 /**