+#endif /* GEN_GEN >= 6 */
+
+#if GEN_GEN >= 7
+static void
+blorp_emit_memcpy(struct blorp_batch *batch,
+ struct blorp_address dst,
+ struct blorp_address src,
+ uint32_t size)
+{
+ assert(size % 4 == 0);
+
+ for (unsigned dw = 0; dw < size; dw += 4) {
+#if GEN_GEN >= 8
+ blorp_emit(batch, GENX(MI_COPY_MEM_MEM), cp) {
+ cp.DestinationMemoryAddress = dst;
+ cp.SourceMemoryAddress = src;
+ }
+#else
+ /* IVB does not have a general purpose register for command streamer
+ * commands. Therefore, we use an alternate temporary register.
+ */
+#define BLORP_TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */
+ blorp_emit(batch, GENX(MI_LOAD_REGISTER_MEM), load) {
+ load.RegisterAddress = BLORP_TEMP_REG;
+ load.MemoryAddress = src;
+ }
+ blorp_emit(batch, GENX(MI_STORE_REGISTER_MEM), store) {
+ store.RegisterAddress = BLORP_TEMP_REG;
+ store.MemoryAddress = dst;
+ }
+#undef BLORP_TEMP_REG
+#endif
+ dst.offset += 4;
+ src.offset += 4;
+ }
+}
+#endif
+
+static void
+blorp_emit_surface_state(struct blorp_batch *batch,
+ const struct brw_blorp_surface_info *surface,
+ enum isl_aux_op aux_op,
+ void *state, uint32_t state_offset,
+ const bool color_write_disables[4],
+ bool is_render_target)
+{
+ const struct isl_device *isl_dev = batch->blorp->isl_dev;
+ struct isl_surf surf = surface->surf;
+
+ if (surf.dim == ISL_SURF_DIM_1D &&
+ surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) {
+ assert(surf.logical_level0_px.height == 1);
+ surf.dim = ISL_SURF_DIM_2D;
+ }
+
+ /* Blorp doesn't support HiZ in any of the blit or slow-clear paths */
+ enum isl_aux_usage aux_usage = surface->aux_usage;
+ if (aux_usage == ISL_AUX_USAGE_HIZ)
+ aux_usage = ISL_AUX_USAGE_NONE;
+
+ isl_channel_mask_t write_disable_mask = 0;
+ if (is_render_target && GEN_GEN <= 5) {
+ if (color_write_disables[0])
+ write_disable_mask |= ISL_CHANNEL_RED_BIT;
+ if (color_write_disables[1])
+ write_disable_mask |= ISL_CHANNEL_GREEN_BIT;
+ if (color_write_disables[2])
+ write_disable_mask |= ISL_CHANNEL_BLUE_BIT;
+ if (color_write_disables[3])
+ write_disable_mask |= ISL_CHANNEL_ALPHA_BIT;
+ }
+
+ const bool use_clear_address =
+ GEN_GEN >= 10 && (surface->clear_color_addr.buffer != NULL);
+
+ isl_surf_fill_state(batch->blorp->isl_dev, state,
+ .surf = &surf, .view = &surface->view,
+ .aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
+ .address =
+ blorp_get_surface_address(batch, surface->addr),
+ .aux_address = aux_usage == ISL_AUX_USAGE_NONE ? 0 :
+ blorp_get_surface_address(batch, surface->aux_addr),
+ .clear_address = !use_clear_address ? 0 :
+ blorp_get_surface_address(batch,
+ surface->clear_color_addr),
+ .mocs = surface->addr.mocs,
+ .clear_color = surface->clear_color,
+ .use_clear_address = use_clear_address,
+ .write_disables = write_disable_mask);
+
+ blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset,
+ surface->addr, 0);
+
+ if (aux_usage != ISL_AUX_USAGE_NONE) {
+ /* On gen7 and prior, the bottom 12 bits of the MCS base address are
+ * used to store other information. This should be ok, however, because
+ * surface buffer addresses are always 4K page alinged.
+ */
+ assert((surface->aux_addr.offset & 0xfff) == 0);
+ uint32_t *aux_addr = state + isl_dev->ss.aux_addr_offset;
+ blorp_surface_reloc(batch, state_offset + isl_dev->ss.aux_addr_offset,
+ surface->aux_addr, *aux_addr);
+ }
+
+ if (aux_usage != ISL_AUX_USAGE_NONE && surface->clear_color_addr.buffer) {
+#if GEN_GEN >= 10
+ assert((surface->clear_color_addr.offset & 0x3f) == 0);
+ uint32_t *clear_addr = state + isl_dev->ss.clear_color_state_offset;
+ blorp_surface_reloc(batch, state_offset +
+ isl_dev->ss.clear_color_state_offset,
+ surface->clear_color_addr, *clear_addr);
+#elif GEN_GEN >= 7
+ /* Fast clears just whack the AUX surface and don't actually use the
+ * clear color for anything. We can avoid the MI memcpy on that case.
+ */
+ if (aux_op != ISL_AUX_OP_FAST_CLEAR) {
+ struct blorp_address dst_addr = blorp_get_surface_base_address(batch);
+ dst_addr.offset += state_offset + isl_dev->ss.clear_value_offset;
+ blorp_emit_memcpy(batch, dst_addr, surface->clear_color_addr,
+ isl_dev->ss.clear_value_size);
+ }
+#else
+ unreachable("Fast clears are only supported on gen7+");
+#endif
+ }
+
+ blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4);
+}
+
+static void
+blorp_emit_null_surface_state(struct blorp_batch *batch,
+ const struct brw_blorp_surface_info *surface,
+ uint32_t *state)
+{
+ struct GENX(RENDER_SURFACE_STATE) ss = {
+ .SurfaceType = SURFTYPE_NULL,
+ .SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM,
+ .Width = surface->surf.logical_level0_px.width - 1,
+ .Height = surface->surf.logical_level0_px.height - 1,
+ .MIPCountLOD = surface->view.base_level,
+ .MinimumArrayElement = surface->view.base_array_layer,
+ .Depth = surface->view.array_len - 1,
+ .RenderTargetViewExtent = surface->view.array_len - 1,
+#if GEN_GEN >= 6
+ .NumberofMultisamples = ffs(surface->surf.samples) - 1,
+#endif
+
+#if GEN_GEN >= 7
+ .SurfaceArray = surface->surf.dim != ISL_SURF_DIM_3D,
+#endif
+
+#if GEN_GEN >= 8
+ .TileMode = YMAJOR,
+#else
+ .TiledSurface = true,
+#endif
+ };
+
+ GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &ss);
+
+ blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4);
+}
+
+static void
+blorp_emit_surface_states(struct blorp_batch *batch,
+ const struct blorp_params *params)
+{
+ const struct isl_device *isl_dev = batch->blorp->isl_dev;
+ uint32_t bind_offset = 0, surface_offsets[2];
+ void *surface_maps[2];
+
+ MAYBE_UNUSED bool has_indirect_clear_color = false;
+ if (params->use_pre_baked_binding_table) {
+ bind_offset = params->pre_baked_binding_table_offset;