+ if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false))
+ return;
+
+ batch->blorp->exec(batch, ¶ms);
+}
+
+ /* Extract a single bit of src: shift it down to bit 0 and mask off
+  * everything above it, so the result is either 0 or 1.
+  */
+ static nir_ssa_def *
+ blorp_nir_bit(nir_builder *b, nir_ssa_def *src, unsigned bit)
+ {
+    nir_ssa_def *shifted = nir_ushr(b, src, nir_imm_int(b, bit));
+    nir_ssa_def *lowest_bit_mask = nir_imm_int(b, 1);
+
+    return nir_iand(b, shifted, lowest_bit_mask);
+ }
+
+ struct blorp_mcs_partial_resolve_key /* Key identifying one variant of the MCS partial-resolve shader; handed to the lookup_shader/upload_shader hooks as raw bytes (pointer + sizeof). */
+ {
+ enum blorp_shader_type shader_type; /* Set to BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE by the kernel getter. */
+ bool indirect_clear_color; /* True when the destination has a clear color address buffer. */
+ bool int_format; /* True when the destination view format has an integer channel. */
+ uint32_t num_samples; /* Sample count taken from the blorp params. */
+ };
+
+ /** Look up, or build and upload, the MCS partial-resolve fragment shader
+  *
+  * The shader fetches the MCS value for the current fragment, discards the
+  * fragment unless the MCS value says it is in the clear state, and otherwise
+  * writes the clear color to the color output.
+  *
+  * The addresses of params->wm_prog_kernel and params->wm_prog_data are
+  * passed to the lookup_shader/upload_shader hooks of the blorp context.
+  *
+  * Returns true if lookup_shader reported a hit; otherwise returns whatever
+  * upload_shader returns for the freshly compiled program.
+  */
+ static bool
+ blorp_params_get_mcs_partial_resolve_kernel(struct blorp_batch *batch,
+                                             struct blorp_params *params)
+ {
+    struct blorp_context *blorp = batch->blorp;
+
+    /* The key is handed to the shader hooks as an opaque (pointer, size)
+     * pair.  Zero the whole struct before filling it in so that any padding
+     * bytes between members have a well-defined value; a designated
+     * initializer alone does not guarantee that.
+     */
+    struct blorp_mcs_partial_resolve_key blorp_key;
+    memset(&blorp_key, 0, sizeof(blorp_key));
+    blorp_key.shader_type = BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE;
+    blorp_key.indirect_clear_color =
+       params->dst.clear_color_addr.buffer != NULL;
+    blorp_key.int_format = isl_format_has_int_channel(params->dst.view.format);
+    blorp_key.num_samples = params->num_samples;
+
+    if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
+                             &params->wm_prog_kernel, &params->wm_prog_data))
+       return true;
+
+    /* Everything allocated while building and compiling the shader hangs off
+     * this context and is released in one go at the end.
+     */
+    void *mem_ctx = ralloc_context(NULL);
+
+    nir_builder b;
+    blorp_nir_init_shader(&b, mem_ctx, MESA_SHADER_FRAGMENT,
+                          "BLORP-mcs-partial-resolve");
+
+    nir_variable *v_color =
+       BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type());
+
+    nir_variable *frag_color =
+       nir_variable_create(b.shader, nir_var_shader_out,
+                           glsl_vec4_type(), "gl_FragColor");
+    frag_color->data.location = FRAG_RESULT_COLOR;
+
+    /* Do an MCS fetch and check if it is equal to the magic clear value */
+    nir_ssa_def *mcs =
+       blorp_nir_txf_ms_mcs(&b, nir_f2i32(&b, nir_load_frag_coord(&b)),
+                            nir_load_layer_id(&b));
+    nir_ssa_def *is_clear =
+       blorp_nir_mcs_is_clear_color(&b, mcs, blorp_key.num_samples);
+
+    /* If we aren't the clear value, discard. */
+    nir_intrinsic_instr *discard =
+       nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);
+    discard->src[0] = nir_src_for_ssa(nir_inot(&b, is_clear));
+    nir_builder_instr_insert(&b, &discard->instr);
+
+    nir_ssa_def *clear_color = nir_load_var(&b, v_color);
+    if (blorp_key.indirect_clear_color && blorp->isl_dev->info->gen <= 8) {
+       /* Gen7-8 clear colors are stored as single 0/1 bits */
+       clear_color = nir_vec4(&b, blorp_nir_bit(&b, clear_color, 31),
+                                  blorp_nir_bit(&b, clear_color, 30),
+                                  blorp_nir_bit(&b, clear_color, 29),
+                                  blorp_nir_bit(&b, clear_color, 28));
+
+       /* The extracted bits are integers; convert unless the destination
+        * format is itself an integer format.
+        */
+       if (!blorp_key.int_format)
+          clear_color = nir_i2f32(&b, clear_color);
+    }
+    nir_store_var(&b, frag_color, clear_color, 0xf);
+
+    struct brw_wm_prog_key wm_key;
+    brw_blorp_init_wm_prog_key(&wm_key);
+    wm_key.base.tex.compressed_multisample_layout_mask = 1;
+    wm_key.base.tex.msaa_16 = blorp_key.num_samples == 16;
+    wm_key.multisample_fbo = true;
+
+    struct brw_wm_prog_data prog_data;
+    const unsigned *program =
+       blorp_compile_fs(blorp, mem_ctx, b.shader, &wm_key, false,
+                        &prog_data);
+
+    bool result =
+       blorp->upload_shader(batch, &blorp_key, sizeof(blorp_key),
+                            program, prog_data.base.program_size,
+                            &prog_data.base, sizeof(prog_data),
+                            &params->wm_prog_kernel, &params->wm_prog_data);
+
+    ralloc_free(mem_ctx);
+    return result;
+ }
+
+ /** Run the MCS partial-resolve shader over a surface
+  *
+  * Sets up a blorp operation covering the full level-0 extent of the surface,
+  * using the same surface (level 0, starting at start_layer, viewed as
+  * format) as both source and destination, and executes it with the
+  * MCS partial-resolve kernel for num_layers layers.
+  *
+  * The surface's clear color is copied into the shader inputs.  If the
+  * surface has a clear color address buffer, dst_clear_color_as_input is set
+  * on the params as well.
+  *
+  * Asserts that the device is gen7 or later.  If the kernel cannot be
+  * obtained, the function returns without executing anything.
+  */
+ void
+ blorp_mcs_partial_resolve(struct blorp_batch *batch,
+                           struct blorp_surf *surf,
+                           enum isl_format format,
+                           uint32_t start_layer, uint32_t num_layers)
+ {
+    struct blorp_params params;
+    blorp_params_init(&params);
+
+    assert(batch->blorp->isl_dev->info->gen >= 7);
+
+    /* Cover the whole of miplevel 0. */
+    params.x0 = 0;
+    params.y0 = 0;
+    params.x1 = surf->surf->logical_level0_px.width;
+    params.y1 = surf->surf->logical_level0_px.height;
+
+    /* The same surface is bound twice: once as source (last argument false)
+     * and once as destination (last argument true).
+     */
+    brw_blorp_surface_info_init(batch->blorp, &params.src, surf, 0,
+                                start_layer, format, false);
+    brw_blorp_surface_info_init(batch->blorp, &params.dst, surf, 0,
+                                start_layer, format, true);
+
+    params.num_samples = params.dst.surf.samples;
+    params.num_layers = num_layers;
+    params.dst_clear_color_as_input = surf->clear_color_addr.buffer != NULL;
+
+    memcpy(&params.wm_inputs.clear_color,
+           surf->clear_color.f32, sizeof(float) * 4);
+
+    if (!blorp_params_get_mcs_partial_resolve_kernel(batch, &params))
+       return;
+
+    batch->blorp->exec(batch, &params);
+ }
+
+/** Clear a CCS to the "uncompressed" state
+ *
+ * This pass is the CCS equivalent of a "HiZ resolve". It sets the CCS values
+ * for a given layer/level of a surface to 0x0 which is the "uncompressed"
+ * state which tells the sampler to go look at the main surface.
+ */
+void
+blorp_ccs_ambiguate(struct blorp_batch *batch,
+ struct blorp_surf *surf,
+ uint32_t level, uint32_t layer)
+{
+ if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 10) {
+ /* On gen10 and above, we have a hardware resolve op for this */
+ return blorp_ccs_resolve(batch, surf, level, layer, 1,
+ surf->surf->format, ISL_AUX_OP_AMBIGUATE);
+ }
+
+ struct blorp_params params;
+ blorp_params_init(¶ms);
+
+ assert(ISL_DEV_GEN(batch->blorp->isl_dev) >= 7);
+
+ const struct isl_format_layout *aux_fmtl =
+ isl_format_get_layout(surf->aux_surf->format);
+ assert(aux_fmtl->txc == ISL_TXC_CCS);
+
+ params.dst = (struct brw_blorp_surface_info) {
+ .enabled = true,
+ .addr = surf->aux_addr,
+ .view = {
+ .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
+ .format = ISL_FORMAT_R32G32B32A32_UINT,
+ .base_level = 0,
+ .base_array_layer = 0,
+ .levels = 1,
+ .array_len = 1,
+ .swizzle = ISL_SWIZZLE_IDENTITY,
+ },
+ };
+
+ uint32_t z = 0;
+ if (surf->surf->dim == ISL_SURF_DIM_3D) {
+ z = layer;
+ layer = 0;
+ }
+
+ uint32_t offset_B, x_offset_el, y_offset_el;
+ isl_surf_get_image_offset_el(surf->aux_surf, level, layer, z,
+ &x_offset_el, &y_offset_el);
+ isl_tiling_get_intratile_offset_el(surf->aux_surf->tiling, aux_fmtl->bpb,
+ surf->aux_surf->row_pitch_B,
+ x_offset_el, y_offset_el,
+ &offset_B, &x_offset_el, &y_offset_el);
+ params.dst.addr.offset += offset_B;
+
+ const uint32_t width_px =
+ minify(surf->aux_surf->logical_level0_px.width, level);
+ const uint32_t height_px =
+ minify(surf->aux_surf->logical_level0_px.height, level);
+ const uint32_t width_el = DIV_ROUND_UP(width_px, aux_fmtl->bw);
+ const uint32_t height_el = DIV_ROUND_UP(height_px, aux_fmtl->bh);
+
+ struct isl_tile_info ccs_tile_info;
+ isl_surf_get_tile_info(surf->aux_surf, &ccs_tile_info);
+
+ /* We're going to map it as a regular RGBA32_UINT surface. We need to
+ * downscale a good deal. We start by computing the area on the CCS to
+ * clear in units of Y-tiled cache lines.
+ */
+ uint32_t x_offset_cl, y_offset_cl, width_cl, height_cl;
+ if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 8) {
+ /* From the Sky Lake PRM Vol. 12 in the section on planes:
+ *
+ * "The Color Control Surface (CCS) contains the compression status
+ * of the cache-line pairs. The compression state of the cache-line
+ * pair is specified by 2 bits in the CCS. Each CCS cache-line
+ * represents an area on the main surface of 16x16 sets of 128 byte
+ * Y-tiled cache-line-pairs. CCS is always Y tiled."
+ *
+ * Each 2-bit surface element in the CCS corresponds to a single
+ * cache-line pair in the main surface. This means that 16x16 el block
+ * in the CCS maps to a Y-tiled cache line. Fortunately, CCS layouts
+ * are calculated with a very large alignment so we can round up to a
+ * whole cache line without worrying about overdraw.
+ */
+
+ /* On Broadwell and above, a CCS tile is the same as a Y tile when
+ * viewed at the cache-line granularity. Fortunately, the horizontal
+ * and vertical alignment requirements of the CCS are such that we can
+ * align to an entire cache line without worrying about crossing over
+ * from one LOD to another.
+ */
+ const uint32_t x_el_per_cl = ccs_tile_info.logical_extent_el.w / 8;
+ const uint32_t y_el_per_cl = ccs_tile_info.logical_extent_el.h / 8;
+ assert(surf->aux_surf->image_alignment_el.w % x_el_per_cl == 0);
+ assert(surf->aux_surf->image_alignment_el.h % y_el_per_cl == 0);
+
+ assert(x_offset_el % x_el_per_cl == 0);
+ assert(y_offset_el % y_el_per_cl == 0);
+ x_offset_cl = x_offset_el / x_el_per_cl;
+ y_offset_cl = y_offset_el / y_el_per_cl;
+ width_cl = DIV_ROUND_UP(width_el, x_el_per_cl);
+ height_cl = DIV_ROUND_UP(height_el, y_el_per_cl);
+ } else {
+ /* On gen7, the CCS tiling is not so nice. However, there we are
+ * guaranteed that we only have a single level and slice so we don't
+ * have to worry about it and can just align to a whole tile.
+ */
+ assert(surf->aux_surf->logical_level0_px.depth == 1);
+ assert(surf->aux_surf->logical_level0_px.array_len == 1);
+ assert(x_offset_el == 0 && y_offset_el == 0);
+ const uint32_t width_tl =
+ DIV_ROUND_UP(width_el, ccs_tile_info.logical_extent_el.w);
+ const uint32_t height_tl =
+ DIV_ROUND_UP(height_el, ccs_tile_info.logical_extent_el.h);
+ x_offset_cl = 0;
+ y_offset_cl = 0;
+ width_cl = width_tl * 8;
+ height_cl = height_tl * 8;
+ }
+
+ /* We're going to use a RGBA32 format so as to write data as quickly as
+ * possible. A y-tiled cache line will then be 1x4 px.
+ */
+ const uint32_t x_offset_rgba_px = x_offset_cl;
+ const uint32_t y_offset_rgba_px = y_offset_cl * 4;
+ const uint32_t width_rgba_px = width_cl;
+ const uint32_t height_rgba_px = height_cl * 4;
+
+ ASSERTED bool ok =
+ isl_surf_init(batch->blorp->isl_dev, ¶ms.dst.surf,
+ .dim = ISL_SURF_DIM_2D,
+ .format = ISL_FORMAT_R32G32B32A32_UINT,
+ .width = width_rgba_px + x_offset_rgba_px,
+ .height = height_rgba_px + y_offset_rgba_px,
+ .depth = 1,
+ .levels = 1,
+ .array_len = 1,
+ .samples = 1,
+ .row_pitch_B = surf->aux_surf->row_pitch_B,
+ .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
+ .tiling_flags = ISL_TILING_Y0_BIT);
+ assert(ok);
+
+ params.x0 = x_offset_rgba_px;
+ params.y0 = y_offset_rgba_px;
+ params.x1 = x_offset_rgba_px + width_rgba_px;
+ params.y1 = y_offset_rgba_px + height_rgba_px;
+
+ /* A CCS value of 0 means "uncompressed." */
+ memset(¶ms.wm_inputs.clear_color, 0,
+ sizeof(params.wm_inputs.clear_color));
+
+ if (!blorp_params_get_clear_kernel(batch, ¶ms, true, false))
+ return;