+/**
+ * Translates a bytes-per-pixel count into the matching BR13_* constant.
+ *
+ * Supported values are 4 (BR13_8888), 2 (BR13_565) and 1 (BR13_8).  Any
+ * other value trips assert(0); in builds where asserts are compiled out the
+ * function returns 0 instead.
+ */
+static uint32_t
+br13_for_cpp(int cpp)
+{
+   if (cpp == 4)
+      return BR13_8888;
+   if (cpp == 2)
+      return BR13_565;
+   if (cpp == 1)
+      return BR13_8;
+
+   /* Unsupported pixel size. */
+   assert(0);
+   return 0;
+}
+
+/**
+ * Emits the commands that switch the blitter between X and Y tiled
+ * interpretation of its source and destination surfaces.
+ *
+ * The packet is 7 dwords long: a 4-dword MI_FLUSH_DW followed by a 3-dword
+ * MI_LOAD_REGISTER_IMM that writes BCS_SWCTRL.
+ *
+ * Both the switch to Y tiling and the switch back must live inside one
+ * BEGIN_BATCH_BLT_TILED() / ADVANCE_BATCH_TILED() pair.  BCS_SWCTRL is saved
+ * and restored with the power context rather than a render context, so if
+ * the batchbuffer were flushed between setting and blitting, or between
+ * blitting and restoring, our tiling state would leak into other
+ * unsuspecting applications (like the X server).
+ *
+ * \param intel        context whose batchbuffer receives the commands;
+ *                     must be gen6 or newer (asserted)
+ * \param dst_y_tiled  true to set BCS_SWCTRL_DST_Y, false to clear it
+ * \param src_y_tiled  true to set BCS_SWCTRL_SRC_Y, false to clear it
+ */
+static void
+set_blitter_tiling(struct intel_context *intel,
+                   bool dst_y_tiled, bool src_y_tiled)
+{
+   assert(intel->gen >= 6);
+
+   /* The upper half always names both Y-tiling bits; the lower half carries
+    * the requested values.  NOTE(review): the upper half is presumably the
+    * write-enable mask of a masked register -- confirm against the PRM.
+    */
+   uint32_t swctrl = (BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16;
+   if (dst_y_tiled)
+      swctrl |= BCS_SWCTRL_DST_Y;
+   if (src_y_tiled)
+      swctrl |= BCS_SWCTRL_SRC_Y;
+
+   /* Idle the blitter before we update how tiling is interpreted. */
+   OUT_BATCH(MI_FLUSH_DW);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+
+   /* Load the new tiling bits into BCS_SWCTRL. */
+   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+   OUT_BATCH(BCS_SWCTRL);
+   OUT_BATCH(swctrl);
+}
+
+/**
+ * Opens a blitter batch of n dwords, switching the blitter's tiling mode
+ * first when either surface is Y tiled.
+ *
+ * When a switch is needed, 14 extra dwords are reserved: 7 for the
+ * set_blitter_tiling() packet emitted here and 7 for the restoring packet
+ * emitted by ADVANCE_BATCH_TILED().  Must be paired with
+ * ADVANCE_BATCH_TILED() using the same tiling flags.
+ *
+ * Expects a variable named "intel" in the caller's scope.  The tiling flags
+ * are evaluated more than once, so pass side-effect-free expressions.
+ */
+#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) do {            \
+      BEGIN_BATCH_BLT((n) + (((dst_y_tiled) || (src_y_tiled)) ? 14 : 0));   \
+      if ((dst_y_tiled) || (src_y_tiled))                                   \
+         set_blitter_tiling(intel, (dst_y_tiled), (src_y_tiled));           \
+   } while (0)
+
+/**
+ * Closes a batch opened with BEGIN_BATCH_BLT_TILED().
+ *
+ * If either surface was Y tiled, a set_blitter_tiling() packet clearing both
+ * Y-tiling bits is emitted before the batch is advanced, so the blitter is
+ * switched back within the same batch.  Pass the same flags that were given
+ * to BEGIN_BATCH_BLT_TILED(); the space for this packet was reserved there.
+ *
+ * Expects a variable named "intel" in the caller's scope.  The tiling flags
+ * may be evaluated more than once, so pass side-effect-free expressions.
+ */
+#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) do {                 \
+      if ((dst_y_tiled) || (src_y_tiled))                                   \
+         set_blitter_tiling(intel, false, false);                           \
+      ADVANCE_BATCH();                                                      \
+   } while (0)
+
+/**
+ * Implements a rectangular block transfer (blit) of pixels between two
+ * miptrees.
+ *
+ * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous,
+ * but limited, pitches and sizes allowed.
+ *
+ * The src/dst coordinates are relative to the given level/slice of the
+ * miptree.
+ *
+ * If @src_flip or @dst_flip is set, then the rectangle within that miptree
+ * will be inverted (including scanline order) when copying.  This is common
+ * in GL when copying between window system and user-created
+ * renderbuffers/textures.
+ *
+ * Returns true when the blit was emitted.  Returns false, without blitting,
+ * when the formats are incompatible, when either pitch is 32k or larger, or
+ * when intelEmitCopyBlit() fails; the caller is expected to fall back to
+ * another path in that case.
+ */
+bool
+intel_miptree_blit(struct intel_context *intel,
+                   struct intel_mipmap_tree *src_mt,
+                   int src_level, int src_slice,
+                   uint32_t src_x, uint32_t src_y, bool src_flip,
+                   struct intel_mipmap_tree *dst_mt,
+                   int dst_level, int dst_slice,
+                   uint32_t dst_x, uint32_t dst_y, bool dst_flip,
+                   uint32_t width, uint32_t height,
+                   GLenum logicop)
+{
+   /* No sRGB decode or encode is done by the hardware blitter, which is
+    * consistent with what we want in the callers (glCopyTexSubImage(),
+    * glBlitFramebuffer(), texture validation, etc.).
+    */
+   gl_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
+   gl_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);
+
+   /* The blitter doesn't support doing any format conversions.  We do also
+    * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into
+    * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A
+    * channel to 1.0 at the end.
+    */
+   if (src_format != dst_format &&
+       ((src_format != MESA_FORMAT_ARGB8888 &&
+         src_format != MESA_FORMAT_XRGB8888) ||
+        (dst_format != MESA_FORMAT_ARGB8888 &&
+         dst_format != MESA_FORMAT_XRGB8888))) {
+      perf_debug("%s: Can't use hardware blitter from %s to %s, "
+                 "falling back.\n", __FUNCTION__,
+                 _mesa_get_format_name(src_format),
+                 _mesa_get_format_name(dst_format));
+      return false;
+   }
+
+   /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
+    * Data Size Limitations):
+    *
+    *    The BLT engine is capable of transferring very large quantities of
+    *    graphics data. Any graphics data read from and written to the
+    *    destination is permitted to represent a number of pixels that
+    *    occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
+    *    at the destination. The maximum number of pixels that may be
+    *    represented per scan line's worth of graphics data depends on the
+    *    color depth.
+    *
+    * Furthermore, intelEmitCopyBlit (which is called below) uses a signed
+    * 16-bit integer to represent buffer pitch, so it can only handle buffer
+    * pitches < 32k.
+    *
+    * As a result of these two limitations, we can only use the blitter to do
+    * this copy when the region's pitch is less than 32k.  A pitch of exactly
+    * 32768 does not fit in a signed 16-bit value, so it must be rejected
+    * too.
+    */
+   if (src_mt->region->pitch >= 32768 ||
+       dst_mt->region->pitch >= 32768) {
+      perf_debug("Falling back due to >=32k pitch\n");
+      return false;
+   }
+
+   /* The blitter has no idea about HiZ or fast color clears, so we need to
+    * resolve the miptrees before we do anything.
+    */
+   intel_miptree_slice_resolve_depth(intel, src_mt, src_level, src_slice);
+   intel_miptree_slice_resolve_depth(intel, dst_mt, dst_level, dst_slice);
+   intel_miptree_resolve_color(intel, src_mt);
+   intel_miptree_resolve_color(intel, dst_mt);
+
+   /* Convert flipped coordinates into top-down rows of the level. */
+   if (src_flip)
+      src_y = src_mt->level[src_level].height - src_y - height;
+
+   if (dst_flip)
+      dst_y = dst_mt->level[dst_level].height - dst_y - height;
+
+   /* When exactly one side is flipped the scanline order must be reversed
+    * during the copy; this is requested with a negative source pitch.
+    */
+   int src_pitch = src_mt->region->pitch;
+   if (src_flip != dst_flip)
+      src_pitch = -src_pitch;
+
+   /* Make the coordinates relative to the whole miptree by adding the
+    * offset of the selected level/slice image.
+    */
+   uint32_t src_image_x, src_image_y;
+   intel_miptree_get_image_offset(src_mt, src_level, src_slice,
+                                  &src_image_x, &src_image_y);
+   src_x += src_image_x;
+   src_y += src_image_y;
+
+   uint32_t dst_image_x, dst_image_y;
+   intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
+                                  &dst_image_x, &dst_image_y);
+   dst_x += dst_image_x;
+   dst_y += dst_image_y;
+
+   if (!intelEmitCopyBlit(intel,
+                          src_mt->cpp,
+                          src_pitch,
+                          src_mt->region->bo, src_mt->offset,
+                          src_mt->region->tiling,
+                          dst_mt->region->pitch,
+                          dst_mt->region->bo, dst_mt->offset,
+                          dst_mt->region->tiling,
+                          src_x, src_y,
+                          dst_x, dst_y,
+                          width, height,
+                          logicop)) {
+      return false;
+   }
+
+   /* Use the sRGB-linearized formats here, exactly as the compatibility
+    * check above does.  Otherwise an XRGB8888 source blitted into an sRGB
+    * ARGB8888 destination would be accepted above but would never get its
+    * alpha channel set to 1.0.
+    */
+   if (src_format == MESA_FORMAT_XRGB8888 &&
+       dst_format == MESA_FORMAT_ARGB8888) {
+      intel_miptree_set_alpha_to_one(intel, dst_mt,
+                                     dst_x, dst_y,
+                                     width, height);
+   }
+
+   return true;
+}