From bcee124ef752f461100058ec0ec70f2460579217 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Thu, 16 Mar 2017 10:35:49 -0700 Subject: [PATCH] i965: Delete fast copy blit code Fast copy blit was primarily added to support Yf/Ys detiling. However, Yf/Ys tiling never got used in i965 because it did not deliver the expected performance benefits. Also, replacing legacy blits with fast copy blit didn't help the benchmarking numbers. This is probably due to a hardware restriction that says "start pixel for Fast Copy blit should be on an OWord boundary". This restriction causes many blit operations to skip fast copy blit and use legacy blits. So, this patch deletes this dead code; it can be added back later if we actually find it useful. Signed-off-by: Anuj Phogat Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/intel_blit.c | 231 +++++-------------------- src/mesa/drivers/dri/i965/intel_blit.h | 2 - 2 files changed, 48 insertions(+), 185 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 4d4ab911cbc..ca24abef3b2 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -39,23 +39,6 @@ #define FILE_DEBUG_FLAG DEBUG_BLIT -#define SET_TILING_XY_FAST_COPY_BLT(tiling, tr_mode, type) \ -({ \ - switch (tiling) { \ - case I915_TILING_X: \ - CMD |= type ## _TILED_X; \ - break; \ - case I915_TILING_Y: \ - if (tr_mode == INTEL_MIPTREE_TRMODE_YS) \ - CMD |= type ## _TILED_64K; \ - else \ - CMD |= type ## _TILED_Y; \ - break; \ - default: \ - unreachable("not reached"); \ - } \ -}) - static void intel_miptree_set_alpha_to_one(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -272,11 +255,9 @@ emit_miptree_blit(struct brw_context *brw, reverse ? 
-src_mt->pitch : src_mt->pitch, src_mt->bo, src_mt->offset + src_offset, src_mt->tiling, - src_mt->tr_mode, dst_mt->pitch, dst_mt->bo, dst_mt->offset + dst_offset, dst_mt->tiling, - dst_mt->tr_mode, src_tile_x, src_tile_y, dst_tile_x, dst_tile_y, chunk_w, chunk_h, @@ -472,97 +453,30 @@ alignment_valid(struct brw_context *brw, unsigned offset, uint32_t tiling) return true; } -static bool -can_fast_copy_blit(struct brw_context *brw, - drm_intel_bo *src_buffer, - int16_t src_x, int16_t src_y, - uintptr_t src_offset, int32_t src_pitch, - uint32_t src_tiling, uint32_t src_tr_mode, - drm_intel_bo *dst_buffer, - int16_t dst_x, int16_t dst_y, - uintptr_t dst_offset, int32_t dst_pitch, - uint32_t dst_tiling, uint32_t dst_tr_mode, - int16_t w, int16_t h, uint32_t cpp, - GLenum logic_op) -{ - const bool dst_tiling_none = dst_tiling == I915_TILING_NONE; - const bool src_tiling_none = src_tiling == I915_TILING_NONE; - - if (brw->gen < 9) - return false; - - /* Enable fast copy blit only if the surfaces are Yf/Ys tiled. - * FIXME: Based on performance data, remove this condition later to - * enable for all types of surfaces. - */ - if (src_tr_mode == INTEL_MIPTREE_TRMODE_NONE && - dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE) - return false; - - if (logic_op != GL_COPY) - return false; - - /* The start pixel for Fast Copy blit should be on an OWord boundary. */ - if ((dst_x * cpp | src_x * cpp) & 15) - return false; - - /* For all surface types buffers must be cacheline-aligned. */ - if ((dst_offset | src_offset) & 63) - return false; - - /* Color depths which are not power of 2 or greater than 128 bits are - * not supported. - */ - if (!_mesa_is_pow_two(cpp) || cpp > 16) - return false; - - /* For Fast Copy Blits the pitch cannot be a negative number. */ - if (src_pitch < 0 || dst_pitch < 0) - return false; - - /* For Linear surfaces, the pitch has to be an OWord (16byte) multiple. 
*/ - if ((src_tiling_none && src_pitch % 16 != 0) || - (dst_tiling_none && dst_pitch % 16 != 0)) - return false; - - return true; -} - static uint32_t -xy_blit_cmd(uint32_t src_tiling, uint32_t src_tr_mode, - uint32_t dst_tiling, uint32_t dst_tr_mode, - uint32_t cpp, bool use_fast_copy_blit) +xy_blit_cmd(uint32_t src_tiling, uint32_t dst_tiling, uint32_t cpp) { uint32_t CMD = 0; - if (use_fast_copy_blit) { - CMD = XY_FAST_COPY_BLT_CMD; - - if (dst_tiling != I915_TILING_NONE) - SET_TILING_XY_FAST_COPY_BLT(dst_tiling, dst_tr_mode, XY_FAST_DST); + assert(cpp <= 4); + switch (cpp) { + case 1: + case 2: + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + unreachable("not reached"); + } - if (src_tiling != I915_TILING_NONE) - SET_TILING_XY_FAST_COPY_BLT(src_tiling, src_tr_mode, XY_FAST_SRC); - } else { - assert(cpp <= 4); - switch (cpp) { - case 1: - case 2: - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - break; - default: - unreachable("not reached"); - } + if (dst_tiling != I915_TILING_NONE) + CMD |= XY_DST_TILED; - if (dst_tiling != I915_TILING_NONE) - CMD |= XY_DST_TILED; + if (src_tiling != I915_TILING_NONE) + CMD |= XY_SRC_TILED; - if (src_tiling != I915_TILING_NONE) - CMD |= XY_SRC_TILED; - } return CMD; } @@ -575,12 +489,10 @@ intelEmitCopyBlit(struct brw_context *brw, drm_intel_bo *src_buffer, GLuint src_offset, uint32_t src_tiling, - uint32_t src_tr_mode, int32_t dst_pitch, drm_intel_bo *dst_buffer, GLuint dst_offset, uint32_t dst_tiling, - uint32_t dst_tr_mode, GLshort src_x, GLshort src_y, GLshort dst_x, GLshort dst_y, GLshort w, GLshort h, @@ -592,7 +504,6 @@ intelEmitCopyBlit(struct brw_context *brw, drm_intel_bo *aper_array[3]; bool dst_y_tiled = dst_tiling == I915_TILING_Y; bool src_y_tiled = src_tiling == I915_TILING_Y; - bool use_fast_copy_blit = false; uint32_t src_tile_w, src_tile_h; uint32_t 
dst_tile_w, dst_tile_h; @@ -623,8 +534,8 @@ intelEmitCopyBlit(struct brw_context *brw, src_buffer, src_pitch, src_offset, src_x, src_y, dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); - intel_get_tile_dims(src_tiling, src_tr_mode, cpp, &src_tile_w, &src_tile_h); - intel_get_tile_dims(dst_tiling, dst_tr_mode, cpp, &dst_tile_w, &dst_tile_h); + intel_get_tile_dims(src_tiling, cpp, &src_tile_w, &src_tile_h); + intel_get_tile_dims(dst_tiling, cpp, &dst_tile_w, &dst_tile_h); /* For Tiled surfaces, the pitch has to be a multiple of the Tile width * (X direction width of the Tile). This is ensured while allocating the @@ -633,84 +544,40 @@ intelEmitCopyBlit(struct brw_context *brw, assert(src_tiling == I915_TILING_NONE || (src_pitch % src_tile_w) == 0); assert(dst_tiling == I915_TILING_NONE || (dst_pitch % dst_tile_w) == 0); - use_fast_copy_blit = can_fast_copy_blit(brw, - src_buffer, - src_x, src_y, - src_offset, src_pitch, - src_tiling, src_tr_mode, - dst_buffer, - dst_x, dst_y, - dst_offset, dst_pitch, - dst_tiling, dst_tr_mode, - w, h, cpp, logic_op); - if (!use_fast_copy_blit && - (src_tr_mode != INTEL_MIPTREE_TRMODE_NONE || - dst_tr_mode != INTEL_MIPTREE_TRMODE_NONE)) - return false; - - if (use_fast_copy_blit) { - assert(logic_op == GL_COPY); - - /* When two sequential fast copy blits have different source surfaces, - * but their destinations refer to the same destination surfaces and - * therefore destinations overlap it is imperative that a flush be - * inserted between the two blits. - * - * FIXME: Figure out a way to avoid flushing when not required. 
- */ - brw_emit_mi_flush(brw); - - assert(cpp <= 16); - BR13 = br13_for_cpp(cpp); - - if (src_tr_mode == INTEL_MIPTREE_TRMODE_YF) - BR13 |= XY_FAST_SRC_TRMODE_YF; - - if (dst_tr_mode == INTEL_MIPTREE_TRMODE_YF) - BR13 |= XY_FAST_DST_TRMODE_YF; - - CMD = xy_blit_cmd(src_tiling, src_tr_mode, - dst_tiling, dst_tr_mode, - cpp, use_fast_copy_blit); - - } else { - /* For big formats (such as floating point), do the copy using 16 or - * 32bpp and multiply the coordinates. - */ - if (cpp > 4) { - if (cpp % 4 == 2) { - dst_x *= cpp / 2; - dst_x2 *= cpp / 2; - src_x *= cpp / 2; - cpp = 2; - } else { - assert(cpp % 4 == 0); - dst_x *= cpp / 4; - dst_x2 *= cpp / 4; - src_x *= cpp / 4; - cpp = 4; - } + /* For big formats (such as floating point), do the copy using 16 or + * 32bpp and multiply the coordinates. + */ + if (cpp > 4) { + if (cpp % 4 == 2) { + dst_x *= cpp / 2; + dst_x2 *= cpp / 2; + src_x *= cpp / 2; + cpp = 2; + } else { + assert(cpp % 4 == 0); + dst_x *= cpp / 4; + dst_x2 *= cpp / 4; + src_x *= cpp / 4; + cpp = 4; } + } - if (!alignment_valid(brw, dst_offset, dst_tiling)) - return false; - if (!alignment_valid(brw, src_offset, src_tiling)) - return false; + if (!alignment_valid(brw, dst_offset, dst_tiling)) + return false; + if (!alignment_valid(brw, src_offset, src_tiling)) + return false; - /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop - * the low bits. Offsets must be naturally aligned. - */ - if (src_pitch % 4 != 0 || src_offset % cpp != 0 || - dst_pitch % 4 != 0 || dst_offset % cpp != 0) - return false; + /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop + * the low bits. Offsets must be naturally aligned. 
+ */ + if (src_pitch % 4 != 0 || src_offset % cpp != 0 || + dst_pitch % 4 != 0 || dst_offset % cpp != 0) + return false; - assert(cpp <= 4); - BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; + assert(cpp <= 4); + BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; - CMD = xy_blit_cmd(src_tiling, src_tr_mode, - dst_tiling, dst_tr_mode, - cpp, use_fast_copy_blit); - } + CMD = xy_blit_cmd(src_tiling, dst_tiling, cpp); /* For tiled source and destination, pitch value should be specified * as a number of Dwords. @@ -877,9 +744,7 @@ intel_emit_linear_blit(struct brw_context *brw, ok = intelEmitCopyBlit(brw, 1, pitch, src_bo, src_offset - src_x, I915_TILING_NONE, - INTEL_MIPTREE_TRMODE_NONE, pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE, - INTEL_MIPTREE_TRMODE_NONE, src_x, 0, /* src x/y */ dst_x, 0, /* dst x/y */ MIN2(size, pitch), height, /* w, h */ diff --git a/src/mesa/drivers/dri/i965/intel_blit.h b/src/mesa/drivers/dri/i965/intel_blit.h index 7cb2c7e0b27..31f382242cb 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.h +++ b/src/mesa/drivers/dri/i965/intel_blit.h @@ -35,12 +35,10 @@ intelEmitCopyBlit(struct brw_context *brw, drm_intel_bo *src_buffer, GLuint src_offset, uint32_t src_tiling, - uint32_t src_tr_mode, int32_t dst_pitch, drm_intel_bo *dst_buffer, GLuint dst_offset, uint32_t dst_tiling, - uint32_t dst_tr_mode, GLshort srcx, GLshort srcy, GLshort dstx, GLshort dsty, GLshort w, GLshort h, -- 2.30.2