X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fintel_blit.c;h=819a3da2966028760f6f777660a35a7137478e20;hb=27e273578f05521baac08d7de915c95312e3a595;hp=0cd2a203cb39c42b9895a87fbf8e90d54225e264;hpb=0fa39bff19dc2fbd3c184bd0e1267c86bd5040d9;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index 0cd2a203cb3..819a3da2966 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -27,7 +27,6 @@ #include "main/blit.h" #include "main/context.h" #include "main/enums.h" -#include "main/colormac.h" #include "main/fbobject.h" #include "brw_context.h" @@ -35,29 +34,11 @@ #include "intel_blit.h" #include "intel_buffers.h" #include "intel_fbo.h" -#include "intel_reg.h" #include "intel_batchbuffer.h" #include "intel_mipmap_tree.h" #define FILE_DEBUG_FLAG DEBUG_BLIT -#define SET_TILING_XY_FAST_COPY_BLT(tiling, tr_mode, type) \ -({ \ - switch (tiling) { \ - case I915_TILING_X: \ - CMD |= type ## _TILED_X; \ - break; \ - case I915_TILING_Y: \ - if (tr_mode == INTEL_MIPTREE_TRMODE_YS) \ - CMD |= type ## _TILED_64K; \ - else \ - CMD |= type ## _TILED_Y; \ - break; \ - default: \ - unreachable("not reached"); \ - } \ -}) - static void intel_miptree_set_alpha_to_one(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -105,64 +86,6 @@ br13_for_cpp(int cpp) } } -static uint32_t -get_tr_horizontal_align(uint32_t tr_mode, uint32_t cpp, bool is_src) { - /* Alignment tables for YF/YS tiled surfaces. */ - const uint32_t align_2d_yf[] = {64, 64, 32, 32, 16}; - const uint32_t bpp = cpp * 8; - const uint32_t shift = is_src ? 17 : 10; - uint32_t align; - int i = 0; - - if (tr_mode == INTEL_MIPTREE_TRMODE_NONE) - return 0; - - /* Compute array index. */ - assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)); - i = ffs(bpp / 8) - 1; - - align = tr_mode == INTEL_MIPTREE_TRMODE_YF ? - align_2d_yf[i] : - 4 * align_2d_yf[i]; - - assert(_mesa_is_pow_two(align)); - - /* XY_FAST_COPY_BLT doesn't support horizontal alignment of 16. */ - if (align == 16) - align = 32; - - return (ffs(align) - 6) << shift; -} - -static uint32_t -get_tr_vertical_align(uint32_t tr_mode, uint32_t cpp, bool is_src) { - /* Vertical alignment tables for YF/YS tiled surfaces. */ - const unsigned align_2d_yf[] = {64, 32, 32, 16, 16}; - const uint32_t bpp = cpp * 8; - const uint32_t shift = is_src ? 15 : 8; - uint32_t align; - int i = 0; - - if (tr_mode == INTEL_MIPTREE_TRMODE_NONE) - return 0; - - /* Compute array index. */ - assert (bpp >= 8 && bpp <= 128 && _mesa_is_pow_two(bpp)); - i = ffs(bpp / 8) - 1; - - align = tr_mode == INTEL_MIPTREE_TRMODE_YF ? - align_2d_yf[i] : - 4 * align_2d_yf[i]; - - assert(_mesa_is_pow_two(align)); - - /* XY_FAST_COPY_BLT doesn't support vertical alignments of 16 and 32. */ - if (align == 16 || align == 32) - align = 64; - - return (ffs(align) - 7) << shift; -} - /** * Emits the packet for switching the blitter from X to Y tiled or back. * @@ -178,7 +101,9 @@ set_blitter_tiling(struct brw_context *brw, bool dst_y_tiled, bool src_y_tiled, uint32_t *__map) { - assert(brw->gen >= 6); + const struct gen_device_info *devinfo = &brw->screen->devinfo; + + assert(devinfo->gen >= 6); /* Idle the blitter before we update how tiling is interpreted. */ OUT_BATCH(MI_FLUSH_DW); @@ -208,8 +133,8 @@ set_blitter_tiling(struct brw_context *brw, static int blt_pitch(struct intel_mipmap_tree *mt) { - int pitch = mt->pitch; - if (mt->tiling) + int pitch = mt->surf.row_pitch; + if (mt->surf.tiling != ISL_TILING_LINEAR) pitch /= 4; return pitch; } @@ -239,6 +164,115 @@ intel_miptree_blit_compatible_formats(mesa_format src, mesa_format dst) return false; } +static void +get_blit_intratile_offset_el(const struct brw_context *brw, + struct intel_mipmap_tree *mt, + uint32_t total_x_offset_el, + uint32_t total_y_offset_el, + uint32_t *base_address_offset, + uint32_t *x_offset_el, + uint32_t *y_offset_el) +{ + isl_tiling_get_intratile_offset_el(mt->surf.tiling, + mt->cpp * 8, mt->surf.row_pitch, + total_x_offset_el, total_y_offset_el, + base_address_offset, + x_offset_el, y_offset_el); + if (mt->surf.tiling == ISL_TILING_LINEAR) { + /* From the Broadwell PRM docs for XY_SRC_COPY_BLT::SourceBaseAddress: + * + * "Base address of the destination surface: X=0, Y=0. Lower 32bits + * of the 48bit addressing. When Src Tiling is enabled (Bit_15 + * enabled), this address must be 4KB-aligned. When Tiling is not + * enabled, this address should be CL (64byte) aligned." + * + * The offsets we get from ISL in the tiled case are already aligned. + * In the linear case, we need to do some of our own aligning. + */ + uint32_t delta = *base_address_offset & 63; + assert(delta % mt->cpp == 0); + *base_address_offset -= delta; + *x_offset_el += delta / mt->cpp; + } else { + assert(*base_address_offset % 4096 == 0); + } +} + +static bool +emit_miptree_blit(struct brw_context *brw, + struct intel_mipmap_tree *src_mt, + uint32_t src_x, uint32_t src_y, + struct intel_mipmap_tree *dst_mt, + uint32_t dst_x, uint32_t dst_y, + uint32_t width, uint32_t height, + bool reverse, GLenum logicop) +{ + /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics + * Data Size Limitations): + * + * The BLT engine is capable of transferring very large quantities of + * graphics data. Any graphics data read from and written to the + * destination is permitted to represent a number of pixels that + * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line + * at the destination. The maximum number of pixels that may be + * represented per scan line’s worth of graphics data depends on the + * color depth. + * + * The blitter's pitch is a signed 16-bit integer, but measured in bytes + * for linear surfaces and DWords for tiled surfaces. So the maximum + * pitch is 32k linear and 128k tiled. + */ + if (blt_pitch(src_mt) >= 32768 || blt_pitch(dst_mt) >= 32768) { + perf_debug("Falling back due to >= 32k/128k pitch\n"); + return false; + } + + /* We need to split the blit into chunks that each fit within the blitter's + * restrictions. We can't use a chunk size of 32768 because we need to + * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's + * a nice round power of two, big enough that performance won't suffer, and + * small enough to guarantee everything fits. + */ + const uint32_t max_chunk_size = 16384; + + for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) { + for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) { + const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x); + const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y); + + uint32_t src_offset, src_tile_x, src_tile_y; + get_blit_intratile_offset_el(brw, src_mt, + src_x + chunk_x, src_y + chunk_y, + &src_offset, &src_tile_x, &src_tile_y); + + uint32_t dst_offset, dst_tile_x, dst_tile_y; + get_blit_intratile_offset_el(brw, dst_mt, + dst_x + chunk_x, dst_y + chunk_y, + &dst_offset, &dst_tile_x, &dst_tile_y); + + if (!intelEmitCopyBlit(brw, + src_mt->cpp, + reverse ? -src_mt->surf.row_pitch : + src_mt->surf.row_pitch, + src_mt->bo, src_mt->offset + src_offset, + src_mt->surf.tiling, + dst_mt->surf.row_pitch, + dst_mt->bo, dst_mt->offset + dst_offset, + dst_mt->surf.tiling, + src_tile_x, src_tile_y, + dst_tile_x, dst_tile_y, + chunk_w, chunk_h, + logicop)) { + /* If this is ever going to fail, it will fail on the first chunk */ + assert(chunk_x == 0 && chunk_y == 0); + return false; + } + } + } + + return true; +} + /** * Implements a rectangular block transfer (blit) of pixels between two * miptrees. @@ -266,12 +300,12 @@ intel_miptree_blit(struct brw_context *brw, GLenum logicop) { /* The blitter doesn't understand multisampling at all. */ - if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) + if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1) return false; /* No sRGB decode or encode is done by the hardware blitter, which is - * consistent with what we want in the callers (glCopyTexSubImage(), - * glBlitFramebuffer(), texture validation, etc.). + * consistent with what we want in many callers (glCopyTexSubImage(), + * texture validation, etc.). */ mesa_format src_format = _mesa_get_srgb_format_linear(src_mt->format); mesa_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format); @@ -289,43 +323,21 @@ intel_miptree_blit(struct brw_context *brw, return false; } - /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics - * Data Size Limitations): - * - * The BLT engine is capable of transferring very large quantities of - * graphics data. Any graphics data read from and written to the - * destination is permitted to represent a number of pixels that - * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line - * at the destination. The maximum number of pixels that may be - * represented per scan line’s worth of graphics data depends on the - * color depth. - * - * Furthermore, intelEmitCopyBlit (which is called below) uses a signed - * 16-bit integer to represent buffer pitch, so it can only handle buffer - * pitches < 32k. However, the pitch is measured in bytes for linear buffers - * and dwords for tiled buffers. - * - * As a result of these two limitations, we can only use the blitter to do - * this copy when the miptree's pitch is less than 32k linear or 128k tiled. - */ - if (blt_pitch(src_mt) >= 32768 || blt_pitch(dst_mt) >= 32768) { - perf_debug("Falling back due to >= 32k/128k pitch\n"); - return false; - } - /* The blitter has no idea about HiZ or fast color clears, so we need to * resolve the miptrees before we do anything. */ - intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_slice); - intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_slice); - intel_miptree_resolve_color(brw, src_mt); - intel_miptree_resolve_color(brw, dst_mt); + intel_miptree_access_raw(brw, src_mt, src_level, src_slice, false); + intel_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true); - if (src_flip) - src_y = minify(src_mt->physical_height0, src_level - src_mt->first_level) - src_y - height; + if (src_flip) { + const unsigned h0 = src_mt->surf.phys_level0_sa.height; + src_y = minify(h0, src_level - src_mt->first_level) - src_y - height; + } - if (dst_flip) - dst_y = minify(dst_mt->physical_height0, dst_level - dst_mt->first_level) - dst_y - height; + if (dst_flip) { + const unsigned h0 = dst_mt->surf.phys_level0_sa.height; + dst_y = minify(h0, dst_level - dst_mt->first_level) - dst_y - height; + } uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y; intel_miptree_get_image_offset(src_mt, src_level, src_slice, @@ -337,30 +349,9 @@ intel_miptree_blit(struct brw_context *brw, dst_x += dst_image_x; dst_y += dst_image_y; - /* The blitter interprets the 16-bit destination x/y as a signed 16-bit - * value. The values we're working with are unsigned, so make sure we don't - * overflow. - */ - if (src_x >= 32768 || src_y >= 32768 || dst_x >= 32768 || dst_y >= 32768) { - perf_debug("Falling back due to >=32k offset [src(%d, %d) dst(%d, %d)]\n", - src_x, src_y, dst_x, dst_y); - return false; - } - - if (!intelEmitCopyBlit(brw, - src_mt->cpp, - src_flip == dst_flip ? src_mt->pitch : -src_mt->pitch, - src_mt->bo, src_mt->offset, - src_mt->tiling, - src_mt->tr_mode, - dst_mt->pitch, - dst_mt->bo, dst_mt->offset, - dst_mt->tiling, - dst_mt->tr_mode, - src_x, src_y, - dst_x, dst_y, - width, height, - logicop)) { + if (!emit_miptree_blit(brw, src_mt, src_x, src_y, + dst_mt, dst_x, dst_y, width, height, + src_flip != dst_flip, logicop)) { return false; } @@ -375,123 +366,125 @@ intel_miptree_blit(struct brw_context *brw, return true; } -static bool -alignment_valid(struct brw_context *brw, unsigned offset, uint32_t tiling) -{ - /* Tiled buffers must be page-aligned (4K). */ - if (tiling != I915_TILING_NONE) - return (offset & 4095) == 0; - - /* On Gen8+, linear buffers must be cacheline-aligned. */ - if (brw->gen >= 8) - return (offset & 63) == 0; - - return true; -} - -static bool -can_fast_copy_blit(struct brw_context *brw, - drm_intel_bo *src_buffer, - int16_t src_x, int16_t src_y, - uintptr_t src_offset, uint32_t src_pitch, - uint32_t src_tiling, uint32_t src_tr_mode, - drm_intel_bo *dst_buffer, - int16_t dst_x, int16_t dst_y, - uintptr_t dst_offset, uint32_t dst_pitch, - uint32_t dst_tiling, uint32_t dst_tr_mode, - int16_t w, int16_t h, uint32_t cpp) +bool +intel_miptree_copy(struct brw_context *brw, + struct intel_mipmap_tree *src_mt, + int src_level, int src_slice, + uint32_t src_x, uint32_t src_y, + struct intel_mipmap_tree *dst_mt, + int dst_level, int dst_slice, + uint32_t dst_x, uint32_t dst_y, + uint32_t src_width, uint32_t src_height) { - const bool dst_tiling_none = dst_tiling == I915_TILING_NONE; - const bool src_tiling_none = src_tiling == I915_TILING_NONE; - - if (brw->gen < 9) + /* The blitter doesn't understand multisampling at all. */ + if (src_mt->surf.samples > 1 || dst_mt->surf.samples > 1) return false; - if (src_buffer->handle == dst_buffer->handle && - _mesa_regions_overlap(src_x, src_y, src_x + w, src_y + h, - dst_x, dst_y, dst_x + w, dst_y + h)) + if (src_mt->format == MESA_FORMAT_S_UINT8) return false; - /* Enable fast copy blit only if the surfaces are Yf/Ys tiled. - * FIXME: Based on performance data, remove this condition later to - * enable for all types of surfaces. + /* The blitter has no idea about HiZ or fast color clears, so we need to + * resolve the miptrees before we do anything. */ - if (src_tr_mode == INTEL_MIPTREE_TRMODE_NONE && - dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE) - return false; + intel_miptree_access_raw(brw, src_mt, src_level, src_slice, false); + intel_miptree_access_raw(brw, dst_mt, dst_level, dst_slice, true); - /* For all surface types buffers must be cacheline-aligned. */ - if ((dst_offset | src_offset) & 63) - return false; + uint32_t src_image_x, src_image_y; + intel_miptree_get_image_offset(src_mt, src_level, src_slice, + &src_image_x, &src_image_y); - /* Color depth greater than 128 bits not supported. */ - if (cpp > 16) - return false; + if (_mesa_is_format_compressed(src_mt->format)) { + GLuint bw, bh; + _mesa_get_format_block_size(src_mt->format, &bw, &bh); - /* For Fast Copy Blits the pitch cannot be a negative number. So, bit 15 - * of the destination pitch must be zero. - */ - if ((src_pitch >> 15 & 1) != 0 || (dst_pitch >> 15 & 1) != 0) - return false; + /* Compressed textures need not have dimensions that are a multiple of + * the block size. Rectangles in compressed textures do need to be a + * multiple of the block size. The one exception is that the right and + * bottom edges may be at the right or bottom edge of the miplevel even + * if it's not aligned. + */ + assert(src_x % bw == 0); + assert(src_y % bh == 0); + + assert(src_width % bw == 0 || + src_x + src_width == + minify(src_mt->surf.logical_level0_px.width, src_level)); + assert(src_height % bh == 0 || + src_y + src_height == + minify(src_mt->surf.logical_level0_px.height, src_level)); + + src_x /= (int)bw; + src_y /= (int)bh; + src_width = DIV_ROUND_UP(src_width, (int)bw); + src_height = DIV_ROUND_UP(src_height, (int)bh); + } + src_x += src_image_x; + src_y += src_image_y; - /* For Linear surfaces, the pitch has to be an OWord (16byte) multiple. */ - if ((src_tiling_none && src_pitch % 16 != 0) || - (dst_tiling_none && dst_pitch % 16 != 0)) - return false; + uint32_t dst_image_x, dst_image_y; + intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice, + &dst_image_x, &dst_image_y); - /* For Tiled surfaces, the pitch has to be a multiple of the Tile width - * (X direction width of the Tile). This means the pitch value will - * always be Cache Line aligned (64byte multiple). - */ - if ((!dst_tiling_none && dst_pitch % 64 != 0) || - (!src_tiling_none && src_pitch % 64 != 0)) - return false; + if (_mesa_is_format_compressed(dst_mt->format)) { + GLuint bw, bh; + _mesa_get_format_block_size(dst_mt->format, &bw, &bh); - return true; + assert(dst_x % bw == 0); + assert(dst_y % bh == 0); + + dst_x /= (int)bw; + dst_y /= (int)bh; + } + dst_x += dst_image_x; + dst_y += dst_image_y; + + return emit_miptree_blit(brw, src_mt, src_x, src_y, + dst_mt, dst_x, dst_y, + src_width, src_height, false, GL_COPY); } -static uint32_t -xy_blit_cmd(uint32_t src_tiling, uint32_t src_tr_mode, - uint32_t dst_tiling, uint32_t dst_tr_mode, - uint32_t cpp, bool use_fast_copy_blit) +static bool +alignment_valid(struct brw_context *brw, unsigned offset, + enum isl_tiling tiling) { - uint32_t CMD = 0; + const struct gen_device_info *devinfo = &brw->screen->devinfo; - if (use_fast_copy_blit) { - CMD = XY_FAST_COPY_BLT_CMD; + /* Tiled buffers must be page-aligned (4K). */ + if (tiling != ISL_TILING_LINEAR) + return (offset & 4095) == 0; - if (dst_tiling != I915_TILING_NONE) - SET_TILING_XY_FAST_COPY_BLT(dst_tiling, dst_tr_mode, XY_FAST_DST); + /* On Gen8+, linear buffers must be cacheline-aligned. */ + if (devinfo->gen >= 8) + return (offset & 63) == 0; - if (src_tiling != I915_TILING_NONE) - SET_TILING_XY_FAST_COPY_BLT(src_tiling, src_tr_mode, XY_FAST_SRC); + return true; +} - CMD |= get_tr_horizontal_align(src_tr_mode, cpp, true /* is_src */); - CMD |= get_tr_vertical_align(src_tr_mode, cpp, true /* is_src */); +static uint32_t +xy_blit_cmd(enum isl_tiling src_tiling, enum isl_tiling dst_tiling, + uint32_t cpp) +{ + uint32_t CMD = 0; - CMD |= get_tr_horizontal_align(dst_tr_mode, cpp, false /* is_src */); - CMD |= get_tr_vertical_align(dst_tr_mode, cpp, false /* is_src */); + assert(cpp <= 4); + switch (cpp) { + case 1: + case 2: + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + unreachable("not reached"); + } - } else { - assert(cpp <= 4); - switch (cpp) { - case 1: - case 2: - CMD = XY_SRC_COPY_BLT_CMD; - break; - case 4: - CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - break; - default: - unreachable("not reached"); - } + if (dst_tiling != ISL_TILING_LINEAR) + CMD |= XY_DST_TILED; - if (dst_tiling != I915_TILING_NONE) - CMD |= XY_DST_TILED; + if (src_tiling != ISL_TILING_LINEAR) + CMD |= XY_SRC_TILED; - if (src_tiling != I915_TILING_NONE) - CMD |= XY_SRC_TILED; - } return CMD; } @@ -500,51 +493,41 @@ xy_blit_cmd(uint32_t src_tiling, uint32_t src_tr_mode, bool intelEmitCopyBlit(struct brw_context *brw, GLuint cpp, - GLshort src_pitch, - drm_intel_bo *src_buffer, + int32_t src_pitch, + struct brw_bo *src_buffer, GLuint src_offset, - uint32_t src_tiling, - uint32_t src_tr_mode, - GLshort dst_pitch, - drm_intel_bo *dst_buffer, + enum isl_tiling src_tiling, + int32_t dst_pitch, + struct brw_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, - uint32_t dst_tr_mode, + enum isl_tiling dst_tiling, GLshort src_x, GLshort src_y, GLshort dst_x, GLshort dst_y, GLshort w, GLshort h, GLenum logic_op) { - GLuint CMD, BR13, pass = 0; + const struct gen_device_info *devinfo = &brw->screen->devinfo; + GLuint CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; - drm_intel_bo *aper_array[3]; - bool dst_y_tiled = dst_tiling == I915_TILING_Y; - bool src_y_tiled = src_tiling == I915_TILING_Y; - bool use_fast_copy_blit = false; + bool dst_y_tiled = dst_tiling == ISL_TILING_Y0; + bool src_y_tiled = src_tiling == ISL_TILING_Y0; uint32_t src_tile_w, src_tile_h; uint32_t dst_tile_w, dst_tile_h; - if ((dst_y_tiled || src_y_tiled) && brw->gen < 6) + if ((dst_y_tiled || src_y_tiled) && devinfo->gen < 6) return false; + const unsigned bo_sizes = dst_buffer->size + src_buffer->size; + /* do space check before going any further */ - do { - aper_array[0] = brw->batch.bo; - aper_array[1] = dst_buffer; - aper_array[2] = src_buffer; - - if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { - intel_batchbuffer_flush(brw); - pass++; - } else - break; - } while (pass < 2); - - if (pass >= 2) + if (!brw_batch_has_aperture_space(brw, bo_sizes)) + intel_batchbuffer_flush(brw); + + if (!brw_batch_has_aperture_space(brw, bo_sizes)) return false; - unsigned length = brw->gen >= 8 ? 10 : 8; + unsigned length = devinfo->gen >= 8 ? 10 : 8; intel_batchbuffer_require_space(brw, length * 4, BLT_RING); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", @@ -552,95 +535,58 @@ intelEmitCopyBlit(struct brw_context *brw, src_buffer, src_pitch, src_offset, src_x, src_y, dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); - intel_get_tile_dims(src_tiling, src_tr_mode, cpp, &src_tile_w, &src_tile_h); - intel_get_tile_dims(dst_tiling, dst_tr_mode, cpp, &dst_tile_w, &dst_tile_h); - - use_fast_copy_blit = can_fast_copy_blit(brw, - src_buffer, - src_x, src_y, - src_offset, src_pitch, - src_tiling, src_tr_mode, - dst_buffer, - dst_x, dst_y, - dst_offset, dst_pitch, - dst_tiling, dst_tr_mode, - w, h, cpp); - assert(use_fast_copy_blit || - (src_tr_mode == INTEL_MIPTREE_TRMODE_NONE && - dst_tr_mode == INTEL_MIPTREE_TRMODE_NONE)); - - if (use_fast_copy_blit) { - /* When two sequential fast copy blits have different source surfaces, - * but their destinations refer to the same destination surfaces and - * therefore destinations overlap it is imperative that a flush be - * inserted between the two blits. - * - * FIXME: Figure out a way to avoid flushing when not required. - */ - brw_emit_mi_flush(brw); + intel_get_tile_dims(src_tiling, cpp, &src_tile_w, &src_tile_h); + intel_get_tile_dims(dst_tiling, cpp, &dst_tile_w, &dst_tile_h); - assert(cpp <= 16); - BR13 = br13_for_cpp(cpp); - - if (src_tr_mode == INTEL_MIPTREE_TRMODE_YF) - BR13 |= XY_FAST_SRC_TRMODE_YF; - - if (dst_tr_mode == INTEL_MIPTREE_TRMODE_YF) - BR13 |= XY_FAST_DST_TRMODE_YF; - - CMD = xy_blit_cmd(src_tiling, src_tr_mode, - dst_tiling, dst_tr_mode, - cpp, use_fast_copy_blit); - - } else { - assert(src_tiling == I915_TILING_NONE || (src_pitch % src_tile_w) == 0); - assert(dst_tiling == I915_TILING_NONE || (dst_pitch % dst_tile_w) == 0); + /* For Tiled surfaces, the pitch has to be a multiple of the Tile width + * (X direction width of the Tile). This is ensured while allocating the + * buffer object. + */ + assert(src_tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0); + assert(dst_tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0); - /* For big formats (such as floating point), do the copy using 16 or - * 32bpp and multiply the coordinates. - */ - if (cpp > 4) { - if (cpp % 4 == 2) { - dst_x *= cpp / 2; - dst_x2 *= cpp / 2; - src_x *= cpp / 2; - cpp = 2; - } else { - assert(cpp % 4 == 0); - dst_x *= cpp / 4; - dst_x2 *= cpp / 4; - src_x *= cpp / 4; - cpp = 4; - } + /* For big formats (such as floating point), do the copy using 16 or + * 32bpp and multiply the coordinates. + */ + if (cpp > 4) { + if (cpp % 4 == 2) { + dst_x *= cpp / 2; + dst_x2 *= cpp / 2; + src_x *= cpp / 2; + cpp = 2; + } else { + assert(cpp % 4 == 0); + dst_x *= cpp / 4; + dst_x2 *= cpp / 4; + src_x *= cpp / 4; + cpp = 4; } + } - if (!alignment_valid(brw, dst_offset, dst_tiling)) - return false; - if (!alignment_valid(brw, src_offset, src_tiling)) - return false; + if (!alignment_valid(brw, dst_offset, dst_tiling)) + return false; + if (!alignment_valid(brw, src_offset, src_tiling)) + return false; - /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop - * the low bits. Offsets must be naturally aligned. - */ - if (src_pitch % 4 != 0 || src_offset % cpp != 0 || - dst_pitch % 4 != 0 || dst_offset % cpp != 0) - return false; + /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop + * the low bits. Offsets must be naturally aligned. + */ + if (src_pitch % 4 != 0 || src_offset % cpp != 0 || + dst_pitch % 4 != 0 || dst_offset % cpp != 0) + return false; - assert(cpp <= 4); - BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; + assert(cpp <= 4); + BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; - CMD = xy_blit_cmd(src_tiling, src_tr_mode, - dst_tiling, dst_tr_mode, - cpp, use_fast_copy_blit); - } + CMD = xy_blit_cmd(src_tiling, dst_tiling, cpp); /* For tiled source and destination, pitch value should be specified * as a number of Dwords. */ - if (dst_tiling != I915_TILING_NONE) + if (dst_tiling != ISL_TILING_LINEAR) dst_pitch /= 4; - if (src_tiling != I915_TILING_NONE) + if (src_tiling != ISL_TILING_LINEAR) src_pitch /= 4; if (dst_y2 <= dst_y || dst_x2 <= dst_x) @@ -648,35 +594,23 @@ intelEmitCopyBlit(struct brw_context *brw, assert(dst_x < dst_x2); assert(dst_y < dst_y2); - assert(src_offset + (src_y + h - 1) * abs(src_pitch) + - (w * cpp) <= src_buffer->size); - assert(dst_offset + (dst_y + h - 1) * abs(dst_pitch) + - (w * cpp) <= dst_buffer->size); BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, src_y_tiled); OUT_BATCH(CMD | (length - 2)); OUT_BATCH(BR13 | (uint16_t)dst_pitch); OUT_BATCH(SET_FIELD(dst_y, BLT_Y) | SET_FIELD(dst_x, BLT_X)); OUT_BATCH(SET_FIELD(dst_y2, BLT_Y) | SET_FIELD(dst_x2, BLT_X)); - if (brw->gen >= 8) { - OUT_RELOC64(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + if (devinfo->gen >= 8) { + OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset); } else { - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset); } OUT_BATCH(SET_FIELD(src_y, BLT_Y) | SET_FIELD(src_x, BLT_X)); OUT_BATCH((uint16_t)src_pitch); - if (brw->gen >= 8) { - OUT_RELOC64(src_buffer, - I915_GEM_DOMAIN_RENDER, 0, - src_offset); + if (devinfo->gen >= 8) { + OUT_RELOC64(src_buffer, 0, src_offset); } else { - OUT_RELOC(src_buffer, - I915_GEM_DOMAIN_RENDER, 0, - src_offset); + OUT_RELOC(src_buffer, 0, src_offset); } ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled); @@ -692,20 +626,21 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, GLubyte *src_bits, GLuint src_size, GLuint fg_color, GLshort dst_pitch, - drm_intel_bo *dst_buffer, + struct brw_bo *dst_buffer, GLuint dst_offset, - uint32_t dst_tiling, + enum isl_tiling dst_tiling, GLshort x, GLshort y, GLshort w, GLshort h, GLenum logic_op) { + const struct gen_device_info *devinfo = &brw->screen->devinfo; int dwords = ALIGN(src_size, 8) / 4; uint32_t opcode, br13, blit_cmd; - if (dst_tiling != I915_TILING_NONE) { + if (dst_tiling != ISL_TILING_LINEAR) { if (dst_offset & 4095) return false; - if (dst_tiling == I915_TILING_Y) + if (dst_tiling == ISL_TILING_Y0) return false; } @@ -719,14 +654,14 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, __func__, dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); - unsigned xy_setup_blt_length = brw->gen >= 8 ? 10 : 8; + unsigned xy_setup_blt_length = devinfo->gen >= 8 ? 10 : 8; intel_batchbuffer_require_space(brw, (xy_setup_blt_length * 4) + (3 * 4) + dwords * 4, BLT_RING); opcode = XY_SETUP_BLT_CMD; if (cpp == 4) opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - if (dst_tiling != I915_TILING_NONE) { + if (dst_tiling != ISL_TILING_LINEAR) { opcode |= XY_DST_TILED; dst_pitch /= 4; } @@ -735,7 +670,7 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, br13 |= br13_for_cpp(cpp); blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */ - if (dst_tiling != I915_TILING_NONE) + if (dst_tiling != ISL_TILING_LINEAR) blit_cmd |= XY_DST_TILED; BEGIN_BATCH_BLT(xy_setup_blt_length + 3); @@ -743,19 +678,15 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ - if (brw->gen >= 8) { - OUT_RELOC64(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + if (devinfo->gen >= 8) { + OUT_RELOC64(dst_buffer, RELOC_WRITE, dst_offset); } else { - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); + OUT_RELOC(dst_buffer, RELOC_WRITE, dst_offset); } OUT_BATCH(0); /* bg */ OUT_BATCH(fg_color); /* fg */ OUT_BATCH(0); /* pattern base addr */ - if (brw->gen >= 8) + if (devinfo->gen >= 8) OUT_BATCH(0); OUT_BATCH(blit_cmd | ((3 - 2) + dwords)); @@ -776,9 +707,9 @@ intelEmitImmediateColorExpandBlit(struct brw_context *brw, */ void intel_emit_linear_blit(struct brw_context *brw, - drm_intel_bo *dst_bo, + struct brw_bo *dst_bo, unsigned int dst_offset, - drm_intel_bo *src_bo, + struct brw_bo *src_bo, unsigned int src_offset, unsigned int size) { @@ -802,10 +733,10 @@ intel_emit_linear_blit(struct brw_context *brw, assert(dst_x + pitch < 1 << 15); ok = intelEmitCopyBlit(brw, 1, - pitch, src_bo, src_offset - src_x, I915_TILING_NONE, - INTEL_MIPTREE_TRMODE_NONE, - pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE, - INTEL_MIPTREE_TRMODE_NONE, + pitch, src_bo, src_offset - src_x, + ISL_TILING_LINEAR, + pitch, dst_bo, dst_offset - dst_x, + ISL_TILING_LINEAR, src_x, 0, /* src x/y */ dst_x, 0, /* dst x/y */ MIN2(size, pitch), height, /* w, h */ @@ -839,11 +770,11 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw, struct intel_mipmap_tree *mt, int x, int y, int width, int height) { + const struct gen_device_info *devinfo = &brw->screen->devinfo; uint32_t BR13, CMD; int pitch, cpp; - drm_intel_bo *aper_array[2]; - pitch = mt->pitch; + pitch = mt->surf.row_pitch; cpp = mt->cpp; DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", @@ -853,40 +784,53 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw, CMD = XY_COLOR_BLT_CMD; CMD |= XY_BLT_WRITE_ALPHA; - if (mt->tiling != I915_TILING_NONE) { + if (mt->surf.tiling != ISL_TILING_LINEAR) { CMD |= XY_DST_TILED; pitch /= 4; } BR13 |= pitch; /* do space check before going any further */ - aper_array[0] = brw->batch.bo; - aper_array[1] = mt->bo; - - if (drm_intel_bufmgr_check_aperture_space(aper_array, - ARRAY_SIZE(aper_array)) != 0) { + if (!brw_batch_has_aperture_space(brw, mt->bo->size)) intel_batchbuffer_flush(brw); - } - unsigned length = brw->gen >= 8 ? 7 : 6; - bool dst_y_tiled = mt->tiling == I915_TILING_Y; + unsigned length = devinfo->gen >= 8 ? 7 : 6; + const bool dst_y_tiled = mt->surf.tiling == ISL_TILING_Y0; - BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, false); - OUT_BATCH(CMD | (length - 2)); - OUT_BATCH(BR13); - OUT_BATCH(SET_FIELD(y, BLT_Y) | SET_FIELD(x, BLT_X)); - OUT_BATCH(SET_FIELD(y + height, BLT_Y) | SET_FIELD(x + width, BLT_X)); - if (brw->gen >= 8) { - OUT_RELOC64(mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); - } else { - OUT_RELOC(mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); + /* We need to split the blit into chunks that each fit within the blitter's + * restrictions. We can't use a chunk size of 32768 because we need to + * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's + * a nice round power of two, big enough that performance won't suffer, and + * small enough to guarantee everything fits. + */ + const uint32_t max_chunk_size = 16384; + + for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) { + for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) { + const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x); + const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y); + + uint32_t offset, tile_x, tile_y; + get_blit_intratile_offset_el(brw, mt, + x + chunk_x, y + chunk_y, + &offset, &tile_x, &tile_y); + + BEGIN_BATCH_BLT_TILED(length, dst_y_tiled, false); + OUT_BATCH(CMD | (length - 2)); + OUT_BATCH(BR13); + OUT_BATCH(SET_FIELD(y + chunk_y, BLT_Y) | + SET_FIELD(x + chunk_x, BLT_X)); + OUT_BATCH(SET_FIELD(y + chunk_y + chunk_h, BLT_Y) | + SET_FIELD(x + chunk_x + chunk_w, BLT_X)); + if (devinfo->gen >= 8) { + OUT_RELOC64(mt->bo, RELOC_WRITE, mt->offset + offset); + } else { + OUT_RELOC(mt->bo, RELOC_WRITE, mt->offset + offset); + } + OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ + ADVANCE_BATCH_TILED(dst_y_tiled, false); + } } - OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ - ADVANCE_BATCH_TILED(dst_y_tiled, false); brw_emit_mi_flush(brw); }