From: Chia-I Wu Date: Mon, 17 Jun 2013 05:59:51 +0000 (+0800) Subject: ilo: add BLT-based blitting methods to ilo_blitter X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ebfd7a61c0a4ca9395ef7a522cc491208e3f045f;p=mesa.git ilo: add BLT-based blitting methods to ilo_blitter Port BLT code in ilo_blit.c to BLT-based blitting methods of ilo_blitter. Add BLT-based clears. The latter is verifed with util_clear(), but it is not in use yet. --- diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 363ae172f18..1612a774519 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -6,6 +6,7 @@ C_SOURCES := \ ilo_3d_pipeline_gen7.c \ ilo_blit.c \ ilo_blitter.c \ + ilo_blitter_blt.c \ ilo_blitter_pipe.c \ ilo_context.c \ ilo_cp.c \ diff --git a/src/gallium/drivers/ilo/ilo_blit.c b/src/gallium/drivers/ilo/ilo_blit.c index 3c1695950eb..0041cd10916 100644 --- a/src/gallium/drivers/ilo/ilo_blit.c +++ b/src/gallium/drivers/ilo/ilo_blit.c @@ -25,7 +25,6 @@ * Chia-I Wu */ -#include "util/u_pack_color.h" #include "util/u_surface.h" #include "intel_reg.h" @@ -36,424 +35,6 @@ #include "ilo_screen.h" #include "ilo_blit.h" -/* - * From the Sandy Bridge PRM, volume 1 part 5, page 7: - * - * "The BLT engine is capable of transferring very large quantities of - * graphics data. Any graphics data read from and written to the - * destination is permitted to represent a number of pixels that occupies - * up to 65,536 scan lines and up to 32,768 bytes per scan line at the - * destination. The maximum number of pixels that may be represented per - * scan line's worth of graphics data depends on the color depth." - */ -static const int gen6_max_bytes_per_scanline = 32768; -static const int gen6_max_scanlines = 65536; - -static void -ilo_blit_own_blt_ring(struct ilo_context *ilo) -{ - ilo_cp_set_ring(ilo->cp, ILO_CP_RING_BLT); - ilo_cp_set_owner(ilo->cp, NULL, 0); -} - -static void -gen6_MI_FLUSH_DW(struct ilo_context *ilo) -{ - const uint8_t cmd_len = 4; - struct ilo_cp *cp = ilo->cp; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, MI_FLUSH_DW | (cmd_len - 2)); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_write(cp, 0); - ilo_cp_end(cp); -} - -static void -gen6_MI_LOAD_REGISTER_IMM(struct ilo_context *ilo, uint32_t reg, uint32_t val) -{ - const uint8_t cmd_len = 3; - struct ilo_cp *cp = ilo->cp; - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, MI_LOAD_REGISTER_IMM | (cmd_len - 2)); - ilo_cp_write(cp, reg); - ilo_cp_write(cp, val); - ilo_cp_end(cp); -} - -static void -gen6_XY_COLOR_BLT(struct ilo_context *ilo, struct intel_bo *dst_bo, - uint32_t dst_offset, int16_t dst_pitch, - enum intel_tiling_mode dst_tiling, - int16_t x1, int16_t y1, int16_t x2, int16_t y2, - uint32_t color, - uint8_t rop, int cpp, bool write_alpha) -{ - const uint8_t cmd_len = 6; - struct ilo_cp *cp = ilo->cp; - int dst_align, dst_pitch_shift; - uint32_t dw0, dw1; - - dw0 = XY_COLOR_BLT_CMD | (cmd_len - 2); - - if (dst_tiling == INTEL_TILING_NONE) { - dst_align = 4; - dst_pitch_shift = 0; - } - else { - dw0 |= XY_DST_TILED; - - dst_align = (dst_tiling == INTEL_TILING_Y) ? 128 : 512; - /* in dwords when tiled */ - dst_pitch_shift = 2; - } - - assert(cpp == 4 || cpp == 2 || cpp == 1); - assert((x2 - x1) * cpp < gen6_max_bytes_per_scanline); - assert(y2 - y1 < gen6_max_scanlines); - assert(dst_offset % dst_align == 0 && dst_pitch % dst_align == 0); - - dw1 = rop << 16 | - dst_pitch >> dst_pitch_shift; - - switch (cpp) { - case 4: - dw0 |= XY_BLT_WRITE_RGB; - if (write_alpha) - dw0 |= XY_BLT_WRITE_ALPHA; - - dw1 |= BR13_8888; - break; - case 2: - dw1 |= BR13_565; - break; - case 1: - dw1 |= BR13_8; - break; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, dw0); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, y1 << 16 | x1); - ilo_cp_write(cp, y2 << 16 | x2); - ilo_cp_write_bo(cp, dst_offset, dst_bo, - INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); - ilo_cp_write(cp, color); - ilo_cp_end(cp); -} - -static void -gen6_SRC_COPY_BLT(struct ilo_context *ilo, struct intel_bo *dst_bo, - uint32_t dst_offset, int16_t dst_pitch, - uint16_t width, uint16_t height, - struct intel_bo *src_bo, - uint32_t src_offset, int16_t src_pitch, - uint8_t rop, int cpp, bool write_alpha, bool dir_rtl) -{ - const uint8_t cmd_len = 6; - struct ilo_cp *cp = ilo->cp; - uint32_t dw0, dw1; - - assert(cpp == 4 || cpp == 2 || cpp == 1); - assert(width < gen6_max_bytes_per_scanline); - assert(height < gen6_max_scanlines); - /* offsets are naturally aligned and pitches are dword-aligned */ - assert(dst_offset % cpp == 0 && dst_pitch % 4 == 0); - assert(src_offset % cpp == 0 && src_pitch % 4 == 0); - -#ifndef SRC_COPY_BLT_CMD -#define SRC_COPY_BLT_CMD (CMD_2D | (0x43 << 22)) -#endif - dw0 = SRC_COPY_BLT_CMD | (cmd_len - 2); - dw1 = rop << 16 | dst_pitch; - - if (dir_rtl) - dw1 |= 1 << 30; - - switch (cpp) { - case 4: - dw0 |= XY_BLT_WRITE_RGB; - if (write_alpha) - dw0 |= XY_BLT_WRITE_ALPHA; - - dw1 |= BR13_8888; - break; - case 2: - dw1 |= BR13_565; - break; - case 1: - dw1 |= BR13_8; - break; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, dw0); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, height << 16 | width); - ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER, - INTEL_DOMAIN_RENDER); - ilo_cp_write(cp, src_pitch); - ilo_cp_write_bo(cp, src_offset, src_bo, INTEL_DOMAIN_RENDER, 0); - ilo_cp_end(cp); -} - -static void -gen6_XY_SRC_COPY_BLT(struct ilo_context *ilo, struct intel_bo *dst_bo, - uint32_t dst_offset, int16_t dst_pitch, - enum intel_tiling_mode dst_tiling, - int16_t x1, int16_t y1, int16_t x2, int16_t y2, - struct intel_bo *src_bo, - uint32_t src_offset, int16_t src_pitch, - enum intel_tiling_mode src_tiling, - int16_t src_x, int16_t src_y, - uint8_t rop, int cpp, bool write_alpha) -{ - const uint8_t cmd_len = 8; - struct ilo_cp *cp = ilo->cp; - int dst_align, dst_pitch_shift; - int src_align, src_pitch_shift; - uint32_t dw0, dw1; - - dw0 = XY_SRC_COPY_BLT_CMD | (cmd_len - 2); - - if (dst_tiling == INTEL_TILING_NONE) { - dst_align = 4; - dst_pitch_shift = 0; - } - else { - dw0 |= XY_DST_TILED; - - dst_align = (dst_tiling == INTEL_TILING_Y) ? 128 : 512; - /* in dwords when tiled */ - dst_pitch_shift = 2; - } - - if (src_tiling == INTEL_TILING_NONE) { - src_align = 4; - src_pitch_shift = 0; - } - else { - dw0 |= XY_SRC_TILED; - - src_align = (src_tiling == INTEL_TILING_Y) ? 128 : 512; - /* in dwords when tiled */ - src_pitch_shift = 2; - } - - assert(cpp == 4 || cpp == 2 || cpp == 1); - assert((x2 - x1) * cpp < gen6_max_bytes_per_scanline); - assert(y2 - y1 < gen6_max_scanlines); - assert(dst_offset % dst_align == 0 && dst_pitch % dst_align == 0); - assert(src_offset % src_align == 0 && src_pitch % src_align == 0); - - dw1 = rop << 16 | - dst_pitch >> dst_pitch_shift; - - switch (cpp) { - case 4: - dw0 |= XY_BLT_WRITE_RGB; - if (write_alpha) - dw0 |= XY_BLT_WRITE_ALPHA; - - dw1 |= BR13_8888; - break; - case 2: - dw1 |= BR13_565; - break; - case 1: - dw1 |= BR13_8; - break; - } - - ilo_cp_begin(cp, cmd_len); - ilo_cp_write(cp, dw0); - ilo_cp_write(cp, dw1); - ilo_cp_write(cp, y1 << 16 | x1); - ilo_cp_write(cp, y2 << 16 | x2); - ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER, - INTEL_DOMAIN_RENDER); - ilo_cp_write(cp, src_y << 16 | src_x); - ilo_cp_write(cp, src_pitch >> src_pitch_shift); - ilo_cp_write_bo(cp, src_offset, src_bo, INTEL_DOMAIN_RENDER, 0); - ilo_cp_end(cp); -} - -static bool -tex_copy_region(struct ilo_context *ilo, - struct ilo_texture *dst, - unsigned dst_level, - unsigned dst_x, unsigned dst_y, unsigned dst_z, - struct ilo_texture *src, - unsigned src_level, - const struct pipe_box *src_box) -{ - const struct util_format_description *desc = - util_format_description(dst->bo_format); - const unsigned max_extent = 32767; /* INT16_MAX */ - const uint8_t rop = 0xcc; /* SRCCOPY */ - struct intel_bo *aper_check[3]; - uint32_t swctrl; - int cpp, xscale, slice; - - /* no W-tiling support */ - if (dst->separate_s8 || src->separate_s8) - return false; - - if (dst->bo_stride > max_extent || src->bo_stride > max_extent) - return false; - - cpp = desc->block.bits / 8; - xscale = 1; - - /* accommodate for larger cpp */ - if (cpp > 4) { - if (cpp % 2 == 1) - return false; - - cpp = (cpp % 4 == 0) ? 4 : 2; - xscale = (desc->block.bits / 8) / cpp; - } - - ilo_blit_own_blt_ring(ilo); - - /* make room if necessary */ - aper_check[0] = ilo->cp->bo; - aper_check[1] = dst->bo; - aper_check[2] = src->bo; - if (intel_winsys_check_aperture_space(ilo->winsys, aper_check, 3)) - ilo_cp_flush(ilo->cp); - - swctrl = 0x0; - - if (dst->tiling == INTEL_TILING_Y) { - swctrl |= BCS_SWCTRL_DST_Y << 16 | - BCS_SWCTRL_DST_Y; - } - - if (src->tiling == INTEL_TILING_Y) { - swctrl |= BCS_SWCTRL_SRC_Y << 16 | - BCS_SWCTRL_SRC_Y; - } - - if (swctrl) { - /* - * Most clients expect BLT engine to be stateless. If we have to set - * BCS_SWCTRL to a non-default value, we have to set it back in the same - * batch buffer. - */ - if (ilo_cp_space(ilo->cp) < (4 + 3) * 2 + src_box->depth * 8) - ilo_cp_flush(ilo->cp); - - ilo_cp_assert_no_implicit_flush(ilo->cp, true); - - /* - * From the Ivy Bridge PRM, volume 1 part 4, page 133: - * - * "SW is required to flush the HW before changing the polarity of - * this bit (Tile Y Destination/Source)." - */ - gen6_MI_FLUSH_DW(ilo); - gen6_MI_LOAD_REGISTER_IMM(ilo, BCS_SWCTRL, swctrl); - - swctrl &= ~(BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y); - } - - for (slice = 0; slice < src_box->depth; slice++) { - const struct ilo_texture_slice *dst_slice = - &dst->slice_offsets[dst_level][dst_z + slice]; - const struct ilo_texture_slice *src_slice = - &src->slice_offsets[src_level][src_box->z + slice]; - unsigned x1, y1, x2, y2, src_x, src_y; - - x1 = (dst_slice->x + dst_x) * xscale; - y1 = dst_slice->y + dst_y; - x2 = (x1 + src_box->width) * xscale; - y2 = y1 + src_box->height; - src_x = (src_slice->x + src_box->x) * xscale; - src_y = src_slice->y + src_box->y; - - x1 /= desc->block.width; - y1 /= desc->block.height; - x2 = (x2 + desc->block.width - 1) / desc->block.width; - y2 = (y2 + desc->block.height - 1) / desc->block.height; - src_x /= desc->block.width; - src_y /= desc->block.height; - - if (x2 > max_extent || y2 > max_extent || - src_x > max_extent || src_y > max_extent || - (x2 - x1) * cpp > gen6_max_bytes_per_scanline) - break; - - gen6_XY_SRC_COPY_BLT(ilo, - dst->bo, 0, dst->bo_stride, dst->tiling, - x1, y1, x2, y2, - src->bo, 0, src->bo_stride, src->tiling, - src_x, src_y, rop, cpp, true); - } - - if (swctrl) { - gen6_MI_FLUSH_DW(ilo); - gen6_MI_LOAD_REGISTER_IMM(ilo, BCS_SWCTRL, swctrl); - - ilo_cp_assert_no_implicit_flush(ilo->cp, false); - } - - return (slice == src_box->depth); -} - -static bool -buf_copy_region(struct ilo_context *ilo, - struct ilo_buffer *dst, unsigned dst_offset, - struct ilo_buffer *src, unsigned src_offset, - unsigned size) -{ - const uint8_t rop = 0xcc; /* SRCCOPY */ - unsigned offset = 0; - struct intel_bo *aper_check[3]; - - ilo_blit_own_blt_ring(ilo); - - /* make room if necessary */ - aper_check[0] = ilo->cp->bo; - aper_check[1] = dst->bo; - aper_check[2] = src->bo; - if (intel_winsys_check_aperture_space(ilo->winsys, aper_check, 3)) - ilo_cp_flush(ilo->cp); - - while (size) { - unsigned width, height; - int16_t pitch; - - width = size; - height = 1; - pitch = 0; - - if (width > gen6_max_bytes_per_scanline) { - /* less than INT16_MAX and dword-aligned */ - pitch = 32764; - - width = pitch; - height = size / width; - if (height > gen6_max_scanlines) - height = gen6_max_scanlines; - } - - gen6_SRC_COPY_BLT(ilo, - dst->bo, dst_offset + offset, pitch, - width, height, - src->bo, src_offset + offset, pitch, - rop, 1, true, false); - - offset += pitch * height; - size -= width * height; - } - - return true; -} - static void ilo_resource_copy_region(struct pipe_context *pipe, struct pipe_resource *dst, @@ -462,73 +43,16 @@ ilo_resource_copy_region(struct pipe_context *pipe, struct pipe_resource *src, unsigned src_level, const struct pipe_box *src_box) -{ - bool success; - - if (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER) { - success = tex_copy_region(ilo_context(pipe), - ilo_texture(dst), dst_level, dstx, dsty, dstz, - ilo_texture(src), src_level, src_box); - } - else if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { - const unsigned dst_offset = dstx; - const unsigned src_offset = src_box->x; - const unsigned size = src_box->width; - - assert(dst_level == 0 && dsty == 0 && dstz == 0); - assert(src_level == 0 && - src_box->y == 0 && - src_box->z == 0 && - src_box->height == 1 && - src_box->depth == 1); - - success = buf_copy_region(ilo_context(pipe), - ilo_buffer(dst), dst_offset, ilo_buffer(src), src_offset, size); - } - else { - success = false; - } - - if (!success) { - util_resource_copy_region(pipe, dst, dst_level, - dstx, dsty, dstz, src, src_level, src_box); - } -} - -static bool -blitter_xy_color_blt(struct pipe_context *pipe, - struct pipe_resource *res, - int16_t x1, int16_t y1, - int16_t x2, int16_t y2, - uint32_t color) { struct ilo_context *ilo = ilo_context(pipe); - struct ilo_texture *tex = ilo_texture(res); - const int cpp = util_format_get_blocksize(tex->bo_format); - const uint8_t rop = 0xf0; /* PATCOPY */ - struct intel_bo *aper_check[2]; - - /* how to support Y-tiling? */ - if (tex->tiling == INTEL_TILING_Y) - return false; - /* nothing to clear */ - if (x1 >= x2 || y1 >= y2) - return true; - - ilo_blit_own_blt_ring(ilo); - - /* make room if necessary */ - aper_check[0] = ilo->cp->bo; - aper_check[1] = tex->bo; - if (intel_winsys_check_aperture_space(ilo->winsys, aper_check, 2)) - ilo_cp_flush(ilo->cp); - - gen6_XY_COLOR_BLT(ilo, - tex->bo, 0, tex->bo_stride, tex->tiling, - x1, y1, x2, y2, color, rop, cpp, true); + if (ilo_blitter_blt_copy_resource(ilo->blitter, + dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box)) + return; - return true; + util_resource_copy_region(&ilo->base, dst, dst_level, + dstx, dsty, dstz, src, src_level, src_box); } static void @@ -551,7 +75,6 @@ ilo_clear_render_target(struct pipe_context *pipe, unsigned width, unsigned height) { struct ilo_context *ilo = ilo_context(pipe); - union util_color packed; if (!width || !height || dstx >= dst->width || dsty >= dst->height) return; @@ -561,13 +84,8 @@ ilo_clear_render_target(struct pipe_context *pipe, if (dsty + height > dst->height) height = dst->height - dsty; - util_pack_color(color->f, dst->format, &packed); - - /* try HW blit first */ - if (blitter_xy_color_blt(pipe, dst->texture, - dstx, dsty, - dstx + width, dsty + height, - packed.ui)) + if (ilo_blitter_blt_clear_rt(ilo->blitter, + dst, color, dstx, dsty, width, height)) return; ilo_blitter_pipe_clear_rt(ilo->blitter, @@ -585,11 +103,17 @@ ilo_clear_depth_stencil(struct pipe_context *pipe, { struct ilo_context *ilo = ilo_context(pipe); - /* - * The PRM claims that HW blit supports Y-tiling since GEN6, but it does - * not tell us how to program it. Since depth buffers are always Y-tiled, - * HW blit will not work. - */ + if (!width || !height || dstx >= dst->width || dsty >= dst->height) + return; + + if (dstx + width > dst->width) + width = dst->width - dstx; + if (dsty + height > dst->height) + height = dst->height - dsty; + + if (ilo_blitter_blt_clear_zs(ilo->blitter, + dst, clear_flags, depth, stencil, dstx, dsty, width, height)) + return; ilo_blitter_pipe_clear_zs(ilo->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index 24218f27a62..6a2a0d7022e 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -77,4 +77,26 @@ ilo_blitter_pipe_clear_fb(struct ilo_blitter *blitter, const union pipe_color_union *color, double depth, unsigned stencil); +bool +ilo_blitter_blt_copy_resource(struct ilo_blitter *blitter, + struct pipe_resource *dst, unsigned dst_level, + unsigned dst_x, unsigned dst_y, unsigned dst_z, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box); + +bool +ilo_blitter_blt_clear_rt(struct ilo_blitter *blitter, + struct pipe_surface *rt, + const union pipe_color_union *color, + unsigned x, unsigned y, + unsigned width, unsigned height); + +bool +ilo_blitter_blt_clear_zs(struct ilo_blitter *blitter, + struct pipe_surface *zs, + unsigned clear_flags, + double depth, unsigned stencil, + unsigned x, unsigned y, + unsigned width, unsigned height); + #endif /* ILO_BLITTER_H */ diff --git a/src/gallium/drivers/ilo/ilo_blitter_blt.c b/src/gallium/drivers/ilo/ilo_blitter_blt.c new file mode 100644 index 00000000000..55fc68b08c0 --- /dev/null +++ b/src/gallium/drivers/ilo/ilo_blitter_blt.c @@ -0,0 +1,805 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "util/u_pack_color.h" +#include "intel_reg.h" + +#include "ilo_context.h" +#include "ilo_cp.h" +#include "ilo_resource.h" +#include "ilo_blitter.h" + +#ifndef COLOR_BLT_CMD +#define COLOR_BLT_CMD (CMD_2D | (0x40 << 22)) +#endif + +#ifndef SRC_COPY_BLT_CMD +#define SRC_COPY_BLT_CMD (CMD_2D | (0x43 << 22)) +#endif + +enum gen6_blt_mask { + GEN6_BLT_MASK_8, + GEN6_BLT_MASK_16, + GEN6_BLT_MASK_32, + GEN6_BLT_MASK_32_LO, + GEN6_BLT_MASK_32_HI, +}; + +/* + * From the Sandy Bridge PRM, volume 1 part 5, page 7: + * + * "The BLT engine is capable of transferring very large quantities of + * graphics data. Any graphics data read from and written to the + * destination is permitted to represent a number of pixels that occupies + * up to 65,536 scan lines and up to 32,768 bytes per scan line at the + * destination. The maximum number of pixels that may be represented per + * scan line's worth of graphics data depends on the color depth." + */ +static const int gen6_max_bytes_per_scanline = 32768; +static const int gen6_max_scanlines = 65536; + +static void +gen6_emit_MI_FLUSH_DW(struct ilo_dev_info *dev, struct ilo_cp *cp) +{ + const uint8_t cmd_len = 4; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, MI_FLUSH_DW | (cmd_len - 2)); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_write(cp, 0); + ilo_cp_end(cp); +} + +static void +gen6_emit_MI_LOAD_REGISTER_IMM(struct ilo_dev_info *dev, + uint32_t reg, uint32_t val, + struct ilo_cp *cp) +{ + const uint8_t cmd_len = 3; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, MI_LOAD_REGISTER_IMM | (cmd_len - 2)); + ilo_cp_write(cp, reg); + ilo_cp_write(cp, val); + ilo_cp_end(cp); +} + +static uint32_t +gen6_translate_blt_value_mask(enum gen6_blt_mask value_mask) +{ + switch (value_mask) { + case GEN6_BLT_MASK_8: return BR13_8; + case GEN6_BLT_MASK_16: return BR13_565; + default: return BR13_8888; + } +} + +static uint32_t +gen6_translate_blt_write_mask(enum gen6_blt_mask write_mask) +{ + switch (write_mask) { + case GEN6_BLT_MASK_32: return XY_BLT_WRITE_RGB | + XY_BLT_WRITE_ALPHA; + case GEN6_BLT_MASK_32_LO: return XY_BLT_WRITE_RGB; + case GEN6_BLT_MASK_32_HI: return XY_BLT_WRITE_ALPHA; + default: return 0; + } +} + +static uint32_t +gen6_translate_blt_cpp(enum gen6_blt_mask mask) +{ + switch (mask) { + case GEN6_BLT_MASK_8: return 1; + case GEN6_BLT_MASK_16: return 2; + default: return 4; + } +} + +static void +gen6_emit_COLOR_BLT(struct ilo_dev_info *dev, + struct intel_bo *dst_bo, + int16_t dst_pitch, uint32_t dst_offset, + uint16_t width, uint16_t height, + uint32_t pattern, uint8_t rop, + enum gen6_blt_mask value_mask, + enum gen6_blt_mask write_mask, + struct ilo_cp *cp) +{ + const uint8_t cmd_len = 5; + const int cpp = gen6_translate_blt_cpp(value_mask); + uint32_t dw0, dw1; + + dw0 = COLOR_BLT_CMD | + gen6_translate_blt_write_mask(write_mask) | + (cmd_len - 2); + + assert(width < gen6_max_bytes_per_scanline); + assert(height < gen6_max_scanlines); + /* offsets are naturally aligned and pitches are dword-aligned */ + assert(dst_offset % cpp == 0 && dst_pitch % 4 == 0); + + dw1 = rop << 16 | + gen6_translate_blt_value_mask(value_mask) | + dst_pitch; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, dw0); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, height << 16 | width); + ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER, + INTEL_DOMAIN_RENDER); + ilo_cp_write(cp, pattern); + ilo_cp_end(cp); +} + +static void +gen6_emit_XY_COLOR_BLT(struct ilo_dev_info *dev, + struct intel_bo *dst_bo, + enum intel_tiling_mode dst_tiling, + int16_t dst_pitch, uint32_t dst_offset, + int16_t x1, int16_t y1, int16_t x2, int16_t y2, + uint32_t pattern, uint8_t rop, + enum gen6_blt_mask value_mask, + enum gen6_blt_mask write_mask, + struct ilo_cp *cp) +{ + const uint8_t cmd_len = 6; + const int cpp = gen6_translate_blt_cpp(value_mask); + int dst_align, dst_pitch_shift; + uint32_t dw0, dw1; + + dw0 = XY_COLOR_BLT_CMD | + gen6_translate_blt_write_mask(write_mask) | + (cmd_len - 2); + + if (dst_tiling == INTEL_TILING_NONE) { + dst_align = 4; + dst_pitch_shift = 0; + } + else { + dw0 |= XY_DST_TILED; + + dst_align = (dst_tiling == INTEL_TILING_Y) ? 128 : 512; + /* in dwords when tiled */ + dst_pitch_shift = 2; + } + + assert((x2 - x1) * cpp < gen6_max_bytes_per_scanline); + assert(y2 - y1 < gen6_max_scanlines); + assert(dst_offset % dst_align == 0 && dst_pitch % dst_align == 0); + + dw1 = rop << 16 | + gen6_translate_blt_value_mask(value_mask) | + dst_pitch >> dst_pitch_shift; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, dw0); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, y1 << 16 | x1); + ilo_cp_write(cp, y2 << 16 | x2); + ilo_cp_write_bo(cp, dst_offset, dst_bo, + INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER); + ilo_cp_write(cp, pattern); + ilo_cp_end(cp); +} + +static void +gen6_emit_SRC_COPY_BLT(struct ilo_dev_info *dev, + struct intel_bo *dst_bo, + int16_t dst_pitch, uint32_t dst_offset, + uint16_t width, uint16_t height, + struct intel_bo *src_bo, + int16_t src_pitch, uint32_t src_offset, + bool dir_rtl, uint8_t rop, + enum gen6_blt_mask value_mask, + enum gen6_blt_mask write_mask, + struct ilo_cp *cp) +{ + const uint8_t cmd_len = 6; + const int cpp = gen6_translate_blt_cpp(value_mask); + uint32_t dw0, dw1; + + dw0 = SRC_COPY_BLT_CMD | + gen6_translate_blt_write_mask(write_mask) | + (cmd_len - 2); + + assert(width < gen6_max_bytes_per_scanline); + assert(height < gen6_max_scanlines); + /* offsets are naturally aligned and pitches are dword-aligned */ + assert(dst_offset % cpp == 0 && dst_pitch % 4 == 0); + assert(src_offset % cpp == 0 && src_pitch % 4 == 0); + + dw1 = rop << 16 | + gen6_translate_blt_value_mask(value_mask) | + dst_pitch; + + if (dir_rtl) + dw1 |= 1 << 30; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, dw0); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, height << 16 | width); + ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER, + INTEL_DOMAIN_RENDER); + ilo_cp_write(cp, src_pitch); + ilo_cp_write_bo(cp, src_offset, src_bo, INTEL_DOMAIN_RENDER, 0); + ilo_cp_end(cp); +} + +static void +gen6_emit_XY_SRC_COPY_BLT(struct ilo_dev_info *dev, + struct intel_bo *dst_bo, + enum intel_tiling_mode dst_tiling, + int16_t dst_pitch, uint32_t dst_offset, + int16_t x1, int16_t y1, int16_t x2, int16_t y2, + struct intel_bo *src_bo, + enum intel_tiling_mode src_tiling, + int16_t src_pitch, uint32_t src_offset, + int16_t src_x, int16_t src_y, uint8_t rop, + enum gen6_blt_mask value_mask, + enum gen6_blt_mask write_mask, + struct ilo_cp *cp) +{ + const uint8_t cmd_len = 8; + const int cpp = gen6_translate_blt_cpp(value_mask); + int dst_align, dst_pitch_shift; + int src_align, src_pitch_shift; + uint32_t dw0, dw1; + + dw0 = XY_SRC_COPY_BLT_CMD | + gen6_translate_blt_write_mask(write_mask) | + (cmd_len - 2); + + if (dst_tiling == INTEL_TILING_NONE) { + dst_align = 4; + dst_pitch_shift = 0; + } + else { + dw0 |= XY_DST_TILED; + + dst_align = (dst_tiling == INTEL_TILING_Y) ? 128 : 512; + /* in dwords when tiled */ + dst_pitch_shift = 2; + } + + if (src_tiling == INTEL_TILING_NONE) { + src_align = 4; + src_pitch_shift = 0; + } + else { + dw0 |= XY_SRC_TILED; + + src_align = (src_tiling == INTEL_TILING_Y) ? 128 : 512; + /* in dwords when tiled */ + src_pitch_shift = 2; + } + + assert((x2 - x1) * cpp < gen6_max_bytes_per_scanline); + assert(y2 - y1 < gen6_max_scanlines); + assert(dst_offset % dst_align == 0 && dst_pitch % dst_align == 0); + assert(src_offset % src_align == 0 && src_pitch % src_align == 0); + + dw1 = rop << 16 | + gen6_translate_blt_value_mask(value_mask) | + dst_pitch >> dst_pitch_shift; + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, dw0); + ilo_cp_write(cp, dw1); + ilo_cp_write(cp, y1 << 16 | x1); + ilo_cp_write(cp, y2 << 16 | x2); + ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER, + INTEL_DOMAIN_RENDER); + ilo_cp_write(cp, src_y << 16 | src_x); + ilo_cp_write(cp, src_pitch >> src_pitch_shift); + ilo_cp_write_bo(cp, src_offset, src_bo, INTEL_DOMAIN_RENDER, 0); + ilo_cp_end(cp); +} + +static uint32_t +ilo_blitter_blt_begin(struct ilo_blitter *blitter, int max_cmd_size, + struct intel_bo *dst, enum intel_tiling_mode dst_tiling, + struct intel_bo *src, enum intel_tiling_mode src_tiling) +{ + struct ilo_context *ilo = blitter->ilo; + struct intel_bo *aper_check[3]; + int count; + uint32_t swctrl; + + /* change ring */ + ilo_cp_set_ring(ilo->cp, ILO_CP_RING_BLT); + ilo_cp_set_owner(ilo->cp, NULL, 0); + + /* check aperture space */ + aper_check[0] = ilo->cp->bo; + aper_check[1] = dst; + count = 2; + + if (src) { + aper_check[2] = src; + count++; + } + + if (intel_winsys_check_aperture_space(ilo->winsys, aper_check, count)) + ilo_cp_flush(ilo->cp); + + /* set BCS_SWCTRL */ + swctrl = 0x0; + + if (dst_tiling == INTEL_TILING_Y) { + swctrl |= BCS_SWCTRL_DST_Y << 16 | + BCS_SWCTRL_DST_Y; + } + + if (src && src_tiling == INTEL_TILING_Y) { + swctrl |= BCS_SWCTRL_SRC_Y << 16 | + BCS_SWCTRL_SRC_Y; + } + + if (swctrl) { + /* + * Most clients expect BLT engine to be stateless. If we have to set + * BCS_SWCTRL to a non-default value, we have to set it back in the same + * batch buffer. + */ + if (ilo_cp_space(ilo->cp) < (4 + 3) * 2 + max_cmd_size) + ilo_cp_flush(ilo->cp); + + ilo_cp_assert_no_implicit_flush(ilo->cp, true); + + /* + * From the Ivy Bridge PRM, volume 1 part 4, page 133: + * + * "SW is required to flush the HW before changing the polarity of + * this bit (Tile Y Destination/Source)." + */ + gen6_emit_MI_FLUSH_DW(ilo->dev, ilo->cp); + gen6_emit_MI_LOAD_REGISTER_IMM(ilo->dev, BCS_SWCTRL, swctrl, ilo->cp); + + swctrl &= ~(BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y); + } + + return swctrl; +} + +static void +ilo_blitter_blt_end(struct ilo_blitter *blitter, uint32_t swctrl) +{ + struct ilo_context *ilo = blitter->ilo; + + /* set BCS_SWCTRL back */ + if (swctrl) { + gen6_emit_MI_FLUSH_DW(ilo->dev, ilo->cp); + gen6_emit_MI_LOAD_REGISTER_IMM(ilo->dev, BCS_SWCTRL, swctrl, ilo->cp); + + ilo_cp_assert_no_implicit_flush(ilo->cp, false); + } +} + +static bool +buf_clear_region(struct ilo_blitter *blitter, + struct ilo_buffer *dst, + unsigned dst_offset, unsigned dst_size, + uint32_t val, + enum gen6_blt_mask value_mask, + enum gen6_blt_mask write_mask) +{ + const uint8_t rop = 0xf0; /* PATCOPY */ + const int cpp = gen6_translate_blt_cpp(value_mask); + struct ilo_context *ilo = blitter->ilo; + unsigned offset = 0; + + if (dst_offset % cpp || dst_size % cpp) + return false; + + ilo_blitter_blt_begin(blitter, 0, + dst->bo, INTEL_TILING_NONE, NULL, INTEL_TILING_NONE); + + while (dst_size) { + unsigned width, height; + int16_t pitch; + + width = dst_size; + height = 1; + pitch = 0; + + if (width > gen6_max_bytes_per_scanline) { + /* less than INT16_MAX and dword-aligned */ + pitch = 32764; + + width = pitch; + height = dst_size / width; + if (height > gen6_max_scanlines) + height = gen6_max_scanlines; + } + + gen6_emit_COLOR_BLT(ilo->dev, dst->bo, pitch, dst_offset + offset, + width, height, val, rop, value_mask, write_mask, ilo->cp); + + offset += pitch * height; + dst_size -= width * height; + } + + ilo_blitter_blt_end(blitter, 0); + + return true; +} + +static bool +buf_copy_region(struct ilo_blitter *blitter, + struct ilo_buffer *dst, unsigned dst_offset, + struct ilo_buffer *src, unsigned src_offset, + unsigned size) +{ + const uint8_t rop = 0xcc; /* SRCCOPY */ + struct ilo_context *ilo = blitter->ilo; + unsigned offset = 0; + + ilo_blitter_blt_begin(blitter, 0, + dst->bo, INTEL_TILING_NONE, src->bo, INTEL_TILING_NONE); + + while (size) { + unsigned width, height; + int16_t pitch; + + width = size; + height = 1; + pitch = 0; + + if (width > gen6_max_bytes_per_scanline) { + /* less than INT16_MAX and dword-aligned */ + pitch = 32764; + + width = pitch; + height = size / width; + if (height > gen6_max_scanlines) + height = gen6_max_scanlines; + } + + gen6_emit_SRC_COPY_BLT(ilo->dev, + dst->bo, pitch, dst_offset + offset, + width, height, + src->bo, pitch, src_offset + offset, + false, rop, GEN6_BLT_MASK_8, GEN6_BLT_MASK_8, + ilo->cp); + + offset += pitch * height; + size -= width * height; + } + + ilo_blitter_blt_end(blitter, 0); + + return true; +} + +static bool +tex_clear_region(struct ilo_blitter *blitter, + struct ilo_texture *dst, unsigned dst_level, + const struct pipe_box *dst_box, + uint32_t val, + enum gen6_blt_mask value_mask, + enum gen6_blt_mask write_mask) +{ + const int cpp = gen6_translate_blt_cpp(value_mask); + const unsigned max_extent = 32767; /* INT16_MAX */ + const uint8_t rop = 0xf0; /* PATCOPY */ + struct ilo_context *ilo = blitter->ilo; + uint32_t swctrl; + int slice; + + /* no W-tiling support */ + if (dst->separate_s8) + return false; + + if (dst->bo_stride > max_extent) + return false; + + swctrl = ilo_blitter_blt_begin(blitter, dst_box->depth * 6, + dst->bo, dst->tiling, NULL, INTEL_TILING_NONE); + + for (slice = 0; slice < dst_box->depth; slice++) { + const struct ilo_texture_slice *dst_slice = + &dst->slice_offsets[dst_level][dst_box->z + slice]; + unsigned x1, y1, x2, y2; + + x1 = dst_slice->x + dst_box->x; + y1 = dst_slice->y + dst_box->y; + x2 = x1 + dst_box->width; + y2 = y1 + dst_box->height; + + if (x2 > max_extent || y2 > max_extent || + (x2 - x1) * cpp > gen6_max_bytes_per_scanline) + break; + + gen6_emit_XY_COLOR_BLT(ilo->dev, + dst->bo, dst->tiling, dst->bo_stride, 0, + x1, y1, x2, y2, val, rop, value_mask, write_mask, + ilo->cp); + } + + ilo_blitter_blt_end(blitter, swctrl); + + return (slice == dst_box->depth); +} + +static bool +tex_copy_region(struct ilo_blitter *blitter, + struct ilo_texture *dst, + unsigned dst_level, + unsigned dst_x, unsigned dst_y, unsigned dst_z, + struct ilo_texture *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + const struct util_format_description *desc = + util_format_description(dst->bo_format); + const unsigned max_extent = 32767; /* INT16_MAX */ + const uint8_t rop = 0xcc; /* SRCCOPY */ + struct ilo_context *ilo = blitter->ilo; + enum gen6_blt_mask mask; + uint32_t swctrl; + int cpp, xscale, slice; + + /* no W-tiling support */ + if (dst->separate_s8 || src->separate_s8) + return false; + + if (dst->bo_stride > max_extent || src->bo_stride > max_extent) + return false; + + cpp = desc->block.bits / 8; + xscale = 1; + + /* accommodate for larger cpp */ + if (cpp > 4) { + if (cpp % 2 == 1) + return false; + + cpp = (cpp % 4 == 0) ? 4 : 2; + xscale = (desc->block.bits / 8) / cpp; + } + + switch (cpp) { + case 1: + mask = GEN6_BLT_MASK_8; + break; + case 2: + mask = GEN6_BLT_MASK_16; + break; + case 4: + mask = GEN6_BLT_MASK_32; + break; + default: + return false; + break; + } + + swctrl = ilo_blitter_blt_begin(blitter, src_box->depth * 8, + dst->bo, dst->tiling, src->bo, src->tiling); + + for (slice = 0; slice < src_box->depth; slice++) { + const struct ilo_texture_slice *dst_slice = + &dst->slice_offsets[dst_level][dst_z + slice]; + const struct ilo_texture_slice *src_slice = + &src->slice_offsets[src_level][src_box->z + slice]; + unsigned x1, y1, x2, y2, src_x, src_y; + + x1 = (dst_slice->x + dst_x) * xscale; + y1 = dst_slice->y + dst_y; + x2 = (x1 + src_box->width) * xscale; + y2 = y1 + src_box->height; + src_x = (src_slice->x + src_box->x) * xscale; + src_y = src_slice->y + src_box->y; + + /* in blocks */ + x1 /= desc->block.width; + y1 /= desc->block.height; + x2 = (x2 + desc->block.width - 1) / desc->block.width; + y2 = (y2 + desc->block.height - 1) / desc->block.height; + src_x /= desc->block.width; + src_y /= desc->block.height; + + if (x2 > max_extent || y2 > max_extent || + src_x > max_extent || src_y > max_extent || + (x2 - x1) * cpp > gen6_max_bytes_per_scanline) + break; + + gen6_emit_XY_SRC_COPY_BLT(ilo->dev, + dst->bo, dst->tiling, dst->bo_stride, 0, + x1, y1, x2, y2, + src->bo, src->tiling, src->bo_stride, 0, + src_x, src_y, rop, mask, mask, + ilo->cp); + } + + ilo_blitter_blt_end(blitter, swctrl); + + return (slice == src_box->depth); +} + +bool +ilo_blitter_blt_copy_resource(struct ilo_blitter *blitter, + struct pipe_resource *dst, unsigned dst_level, + unsigned dst_x, unsigned dst_y, unsigned dst_z, + struct pipe_resource *src, unsigned src_level, + const struct pipe_box *src_box) +{ + bool success; + + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + const unsigned dst_offset = dst_x; + const unsigned src_offset = src_box->x; + const unsigned size = src_box->width; + + assert(dst_level == 0 && dst_y == 0 && dst_z == 0); + assert(src_level == 0 && + src_box->y == 0 && + src_box->z == 0 && + src_box->height == 1 && + src_box->depth == 1); + + success = buf_copy_region(blitter, + ilo_buffer(dst), dst_offset, ilo_buffer(src), src_offset, size); + } + else if (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER) { + success = tex_copy_region(blitter, + ilo_texture(dst), dst_level, dst_x, dst_y, dst_z, + ilo_texture(src), src_level, src_box); + } + else { + success = false; + } + + return success; +} + +bool +ilo_blitter_blt_clear_rt(struct ilo_blitter *blitter, + struct pipe_surface *rt, + const union pipe_color_union *color, + unsigned x, unsigned y, + unsigned width, unsigned height) +{ + const int cpp = util_format_get_blocksize(rt->format); + enum gen6_blt_mask mask; + union util_color packed; + bool success; + + switch (cpp) { + case 1: + mask = GEN6_BLT_MASK_8; + break; + case 2: + mask = GEN6_BLT_MASK_16; + break; + case 4: + mask = GEN6_BLT_MASK_32; + break; + default: + return false; + break; + } + + if (util_format_is_pure_integer(rt->format) || + util_format_is_compressed(rt->format)) + return false; + + util_pack_color(color->f, rt->format, &packed); + + if (rt->texture->target == PIPE_BUFFER) { + unsigned offset, end, size; + + assert(y == 0 && height == 1); + + offset = (rt->u.buf.first_element + x) * cpp; + end = (rt->u.buf.last_element + 1) * cpp; + + size = width * cpp; + if (offset + size > end) + size = end - offset; + + success = buf_clear_region(blitter, ilo_buffer(rt->texture), + offset, size, packed.ui, mask, mask); + } + else { + struct pipe_box box; + + u_box_3d(x, y, rt->u.tex.first_layer, width, height, + rt->u.tex.last_layer - rt->u.tex.first_layer + 1, &box); + + success = tex_clear_region(blitter, ilo_texture(rt->texture), + rt->u.tex.level, &box, packed.ui, mask, mask); + } + + return success; +} + +bool +ilo_blitter_blt_clear_zs(struct ilo_blitter *blitter, + struct pipe_surface *zs, + unsigned clear_flags, + double depth, unsigned stencil, + unsigned x, unsigned y, + unsigned width, unsigned height) +{ + enum gen6_blt_mask value_mask, write_mask; + struct pipe_box box; + uint32_t val; + + switch (zs->format) { + case PIPE_FORMAT_Z16_UNORM: + if (!(clear_flags & PIPE_CLEAR_DEPTH)) + return true; + + value_mask = GEN6_BLT_MASK_16; + write_mask = GEN6_BLT_MASK_16; + break; + case PIPE_FORMAT_Z32_FLOAT: + if (!(clear_flags & PIPE_CLEAR_DEPTH)) + return true; + + value_mask = GEN6_BLT_MASK_32; + write_mask = GEN6_BLT_MASK_32; + break; + case PIPE_FORMAT_Z24X8_UNORM: + if (!(clear_flags & PIPE_CLEAR_DEPTH)) + return true; + + value_mask = GEN6_BLT_MASK_32; + write_mask = GEN6_BLT_MASK_32_LO; + break; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + if (!(clear_flags & PIPE_CLEAR_DEPTHSTENCIL)) + return true; + + value_mask = GEN6_BLT_MASK_32; + + if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) + write_mask = GEN6_BLT_MASK_32; + else if (clear_flags & PIPE_CLEAR_DEPTH) + write_mask = GEN6_BLT_MASK_32_LO; + else + write_mask = GEN6_BLT_MASK_32_HI; + break; + default: + return false; + break; + } + + val = util_pack_z_stencil(zs->format, depth, stencil); + + u_box_3d(x, y, zs->u.tex.first_layer, width, height, + zs->u.tex.last_layer - zs->u.tex.first_layer + 1, &box); + + assert(zs->texture->target != PIPE_BUFFER); + + return tex_clear_region(blitter, ilo_texture(zs->texture), + zs->u.tex.level, &box, val, value_mask, write_mask); +}