X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fintel_mipmap_tree.c;h=0818226f3c4a2ead5688fcae5c7f98b90a8c3f4a;hb=bdc5241af4aa9afbd66f6c96ee6d20e09f77ea89;hp=1776a4b34c578d27b236e05fe2b3ec2e70c0d39c;hpb=3dbba95b72262344b82fba018b7c2c1208754cd2;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 1776a4b34c5..0818226f3c4 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ #include @@ -30,23 +30,21 @@ #include "intel_batchbuffer.h" #include "intel_chipset.h" -#include "intel_context.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_resolve_map.h" -#include "intel_tex_layout.h" #include "intel_tex.h" #include "intel_blit.h" -#ifndef I915 #include "brw_blorp.h" -#endif +#include "brw_context.h" #include "main/enums.h" #include "main/formats.h" #include "main/glformats.h" #include "main/texcompress_etc.h" #include "main/teximage.h" +#include "main/streaming-load-memcpy.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE @@ -72,10 +70,10 @@ target_to_target(GLenum target) * created, based on the chip generation and the surface type. */ static enum intel_msaa_layout -compute_msaa_layout(struct intel_context *intel, gl_format format, GLenum target) +compute_msaa_layout(struct brw_context *brw, gl_format format, GLenum target) { /* Prior to Gen7, all MSAA surfaces used IMS layout. */ - if (intel->gen < 7) + if (brw->gen < 7) return INTEL_MSAA_LAYOUT_IMS; /* In Gen7, IMS layout is only used for depth and stencil buffers. */ @@ -98,26 +96,10 @@ compute_msaa_layout(struct intel_context *intel, gl_format format, GLenum target */ if (_mesa_get_format_datatype(format) == GL_INT) { /* TODO: is this workaround needed for future chipsets? */ - assert(intel->gen == 7); + assert(brw->gen == 7); return INTEL_MSAA_LAYOUT_UMS; } else { - /* For now, if we're going to be texturing from this surface, - * force UMS, so that the shader doesn't have to do different things - * based on whether there's a multisample control surface needing sampled first. - * We can't just blindly read the MCS surface in all cases because: - * - * From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"): - * - * If this field is disabled and the sampling engine message - * is issued on this surface, the MCS surface may be accessed. Software - * must ensure that the surface is defined to avoid GTT errors. - */ - if (target == GL_TEXTURE_2D_MULTISAMPLE || - target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { - return INTEL_MSAA_LAYOUT_UMS; - } else { - return INTEL_MSAA_LAYOUT_CMS; - } + return INTEL_MSAA_LAYOUT_CMS; } } } @@ -165,7 +147,7 @@ compute_msaa_layout(struct intel_context *intel, gl_format format, GLenum target * by half the block width, and Y coordinates by half the block height. */ void -intel_get_non_msrt_mcs_alignment(struct intel_context *intel, +intel_get_non_msrt_mcs_alignment(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned *width_px, unsigned *height) { @@ -200,17 +182,11 @@ intel_get_non_msrt_mcs_alignment(struct intel_context *intel, * 64bpp, and 128bpp. */ bool -intel_is_non_msrt_mcs_buffer_supported(struct intel_context *intel, +intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw, struct intel_mipmap_tree *mt) { -#ifdef I915 - /* MCS is not supported on the i915 (pre-Gen4) driver */ - return false; -#else - struct brw_context *brw = brw_context(&intel->ctx); - /* MCS support does not exist prior to Gen7 */ - if (intel->gen < 7) + if (brw->gen < 7 || brw->gen >= 8) return false; /* MCS is only supported for color buffers */ @@ -238,7 +214,6 @@ intel_is_non_msrt_mcs_buffer_supported(struct intel_context *intel, return false; return true; -#endif } @@ -248,7 +223,7 @@ intel_is_non_msrt_mcs_buffer_supported(struct intel_context *intel, * \c stencil_mt. */ struct intel_mipmap_tree * -intel_miptree_create_layout(struct intel_context *intel, +intel_miptree_create_layout(struct brw_context *brw, GLenum target, gl_format format, GLuint first_level, @@ -260,6 +235,8 @@ intel_miptree_create_layout(struct intel_context *intel, GLuint num_samples) { struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); + if (!mt) + return NULL; DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), @@ -273,9 +250,7 @@ intel_miptree_create_layout(struct intel_context *intel, mt->logical_width0 = width0; mt->logical_height0 = height0; mt->logical_depth0 = depth0; -#ifndef I915 - mt->mcs_state = INTEL_MCS_STATE_NONE; -#endif + mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; /* The cpp is bytes per (1, blockheight)-sized block for compressed * textures. This is why you'll see divides by blockheight all over @@ -288,11 +263,11 @@ intel_miptree_create_layout(struct intel_context *intel, mt->num_samples = num_samples; mt->compressed = _mesa_is_format_compressed(format); mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE; - mt->refcount = 1; + mt->refcount = 1; if (num_samples > 1) { /* Adjust width/height/depth for MSAA */ - mt->msaa_layout = compute_msaa_layout(intel, format, mt->target); + mt->msaa_layout = compute_msaa_layout(brw, format, mt->target); if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) { /* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says: * @@ -374,10 +349,9 @@ intel_miptree_create_layout(struct intel_context *intel, if (!for_bo && _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL && - (intel->must_use_separate_stencil || - (intel->has_separate_stencil && - intel->vtbl.is_hiz_depth_format(intel, format)))) { - mt->stencil_mt = intel_miptree_create(intel, + (brw->must_use_separate_stencil || + (brw->has_separate_stencil && brw_is_hiz_depth_format(brw, format)))) { + mt->stencil_mt = intel_miptree_create(brw, mt->target, MESA_FORMAT_S8, mt->first_level, @@ -407,18 +381,7 @@ intel_miptree_create_layout(struct intel_context *intel, } } - intel_get_texture_alignment_unit(intel, mt->format, - &mt->align_w, &mt->align_h); - -#ifdef I915 - (void) intel; - if (intel->is_945) - i945_miptree_layout(mt); - else - i915_miptree_layout(mt); -#else - brw_miptree_layout(intel, mt); -#endif + brw_miptree_layout(brw, mt); return mt; } @@ -427,14 +390,13 @@ intel_miptree_create_layout(struct intel_context *intel, * \brief Helper function for intel_miptree_create(). */ static uint32_t -intel_miptree_choose_tiling(struct intel_context *intel, +intel_miptree_choose_tiling(struct brw_context *brw, gl_format format, uint32_t width0, uint32_t num_samples, enum intel_miptree_tiling_mode requested, struct intel_mipmap_tree *mt) { - if (format == MESA_FORMAT_S8) { /* The stencil buffer is W tiled. However, we request from the kernel a * non-tiled buffer because the GTT is incapable of W fencing. @@ -471,9 +433,8 @@ intel_miptree_choose_tiling(struct intel_context *intel, } GLenum base_format = _mesa_get_format_base_format(format); - if (intel->gen >= 4 && - (base_format == GL_DEPTH_COMPONENT || - base_format == GL_DEPTH_STENCIL_EXT)) + if (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL_EXT) return I915_TILING_Y; int minimum_pitch = mt->total_width * mt->cpp; @@ -482,21 +443,48 @@ intel_miptree_choose_tiling(struct intel_context *intel, if (minimum_pitch < 64) return I915_TILING_NONE; - if (ALIGN(minimum_pitch, 512) >= 32768) { + if (ALIGN(minimum_pitch, 512) >= 32768 || + mt->total_width >= 32768 || mt->total_height >= 32768) { perf_debug("%dx%d miptree too large to blit, falling back to untiled", mt->total_width, mt->total_height); return I915_TILING_NONE; } /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */ - if (intel->gen < 6) + if (brw->gen < 6) return I915_TILING_X; + /* From the Sandybridge PRM, Volume 1, Part 2, page 32: + * "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX + * or Linear." + * 128 bits per pixel translates to 16 bytes per pixel. This is necessary + * all the way back to 965, but is explicitly permitted on Gen7. + */ + if (brw->gen != 7 && mt->cpp >= 16) + return I915_TILING_X; + + /* From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most + * messages), on p64, under the heading "Surface Vertical Alignment": + * + * This field must be set to VALIGN_4 for all tiled Y Render Target + * surfaces. + * + * So if the surface is renderable and uses a vertical alignment of 2, + * force it to be X tiled. This is somewhat conservative (it's possible + * that the client won't ever render to this surface), but it's difficult + * to know that ahead of time. And besides, since we use a vertical + * alignment of 4 as often as we can, this shouldn't happen very often. + */ + if (brw->gen == 7 && mt->align_h == 2 && + brw->format_supported_as_render_target[format]) { + return I915_TILING_X; + } + return I915_TILING_Y | I915_TILING_X; } struct intel_mipmap_tree * -intel_miptree_create(struct intel_context *intel, +intel_miptree_create(struct brw_context *brw, GLenum target, gl_format format, GLuint first_level, @@ -513,7 +501,7 @@ intel_miptree_create(struct intel_context *intel, gl_format etc_format = MESA_FORMAT_NONE; GLuint total_width, total_height; - if (!intel->is_baytrail) { + if (!brw->is_baytrail) { switch (format) { case MESA_FORMAT_ETC1_RGB8: format = MESA_FORMAT_RGBX8888_REV; @@ -550,7 +538,7 @@ intel_miptree_create(struct intel_context *intel, etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE; - mt = intel_miptree_create_layout(intel, target, format, + mt = intel_miptree_create_layout(brw, target, format, first_level, last_level, width0, height0, depth0, false, num_samples); @@ -571,13 +559,13 @@ intel_miptree_create(struct intel_context *intel, total_height = ALIGN(total_height, 64); } - uint32_t tiling = intel_miptree_choose_tiling(intel, format, width0, + uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0, num_samples, requested_tiling, mt); bool y_or_x = tiling == (I915_TILING_Y | I915_TILING_X); mt->etc_format = etc_format; - mt->region = intel_region_alloc(intel->intelScreen, + mt->region = intel_region_alloc(brw->intelScreen, y_or_x ? I915_TILING_Y : tiling, mt->cpp, total_width, @@ -588,12 +576,12 @@ intel_miptree_create(struct intel_context *intel, * BLT engine to support it. The BLT paths can't currently handle Y-tiling, * so we need to fall back to X. */ - if (y_or_x && mt->region->bo->size >= intel->max_gtt_map_object_size) { + if (y_or_x && mt->region->bo->size >= brw->max_gtt_map_object_size) { perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n", mt->total_width, mt->total_height); intel_region_release(&mt->region); - mt->region = intel_region_alloc(intel->intelScreen, + mt->region = intel_region_alloc(brw->intelScreen, I915_TILING_X, mt->cpp, total_width, @@ -608,21 +596,27 @@ intel_miptree_create(struct intel_context *intel, return NULL; } -#ifndef I915 + + if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) { + if (!intel_miptree_alloc_mcs(brw, mt, num_samples)) { + intel_miptree_release(&mt); + return NULL; + } + } + /* If this miptree is capable of supporting fast color clears, set - * mcs_state appropriately to ensure that fast clears will occur. + * fast_clear_state appropriately to ensure that fast clears will occur. * Allocation of the MCS miptree will be deferred until the first fast * clear actually occurs. */ - if (intel_is_non_msrt_mcs_buffer_supported(intel, mt)) - mt->mcs_state = INTEL_MCS_STATE_RESOLVED; -#endif + if (intel_is_non_msrt_mcs_buffer_supported(brw, mt)) + mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; return mt; } struct intel_mipmap_tree * -intel_miptree_create_for_bo(struct intel_context *intel, +intel_miptree_create_for_bo(struct brw_context *brw, drm_intel_bo *bo, gl_format format, uint32_t offset, @@ -648,12 +642,14 @@ intel_miptree_create_for_bo(struct intel_context *intel, */ assert(pitch >= 0); - mt = intel_miptree_create_layout(intel, GL_TEXTURE_2D, format, + mt = intel_miptree_create_layout(brw, GL_TEXTURE_2D, format, 0, 0, width, height, 1, true, 0 /* num_samples */); - if (!mt) + if (!mt) { + free(region); return mt; + } region->cpp = mt->cpp; region->width = width; @@ -679,7 +675,7 @@ intel_miptree_create_for_bo(struct intel_context *intel, * singlesample miptree is embedded as a child. */ struct intel_mipmap_tree* -intel_miptree_create_for_dri2_buffer(struct intel_context *intel, +intel_miptree_create_for_dri2_buffer(struct brw_context *brw, unsigned dri_attachment, gl_format format, uint32_t num_samples, @@ -697,7 +693,7 @@ intel_miptree_create_for_dri2_buffer(struct intel_context *intel, assert(_mesa_get_format_base_format(format) == GL_RGB || _mesa_get_format_base_format(format) == GL_RGBA); - singlesample_mt = intel_miptree_create_for_bo(intel, + singlesample_mt = intel_miptree_create_for_bo(brw, region->bo, format, 0, @@ -709,15 +705,80 @@ intel_miptree_create_for_dri2_buffer(struct intel_context *intel, return NULL; singlesample_mt->region->name = region->name; -#ifndef I915 + /* If this miptree is capable of supporting fast color clears, set + * fast_clear_state appropriately to ensure that fast clears will occur. + * Allocation of the MCS miptree will be deferred until the first fast + * clear actually occurs. + */ + if (intel_is_non_msrt_mcs_buffer_supported(brw, singlesample_mt)) + singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; + + if (num_samples == 0) + return singlesample_mt; + + multisample_mt = intel_miptree_create_for_renderbuffer(brw, + format, + region->width, + region->height, + num_samples); + if (!multisample_mt) { + intel_miptree_release(&singlesample_mt); + return NULL; + } + + multisample_mt->singlesample_mt = singlesample_mt; + multisample_mt->need_downsample = false; + + if (brw->is_front_buffer_rendering && + (dri_attachment == __DRI_BUFFER_FRONT_LEFT || + dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)) { + intel_miptree_upsample(brw, multisample_mt); + } + + return multisample_mt; +} + +/** + * For a singlesample image buffer, this simply wraps the given region with a miptree. + * + * For a multisample image buffer, this wraps the given region with + * a singlesample miptree, then creates a multisample miptree into which the + * singlesample miptree is embedded as a child. + */ +struct intel_mipmap_tree* +intel_miptree_create_for_image_buffer(struct brw_context *intel, + enum __DRIimageBufferMask buffer_type, + gl_format format, + uint32_t num_samples, + struct intel_region *region) +{ + struct intel_mipmap_tree *singlesample_mt = NULL; + struct intel_mipmap_tree *multisample_mt = NULL; + + /* Only the front and back buffers, which are color buffers, are allocated + * through the image loader. + */ + assert(_mesa_get_format_base_format(format) == GL_RGB || + _mesa_get_format_base_format(format) == GL_RGBA); + + singlesample_mt = intel_miptree_create_for_bo(intel, + region->bo, + format, + 0, + region->width, + region->height, + region->pitch, + region->tiling); + if (!singlesample_mt) + return NULL; + /* If this miptree is capable of supporting fast color clears, set * mcs_state appropriately to ensure that fast clears will occur. * Allocation of the MCS miptree will be deferred until the first fast * clear actually occurs. */ if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt)) - singlesample_mt->mcs_state = INTEL_MCS_STATE_RESOLVED; -#endif + singlesample_mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; if (num_samples == 0) return singlesample_mt; @@ -735,9 +796,7 @@ intel_miptree_create_for_dri2_buffer(struct intel_context *intel, multisample_mt->singlesample_mt = singlesample_mt; multisample_mt->need_downsample = false; - if (intel->is_front_buffer_rendering && - (dri_attachment == __DRI_BUFFER_FRONT_LEFT || - dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)) { + if (intel->is_front_buffer_rendering && buffer_type == __DRI_IMAGE_BUFFER_FRONT) { intel_miptree_upsample(intel, multisample_mt); } @@ -745,7 +804,7 @@ intel_miptree_create_for_dri2_buffer(struct intel_context *intel, } struct intel_mipmap_tree* -intel_miptree_create_for_renderbuffer(struct intel_context *intel, +intel_miptree_create_for_renderbuffer(struct brw_context *brw, gl_format format, uint32_t width, uint32_t height, @@ -755,20 +814,14 @@ intel_miptree_create_for_renderbuffer(struct intel_context *intel, uint32_t depth = 1; bool ok; - mt = intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0, + mt = intel_miptree_create(brw, GL_TEXTURE_2D, format, 0, 0, width, height, depth, true, num_samples, INTEL_MIPTREE_TILING_ANY); if (!mt) goto fail; - if (intel->vtbl.is_hiz_depth_format(intel, format)) { - ok = intel_miptree_alloc_hiz(intel, mt); - if (!ok) - goto fail; - } - - if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) { - ok = intel_miptree_alloc_mcs(intel, mt, num_samples); + if (brw_is_hiz_depth_format(brw, format)) { + ok = intel_miptree_alloc_hiz(brw, mt); if (!ok) goto fail; } @@ -813,9 +866,7 @@ intel_miptree_release(struct intel_mipmap_tree **mt) intel_region_release(&((*mt)->region)); intel_miptree_release(&(*mt)->stencil_mt); intel_miptree_release(&(*mt)->hiz_mt); -#ifndef I915 intel_miptree_release(&(*mt)->mcs_mt); -#endif intel_miptree_release(&(*mt)->singlesample_mt); intel_resolve_map_clear(&(*mt)->hiz_map); @@ -997,7 +1048,7 @@ intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt, } static void -intel_miptree_copy_slice_sw(struct intel_context *intel, +intel_miptree_copy_slice_sw(struct brw_context *brw, struct intel_mipmap_tree *dst_mt, struct intel_mipmap_tree *src_mt, int level, @@ -1009,14 +1060,14 @@ intel_miptree_copy_slice_sw(struct intel_context *intel, int src_stride, dst_stride; int cpp = dst_mt->cpp; - intel_miptree_map(intel, src_mt, + intel_miptree_map(brw, src_mt, level, slice, 0, 0, width, height, GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT, &src, &src_stride); - intel_miptree_map(intel, dst_mt, + intel_miptree_map(brw, dst_mt, level, slice, 0, 0, width, height, @@ -1043,8 +1094,8 @@ intel_miptree_copy_slice_sw(struct intel_context *intel, } } - intel_miptree_unmap(intel, dst_mt, level, slice); - intel_miptree_unmap(intel, src_mt, level, slice); + intel_miptree_unmap(brw, dst_mt, level, slice); + intel_miptree_unmap(brw, src_mt, level, slice); /* Don't forget to copy the stencil data over, too. We could have skipped * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map @@ -1053,13 +1104,13 @@ intel_miptree_copy_slice_sw(struct intel_context *intel, */ if (dst_mt->stencil_mt) { assert(src_mt->stencil_mt); - intel_miptree_copy_slice_sw(intel, dst_mt->stencil_mt, src_mt->stencil_mt, + intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt, level, slice, width, height); } } static void -intel_miptree_copy_slice(struct intel_context *intel, +intel_miptree_copy_slice(struct brw_context *brw, struct intel_mipmap_tree *dst_mt, struct intel_mipmap_tree *src_mt, int level, @@ -1090,7 +1141,7 @@ intel_miptree_copy_slice(struct intel_context *intel, * stencil's W tiling in the blitter. */ if (src_mt->stencil_mt) { - intel_miptree_copy_slice_sw(intel, + intel_miptree_copy_slice_sw(brw, dst_mt, src_mt, level, slice, width, height); @@ -1108,14 +1159,14 @@ intel_miptree_copy_slice(struct intel_context *intel, dst_mt, dst_x, dst_y, dst_mt->region->pitch, width, height); - if (!intel_miptree_blit(intel, + if (!intel_miptree_blit(brw, src_mt, level, slice, 0, 0, false, dst_mt, level, slice, 0, 0, false, width, height, GL_COPY)) { perf_debug("miptree validate blit for %s failed\n", _mesa_get_format_name(format)); - intel_miptree_copy_slice_sw(intel, dst_mt, src_mt, level, slice, + intel_miptree_copy_slice_sw(brw, dst_mt, src_mt, level, slice, width, height); } } @@ -1129,7 +1180,7 @@ intel_miptree_copy_slice(struct intel_context *intel, * is set to true if we're about to clear the image). */ void -intel_miptree_copy_teximage(struct intel_context *intel, +intel_miptree_copy_teximage(struct brw_context *brw, struct intel_texture_image *intelImage, struct intel_mipmap_tree *dst_mt, bool invalidate) @@ -1143,7 +1194,7 @@ intel_miptree_copy_teximage(struct intel_context *intel, if (!invalidate) { for (int slice = 0; slice < depth; slice++) { - intel_miptree_copy_slice(intel, dst_mt, src_mt, level, face, slice); + intel_miptree_copy_slice(brw, dst_mt, src_mt, level, face, slice); } } @@ -1152,14 +1203,11 @@ intel_miptree_copy_teximage(struct intel_context *intel, } bool -intel_miptree_alloc_mcs(struct intel_context *intel, +intel_miptree_alloc_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt, GLuint num_samples) { - assert(intel->gen >= 7); /* MCS only used on Gen7+ */ -#ifdef I915 - return false; -#else + assert(brw->gen >= 7); /* MCS only used on Gen7+ */ assert(mt->mcs_mt == NULL); /* Choose the correct format for the MCS buffer. All that really matters @@ -1190,8 +1238,7 @@ intel_miptree_alloc_mcs(struct intel_context *intel, * * "The MCS surface must be stored as Tile Y." */ - mt->mcs_state = INTEL_MCS_STATE_MSAA; - mt->mcs_mt = intel_miptree_create(intel, + mt->mcs_mt = intel_miptree_create(brw, mt->target, format, mt->first_level, @@ -1213,23 +1260,19 @@ intel_miptree_alloc_mcs(struct intel_context *intel, * * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff. */ - void *data = intel_miptree_map_raw(intel, mt->mcs_mt); + void *data = intel_miptree_map_raw(brw, mt->mcs_mt); memset(data, 0xff, mt->mcs_mt->region->bo->size); - intel_miptree_unmap_raw(intel, mt->mcs_mt); + intel_miptree_unmap_raw(brw, mt->mcs_mt); + mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; return mt->mcs_mt; -#endif } bool -intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, +intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw, struct intel_mipmap_tree *mt) { -#ifdef I915 - assert(!"MCS not supported on i915"); - return false; -#else assert(mt->mcs_mt == NULL); /* The format of the MCS buffer is opaque to the driver; all that matters @@ -1244,7 +1287,7 @@ intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, const gl_format format = MESA_FORMAT_R_UINT32; unsigned block_width_px; unsigned block_height; - intel_get_non_msrt_mcs_alignment(intel, mt, &block_width_px, &block_height); + intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height); unsigned width_divisor = block_width_px * 4; unsigned height_divisor = block_height * 8; unsigned mcs_width = @@ -1252,7 +1295,7 @@ intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, unsigned mcs_height = ALIGN(mt->logical_height0, height_divisor) / height_divisor; assert(mt->logical_depth0 == 1); - mt->mcs_mt = intel_miptree_create(intel, + mt->mcs_mt = intel_miptree_create(brw, mt->target, format, mt->first_level, @@ -1265,7 +1308,6 @@ intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, INTEL_MIPTREE_TILING_Y); return mt->mcs_mt; -#endif } @@ -1275,37 +1317,23 @@ intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, * \c has_hiz was set. */ static bool -intel_miptree_slice_enable_hiz(struct intel_context *intel, +intel_miptree_slice_enable_hiz(struct brw_context *brw, struct intel_mipmap_tree *mt, uint32_t level, uint32_t layer) { assert(mt->hiz_mt); - if (intel->is_haswell) { - /* Disable HiZ for some slices to work around a hardware bug. - * - * Haswell hardware fails to respect - * 3DSTATE_DEPTH_BUFFER.Depth_Coordinate_Offset_X/Y when during HiZ - * ambiguate operations. The failure is inconsistent and affected by - * other GPU contexts. Running a heavy GPU workload in a separate - * process causes the failure rate to drop to nearly 0. - * - * To workaround the bug, we enable HiZ only when we can guarantee that - * the Depth Coordinate Offset fields will be set to 0. The function - * brw_get_depthstencil_tile_masks() is used to calculate the fields, - * and the function is sometimes called in such a way that the presence - * of an attached stencil buffer changes the fuction's return value. - * - * The largest tile size considered by brw_get_depthstencil_tile_masks() - * is that of the stencil buffer. Therefore, if this hiz slice's - * corresponding depth slice has an offset that is aligned to the - * stencil buffer tile size, 64x64 pixels, then - * 3DSTATE_DEPTH_BUFFER.Depth_Coordinate_Offset_X/Y is set to 0. + if (brw->is_haswell) { + const struct intel_mipmap_level *l = &mt->level[level]; + + /* Disable HiZ for LOD > 0 unless the width is 8 aligned + * and the height is 4 aligned. This allows our HiZ support + * to fulfill Haswell restrictions for HiZ ops. For LOD == 0, + * we can grow the width & height to allow the HiZ op to + * force the proper size alignments. */ - uint32_t depth_x_offset = mt->level[level].slice[layer].x_offset; - uint32_t depth_y_offset = mt->level[level].slice[layer].y_offset; - if ((depth_x_offset & 63) || (depth_y_offset & 63)) { + if (level > 0 && ((l->width & 7) || (l->height & 3))) { return false; } } @@ -1317,11 +1345,11 @@ intel_miptree_slice_enable_hiz(struct intel_context *intel, bool -intel_miptree_alloc_hiz(struct intel_context *intel, +intel_miptree_alloc_hiz(struct brw_context *brw, struct intel_mipmap_tree *mt) { assert(mt->hiz_mt == NULL); - mt->hiz_mt = intel_miptree_create(intel, + mt->hiz_mt = intel_miptree_create(brw, mt->target, mt->format, mt->first_level, @@ -1340,7 +1368,7 @@ intel_miptree_alloc_hiz(struct intel_context *intel, struct intel_resolve_map *head = &mt->hiz_map; for (int level = mt->first_level; level <= mt->last_level; ++level) { for (int layer = 0; layer < mt->level[level].depth; ++layer) { - if (!intel_miptree_slice_enable_hiz(intel, mt, level, layer)) + if (!intel_miptree_slice_enable_hiz(brw, mt, level, layer)) continue; head->next = malloc(sizeof(*head->next)); @@ -1394,8 +1422,20 @@ intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); } +void +intel_miptree_set_all_slices_need_depth_resolve(struct intel_mipmap_tree *mt, + uint32_t level) +{ + uint32_t layer; + uint32_t end_layer = mt->level[level].depth; + + for (layer = 0; layer < end_layer; layer++) { + intel_miptree_slice_set_needs_depth_resolve(mt, level, layer); + } +} + static bool -intel_miptree_slice_resolve(struct intel_context *intel, +intel_miptree_slice_resolve(struct brw_context *brw, struct intel_mipmap_tree *mt, uint32_t level, uint32_t layer, @@ -1409,33 +1449,33 @@ intel_miptree_slice_resolve(struct intel_context *intel, if (!item || item->need != need) return false; - intel_hiz_exec(intel, mt, level, layer, need); + intel_hiz_exec(brw, mt, level, layer, need); intel_resolve_map_remove(item); return true; } bool -intel_miptree_slice_resolve_hiz(struct intel_context *intel, +intel_miptree_slice_resolve_hiz(struct brw_context *brw, struct intel_mipmap_tree *mt, uint32_t level, uint32_t layer) { - return intel_miptree_slice_resolve(intel, mt, level, layer, + return intel_miptree_slice_resolve(brw, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE); } bool -intel_miptree_slice_resolve_depth(struct intel_context *intel, +intel_miptree_slice_resolve_depth(struct brw_context *brw, struct intel_mipmap_tree *mt, uint32_t level, uint32_t layer) { - return intel_miptree_slice_resolve(intel, mt, level, layer, + return intel_miptree_slice_resolve(brw, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); } static bool -intel_miptree_all_slices_resolve(struct intel_context *intel, +intel_miptree_all_slices_resolve(struct brw_context *brw, struct intel_mipmap_tree *mt, enum gen6_hiz_op need) { @@ -1447,7 +1487,7 @@ intel_miptree_all_slices_resolve(struct intel_context *intel, if (i->need != need) continue; - intel_hiz_exec(intel, mt, i->level, i->layer, need); + intel_hiz_exec(brw, mt, i->level, i->layer, need); intel_resolve_map_remove(i); did_resolve = true; } @@ -1456,41 +1496,38 @@ intel_miptree_all_slices_resolve(struct intel_context *intel, } bool -intel_miptree_all_slices_resolve_hiz(struct intel_context *intel, +intel_miptree_all_slices_resolve_hiz(struct brw_context *brw, struct intel_mipmap_tree *mt) { - return intel_miptree_all_slices_resolve(intel, mt, + return intel_miptree_all_slices_resolve(brw, mt, GEN6_HIZ_OP_HIZ_RESOLVE); } bool -intel_miptree_all_slices_resolve_depth(struct intel_context *intel, +intel_miptree_all_slices_resolve_depth(struct brw_context *brw, struct intel_mipmap_tree *mt) { - return intel_miptree_all_slices_resolve(intel, mt, + return intel_miptree_all_slices_resolve(brw, mt, GEN6_HIZ_OP_DEPTH_RESOLVE); } void -intel_miptree_resolve_color(struct intel_context *intel, +intel_miptree_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt) { -#ifdef I915 - /* Fast color clear is not supported on the i915 (pre-Gen4) driver */ -#else - switch (mt->mcs_state) { - case INTEL_MCS_STATE_NONE: - case INTEL_MCS_STATE_MSAA: - case INTEL_MCS_STATE_RESOLVED: + switch (mt->fast_clear_state) { + case INTEL_FAST_CLEAR_STATE_NO_MCS: + case INTEL_FAST_CLEAR_STATE_RESOLVED: /* No resolve needed */ break; - case INTEL_MCS_STATE_UNRESOLVED: - case INTEL_MCS_STATE_CLEAR: - brw_blorp_resolve_color(intel, mt); + case INTEL_FAST_CLEAR_STATE_UNRESOLVED: + case INTEL_FAST_CLEAR_STATE_CLEAR: + /* Fast color clear resolves only make sense for non-MSAA buffers. */ + if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE) + brw_blorp_resolve_color(brw, mt); break; } -#endif } @@ -1499,19 +1536,14 @@ intel_miptree_resolve_color(struct intel_context *intel, * process or another miptree. * * Fast color clears are unsafe with shared buffers, so we need to resolve and - * then discard the MCS buffer, if present. We also set the mcs_state to - * INTEL_MCS_STATE_NONE to ensure that no MCS buffer gets allocated in the - * future. + * then discard the MCS buffer, if present. We also set the fast_clear_state + * to INTEL_FAST_CLEAR_STATE_NO_MCS to ensure that no MCS buffer gets + * allocated in the future. */ void -intel_miptree_make_shareable(struct intel_context *intel, +intel_miptree_make_shareable(struct brw_context *brw, struct intel_mipmap_tree *mt) { -#ifdef I915 - /* Nothing needs to be done for I915 */ - (void) intel; - (void) mt; -#else /* MCS buffers are also used for multisample buffers, but we can't resolve * away a multisample MCS buffer because it's an integral part of how the * pixel data is stored. Fortunately this code path should never be @@ -1520,11 +1552,10 @@ intel_miptree_make_shareable(struct intel_context *intel, assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE); if (mt->mcs_mt) { - intel_miptree_resolve_color(intel, mt); + intel_miptree_resolve_color(brw, mt); intel_miptree_release(&mt->mcs_mt); - mt->mcs_state = INTEL_MCS_STATE_NONE; + mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; } -#endif } @@ -1585,38 +1616,36 @@ intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled) } static void -intel_miptree_updownsample(struct intel_context *intel, +intel_miptree_updownsample(struct brw_context *brw, struct intel_mipmap_tree *src, struct intel_mipmap_tree *dst, unsigned width, unsigned height) { -#ifndef I915 int src_x0 = 0; int src_y0 = 0; int dst_x0 = 0; int dst_y0 = 0; - brw_blorp_blit_miptrees(intel, + brw_blorp_blit_miptrees(brw, src, 0 /* level */, 0 /* layer */, dst, 0 /* level */, 0 /* layer */, src_x0, src_y0, width, height, dst_x0, dst_y0, width, height, - false, false /*mirror x, y*/); + GL_NEAREST, false, false /*mirror x, y*/); if (src->stencil_mt) { - brw_blorp_blit_miptrees(intel, + brw_blorp_blit_miptrees(brw, src->stencil_mt, 0 /* level */, 0 /* layer */, dst->stencil_mt, 0 /* level */, 0 /* layer */, src_x0, src_y0, width, height, dst_x0, dst_y0, width, height, - false, false /*mirror x, y*/); + GL_NEAREST, false, false /*mirror x, y*/); } -#endif /* I915 */ } static void @@ -1633,7 +1662,7 @@ assert_is_flat(struct intel_mipmap_tree *mt) * If the miptree needs no downsample, then skip. */ void -intel_miptree_downsample(struct intel_context *intel, +intel_miptree_downsample(struct brw_context *brw, struct intel_mipmap_tree *mt) { /* Only flat, renderbuffer-like miptrees are supported. */ @@ -1641,7 +1670,7 @@ intel_miptree_downsample(struct intel_context *intel, if (!mt->need_downsample) return; - intel_miptree_updownsample(intel, + intel_miptree_updownsample(brw, mt, mt->singlesample_mt, mt->logical_width0, mt->logical_height0); @@ -1654,36 +1683,36 @@ intel_miptree_downsample(struct intel_context *intel, * The upsample is done unconditionally. */ void -intel_miptree_upsample(struct intel_context *intel, +intel_miptree_upsample(struct brw_context *brw, struct intel_mipmap_tree *mt) { /* Only flat, renderbuffer-like miptrees are supported. */ assert_is_flat(mt); assert(!mt->need_downsample); - intel_miptree_updownsample(intel, + intel_miptree_updownsample(brw, mt->singlesample_mt, mt, mt->logical_width0, mt->logical_height0); } void * -intel_miptree_map_raw(struct intel_context *intel, struct intel_mipmap_tree *mt) +intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt) { /* CPU accesses to color buffers don't understand fast color clears, so * resolve any pending fast color clears before we map. */ - intel_miptree_resolve_color(intel, mt); + intel_miptree_resolve_color(brw, mt); drm_intel_bo *bo = mt->region->bo; if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { if (drm_intel_bo_busy(bo)) { - perf_debug("Mapping a busy BO, causing a stall on the GPU.\n"); + perf_debug("Mapping a busy miptree, causing a stall on the GPU.\n"); } } - intel_flush(&intel->ctx); + intel_batchbuffer_flush(brw); if (mt->region->tiling != I915_TILING_NONE) drm_intel_gem_bo_map_gtt(bo); @@ -1694,14 +1723,14 @@ intel_miptree_map_raw(struct intel_context *intel, struct intel_mipmap_tree *mt) } void -intel_miptree_unmap_raw(struct intel_context *intel, +intel_miptree_unmap_raw(struct brw_context *brw, struct intel_mipmap_tree *mt) { drm_intel_bo_unmap(mt->region->bo); } static void -intel_miptree_map_gtt(struct intel_context *intel, +intel_miptree_map_gtt(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, unsigned int slice) @@ -1720,7 +1749,7 @@ intel_miptree_map_gtt(struct intel_context *intel, assert(y % bh == 0); y /= bh; - base = intel_miptree_map_raw(intel, mt) + mt->offset; + base = intel_miptree_map_raw(brw, mt) + mt->offset; if (base == NULL) map->ptr = NULL; @@ -1743,22 +1772,22 @@ intel_miptree_map_gtt(struct intel_context *intel, } static void -intel_miptree_unmap_gtt(struct intel_context *intel, +intel_miptree_unmap_gtt(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, unsigned int slice) { - intel_miptree_unmap_raw(intel, mt); + intel_miptree_unmap_raw(brw, mt); } static void -intel_miptree_map_blit(struct intel_context *intel, +intel_miptree_map_blit(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, unsigned int slice) { - map->mt = intel_miptree_create(intel, GL_TEXTURE_2D, mt->format, + map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format, 0, 0, map->w, map->h, 1, false, 0, @@ -1769,7 +1798,7 @@ intel_miptree_map_blit(struct intel_context *intel, } map->stride = map->mt->region->pitch; - if (!intel_miptree_blit(intel, + if (!intel_miptree_blit(brw, mt, level, slice, map->x, map->y, false, map->mt, 0, 0, @@ -1779,8 +1808,7 @@ intel_miptree_map_blit(struct intel_context *intel, goto fail; } - intel_batchbuffer_flush(intel); - map->ptr = intel_miptree_map_raw(intel, map->mt); + map->ptr = intel_miptree_map_raw(brw, map->mt); DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__, map->x, map->y, map->w, map->h, @@ -1796,18 +1824,18 @@ fail: } static void -intel_miptree_unmap_blit(struct intel_context *intel, +intel_miptree_unmap_blit(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, unsigned int slice) { - struct gl_context *ctx = &intel->ctx; + struct gl_context *ctx = &brw->ctx; - intel_miptree_unmap_raw(intel, map->mt); + intel_miptree_unmap_raw(brw, map->mt); if (map->mode & GL_MAP_WRITE_BIT) { - bool ok = intel_miptree_blit(intel, + bool ok = intel_miptree_blit(brw, map->mt, 0, 0, 0, 0, false, mt, level, slice, @@ -1819,8 +1847,81 @@ intel_miptree_unmap_blit(struct intel_context *intel, intel_miptree_release(&map->mt); } +#ifdef __SSE4_1__ +/** + * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA. + */ +static void +intel_miptree_map_movntdqa(struct brw_context *brw, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, unsigned int slice) +{ + assert(map->mode & GL_MAP_READ_BIT); + assert(!(map->mode & GL_MAP_WRITE_BIT)); + + DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__, + map->x, map->y, map->w, map->h, + mt, _mesa_get_format_name(mt->format), + level, slice, map->ptr, map->stride); + + /* Map the original image */ + uint32_t image_x; + uint32_t image_y; + intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); + image_x += map->x; + image_y += map->y; + + void *src = intel_miptree_map_raw(brw, mt); + if (!src) + return; + src += image_y * mt->region->pitch; + src += image_x * mt->region->cpp; + + /* Due to the pixel offsets for the particular image being mapped, our + * src pointer may not be 16-byte aligned. However, if the pitch is + * divisible by 16, then the amount by which it's misaligned will remain + * consistent from row to row. + */ + assert((mt->region->pitch % 16) == 0); + const int misalignment = ((uintptr_t) src) & 15; + + /* Create an untiled temporary buffer for the mapping. */ + const unsigned width_bytes = _mesa_format_row_stride(mt->format, map->w); + + map->stride = ALIGN(misalignment + width_bytes, 16); + + map->buffer = malloc(map->stride * map->h); + /* Offset the destination so it has the same misalignment as src. */ + map->ptr = map->buffer + misalignment; + + assert((((uintptr_t) map->ptr) & 15) == misalignment); + + for (uint32_t y = 0; y < map->h; y++) { + void *dst_ptr = map->ptr + y * map->stride; + void *src_ptr = src + y * mt->region->pitch; + + _mesa_streaming_load_memcpy(dst_ptr, src_ptr, width_bytes); + } + + intel_miptree_unmap_raw(brw, mt); +} + static void -intel_miptree_map_s8(struct intel_context *intel, +intel_miptree_unmap_movntdqa(struct brw_context *brw, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + free(map->buffer); + map->buffer = NULL; + map->ptr = NULL; +} +#endif + +static void +intel_miptree_map_s8(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, unsigned int slice) @@ -1837,7 +1938,7 @@ intel_miptree_map_s8(struct intel_context *intel, */ if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { uint8_t *untiled_s8_map = map->ptr; - uint8_t *tiled_s8_map = intel_miptree_map_raw(intel, mt); + uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt); unsigned int image_x, image_y; intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); @@ -1847,12 +1948,12 @@ intel_miptree_map_s8(struct intel_context *intel, ptrdiff_t offset = intel_offset_S8(mt->region->pitch, x + image_x + map->x, y + image_y + map->y, - intel->has_swizzling); + brw->has_swizzling); untiled_s8_map[y * map->w + x] = tiled_s8_map[offset]; } } - intel_miptree_unmap_raw(intel, mt); + intel_miptree_unmap_raw(brw, mt); DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__, map->x, map->y, map->w, map->h, @@ -1865,7 +1966,7 @@ intel_miptree_map_s8(struct intel_context *intel, } static void -intel_miptree_unmap_s8(struct intel_context *intel, +intel_miptree_unmap_s8(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, @@ -1874,7 +1975,7 @@ intel_miptree_unmap_s8(struct intel_context *intel, if (map->mode & GL_MAP_WRITE_BIT) { unsigned int image_x, image_y; uint8_t *untiled_s8_map = map->ptr; - uint8_t *tiled_s8_map = intel_miptree_map_raw(intel, mt); + uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt); intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); @@ -1883,19 +1984,19 @@ intel_miptree_unmap_s8(struct intel_context *intel, ptrdiff_t offset = intel_offset_S8(mt->region->pitch, x + map->x, y + map->y, - intel->has_swizzling); + brw->has_swizzling); tiled_s8_map[offset] = untiled_s8_map[y * map->w + x]; } } - intel_miptree_unmap_raw(intel, mt); + intel_miptree_unmap_raw(brw, mt); } free(map->buffer); } static void -intel_miptree_map_etc(struct intel_context *intel, +intel_miptree_map_etc(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, @@ -1916,7 +2017,7 @@ intel_miptree_map_etc(struct intel_context *intel, } static void -intel_miptree_unmap_etc(struct intel_context *intel, +intel_miptree_unmap_etc(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, @@ -1929,7 +2030,7 @@ intel_miptree_unmap_etc(struct intel_context *intel, image_x += map->x; image_y += map->y; - uint8_t *dst = intel_miptree_map_raw(intel, mt) + uint8_t *dst = intel_miptree_map_raw(brw, mt) + image_y * mt->region->pitch + image_x * mt->region->cpp; @@ -1942,7 +2043,7 @@ intel_miptree_unmap_etc(struct intel_context *intel, map->ptr, map->stride, map->w, map->h, mt->etc_format); - intel_miptree_unmap_raw(intel, mt); + intel_miptree_unmap_raw(brw, mt); free(map->buffer); } @@ -1958,7 +2059,7 @@ intel_miptree_unmap_etc(struct intel_context *intel, * copying the data between the actual backing store and the temporary. */ static void -intel_miptree_map_depthstencil(struct intel_context *intel, +intel_miptree_map_depthstencil(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, unsigned int slice) @@ -1980,8 +2081,8 @@ intel_miptree_map_depthstencil(struct intel_context *intel, */ if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { uint32_t *packed_map = map->ptr; - uint8_t *s_map = intel_miptree_map_raw(intel, s_mt); - uint32_t *z_map = intel_miptree_map_raw(intel, z_mt); + uint8_t *s_map = intel_miptree_map_raw(brw, s_mt); + uint32_t *z_map = intel_miptree_map_raw(brw, z_mt); unsigned int s_image_x, s_image_y; unsigned int z_image_x, z_image_y; @@ -1996,7 +2097,7 @@ intel_miptree_map_depthstencil(struct intel_context *intel, ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch, map_x + s_image_x, map_y + s_image_y, - intel->has_swizzling); + brw->has_swizzling); ptrdiff_t z_offset = ((map_y + z_image_y) * (z_mt->region->pitch / 4) + (map_x + z_image_x)); @@ -2012,8 +2113,8 @@ intel_miptree_map_depthstencil(struct intel_context *intel, } } - intel_miptree_unmap_raw(intel, s_mt); - intel_miptree_unmap_raw(intel, z_mt); + intel_miptree_unmap_raw(brw, s_mt); + intel_miptree_unmap_raw(brw, z_mt); DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n", __FUNCTION__, @@ -2029,7 +2130,7 @@ intel_miptree_map_depthstencil(struct intel_context *intel, } static void -intel_miptree_unmap_depthstencil(struct intel_context *intel, +intel_miptree_unmap_depthstencil(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, @@ -2041,8 +2142,8 @@ intel_miptree_unmap_depthstencil(struct intel_context *intel, if (map->mode & GL_MAP_WRITE_BIT) { uint32_t *packed_map = map->ptr; - uint8_t *s_map = intel_miptree_map_raw(intel, s_mt); - uint32_t *z_map = intel_miptree_map_raw(intel, z_mt); + uint8_t *s_map = intel_miptree_map_raw(brw, s_mt); + uint32_t *z_map = intel_miptree_map_raw(brw, z_mt); unsigned int s_image_x, s_image_y; unsigned int z_image_x, z_image_y; @@ -2056,7 +2157,7 @@ intel_miptree_unmap_depthstencil(struct intel_context *intel, ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch, x + s_image_x + map->x, y + s_image_y + map->y, - intel->has_swizzling); + brw->has_swizzling); ptrdiff_t z_offset = ((y + z_image_y) * (z_mt->region->pitch / 4) + (x + z_image_x)); @@ -2072,8 +2173,8 @@ intel_miptree_unmap_depthstencil(struct intel_context *intel, } } - intel_miptree_unmap_raw(intel, s_mt); - intel_miptree_unmap_raw(intel, z_mt); + intel_miptree_unmap_raw(brw, s_mt); + intel_miptree_unmap_raw(brw, z_mt); DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n", __FUNCTION__, @@ -2133,8 +2234,24 @@ intel_miptree_release_map(struct intel_mipmap_tree *mt, *map = NULL; } +static bool +can_blit_slice(struct intel_mipmap_tree *mt, + unsigned int level, unsigned int slice) +{ + uint32_t image_x; + uint32_t image_y; + intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); + if (image_x >= 32768 || image_y >= 32768) + return false; + + if (mt->region->pitch >= 32768) + return false; + + return true; +} + static void -intel_miptree_map_singlesample(struct intel_context *intel, +intel_miptree_map_singlesample(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned int level, unsigned int slice, @@ -2157,33 +2274,37 @@ intel_miptree_map_singlesample(struct intel_context *intel, return; } - intel_miptree_slice_resolve_depth(intel, mt, level, slice); + intel_miptree_slice_resolve_depth(brw, mt, level, slice); if (map->mode & GL_MAP_WRITE_BIT) { intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice); } if (mt->format == MESA_FORMAT_S8) { - intel_miptree_map_s8(intel, mt, map, level, slice); + intel_miptree_map_s8(brw, mt, map, level, slice); } else if (mt->etc_format != MESA_FORMAT_NONE && !(mode & BRW_MAP_DIRECT_BIT)) { - intel_miptree_map_etc(intel, mt, map, level, slice); + intel_miptree_map_etc(brw, mt, map, level, slice); } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) { - intel_miptree_map_depthstencil(intel, mt, map, level, slice); + intel_miptree_map_depthstencil(brw, mt, map, level, slice); } /* See intel_miptree_blit() for details on the 32k pitch limit. */ - else if (intel->has_llc && + else if (brw->has_llc && !(mode & GL_MAP_WRITE_BIT) && !mt->compressed && (mt->region->tiling == I915_TILING_X || - (intel->gen >= 6 && mt->region->tiling == I915_TILING_Y)) && - mt->region->pitch < 32768) { - intel_miptree_map_blit(intel, mt, map, level, slice); + (brw->gen >= 6 && mt->region->tiling == I915_TILING_Y)) && + can_blit_slice(mt, level, slice)) { + intel_miptree_map_blit(brw, mt, map, level, slice); } else if (mt->region->tiling != I915_TILING_NONE && - mt->region->bo->size >= intel->max_gtt_map_object_size) { - assert(mt->region->pitch < 32768); - intel_miptree_map_blit(intel, mt, map, level, slice); + mt->region->bo->size >= brw->max_gtt_map_object_size) { + assert(can_blit_slice(mt, level, slice)); + intel_miptree_map_blit(brw, mt, map, level, slice); +#ifdef __SSE4_1__ + } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed) { + intel_miptree_map_movntdqa(brw, mt, map, level, slice); +#endif } else { - intel_miptree_map_gtt(intel, mt, map, level, slice); + intel_miptree_map_gtt(brw, mt, map, level, slice); } *out_ptr = map->ptr; @@ -2194,7 +2315,7 @@ intel_miptree_map_singlesample(struct intel_context *intel, } static void -intel_miptree_unmap_singlesample(struct intel_context *intel, +intel_miptree_unmap_singlesample(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned int level, unsigned int slice) @@ -2210,23 +2331,27 @@ intel_miptree_unmap_singlesample(struct intel_context *intel, mt, _mesa_get_format_name(mt->format), level, slice); if (mt->format == MESA_FORMAT_S8) { - intel_miptree_unmap_s8(intel, mt, map, level, slice); + intel_miptree_unmap_s8(brw, mt, map, level, slice); } else if (mt->etc_format != MESA_FORMAT_NONE && !(map->mode & BRW_MAP_DIRECT_BIT)) { - intel_miptree_unmap_etc(intel, mt, map, level, slice); + intel_miptree_unmap_etc(brw, mt, map, level, slice); } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) { - intel_miptree_unmap_depthstencil(intel, mt, map, level, slice); + intel_miptree_unmap_depthstencil(brw, mt, map, level, slice); } else if (map->mt) { - intel_miptree_unmap_blit(intel, mt, map, level, slice); + intel_miptree_unmap_blit(brw, mt, map, level, slice); +#ifdef __SSE4_1__ + } else if (map->buffer) { + intel_miptree_unmap_movntdqa(brw, mt, map, level, slice); +#endif } else { - intel_miptree_unmap_gtt(intel, mt, map, level, slice); + intel_miptree_unmap_gtt(brw, mt, map, level, slice); } intel_miptree_release_map(mt, level, slice); } static void -intel_miptree_map_multisample(struct intel_context *intel, +intel_miptree_map_multisample(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned int level, unsigned int slice, @@ -2238,6 +2363,7 @@ intel_miptree_map_multisample(struct intel_context *intel, void **out_ptr, int *out_stride) { + struct gl_context *ctx = &brw->ctx; struct intel_miptree_map *map; assert(mt->num_samples > 1); @@ -2246,7 +2372,7 @@ intel_miptree_map_multisample(struct intel_context *intel, if (mt->target != GL_TEXTURE_2D || mt->first_level != 0 || mt->last_level != 0) { - _mesa_problem(&intel->ctx, "attempt to map a multisample miptree for " + _mesa_problem(ctx, "attempt to map a multisample miptree for " "which (target, first_level, last_level != " "(GL_TEXTURE_2D, 0, 0)"); goto fail; @@ -2258,7 +2384,7 @@ intel_miptree_map_multisample(struct intel_context *intel, if (!mt->singlesample_mt) { mt->singlesample_mt = - intel_miptree_create_for_renderbuffer(intel, + intel_miptree_create_for_renderbuffer(brw, mt->format, mt->logical_width0, mt->logical_height0, @@ -2270,8 +2396,8 @@ intel_miptree_map_multisample(struct intel_context *intel, mt->need_downsample = true; } - intel_miptree_downsample(intel, mt); - intel_miptree_map_singlesample(intel, mt->singlesample_mt, + intel_miptree_downsample(brw, mt); + intel_miptree_map_singlesample(brw, mt->singlesample_mt, level, slice, x, y, w, h, mode, @@ -2285,7 +2411,7 @@ fail: } static void -intel_miptree_unmap_multisample(struct intel_context *intel, +intel_miptree_unmap_multisample(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned int level, unsigned int slice) @@ -2297,11 +2423,11 @@ intel_miptree_unmap_multisample(struct intel_context *intel, if (!map) return; - intel_miptree_unmap_singlesample(intel, mt->singlesample_mt, level, slice); + intel_miptree_unmap_singlesample(brw, mt->singlesample_mt, level, slice); mt->need_downsample = false; if (map->mode & GL_MAP_WRITE_BIT) - intel_miptree_upsample(intel, mt); + intel_miptree_upsample(brw, mt); if (map->singlesample_mt_is_tmp) intel_miptree_release(&mt->singlesample_mt); @@ -2310,7 +2436,7 @@ intel_miptree_unmap_multisample(struct intel_context *intel, } void -intel_miptree_map(struct intel_context *intel, +intel_miptree_map(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned int level, unsigned int slice, @@ -2323,13 +2449,13 @@ intel_miptree_map(struct intel_context *intel, int *out_stride) { if (mt->num_samples <= 1) - intel_miptree_map_singlesample(intel, mt, + intel_miptree_map_singlesample(brw, mt, level, slice, x, y, w, h, mode, out_ptr, out_stride); else - intel_miptree_map_multisample(intel, mt, + intel_miptree_map_multisample(brw, mt, level, slice, x, y, w, h, mode, @@ -2337,13 +2463,13 @@ intel_miptree_map(struct intel_context *intel, } void -intel_miptree_unmap(struct intel_context *intel, +intel_miptree_unmap(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned int level, unsigned int slice) { if (mt->num_samples <= 1) - intel_miptree_unmap_singlesample(intel, mt, level, slice); + intel_miptree_unmap_singlesample(brw, mt, level, slice); else - intel_miptree_unmap_multisample(intel, mt, level, slice); + intel_miptree_unmap_multisample(brw, mt, level, slice); }