* by half the block width, and Y coordinates by half the block height.
*/
void
-intel_get_non_msrt_mcs_alignment(struct intel_mipmap_tree *mt,
+intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree *mt,
unsigned *width_px, unsigned *height)
{
switch (mt->tiling) {
}
}
-static bool
-intel_tiling_supports_non_msrt_mcs(struct brw_context *brw, unsigned tiling)
+bool
+intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw,
+ unsigned tiling)
{
/* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
* Target(s)", beneath the "Fast Color Clear" bullet (p326):
* - MCS and Lossless compression is supported for TiledY/TileYs/TileYf
* non-MSRTs only.
*/
-static bool
+bool
intel_miptree_supports_non_msrt_fast_clear(struct brw_context *brw,
- struct intel_mipmap_tree *mt)
+ const struct intel_mipmap_tree *mt)
{
/* MCS support does not exist prior to Gen7 */
if (brw->gen < 7)
if (brw->gen >= 9) {
mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
const uint32_t brw_format = brw_format_for_mesa_format(linear_format);
- return brw_losslessly_compressible_format(brw, brw_format);
+ return isl_format_supports_lossless_compression(brw->intelScreen->devinfo,
+ brw_format);
} else
return true;
}
return mt->num_samples <= 1;
}
+bool
+intel_miptree_supports_lossless_compressed(struct brw_context *brw,
+ const struct intel_mipmap_tree *mt)
+{
+ /* For now compression is only enabled for integer formats even though
+ * there exist supported floating point formats also. This is a heuristic
+ * decision based on current public benchmarks. In none of the cases these
+ * formats provided any improvement but a few cases were seen to regress.
+ * Hence these are left to to be enabled in the future when they are known
+ * to improve things.
+ */
+ if (_mesa_get_format_datatype(mt->format) == GL_FLOAT)
+ return false;
+
+ /* Fast clear mechanism and lossless compression go hand in hand. */
+ if (!intel_miptree_supports_non_msrt_fast_clear(brw, mt))
+ return false;
+
+ /* Fast clear can be also used to clear srgb surfaces by using equivalent
+ * linear format. This trick, however, can't be extended to be used with
+ * lossless compression and therefore a check is needed to see if the format
+ * really is linear.
+ */
+ return _mesa_get_srgb_format_linear(mt->format) == mt->format;
+}
+
/**
* Determine depth format corresponding to a depth+stencil format,
* for separate stencil.
mt->logical_depth0 = depth0;
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
mt->disable_aux_buffers = (layout_flags & MIPTREE_LAYOUT_DISABLE_AUX) != 0;
+ mt->is_scanout = (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT) != 0;
exec_list_make_empty(&mt->hiz_map);
mt->cpp = _mesa_get_format_bytes(format);
mt->num_samples = num_samples;
} else if (brw->gen >= 9 && num_samples > 1) {
layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
} else {
+ const UNUSED bool is_lossless_compressed_aux =
+ brw->gen >= 9 && num_samples == 1 &&
+ mt->format == MESA_FORMAT_R_UINT32;
+
/* For now, nothing else has this requirement */
- assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
+ assert(is_lossless_compressed_aux ||
+ (layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
}
brw_miptree_layout(brw, mt, layout_flags);
/* If this miptree is capable of supporting fast color clears, set
* fast_clear_state appropriately to ensure that fast clears will occur.
* Allocation of the MCS miptree will be deferred until the first fast
- * clear actually occurs.
+ * clear actually occurs or when compressed single sampled buffer is
+ * written by the GPU for the first time.
*/
if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) &&
intel_miptree_supports_non_msrt_fast_clear(brw, mt)) {
height,
1,
pitch,
- 0);
+ MIPTREE_LAYOUT_FOR_SCANOUT);
if (!singlesample_mt)
goto fail;
bool ok;
GLenum target = num_samples > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
const uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
- MIPTREE_LAYOUT_TILING_ANY;
-
+ MIPTREE_LAYOUT_TILING_ANY |
+ MIPTREE_LAYOUT_FOR_SCANOUT;
mt = intel_miptree_create(brw, target, format, 0, 0,
width, height, depth, num_samples,
intel_miptree_release(&(*mt)->mcs_mt);
intel_resolve_map_clear(&(*mt)->hiz_map);
+ intel_miptree_release(&(*mt)->plane[0]);
+ intel_miptree_release(&(*mt)->plane[1]);
+
for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
free((*mt)->level[i].slice);
}
unsigned mcs_height =
ALIGN(mt->logical_height0, height_divisor) / height_divisor;
assert(mt->logical_depth0 == 1);
- uint32_t layout_flags = MIPTREE_LAYOUT_ACCELERATED_UPLOAD |
- MIPTREE_LAYOUT_TILING_Y;
+ uint32_t layout_flags = MIPTREE_LAYOUT_TILING_Y;
+
if (brw->gen >= 8) {
layout_flags |= MIPTREE_LAYOUT_FORCE_HALIGN16;
}
+
+ /* On Gen9+ clients are not currently capable of consuming compressed
+ * single-sampled buffers. Disabling compression allows us to skip
+ * resolves.
+ */
+ const bool lossless_compression_disabled = INTEL_DEBUG & DEBUG_NO_RBC;
+ const bool is_lossless_compressed =
+ unlikely(!lossless_compression_disabled) &&
+ brw->gen >= 9 && !mt->is_scanout &&
+ intel_miptree_supports_lossless_compressed(brw, mt);
+
+ /* In case of compression mcs buffer needs to be initialised requiring the
+ * buffer to be immediately mapped to cpu space for writing. Therefore do
+ * not use the gpu access flag which can cause an unnecessary delay if the
+ * backing pages happened to be just used by the GPU.
+ */
+ if (!is_lossless_compressed)
+ layout_flags |= MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
+
mt->mcs_mt = miptree_create(brw,
mt->target,
format,
0 /* num_samples */,
layout_flags);
+ /* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers are
+ * used for lossless compression which requires similar initialisation
+ * as multi-sample compression.
+ */
+ if (is_lossless_compressed) {
+ /* Hardware sets the auxiliary buffer to all zeroes when it does full
+ * resolve. Initialize it accordingly in case the first renderer is
+ * cpu (or other none compression aware party).
+ *
+ * This is also explicitly stated in the spec (MCS Buffer for Render
+ * Target(s)):
+ * "If Software wants to enable Color Compression without Fast clear,
+ * Software needs to initialize MCS with zeros."
+ */
+ intel_miptree_init_mcs(brw, mt, 0);
+ mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
+ mt->msaa_layout = INTEL_MSAA_LAYOUT_CMS;
+ }
+
return mt->mcs_mt;
}
+void
+intel_miptree_prepare_mcs(struct brw_context *brw,
+ struct intel_mipmap_tree *mt)
+{
+ if (mt->mcs_mt)
+ return;
+
+ if (brw->gen < 9)
+ return;
+
+ /* Single sample compression is represented re-using msaa compression
+ * layout type: "Compressed Multisampled Surfaces".
+ */
+ if (mt->msaa_layout != INTEL_MSAA_LAYOUT_CMS || mt->num_samples > 1)
+ return;
+
+ /* Clients are not currently capable of consuming compressed
+ * single-sampled buffers.
+ */
+ if (mt->is_scanout)
+ return;
+
+ assert(intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) ||
+ intel_miptree_supports_lossless_compressed(brw, mt));
+
+ /* Consider if lossless compression is supported but the needed
+ * auxiliary buffer doesn't exist yet.
+ *
+ * Failing to allocate the auxiliary buffer means running out of
+ * memory. The pointer to the aux miptree is left NULL which should
+ * signal non-compressed behavior.
+ */
+ if (!intel_miptree_alloc_non_msrt_mcs(brw, mt)) {
+ _mesa_warning(NULL,
+ "Failed to allocated aux buffer for lossless"
+ " compressed %p %u:%u %s\n",
+ mt, mt->logical_width0, mt->logical_height0,
+ _mesa_get_format_name(mt->format));
+ }
+}
/**
* Helper for intel_miptree_alloc_hiz() that sets
/* Fast color clear resolves only make sense for non-MSAA buffers. */
if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE ||
intel_miptree_is_lossless_compressed(brw, mt)) {
- brw_meta_resolve_color(brw, mt);
+ brw_blorp_resolve_color(brw, mt);
}
break;
}
struct intel_mipmap_tree *src,
struct intel_mipmap_tree *dst)
{
- if (brw->gen < 8) {
- brw_blorp_blit_miptrees(brw,
- src, 0 /* level */, 0 /* layer */, src->format,
- dst, 0 /* level */, 0 /* layer */, dst->format,
- 0, 0,
- src->logical_width0, src->logical_height0,
- 0, 0,
- dst->logical_width0, dst->logical_height0,
- GL_NEAREST, false, false /*mirror x, y*/);
- } else if (src->format == MESA_FORMAT_S_UINT8) {
- brw_meta_stencil_updownsample(brw, src, dst);
- } else {
- brw_meta_updownsample(brw, src, dst);
- }
+ brw_blorp_blit_miptrees(brw,
+ src, 0 /* level */, 0 /* layer */,
+ src->format, SWIZZLE_XYZW,
+ dst, 0 /* level */, 0 /* layer */, dst->format,
+ 0, 0,
+ src->logical_width0, src->logical_height0,
+ 0, 0,
+ dst->logical_width0, dst->logical_height0,
+ GL_NEAREST, false, false /*mirror x, y*/,
+ false, false);
if (src->stencil_mt) {
- if (brw->gen >= 8) {
- brw_meta_stencil_updownsample(brw, src->stencil_mt, dst);
- return;
- }
-
brw_blorp_blit_miptrees(brw,
src->stencil_mt, 0 /* level */, 0 /* layer */,
- src->stencil_mt->format,
+ src->stencil_mt->format, SWIZZLE_XYZW,
dst->stencil_mt, 0 /* level */, 0 /* layer */,
dst->stencil_mt->format,
0, 0,
src->logical_width0, src->logical_height0,
0, 0,
dst->logical_width0, dst->logical_height0,
- GL_NEAREST, false, false /*mirror x, y*/);
+ GL_NEAREST, false, false /*mirror x, y*/,
+ false, false /* decode/encode srgb */);
}
}
-void *
+static void *
intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
{
/* CPU accesses to color buffers don't understand fast color clears, so
return bo->virtual;
}
-void
+static void
intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
{
drm_intel_bo_unmap(mt->bo);
for (uint32_t y = 0; y < map->h; y++) {
for (uint32_t x = 0; x < map->w; x++) {
ptrdiff_t offset = intel_offset_S8(mt->pitch,
- x + map->x,
- y + map->y,
+ image_x + x + map->x,
+ image_y + y + map->y,
brw->has_swizzling);
tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
}
intel_miptree_release_map(mt, level, slice);
}
+
+void
+intel_miptree_get_isl_surf(struct brw_context *brw,
+ const struct intel_mipmap_tree *mt,
+ struct isl_surf *surf)
+{
+ switch (mt->target) {
+ case GL_TEXTURE_1D:
+ case GL_TEXTURE_1D_ARRAY: {
+ surf->dim = ISL_SURF_DIM_1D;
+ if (brw->gen >= 9 && mt->tiling == I915_TILING_NONE)
+ surf->dim_layout = ISL_DIM_LAYOUT_GEN9_1D;
+ else
+ surf->dim_layout = ISL_DIM_LAYOUT_GEN4_2D;
+ break;
+ }
+ case GL_TEXTURE_2D:
+ case GL_TEXTURE_2D_ARRAY:
+ case GL_TEXTURE_RECTANGLE:
+ case GL_TEXTURE_CUBE_MAP:
+ case GL_TEXTURE_CUBE_MAP_ARRAY:
+ case GL_TEXTURE_2D_MULTISAMPLE:
+ case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+ case GL_TEXTURE_EXTERNAL_OES:
+ surf->dim = ISL_SURF_DIM_2D;
+ surf->dim_layout = ISL_DIM_LAYOUT_GEN4_2D;
+ break;
+ case GL_TEXTURE_3D:
+ surf->dim = ISL_SURF_DIM_3D;
+ if (brw->gen >= 9)
+ surf->dim_layout = ISL_DIM_LAYOUT_GEN4_2D;
+ else
+ surf->dim_layout = ISL_DIM_LAYOUT_GEN4_3D;
+ break;
+ default:
+ unreachable("Invalid texture target");
+ }
+
+ if (mt->num_samples > 1) {
+ switch (mt->msaa_layout) {
+ case INTEL_MSAA_LAYOUT_IMS:
+ surf->msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED;
+ break;
+ case INTEL_MSAA_LAYOUT_UMS:
+ case INTEL_MSAA_LAYOUT_CMS:
+ surf->msaa_layout = ISL_MSAA_LAYOUT_ARRAY;
+ break;
+ default:
+ unreachable("Invalid MSAA layout");
+ }
+ } else {
+ surf->msaa_layout = ISL_MSAA_LAYOUT_NONE;
+ }
+
+ if (mt->format == MESA_FORMAT_S_UINT8) {
+ surf->tiling = ISL_TILING_W;
+ /* The ISL definition of row_pitch matches the surface state pitch field
+ * a bit better than intel_mipmap_tree. In particular, ISL incorporates
+ * the factor of 2 for W-tiling in row_pitch.
+ */
+ surf->row_pitch = 2 * mt->pitch;
+ } else {
+ switch (mt->tiling) {
+ case I915_TILING_NONE:
+ surf->tiling = ISL_TILING_LINEAR;
+ break;
+ case I915_TILING_X:
+ surf->tiling = ISL_TILING_X;
+ break;
+ case I915_TILING_Y:
+ switch (mt->tr_mode) {
+ case INTEL_MIPTREE_TRMODE_NONE:
+ surf->tiling = ISL_TILING_Y0;
+ break;
+ case INTEL_MIPTREE_TRMODE_YF:
+ surf->tiling = ISL_TILING_Yf;
+ break;
+ case INTEL_MIPTREE_TRMODE_YS:
+ surf->tiling = ISL_TILING_Ys;
+ break;
+ }
+ break;
+ default:
+ unreachable("Invalid tiling mode");
+ }
+
+ surf->row_pitch = mt->pitch;
+ }
+
+ surf->format = translate_tex_format(brw, mt->format, false);
+
+ if (brw->gen >= 9) {
+ if (surf->dim == ISL_SURF_DIM_1D && surf->tiling == ISL_TILING_LINEAR) {
+ /* For gen9 1-D surfaces, intel_mipmap_tree has a bogus alignment. */
+ surf->image_alignment_el = isl_extent3d(64, 1, 1);
+ } else {
+ /* On gen9+, intel_mipmap_tree stores the horizontal and vertical
+ * alignment in terms of surface elements like we want.
+ */
+ surf->image_alignment_el = isl_extent3d(mt->halign, mt->valign, 1);
+ }
+ } else {
+ /* On earlier gens it's stored in pixels. */
+ unsigned bw, bh;
+ _mesa_get_format_block_size(mt->format, &bw, &bh);
+ surf->image_alignment_el =
+ isl_extent3d(mt->halign / bw, mt->valign / bh, 1);
+ }
+
+ surf->logical_level0_px.width = mt->logical_width0;
+ surf->logical_level0_px.height = mt->logical_height0;
+ if (surf->dim == ISL_SURF_DIM_3D) {
+ surf->logical_level0_px.depth = mt->logical_depth0;
+ surf->logical_level0_px.array_len = 1;
+ } else if (mt->target == GL_TEXTURE_CUBE_MAP ||
+ mt->target == GL_TEXTURE_CUBE_MAP_ARRAY) {
+ /* For cube maps, mt->logical_depth0 is in number of cubes */
+ surf->logical_level0_px.depth = 1;
+ surf->logical_level0_px.array_len = mt->logical_depth0 * 6;
+ } else {
+ surf->logical_level0_px.depth = 1;
+ surf->logical_level0_px.array_len = mt->logical_depth0;
+ }
+
+ surf->phys_level0_sa.width = mt->physical_width0;
+ surf->phys_level0_sa.height = mt->physical_height0;
+ if (surf->dim == ISL_SURF_DIM_3D) {
+ surf->phys_level0_sa.depth = mt->physical_depth0;
+ surf->phys_level0_sa.array_len = 1;
+ } else {
+ surf->phys_level0_sa.depth = 1;
+ surf->phys_level0_sa.array_len = mt->physical_depth0;
+ }
+
+ surf->levels = mt->last_level + 1;
+ surf->samples = MAX2(mt->num_samples, 1);
+
+ surf->size = 0; /* TODO */
+ surf->alignment = 0; /* TODO */
+
+ switch (surf->dim_layout) {
+ case ISL_DIM_LAYOUT_GEN4_2D:
+ case ISL_DIM_LAYOUT_GEN4_3D:
+ if (brw->gen >= 9) {
+ surf->array_pitch_el_rows = mt->qpitch;
+ } else {
+ unsigned bw, bh;
+ _mesa_get_format_block_size(mt->format, &bw, &bh);
+ assert(mt->qpitch % bh == 0);
+ surf->array_pitch_el_rows = mt->qpitch / bh;
+ }
+ break;
+ case ISL_DIM_LAYOUT_GEN9_1D:
+ surf->array_pitch_el_rows = 1;
+ break;
+ }
+
+ switch (mt->array_layout) {
+ case ALL_LOD_IN_EACH_SLICE:
+ surf->array_pitch_span = ISL_ARRAY_PITCH_SPAN_FULL;
+ break;
+ case ALL_SLICES_AT_EACH_LOD:
+ surf->array_pitch_span = ISL_ARRAY_PITCH_SPAN_COMPACT;
+ break;
+ default:
+ unreachable("Invalid array layout");
+ }
+
+ surf->usage = 0; /* TODO */
+}