From: Chia-I Wu Date: Sun, 8 Mar 2015 05:39:02 +0000 (+0800) Subject: ilo: move ilo_layout.[ch] to core as ilo_image.[ch] X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ac47563cb40a79ec2bf149c6d9916a9d66361753;p=mesa.git ilo: move ilo_layout.[ch] to core as ilo_image.[ch] Move files and s/layout/image/. --- diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index 29bb9f6e446..f1a7ef522f2 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -7,6 +7,8 @@ C_SOURCES := \ core/ilo_format.c \ core/ilo_format.h \ core/ilo_fence.h \ + core/ilo_image.c \ + core/ilo_image.h \ core/intel_winsys.h \ ilo_blit.c \ ilo_blit.h \ @@ -34,8 +36,6 @@ C_SOURCES := \ ilo_draw.h \ ilo_gpgpu.c \ ilo_gpgpu.h \ - ilo_layout.c \ - ilo_layout.h \ ilo_public.h \ ilo_query.c \ ilo_query.h \ diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c new file mode 100644 index 00000000000..daa2bae9412 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_image.c @@ -0,0 +1,1411 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2014 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "ilo_debug.h" +#include "ilo_image.h" + +enum { + IMAGE_TILING_NONE = 1 << GEN6_TILING_NONE, + IMAGE_TILING_X = 1 << GEN6_TILING_X, + IMAGE_TILING_Y = 1 << GEN6_TILING_Y, + IMAGE_TILING_W = 1 << GEN8_TILING_W, + + IMAGE_TILING_ALL = (IMAGE_TILING_NONE | + IMAGE_TILING_X | + IMAGE_TILING_Y | + IMAGE_TILING_W) +}; + +struct ilo_image_params { + const struct ilo_dev *dev; + const struct pipe_resource *templ; + + bool compressed; + + unsigned h0, h1; + unsigned max_x, max_y; +}; + +static void +img_get_slice_size(const struct ilo_image *img, + const struct ilo_image_params *params, + unsigned level, unsigned *width, unsigned *height) +{ + const struct pipe_resource *templ = params->templ; + unsigned w, h; + + w = u_minify(img->width0, level); + h = u_minify(img->height0, level); + + /* + * From the Sandy Bridge PRM, volume 1 part 1, page 114: + * + * "The dimensions of the mip maps are first determined by applying the + * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then, + * if necessary, they are padded out to compression block boundaries." 
+ */ + w = align(w, img->block_width); + h = align(h, img->block_height); + + /* + * From the Sandy Bridge PRM, volume 1 part 1, page 111: + * + * "If the surface is multisampled (4x), these values must be adjusted + * as follows before proceeding: + * + * W_L = ceiling(W_L / 2) * 4 + * H_L = ceiling(H_L / 2) * 4" + * + * From the Ivy Bridge PRM, volume 1 part 1, page 108: + * + * "If the surface is multisampled and it is a depth or stencil surface + * or Multisampled Surface StorageFormat in SURFACE_STATE is + * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before + * proceeding: + * + * #samples W_L = H_L = + * 2 ceiling(W_L / 2) * 4 HL [no adjustment] + * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4 + * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4 + * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8" + * + * For interleaved samples (4x), where pixels + * + * (x, y ) (x+1, y ) + * (x, y+1) (x+1, y+1) + * + * would be occupied by + * + * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1) + * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1) + * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3) + * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3) + * + * Thus the need to + * + * w = align(w, 2) * 2; + * h = align(h, 2) * 2; + */ + if (img->interleaved_samples) { + switch (templ->nr_samples) { + case 0: + case 1: + break; + case 2: + w = align(w, 2) * 2; + break; + case 4: + w = align(w, 2) * 2; + h = align(h, 2) * 2; + break; + case 8: + w = align(w, 2) * 4; + h = align(h, 2) * 2; + break; + case 16: + w = align(w, 2) * 4; + h = align(h, 2) * 4; + break; + default: + assert(!"unsupported sample count"); + break; + } + } + + /* + * From the Ivy Bridge PRM, volume 1 part 1, page 108: + * + * "For separate stencil buffer, the width must be mutiplied by 2 and + * height divided by 2..." + * + * To make things easier (for transfer), we will just double the stencil + * stride in 3DSTATE_STENCIL_BUFFER. 
+ */ + w = align(w, img->align_i); + h = align(h, img->align_j); + + *width = w; + *height = h; +} + +static unsigned +img_get_num_layers(const struct ilo_image *img, + const struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + unsigned num_layers = templ->array_size; + + /* samples of the same index are stored in a layer */ + if (templ->nr_samples > 1 && !img->interleaved_samples) + num_layers *= templ->nr_samples; + + return num_layers; +} + +static void +img_init_layer_height(struct ilo_image *img, + struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + unsigned num_layers; + + if (img->walk != ILO_IMAGE_WALK_LAYER) + return; + + num_layers = img_get_num_layers(img, params); + if (num_layers <= 1) + return; + + /* + * From the Sandy Bridge PRM, volume 1 part 1, page 115: + * + * "The following equation is used for surface formats other than + * compressed textures: + * + * QPitch = (h0 + h1 + 11j)" + * + * "The equation for compressed textures (BC* and FXT1 surface formats) + * follows: + * + * QPitch = (h0 + h1 + 11j) / 4" + * + * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the + * value calculated in the equation above, for every other odd Surface + * Height starting from 1 i.e. 1,5,9,13" + * + * From the Ivy Bridge PRM, volume 1 part 1, page 111-112: + * + * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth + * buffer and stencil buffer have an implied value of ARYSPC_FULL): + * + * QPitch = (h0 + h1 + 12j) + * QPitch = (h0 + h1 + 12j) / 4 (compressed) + * + * (There are many typos or missing words here...)" + * + * To access the N-th slice, an offset of (Stride * QPitch * N) is added to + * the base address. The PRM divides QPitch by 4 for compressed formats + * because the block height for those formats are 4, and it wants QPitch to + * mean the number of memory rows, as opposed to texel rows, between + * slices. 
Since we use texel rows everywhere, we do not need to divide + * QPitch by 4. + */ + img->layer_height = params->h0 + params->h1 + + ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * img->align_j; + + if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 && + img->height0 % 4 == 1) + img->layer_height += 4; + + params->max_y += img->layer_height * (num_layers - 1); +} + +static void +img_init_lods(struct ilo_image *img, + struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + unsigned cur_x, cur_y; + unsigned lv; + + cur_x = 0; + cur_y = 0; + for (lv = 0; lv <= templ->last_level; lv++) { + unsigned lod_w, lod_h; + + img_get_slice_size(img, params, lv, &lod_w, &lod_h); + + img->lods[lv].x = cur_x; + img->lods[lv].y = cur_y; + img->lods[lv].slice_width = lod_w; + img->lods[lv].slice_height = lod_h; + + switch (img->walk) { + case ILO_IMAGE_WALK_LOD: + lod_h *= img_get_num_layers(img, params); + if (lv == 1) + cur_x += lod_w; + else + cur_y += lod_h; + + /* every LOD begins at tile boundaries */ + if (templ->last_level > 0) { + assert(img->format == PIPE_FORMAT_S8_UINT); + cur_x = align(cur_x, 64); + cur_y = align(cur_y, 64); + } + break; + case ILO_IMAGE_WALK_LAYER: + /* MIPLAYOUT_BELOW */ + if (lv == 1) + cur_x += lod_w; + else + cur_y += lod_h; + break; + case ILO_IMAGE_WALK_3D: + { + const unsigned num_slices = u_minify(templ->depth0, lv); + const unsigned num_slices_per_row = 1 << lv; + const unsigned num_rows = + (num_slices + num_slices_per_row - 1) / num_slices_per_row; + + lod_w *= num_slices_per_row; + lod_h *= num_rows; + + cur_y += lod_h; + } + break; + } + + if (params->max_x < img->lods[lv].x + lod_w) + params->max_x = img->lods[lv].x + lod_w; + if (params->max_y < img->lods[lv].y + lod_h) + params->max_y = img->lods[lv].y + lod_h; + } + + if (img->walk == ILO_IMAGE_WALK_LAYER) { + params->h0 = img->lods[0].slice_height; + + if (templ->last_level > 0) + params->h1 = img->lods[1].slice_height; + else 
+ img_get_slice_size(img, params, 1, &cur_x, &params->h1); + } +} + +static void +img_init_alignments(struct ilo_image *img, + struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + + /* + * From the Sandy Bridge PRM, volume 1 part 1, page 113: + * + * "surface format align_i align_j + * YUV 4:2:2 formats 4 *see below + * BC1-5 4 4 + * FXT1 8 4 + * all other formats 4 *see below" + * + * "- align_j = 4 for any depth buffer + * - align_j = 2 for separate stencil buffer + * - align_j = 4 for any render target surface is multisampled (4x) + * - align_j = 4 for any render target surface with Surface Vertical + * Alignment = VALIGN_4 + * - align_j = 2 for any render target surface with Surface Vertical + * Alignment = VALIGN_2 + * - align_j = 2 for all other render target surface + * - align_j = 2 for any sampling engine surface with Surface Vertical + * Alignment = VALIGN_2 + * - align_j = 4 for any sampling engine surface with Surface Vertical + * Alignment = VALIGN_4" + * + * From the Sandy Bridge PRM, volume 4 part 1, page 86: + * + * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if + * the Surface Format is 96 bits per element (BPE)." 
+ * + * They can be rephrased as + * + * align_i align_j + * compressed formats block width block height + * PIPE_FORMAT_S8_UINT 4 2 + * other depth/stencil formats 4 4 + * 4x multisampled 4 4 + * bpp 96 4 2 + * others 4 2 or 4 + */ + + /* + * From the Ivy Bridge PRM, volume 1 part 1, page 110: + * + * "surface defined by surface format align_i align_j + * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4 + * not D16_UNORM 4 4 + * 3DSTATE_STENCIL_BUFFER N/A 8 8 + * SURFACE_STATE BC*, ETC*, EAC* 4 4 + * FXT1 8 4 + * all others (set by SURFACE_STATE)" + * + * From the Ivy Bridge PRM, volume 4 part 1, page 63: + * + * "- This field (Surface Vertical Aligment) is intended to be set to + * VALIGN_4 if the surface was rendered as a depth buffer, for a + * multisampled (4x) render target, or for a multisampled (8x) + * render target, since these surfaces support only alignment of 4. + * - Use of VALIGN_4 for other surfaces is supported, but uses more + * memory. + * - This field must be set to VALIGN_4 for all tiled Y Render Target + * surfaces. + * - Value of 1 is not supported for format YCRCB_NORMAL (0x182), + * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190) + * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field + * must be set to VALIGN_4." + * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT." + * + * "- This field (Surface Horizontal Aligment) is intended to be set to + * HALIGN_8 only if the surface was rendered as a depth buffer with + * Z16 format or a stencil buffer, since these surfaces support only + * alignment of 8. + * - Use of HALIGN_8 for other surfaces is supported, but uses more + * memory. + * - This field must be set to HALIGN_4 if the Surface Format is BC*. + * - This field must be set to HALIGN_8 if the Surface Format is + * FXT1." 
+ * + * They can be rephrased as + * + * align_i align_j + * compressed formats block width block height + * PIPE_FORMAT_Z16_UNORM 8 4 + * PIPE_FORMAT_S8_UINT 8 8 + * other depth/stencil formats 4 4 + * 2x or 4x multisampled 4 or 8 4 + * tiled Y 4 or 8 4 (if rt) + * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2 + * others 4 or 8 2 or 4 + */ + + if (params->compressed) { + /* this happens to be the case */ + img->align_i = img->block_width; + img->align_j = img->block_height; + } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { + if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) { + switch (img->format) { + case PIPE_FORMAT_Z16_UNORM: + img->align_i = 8; + img->align_j = 4; + break; + case PIPE_FORMAT_S8_UINT: + img->align_i = 8; + img->align_j = 8; + break; + default: + img->align_i = 4; + img->align_j = 4; + break; + } + } else { + switch (img->format) { + case PIPE_FORMAT_S8_UINT: + img->align_i = 4; + img->align_j = 2; + break; + default: + img->align_i = 4; + img->align_j = 4; + break; + } + } + } else { + const bool valign_4 = + (templ->nr_samples > 1) || + (ilo_dev_gen(params->dev) >= ILO_GEN(8)) || + (ilo_dev_gen(params->dev) >= ILO_GEN(7) && + img->tiling == GEN6_TILING_Y && + (templ->bind & PIPE_BIND_RENDER_TARGET)); + + if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && + ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4) + assert(img->format != PIPE_FORMAT_R32G32B32_FLOAT); + + img->align_i = 4; + img->align_j = (valign_4) ? 4 : 2; + } + + /* + * the fact that align i and j are multiples of block width and height + * respectively is what makes the size of the bo a multiple of the block + * size, slices start at block boundaries, and many of the computations + * work. 
+ */ + assert(img->align_i % img->block_width == 0); + assert(img->align_j % img->block_height == 0); + + /* make sure align() works */ + assert(util_is_power_of_two(img->align_i) && + util_is_power_of_two(img->align_j)); + assert(util_is_power_of_two(img->block_width) && + util_is_power_of_two(img->block_height)); +} + +static unsigned +img_get_valid_tilings(const struct ilo_image *img, + const struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + const enum pipe_format format = img->format; + unsigned valid_tilings = IMAGE_TILING_ALL; + + /* + * From the Sandy Bridge PRM, volume 1 part 2, page 32: + * + * "Display/Overlay Y-Major not supported. + * X-Major required for Async Flips" + */ + if (unlikely(templ->bind & PIPE_BIND_SCANOUT)) + valid_tilings &= IMAGE_TILING_X; + + /* + * From the Sandy Bridge PRM, volume 3 part 2, page 158: + * + * "The cursor surface address must be 4K byte aligned. The cursor must + * be in linear memory, it cannot be tiled." + */ + if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR))) + valid_tilings &= IMAGE_TILING_NONE; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 318: + * + * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear + * Depth Buffer is not supported." + * + * "The Depth Buffer, if tiled, must use Y-Major tiling." + * + * From the Sandy Bridge PRM, volume 1 part 2, page 22: + * + * "W-Major Tile Format is used for separate stencil." + */ + if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { + switch (format) { + case PIPE_FORMAT_S8_UINT: + valid_tilings &= IMAGE_TILING_W; + break; + default: + valid_tilings &= IMAGE_TILING_Y; + break; + } + } + + if (templ->bind & PIPE_BIND_RENDER_TARGET) { + /* + * From the Sandy Bridge PRM, volume 1 part 2, page 32: + * + * "NOTE: 128BPE Format Color buffer ( render target ) MUST be + * either TileX or Linear." 
+ * + * From the Haswell PRM, volume 5, page 32: + * + * "NOTE: 128 BPP format color buffer (render target) supports + * Linear, TiledX and TiledY." + */ + if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && img->block_size == 16) + valid_tilings &= ~IMAGE_TILING_Y; + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 63: + * + * "This field (Surface Vertical Aligment) must be set to VALIGN_4 + * for all tiled Y Render Target surfaces." + * + * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT." + */ + if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && + ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && + img->format == PIPE_FORMAT_R32G32B32_FLOAT) + valid_tilings &= ~IMAGE_TILING_Y; + + valid_tilings &= ~IMAGE_TILING_W; + } + + if (templ->bind & PIPE_BIND_SAMPLER_VIEW) { + if (ilo_dev_gen(params->dev) < ILO_GEN(8)) + valid_tilings &= ~IMAGE_TILING_W; + } + + /* no conflicting binding flags */ + assert(valid_tilings); + + return valid_tilings; +} + +static void +img_init_tiling(struct ilo_image *img, + struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + unsigned preferred_tilings; + + img->valid_tilings = img_get_valid_tilings(img, params); + + preferred_tilings = img->valid_tilings; + + /* no fencing nor BLT support */ + if (preferred_tilings & ~IMAGE_TILING_W) + preferred_tilings &= ~IMAGE_TILING_W; + + if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) { + /* + * heuristically set a minimum width/height for enabling tiling + */ + if (img->width0 < 64 && (preferred_tilings & ~IMAGE_TILING_X)) + preferred_tilings &= ~IMAGE_TILING_X; + + if ((img->width0 < 32 || img->height0 < 16) && + (img->width0 < 16 || img->height0 < 32) && + (preferred_tilings & ~IMAGE_TILING_Y)) + preferred_tilings &= ~IMAGE_TILING_Y; + } else { + /* force linear if we are not sure where the texture is bound to */ + if (preferred_tilings & IMAGE_TILING_NONE) + preferred_tilings &= IMAGE_TILING_NONE; + } + + /* prefer tiled over linear 
*/ + if (preferred_tilings & IMAGE_TILING_Y) + img->tiling = GEN6_TILING_Y; + else if (preferred_tilings & IMAGE_TILING_X) + img->tiling = GEN6_TILING_X; + else if (preferred_tilings & IMAGE_TILING_W) + img->tiling = GEN8_TILING_W; + else + img->tiling = GEN6_TILING_NONE; +} + +static void +img_init_walk_gen7(struct ilo_image *img, + struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + + /* + * It is not explicitly stated, but render targets are expected to be + * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected + * to be IMS (samples interleaved). + * + * See "Multisampled Surface Storage Format" field of SURFACE_STATE. + */ + if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { + /* + * From the Ivy Bridge PRM, volume 1 part 1, page 111: + * + * "note that the depth buffer and stencil buffer have an implied + * value of ARYSPC_FULL" + */ + img->walk = (templ->target == PIPE_TEXTURE_3D) ? + ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER; + + img->interleaved_samples = true; + } else { + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 66: + * + * "If Multisampled Surface Storage Format is MSFMT_MSS and Number + * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface + * Array Spacing) must be set to ARYSPC_LOD0." + * + * As multisampled resources are not mipmapped, we never use + * ARYSPC_FULL for them. + */ + if (templ->nr_samples > 1) + assert(templ->last_level == 0); + + img->walk = + (templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D : + (templ->last_level > 0) ? ILO_IMAGE_WALK_LAYER : + ILO_IMAGE_WALK_LOD; + + img->interleaved_samples = false; + } +} + +static void +img_init_walk_gen6(struct ilo_image *img, + struct ilo_image_params *params) +{ + /* + * From the Sandy Bridge PRM, volume 1 part 1, page 115: + * + * "The separate stencil buffer does not support mip mapping, thus the + * storage for LODs other than LOD 0 is not needed. 
The following + * QPitch equation applies only to the separate stencil buffer: + * + * QPitch = h_0" + * + * GEN6 does not support compact spacing otherwise. + */ + img->walk = + (params->templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D : + (img->format == PIPE_FORMAT_S8_UINT) ? ILO_IMAGE_WALK_LOD : + ILO_IMAGE_WALK_LAYER; + + /* GEN6 supports only interleaved samples */ + img->interleaved_samples = true; +} + +static void +img_init_walk(struct ilo_image *img, + struct ilo_image_params *params) +{ + if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) + img_init_walk_gen7(img, params); + else + img_init_walk_gen6(img, params); +} + +static void +img_init_size_and_format(struct ilo_image *img, + struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + enum pipe_format format = templ->format; + bool require_separate_stencil = false; + + img->width0 = templ->width0; + img->height0 = templ->height0; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 317: + * + * "This field (Separate Stencil Buffer Enable) must be set to the same + * value (enabled or disabled) as Hierarchical Depth Buffer Enable." + * + * GEN7+ requires separate stencil buffers. 
+ */ + if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { + if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) + require_separate_stencil = true; + else + require_separate_stencil = (img->aux == ILO_IMAGE_AUX_HIZ); + } + + switch (format) { + case PIPE_FORMAT_ETC1_RGB8: + format = PIPE_FORMAT_R8G8B8X8_UNORM; + break; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + if (require_separate_stencil) { + format = PIPE_FORMAT_Z24X8_UNORM; + img->separate_stencil = true; + } + break; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + if (require_separate_stencil) { + format = PIPE_FORMAT_Z32_FLOAT; + img->separate_stencil = true; + } + break; + default: + break; + } + + img->format = format; + img->block_width = util_format_get_blockwidth(format); + img->block_height = util_format_get_blockheight(format); + img->block_size = util_format_get_blocksize(format); + + params->compressed = util_format_is_compressed(format); +} + +static bool +img_want_mcs(struct ilo_image *img, + struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + bool want_mcs = false; + + /* MCS is for RT on GEN7+ */ + if (ilo_dev_gen(params->dev) < ILO_GEN(7)) + return false; + + if (templ->target != PIPE_TEXTURE_2D || + !(templ->bind & PIPE_BIND_RENDER_TARGET)) + return false; + + /* + * From the Ivy Bridge PRM, volume 4 part 1, page 77: + * + * "For Render Target and Sampling Engine Surfaces:If the surface is + * multisampled (Number of Multisamples any value other than + * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled." + * + * "This field must be set to 0 for all SINT MSRTs when all RT channels + * are not written" + */ + if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) { + want_mcs = true; + } else if (templ->nr_samples <= 1) { + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 326: + * + * "When MCS is buffer is used for color clear of non-multisampler + * render target, the following restrictions apply. + * - Support is limited to tiled render targets. 
+ * - Support is for non-mip-mapped and non-array surface types + * only. + * - Clear is supported only on the full RT; i.e., no partial clear + * or overlapping clears. + * - MCS buffer for non-MSRT is supported only for RT formats + * 32bpp, 64bpp and 128bpp. + * ..." + */ + if (img->tiling != GEN6_TILING_NONE && + templ->last_level == 0 && templ->array_size == 1) { + switch (img->block_size) { + case 4: + case 8: + case 16: + want_mcs = true; + break; + default: + break; + } + } + } + + return want_mcs; +} + +static bool +img_want_hiz(const struct ilo_image *img, + const struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + const struct util_format_description *desc = + util_format_description(templ->format); + + if (ilo_debug & ILO_DEBUG_NOHIZ) + return false; + + if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL)) + return false; + + if (!util_format_has_depth(desc)) + return false; + + /* no point in having HiZ */ + if (templ->usage == PIPE_USAGE_STAGING) + return false; + + /* + * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled + * for every level. This is generally fine except on GEN6, where HiZ and + * separate stencil are enabled and disabled at the same time. When the + * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ + * can result in incompatible formats. 
+ */ + if (ilo_dev_gen(params->dev) == ILO_GEN(6) && + templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && + templ->last_level) + return false; + + return true; +} + +static void +img_init_aux(struct ilo_image *img, + struct ilo_image_params *params) +{ + if (img_want_hiz(img, params)) + img->aux = ILO_IMAGE_AUX_HIZ; + else if (img_want_mcs(img, params)) + img->aux = ILO_IMAGE_AUX_MCS; +} + +static void +img_align(struct ilo_image *img, struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + int align_w = 1, align_h = 1, pad_h = 0; + + /* + * From the Sandy Bridge PRM, volume 1 part 1, page 118: + * + * "To determine the necessary padding on the bottom and right side of + * the surface, refer to the table in Section 7.18.3.4 for the i and j + * parameters for the surface format in use. The surface must then be + * extended to the next multiple of the alignment unit size in each + * dimension, and all texels contained in this extended surface must + * have valid GTT entries." + * + * "For cube surfaces, an additional two rows of padding are required + * at the bottom of the surface. This must be ensured regardless of + * whether the surface is stored tiled or linear. This is due to the + * potential rotation of cache line orientation from memory to cache." + * + * "For compressed textures (BC* and FXT1 surface formats), padding at + * the bottom of the surface is to an even compressed row, which is + * equal to a multiple of 8 uncompressed texel rows. Thus, for padding + * purposes, these surfaces behave as if j = 8 only for surface + * padding purposes. The value of 4 for j still applies for mip level + * alignment and QPitch calculation." 
+ */ + if (templ->bind & PIPE_BIND_SAMPLER_VIEW) { + align_w = MAX2(align_w, img->align_i); + align_h = MAX2(align_h, img->align_j); + + if (templ->target == PIPE_TEXTURE_CUBE) + pad_h += 2; + + if (params->compressed) + align_h = MAX2(align_h, img->align_j * 2); + } + + /* + * From the Sandy Bridge PRM, volume 1 part 1, page 118: + * + * "If the surface contains an odd number of rows of data, a final row + * below the surface must be allocated." + */ + if (templ->bind & PIPE_BIND_RENDER_TARGET) + align_h = MAX2(align_h, 2); + + /* + * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In + * ilo_texture_can_enable_hiz(), we always return true for the first slice. + * To avoid out-of-bound access, we have to pad. + */ + if (img->aux == ILO_IMAGE_AUX_HIZ && + templ->last_level == 0 && + templ->array_size == 1 && + templ->depth0 == 1) { + align_w = MAX2(align_w, 8); + align_h = MAX2(align_h, 4); + } + + params->max_x = align(params->max_x, align_w); + params->max_y = align(params->max_y + pad_h, align_h); +} + +/* note that this may force the texture to be linear */ +static void +img_calculate_bo_size(struct ilo_image *img, + struct ilo_image_params *params) +{ + assert(params->max_x % img->block_width == 0); + assert(params->max_y % img->block_height == 0); + assert(img->layer_height % img->block_height == 0); + + img->bo_stride = + (params->max_x / img->block_width) * img->block_size; + img->bo_height = params->max_y / img->block_height; + + while (true) { + unsigned w = img->bo_stride, h = img->bo_height; + unsigned align_w, align_h; + + /* + * From the Haswell PRM, volume 5, page 163: + * + * "For linear surfaces, additional padding of 64 bytes is required + * at the bottom of the surface. This is in addition to the padding + * required above." 
+ */ + if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) && + (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) && + img->tiling == GEN6_TILING_NONE) + h += (64 + img->bo_stride - 1) / img->bo_stride; + + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 81: + * + * "- For linear render target surfaces, the pitch must be a + * multiple of the element size for non-YUV surface formats. + * Pitch must be a multiple of 2 * element size for YUV surface + * formats. + * - For other linear surfaces, the pitch can be any multiple of + * bytes. + * - For tiled surfaces, the pitch must be a multiple of the tile + * width." + * + * Different requirements may exist when the bo is used in different + * places, but our alignments here should be good enough that we do not + * need to check params->templ->bind. + */ + switch (img->tiling) { + case GEN6_TILING_X: + align_w = 512; + align_h = 8; + break; + case GEN6_TILING_Y: + align_w = 128; + align_h = 32; + break; + case GEN8_TILING_W: + /* + * From the Sandy Bridge PRM, volume 1 part 2, page 22: + * + * "A 4KB tile is subdivided into 8-high by 8-wide array of + * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8 + * bytes." + */ + align_w = 64; + align_h = 64; + break; + default: + assert(img->tiling == GEN6_TILING_NONE); + /* some good enough values */ + align_w = 64; + align_h = 2; + break; + } + + w = align(w, align_w); + h = align(h, align_h); + + /* make sure the bo is mappable */ + if (img->tiling != GEN6_TILING_NONE) { + /* + * Usually only the first 256MB of the GTT is mappable. + * + * See also how intel_context::max_gtt_map_object_size is calculated. + */ + const size_t mappable_gtt_size = 256 * 1024 * 1024; + + /* + * Be conservative. We may be able to switch from VALIGN_4 to + * VALIGN_2 if the image was Y-tiled, but let's keep it simple. 
+ */ + if (mappable_gtt_size / w / 4 < h) { + if (img->valid_tilings & IMAGE_TILING_NONE) { + img->tiling = GEN6_TILING_NONE; + /* MCS support for non-MSRTs is limited to tiled RTs */ + if (img->aux == ILO_IMAGE_AUX_MCS && + params->templ->nr_samples <= 1) + img->aux = ILO_IMAGE_AUX_NONE; + + continue; + } else { + ilo_warn("cannot force texture to be linear\n"); + } + } + } + + img->bo_stride = w; + img->bo_height = h; + break; + } +} + +static void +img_calculate_hiz_size(struct ilo_image *img, + struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + const unsigned hz_align_j = 8; + enum ilo_image_walk_type hz_walk; + unsigned hz_width, hz_height, lv; + unsigned hz_clear_w, hz_clear_h; + + assert(img->aux == ILO_IMAGE_AUX_HIZ); + + assert(img->walk == ILO_IMAGE_WALK_LAYER || + img->walk == ILO_IMAGE_WALK_3D); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 312: + * + * "The hierarchical depth buffer does not support the LOD field, it is + * assumed by hardware to be zero. A separate hierarachical depth + * buffer is required for each LOD used, and the corresponding + * buffer's state delivered to hardware each time a new depth buffer + * state with modified LOD is delivered." + * + * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD. + */ + if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) + hz_walk = img->walk; + else + hz_walk = ILO_IMAGE_WALK_LOD; + + /* + * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge + * PRM, volume 2 part 1, page 312-313. + * + * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a + * memory row. 
+ */ + switch (hz_walk) { + case ILO_IMAGE_WALK_LOD: + { + unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS]; + unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS]; + unsigned cur_tx, cur_ty; + + /* figure out the tile offsets of LODs */ + hz_width = 0; + hz_height = 0; + cur_tx = 0; + cur_ty = 0; + for (lv = 0; lv <= templ->last_level; lv++) { + unsigned tw, th; + + lod_tx[lv] = cur_tx; + lod_ty[lv] = cur_ty; + + tw = align(img->lods[lv].slice_width, 16); + th = align(img->lods[lv].slice_height, hz_align_j) * + templ->array_size / 2; + /* convert to Y-tiles */ + tw = align(tw, 128) / 128; + th = align(th, 32) / 32; + + if (hz_width < cur_tx + tw) + hz_width = cur_tx + tw; + if (hz_height < cur_ty + th) + hz_height = cur_ty + th; + + if (lv == 1) + cur_tx += tw; + else + cur_ty += th; + } + + /* convert tile offsets to memory offsets */ + for (lv = 0; lv <= templ->last_level; lv++) { + img->aux_offsets[lv] = + (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096; + } + hz_width *= 128; + hz_height *= 32; + } + break; + case ILO_IMAGE_WALK_LAYER: + { + const unsigned h0 = align(params->h0, hz_align_j); + const unsigned h1 = align(params->h1, hz_align_j); + const unsigned htail = + ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 
12 : 11) * hz_align_j; + const unsigned hz_qpitch = h0 + h1 + htail; + + hz_width = align(img->lods[0].slice_width, 16); + + hz_height = hz_qpitch * templ->array_size / 2; + if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) + hz_height = align(hz_height, 8); + + img->aux_layer_height = hz_qpitch; + } + break; + case ILO_IMAGE_WALK_3D: + hz_width = align(img->lods[0].slice_width, 16); + + hz_height = 0; + for (lv = 0; lv <= templ->last_level; lv++) { + const unsigned h = align(img->lods[lv].slice_height, hz_align_j); + /* according to the formula, slices are packed together vertically */ + hz_height += h * u_minify(templ->depth0, lv); + } + hz_height /= 2; + break; + default: + assert(!"unknown HiZ walk"); + hz_width = 0; + hz_height = 0; + break; + } + + /* + * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks. + * Experiments on Haswell show that aligning the RECTLIST primitive and + * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be + * aligned. + */ + hz_clear_w = 8; + hz_clear_h = 4; + switch (templ->nr_samples) { + case 0: + case 1: + default: + break; + case 2: + hz_clear_w /= 2; + break; + case 4: + hz_clear_w /= 2; + hz_clear_h /= 2; + break; + case 8: + hz_clear_w /= 4; + hz_clear_h /= 2; + break; + case 16: + hz_clear_w /= 4; + hz_clear_h /= 4; + break; + } + + for (lv = 0; lv <= templ->last_level; lv++) { + if (u_minify(img->width0, lv) % hz_clear_w || + u_minify(img->height0, lv) % hz_clear_h) + break; + img->aux_enables |= 1 << lv; + } + + /* we padded to allow this in img_align() */ + if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1) + img->aux_enables |= 0x1; + + /* align to Y-tile */ + img->aux_stride = align(hz_width, 128); + img->aux_height = align(hz_height, 32); +} + +static void +img_calculate_mcs_size(struct ilo_image *img, + struct ilo_image_params *params) +{ + const struct pipe_resource *templ = params->templ; + int mcs_width, mcs_height, mcs_cpp; + int downscale_x, downscale_y; + + 
assert(img->aux == ILO_IMAGE_AUX_MCS); + + if (templ->nr_samples > 1) { + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear + * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The + * need of scale down could be that the clear rectangle is used to clear + * the MCS instead of the RT. + * + * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The + * 2x2 factor could come from that the hardware writes 128 bits (an + * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in + * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the + * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2 + * pixel block in the RT. + */ + switch (templ->nr_samples) { + case 2: + case 4: + downscale_x = 8; + downscale_y = 2; + mcs_cpp = 1; + break; + case 8: + downscale_x = 2; + downscale_y = 2; + mcs_cpp = 4; + break; + case 16: + downscale_x = 2; + downscale_y = 1; + mcs_cpp = 8; + break; + default: + assert(!"unsupported sample count"); + return; + break; + } + + /* + * It also appears that the 2x2 subspans generated by the scaled-down + * clear rectangle cannot be masked. The scale-down clear rectangle + * thus must be aligned to 2x2, and we need to pad. + */ + mcs_width = align(img->width0, downscale_x * 2); + mcs_height = align(img->height0, downscale_y * 2); + } else { + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 327: + * + * " Pixels Lines + * TiledY RT CL + * bpp + * 32 8 4 + * 64 4 4 + * 128 2 4 + * + * TiledX RT CL + * bpp + * 32 16 2 + * 64 8 2 + * 128 4 2" + * + * This table and the two following tables define the RT alignments, the + * clear rectangle alignments, and the clear rectangle scale factors. + * Viewing the RT alignments as the sizes of 128-byte blocks, we can see + * that the clear rectangle alignments are 16x32 blocks, and the clear + * rectangle scale factors are 8x16 blocks. + * + * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the + * RT. 
Similar to the MSAA cases, we can argue that an OWord maps to + * 8x16 blocks. + * + * One problem with this reasoning is that a Y-tile in MCS has 8x32 + * OWords and maps to 64x512 128-byte blocks. This differs from i965, + * which says that a Y-tile maps to 128x256 blocks (\see + * intel_get_non_msrt_mcs_alignment). It does not really change + * anything except for the size of the allocated MCS. Let's see if we + * hit out-of-bound access. + */ + switch (img->tiling) { + case GEN6_TILING_X: + downscale_x = 64 / img->block_size; + downscale_y = 2; + break; + case GEN6_TILING_Y: + downscale_x = 32 / img->block_size; + downscale_y = 4; + break; + default: + assert(!"unsupported tiling mode"); + return; + break; + } + + downscale_x *= 8; + downscale_y *= 16; + + /* + * From the Haswell PRM, volume 7, page 652: + * + * "Clear rectangle must be aligned to two times the number of + * pixels in the table shown below due to 16X16 hashing across the + * slice." + * + * The scaled-down clear rectangle must be aligned to 4x4 instead of + * 2x2, and we need to pad. + */ + mcs_width = align(img->width0, downscale_x * 4) / downscale_x; + mcs_height = align(img->height0, downscale_y * 4) / downscale_y; + mcs_cpp = 16; /* an OWord */ + } + + img->aux_enables = (1 << (templ->last_level + 1)) - 1; + /* align to Y-tile */ + img->aux_stride = align(mcs_width * mcs_cpp, 128); + img->aux_height = align(mcs_height, 32); +} + +/** + * The texture is for transfer only. We can define our own layout to save + * space. + */ +static void +img_init_for_transfer(struct ilo_image *img, + const struct ilo_dev *dev, + const struct pipe_resource *templ) +{ + const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ? 
+ templ->depth0 : templ->array_size; + unsigned layer_width, layer_height; + + assert(templ->last_level == 0); + assert(templ->nr_samples <= 1); + + img->aux = ILO_IMAGE_AUX_NONE; + img->width0 = templ->width0; + img->height0 = templ->height0; + img->format = templ->format; + img->block_width = util_format_get_blockwidth(templ->format); + img->block_height = util_format_get_blockheight(templ->format); + img->block_size = util_format_get_blocksize(templ->format); + img->walk = ILO_IMAGE_WALK_LOD; + + img->valid_tilings = IMAGE_TILING_NONE; + img->tiling = GEN6_TILING_NONE; + + img->align_i = img->block_width; + img->align_j = img->block_height; + + assert(util_is_power_of_two(img->block_width) && + util_is_power_of_two(img->block_height)); + + /* use packed layout */ + layer_width = align(templ->width0, img->align_i); + layer_height = align(templ->height0, img->align_j); + + img->lods[0].slice_width = layer_width; + img->lods[0].slice_height = layer_height; + + img->bo_stride = (layer_width / img->block_width) * img->block_size; + img->bo_stride = align(img->bo_stride, 64); + + img->bo_height = (layer_height / img->block_height) * num_layers; +} + +/** + * Initialize the image. Callers should zero-initialize \p img first. 
+ */ +void ilo_image_init(struct ilo_image *img, + const struct ilo_dev *dev, + const struct pipe_resource *templ) +{ + struct ilo_image_params params; + bool transfer_only; + + /* use transfer layout when the texture is never bound to GPU */ + transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE | + PIPE_BIND_TRANSFER_READ)); + if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) { + img_init_for_transfer(img, dev, templ); + return; + } + + memset(&params, 0, sizeof(params)); + params.dev = dev; + params.templ = templ; + + /* note that there are dependencies between these functions */ + img_init_aux(img, &params); + img_init_size_and_format(img, &params); + img_init_walk(img, &params); + img_init_tiling(img, &params); + img_init_alignments(img, &params); + img_init_lods(img, &params); + img_init_layer_height(img, &params); + + img_align(img, &params); + img_calculate_bo_size(img, &params); + + switch (img->aux) { + case ILO_IMAGE_AUX_HIZ: + img_calculate_hiz_size(img, &params); + break; + case ILO_IMAGE_AUX_MCS: + img_calculate_mcs_size(img, &params); + break; + default: + break; + } +} + +/** + * Update the tiling mode and bo stride (for imported resources). + */ +bool +ilo_image_update_for_imported_bo(struct ilo_image *img, + enum gen_surface_tiling tiling, + unsigned bo_stride) +{ + if (!(img->valid_tilings & (1 << tiling))) + return false; + + if ((tiling == GEN6_TILING_X && bo_stride % 512) || + (tiling == GEN6_TILING_Y && bo_stride % 128) || + (tiling == GEN8_TILING_W && bo_stride % 64)) + return false; + + img->tiling = tiling; + img->bo_stride = bo_stride; + + return true; +} diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h new file mode 100644 index 00000000000..722a666991d --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_image.h @@ -0,0 +1,295 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2014 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_IMAGE_H +#define ILO_IMAGE_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +struct pipe_resource; + +enum ilo_image_walk_type { + /* + * Array layers of an LOD are packed together vertically. This maps to + * ARYSPC_LOD0 for non-mipmapped 2D textures, and is extended to support + * mipmapped stencil textures and HiZ on GEN6. + */ + ILO_IMAGE_WALK_LOD, + + /* + * LODs of an array layer are packed together. This maps to ARYSPC_FULL + * and is used for mipmapped 2D textures. + */ + ILO_IMAGE_WALK_LAYER, + + /* + * 3D slices of an LOD are packed together, horizontally with wrapping. + * Used for 3D textures. 
+ */ + ILO_IMAGE_WALK_3D, +}; + +enum ilo_image_aux_type { + ILO_IMAGE_AUX_NONE, + ILO_IMAGE_AUX_HIZ, + ILO_IMAGE_AUX_MCS, +}; + +struct ilo_image_lod { + /* physical position */ + unsigned x; + unsigned y; + + /* + * Physical size of an LOD slice. There may be multiple slices when the + * walk type is not ILO_IMAGE_WALK_LAYER. + */ + unsigned slice_width; + unsigned slice_height; +}; + +/** + * Texture layout. + */ +struct ilo_image { + enum ilo_image_aux_type aux; + + /* physical width0, height0, and format */ + unsigned width0; + unsigned height0; + enum pipe_format format; + bool separate_stencil; + + /* + * width, height, and size of pixel blocks, for conversion between 2D + * coordinates and memory offsets + */ + unsigned block_width; + unsigned block_height; + unsigned block_size; + + enum ilo_image_walk_type walk; + bool interleaved_samples; + + /* bitmask of valid tiling modes */ + unsigned valid_tilings; + enum gen_surface_tiling tiling; + + /* mipmap alignments */ + unsigned align_i; + unsigned align_j; + + struct ilo_image_lod lods[PIPE_MAX_TEXTURE_LEVELS]; + + /* physical height of layers for ILO_IMAGE_WALK_LAYER */ + unsigned layer_height; + + /* distance in bytes between two pixel block rows */ + unsigned bo_stride; + /* number of pixel block rows */ + unsigned bo_height; + + /* bitmask of levels that can use aux */ + unsigned aux_enables; + unsigned aux_offsets[PIPE_MAX_TEXTURE_LEVELS]; + unsigned aux_layer_height; + unsigned aux_stride; + unsigned aux_height; +}; + +void +ilo_image_init(struct ilo_image *img, + const struct ilo_dev *dev, + const struct pipe_resource *templ); + +bool +ilo_image_update_for_imported_bo(struct ilo_image *img, + enum gen_surface_tiling tiling, + unsigned bo_stride); + +/** + * Convert from pixel position to 2D memory offset. 
+ */ +static inline void +ilo_image_pos_to_mem(const struct ilo_image *img, + unsigned pos_x, unsigned pos_y, + unsigned *mem_x, unsigned *mem_y) +{ + assert(pos_x % img->block_width == 0); + assert(pos_y % img->block_height == 0); + + *mem_x = pos_x / img->block_width * img->block_size; + *mem_y = pos_y / img->block_height; +} + +/** + * Convert from 2D memory offset to linear offset. + */ +static inline unsigned +ilo_image_mem_to_linear(const struct ilo_image *img, + unsigned mem_x, unsigned mem_y) +{ + return mem_y * img->bo_stride + mem_x; +} + +/** + * Convert from 2D memory offset to raw offset. + */ +static inline unsigned +ilo_image_mem_to_raw(const struct ilo_image *img, + unsigned mem_x, unsigned mem_y) +{ + unsigned tile_w, tile_h; + + switch (img->tiling) { + case GEN6_TILING_NONE: + tile_w = 1; + tile_h = 1; + break; + case GEN6_TILING_X: + tile_w = 512; + tile_h = 8; + break; + case GEN6_TILING_Y: + tile_w = 128; + tile_h = 32; + break; + case GEN8_TILING_W: + tile_w = 64; + tile_h = 64; + break; + default: + assert(!"unknown tiling"); + tile_w = 1; + tile_h = 1; + break; + } + + assert(mem_x % tile_w == 0); + assert(mem_y % tile_h == 0); + + return mem_y * img->bo_stride + mem_x * tile_h; +} + +/** + * Return the stride, in bytes, between slices within a level. + */ +static inline unsigned +ilo_image_get_slice_stride(const struct ilo_image *img, unsigned level) +{ + unsigned h; + + switch (img->walk) { + case ILO_IMAGE_WALK_LOD: + h = img->lods[level].slice_height; + break; + case ILO_IMAGE_WALK_LAYER: + h = img->layer_height; + break; + case ILO_IMAGE_WALK_3D: + if (level == 0) { + h = img->lods[0].slice_height; + break; + } + /* fall through */ + default: + assert(!"no single stride to walk across slices"); + h = 0; + break; + } + + assert(h % img->block_height == 0); + + return (h / img->block_height) * img->bo_stride; +} + +/** + * Return the physical size, in bytes, of a slice in a level. 
+ */ +static inline unsigned +ilo_image_get_slice_size(const struct ilo_image *img, unsigned level) +{ + const unsigned w = img->lods[level].slice_width; + const unsigned h = img->lods[level].slice_height; + + assert(w % img->block_width == 0); + assert(h % img->block_height == 0); + + return (w / img->block_width * img->block_size) * + (h / img->block_height); +} + +/** + * Return the pixel position of a slice. + */ +static inline void +ilo_image_get_slice_pos(const struct ilo_image *img, + unsigned level, unsigned slice, + unsigned *x, unsigned *y) +{ + switch (img->walk) { + case ILO_IMAGE_WALK_LOD: + *x = img->lods[level].x; + *y = img->lods[level].y + img->lods[level].slice_height * slice; + break; + case ILO_IMAGE_WALK_LAYER: + *x = img->lods[level].x; + *y = img->lods[level].y + img->layer_height * slice; + break; + case ILO_IMAGE_WALK_3D: + { + /* slices are packed horizontally with wrapping */ + const unsigned sx = slice & ((1 << level) - 1); + const unsigned sy = slice >> level; + + *x = img->lods[level].x + img->lods[level].slice_width * sx; + *y = img->lods[level].y + img->lods[level].slice_height * sy; + + /* should not overlap with the next level */ + if (level + 1 < Elements(img->lods) && + img->lods[level + 1].y) { + assert(*y + img->lods[level].slice_height <= + img->lods[level + 1].y); + } + break; + } + default: + assert(!"unknown img walk type"); + *x = 0; + *y = 0; + break; + } + + /* should not exceed the bo size */ + assert(*y + img->lods[level].slice_height <= + img->bo_height * img->block_height); +} + +#endif /* ILO_IMAGE_H */ diff --git a/src/gallium/drivers/ilo/ilo_blitter_blt.c b/src/gallium/drivers/ilo/ilo_blitter_blt.c index 7667d4e2ede..5e67198adb1 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_blt.c +++ b/src/gallium/drivers/ilo/ilo_blitter_blt.c @@ -249,10 +249,10 @@ tex_clear_region(struct ilo_blitter *blitter, int slice; /* no W-tiling nor separate stencil support */ - if (dst_tex->layout.tiling == GEN8_TILING_W || 
dst_tex->separate_s8) + if (dst_tex->image.tiling == GEN8_TILING_W || dst_tex->separate_s8) return false; - if (dst_tex->layout.bo_stride > max_extent) + if (dst_tex->image.bo_stride > max_extent) return false; if (dst_box->width * cpp > gen6_blt_max_bytes_per_scanline) @@ -260,17 +260,17 @@ tex_clear_region(struct ilo_blitter *blitter, dst.bo = dst_tex->bo; dst.offset = 0; - dst.pitch = dst_tex->layout.bo_stride; - dst.tiling = dst_tex->layout.tiling; + dst.pitch = dst_tex->image.bo_stride; + dst.tiling = dst_tex->image.tiling; swctrl = ilo_blitter_blt_begin(blitter, GEN6_XY_COLOR_BLT__SIZE * dst_box->depth, - dst_tex->bo, dst_tex->layout.tiling, NULL, GEN6_TILING_NONE); + dst_tex->bo, dst_tex->image.tiling, NULL, GEN6_TILING_NONE); for (slice = 0; slice < dst_box->depth; slice++) { unsigned x, y; - ilo_layout_get_slice_pos(&dst_tex->layout, + ilo_image_get_slice_pos(&dst_tex->image, dst_level, dst_box->z + slice, &x, &y); dst.x = x + dst_box->x; @@ -299,7 +299,7 @@ tex_copy_region(struct ilo_blitter *blitter, const struct pipe_box *src_box) { const struct util_format_description *desc = - util_format_description(dst_tex->layout.format); + util_format_description(dst_tex->image.format); const unsigned max_extent = 32767; /* INT16_MAX */ const uint8_t rop = 0xcc; /* SRCCOPY */ struct ilo_builder *builder = &blitter->ilo->cp->builder; @@ -309,12 +309,12 @@ tex_copy_region(struct ilo_blitter *blitter, int cpp, xscale, slice; /* no W-tiling nor separate stencil support */ - if (dst_tex->layout.tiling == GEN8_TILING_W || dst_tex->separate_s8 || - src_tex->layout.tiling == GEN8_TILING_W || src_tex->separate_s8) + if (dst_tex->image.tiling == GEN8_TILING_W || dst_tex->separate_s8 || + src_tex->image.tiling == GEN8_TILING_W || src_tex->separate_s8) return false; - if (dst_tex->layout.bo_stride > max_extent || - src_tex->layout.bo_stride > max_extent) + if (dst_tex->image.bo_stride > max_extent || + src_tex->image.bo_stride > max_extent) return false; cpp = 
desc->block.bits / 8; @@ -349,13 +349,13 @@ tex_copy_region(struct ilo_blitter *blitter, dst.bo = dst_tex->bo; dst.offset = 0; - dst.pitch = dst_tex->layout.bo_stride; - dst.tiling = dst_tex->layout.tiling; + dst.pitch = dst_tex->image.bo_stride; + dst.tiling = dst_tex->image.tiling; src.bo = src_tex->bo; src.offset = 0; - src.pitch = src_tex->layout.bo_stride; - src.tiling = src_tex->layout.tiling; + src.pitch = src_tex->image.bo_stride; + src.tiling = src_tex->image.tiling; swctrl = ilo_blitter_blt_begin(blitter, GEN6_XY_SRC_COPY_BLT__SIZE * src_box->depth, @@ -364,9 +364,9 @@ tex_copy_region(struct ilo_blitter *blitter, for (slice = 0; slice < src_box->depth; slice++) { unsigned dx, dy, sx, sy, width, height; - ilo_layout_get_slice_pos(&dst_tex->layout, + ilo_image_get_slice_pos(&dst_tex->image, dst_level, dst_z + slice, &dx, &dy); - ilo_layout_get_slice_pos(&src_tex->layout, + ilo_image_get_slice_pos(&src_tex->image, src_level, src_box->z + slice, &sx, &sy); dst.x = (dx + dst_x) * xscale; diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 91cf7375c2a..3383eaf247a 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -138,8 +138,8 @@ ilo_blitter_set_fb(struct ilo_blitter *blitter, { struct ilo_texture *tex = ilo_texture(res); - blitter->fb.width = u_minify(tex->layout.width0, level); - blitter->fb.height = u_minify(tex->layout.height0, level); + blitter->fb.width = u_minify(tex->image.width0, level); + blitter->fb.height = u_minify(tex->image.height0, level); blitter->fb.num_samples = res->nr_samples; if (!blitter->fb.num_samples) @@ -303,7 +303,7 @@ hiz_can_clear_zs(const struct ilo_blitter *blitter, * The truth is when HiZ is enabled, separate stencil is also enabled on * all GENs. The depth buffer format cannot be combined depth/stencil. 
*/ - switch (tex->layout.format) { + switch (tex->image.format) { case PIPE_FORMAT_Z16_UNORM: if (ilo_dev_gen(blitter->ilo->dev) == ILO_GEN(6) && tex->base.width0 % 16) @@ -342,7 +342,7 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter, if (ilo_dev_gen(blitter->ilo->dev) >= ILO_GEN(8)) clear_value = fui(depth); else - clear_value = util_pack_z(tex->layout.format, depth); + clear_value = util_pack_z(tex->image.format, depth); ilo_blit_resolve_surface(blitter->ilo, zs, ILO_TEXTURE_RENDER_WRITE | ILO_TEXTURE_CLEAR); diff --git a/src/gallium/drivers/ilo/ilo_layout.c b/src/gallium/drivers/ilo/ilo_layout.c deleted file mode 100644 index f2da27f834a..00000000000 --- a/src/gallium/drivers/ilo/ilo_layout.c +++ /dev/null @@ -1,1410 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2014 LunarG, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Chia-I Wu - */ - -#include "ilo_layout.h" - -enum { - LAYOUT_TILING_NONE = 1 << GEN6_TILING_NONE, - LAYOUT_TILING_X = 1 << GEN6_TILING_X, - LAYOUT_TILING_Y = 1 << GEN6_TILING_Y, - LAYOUT_TILING_W = 1 << GEN8_TILING_W, - - LAYOUT_TILING_ALL = (LAYOUT_TILING_NONE | - LAYOUT_TILING_X | - LAYOUT_TILING_Y | - LAYOUT_TILING_W) -}; - -struct ilo_layout_params { - const struct ilo_dev *dev; - const struct pipe_resource *templ; - - bool compressed; - - unsigned h0, h1; - unsigned max_x, max_y; -}; - -static void -layout_get_slice_size(const struct ilo_layout *layout, - const struct ilo_layout_params *params, - unsigned level, unsigned *width, unsigned *height) -{ - const struct pipe_resource *templ = params->templ; - unsigned w, h; - - w = u_minify(layout->width0, level); - h = u_minify(layout->height0, level); - - /* - * From the Sandy Bridge PRM, volume 1 part 1, page 114: - * - * "The dimensions of the mip maps are first determined by applying the - * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then, - * if necessary, they are padded out to compression block boundaries." 
- */ - w = align(w, layout->block_width); - h = align(h, layout->block_height); - - /* - * From the Sandy Bridge PRM, volume 1 part 1, page 111: - * - * "If the surface is multisampled (4x), these values must be adjusted - * as follows before proceeding: - * - * W_L = ceiling(W_L / 2) * 4 - * H_L = ceiling(H_L / 2) * 4" - * - * From the Ivy Bridge PRM, volume 1 part 1, page 108: - * - * "If the surface is multisampled and it is a depth or stencil surface - * or Multisampled Surface StorageFormat in SURFACE_STATE is - * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before - * proceeding: - * - * #samples W_L = H_L = - * 2 ceiling(W_L / 2) * 4 HL [no adjustment] - * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4 - * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4 - * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8" - * - * For interleaved samples (4x), where pixels - * - * (x, y ) (x+1, y ) - * (x, y+1) (x+1, y+1) - * - * would be is occupied by - * - * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1) - * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1) - * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3) - * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3) - * - * Thus the need to - * - * w = align(w, 2) * 2; - * y = align(y, 2) * 2; - */ - if (layout->interleaved_samples) { - switch (templ->nr_samples) { - case 0: - case 1: - break; - case 2: - w = align(w, 2) * 2; - break; - case 4: - w = align(w, 2) * 2; - h = align(h, 2) * 2; - break; - case 8: - w = align(w, 2) * 4; - h = align(h, 2) * 2; - break; - case 16: - w = align(w, 2) * 4; - h = align(h, 2) * 4; - break; - default: - assert(!"unsupported sample count"); - break; - } - } - - /* - * From the Ivy Bridge PRM, volume 1 part 1, page 108: - * - * "For separate stencil buffer, the width must be mutiplied by 2 and - * height divided by 2..." - * - * To make things easier (for transfer), we will just double the stencil - * stride in 3DSTATE_STENCIL_BUFFER. 
- */ - w = align(w, layout->align_i); - h = align(h, layout->align_j); - - *width = w; - *height = h; -} - -static unsigned -layout_get_num_layers(const struct ilo_layout *layout, - const struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - unsigned num_layers = templ->array_size; - - /* samples of the same index are stored in a layer */ - if (templ->nr_samples > 1 && !layout->interleaved_samples) - num_layers *= templ->nr_samples; - - return num_layers; -} - -static void -layout_init_layer_height(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - unsigned num_layers; - - if (layout->walk != ILO_LAYOUT_WALK_LAYER) - return; - - num_layers = layout_get_num_layers(layout, params); - if (num_layers <= 1) - return; - - /* - * From the Sandy Bridge PRM, volume 1 part 1, page 115: - * - * "The following equation is used for surface formats other than - * compressed textures: - * - * QPitch = (h0 + h1 + 11j)" - * - * "The equation for compressed textures (BC* and FXT1 surface formats) - * follows: - * - * QPitch = (h0 + h1 + 11j) / 4" - * - * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the - * value calculated in the equation above, for every other odd Surface - * Height starting from 1 i.e. 1,5,9,13" - * - * From the Ivy Bridge PRM, volume 1 part 1, page 111-112: - * - * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth - * buffer and stencil buffer have an implied value of ARYSPC_FULL): - * - * QPitch = (h0 + h1 + 12j) - * QPitch = (h0 + h1 + 12j) / 4 (compressed) - * - * (There are many typos or missing words here...)" - * - * To access the N-th slice, an offset of (Stride * QPitch * N) is added to - * the base address. The PRM divides QPitch by 4 for compressed formats - * because the block height for those formats are 4, and it wants QPitch to - * mean the number of memory rows, as opposed to texel rows, between - * slices. 
Since we use texel rows everywhere, we do not need to divide - * QPitch by 4. - */ - layout->layer_height = params->h0 + params->h1 + - ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * layout->align_j; - - if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 && - layout->height0 % 4 == 1) - layout->layer_height += 4; - - params->max_y += layout->layer_height * (num_layers - 1); -} - -static void -layout_init_lods(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - unsigned cur_x, cur_y; - unsigned lv; - - cur_x = 0; - cur_y = 0; - for (lv = 0; lv <= templ->last_level; lv++) { - unsigned lod_w, lod_h; - - layout_get_slice_size(layout, params, lv, &lod_w, &lod_h); - - layout->lods[lv].x = cur_x; - layout->lods[lv].y = cur_y; - layout->lods[lv].slice_width = lod_w; - layout->lods[lv].slice_height = lod_h; - - switch (layout->walk) { - case ILO_LAYOUT_WALK_LOD: - lod_h *= layout_get_num_layers(layout, params); - if (lv == 1) - cur_x += lod_w; - else - cur_y += lod_h; - - /* every LOD begins at tile boundaries */ - if (templ->last_level > 0) { - assert(layout->format == PIPE_FORMAT_S8_UINT); - cur_x = align(cur_x, 64); - cur_y = align(cur_y, 64); - } - break; - case ILO_LAYOUT_WALK_LAYER: - /* MIPLAYOUT_BELOW */ - if (lv == 1) - cur_x += lod_w; - else - cur_y += lod_h; - break; - case ILO_LAYOUT_WALK_3D: - { - const unsigned num_slices = u_minify(templ->depth0, lv); - const unsigned num_slices_per_row = 1 << lv; - const unsigned num_rows = - (num_slices + num_slices_per_row - 1) / num_slices_per_row; - - lod_w *= num_slices_per_row; - lod_h *= num_rows; - - cur_y += lod_h; - } - break; - } - - if (params->max_x < layout->lods[lv].x + lod_w) - params->max_x = layout->lods[lv].x + lod_w; - if (params->max_y < layout->lods[lv].y + lod_h) - params->max_y = layout->lods[lv].y + lod_h; - } - - if (layout->walk == ILO_LAYOUT_WALK_LAYER) { - params->h0 = layout->lods[0].slice_height; - - 
if (templ->last_level > 0) - params->h1 = layout->lods[1].slice_height; - else - layout_get_slice_size(layout, params, 1, &cur_x, &params->h1); - } -} - -static void -layout_init_alignments(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - - /* - * From the Sandy Bridge PRM, volume 1 part 1, page 113: - * - * "surface format align_i align_j - * YUV 4:2:2 formats 4 *see below - * BC1-5 4 4 - * FXT1 8 4 - * all other formats 4 *see below" - * - * "- align_j = 4 for any depth buffer - * - align_j = 2 for separate stencil buffer - * - align_j = 4 for any render target surface is multisampled (4x) - * - align_j = 4 for any render target surface with Surface Vertical - * Alignment = VALIGN_4 - * - align_j = 2 for any render target surface with Surface Vertical - * Alignment = VALIGN_2 - * - align_j = 2 for all other render target surface - * - align_j = 2 for any sampling engine surface with Surface Vertical - * Alignment = VALIGN_2 - * - align_j = 4 for any sampling engine surface with Surface Vertical - * Alignment = VALIGN_4" - * - * From the Sandy Bridge PRM, volume 4 part 1, page 86: - * - * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if - * the Surface Format is 96 bits per element (BPE)." 
- * - * They can be rephrased as - * - * align_i align_j - * compressed formats block width block height - * PIPE_FORMAT_S8_UINT 4 2 - * other depth/stencil formats 4 4 - * 4x multisampled 4 4 - * bpp 96 4 2 - * others 4 2 or 4 - */ - - /* - * From the Ivy Bridge PRM, volume 1 part 1, page 110: - * - * "surface defined by surface format align_i align_j - * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4 - * not D16_UNORM 4 4 - * 3DSTATE_STENCIL_BUFFER N/A 8 8 - * SURFACE_STATE BC*, ETC*, EAC* 4 4 - * FXT1 8 4 - * all others (set by SURFACE_STATE)" - * - * From the Ivy Bridge PRM, volume 4 part 1, page 63: - * - * "- This field (Surface Vertical Aligment) is intended to be set to - * VALIGN_4 if the surface was rendered as a depth buffer, for a - * multisampled (4x) render target, or for a multisampled (8x) - * render target, since these surfaces support only alignment of 4. - * - Use of VALIGN_4 for other surfaces is supported, but uses more - * memory. - * - This field must be set to VALIGN_4 for all tiled Y Render Target - * surfaces. - * - Value of 1 is not supported for format YCRCB_NORMAL (0x182), - * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190) - * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field - * must be set to VALIGN_4." - * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT." - * - * "- This field (Surface Horizontal Aligment) is intended to be set to - * HALIGN_8 only if the surface was rendered as a depth buffer with - * Z16 format or a stencil buffer, since these surfaces support only - * alignment of 8. - * - Use of HALIGN_8 for other surfaces is supported, but uses more - * memory. - * - This field must be set to HALIGN_4 if the Surface Format is BC*. - * - This field must be set to HALIGN_8 if the Surface Format is - * FXT1." 
- * - * They can be rephrased as - * - * align_i align_j - * compressed formats block width block height - * PIPE_FORMAT_Z16_UNORM 8 4 - * PIPE_FORMAT_S8_UINT 8 8 - * other depth/stencil formats 4 4 - * 2x or 4x multisampled 4 or 8 4 - * tiled Y 4 or 8 4 (if rt) - * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2 - * others 4 or 8 2 or 4 - */ - - if (params->compressed) { - /* this happens to be the case */ - layout->align_i = layout->block_width; - layout->align_j = layout->block_height; - } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { - if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) { - switch (layout->format) { - case PIPE_FORMAT_Z16_UNORM: - layout->align_i = 8; - layout->align_j = 4; - break; - case PIPE_FORMAT_S8_UINT: - layout->align_i = 8; - layout->align_j = 8; - break; - default: - layout->align_i = 4; - layout->align_j = 4; - break; - } - } else { - switch (layout->format) { - case PIPE_FORMAT_S8_UINT: - layout->align_i = 4; - layout->align_j = 2; - break; - default: - layout->align_i = 4; - layout->align_j = 4; - break; - } - } - } else { - const bool valign_4 = - (templ->nr_samples > 1) || - (ilo_dev_gen(params->dev) >= ILO_GEN(8)) || - (ilo_dev_gen(params->dev) >= ILO_GEN(7) && - layout->tiling == GEN6_TILING_Y && - (templ->bind & PIPE_BIND_RENDER_TARGET)); - - if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && - ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4) - assert(layout->format != PIPE_FORMAT_R32G32B32_FLOAT); - - layout->align_i = 4; - layout->align_j = (valign_4) ? 4 : 2; - } - - /* - * the fact that align i and j are multiples of block width and height - * respectively is what makes the size of the bo a multiple of the block - * size, slices start at block boundaries, and many of the computations - * work. 
- */ - assert(layout->align_i % layout->block_width == 0); - assert(layout->align_j % layout->block_height == 0); - - /* make sure align() works */ - assert(util_is_power_of_two(layout->align_i) && - util_is_power_of_two(layout->align_j)); - assert(util_is_power_of_two(layout->block_width) && - util_is_power_of_two(layout->block_height)); -} - -static unsigned -layout_get_valid_tilings(const struct ilo_layout *layout, - const struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - const enum pipe_format format = layout->format; - unsigned valid_tilings = LAYOUT_TILING_ALL; - - /* - * From the Sandy Bridge PRM, volume 1 part 2, page 32: - * - * "Display/Overlay Y-Major not supported. - * X-Major required for Async Flips" - */ - if (unlikely(templ->bind & PIPE_BIND_SCANOUT)) - valid_tilings &= LAYOUT_TILING_X; - - /* - * From the Sandy Bridge PRM, volume 3 part 2, page 158: - * - * "The cursor surface address must be 4K byte aligned. The cursor must - * be in linear memory, it cannot be tiled." - */ - if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR))) - valid_tilings &= LAYOUT_TILING_NONE; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 318: - * - * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear - * Depth Buffer is not supported." - * - * "The Depth Buffer, if tiled, must use Y-Major tiling." - * - * From the Sandy Bridge PRM, volume 1 part 2, page 22: - * - * "W-Major Tile Format is used for separate stencil." - */ - if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { - switch (format) { - case PIPE_FORMAT_S8_UINT: - valid_tilings &= LAYOUT_TILING_W; - break; - default: - valid_tilings &= LAYOUT_TILING_Y; - break; - } - } - - if (templ->bind & PIPE_BIND_RENDER_TARGET) { - /* - * From the Sandy Bridge PRM, volume 1 part 2, page 32: - * - * "NOTE: 128BPE Format Color buffer ( render target ) MUST be - * either TileX or Linear." 
- * - * From the Haswell PRM, volume 5, page 32: - * - * "NOTE: 128 BPP format color buffer (render target) supports - * Linear, TiledX and TiledY." - */ - if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && layout->block_size == 16) - valid_tilings &= ~LAYOUT_TILING_Y; - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 63: - * - * "This field (Surface Vertical Aligment) must be set to VALIGN_4 - * for all tiled Y Render Target surfaces." - * - * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT." - */ - if (ilo_dev_gen(params->dev) >= ILO_GEN(7) && - ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && - layout->format == PIPE_FORMAT_R32G32B32_FLOAT) - valid_tilings &= ~LAYOUT_TILING_Y; - - valid_tilings &= ~LAYOUT_TILING_W; - } - - if (templ->bind & PIPE_BIND_SAMPLER_VIEW) { - if (ilo_dev_gen(params->dev) < ILO_GEN(8)) - valid_tilings &= ~LAYOUT_TILING_W; - } - - /* no conflicting binding flags */ - assert(valid_tilings); - - return valid_tilings; -} - -static void -layout_init_tiling(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - unsigned preferred_tilings; - - layout->valid_tilings = layout_get_valid_tilings(layout, params); - - preferred_tilings = layout->valid_tilings; - - /* no fencing nor BLT support */ - if (preferred_tilings & ~LAYOUT_TILING_W) - preferred_tilings &= ~LAYOUT_TILING_W; - - if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) { - /* - * heuristically set a minimum width/height for enabling tiling - */ - if (layout->width0 < 64 && (preferred_tilings & ~LAYOUT_TILING_X)) - preferred_tilings &= ~LAYOUT_TILING_X; - - if ((layout->width0 < 32 || layout->height0 < 16) && - (layout->width0 < 16 || layout->height0 < 32) && - (preferred_tilings & ~LAYOUT_TILING_Y)) - preferred_tilings &= ~LAYOUT_TILING_Y; - } else { - /* force linear if we are not sure where the texture is bound to */ - if (preferred_tilings & LAYOUT_TILING_NONE) - preferred_tilings &= 
LAYOUT_TILING_NONE; - } - - /* prefer tiled over linear */ - if (preferred_tilings & LAYOUT_TILING_Y) - layout->tiling = GEN6_TILING_Y; - else if (preferred_tilings & LAYOUT_TILING_X) - layout->tiling = GEN6_TILING_X; - else if (preferred_tilings & LAYOUT_TILING_W) - layout->tiling = GEN8_TILING_W; - else - layout->tiling = GEN6_TILING_NONE; -} - -static void -layout_init_walk_gen7(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - - /* - * It is not explicitly states, but render targets are expected to be - * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected - * to be IMS (samples interleaved). - * - * See "Multisampled Surface Storage Format" field of SURFACE_STATE. - */ - if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { - /* - * From the Ivy Bridge PRM, volume 1 part 1, page 111: - * - * "note that the depth buffer and stencil buffer have an implied - * value of ARYSPC_FULL" - */ - layout->walk = (templ->target == PIPE_TEXTURE_3D) ? - ILO_LAYOUT_WALK_3D : ILO_LAYOUT_WALK_LAYER; - - layout->interleaved_samples = true; - } else { - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 66: - * - * "If Multisampled Surface Storage Format is MSFMT_MSS and Number - * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface - * Array Spacing) must be set to ARYSPC_LOD0." - * - * As multisampled resources are not mipmapped, we never use - * ARYSPC_FULL for them. - */ - if (templ->nr_samples > 1) - assert(templ->last_level == 0); - - layout->walk = - (templ->target == PIPE_TEXTURE_3D) ? ILO_LAYOUT_WALK_3D : - (templ->last_level > 0) ? 
ILO_LAYOUT_WALK_LAYER : - ILO_LAYOUT_WALK_LOD; - - layout->interleaved_samples = false; - } -} - -static void -layout_init_walk_gen6(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - /* - * From the Sandy Bridge PRM, volume 1 part 1, page 115: - * - * "The separate stencil buffer does not support mip mapping, thus the - * storage for LODs other than LOD 0 is not needed. The following - * QPitch equation applies only to the separate stencil buffer: - * - * QPitch = h_0" - * - * GEN6 does not support compact spacing otherwise. - */ - layout->walk = - (params->templ->target == PIPE_TEXTURE_3D) ? ILO_LAYOUT_WALK_3D : - (layout->format == PIPE_FORMAT_S8_UINT) ? ILO_LAYOUT_WALK_LOD : - ILO_LAYOUT_WALK_LAYER; - - /* GEN6 supports only interleaved samples */ - layout->interleaved_samples = true; -} - -static void -layout_init_walk(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) - layout_init_walk_gen7(layout, params); - else - layout_init_walk_gen6(layout, params); -} - -static void -layout_init_size_and_format(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - enum pipe_format format = templ->format; - bool require_separate_stencil = false; - - layout->width0 = templ->width0; - layout->height0 = templ->height0; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 317: - * - * "This field (Separate Stencil Buffer Enable) must be set to the same - * value (enabled or disabled) as Hierarchical Depth Buffer Enable." - * - * GEN7+ requires separate stencil buffers. 
- */ - if (templ->bind & PIPE_BIND_DEPTH_STENCIL) { - if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) - require_separate_stencil = true; - else - require_separate_stencil = (layout->aux == ILO_LAYOUT_AUX_HIZ); - } - - switch (format) { - case PIPE_FORMAT_ETC1_RGB8: - format = PIPE_FORMAT_R8G8B8X8_UNORM; - break; - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - if (require_separate_stencil) { - format = PIPE_FORMAT_Z24X8_UNORM; - layout->separate_stencil = true; - } - break; - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - if (require_separate_stencil) { - format = PIPE_FORMAT_Z32_FLOAT; - layout->separate_stencil = true; - } - break; - default: - break; - } - - layout->format = format; - layout->block_width = util_format_get_blockwidth(format); - layout->block_height = util_format_get_blockheight(format); - layout->block_size = util_format_get_blocksize(format); - - params->compressed = util_format_is_compressed(format); -} - -static bool -layout_want_mcs(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - bool want_mcs = false; - - /* MCS is for RT on GEN7+ */ - if (ilo_dev_gen(params->dev) < ILO_GEN(7)) - return false; - - if (templ->target != PIPE_TEXTURE_2D || - !(templ->bind & PIPE_BIND_RENDER_TARGET)) - return false; - - /* - * From the Ivy Bridge PRM, volume 4 part 1, page 77: - * - * "For Render Target and Sampling Engine Surfaces:If the surface is - * multisampled (Number of Multisamples any value other than - * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled." - * - * "This field must be set to 0 for all SINT MSRTs when all RT channels - * are not written" - */ - if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) { - want_mcs = true; - } else if (templ->nr_samples <= 1) { - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 326: - * - * "When MCS is buffer is used for color clear of non-multisampler - * render target, the following restrictions apply. 
- * - Support is limited to tiled render targets. - * - Support is for non-mip-mapped and non-array surface types - * only. - * - Clear is supported only on the full RT; i.e., no partial clear - * or overlapping clears. - * - MCS buffer for non-MSRT is supported only for RT formats - * 32bpp, 64bpp and 128bpp. - * ..." - */ - if (layout->tiling != GEN6_TILING_NONE && - templ->last_level == 0 && templ->array_size == 1) { - switch (layout->block_size) { - case 4: - case 8: - case 16: - want_mcs = true; - break; - default: - break; - } - } - } - - return want_mcs; -} - -static bool -layout_want_hiz(const struct ilo_layout *layout, - const struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - const struct util_format_description *desc = - util_format_description(templ->format); - - if (ilo_debug & ILO_DEBUG_NOHIZ) - return false; - - if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL)) - return false; - - if (!util_format_has_depth(desc)) - return false; - - /* no point in having HiZ */ - if (templ->usage == PIPE_USAGE_STAGING) - return false; - - /* - * As can be seen in layout_calculate_hiz_size(), HiZ may not be enabled - * for every level. This is generally fine except on GEN6, where HiZ and - * separate stencil are enabled and disabled at the same time. When the - * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ - * can result in incompatible formats. 
- */ - if (ilo_dev_gen(params->dev) == ILO_GEN(6) && - templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && - templ->last_level) - return false; - - return true; -} - -static void -layout_init_aux(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - if (layout_want_hiz(layout, params)) - layout->aux = ILO_LAYOUT_AUX_HIZ; - else if (layout_want_mcs(layout, params)) - layout->aux = ILO_LAYOUT_AUX_MCS; -} - -static void -layout_align(struct ilo_layout *layout, struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - int align_w = 1, align_h = 1, pad_h = 0; - - /* - * From the Sandy Bridge PRM, volume 1 part 1, page 118: - * - * "To determine the necessary padding on the bottom and right side of - * the surface, refer to the table in Section 7.18.3.4 for the i and j - * parameters for the surface format in use. The surface must then be - * extended to the next multiple of the alignment unit size in each - * dimension, and all texels contained in this extended surface must - * have valid GTT entries." - * - * "For cube surfaces, an additional two rows of padding are required - * at the bottom of the surface. This must be ensured regardless of - * whether the surface is stored tiled or linear. This is due to the - * potential rotation of cache line orientation from memory to cache." - * - * "For compressed textures (BC* and FXT1 surface formats), padding at - * the bottom of the surface is to an even compressed row, which is - * equal to a multiple of 8 uncompressed texel rows. Thus, for padding - * purposes, these surfaces behave as if j = 8 only for surface - * padding purposes. The value of 4 for j still applies for mip level - * alignment and QPitch calculation." 
- */ - if (templ->bind & PIPE_BIND_SAMPLER_VIEW) { - align_w = MAX2(align_w, layout->align_i); - align_h = MAX2(align_h, layout->align_j); - - if (templ->target == PIPE_TEXTURE_CUBE) - pad_h += 2; - - if (params->compressed) - align_h = MAX2(align_h, layout->align_j * 2); - } - - /* - * From the Sandy Bridge PRM, volume 1 part 1, page 118: - * - * "If the surface contains an odd number of rows of data, a final row - * below the surface must be allocated." - */ - if (templ->bind & PIPE_BIND_RENDER_TARGET) - align_h = MAX2(align_h, 2); - - /* - * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In - * ilo_texture_can_enable_hiz(), we always return true for the first slice. - * To avoid out-of-bound access, we have to pad. - */ - if (layout->aux == ILO_LAYOUT_AUX_HIZ && - templ->last_level == 0 && - templ->array_size == 1 && - templ->depth0 == 1) { - align_w = MAX2(align_w, 8); - align_h = MAX2(align_h, 4); - } - - params->max_x = align(params->max_x, align_w); - params->max_y = align(params->max_y + pad_h, align_h); -} - -/* note that this may force the texture to be linear */ -static void -layout_calculate_bo_size(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - assert(params->max_x % layout->block_width == 0); - assert(params->max_y % layout->block_height == 0); - assert(layout->layer_height % layout->block_height == 0); - - layout->bo_stride = - (params->max_x / layout->block_width) * layout->block_size; - layout->bo_height = params->max_y / layout->block_height; - - while (true) { - unsigned w = layout->bo_stride, h = layout->bo_height; - unsigned align_w, align_h; - - /* - * From the Haswell PRM, volume 5, page 163: - * - * "For linear surfaces, additional padding of 64 bytes is required - * at the bottom of the surface. This is in addition to the padding - * required above." 
- */ - if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) && - (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) && - layout->tiling == GEN6_TILING_NONE) - h += (64 + layout->bo_stride - 1) / layout->bo_stride; - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 81: - * - * "- For linear render target surfaces, the pitch must be a - * multiple of the element size for non-YUV surface formats. - * Pitch must be a multiple of 2 * element size for YUV surface - * formats. - * - For other linear surfaces, the pitch can be any multiple of - * bytes. - * - For tiled surfaces, the pitch must be a multiple of the tile - * width." - * - * Different requirements may exist when the bo is used in different - * places, but our alignments here should be good enough that we do not - * need to check layout->templ->bind. - */ - switch (layout->tiling) { - case GEN6_TILING_X: - align_w = 512; - align_h = 8; - break; - case GEN6_TILING_Y: - align_w = 128; - align_h = 32; - break; - case GEN8_TILING_W: - /* - * From the Sandy Bridge PRM, volume 1 part 2, page 22: - * - * "A 4KB tile is subdivided into 8-high by 8-wide array of - * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8 - * bytes." - */ - align_w = 64; - align_h = 64; - break; - default: - assert(layout->tiling == GEN6_TILING_NONE); - /* some good enough values */ - align_w = 64; - align_h = 2; - break; - } - - w = align(w, align_w); - h = align(h, align_h); - - /* make sure the bo is mappable */ - if (layout->tiling != GEN6_TILING_NONE) { - /* - * Usually only the first 256MB of the GTT is mappable. - * - * See also how intel_context::max_gtt_map_object_size is calculated. - */ - const size_t mappable_gtt_size = 256 * 1024 * 1024; - - /* - * Be conservative. We may be able to switch from VALIGN_4 to - * VALIGN_2 if the layout was Y-tiled, but let's keep it simple. 
- */ - if (mappable_gtt_size / w / 4 < h) { - if (layout->valid_tilings & LAYOUT_TILING_NONE) { - layout->tiling = GEN6_TILING_NONE; - /* MCS support for non-MSRTs is limited to tiled RTs */ - if (layout->aux == ILO_LAYOUT_AUX_MCS && - params->templ->nr_samples <= 1) - layout->aux = ILO_LAYOUT_AUX_NONE; - - continue; - } else { - ilo_warn("cannot force texture to be linear\n"); - } - } - } - - layout->bo_stride = w; - layout->bo_height = h; - break; - } -} - -static void -layout_calculate_hiz_size(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - const unsigned hz_align_j = 8; - enum ilo_layout_walk_type hz_walk; - unsigned hz_width, hz_height, lv; - unsigned hz_clear_w, hz_clear_h; - - assert(layout->aux == ILO_LAYOUT_AUX_HIZ); - - assert(layout->walk == ILO_LAYOUT_WALK_LAYER || - layout->walk == ILO_LAYOUT_WALK_3D); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 312: - * - * "The hierarchical depth buffer does not support the LOD field, it is - * assumed by hardware to be zero. A separate hierarachical depth - * buffer is required for each LOD used, and the corresponding - * buffer's state delivered to hardware each time a new depth buffer - * state with modified LOD is delivered." - * - * We will put all LODs in a single bo with ILO_LAYOUT_WALK_LOD. - */ - if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) - hz_walk = layout->walk; - else - hz_walk = ILO_LAYOUT_WALK_LOD; - - /* - * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge - * PRM, volume 2 part 1, page 312-313. - * - * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a - * memory row. 
- */ - switch (hz_walk) { - case ILO_LAYOUT_WALK_LOD: - { - unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS]; - unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS]; - unsigned cur_tx, cur_ty; - - /* figure out the tile offsets of LODs */ - hz_width = 0; - hz_height = 0; - cur_tx = 0; - cur_ty = 0; - for (lv = 0; lv <= templ->last_level; lv++) { - unsigned tw, th; - - lod_tx[lv] = cur_tx; - lod_ty[lv] = cur_ty; - - tw = align(layout->lods[lv].slice_width, 16); - th = align(layout->lods[lv].slice_height, hz_align_j) * - templ->array_size / 2; - /* convert to Y-tiles */ - tw = align(tw, 128) / 128; - th = align(th, 32) / 32; - - if (hz_width < cur_tx + tw) - hz_width = cur_tx + tw; - if (hz_height < cur_ty + th) - hz_height = cur_ty + th; - - if (lv == 1) - cur_tx += tw; - else - cur_ty += th; - } - - /* convert tile offsets to memory offsets */ - for (lv = 0; lv <= templ->last_level; lv++) { - layout->aux_offsets[lv] = - (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096; - } - hz_width *= 128; - hz_height *= 32; - } - break; - case ILO_LAYOUT_WALK_LAYER: - { - const unsigned h0 = align(params->h0, hz_align_j); - const unsigned h1 = align(params->h1, hz_align_j); - const unsigned htail = - ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 
12 : 11) * hz_align_j; - const unsigned hz_qpitch = h0 + h1 + htail; - - hz_width = align(layout->lods[0].slice_width, 16); - - hz_height = hz_qpitch * templ->array_size / 2; - if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) - hz_height = align(hz_height, 8); - - layout->aux_layer_height = hz_qpitch; - } - break; - case ILO_LAYOUT_WALK_3D: - hz_width = align(layout->lods[0].slice_width, 16); - - hz_height = 0; - for (lv = 0; lv <= templ->last_level; lv++) { - const unsigned h = align(layout->lods[lv].slice_height, hz_align_j); - /* according to the formula, slices are packed together vertically */ - hz_height += h * u_minify(templ->depth0, lv); - } - hz_height /= 2; - break; - default: - assert(!"unknown HiZ walk"); - hz_width = 0; - hz_height = 0; - break; - } - - /* - * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks. - * Experiments on Haswell show that aligning the RECTLIST primitive and - * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The LOD sizes must be - * aligned. 
- */ - hz_clear_w = 8; - hz_clear_h = 4; - switch (templ->nr_samples) { - case 0: - case 1: - default: - break; - case 2: - hz_clear_w /= 2; - break; - case 4: - hz_clear_w /= 2; - hz_clear_h /= 2; - break; - case 8: - hz_clear_w /= 4; - hz_clear_h /= 2; - break; - case 16: - hz_clear_w /= 4; - hz_clear_h /= 4; - break; - } - - for (lv = 0; lv <= templ->last_level; lv++) { - if (u_minify(layout->width0, lv) % hz_clear_w || - u_minify(layout->height0, lv) % hz_clear_h) - break; - layout->aux_enables |= 1 << lv; - } - - /* we padded to allow this in layout_align() */ - if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1) - layout->aux_enables |= 0x1; - - /* align to Y-tile */ - layout->aux_stride = align(hz_width, 128); - layout->aux_height = align(hz_height, 32); -} - -static void -layout_calculate_mcs_size(struct ilo_layout *layout, - struct ilo_layout_params *params) -{ - const struct pipe_resource *templ = params->templ; - int mcs_width, mcs_height, mcs_cpp; - int downscale_x, downscale_y; - - assert(layout->aux == ILO_LAYOUT_AUX_MCS); - - if (templ->nr_samples > 1) { - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear - * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The - * need of scale down could be that the clear rectangle is used to clear - * the MCS instead of the RT. - * - * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT. The - * 2x2 factor could come from that the hardware writes 128 bits (an - * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in - * the RT. For 4X MSAA, we need 8 bits in MCS for every pixel in the - * RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2 - * pixel block in the RT. 
- */ - switch (templ->nr_samples) { - case 2: - case 4: - downscale_x = 8; - downscale_y = 2; - mcs_cpp = 1; - break; - case 8: - downscale_x = 2; - downscale_y = 2; - mcs_cpp = 4; - break; - case 16: - downscale_x = 2; - downscale_y = 1; - mcs_cpp = 8; - break; - default: - assert(!"unsupported sample count"); - return; - break; - } - - /* - * It also appears that the 2x2 subspans generated by the scaled-down - * clear rectangle cannot be masked. The scale-down clear rectangle - * thus must be aligned to 2x2, and we need to pad. - */ - mcs_width = align(layout->width0, downscale_x * 2); - mcs_height = align(layout->height0, downscale_y * 2); - } else { - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 327: - * - * " Pixels Lines - * TiledY RT CL - * bpp - * 32 8 4 - * 64 4 4 - * 128 2 4 - * - * TiledX RT CL - * bpp - * 32 16 2 - * 64 8 2 - * 128 4 2" - * - * This table and the two following tables define the RT alignments, the - * clear rectangle alignments, and the clear rectangle scale factors. - * Viewing the RT alignments as the sizes of 128-byte blocks, we can see - * that the clear rectangle alignments are 16x32 blocks, and the clear - * rectangle scale factors are 8x16 blocks. - * - * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the - * RT. Similar to the MSAA cases, we can argue that an OWord maps to - * 8x16 blocks. - * - * One problem with this reasoning is that a Y-tile in MCS has 8x32 - * OWords and maps to 64x512 128-byte blocks. This differs from i965, - * which says that a Y-tile maps to 128x256 blocks (\see - * intel_get_non_msrt_mcs_alignment). It does not really change - * anything except for the size of the allocated MCS. Let's see if we - * hit out-of-bound access. 
- */ - switch (layout->tiling) { - case GEN6_TILING_X: - downscale_x = 64 / layout->block_size; - downscale_y = 2; - break; - case GEN6_TILING_Y: - downscale_x = 32 / layout->block_size; - downscale_y = 4; - break; - default: - assert(!"unsupported tiling mode"); - return; - break; - } - - downscale_x *= 8; - downscale_y *= 16; - - /* - * From the Haswell PRM, volume 7, page 652: - * - * "Clear rectangle must be aligned to two times the number of - * pixels in the table shown below due to 16X16 hashing across the - * slice." - * - * The scaled-down clear rectangle must be aligned to 4x4 instead of - * 2x2, and we need to pad. - */ - mcs_width = align(layout->width0, downscale_x * 4) / downscale_x; - mcs_height = align(layout->height0, downscale_y * 4) / downscale_y; - mcs_cpp = 16; /* an OWord */ - } - - layout->aux_enables = (1 << (templ->last_level + 1)) - 1; - /* align to Y-tile */ - layout->aux_stride = align(mcs_width * mcs_cpp, 128); - layout->aux_height = align(mcs_height, 32); -} - -/** - * The texutre is for transfer only. We can define our own layout to save - * space. - */ -static void -layout_init_for_transfer(struct ilo_layout *layout, - const struct ilo_dev *dev, - const struct pipe_resource *templ) -{ - const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ? 
- templ->depth0 : templ->array_size; - unsigned layer_width, layer_height; - - assert(templ->last_level == 0); - assert(templ->nr_samples <= 1); - - layout->aux = ILO_LAYOUT_AUX_NONE; - layout->width0 = templ->width0; - layout->height0 = templ->height0; - layout->format = templ->format; - layout->block_width = util_format_get_blockwidth(templ->format); - layout->block_height = util_format_get_blockheight(templ->format); - layout->block_size = util_format_get_blocksize(templ->format); - layout->walk = ILO_LAYOUT_WALK_LOD; - - layout->valid_tilings = LAYOUT_TILING_NONE; - layout->tiling = GEN6_TILING_NONE; - - layout->align_i = layout->block_width; - layout->align_j = layout->block_height; - - assert(util_is_power_of_two(layout->block_width) && - util_is_power_of_two(layout->block_height)); - - /* use packed layout */ - layer_width = align(templ->width0, layout->align_i); - layer_height = align(templ->height0, layout->align_j); - - layout->lods[0].slice_width = layer_width; - layout->lods[0].slice_height = layer_height; - - layout->bo_stride = (layer_width / layout->block_width) * layout->block_size; - layout->bo_stride = align(layout->bo_stride, 64); - - layout->bo_height = (layer_height / layout->block_height) * num_layers; -} - -/** - * Initialize the layout. Callers should zero-initialize \p layout first. 
- */ -void ilo_layout_init(struct ilo_layout *layout, - const struct ilo_dev *dev, - const struct pipe_resource *templ) -{ - struct ilo_layout_params params; - bool transfer_only; - - /* use transfer layout when the texture is never bound to GPU */ - transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE | - PIPE_BIND_TRANSFER_READ)); - if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) { - layout_init_for_transfer(layout, dev, templ); - return; - } - - memset(¶ms, 0, sizeof(params)); - params.dev = dev; - params.templ = templ; - - /* note that there are dependencies between these functions */ - layout_init_aux(layout, ¶ms); - layout_init_size_and_format(layout, ¶ms); - layout_init_walk(layout, ¶ms); - layout_init_tiling(layout, ¶ms); - layout_init_alignments(layout, ¶ms); - layout_init_lods(layout, ¶ms); - layout_init_layer_height(layout, ¶ms); - - layout_align(layout, ¶ms); - layout_calculate_bo_size(layout, ¶ms); - - switch (layout->aux) { - case ILO_LAYOUT_AUX_HIZ: - layout_calculate_hiz_size(layout, ¶ms); - break; - case ILO_LAYOUT_AUX_MCS: - layout_calculate_mcs_size(layout, ¶ms); - break; - default: - break; - } -} - -/** - * Update the tiling mode and bo stride (for imported resources). - */ -bool -ilo_layout_update_for_imported_bo(struct ilo_layout *layout, - enum gen_surface_tiling tiling, - unsigned bo_stride) -{ - if (!(layout->valid_tilings & (1 << tiling))) - return false; - - if ((tiling == GEN6_TILING_X && bo_stride % 512) || - (tiling == GEN6_TILING_Y && bo_stride % 128) || - (tiling == GEN8_TILING_W && bo_stride % 64)) - return false; - - layout->tiling = tiling; - layout->bo_stride = bo_stride; - - return true; -} diff --git a/src/gallium/drivers/ilo/ilo_layout.h b/src/gallium/drivers/ilo/ilo_layout.h deleted file mode 100644 index 36fd02e652b..00000000000 --- a/src/gallium/drivers/ilo/ilo_layout.h +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 2014 LunarG, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Chia-I Wu - */ - -#ifndef ILO_LAYOUT_H -#define ILO_LAYOUT_H - -#include "genhw/genhw.h" - -#include "ilo_common.h" - -struct pipe_resource; - -enum ilo_layout_walk_type { - /* - * Array layers of an LOD are packed together vertically. This maps to - * ARYSPC_LOD0 for non-mipmapped 2D textures, and is extended to support - * mipmapped stencil textures and HiZ on GEN6. - */ - ILO_LAYOUT_WALK_LOD, - - /* - * LODs of an array layer are packed together. This maps to ARYSPC_FULL - * and is used for mipmapped 2D textures. - */ - ILO_LAYOUT_WALK_LAYER, - - /* - * 3D slices of an LOD are packed together, horizontally with wrapping. - * Used for 3D textures. 
- */ - ILO_LAYOUT_WALK_3D, -}; - -enum ilo_layout_aux_type { - ILO_LAYOUT_AUX_NONE, - ILO_LAYOUT_AUX_HIZ, - ILO_LAYOUT_AUX_MCS, -}; - -struct ilo_layout_lod { - /* physical position */ - unsigned x; - unsigned y; - - /* - * Physical size of an LOD slice. There may be multiple slices when the - * walk type is not ILO_LAYOUT_WALK_LAYER. - */ - unsigned slice_width; - unsigned slice_height; -}; - -/** - * Texture layout. - */ -struct ilo_layout { - enum ilo_layout_aux_type aux; - - /* physical width0, height0, and format */ - unsigned width0; - unsigned height0; - enum pipe_format format; - bool separate_stencil; - - /* - * width, height, and size of pixel blocks, for conversion between 2D - * coordinates and memory offsets - */ - unsigned block_width; - unsigned block_height; - unsigned block_size; - - enum ilo_layout_walk_type walk; - bool interleaved_samples; - - /* bitmask of valid tiling modes */ - unsigned valid_tilings; - enum gen_surface_tiling tiling; - - /* mipmap alignments */ - unsigned align_i; - unsigned align_j; - - struct ilo_layout_lod lods[PIPE_MAX_TEXTURE_LEVELS]; - - /* physical height of layers for ILO_LAYOUT_WALK_LAYER */ - unsigned layer_height; - - /* distance in bytes between two pixel block rows */ - unsigned bo_stride; - /* number of pixel block rows */ - unsigned bo_height; - - /* bitmask of levels that can use aux */ - unsigned aux_enables; - unsigned aux_offsets[PIPE_MAX_TEXTURE_LEVELS]; - unsigned aux_layer_height; - unsigned aux_stride; - unsigned aux_height; -}; - -void ilo_layout_init(struct ilo_layout *layout, - const struct ilo_dev *dev, - const struct pipe_resource *templ); - -bool -ilo_layout_update_for_imported_bo(struct ilo_layout *layout, - enum gen_surface_tiling tiling, - unsigned bo_stride); - -/** - * Convert from pixel position to 2D memory offset. 
- */ -static inline void -ilo_layout_pos_to_mem(const struct ilo_layout *layout, - unsigned pos_x, unsigned pos_y, - unsigned *mem_x, unsigned *mem_y) -{ - assert(pos_x % layout->block_width == 0); - assert(pos_y % layout->block_height == 0); - - *mem_x = pos_x / layout->block_width * layout->block_size; - *mem_y = pos_y / layout->block_height; -} - -/** - * Convert from 2D memory offset to linear offset. - */ -static inline unsigned -ilo_layout_mem_to_linear(const struct ilo_layout *layout, - unsigned mem_x, unsigned mem_y) -{ - return mem_y * layout->bo_stride + mem_x; -} - -/** - * Convert from 2D memory offset to raw offset. - */ -static inline unsigned -ilo_layout_mem_to_raw(const struct ilo_layout *layout, - unsigned mem_x, unsigned mem_y) -{ - unsigned tile_w, tile_h; - - switch (layout->tiling) { - case GEN6_TILING_NONE: - tile_w = 1; - tile_h = 1; - break; - case GEN6_TILING_X: - tile_w = 512; - tile_h = 8; - break; - case GEN6_TILING_Y: - tile_w = 128; - tile_h = 32; - break; - case GEN8_TILING_W: - tile_w = 64; - tile_h = 64; - break; - default: - assert(!"unknown tiling"); - tile_w = 1; - tile_h = 1; - break; - } - - assert(mem_x % tile_w == 0); - assert(mem_y % tile_h == 0); - - return mem_y * layout->bo_stride + mem_x * tile_h; -} - -/** - * Return the stride, in bytes, between slices within a level. 
- */ -static inline unsigned -ilo_layout_get_slice_stride(const struct ilo_layout *layout, unsigned level) -{ - unsigned h; - - switch (layout->walk) { - case ILO_LAYOUT_WALK_LOD: - h = layout->lods[level].slice_height; - break; - case ILO_LAYOUT_WALK_LAYER: - h = layout->layer_height; - break; - case ILO_LAYOUT_WALK_3D: - if (level == 0) { - h = layout->lods[0].slice_height; - break; - } - /* fall through */ - default: - assert(!"no single stride to walk across slices"); - h = 0; - break; - } - - assert(h % layout->block_height == 0); - - return (h / layout->block_height) * layout->bo_stride; -} - -/** - * Return the physical size, in bytes, of a slice in a level. - */ -static inline unsigned -ilo_layout_get_slice_size(const struct ilo_layout *layout, unsigned level) -{ - const unsigned w = layout->lods[level].slice_width; - const unsigned h = layout->lods[level].slice_height; - - assert(w % layout->block_width == 0); - assert(h % layout->block_height == 0); - - return (w / layout->block_width * layout->block_size) * - (h / layout->block_height); -} - -/** - * Return the pixel position of a slice. 
- */ -static inline void -ilo_layout_get_slice_pos(const struct ilo_layout *layout, - unsigned level, unsigned slice, - unsigned *x, unsigned *y) -{ - switch (layout->walk) { - case ILO_LAYOUT_WALK_LOD: - *x = layout->lods[level].x; - *y = layout->lods[level].y + layout->lods[level].slice_height * slice; - break; - case ILO_LAYOUT_WALK_LAYER: - *x = layout->lods[level].x; - *y = layout->lods[level].y + layout->layer_height * slice; - break; - case ILO_LAYOUT_WALK_3D: - { - /* slices are packed horizontally with wrapping */ - const unsigned sx = slice & ((1 << level) - 1); - const unsigned sy = slice >> level; - - *x = layout->lods[level].x + layout->lods[level].slice_width * sx; - *y = layout->lods[level].y + layout->lods[level].slice_height * sy; - - /* should not overlap with the next level */ - if (level + 1 < Elements(layout->lods) && - layout->lods[level + 1].y) { - assert(*y + layout->lods[level].slice_height <= - layout->lods[level + 1].y); - } - break; - } - default: - assert(!"unknown layout walk type"); - *x = 0; - *y = 0; - break; - } - - /* should not exceed the bo size */ - assert(*y + layout->lods[level].slice_height <= - layout->bo_height * layout->block_height); -} - -#endif /* ILO_LAYOUT_H */ diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c index 8e59e0d0bb2..f5038952f43 100644 --- a/src/gallium/drivers/ilo/ilo_resource.c +++ b/src/gallium/drivers/ilo/ilo_resource.c @@ -25,7 +25,6 @@ * Chia-I Wu */ -#include "ilo_layout.h" #include "ilo_screen.h" #include "ilo_resource.h" @@ -164,11 +163,11 @@ tex_import_handle(struct ilo_texture *tex, unsigned long pitch; tex->bo = intel_winsys_import_handle(is->dev.winsys, name, handle, - tex->layout.bo_height, &tiling, &pitch); + tex->image.bo_height, &tiling, &pitch); if (!tex->bo) return false; - if (!ilo_layout_update_for_imported_bo(&tex->layout, + if (!ilo_image_update_for_imported_bo(&tex->image, winsys_to_surface_tiling(tiling), pitch)) { ilo_err("imported 
handle has incompatible tiling/pitch\n"); intel_bo_unref(tex->bo); @@ -188,15 +187,15 @@ tex_create_bo(struct ilo_texture *tex) struct intel_bo *bo; bo = intel_winsys_alloc_bo(is->dev.winsys, name, - tex->layout.bo_stride * tex->layout.bo_height, cpu_init); + tex->image.bo_stride * tex->image.bo_height, cpu_init); /* set the tiling for transfer and export */ - if (bo && (tex->layout.tiling == GEN6_TILING_X || - tex->layout.tiling == GEN6_TILING_Y)) { + if (bo && (tex->image.tiling == GEN6_TILING_X || + tex->image.tiling == GEN6_TILING_Y)) { const enum intel_tiling_mode tiling = - surface_to_winsys_tiling(tex->layout.tiling); + surface_to_winsys_tiling(tex->image.tiling); - if (intel_bo_set_tiling(bo, tiling, tex->layout.bo_stride)) { + if (intel_bo_set_tiling(bo, tiling, tex->image.bo_stride)) { intel_bo_unref(bo); bo = NULL; } @@ -229,7 +228,7 @@ tex_create_separate_stencil(struct ilo_texture *tex) tex->separate_s8 = ilo_texture(s8); - assert(tex->separate_s8->layout.format == PIPE_FORMAT_S8_UINT); + assert(tex->separate_s8->image.format == PIPE_FORMAT_S8_UINT); return true; } @@ -242,12 +241,12 @@ tex_create_hiz(struct ilo_texture *tex) unsigned lv; tex->aux_bo = intel_winsys_alloc_bo(is->dev.winsys, "hiz texture", - tex->layout.aux_stride * tex->layout.aux_height, false); + tex->image.aux_stride * tex->image.aux_height, false); if (!tex->aux_bo) return false; for (lv = 0; lv <= templ->last_level; lv++) { - if (tex->layout.aux_enables & (1 << lv)) { + if (tex->image.aux_enables & (1 << lv)) { const unsigned num_slices = (templ->target == PIPE_TEXTURE_3D) ? 
u_minify(templ->depth0, lv) : templ->array_size; unsigned flags = ILO_TEXTURE_HIZ; @@ -268,10 +267,10 @@ tex_create_mcs(struct ilo_texture *tex) { struct ilo_screen *is = ilo_screen(tex->base.screen); - assert(tex->layout.aux_enables == (1 << (tex->base.last_level + 1)) - 1); + assert(tex->image.aux_enables == (1 << (tex->base.last_level + 1)) - 1); tex->aux_bo = intel_winsys_alloc_bo(is->dev.winsys, "mcs texture", - tex->layout.aux_stride * tex->layout.aux_height, false); + tex->image.aux_stride * tex->image.aux_height, false); if (!tex->aux_bo) return false; @@ -306,19 +305,19 @@ tex_alloc_bos(struct ilo_texture *tex, } /* allocate separate stencil resource */ - if (tex->layout.separate_stencil && !tex_create_separate_stencil(tex)) + if (tex->image.separate_stencil && !tex_create_separate_stencil(tex)) return false; - switch (tex->layout.aux) { - case ILO_LAYOUT_AUX_HIZ: + switch (tex->image.aux) { + case ILO_IMAGE_AUX_HIZ: if (!tex_create_hiz(tex)) { /* Separate Stencil Buffer requires HiZ to be enabled */ if (ilo_dev_gen(&is->dev) == ILO_GEN(6) && - tex->layout.separate_stencil) + tex->image.separate_stencil) return false; } break; - case ILO_LAYOUT_AUX_MCS: + case ILO_IMAGE_AUX_MCS: if (!tex_create_mcs(tex)) return false; break; @@ -330,21 +329,21 @@ tex_alloc_bos(struct ilo_texture *tex, } static bool -tex_init_layout(struct ilo_texture *tex) +tex_init_image(struct ilo_texture *tex) { struct ilo_screen *is = ilo_screen(tex->base.screen); const struct pipe_resource *templ = &tex->base; - struct ilo_layout *layout = &tex->layout; + struct ilo_image *img = &tex->image; - ilo_layout_init(layout, &is->dev, templ); + ilo_image_init(img, &is->dev, templ); - if (layout->bo_height > ilo_max_resource_size / layout->bo_stride) + if (img->bo_height > ilo_max_resource_size / img->bo_stride) return false; if (templ->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) { /* require on-the-fly tiling/untiling or format conversion */ - if (layout->tiling == GEN8_TILING_W || 
layout->separate_stencil || - layout->format != templ->format) + if (img->tiling == GEN8_TILING_W || img->separate_stencil || + img->format != templ->format) return false; } @@ -371,7 +370,7 @@ tex_create(struct pipe_screen *screen, tex->imported = (handle != NULL); - if (!tex_init_layout(tex)) { + if (!tex_init_image(tex)) { FREE(tex); return NULL; } @@ -392,13 +391,13 @@ tex_get_handle(struct ilo_texture *tex, struct winsys_handle *handle) int err; /* must match what tex_create_bo() sets */ - if (tex->layout.tiling == GEN8_TILING_W) + if (tex->image.tiling == GEN8_TILING_W) tiling = INTEL_TILING_NONE; else - tiling = surface_to_winsys_tiling(tex->layout.tiling); + tiling = surface_to_winsys_tiling(tex->image.tiling); err = intel_winsys_export_handle(is->dev.winsys, tex->bo, tiling, - tex->layout.bo_stride, tex->layout.bo_height, handle); + tex->image.bo_stride, tex->image.bo_height, handle); return !err; } @@ -481,15 +480,15 @@ static boolean ilo_can_create_resource(struct pipe_screen *screen, const struct pipe_resource *templ) { - struct ilo_layout layout; + struct ilo_image img; if (templ->target == PIPE_BUFFER) return (templ->width0 <= ilo_max_resource_size); - memset(&layout, 0, sizeof(layout)); - ilo_layout_init(&layout, &ilo_screen(screen)->dev, templ); + memset(&img, 0, sizeof(img)); + ilo_image_init(&img, &ilo_screen(screen)->dev, templ); - return (layout.bo_height <= ilo_max_resource_size / layout.bo_stride); + return (img.bo_height <= ilo_max_resource_size / img.bo_stride); } static struct pipe_resource * diff --git a/src/gallium/drivers/ilo/ilo_resource.h b/src/gallium/drivers/ilo/ilo_resource.h index 930f4e9c9af..3b520aa5b27 100644 --- a/src/gallium/drivers/ilo/ilo_resource.h +++ b/src/gallium/drivers/ilo/ilo_resource.h @@ -29,9 +29,9 @@ #define ILO_RESOURCE_H #include "core/intel_winsys.h" +#include "core/ilo_image.h" #include "ilo_common.h" -#include "ilo_layout.h" #include "ilo_screen.h" enum ilo_texture_flags { @@ -108,7 +108,7 @@ struct 
ilo_texture { bool imported; - struct ilo_layout layout; + struct ilo_image image; /* XXX thread-safety */ struct intel_bo *bo; diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 0b99cdac9db..175e7c659d4 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -1013,7 +1013,7 @@ ilo_create_sampler_view(struct pipe_context *pipe, struct ilo_texture *tex = ilo_texture(res); /* warn about degraded performance because of a missing binding flag */ - if (tex->layout.tiling == GEN6_TILING_NONE && + if (tex->image.tiling == GEN6_TILING_NONE && !(tex->base.bind & PIPE_BIND_SAMPLER_VIEW)) { ilo_warn("creating sampler view for a resource " "not created for sampling\n"); diff --git a/src/gallium/drivers/ilo/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/ilo_state_3d_bottom.c index daf6cb19e63..f4a850634f7 100644 --- a/src/gallium/drivers/ilo/ilo_state_3d_bottom.c +++ b/src/gallium/drivers/ilo/ilo_state_3d_bottom.c @@ -1032,12 +1032,12 @@ zs_init_info(const struct ilo_dev *dev, if (format != PIPE_FORMAT_S8_UINT) { info->zs.bo = tex->bo; - info->zs.stride = tex->layout.bo_stride; + info->zs.stride = tex->image.bo_stride; - assert(tex->layout.layer_height % 4 == 0); - info->zs.qpitch = tex->layout.layer_height / 4; + assert(tex->image.layer_height % 4 == 0); + info->zs.qpitch = tex->image.layer_height / 4; - info->zs.tiling = tex->layout.tiling; + info->zs.tiling = tex->image.tiling; info->zs.offset = 0; } @@ -1056,41 +1056,41 @@ zs_init_info(const struct ilo_dev *dev, * For GEN7, we still dobule the stride because we did not double the * slice widths when initializing the layout. 
*/ - info->stencil.stride = s8_tex->layout.bo_stride * 2; + info->stencil.stride = s8_tex->image.bo_stride * 2; - assert(s8_tex->layout.layer_height % 4 == 0); - info->stencil.qpitch = s8_tex->layout.layer_height / 4; + assert(s8_tex->image.layer_height % 4 == 0); + info->stencil.qpitch = s8_tex->image.layer_height / 4; - info->stencil.tiling = s8_tex->layout.tiling; + info->stencil.tiling = s8_tex->image.tiling; if (ilo_dev_gen(dev) == ILO_GEN(6)) { unsigned x, y; - assert(s8_tex->layout.walk == ILO_LAYOUT_WALK_LOD); + assert(s8_tex->image.walk == ILO_IMAGE_WALK_LOD); /* offset to the level */ - ilo_layout_get_slice_pos(&s8_tex->layout, level, 0, &x, &y); - ilo_layout_pos_to_mem(&s8_tex->layout, x, y, &x, &y); - info->stencil.offset = ilo_layout_mem_to_raw(&s8_tex->layout, x, y); + ilo_image_get_slice_pos(&s8_tex->image, level, 0, &x, &y); + ilo_image_pos_to_mem(&s8_tex->image, x, y, &x, &y); + info->stencil.offset = ilo_image_mem_to_raw(&s8_tex->image, x, y); } } if (ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers)) { info->hiz.bo = tex->aux_bo; - info->hiz.stride = tex->layout.aux_stride; + info->hiz.stride = tex->image.aux_stride; - assert(tex->layout.aux_layer_height % 4 == 0); - info->hiz.qpitch = tex->layout.aux_layer_height / 4; + assert(tex->image.aux_layer_height % 4 == 0); + info->hiz.qpitch = tex->image.aux_layer_height / 4; info->hiz.tiling = GEN6_TILING_Y; /* offset to the level */ if (ilo_dev_gen(dev) == ILO_GEN(6)) - info->hiz.offset = tex->layout.aux_offsets[level]; + info->hiz.offset = tex->image.aux_offsets[level]; } - info->width = tex->layout.width0; - info->height = tex->layout.height0; + info->width = tex->image.width0; + info->height = tex->image.height0; info->depth = (tex->base.target == PIPE_TEXTURE_3D) ? 
tex->base.depth0 : num_layers; diff --git a/src/gallium/drivers/ilo/ilo_state_3d_top.c b/src/gallium/drivers/ilo/ilo_state_3d_top.c index 167c0911a46..79cd8b5c3b0 100644 --- a/src/gallium/drivers/ilo/ilo_state_3d_top.c +++ b/src/gallium/drivers/ilo/ilo_state_3d_top.c @@ -584,11 +584,11 @@ view_init_for_texture_gen6(const struct ilo_dev *dev, surface_format = ilo_format_translate_texture(dev, format); assert(surface_format >= 0); - width = tex->layout.width0; - height = tex->layout.height0; + width = tex->image.width0; + height = tex->image.height0; depth = (tex->base.target == PIPE_TEXTURE_3D) ? tex->base.depth0 : num_layers; - pitch = tex->layout.bo_stride; + pitch = tex->image.bo_stride; if (surface_type == GEN6_SURFTYPE_CUBE) { /* @@ -642,10 +642,10 @@ view_init_for_texture_gen6(const struct ilo_dev *dev, } /* non-full array spacing is supported only on GEN7+ */ - assert(tex->layout.walk != ILO_LAYOUT_WALK_LOD); + assert(tex->image.walk != ILO_IMAGE_WALK_LOD); /* non-interleaved samples are supported only on GEN7+ */ if (tex->base.nr_samples > 1) - assert(tex->layout.interleaved_samples); + assert(tex->image.interleaved_samples); if (is_rt) { assert(num_levels == 1); @@ -673,7 +673,7 @@ view_init_for_texture_gen6(const struct ilo_dev *dev, * * "For linear surfaces, this field (X Offset) must be zero" */ - if (tex->layout.tiling == GEN6_TILING_NONE) { + if (tex->image.tiling == GEN6_TILING_NONE) { if (is_rt) { const int elem_size = util_format_get_blocksize(format); assert(pitch % elem_size == 0); @@ -701,10 +701,10 @@ view_init_for_texture_gen6(const struct ilo_dev *dev, (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT | lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT; - assert(tex->layout.tiling != GEN8_TILING_W); + assert(tex->image.tiling != GEN8_TILING_W); dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT | (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT | - tex->layout.tiling; + tex->image.tiling; dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT | 
first_layer << 17 | @@ -714,8 +714,8 @@ view_init_for_texture_gen6(const struct ilo_dev *dev, dw[5] = 0; - assert(tex->layout.align_j == 2 || tex->layout.align_j == 4); - if (tex->layout.align_j == 4) + assert(tex->image.align_j == 2 || tex->image.align_j == 4); + if (tex->image.align_j == 4) dw[5] |= GEN6_SURFACE_DW5_VALIGN_4; } @@ -946,11 +946,11 @@ view_init_for_texture_gen7(const struct ilo_dev *dev, surface_format = ilo_format_translate_texture(dev, format); assert(surface_format >= 0); - width = tex->layout.width0; - height = tex->layout.height0; + width = tex->image.width0; + height = tex->image.height0; depth = (tex->base.target == PIPE_TEXTURE_3D) ? tex->base.depth0 : num_layers; - pitch = tex->layout.bo_stride; + pitch = tex->image.bo_stride; if (surface_type == GEN6_SURFTYPE_CUBE) { /* @@ -1030,7 +1030,7 @@ view_init_for_texture_gen7(const struct ilo_dev *dev, * * "For linear surfaces, this field (X Offset) must be zero." */ - if (tex->layout.tiling == GEN6_TILING_NONE) { + if (tex->image.tiling == GEN6_TILING_NONE) { if (is_rt) { const int elem_size = util_format_get_blocksize(format); assert(pitch % elem_size == 0); @@ -1062,7 +1062,7 @@ view_init_for_texture_gen7(const struct ilo_dev *dev, } if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - switch (tex->layout.align_j) { + switch (tex->image.align_j) { case 4: dw[0] |= GEN7_SURFACE_DW0_VALIGN_4; break; @@ -1077,7 +1077,7 @@ view_init_for_texture_gen7(const struct ilo_dev *dev, break; } - switch (tex->layout.align_i) { + switch (tex->image.align_i) { case 4: dw[0] |= GEN8_SURFACE_DW0_HALIGN_4; break; @@ -1092,21 +1092,21 @@ view_init_for_texture_gen7(const struct ilo_dev *dev, break; } - dw[0] |= tex->layout.tiling << GEN8_SURFACE_DW0_TILING__SHIFT; + dw[0] |= tex->image.tiling << GEN8_SURFACE_DW0_TILING__SHIFT; } else { - assert(tex->layout.align_i == 4 || tex->layout.align_i == 8); - assert(tex->layout.align_j == 2 || tex->layout.align_j == 4); + assert(tex->image.align_i == 4 || tex->image.align_i == 8); + 
assert(tex->image.align_j == 2 || tex->image.align_j == 4); - if (tex->layout.align_j == 4) + if (tex->image.align_j == 4) dw[0] |= GEN7_SURFACE_DW0_VALIGN_4; - if (tex->layout.align_i == 8) + if (tex->image.align_i == 8) dw[0] |= GEN7_SURFACE_DW0_HALIGN_8; - assert(tex->layout.tiling != GEN8_TILING_W); - dw[0] |= tex->layout.tiling << GEN7_SURFACE_DW0_TILING__SHIFT; + assert(tex->image.tiling != GEN8_TILING_W); + dw[0] |= tex->image.tiling << GEN7_SURFACE_DW0_TILING__SHIFT; - if (tex->layout.walk == ILO_LAYOUT_WALK_LOD) + if (tex->image.walk == ILO_IMAGE_WALK_LOD) dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0; else dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL; @@ -1119,8 +1119,8 @@ view_init_for_texture_gen7(const struct ilo_dev *dev, dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK; if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - assert(tex->layout.layer_height % 4 == 0); - dw[1] = tex->layout.layer_height / 4; + assert(tex->image.layer_height % 4 == 0); + dw[1] = tex->image.layer_height / 4; } else { dw[1] = 0; } @@ -1139,7 +1139,7 @@ view_init_for_texture_gen7(const struct ilo_dev *dev, * means the samples are interleaved. The layouts are the same when the * number of samples is 1. 
*/ - if (tex->layout.interleaved_samples && tex->base.nr_samples > 1) { + if (tex->image.interleaved_samples && tex->base.nr_samples > 1) { assert(!is_rt); dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL; } diff --git a/src/gallium/drivers/ilo/ilo_transfer.c b/src/gallium/drivers/ilo/ilo_transfer.c index f0d47de54a0..e80ed8bda9c 100644 --- a/src/gallium/drivers/ilo/ilo_transfer.c +++ b/src/gallium/drivers/ilo/ilo_transfer.c @@ -93,7 +93,7 @@ resource_get_transfer_method(struct pipe_resource *res, bool need_convert = false; /* we may need to convert on the fly */ - if (tex->layout.tiling == GEN8_TILING_W || tex->separate_s8) { + if (tex->image.tiling == GEN8_TILING_W || tex->separate_s8) { /* on GEN6, separate stencil is enabled only when HiZ is */ if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) || ilo_texture_can_enable_hiz(tex, transfer->level, @@ -101,7 +101,7 @@ resource_get_transfer_method(struct pipe_resource *res, m = ILO_TRANSFER_MAP_SW_ZS; need_convert = true; } - } else if (tex->layout.format != tex->base.format) { + } else if (tex->image.format != tex->base.format) { m = ILO_TRANSFER_MAP_SW_CONVERT; need_convert = true; } @@ -114,7 +114,7 @@ resource_get_transfer_method(struct pipe_resource *res, return true; } - tiled = (tex->layout.tiling != GEN6_TILING_NONE); + tiled = (tex->image.tiling != GEN6_TILING_NONE); } if (tiled) @@ -202,7 +202,7 @@ xfer_alloc_staging_res(struct ilo_transfer *xfer) xfer->staging.res = res->screen->resource_create(res->screen, &templ); if (xfer->staging.res && xfer->staging.res->target != PIPE_BUFFER) { - assert(ilo_texture(xfer->staging.res)->layout.tiling == + assert(ilo_texture(xfer->staging.res)->image.tiling == GEN6_TILING_NONE); } @@ -354,11 +354,11 @@ tex_get_box_origin(const struct ilo_texture *tex, { unsigned x, y; - ilo_layout_get_slice_pos(&tex->layout, level, box->z + slice, &x, &y); + ilo_image_get_slice_pos(&tex->image, level, box->z + slice, &x, &y); x += box->x; y += box->y; - ilo_layout_pos_to_mem(&tex->layout, x, y, 
mem_x, mem_y); + ilo_image_pos_to_mem(&tex->image, x, y, mem_x, mem_y); } static unsigned @@ -369,13 +369,13 @@ tex_get_box_offset(const struct ilo_texture *tex, unsigned level, tex_get_box_origin(tex, level, 0, box, &mem_x, &mem_y); - return ilo_layout_mem_to_linear(&tex->layout, mem_x, mem_y); + return ilo_image_mem_to_linear(&tex->image, mem_x, mem_y); } static unsigned tex_get_slice_stride(const struct ilo_texture *tex, unsigned level) { - return ilo_layout_get_slice_stride(&tex->layout, level); + return ilo_image_get_slice_stride(&tex->image, level); } static unsigned @@ -523,21 +523,21 @@ static tex_tile_offset_func tex_tile_choose_offset_func(const struct ilo_texture *tex, unsigned *tiles_per_row) { - switch (tex->layout.tiling) { + switch (tex->image.tiling) { default: assert(!"unknown tiling"); /* fall through */ case GEN6_TILING_NONE: - *tiles_per_row = tex->layout.bo_stride; + *tiles_per_row = tex->image.bo_stride; return tex_tile_none_offset; case GEN6_TILING_X: - *tiles_per_row = tex->layout.bo_stride / 512; + *tiles_per_row = tex->image.bo_stride / 512; return tex_tile_x_offset; case GEN6_TILING_Y: - *tiles_per_row = tex->layout.bo_stride / 128; + *tiles_per_row = tex->image.bo_stride / 128; return tex_tile_y_offset; case GEN8_TILING_W: - *tiles_per_row = tex->layout.bo_stride / 64; + *tiles_per_row = tex->image.bo_stride / 64; return tex_tile_w_offset; } } @@ -551,7 +551,7 @@ tex_staging_sys_map_bo(struct ilo_texture *tex, const bool prefer_cpu = (is->dev.has_llc || for_read_back); void *ptr; - if (prefer_cpu && (tex->layout.tiling == GEN6_TILING_NONE || + if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE || !linear_view)) ptr = intel_bo_map(tex->bo, !for_read_back); else @@ -584,7 +584,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex, tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row); - assert(tex->layout.block_width == 1 && tex->layout.block_height == 1); + assert(tex->image.block_width == 1 && tex->image.block_height == 1); 
if (tex->separate_s8) { struct ilo_texture *s8_tex = tex->separate_s8; @@ -602,7 +602,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex, s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row); if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { - assert(tex->layout.format == PIPE_FORMAT_Z24X8_UNORM); + assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM); dst_cpp = 4; dst_s8_pos = 3; @@ -610,7 +610,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex, } else { assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); - assert(tex->layout.format == PIPE_FORMAT_Z32_FLOAT); + assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT); dst_cpp = 8; dst_s8_pos = 4; @@ -643,7 +643,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex, d[dst_s8_pos] = s8_src[s8_offset]; d += dst_cpp; - x += tex->layout.block_size; + x += tex->image.block_size; s8_x++; } @@ -656,7 +656,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex, tex_staging_sys_unmap_bo(s8_tex); } else { - assert(tex->layout.format == PIPE_FORMAT_S8_UINT); + assert(tex->image.format == PIPE_FORMAT_S8_UINT); for (slice = 0; slice < box->depth; slice++) { unsigned mem_x, mem_y; @@ -711,7 +711,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex, tile_offset = tex_tile_choose_offset_func(tex, &tiles_per_row); - assert(tex->layout.block_width == 1 && tex->layout.block_height == 1); + assert(tex->image.block_width == 1 && tex->image.block_height == 1); if (tex->separate_s8) { struct ilo_texture *s8_tex = tex->separate_s8; @@ -729,7 +729,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex, s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row); if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { - assert(tex->layout.format == PIPE_FORMAT_Z24X8_UNORM); + assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM); src_cpp = 4; src_s8_pos = 3; @@ -737,7 +737,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex, } else { assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); - 
assert(tex->layout.format == PIPE_FORMAT_Z32_FLOAT); + assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT); src_cpp = 8; src_s8_pos = 4; @@ -770,7 +770,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex, s8_dst[s8_offset] = s[src_s8_pos]; s += src_cpp; - x += tex->layout.block_size; + x += tex->image.block_size; s8_x++; } @@ -783,7 +783,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex, tex_staging_sys_unmap_bo(s8_tex); } else { - assert(tex->layout.format == PIPE_FORMAT_S8_UINT); + assert(tex->image.format == PIPE_FORMAT_S8_UINT); for (slice = 0; slice < box->depth; slice++) { unsigned mem_x, mem_y; @@ -841,8 +841,8 @@ tex_staging_sys_convert_write(struct ilo_texture *tex, else dst_slice_stride = 0; - if (unlikely(tex->layout.format == tex->base.format)) { - util_copy_box(dst, tex->layout.format, tex->layout.bo_stride, + if (unlikely(tex->image.format == tex->base.format)) { + util_copy_box(dst, tex->image.format, tex->image.bo_stride, dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth, xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride, 0, 0, 0); @@ -854,14 +854,14 @@ tex_staging_sys_convert_write(struct ilo_texture *tex, switch (tex->base.format) { case PIPE_FORMAT_ETC1_RGB8: - assert(tex->layout.format == PIPE_FORMAT_R8G8B8X8_UNORM); + assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM); for (slice = 0; slice < box->depth; slice++) { const void *src = xfer->staging.sys + xfer->base.layer_stride * slice; util_format_etc1_rgb8_unpack_rgba_8unorm(dst, - tex->layout.bo_stride, src, xfer->base.stride, + tex->image.bo_stride, src, xfer->base.stride, box->width, box->height); dst += dst_slice_stride; @@ -957,7 +957,7 @@ tex_map(struct ilo_transfer *xfer) ptr += tex_get_box_offset(tex, xfer->base.level, &xfer->base.box); /* stride is for a block row, not a texel row */ - xfer->base.stride = tex->layout.bo_stride; + xfer->base.stride = tex->image.bo_stride; /* note that slice stride is not always available */ xfer->base.layer_stride = 
(xfer->base.box.depth > 1) ? tex_get_slice_stride(tex, xfer->base.level) : 0; @@ -967,7 +967,7 @@ tex_map(struct ilo_transfer *xfer) ptr = xfer_map(xfer); if (ptr) { const struct ilo_texture *staging = ilo_texture(xfer->staging.res); - xfer->base.stride = staging->layout.bo_stride; + xfer->base.stride = staging->image.bo_stride; xfer->base.layer_stride = tex_get_slice_stride(staging, 0); } break;