X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fisl%2Fisl.h;h=98b55164facb4c40200010087074656202f4d3f0;hb=a965ffad21d41f14e09babd18896bb962b326da4;hp=a86688c91bcc0ede5603ea6aaf11eb6fb7e464e5;hpb=fc3650a0a9eca29a9498f663d489ab729f55f65f;p=mesa.git diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h index a86688c91bc..98b55164fac 100644 --- a/src/intel/isl/isl.h +++ b/src/intel/isl/isl.h @@ -35,7 +35,8 @@ * - functions */ -#pragma once +#ifndef ISL_H +#define ISL_H #include #include @@ -43,12 +44,13 @@ #include "c99_compat.h" #include "util/macros.h" +#include "util/format/u_format.h" #ifdef __cplusplus extern "C" { #endif -struct brw_device_info; +struct gen_device_info; struct brw_image_param; #ifndef ISL_DEV_GEN @@ -65,6 +67,10 @@ struct brw_image_param; (assert(ISL_DEV_GEN(__dev) == (__dev)->info->gen)) #endif +#ifndef ISL_DEV_IS_G4X +#define ISL_DEV_IS_G4X(__dev) ((__dev)->info->is_g4x) +#endif + #ifndef ISL_DEV_IS_HASWELL /** * @brief Get the hardware generation of isl_device. @@ -75,6 +81,10 @@ struct brw_image_param; #define ISL_DEV_IS_HASWELL(__dev) ((__dev)->info->is_haswell) #endif +#ifndef ISL_DEV_IS_BAYTRAIL +#define ISL_DEV_IS_BAYTRAIL(__dev) ((__dev)->info->is_baytrail) +#endif + #ifndef ISL_DEV_USE_SEPARATE_STENCIL /** * You can define this as a compile-time constant in the CFLAGS. 
For example, @@ -158,6 +168,7 @@ enum isl_format { ISL_FORMAT_B10G10R10A2_UNORM = 209, ISL_FORMAT_B10G10R10A2_UNORM_SRGB = 210, ISL_FORMAT_R11G11B10_FLOAT = 211, + ISL_FORMAT_R10G10B10_FLOAT_A2_UNORM = 213, ISL_FORMAT_R32_SINT = 214, ISL_FORMAT_R32_UINT = 215, ISL_FORMAT_R32_FLOAT = 216, @@ -344,6 +355,49 @@ enum isl_format { ISL_FORMAT_ASTC_LDR_2D_10X10_FLT16 = 630, ISL_FORMAT_ASTC_LDR_2D_12X10_FLT16 = 638, ISL_FORMAT_ASTC_LDR_2D_12X12_FLT16 = 639, + ISL_FORMAT_ASTC_HDR_2D_4X4_FLT16 = 832, + ISL_FORMAT_ASTC_HDR_2D_5X4_FLT16 = 840, + ISL_FORMAT_ASTC_HDR_2D_5X5_FLT16 = 841, + ISL_FORMAT_ASTC_HDR_2D_6X5_FLT16 = 849, + ISL_FORMAT_ASTC_HDR_2D_6X6_FLT16 = 850, + ISL_FORMAT_ASTC_HDR_2D_8X5_FLT16 = 865, + ISL_FORMAT_ASTC_HDR_2D_8X6_FLT16 = 866, + ISL_FORMAT_ASTC_HDR_2D_8X8_FLT16 = 868, + ISL_FORMAT_ASTC_HDR_2D_10X5_FLT16 = 881, + ISL_FORMAT_ASTC_HDR_2D_10X6_FLT16 = 882, + ISL_FORMAT_ASTC_HDR_2D_10X8_FLT16 = 884, + ISL_FORMAT_ASTC_HDR_2D_10X10_FLT16 = 886, + ISL_FORMAT_ASTC_HDR_2D_12X10_FLT16 = 894, + ISL_FORMAT_ASTC_HDR_2D_12X12_FLT16 = 895, + + /* The formats that follow are internal to ISL and as such don't have an + * explicit number. We'll just let the C compiler assign it for us. Any + * actual hardware formats *must* come before these in the list. 
+ */ + + /* Formats for auxiliary surfaces */ + ISL_FORMAT_HIZ, + ISL_FORMAT_MCS_2X, + ISL_FORMAT_MCS_4X, + ISL_FORMAT_MCS_8X, + ISL_FORMAT_MCS_16X, + ISL_FORMAT_GEN7_CCS_32BPP_X, + ISL_FORMAT_GEN7_CCS_64BPP_X, + ISL_FORMAT_GEN7_CCS_128BPP_X, + ISL_FORMAT_GEN7_CCS_32BPP_Y, + ISL_FORMAT_GEN7_CCS_64BPP_Y, + ISL_FORMAT_GEN7_CCS_128BPP_Y, + ISL_FORMAT_GEN9_CCS_32BPP, + ISL_FORMAT_GEN9_CCS_64BPP, + ISL_FORMAT_GEN9_CCS_128BPP, + ISL_FORMAT_GEN12_CCS_8BPP_Y0, + ISL_FORMAT_GEN12_CCS_16BPP_Y0, + ISL_FORMAT_GEN12_CCS_32BPP_Y0, + ISL_FORMAT_GEN12_CCS_64BPP_Y0, + ISL_FORMAT_GEN12_CCS_128BPP_Y0, + + /* An upper bound on the supported format enumerations */ + ISL_NUM_FORMATS, /* Hardware doesn't understand this out-of-band value */ ISL_FORMAT_UNSUPPORTED = UINT16_MAX, @@ -392,6 +446,11 @@ enum isl_txc { ISL_TXC_ETC1, ISL_TXC_ETC2, ISL_TXC_ASTC, + + /* Used for auxiliary surface formats */ + ISL_TXC_HIZ, + ISL_TXC_MCS, + ISL_TXC_CCS, }; /** @@ -410,6 +469,9 @@ enum isl_tiling { ISL_TILING_Y0, /**< Legacy Y tiling */ ISL_TILING_Yf, /**< Standard 4K tiling. The 'f' means "four". */ ISL_TILING_Ys, /**< Standard 64K tiling. The 's' means "sixty-four". */ + ISL_TILING_HIZ, /**< Tiling format for HiZ surfaces */ + ISL_TILING_CCS, /**< Tiling format for CCS surfaces */ + ISL_TILING_GEN12_CCS, /**< Tiling format for Gen12 CCS surfaces */ }; /** @@ -423,6 +485,9 @@ typedef uint32_t isl_tiling_flags_t; #define ISL_TILING_Y0_BIT (1u << ISL_TILING_Y0) #define ISL_TILING_Yf_BIT (1u << ISL_TILING_Yf) #define ISL_TILING_Ys_BIT (1u << ISL_TILING_Ys) +#define ISL_TILING_HIZ_BIT (1u << ISL_TILING_HIZ) +#define ISL_TILING_CCS_BIT (1u << ISL_TILING_CCS) +#define ISL_TILING_GEN12_CCS_BIT (1u << ISL_TILING_GEN12_CCS) #define ISL_TILING_ANY_MASK (~0u) #define ISL_TILING_NON_LINEAR_MASK (~ISL_TILING_LINEAR_BIT) @@ -474,6 +539,46 @@ enum isl_dim_layout { */ ISL_DIM_LAYOUT_GEN4_3D, + /** + * Special layout used for HiZ and stencil on Sandy Bridge to work around + * the hardware's lack of mipmap support. 
On gen6, HiZ and stencil buffers + * work the same as on gen7+ except that they don't technically support + * mipmapping. That does not, however, stop us from doing it. As far as + * Sandy Bridge hardware is concerned, HiZ and stencil always operates on a + * single miplevel 2D (possibly array) image. The dimensions of that image + * are NOT minified. + * + * In order to implement HiZ and stencil on Sandy Bridge, we create one + * full-sized 2D (possibly array) image for every LOD with every image + * aligned to a page boundary. When the surface is used with the stencil + * or HiZ hardware, we manually offset to the image for the given LOD. + * + * As a memory saving measure, we pretend that the width of each miplevel + * is minified and we place LOD1 and above below LOD0 but horizontally + * adjacent to each other. When considered as full-sized images, LOD1 and + * above technically overlap. However, since we only write to part of that + * image, the hardware will never notice the overlap. + * + * This layout looks something like this: + * + * +---------+ + * | | + * | | + * +---------+ + * | | + * | | + * +---------+ + * + * +----+ +-+ . + * | | +-+ + * +----+ + * + * +----+ +-+ . + * | | +-+ + * +----+ + */ + ISL_DIM_LAYOUT_GEN6_STENCIL_HIZ, + /** * For details, see the Skylake BSpec >> Memory Views >> Common Surface * Formats >> Surface Layout and Tiling >> » 1D Surfaces. 
@@ -481,6 +586,305 @@ enum isl_dim_layout { ISL_DIM_LAYOUT_GEN9_1D, }; +enum isl_aux_usage { + /** No Auxiliary surface is used */ + ISL_AUX_USAGE_NONE, + + /** The primary surface is a depth surface and the auxiliary surface is HiZ */ + ISL_AUX_USAGE_HIZ, + + /** The auxiliary surface is an MCS + * + * @invariant isl_surf::samples > 1 + */ + ISL_AUX_USAGE_MCS, + + /** The auxiliary surface is a fast-clear-only compression surface + * + * @invariant isl_surf::samples == 1 + */ + ISL_AUX_USAGE_CCS_D, + + /** The auxiliary surface provides full lossless color compression + * + * @invariant isl_surf::samples == 1 + */ + ISL_AUX_USAGE_CCS_E, + + /** The auxiliary surface provides full lossless color compression on + * Gen12. + * + * @invariant isl_surf::samples == 1 + */ + ISL_AUX_USAGE_GEN12_CCS_E, + + /** The auxiliary surface provides full lossless media color compression + * + * @invariant isl_surf::samples == 1 + */ + ISL_AUX_USAGE_MC, + + /** The auxiliary surface is a HiZ surface operating in write-through mode + * and CCS is also enabled + * + * In this mode, the HiZ and CCS surfaces act as a single fused compression + * surface where resolves and ambiguates operate on both surfaces at the + * same time. In this mode, the HiZ surface operates in write-through + * mode where it is only used for accelerating depth testing and not for + * actual compression. The CCS-compressed surface contains valid data at + * all times. + * + * @invariant isl_surf::samples == 1 + */ + ISL_AUX_USAGE_HIZ_CCS_WT, + + /** The auxiliary surface is a HiZ surface and CCS is also enabled + * + * In this mode, the HiZ and CCS surfaces act as a single fused compression + * surface where resolves and ambiguates operate on both surfaces at the + * same time. In this mode, full HiZ compression is enabled and the + * CCS-compressed main surface may not contain valid data.
The only way to + * read the surface outside of the depth hardware is to do a full resolve + * which resolves both HiZ and CCS so the surface is in the pass-through + * state. + */ + ISL_AUX_USAGE_HIZ_CCS, + + /** The auxiliary surface is an MCS and CCS is also enabled + * + * In this mode, we have fused MCS+CCS compression where the MCS is used + * for fast-clears and "identical samples" compression just like on Gen7-11 + * but each plane is then CCS compressed. + * + * @invariant isl_surf::samples > 1 + */ + ISL_AUX_USAGE_MCS_CCS, + + /** CCS auxiliary data is used to compress a stencil buffer + * + * @invariant isl_surf::samples == 1 + */ + ISL_AUX_USAGE_STC_CCS, +}; + +/** + * Enum for keeping track of the state of an auxiliary compressed surface. + * + * For any given auxiliary surface compression format (HiZ, CCS, or MCS), any + * given slice (lod + array layer) can be in one of the seven states described + * by this enum. Draw and resolve operations may cause the slice to change + * from one state to another. The seven valid states are: + * + * 1) Clear: In this state, each block in the auxiliary surface contains a + * magic value that indicates that the block is in the clear state. If + * a block is in the clear state, its values in the primary surface are + * ignored and the color of the samples in the block is taken from either the + * RENDER_SURFACE_STATE packet for color or 3DSTATE_CLEAR_PARAMS for + * depth. Since neither the primary surface nor the auxiliary surface + * contains the clear value, the surface can be cleared to a different + * color by simply changing the clear color without modifying either + * surface. + * + * 2) Partial Clear: In this state, each block in the auxiliary surface + * contains either the magic clear or pass-through value. See Clear and + * Pass-through for more details. + * + * 3) Compressed w/ Clear: In this state, neither the auxiliary surface + * nor the primary surface has a complete representation of the data.
+ * Instead, both surfaces must be used together or else rendering + * corruption may occur. Depending on the auxiliary compression format + * and the data, any given block in the primary surface may contain all, + * some, or none of the data required to reconstruct the actual sample + * values. Blocks may also be in the clear state (see Clear) and have + * their value taken from outside the surface. + * + * 4) Compressed w/o Clear: This state is identical to the state above + * except that no blocks are in the clear state. In this state, all of + * the data required to reconstruct the final sample values is contained + * in the auxiliary and primary surface and the clear value is not + * considered. + * + * 5) Resolved: In this state, the primary surface contains 100% of the + * data. The auxiliary surface is also valid so the surface can be + * validly used with or without aux enabled. The auxiliary surface may, + * however, contain non-trivial data and any update to the primary + * surface with aux disabled will cause the two to get out of sync. + * + * 6) Pass-through: In this state, the primary surface contains 100% of the + * data and every block in the auxiliary surface contains a magic value + * which indicates that the auxiliary surface should be ignored and + * only the primary surface should be considered. Updating the primary + * surface without aux works fine and can be done repeatedly in this + * mode. Writing to a surface in pass-through mode with aux enabled may + * cause the auxiliary buffer to contain non-trivial data and no longer + * be in the pass-through state. + * + * 7) Aux Invalid: In this state, the primary surface contains 100% of the + * data and the auxiliary surface is completely bogus. Any attempt to + * use the auxiliary surface is liable to result in rendering + * corruption. The only thing that one can do to re-enable aux once + * this state is reached is to use an ambiguate pass to transition into + * the pass-through state.
+ * + * Drawing with or without aux enabled may implicitly cause the surface to + * transition between these states. There are also four types of auxiliary + * compression operations which cause an explicit transition which are + * described by the isl_aux_op enum below. + * + * Not all operations are valid or useful in all states. The diagram below + * contains a complete description of the states and all valid and useful + * transitions except clear. + * + * Draw w/ Aux + * +----------+ + * | | + * | +-------------+ Draw w/ Aux +-------------+ + * +------>| Compressed |<-------------------| Clear | + * | w/ Clear |----->----+ | | + * +-------------+ | +-------------+ + * | /|\ | | | + * | | | | | + * | | +------<-----+ | Draw w/ + * | | | | Clear Only + * | | Full | | +----------+ + * Partial | | Resolve | \|/ | | + * Resolve | | | +-------------+ | + * | | | | Partial |<------+ + * | | | | Clear |<----------+ + * | | | +-------------+ | + * | | | | | + * | | +------>---------+ Full | + * | | | Resolve | + * Draw w/ aux | | Partial Fast Clear | | + * +----------+ | +--------------------------+ | | + * | | \|/ | \|/ | + * | +-------------+ Full Resolve +-------------+ | + * +------>| Compressed |------------------->| Resolved | | + * | w/o Clear |<-------------------| | | + * +-------------+ Draw w/ Aux +-------------+ | + * /|\ | | | + * | Draw | | Draw | + * | w/ Aux | | w/o Aux | + * | Ambiguate | | | + * | +--------------------------+ | | + * Draw w/o Aux | | | Draw w/o Aux | + * +----------+ | | | +----------+ | + * | | | \|/ \|/ | | | + * | +-------------+ Ambiguate +-------------+ | | + * +------>| Pass- |<-------------------| Aux |<------+ | + * +------>| through | | Invalid | | + * | +-------------+ +-------------+ | + * | | | | + * +----------+ +-----------------------------------------------------+ + * Draw w/ Partial Fast Clear + * Clear Only + * + * + * While the above general theory applies to all forms of auxiliary + * compression on Intel hardware, 
not all states and operations are available + * on all compression types. However, each of the auxiliary states and + * operations can be fairly easily mapped onto the above diagram: + * + * HiZ: Hierarchical depth compression is capable of being in any of the + * states above. Hardware provides three HiZ operations: "Depth + * Clear", "Depth Resolve", and "HiZ Resolve" which map to "Fast + * Clear", "Full Resolve", and "Ambiguate" respectively. The + * hardware provides no HiZ partial resolve operation so the only way + * to get into the "Compressed w/o Clear" state is to render with HiZ + * when the surface is in the resolved or pass-through states. + * + * MCS: Multisample compression is technically capable of being in any of + * the states above except that most of them aren't useful. Both the + * render engine and the sampler support MCS compression and, apart + * from clear color, MCS is format-unaware so we leave the surface + * compressed 100% of the time. The hardware provides no MCS + * operations. + * + * CCS_D: Single-sample fast-clears (also called CCS_D in ISL) are one of + * the simplest forms of compression since they don't do anything + * beyond clear color tracking. They really only support three of + * the seven states: Clear, Partial Clear, and Pass-through. The + * only CCS_D operation is "Resolve" which maps to a full resolve + * followed by an ambiguate. + * + * CCS_E: Single-sample render target compression (also called CCS_E in ISL) + * is capable of being in almost all of the above states. The only + * exception is that it does not have separate resolved and pass- + * through states. Instead, the CCS_E full resolve operation does + * both a resolve and an ambiguate so it goes directly into the + * pass-through state. CCS_E also provides fast clear and partial + * resolve operations which work as described above.
+ * + * While it is technically possible to perform a CCS_E ambiguate, it + * is not provided by Sky Lake hardware so we choose to avoid the aux + * invalid state. If the aux invalid state were determined to be + * useful, a CCS ambiguate could be done by carefully rendering to + * the CCS and filling it with zeros. + */ +enum isl_aux_state { +#ifdef IN_UNIT_TEST + ISL_AUX_STATE_ASSERT, +#endif + ISL_AUX_STATE_CLEAR, + ISL_AUX_STATE_PARTIAL_CLEAR, + ISL_AUX_STATE_COMPRESSED_CLEAR, + ISL_AUX_STATE_COMPRESSED_NO_CLEAR, + ISL_AUX_STATE_RESOLVED, + ISL_AUX_STATE_PASS_THROUGH, + ISL_AUX_STATE_AUX_INVALID, +}; + +/** + * Enum which describes explicit aux transition operations. + */ +enum isl_aux_op { +#ifdef IN_UNIT_TEST + ISL_AUX_OP_ASSERT, +#endif + + ISL_AUX_OP_NONE, + + /** Fast Clear + * + * This operation writes the magic "clear" value to the auxiliary surface. + * This operation will safely transition any slice of a surface from any + * state to the clear state so long as the entire slice is fast cleared at + * once. A fast clear that only covers part of a slice of a surface is + * called a partial fast clear. + */ + ISL_AUX_OP_FAST_CLEAR, + + /** Full Resolve + * + * This operation combines the auxiliary surface data with the primary + * surface data and writes the result to the primary. For HiZ, the docs + * call this a depth resolve. For CCS, the hardware full resolve operation + * does both a full resolve and an ambiguate so it actually takes you all + * the way to the pass-through state. + */ + ISL_AUX_OP_FULL_RESOLVE, + + /** Partial Resolve + * + * This operation considers blocks which are in the "clear" state and + * writes the clear value directly into the primary or auxiliary surface. + * Once this operation completes, the surface is still compressed but no + * longer references the clear color. This operation is only available + * for CCS_E. 
+ */ + ISL_AUX_OP_PARTIAL_RESOLVE, + + /** Ambiguate + * + * This operation throws away the current auxiliary data and replaces it + * with the magic pass-through value. If an ambiguate operation is + * performed when the primary surface does not contain 100% of the data, + * data will be lost. This operation is only implemented in hardware for + * depth where it is called a HiZ resolve. + */ + ISL_AUX_OP_AMBIGUATE, +}; + /* TODO(chadv): Explain */ enum isl_array_pitch_span { ISL_ARRAY_PITCH_SPAN_FULL, @@ -505,12 +909,30 @@ typedef uint64_t isl_surf_usage_flags_t; #define ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT (1u << 10) #define ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT (1u << 11) #define ISL_SURF_USAGE_STORAGE_BIT (1u << 12) +#define ISL_SURF_USAGE_HIZ_BIT (1u << 13) +#define ISL_SURF_USAGE_MCS_BIT (1u << 14) +#define ISL_SURF_USAGE_CCS_BIT (1u << 15) +/** @} */ + +/** + * @defgroup Channel Mask + * + * These #define values are chosen to match the values of + * RENDER_SURFACE_STATE::Color Buffer Component Write Disables + * + * @{ + */ +typedef uint8_t isl_channel_mask_t; +#define ISL_CHANNEL_BLUE_BIT (1 << 0) +#define ISL_CHANNEL_GREEN_BIT (1 << 1) +#define ISL_CHANNEL_RED_BIT (1 << 2) +#define ISL_CHANNEL_ALPHA_BIT (1 << 3) /** @} */ /** * @brief A channel select (also known as texture swizzle) value */ -enum isl_channel_select { +enum PACKED isl_channel_select { ISL_CHANNEL_SELECT_ZERO = 0, ISL_CHANNEL_SELECT_ONE = 1, ISL_CHANNEL_SELECT_RED = 4, @@ -598,11 +1020,54 @@ enum isl_msaa_layout { ISL_MSAA_LAYOUT_ARRAY, }; +typedef enum { + ISL_MEMCPY = 0, + ISL_MEMCPY_BGRA8, + ISL_MEMCPY_STREAMING_LOAD, + ISL_MEMCPY_INVALID, +} isl_memcpy_type; struct isl_device { - const struct brw_device_info *info; + const struct gen_device_info *info; bool use_separate_stencil; bool has_bit6_swizzling; + + /** + * Describes the layout of a RENDER_SURFACE_STATE structure for the + * current gen. 
+ */ + struct { + uint8_t size; + uint8_t align; + uint8_t addr_offset; + uint8_t aux_addr_offset; + + /* Rounded up to the nearest dword to simplify GPU memcpy operations. */ + + /* size of the state buffer used to store the clear color + extra + * additional space used by the hardware */ + uint8_t clear_color_state_size; + uint8_t clear_color_state_offset; + /* size of the clear color itself - used to copy it to/from a BO */ + uint8_t clear_value_size; + uint8_t clear_value_offset; + } ss; + + /** + * Describes the layout of the depth/stencil/hiz commands as emitted by + * isl_emit_depth_stencil_hiz. + */ + struct { + uint8_t size; + uint8_t depth_offset; + uint8_t stencil_offset; + uint8_t hiz_offset; + } ds; + + struct { + uint32_t internal; + uint32_t external; + } mocs; }; struct isl_extent2d { @@ -625,6 +1090,7 @@ struct isl_extent4d { struct isl_channel_layout { enum isl_base_type type; + uint8_t start_bit; /**< Bit at which this channel starts */ uint8_t bits; /**< Size in bits */ }; @@ -644,15 +1110,18 @@ struct isl_format_layout { uint8_t bh; /**< Block height, in pixels */ uint8_t bd; /**< Block depth, in pixels */ - struct { - struct isl_channel_layout r; /**< Red channel */ - struct isl_channel_layout g; /**< Green channel */ - struct isl_channel_layout b; /**< Blue channel */ - struct isl_channel_layout a; /**< Alpha channel */ - struct isl_channel_layout l; /**< Luminance channel */ - struct isl_channel_layout i; /**< Intensity channel */ - struct isl_channel_layout p; /**< Palette channel */ - } channels; + union { + struct { + struct isl_channel_layout r; /**< Red channel */ + struct isl_channel_layout g; /**< Green channel */ + struct isl_channel_layout b; /**< Blue channel */ + struct isl_channel_layout a; /**< Alpha channel */ + struct isl_channel_layout l; /**< Luminance channel */ + struct isl_channel_layout i; /**< Intensity channel */ + struct isl_channel_layout p; /**< Palette channel */ + } channels; + struct isl_channel_layout 
channels_array[7]; + }; enum isl_colorspace colorspace; enum isl_txc txc; @@ -661,7 +1130,26 @@ struct isl_format_layout { struct isl_tile_info { enum isl_tiling tiling; - /** The logical size of the tile in units of surface elements + /* The size (in bits per block) of a single surface element + * + * For surfaces with power-of-two formats, this is the same as + * isl_format_layout::bpb. For non-power-of-two formats it may be smaller. + * The logical_extent_el field is in terms of elements of this size. + * + * For example, consider ISL_FORMAT_R32G32B32_FLOAT for which + * isl_format_layout::bpb is 96 (a non-power-of-two). In this case, none + * of the tiling formats can actually hold an integer number of 96-bit + * surface elements so isl_tiling_get_info returns an isl_tile_info for a + * 32-bit element size. It is the responsibility of the caller to + * recognize that 32 != 96 and adjust accordingly. For instance, to compute + * the width of a surface in tiles, you would do: + * + * width_tl = DIV_ROUND_UP(width_el * (format_bpb / tile_info.format_bpb), + * tile_info.logical_extent_el.width); + */ + uint32_t format_bpb; + + /** The logical size of the tile in units of format_bpb size elements * * This field determines how a given surface is cut up into tiles. It is * used to compute the size of a surface in tiles and can be used to @@ -679,11 +1167,30 @@ struct isl_tile_info { * always used with ISL_FORMAT_R8) has a logical size of 64el x 64el but * its physical size is 128B x 32rows, the same as a Y-tile. * - * @see isl_surf::row_pitch + * @see isl_surf::row_pitch_B */ struct isl_extent2d phys_extent_B; }; +/** + * Metadata about a DRM format modifier.
+ */ +struct isl_drm_modifier_info { + uint64_t modifier; + + /** Text name of the modifier */ + const char *name; + + /** ISL tiling implied by this modifier */ + enum isl_tiling tiling; + + /** ISL aux usage implied by this modifier */ + enum isl_aux_usage aux_usage; + + /** Whether or not this modifier supports clear color */ + bool supports_clear_color; +}; + /** * @brief Input to surface initialization * @@ -710,10 +1217,13 @@ struct isl_surf_init_info { uint32_t samples; /** Lower bound for isl_surf::alignment, in bytes. */ - uint32_t min_alignment; + uint32_t min_alignment_B; - /** Lower bound for isl_surf::pitch, in bytes. */ - uint32_t min_pitch; + /** + * Exact value for isl_surf::row_pitch. Ignored if zero. isl_surf_init() + * will fail if this is misaligned or out of bounds. + */ + uint32_t row_pitch_B; isl_surf_usage_flags_t usage; @@ -742,7 +1252,7 @@ struct isl_surf { /** * Physical extent of the surface's base level, in units of physical - * surface samples and aligned to the format's compression block. + * surface samples. * * Consider isl_dim_layout as an operator that transforms a logical surface * layout to a physical surface layout. Then @@ -756,17 +1266,17 @@ struct isl_surf { uint32_t samples; /** Total size of the surface, in bytes. */ - uint32_t size; + uint64_t size_B; /** Required alignment for the surface's base address. */ - uint32_t alignment; + uint32_t alignment_B; /** * The interpretation of this field depends on the value of * isl_tile_info::physical_extent_B. In particular, the width of the - * surface in tiles is row_pitch / isl_tile_info::physical_extent_B.width + * surface in tiles is row_pitch_B / isl_tile_info::physical_extent_B.width * and the distance in bytes between vertically adjacent tiles in the image - * is given by row_pitch * isl_tile_info::physical_extent_B.height. + * is given by row_pitch_B * isl_tile_info::physical_extent_B.height. 
* * For linear images where isl_tile_info::physical_extent_B.height == 1, * this cleanly reduces to being the distance, in bytes, between vertically @@ -774,7 +1284,7 @@ struct isl_surf { * * @see isl_tile_info::phys_extent_B; */ - uint32_t row_pitch; + uint32_t row_pitch_B; /** * Pitch between physical array slices, in rows of surface elements. @@ -787,6 +1297,22 @@ struct isl_surf { isl_surf_usage_flags_t usage; }; +struct isl_swizzle { + enum isl_channel_select r:4; + enum isl_channel_select g:4; + enum isl_channel_select b:4; + enum isl_channel_select a:4; +}; + +#define ISL_SWIZZLE(R, G, B, A) ((struct isl_swizzle) { \ + .r = ISL_CHANNEL_SELECT_##R, \ + .g = ISL_CHANNEL_SELECT_##G, \ + .b = ISL_CHANNEL_SELECT_##B, \ + .a = ISL_CHANNEL_SELECT_##A, \ + }) + +#define ISL_SWIZZLE_IDENTITY ISL_SWIZZLE(RED, GREEN, BLUE, ALPHA) + struct isl_view { /** * Indicates the usage of the particular view @@ -812,11 +1338,23 @@ struct isl_view { * * For cube maps, both base_array_layer and array_len should be * specified in terms of 2-D layers and must be a multiple of 6. + * + * 3-D textures are effectively treated as 2-D arrays when used as a + * storage image or render target. If `usage` contains + * ISL_SURF_USAGE_RENDER_TARGET_BIT or ISL_SURF_USAGE_STORAGE_BIT then + * base_array_layer and array_len are applied. If the surface is only used + * for texturing, they are ignored. */ uint32_t base_array_layer; + + /** + * Array Length + * + * Indicates the number of array elements starting at Base Array Layer. + */ uint32_t array_len; - enum isl_channel_select channel_select[4]; + struct isl_swizzle swizzle; }; union isl_color_value { @@ -841,12 +1379,36 @@ struct isl_surf_fill_state_info { */ uint32_t mocs; + /** + * The auxiliary surface or NULL if no auxiliary surface is to be used. + */ + const struct isl_surf *aux_surf; + enum isl_aux_usage aux_usage; + uint64_t aux_address; + /** * The clear color for this surface * * Valid values depend on hardware generation.
*/ union isl_color_value clear_color; + + /** + * Send only the clear value address + * + * If set, we only pass the clear address to the GPU and it will fetch it + * from wherever it is. + */ + bool use_clear_address; + uint64_t clear_address; + + /** + * Surface write disables for gen4-5 + */ + isl_channel_mask_t write_disables; + + /* Intra-tile offset */ + uint16_t x_offset_sa, y_offset_sa; }; struct isl_buffer_fill_state_info { @@ -858,7 +1420,7 @@ struct isl_buffer_fill_state_info { /** * The size of the buffer */ - uint64_t size; + uint64_t size_B; /** * The Memory Object Control state for the filled surface state. @@ -875,14 +1437,79 @@ struct isl_buffer_fill_state_info { */ enum isl_format format; - uint32_t stride; + /** + * The swizzle to use in the surface state + */ + struct isl_swizzle swizzle; + + uint32_t stride_B; +}; + +struct isl_depth_stencil_hiz_emit_info { + /** + * The depth surface + */ + const struct isl_surf *depth_surf; + + /** + * The stencil surface + * + * If separate stencil is not available, this must point to the same + * isl_surf as depth_surf. + */ + const struct isl_surf *stencil_surf; + + /** + * The view into the depth and stencil surfaces. + * + * This view applies to both surfaces simultaneously. + */ + const struct isl_view *view; + + /** + * The address of the depth surface in GPU memory + */ + uint64_t depth_address; + + /** + * The address of the stencil surface in GPU memory + * + * If separate stencil is not available, this must have the same value as + * depth_address. + */ + uint64_t stencil_address; + + /** + * The Memory Object Control state for depth and stencil buffers + * + * Both depth and stencil will get the same MOCS value. The exact format + * of this value depends on hardware generation. + */ + uint32_t mocs; + + /** + * The HiZ surface or NULL if HiZ is disabled. 
+ */ + const struct isl_surf *hiz_surf; + enum isl_aux_usage hiz_usage; + uint64_t hiz_address; + + /** + * The depth clear value + */ + float depth_clear_value; + + /** + * Track stencil aux usage for Gen >= 12 + */ + enum isl_aux_usage stencil_aux_usage; }; extern const struct isl_format_layout isl_format_layouts[]; void isl_device_init(struct isl_device *dev, - const struct brw_device_info *info, + const struct gen_device_info *info, bool has_bit6_swizzling); isl_sample_count_mask_t ATTRIBUTE_CONST @@ -891,27 +1518,46 @@ isl_device_get_sample_counts(struct isl_device *dev); static inline const struct isl_format_layout * ATTRIBUTE_CONST isl_format_get_layout(enum isl_format fmt) { + assert(fmt != ISL_FORMAT_UNSUPPORTED); + assert(fmt < ISL_NUM_FORMATS); return &isl_format_layouts[fmt]; } +bool isl_format_is_valid(enum isl_format); + static inline const char * ATTRIBUTE_CONST isl_format_get_name(enum isl_format fmt) { - return isl_format_layouts[fmt].name; + return isl_format_get_layout(fmt)->name; } -bool isl_format_supports_rendering(const struct brw_device_info *devinfo, +enum isl_format isl_format_for_pipe_format(enum pipe_format pf); + +bool isl_format_supports_rendering(const struct gen_device_info *devinfo, enum isl_format format); -bool isl_format_supports_alpha_blending(const struct brw_device_info *devinfo, +bool isl_format_supports_alpha_blending(const struct gen_device_info *devinfo, enum isl_format format); -bool isl_format_supports_sampling(const struct brw_device_info *devinfo, +bool isl_format_supports_sampling(const struct gen_device_info *devinfo, enum isl_format format); -bool isl_format_supports_filtering(const struct brw_device_info *devinfo, +bool isl_format_supports_filtering(const struct gen_device_info *devinfo, enum isl_format format); -bool isl_format_supports_vertex_fetch(const struct brw_device_info *devinfo, +bool isl_format_supports_vertex_fetch(const struct gen_device_info *devinfo, enum isl_format format); -bool 
isl_format_supports_lossless_compression(const struct brw_device_info *devinfo, - enum isl_format format); +bool isl_format_supports_typed_writes(const struct gen_device_info *devinfo, + enum isl_format format); +bool isl_format_supports_typed_reads(const struct gen_device_info *devinfo, + enum isl_format format); +bool isl_format_supports_ccs_d(const struct gen_device_info *devinfo, + enum isl_format format); +bool isl_format_supports_ccs_e(const struct gen_device_info *devinfo, + enum isl_format format); +bool isl_format_supports_multisampling(const struct gen_device_info *devinfo, + enum isl_format format); + +bool isl_formats_are_ccs_e_compatible(const struct gen_device_info *devinfo, + enum isl_format format1, + enum isl_format format2); +uint8_t isl_format_get_aux_map_encoding(enum isl_format format); bool isl_format_has_unorm_channel(enum isl_format fmt) ATTRIBUTE_CONST; bool isl_format_has_snorm_channel(enum isl_format fmt) ATTRIBUTE_CONST; @@ -941,8 +1587,13 @@ isl_format_has_int_channel(enum isl_format fmt) isl_format_has_sint_channel(fmt); } +bool isl_format_has_color_component(enum isl_format fmt, + int component) ATTRIBUTE_CONST; + unsigned isl_format_get_num_channels(enum isl_format fmt); +uint32_t isl_format_get_depth_format(enum isl_format fmt, bool has_stencil); + static inline bool isl_format_is_compressed(enum isl_format fmt) { @@ -968,12 +1619,23 @@ isl_format_has_bc_compression(enum isl_format fmt) case ISL_TXC_ETC2: case ISL_TXC_ASTC: return false; + + case ISL_TXC_HIZ: + case ISL_TXC_MCS: + case ISL_TXC_CCS: + unreachable("Should not be called on an aux surface"); } unreachable("bad texture compression mode"); return false; } +static inline bool +isl_format_is_planar(enum isl_format fmt) +{ + return fmt == ISL_FORMAT_PLANAR_420_8; +} + static inline bool isl_format_is_yuv(enum isl_format fmt) { @@ -990,35 +1652,79 @@ isl_format_block_is_1x1x1(enum isl_format fmt) return fmtl->bw == 1 && fmtl->bh == 1 && fmtl->bd == 1; } +static inline bool 
+isl_format_is_srgb(enum isl_format fmt) +{ + return isl_format_get_layout(fmt)->colorspace == ISL_COLORSPACE_SRGB; +} + +enum isl_format isl_format_srgb_to_linear(enum isl_format fmt); + static inline bool isl_format_is_rgb(enum isl_format fmt) { - return isl_format_layouts[fmt].channels.r.bits > 0 && - isl_format_layouts[fmt].channels.g.bits > 0 && - isl_format_layouts[fmt].channels.b.bits > 0 && - isl_format_layouts[fmt].channels.a.bits == 0; + if (isl_format_is_yuv(fmt)) + return false; + + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->channels.r.bits > 0 && + fmtl->channels.g.bits > 0 && + fmtl->channels.b.bits > 0 && + fmtl->channels.a.bits == 0; +} + +static inline bool +isl_format_is_rgbx(enum isl_format fmt) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); + + return fmtl->channels.r.bits > 0 && + fmtl->channels.g.bits > 0 && + fmtl->channels.b.bits > 0 && + fmtl->channels.a.bits > 0 && + fmtl->channels.a.type == ISL_VOID; } enum isl_format isl_format_rgb_to_rgba(enum isl_format rgb) ATTRIBUTE_CONST; enum isl_format isl_format_rgb_to_rgbx(enum isl_format rgb) ATTRIBUTE_CONST; +enum isl_format isl_format_rgbx_to_rgba(enum isl_format rgb) ATTRIBUTE_CONST; + +union isl_color_value +isl_color_value_swizzle_inv(union isl_color_value src, + struct isl_swizzle swizzle); + +void isl_color_value_pack(const union isl_color_value *value, + enum isl_format format, + uint32_t *data_out); +void isl_color_value_unpack(union isl_color_value *value, + enum isl_format format, + const uint32_t *data_in); bool isl_is_storage_image_format(enum isl_format fmt); enum isl_format -isl_lower_storage_image_format(const struct brw_device_info *devinfo, +isl_lower_storage_image_format(const struct gen_device_info *devinfo, enum isl_format fmt); /* Returns true if this hardware supports typed load/store on a format with * the same size as the given format. 
 */ bool -isl_has_matching_typed_storage_image_format(const struct brw_device_info *devinfo, +isl_has_matching_typed_storage_image_format(const struct gen_device_info *devinfo, enum isl_format fmt); +static inline enum isl_tiling +isl_tiling_flag_to_enum(isl_tiling_flags_t flag) +{ + assert(__builtin_popcount(flag) == 1); + return (enum isl_tiling) (__builtin_ffs(flag) - 1); +} + static inline bool isl_tiling_is_any_y(enum isl_tiling tiling) { - return (1u << tiling) & ISL_TILING_ANY_MASK; + return (1u << tiling) & ISL_TILING_ANY_Y_MASK; } static inline bool @@ -1027,15 +1733,153 @@ isl_tiling_is_std_y(enum isl_tiling tiling) return (1u << tiling) & ISL_TILING_STD_Y_MASK; } +uint32_t +isl_tiling_to_i915_tiling(enum isl_tiling tiling); + +enum isl_tiling +isl_tiling_from_i915_tiling(uint32_t tiling); + +/** + * Return an isl_aux_op needed to enable an access to occur in an + * isl_aux_state suitable for the isl_aux_usage. + * + * NOTE: If the access will invalidate the main surface, this function should + * not be called and the isl_aux_op of NONE should be used instead. + * Otherwise, an extra (but still lossless) ambiguate may occur. + * + * @invariant initial_state is possible with an isl_aux_usage compatible with + * the given usage. Two usages are compatible if it's possible to + * switch between them (e.g. CCS_E <-> CCS_D). + * @invariant fast_clear_supported is false if the aux doesn't support fast clears. + */ +enum isl_aux_op +isl_aux_prepare_access(enum isl_aux_state initial_state, + enum isl_aux_usage usage, + bool fast_clear_supported); + +/** + * Return the isl_aux_state entered after performing an isl_aux_op. + * + * @invariant initial_state is possible with the given usage. + * @invariant op is possible with the given usage. + * @invariant op must not cause HW to read from an invalid aux. 
+ */ +enum isl_aux_state +isl_aux_state_transition_aux_op(enum isl_aux_state initial_state, + enum isl_aux_usage usage, + enum isl_aux_op op); + +/** + * Return the isl_aux_state entered after performing a write. + * + * NOTE: full_surface should be true if the write covers the entire + * slice. Setting it to false in this case will still result in a + * correct (but imprecise) aux state. + * + * @invariant if usage is not ISL_AUX_USAGE_NONE, then initial_state is + * possible with the given usage. + * @invariant usage can be ISL_AUX_USAGE_NONE iff: + * * the main surface is valid, or + * * the main surface is being invalidated/replaced. + */ +enum isl_aux_state +isl_aux_state_transition_write(enum isl_aux_state initial_state, + enum isl_aux_usage usage, + bool full_surface); + bool -isl_tiling_get_info(const struct isl_device *dev, - enum isl_tiling tiling, - uint32_t format_bpb, - struct isl_tile_info *info); -bool -isl_surf_choose_tiling(const struct isl_device *dev, - const struct isl_surf_init_info *restrict info, - enum isl_tiling *tiling); +isl_aux_usage_has_fast_clears(enum isl_aux_usage usage); + +static inline bool +isl_aux_usage_has_hiz(enum isl_aux_usage usage) +{ + return usage == ISL_AUX_USAGE_HIZ || + usage == ISL_AUX_USAGE_HIZ_CCS_WT || + usage == ISL_AUX_USAGE_HIZ_CCS; +} + +static inline bool +isl_aux_usage_has_mcs(enum isl_aux_usage usage) +{ + return usage == ISL_AUX_USAGE_MCS || + usage == ISL_AUX_USAGE_MCS_CCS; +} + +static inline bool +isl_aux_usage_has_ccs(enum isl_aux_usage usage) +{ + return usage == ISL_AUX_USAGE_CCS_D || + usage == ISL_AUX_USAGE_CCS_E || + usage == ISL_AUX_USAGE_GEN12_CCS_E || + usage == ISL_AUX_USAGE_MC || + usage == ISL_AUX_USAGE_HIZ_CCS_WT || + usage == ISL_AUX_USAGE_HIZ_CCS || + usage == ISL_AUX_USAGE_MCS_CCS || + usage == ISL_AUX_USAGE_STC_CCS; +} + +static inline bool +isl_aux_state_has_valid_primary(enum isl_aux_state state) +{ + return state == ISL_AUX_STATE_RESOLVED || + state == ISL_AUX_STATE_PASS_THROUGH || + 
 state == ISL_AUX_STATE_AUX_INVALID; +} + +static inline bool +isl_aux_state_has_valid_aux(enum isl_aux_state state) +{ + return state != ISL_AUX_STATE_AUX_INVALID; +} + +const struct isl_drm_modifier_info * ATTRIBUTE_CONST +isl_drm_modifier_get_info(uint64_t modifier); + +static inline bool +isl_drm_modifier_has_aux(uint64_t modifier) +{ + return isl_drm_modifier_get_info(modifier)->aux_usage != ISL_AUX_USAGE_NONE; +} + +/** Returns the default isl_aux_state for the given modifier. + * + * If we have a modifier which supports compression, then the auxiliary data + * could be in a state other than ISL_AUX_STATE_AUX_INVALID. In particular, it + * can be in any of the following: + * + * - ISL_AUX_STATE_CLEAR + * - ISL_AUX_STATE_PARTIAL_CLEAR + * - ISL_AUX_STATE_COMPRESSED_CLEAR + * - ISL_AUX_STATE_COMPRESSED_NO_CLEAR + * - ISL_AUX_STATE_RESOLVED + * - ISL_AUX_STATE_PASS_THROUGH + * + * If the modifier does not support fast-clears, then we are guaranteed + * that the surface is at least partially resolved and the first three are not + * possible. We return ISL_AUX_STATE_COMPRESSED_CLEAR if the modifier + * supports fast clears and ISL_AUX_STATE_COMPRESSED_NO_CLEAR if it does not + * because they are the least common denominator of the set of possible aux + * states and will yield a valid interpretation of the aux data. + * + * For modifiers with no aux support, ISL_AUX_STATE_AUX_INVALID is returned. + */ +static inline enum isl_aux_state +isl_drm_modifier_get_default_aux_state(uint64_t modifier) +{ + const struct isl_drm_modifier_info *mod_info = + isl_drm_modifier_get_info(modifier); + + if (!mod_info || mod_info->aux_usage == ISL_AUX_USAGE_NONE) + return ISL_AUX_STATE_AUX_INVALID; + + assert(mod_info->aux_usage == ISL_AUX_USAGE_CCS_E || + mod_info->aux_usage == ISL_AUX_USAGE_GEN12_CCS_E); + return mod_info->supports_clear_color ? 
ISL_AUX_STATE_COMPRESSED_CLEAR : + ISL_AUX_STATE_COMPRESSED_NO_CLEAR; +} + +struct isl_extent2d ATTRIBUTE_CONST +isl_get_interleaved_msaa_px_size_sa(uint32_t samples); static inline bool isl_surf_usage_is_display(isl_surf_usage_flags_t usage) @@ -1119,6 +1963,30 @@ isl_extent4d(uint32_t width, uint32_t height, uint32_t depth, return e; } +bool isl_color_value_is_zero(union isl_color_value value, + enum isl_format format); + +bool isl_color_value_is_zero_one(union isl_color_value value, + enum isl_format format); + +static inline bool +isl_swizzle_is_identity(struct isl_swizzle swizzle) +{ + return swizzle.r == ISL_CHANNEL_SELECT_RED && + swizzle.g == ISL_CHANNEL_SELECT_GREEN && + swizzle.b == ISL_CHANNEL_SELECT_BLUE && + swizzle.a == ISL_CHANNEL_SELECT_ALPHA; +} + +bool +isl_swizzle_supports_rendering(const struct gen_device_info *devinfo, + struct isl_swizzle swizzle); + +struct isl_swizzle +isl_swizzle_compose(struct isl_swizzle first, struct isl_swizzle second); +struct isl_swizzle +isl_swizzle_invert(struct isl_swizzle swizzle); + #define isl_surf_init(dev, surf, ...) 
\ isl_surf_init_s((dev), (surf), \ &(struct isl_surf_init_info) { __VA_ARGS__ }); @@ -1129,10 +1997,30 @@ isl_surf_init_s(const struct isl_device *dev, const struct isl_surf_init_info *restrict info); void -isl_surf_get_tile_info(const struct isl_device *dev, - const struct isl_surf *surf, +isl_surf_get_tile_info(const struct isl_surf *surf, struct isl_tile_info *tile_info); +bool +isl_surf_supports_ccs(const struct isl_device *dev, + const struct isl_surf *surf); + +bool +isl_surf_get_hiz_surf(const struct isl_device *dev, + const struct isl_surf *surf, + struct isl_surf *hiz_surf); + +bool +isl_surf_get_mcs_surf(const struct isl_device *dev, + const struct isl_surf *surf, + struct isl_surf *mcs_surf); + +bool +isl_surf_get_ccs_surf(const struct isl_device *dev, + const struct isl_surf *surf, + struct isl_surf *aux_surf, + struct isl_surf *extra_aux_surf, + uint32_t row_pitch_B /**< Ignored if 0 */); + #define isl_surf_fill_state(dev, state, ...) \ isl_surf_fill_state_s((dev), (state), \ &(struct isl_surf_fill_state_info) { __VA_ARGS__ }); @@ -1149,6 +2037,18 @@ void isl_buffer_fill_state_s(const struct isl_device *dev, void *state, const struct isl_buffer_fill_state_info *restrict info); +void +isl_null_fill_state(const struct isl_device *dev, void *state, + struct isl_extent3d size); + +#define isl_emit_depth_stencil_hiz(dev, batch, ...) \ + isl_emit_depth_stencil_hiz_s((dev), (batch), \ + &(struct isl_depth_stencil_hiz_emit_info) { __VA_ARGS__ }) + +void +isl_emit_depth_stencil_hiz_s(const struct isl_device *dev, void *batch, + const struct isl_depth_stencil_hiz_emit_info *restrict info); + void isl_surf_fill_image_param(const struct isl_device *dev, struct brw_image_param *param, @@ -1185,13 +2085,41 @@ isl_surf_get_image_alignment_sa(const struct isl_surf *surf) fmtl->bd * surf->image_alignment_el.d); } +/** + * Logical extent of level 0 in units of surface elements. 
+ */ +static inline struct isl_extent4d +isl_surf_get_logical_level0_el(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + return isl_extent4d(DIV_ROUND_UP(surf->logical_level0_px.w, fmtl->bw), + DIV_ROUND_UP(surf->logical_level0_px.h, fmtl->bh), + DIV_ROUND_UP(surf->logical_level0_px.d, fmtl->bd), + surf->logical_level0_px.a); +} + +/** + * Physical extent of level 0 in units of surface elements. + */ +static inline struct isl_extent4d +isl_surf_get_phys_level0_el(const struct isl_surf *surf) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); + + return isl_extent4d(DIV_ROUND_UP(surf->phys_level0_sa.w, fmtl->bw), + DIV_ROUND_UP(surf->phys_level0_sa.h, fmtl->bh), + DIV_ROUND_UP(surf->phys_level0_sa.d, fmtl->bd), + surf->phys_level0_sa.a); +} + /** * Pitch between vertically adjacent surface elements, in bytes. */ static inline uint32_t -isl_surf_get_row_pitch(const struct isl_surf *surf) +isl_surf_get_row_pitch_B(const struct isl_surf *surf) { - return surf->row_pitch; + return surf->row_pitch_B; } /** @@ -1202,8 +2130,8 @@ isl_surf_get_row_pitch_el(const struct isl_surf *surf) { const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format); - assert(surf->row_pitch % (fmtl->bpb / 8) == 0); - return surf->row_pitch / (fmtl->bpb / 8); + assert(surf->row_pitch_B % (fmtl->bpb / 8) == 0); + return surf->row_pitch_B / (fmtl->bpb / 8); } /** @@ -1241,9 +2169,25 @@ isl_surf_get_array_pitch_sa_rows(const struct isl_surf *surf) static inline uint32_t isl_surf_get_array_pitch(const struct isl_surf *surf) { - return isl_surf_get_array_pitch_sa_rows(surf) * surf->row_pitch; + return isl_surf_get_array_pitch_sa_rows(surf) * surf->row_pitch_B; } +/** + * Calculate the offset, in units of surface samples, to a subimage in the + * surface. 
+ * + * @invariant level < surface levels + * @invariant logical_array_layer < logical array length of surface + * @invariant logical_z_offset_px < logical depth of surface at level + */ +void +isl_surf_get_image_offset_sa(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa); + /** * Calculate the offset, in units of surface elements, to a subimage in the * surface. @@ -1260,6 +2204,71 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf, uint32_t *x_offset_el, uint32_t *y_offset_el); +/** + * Calculate the offset, in bytes and intratile surface samples, to a + * subimage in the surface. + * + * This is equivalent to calling isl_surf_get_image_offset_el, passing the + * result to isl_tiling_get_intratile_offset_el, and converting the tile + * offsets to samples. + * + * @invariant level < surface levels + * @invariant logical_array_layer < logical array length of surface + * @invariant logical_z_offset_px < logical depth of surface at level + */ +void +isl_surf_get_image_offset_B_tile_sa(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *offset_B, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa); + +/** + * Calculate the range in bytes occupied by a subimage, to the nearest tile. + * + * The range returned will be the smallest memory range in which the given + * subimage fits, rounded to even tiles. Intel images do not usually have a + * direct subimage -> range mapping so the range returned may contain data + * from other sub-images. The returned range is a half-open interval where + * all of the addresses within the subimage are < end_tile_B. 
+ * + * @invariant level < surface levels + * @invariant logical_array_layer < logical array length of surface + * @invariant logical_z_offset_px < logical depth of surface at level + */ +void +isl_surf_get_image_range_B_tile(const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + uint32_t *start_tile_B, + uint32_t *end_tile_B); + +/** + * Create an isl_surf that represents a particular subimage in the surface. + * + * The newly created surface will have a single miplevel and array slice. The + * surface lives at the returned byte and intratile offsets, in samples. + * + * It is safe to call this function with surf == image_surf. + * + * @invariant level < surface levels + * @invariant logical_array_layer < logical array length of surface + * @invariant logical_z_offset_px < logical depth of surface at level + */ +void +isl_surf_get_image_surf(const struct isl_device *dev, + const struct isl_surf *surf, + uint32_t level, + uint32_t logical_array_layer, + uint32_t logical_z_offset_px, + struct isl_surf *image_surf, + uint32_t *offset_B, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa); + /** * @brief Calculate the intratile offsets to a surface. * @@ -1271,15 +2280,43 @@ isl_surf_get_image_offset_el(const struct isl_surf *surf, * surface's tiling format. 
*/ void -isl_tiling_get_intratile_offset_el(const struct isl_device *dev, - enum isl_tiling tiling, - uint8_t bs, - uint32_t row_pitch, - uint32_t total_x_offset_B, - uint32_t total_y_offset_rows, +isl_tiling_get_intratile_offset_el(enum isl_tiling tiling, + uint32_t bpb, + uint32_t row_pitch_B, + uint32_t total_x_offset_el, + uint32_t total_y_offset_el, uint32_t *base_address_offset, - uint32_t *x_offset_B, - uint32_t *y_offset_rows); + uint32_t *x_offset_el, + uint32_t *y_offset_el); + +static inline void +isl_tiling_get_intratile_offset_sa(enum isl_tiling tiling, + enum isl_format format, + uint32_t row_pitch_B, + uint32_t total_x_offset_sa, + uint32_t total_y_offset_sa, + uint32_t *base_address_offset, + uint32_t *x_offset_sa, + uint32_t *y_offset_sa) +{ + const struct isl_format_layout *fmtl = isl_format_get_layout(format); + + /* For computing the intratile offsets, we actually want a strange unit + * which is samples for multisampled surfaces but elements for compressed + * surfaces. 
+ */ + assert(total_x_offset_sa % fmtl->bw == 0); + assert(total_y_offset_sa % fmtl->bh == 0); + const uint32_t total_x_offset = total_x_offset_sa / fmtl->bw; + const uint32_t total_y_offset = total_y_offset_sa / fmtl->bh; + + isl_tiling_get_intratile_offset_el(tiling, fmtl->bpb, row_pitch_B, + total_x_offset, total_y_offset, + base_address_offset, + x_offset_sa, y_offset_sa); + *x_offset_sa *= fmtl->bw; + *y_offset_sa *= fmtl->bh; +} /** * @brief Get value of 3DSTATE_DEPTH_BUFFER.SurfaceFormat @@ -1291,6 +2328,34 @@ uint32_t isl_surf_get_depth_format(const struct isl_device *dev, const struct isl_surf *surf); +/** + * @brief performs a copy from linear to tiled surface + * + */ +void +isl_memcpy_linear_to_tiled(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + uint32_t dst_pitch, int32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + +/** + * @brief performs a copy from tiled to linear surface + * + */ +void +isl_memcpy_tiled_to_linear(uint32_t xt1, uint32_t xt2, + uint32_t yt1, uint32_t yt2, + char *dst, const char *src, + int32_t dst_pitch, uint32_t src_pitch, + bool has_swizzling, + enum isl_tiling tiling, + isl_memcpy_type copy_type); + #ifdef __cplusplus } #endif + +#endif /* ISL_H */