X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir.h;h=d32bbab5dfc8fdd6e91866dcf06176d1abd35ef6;hb=111b0a669979cf277f31c69f501982fee004e067;hp=06acfc6c562efd25c13abb922140f0532d8d7376;hpb=078dcb7ccd307e8839ffbedddeab0328f6344715;p=mesa.git diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 06acfc6c562..d32bbab5dfc 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -37,6 +37,7 @@ #include "util/bitscan.h" #include "util/bitset.h" #include "util/macros.h" +#include "util/format/u_format.h" #include "compiler/nir_types.h" #include "compiler/shader_enums.h" #include "compiler/shader_info.h" @@ -58,9 +59,19 @@ extern "C" { #define NIR_FALSE 0u #define NIR_TRUE (~0u) -#define NIR_MAX_VEC_COMPONENTS 4 +#define NIR_MAX_VEC_COMPONENTS 16 #define NIR_MAX_MATRIX_COLUMNS 4 -typedef uint8_t nir_component_mask_t; +#define NIR_STREAM_PACKED (1 << 8) +typedef uint16_t nir_component_mask_t; + +static inline bool +nir_num_components_valid(unsigned num_components) +{ + return (num_components >= 1 && + num_components <= 4) || + num_components == 8 || + num_components == 16; +} /** Defines a cast function * @@ -92,7 +103,7 @@ struct nir_builder; */ typedef struct { gl_state_index16 tokens[STATE_LENGTH]; - int swizzle; + uint16_t swizzle; } nir_state_slot; typedef enum { @@ -106,7 +117,9 @@ typedef enum { nir_var_mem_ssbo = (1 << 7), nir_var_mem_shared = (1 << 8), nir_var_mem_global = (1 << 9), - nir_var_all = ~0, + nir_var_mem_push_const = (1 << 10), /* not actually used for variables */ + nir_num_variable_modes = 11, + nir_var_all = (1 << nir_num_variable_modes) - 1, } nir_variable_mode; /** @@ -212,7 +225,7 @@ nir_const_value_as_int(nir_const_value value, unsigned bit_size) } } -static inline int64_t +static inline uint64_t nir_const_value_as_uint(nir_const_value value, unsigned bit_size) { switch (bit_size) { @@ -313,7 +326,7 @@ typedef struct nir_variable { * * \sa nir_variable_mode */ - nir_variable_mode mode; + nir_variable_mode mode:11; /** * Is the variable read-only? @@ -327,6 +340,19 @@ typedef struct nir_variable { unsigned patch:1; unsigned invariant:1; + /** + * Precision qualifier. + * + * In desktop GLSL we do not care about precision qualifiers at all, in + * fact, the spec says that precision qualifiers are ignored. + * + * To make things easy, we make it so that this field is always + * GLSL_PRECISION_NONE on desktop shaders. This way all the variables + * have the same precision value and the checks we add in the compiler + * for this field will never break a desktop shader compile. + */ + unsigned precision:2; + /** * Can this variable be coalesced with another? * @@ -353,7 +379,7 @@ typedef struct nir_variable { * * \sa glsl_interp_mode */ - unsigned interpolation:2; + unsigned interpolation:3; /** * If non-zero, then this variable may be packed along with other variables @@ -389,6 +415,15 @@ typedef struct nir_variable { */ unsigned explicit_binding:1; + /** + * Was the location explicitly set in the shader? + * + * If the location is explicitly set in the shader, it \b cannot be changed + * by the linker or by the API (e.g., calls to \c glBindAttribLocation have + * no effect). + */ + unsigned explicit_location:1; + /** * Was a transfer feedback buffer set in the shader? */ @@ -405,99 +440,130 @@ typedef struct nir_variable { unsigned explicit_offset:1; /** - * \brief Layout qualifier for gl_FragDepth. - * - * This is not equal to \c ir_depth_layout_none if and only if this - * variable is \c gl_FragDepth and a layout qualifier is specified. + * Layout of the matrix. Uses glsl_matrix_layout values. */ - nir_depth_layout depth_layout; + unsigned matrix_layout:2; /** - * Storage location of the base of this variable - * - * The precise meaning of this field depends on the nature of the variable. - * - * - Vertex shader input: one of the values from \c gl_vert_attrib. - * - Vertex shader output: one of the values from \c gl_varying_slot. - * - Geometry shader input: one of the values from \c gl_varying_slot. - * - Geometry shader output: one of the values from \c gl_varying_slot. - * - Fragment shader input: one of the values from \c gl_varying_slot. - * - Fragment shader output: one of the values from \c gl_frag_result. - * - Uniforms: Per-stage uniform slot number for default uniform block. - * - Uniforms: Index within the uniform block definition for UBO members. - * - Non-UBO Uniforms: uniform slot number. - * - Other: This field is not currently used. + * Non-zero if this variable was created by lowering a named interface + * block. + */ + unsigned from_named_ifc_block:1; + + /** + * How the variable was declared. See nir_var_declaration_type. * - * If the variable is a uniform, shader input, or shader output, and the - * slot has not been assigned, the value will be -1. + * This is used to detect variables generated by the compiler, so should + * not be visible via the API. */ - int location; + unsigned how_declared:2; /** - * The actual location of the variable in the IR. Only valid for inputs - * and outputs. + * Is this variable per-view? If so, we know it must be an array with + * size corresponding to the number of views. */ - unsigned int driver_location; + unsigned per_view:1; /** - * Vertex stream output identifier. + * \brief Layout qualifier for gl_FragDepth. * - * For packed outputs, bit 31 is set and bits [2*i+1,2*i] indicate the - * stream of the i-th component. + * This is not equal to \c ir_depth_layout_none if and only if this + * variable is \c gl_FragDepth and a layout qualifier is specified. */ - unsigned stream; + nir_depth_layout depth_layout:3; /** - * output index for dual source blending. + * Vertex stream output identifier. + * + * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i] + * indicate the stream of the i-th component. */ - int index; + unsigned stream:9; /** - * Descriptor set binding for sampler or UBO. + * Access flags for memory variables (SSBO/global), image uniforms, and + * bindless images in uniforms/inputs/outputs. */ - int descriptor_set; + enum gl_access_qualifier access:8; /** - * Initial binding point for a sampler or UBO. - * - * For array types, this represents the binding point for the first element. + * Descriptor set binding for sampler or UBO. */ - int binding; + unsigned descriptor_set:5; /** - * Location an atomic counter or transform feedback is stored at. + * output index for dual source blending. */ - unsigned offset; + unsigned index; /** - * Transform feedback buffer. + * Initial binding point for a sampler or UBO. + * + * For array types, this represents the binding point for the first element. */ - unsigned xfb_buffer; + unsigned binding; /** - * Transform feedback stride. + * Storage location of the base of this variable + * + * The precise meaning of this field depends on the nature of the variable. + * + * - Vertex shader input: one of the values from \c gl_vert_attrib. + * - Vertex shader output: one of the values from \c gl_varying_slot. + * - Geometry shader input: one of the values from \c gl_varying_slot. + * - Geometry shader output: one of the values from \c gl_varying_slot. + * - Fragment shader input: one of the values from \c gl_varying_slot. + * - Fragment shader output: one of the values from \c gl_frag_result. + * - Uniforms: Per-stage uniform slot number for default uniform block. + * - Uniforms: Index within the uniform block definition for UBO members. + * - Non-UBO Uniforms: uniform slot number. + * - Other: This field is not currently used. + * + * If the variable is a uniform, shader input, or shader output, and the + * slot has not been assigned, the value will be -1. */ - unsigned xfb_stride; + int location; /** - * How the variable was declared. See nir_var_declaration_type. - * - * This is used to detect variables generated by the compiler, so should - * not be visible via the API. + * The actual location of the variable in the IR. Only valid for inputs, + * outputs, and uniforms (including samplers and images). */ - unsigned how_declared:2; + unsigned driver_location; /** - * ARB_shader_image_load_store qualifiers. + * Location an atomic counter or transform feedback is stored at. */ - struct { - enum gl_access_qualifier access; + unsigned offset; - /** Image internal format if specified explicitly, otherwise GL_NONE. */ - GLenum format; - } image; + union { + struct { + /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */ + enum pipe_format format; + } image; + + struct { + /** + * Transform feedback buffer. + */ + uint16_t buffer:2; + + /** + * Transform feedback stride. + */ + uint16_t stride; + } xfb; + }; } data; + /** + * Identifier for this variable generated by nir_index_vars() that is unique + * among other variables in the same exec_list. + */ + unsigned index; + + /* Number of nir_variable_data members */ + uint16_t num_members; + /** * Built-in state that backs this uniform * @@ -510,7 +576,7 @@ typedef struct nir_variable { * \c state_slots will be \c NULL. */ /*@{*/ - unsigned num_state_slots; /**< Number of state slots used */ + uint16_t num_state_slots; /**< Number of state slots used */ nir_state_slot *state_slots; /**< State descriptors. */ /*@}*/ @@ -523,6 +589,14 @@ typedef struct nir_variable { */ nir_constant *constant_initializer; + /** + * Global variable assigned in the initializer of the variable + * This field should only be used temporarily by creators of NIR shaders + * and then lower_constant_initializers can be used to get rid of them. + * Most of the rest of NIR ignores this field or asserts that it's NULL. + */ + struct nir_variable *pointer_initializer; + /** * For variables that are in an interface block or are an instance of an * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. @@ -539,7 +613,6 @@ typedef struct nir_variable { * inputs each with their own layout specifier. This is only allowed on * variables with a struct or array of array of struct type. */ - unsigned num_members; struct nir_variable_data *members; } nir_variable; @@ -667,6 +740,12 @@ typedef struct nir_ssa_def { /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ uint8_t bit_size; + + /** + * True if this SSA value may have different values in different SIMD + * invocations of the shader. This is set by nir_divergence_analysis. + */ + bool divergent; } nir_ssa_def; struct nir_src; @@ -807,6 +886,13 @@ nir_src_is_const(nir_src src) src.ssa->parent_instr->type == nir_instr_type_load_const; } +static inline bool +nir_src_is_divergent(nir_src src) +{ + assert(src.is_ssa); + return src.ssa->divergent; +} + static inline unsigned nir_dest_bit_size(nir_dest dest) { @@ -819,6 +905,33 @@ nir_dest_num_components(nir_dest dest) return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components; } +static inline bool +nir_dest_is_divergent(nir_dest dest) +{ + assert(dest.is_ssa); + return dest.ssa.divergent; +} + +/* Are all components the same, ie. .xxxx */ +static inline bool +nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp) +{ + for (unsigned i = 1; i < nr_comp; i++) + if (swiz[i] != swiz[0]) + return false; + return true; +} + +/* Are all components sequential, ie. .yzw */ +static inline bool +nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp) +{ + for (unsigned i = 1; i < nr_comp; i++) + if (swiz[i] != (swiz[0] + i)) + return false; + return true; +} + void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if); void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr); @@ -874,13 +987,15 @@ typedef struct { * The values in this enum are carefully chosen so that the sized type is * just the unsized type OR the number of bits. */ -typedef enum { +typedef enum PACKED { nir_type_invalid = 0, /* Not a valid type */ nir_type_int = 2, nir_type_uint = 4, nir_type_bool = 6, nir_type_float = 128, nir_type_bool1 = 1 | nir_type_bool, + nir_type_bool8 = 8 | nir_type_bool, + nir_type_bool16 = 16 | nir_type_bool, nir_type_bool32 = 32 | nir_type_bool, nir_type_int1 = 1 | nir_type_int, nir_type_int8 = 8 | nir_type_int, @@ -984,10 +1099,109 @@ nir_op_vec(unsigned components) case 2: return nir_op_vec2; case 3: return nir_op_vec3; case 4: return nir_op_vec4; + case 8: return nir_op_vec8; + case 16: return nir_op_vec16; default: unreachable("bad component count"); } } +static inline bool +nir_op_is_vec(nir_op op) +{ + switch (op) { + case nir_op_mov: + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + case nir_op_vec8: + case nir_op_vec16: + return true; + default: + return false; + } +} + +static inline bool +nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size) +{ + return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) || + (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) || + (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64); +} + +static inline bool +nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size) +{ + return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) || + (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) || + (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64); +} + +static inline bool +nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size) +{ + return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) || + (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) || + (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64); +} + +static inline bool +nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size) +{ + return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) || + (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) || + (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64); +} + +static inline bool +nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size) +{ + return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) || + (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) || + (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64); +} + +static inline bool +nir_has_any_rounding_mode_rtz(unsigned execution_mode) +{ + return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) || + (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) || + (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64); +} + +static inline bool +nir_has_any_rounding_mode_rtne(unsigned execution_mode) +{ + return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) || + (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) || + (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64); +} + +static inline nir_rounding_mode +nir_get_rounding_mode_from_float_controls(unsigned execution_mode, + nir_alu_type type) +{ + if (nir_alu_type_get_base_type(type) != nir_type_float) + return nir_rounding_mode_undef; + + unsigned bit_size = nir_alu_type_get_type_size(type); + + if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) + return nir_rounding_mode_rtz; + if (nir_is_rounding_mode_rtne(execution_mode, bit_size)) + return nir_rounding_mode_rtne; + return nir_rounding_mode_undef; +} + +static inline bool +nir_has_any_rounding_mode_enabled(unsigned execution_mode) +{ + bool result = + nir_has_any_rounding_mode_rtne(execution_mode) || + nir_has_any_rounding_mode_rtz(execution_mode); + return result; +} + typedef enum { /** * Operation where the first two sources are commutative. @@ -1003,7 +1217,7 @@ typedef enum { typedef struct { const char *name; - unsigned num_inputs; + uint8_t num_inputs; /** * The number of components in the output @@ -1022,7 +1236,7 @@ typedef struct { * though output_size is zero; in that case, the inputs with a zero * size act per-component, while the inputs with non-zero size don't. */ - unsigned output_size; + uint8_t output_size; /** * The type of vector that the instruction outputs. Note that the @@ -1034,7 +1248,7 @@ typedef struct { /** * The number of components in each input */ - unsigned input_sizes[NIR_MAX_VEC_COMPONENTS]; + uint8_t input_sizes[NIR_MAX_VEC_COMPONENTS]; /** * The type of vector that each input takes. Note that negate and @@ -1297,6 +1511,25 @@ nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i) return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i])); } +typedef enum { + /* Memory ordering. */ + NIR_MEMORY_ACQUIRE = 1 << 0, + NIR_MEMORY_RELEASE = 1 << 1, + NIR_MEMORY_ACQ_REL = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE, + + /* Memory visibility operations. */ + NIR_MEMORY_MAKE_AVAILABLE = 1 << 2, + NIR_MEMORY_MAKE_VISIBLE = 1 << 3, +} nir_memory_semantics; + +typedef enum { + NIR_SCOPE_INVOCATION, + NIR_SCOPE_SUBGROUP, + NIR_SCOPE_WORKGROUP, + NIR_SCOPE_QUEUE_FAMILY, + NIR_SCOPE_DEVICE, +} nir_scope; + /** * \name NIR intrinsics semantic flags * @@ -1443,6 +1676,24 @@ typedef enum { NIR_INTRINSIC_SRC_ACCESS, NIR_INTRINSIC_DST_ACCESS, + /* Driver location for nir_load_patch_location_ir3 */ + NIR_INTRINSIC_DRIVER_LOCATION, + + /** + * Mask of nir_memory_semantics, includes ordering and visibility. + */ + NIR_INTRINSIC_MEMORY_SEMANTICS, + + /** + * Mask of nir_variable_modes affected by the memory operation. + */ + NIR_INTRINSIC_MEMORY_MODES, + + /** + * Value of nir_scope. + */ + NIR_INTRINSIC_MEMORY_SCOPE, + NIR_INTRINSIC_NUM_INDEX_FLAGS, } nir_intrinsic_index_flag; @@ -1452,7 +1703,7 @@ typedef enum { typedef struct { const char *name; - unsigned num_srcs; /** < number of register/SSA inputs */ + uint8_t num_srcs; /** < number of register/SSA inputs */ /** number of components of each input register * @@ -1461,7 +1712,7 @@ typedef struct { * intrinsic consumes however many components are provided and it is not * validated at all. */ - int src_components[NIR_INTRINSIC_MAX_INPUTS]; + int8_t src_components[NIR_INTRINSIC_MAX_INPUTS]; bool has_dest; @@ -1470,16 +1721,16 @@ typedef struct { * If this value is 0, the number of components is given by the * num_components field of nir_intrinsic_instr. */ - unsigned dest_components; + uint8_t dest_components; /** bitfield of legal bit sizes */ - unsigned dest_bit_sizes; + uint8_t dest_bit_sizes; /** the number of constant indices used by the intrinsic */ - unsigned num_indices; + uint8_t num_indices; /** indicates the usage of intr->const_index[n] */ - unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; + uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; /** semantic flags for calls to this intrinsic */ nir_intrinsic_semantic_flag flags; @@ -1488,7 +1739,7 @@ typedef struct { extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; static inline unsigned -nir_intrinsic_src_components(nir_intrinsic_instr *intr, unsigned srcn) +nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn) { const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; assert(srcn < info->num_srcs); @@ -1512,6 +1763,33 @@ nir_intrinsic_dest_components(nir_intrinsic_instr *intr) return intr->num_components; } +/** + * Helper to copy const_index[] from src to dst, without assuming they + * match in order. + */ +static inline void +nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src) +{ + if (src->intrinsic == dst->intrinsic) { + memcpy(dst->const_index, src->const_index, sizeof(dst->const_index)); + return; + } + + const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic]; + const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic]; + + for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) { + if (src_info->index_map[i] == 0) + continue; + + /* require that dst instruction also uses the same const_index[]: */ + assert(dst_info->index_map[i] > 0); + + dst->const_index[dst_info->index_map[i] - 1] = + src->const_index[src_info->index_map[i] - 1]; + } +} + #define INTRINSIC_IDX_ACCESSORS(name, flag, type) \ static inline type \ nir_intrinsic_##name(const nir_intrinsic_instr *instr) \ @@ -1545,12 +1823,16 @@ INTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool) INTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier) INTRINSIC_IDX_ACCESSORS(src_access, SRC_ACCESS, enum gl_access_qualifier) INTRINSIC_IDX_ACCESSORS(dst_access, DST_ACCESS, enum gl_access_qualifier) -INTRINSIC_IDX_ACCESSORS(format, FORMAT, unsigned) +INTRINSIC_IDX_ACCESSORS(format, FORMAT, enum pipe_format) INTRINSIC_IDX_ACCESSORS(align_mul, ALIGN_MUL, unsigned) INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned) INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned) INTRINSIC_IDX_ACCESSORS(type, TYPE, nir_alu_type) INTRINSIC_IDX_ACCESSORS(swizzle_mask, SWIZZLE_MASK, unsigned) +INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned) +INTRINSIC_IDX_ACCESSORS(memory_semantics, MEMORY_SEMANTICS, nir_memory_semantics) +INTRINSIC_IDX_ACCESSORS(memory_modes, MEMORY_MODES, nir_variable_mode) +INTRINSIC_IDX_ACCESSORS(memory_scope, MEMORY_SCOPE, nir_scope) static inline void nir_intrinsic_set_align(nir_intrinsic_instr *intrin, @@ -1578,6 +1860,9 @@ nir_intrinsic_align(const nir_intrinsic_instr *intrin) return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; } +unsigned +nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr); + /* Converts a image_deref_* intrinsic into a image_* one */ void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr, nir_ssa_def *handle, bool bindless); @@ -1651,6 +1936,9 @@ typedef enum { nir_texop_samples_identical, /**< Query whether all samples are definitely * identical. */ + nir_texop_tex_prefetch, /**< Regular texture look-up, eligible for pre-dispatch */ + nir_texop_fragment_fetch, /**< Multisample fragment color texture fetch */ + nir_texop_fragment_mask_fetch,/**< Multisample fragment mask texture fetch */ } nir_texop; typedef struct { @@ -1690,9 +1978,6 @@ typedef struct { */ unsigned texture_index; - /** The size of the texture array or 0 if it's not an array */ - unsigned texture_array_size; - /** The sampler index * * The following operations do not require a sampler and, as such, this @@ -1711,6 +1996,30 @@ typedef struct { unsigned sampler_index; } nir_tex_instr; +/* + * Returns true if the texture operation requires a sampler as a general rule, + * see the documentation of sampler_index. + * + * Note that the specific hw/driver backend could require to a sampler + * object/configuration packet in any case, for some other reason. + */ +static inline bool +nir_tex_instr_need_sampler(const nir_tex_instr *instr) +{ + switch (instr->op) { + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_query_levels: + case nir_texop_texture_samples: + case nir_texop_samples_identical: + return false; + default: + return true; + } +} + static inline unsigned nir_tex_instr_dest_size(const nir_tex_instr *instr) { @@ -1747,6 +2056,7 @@ nir_tex_instr_dest_size(const nir_tex_instr *instr) case nir_texop_texture_samples: case nir_texop_query_levels: case nir_texop_samples_identical: + case nir_texop_fragment_mask_fetch: return 1; default: @@ -1827,19 +2137,30 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src) case nir_tex_src_projector: case nir_tex_src_comparator: case nir_tex_src_bias: + case nir_tex_src_min_lod: case nir_tex_src_ddx: case nir_tex_src_ddy: return nir_type_float; case nir_tex_src_offset: case nir_tex_src_ms_index: + case nir_tex_src_plane: + return nir_type_int; + + case nir_tex_src_ms_mcs: + case nir_tex_src_texture_deref: + case nir_tex_src_sampler_deref: case nir_tex_src_texture_offset: case nir_tex_src_sampler_offset: - return nir_type_int; + case nir_tex_src_texture_handle: + case nir_tex_src_sampler_handle: + return nir_type_uint; - default: - unreachable("Invalid texture source type"); + case nir_num_tex_src_types: + unreachable("nir_num_tex_src_types is not a valid source type"); } + + unreachable("Invalid texture source type"); } static inline unsigned @@ -1901,14 +2222,27 @@ typedef struct { nir_const_value value[]; } nir_load_const_instr; -#define nir_const_load_to_arr(arr, l, m) \ -{ \ - nir_const_value_to_array(arr, l->value, l->def.num_components, m); \ -} while (false); - typedef enum { + /** Return from a function + * + * This instruction is a classic function return. It jumps to + * nir_function_impl::end_block. No return value is provided in this + * instruction. Instead, the function is expected to write any return + * data to a deref passed in from the caller. + */ nir_jump_return, + + /** Break out of the inner-most loop + * + * This has the same semantics as C's "break" statement. + */ nir_jump_break, + + /** Jump back to the top of the inner-most loop + * + * This has the same semantics as C's "continue" statement assuming that a + * NIR loop is implemented as "while (1) { body }". + */ nir_jump_continue, } nir_jump_type; @@ -2161,13 +2495,20 @@ typedef struct nir_block { * dom_pre_index and dom_post_index for this block, which makes testing if * a given block is dominated by another block an O(1) operation. */ - unsigned dom_pre_index, dom_post_index; + int16_t dom_pre_index, dom_post_index; /* live in and out for this block; used for liveness analysis */ BITSET_WORD *live_in; BITSET_WORD *live_out; } nir_block; +static inline bool +nir_block_is_reachable(nir_block *b) +{ + /* See also nir_block_dominates */ + return b->dom_post_index != -1; +} + static inline nir_instr * nir_block_first_instr(nir_block *block) { @@ -2293,10 +2634,61 @@ typedef struct { */ typedef enum { nir_metadata_none = 0x0, + + /** Indicates that nir_block::index values are valid. + * + * The start block has index 0 and they increase through a natural walk of + * the CFG. nir_function_impl::num_blocks is the number of blocks and + * every block index is in the range [0, nir_function_impl::num_blocks]. + * + * A pass can preserve this metadata type if it doesn't touch the CFG. + */ nir_metadata_block_index = 0x1, + + /** Indicates that block dominance information is valid + * + * This includes: + * + * - nir_block::num_dom_children + * - nir_block::dom_children + * - nir_block::dom_frontier + * - nir_block::dom_pre_index + * - nir_block::dom_post_index + * + * A pass can preserve this metadata type if it doesn't touch the CFG. + */ nir_metadata_dominance = 0x2, + + /** Indicates that SSA def data-flow liveness information is valid + * + * This includes: + * + * - nir_ssa_def::live_index + * - nir_block::live_in + * - nir_block::live_out + * + * A pass can preserve this metadata type if it never adds or removes any + * SSA defs (most passes shouldn't preserve this metadata type). + */ nir_metadata_live_ssa_defs = 0x4, + + /** A dummy metadata value to track when a pass forgot to call + * nir_metadata_preserve. + * + * A pass should always clear this value even if it doesn't make any + * progress to indicate that it thought about preserving metadata. + */ nir_metadata_not_properly_reset = 0x8, + + /** Indicates that loop analysis information is valid. + * + * This includes everything pointed to by nir_loop::info. + * + * A pass can preserve this metadata type if it is guaranteed to not affect + * any loop metadata. However, since loop metadata includes things like + * loop counts which depend on arithmetic in the loop, this is very hard to + * determine. Most passes shouldn't preserve this metadata type. + */ nir_metadata_loop_analysis = 0x10, } nir_metadata; @@ -2478,6 +2870,7 @@ typedef enum { nir_lower_shift64 = (1 << 11), nir_lower_imul_2x32_64 = (1 << 12), nir_lower_extract64 = (1 << 13), + nir_lower_ufind_msb64 = (1 << 14), } nir_lower_int64_options; typedef enum { @@ -2495,6 +2888,13 @@ typedef enum { nir_lower_fp64_full_software = (1 << 11), } nir_lower_doubles_options; +typedef enum { + nir_divergence_single_prim_per_subgroup = (1 << 0), + nir_divergence_single_patch_per_tcs_subgroup = (1 << 1), + nir_divergence_single_patch_per_tes_subgroup = (1 << 2), + nir_divergence_view_index_uniform = (1 << 3), +} nir_divergence_options; + typedef struct nir_shader_compiler_options { bool lower_fdiv; bool lower_ffma; @@ -2589,6 +2989,8 @@ typedef struct nir_shader_compiler_options { bool lower_unpack_unorm_4x8; bool lower_unpack_snorm_4x8; + bool lower_pack_split; + bool lower_extract_byte; bool lower_extract_word; @@ -2634,14 +3036,59 @@ typedef struct nir_shader_compiler_options { /* Set if nir_lower_wpos_ytransform() should also invert gl_PointCoord. */ bool lower_wpos_pntc; + /** + * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be + * lowered to simple arithmetic. + * + * If this flag is set, the lowering will be applied to all bit-sizes of + * these instructions. + * + * \sa ::lower_hadd64 + */ bool lower_hadd; + + /** + * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions + * should be lowered to simple arithmetic. + * + * If this flag is set, the lowering will be applied to only 64-bit + * versions of these instructions. + * + * \sa ::lower_hadd + */ + bool lower_hadd64; + + /** + * Set if nir_op_add_sat and nir_op_usub_sat should be lowered to simple + * arithmetic. + * + * If this flag is set, the lowering will be applied to all bit-sizes of + * these instructions. + * + * \sa ::lower_usub_sat64 + */ bool lower_add_sat; + /** + * Set if only 64-bit nir_op_usub_sat should be lowered to simple + * arithmetic. + * + * \sa ::lower_add_sat + */ + bool lower_usub_sat64; + /** * Should IO be re-vectorized? Some scalar ISAs still operate on vec4's * for IO purposes and would prefer loads/stores be vectorized. */ bool vectorize_io; + bool lower_to_scalar; + + /** + * Should the linker unify inputs_read/outputs_written between adjacent + * shader stages which are linked into a single program? + */ + bool unify_interfaces; /** * Should nir_lower_io() create load_interpolated_input intrinsics? @@ -2657,6 +3104,27 @@ typedef struct nir_shader_compiler_options { /* Lowers when rotate instruction is not supported */ bool lower_rotate; + /** + * Backend supports imul24, and would like to use it (when possible) + * for address/offset calculation. If true, driver should call + * nir_lower_amul(). (If not set, amul will automatically be lowered + * to imul.) + */ + bool has_imul24; + + /** Backend supports umul24, if not set umul24 will automatically be lowered + * to imul with masked inputs */ + bool has_umul24; + + /** Backend supports umad24, if not set umad24 will automatically be lowered + * to imul with masked inputs and iadd */ + bool has_umad24; + + /* Whether to generate only scoped_memory_barrier intrinsics instead of the + * set of memory barrier intrinsics based on GLSL. + */ + bool use_scoped_memory_barrier; + /** * Is this the Intel vec4 backend? * @@ -2667,6 +3135,15 @@ typedef struct nir_shader_compiler_options { */ bool intel_vec4; + /** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */ + bool lower_bfe_with_two_constants; + + /** Whether 8-bit ALU is supported. */ + bool support_8bit_alu; + + /** Whether 16-bit ALU is supported. */ + bool support_16bit_alu; + unsigned max_unroll_iterations; nir_lower_int64_options lower_int64_options; @@ -3077,6 +3554,9 @@ bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state); bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); +bool nir_foreach_phi_src_leaving_block(nir_block *instr, + nir_foreach_src_cb cb, + void *state); nir_const_value *nir_src_as_const_value(nir_src src); @@ -3186,6 +3666,8 @@ unsigned nir_index_instrs(nir_function_impl *impl); void nir_index_blocks(nir_function_impl *impl); +void nir_index_vars(nir_shader *shader, nir_function_impl *impl, nir_variable_mode modes); + void nir_print_shader(nir_shader *shader, FILE *fp); void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors); void nir_print_instr(const nir_instr *instr, FILE *fp); @@ -3360,6 +3842,7 @@ void nir_calc_dominance(nir_shader *shader); nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); bool nir_block_dominates(nir_block *parent, nir_block *child); +bool nir_block_is_unreachable(nir_block *block); void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); void nir_dump_dom_tree(nir_shader *shader, FILE *fp); @@ -3420,6 +3903,8 @@ bool nir_lower_vars_to_scratch(nir_shader *shader, int size_threshold, glsl_type_size_align_func size_align); +void nir_lower_clip_halfz(nir_shader *shader); + void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); void nir_gather_ssa_types(nir_function_impl *impl, @@ -3439,11 +3924,22 @@ void nir_compact_varyings(nir_shader *producer, nir_shader *consumer, void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer); bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer); +bool nir_lower_amul(nir_shader *shader, + int (*type_size)(const struct glsl_type *, bool)); void nir_assign_io_var_locations(struct exec_list *var_list, unsigned *size, gl_shader_stage stage); +typedef struct { + uint8_t num_linked_io_vars; + uint8_t num_linked_patch_io_vars; +} nir_linked_io_var_info; + +nir_linked_io_var_info +nir_assign_linked_io_var_locations(nir_shader *producer, + nir_shader *consumer); + typedef enum { /* If set, this causes all 64-bit IO operations to be lowered on-the-fly * to 32-bit operations. This is only valid for nir_var_shader_in/out @@ -3464,6 +3960,11 @@ bool nir_lower_io(nir_shader *shader, bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode); +bool +nir_lower_vars_to_explicit_types(nir_shader *shader, + nir_variable_mode modes, + glsl_type_size_align_func type_info); + typedef enum { /** * An address format which is a simple 32-bit global GPU address. @@ -3576,20 +4077,21 @@ bool nir_lower_vars_to_ssa(nir_shader *shader); bool nir_remove_dead_derefs(nir_shader *shader); bool nir_remove_dead_derefs_impl(nir_function_impl *impl); bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes); -bool nir_lower_constant_initializers(nir_shader *shader, +bool nir_lower_variable_initializers(nir_shader *shader, nir_variable_mode modes); -bool nir_move_load_const(nir_shader *shader); bool nir_move_vec_src_uses_to_dest(nir_shader *shader); bool nir_lower_vec_to_movs(nir_shader *shader); void nir_lower_alpha_test(nir_shader *shader, enum compare_func func, - bool alpha_to_one); + bool alpha_to_one, + const gl_state_index16 *alpha_ref_state_tokens); bool nir_lower_alu(nir_shader *shader); bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask, bool always_precise, bool have_ffma); -bool nir_lower_alu_to_scalar(nir_shader *shader, BITSET_WORD *lower_set); +bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data); +bool nir_lower_bool_to_bitsize(nir_shader *shader); bool nir_lower_bool_to_float(nir_shader *shader); bool nir_lower_bool_to_int32(nir_shader *shader); bool nir_lower_int_to_float(nir_shader *shader); @@ -3617,6 +4119,8 @@ typedef struct nir_lower_subgroups_options { bool lower_shuffle:1; bool lower_shuffle_to_32bit:1; bool lower_quad:1; + bool lower_quad_broadcast_dynamic:1; + bool lower_quad_broadcast_dynamic_to_const:1; } nir_lower_subgroups_options; bool nir_lower_subgroups(nir_shader *shader, @@ -3801,21 +4305,44 @@ enum nir_lower_non_uniform_access_type { bool nir_lower_non_uniform_access(nir_shader *shader, enum nir_lower_non_uniform_access_type); -bool nir_lower_idiv(nir_shader *shader); +enum nir_lower_idiv_path { + /* This path is based on NV50LegalizeSSA::handleDIV(). It is the faster of + * the two but it is not exact in some cases (for example, 1091317713u / + * 1034u gives 5209173 instead of 1055432) */ + nir_lower_idiv_fast, + /* This path is based on AMDGPUTargetLowering::LowerUDIVREM() and + * AMDGPUTargetLowering::LowerSDIVREM(). It requires more instructions than + * the nv50 path and many of them are integer multiplications, so it is + * probably slower. It should always return the correct result, though. */ + nir_lower_idiv_precise, +}; + +bool nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path); bool nir_lower_input_attachments(nir_shader *shader, bool use_fragcoord_sysval); -bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars); -bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables); -bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); +bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, + bool use_vars, + bool use_clipdist_array, + const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]); +bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, + bool use_clipdist_array, + const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]); +bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables, + bool use_clipdist_array); bool nir_lower_clip_cull_distance_arrays(nir_shader *nir); +void nir_lower_point_size_mov(nir_shader *shader, + const gl_state_index16 *pointsize_state_tokens); + bool nir_lower_frexp(nir_shader *nir); void nir_lower_two_sided_color(nir_shader *shader); bool nir_lower_clamp_color_outputs(nir_shader *shader); +bool nir_lower_flatshade(nir_shader *shader); + void nir_lower_passthrough_edgeflags(nir_shader *shader); bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count, const gl_state_index16 *uniform_state_tokens); @@ -3832,6 +4359,8 @@ bool nir_lower_wpos_ytransform(nir_shader *shader, const nir_lower_wpos_ytransform_options *options); bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading); +bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data); + bool nir_lower_fb_read(nir_shader *shader); typedef struct nir_lower_drawpixels_options { @@ -3854,7 +4383,7 @@ typedef struct nir_lower_bitmap_options { void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options); -bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset); +bool nir_lower_atomics_to_ssbo(nir_shader *shader); typedef enum { nir_lower_int_source_mods = 1 << 0, @@ -3866,7 +4395,7 @@ typedef enum { bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options); -bool nir_lower_gs_intrinsics(nir_shader *shader); +bool nir_lower_gs_intrinsics(nir_shader *shader, bool per_stream); typedef unsigned (*nir_lower_bit_size_callback)(const nir_alu_instr *, void *); @@ -3882,6 +4411,10 @@ bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64, nir_lower_doubles_options options); bool nir_lower_pack(nir_shader *shader); +void nir_lower_mediump_outputs(nir_shader *nir); + +bool nir_lower_point_size(nir_shader *shader, float min, float max); + typedef enum { nir_lower_interpolation_at_sample = (1 << 1), nir_lower_interpolation_at_offset = (1 << 2), @@ -3893,6 +4426,8 @@ typedef enum { bool nir_lower_interpolation(nir_shader *shader, nir_lower_interpolation_options options); +bool nir_lower_discard_to_demote(nir_shader *shader); + bool nir_normalize_cubemap_coords(nir_shader *shader); void nir_live_ssa_defs_impl(nir_function_impl *impl); @@ -3906,6 +4441,8 @@ bool nir_repair_ssa_impl(nir_function_impl *impl); bool nir_repair_ssa(nir_shader *shader); void nir_convert_loop_to_lcssa(nir_loop *loop); +bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants); +void nir_divergence_analysis(nir_shader *shader, nir_divergence_options options); /* If phi_webs_only is true, only convert SSA values involved in phi nodes to * registers. If false, convert all values (even those not involved in a phi @@ -3917,16 +4454,32 @@ bool nir_lower_phis_to_regs_block(nir_block *block); bool nir_lower_ssa_defs_to_regs_block(nir_block *block); bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl); +bool nir_lower_samplers(nir_shader *shader); +bool nir_lower_ssbo(nir_shader *shader); + /* This is here for unit tests. */ bool nir_opt_comparison_pre_impl(nir_function_impl *impl); bool nir_opt_comparison_pre(nir_shader *shader); +bool nir_opt_access(nir_shader *shader); bool nir_opt_algebraic(nir_shader *shader); bool nir_opt_algebraic_before_ffma(nir_shader *shader); bool nir_opt_algebraic_late(nir_shader *shader); +bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader); bool nir_opt_constant_folding(nir_shader *shader); +/* Try to combine a and b into a. Return true if combination was possible, + * which will result in b being removed by the pass. Return false if + * combination wasn't possible. + */ +typedef bool (*nir_combine_memory_barrier_cb)( + nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data); + +bool nir_opt_combine_memory_barriers(nir_shader *shader, + nir_combine_memory_barrier_cb combine_cb, + void *data); + bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes); bool nir_copy_prop(nir_shader *shader); @@ -3960,9 +4513,19 @@ bool nir_opt_large_constants(nir_shader *shader, bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask); -bool nir_opt_move_comparisons(nir_shader *shader); +typedef enum { + nir_move_const_undef = (1 << 0), + nir_move_load_ubo = (1 << 1), + nir_move_load_input = (1 << 2), + nir_move_comparisons = (1 << 3), + nir_move_copies = (1 << 4), +} nir_move_options; + +bool nir_can_move_instr(nir_instr *instr, nir_move_options options); -bool nir_opt_move_load_ubo(nir_shader *shader); +bool nir_opt_sink(nir_shader *shader, nir_move_options options); + +bool nir_opt_move(nir_shader *shader, nir_move_options options); bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, bool indirect_load_ok, bool expensive_alu_ok); @@ -3982,6 +4545,16 @@ bool nir_opt_vectorize(nir_shader *shader); bool nir_opt_conditional_discard(nir_shader *shader); +typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size, + unsigned num_components, unsigned high_offset, + nir_intrinsic_instr *low, nir_intrinsic_instr *high); + +bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes, + nir_should_vectorize_mem_func callback, + nir_variable_mode robust_modes); + +void nir_schedule(nir_shader *shader, int threshold); + void nir_strip(nir_shader *shader); void nir_sweep(nir_shader *shader);