X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir.h;h=7bbf347a3b549ba307acb918470651fcafeece0d;hb=7665398e6c4fa903405d3daefcc93ddb0a37b488;hp=4db85b3fafd60443af0bcc9c4c5ab5aac90c2469;hpb=45a0b5349082fba81dac7adf9a59c5a1b40baaa6;p=mesa.git diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 4db85b3fafd..7bbf347a3b5 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -37,9 +37,12 @@ #include "util/bitscan.h" #include "util/bitset.h" #include "util/macros.h" +#include "util/format/u_format.h" #include "compiler/nir_types.h" #include "compiler/shader_enums.h" #include "compiler/shader_info.h" +#define XXH_INLINE_ALL +#include "util/xxhash.h" #include #ifndef NDEBUG @@ -58,10 +61,19 @@ extern "C" { #define NIR_FALSE 0u #define NIR_TRUE (~0u) -#define NIR_MAX_VEC_COMPONENTS 4 +#define NIR_MAX_VEC_COMPONENTS 16 #define NIR_MAX_MATRIX_COLUMNS 4 #define NIR_STREAM_PACKED (1 << 8) -typedef uint8_t nir_component_mask_t; +typedef uint16_t nir_component_mask_t; + +static inline bool +nir_num_components_valid(unsigned num_components) +{ + return (num_components >= 1 && + num_components <= 4) || + num_components == 8 || + num_components == 16; +} /** Defines a cast function * @@ -107,7 +119,9 @@ typedef enum { nir_var_mem_ssbo = (1 << 7), nir_var_mem_shared = (1 << 8), nir_var_mem_global = (1 << 9), - nir_var_all = (1 << 10) - 1, + nir_var_mem_push_const = (1 << 10), /* not actually used for variables */ + nir_num_variable_modes = 11, + nir_var_all = (1 << nir_num_variable_modes) - 1, } nir_variable_mode; /** @@ -314,7 +328,7 @@ typedef struct nir_variable { * * \sa nir_variable_mode */ - nir_variable_mode mode:10; + nir_variable_mode mode:11; /** * Is the variable read-only? @@ -328,6 +342,19 @@ typedef struct nir_variable { unsigned patch:1; unsigned invariant:1; + /** + * Precision qualifier. + * + * In desktop GLSL we do not care about precision qualifiers at all, in + * fact, the spec says that precision qualifiers are ignored. + * + * To make things easy, we make it so that this field is always + * GLSL_PRECISION_NONE on desktop shaders. This way all the variables + * have the same precision value and the checks we add in the compiler + * for this field will never break a desktop shader compile. + */ + unsigned precision:2; + /** * Can this variable be coalesced with another? * @@ -354,7 +381,7 @@ typedef struct nir_variable { * * \sa glsl_interp_mode */ - unsigned interpolation:2; + unsigned interpolation:3; /** * If non-zero, then this variable may be packed along with other variables @@ -390,6 +417,15 @@ typedef struct nir_variable { */ unsigned explicit_binding:1; + /** + * Was the location explicitly set in the shader? + * + * If the location is explicitly set in the shader, it \b cannot be changed + * by the linker or by the API (e.g., calls to \c glBindAttribLocation have + * no effect). + */ + unsigned explicit_location:1; + /** * Was a transfer feedback buffer set in the shader? */ @@ -405,6 +441,17 @@ typedef struct nir_variable { */ unsigned explicit_offset:1; + /** + * Layout of the matrix. Uses glsl_matrix_layout values. + */ + unsigned matrix_layout:2; + + /** + * Non-zero if this variable was created by lowering a named interface + * block. + */ + unsigned from_named_ifc_block:1; + /** * How the variable was declared. See nir_var_declaration_type. * @@ -413,6 +460,12 @@ typedef struct nir_variable { */ unsigned how_declared:2; + /** + * Is this variable per-view? If so, we know it must be an array with + * size corresponding to the number of views. + */ + unsigned per_view:1; + /** * \brief Layout qualifier for gl_FragDepth. * @@ -486,8 +539,8 @@ typedef struct nir_variable { union { struct { - /** Image internal format if specified explicitly, otherwise GL_NONE. */ - uint16_t format; /* GLenum */ + /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */ + enum pipe_format format; } image; struct { @@ -504,6 +557,12 @@ typedef struct nir_variable { }; } data; + /** + * Identifier for this variable generated by nir_index_vars() that is unique + * among other variables in the same exec_list. + */ + unsigned index; + /* Number of nir_variable_data members */ uint16_t num_members; @@ -532,6 +591,14 @@ typedef struct nir_variable { */ nir_constant *constant_initializer; + /** + * Global variable assigned in the initializer of the variable + * This field should only be used temporarily by creators of NIR shaders + * and then lower_constant_initializers can be used to get rid of them. + * Most of the rest of NIR ignores this field or asserts that it's NULL. + */ + struct nir_variable *pointer_initializer; + /** * For variables that are in an interface block or are an instance of an * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. @@ -675,6 +742,12 @@ typedef struct nir_ssa_def { /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ uint8_t bit_size; + + /** + * True if this SSA value may have different values in different SIMD + * invocations of the shader. This is set by nir_divergence_analysis. + */ + bool divergent; } nir_ssa_def; struct nir_src; @@ -815,6 +888,13 @@ nir_src_is_const(nir_src src) src.ssa->parent_instr->type == nir_instr_type_load_const; } +static inline bool +nir_src_is_divergent(nir_src src) +{ + assert(src.is_ssa); + return src.ssa->divergent; +} + static inline unsigned nir_dest_bit_size(nir_dest dest) { @@ -827,6 +907,33 @@ nir_dest_num_components(nir_dest dest) return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components; } +static inline bool +nir_dest_is_divergent(nir_dest dest) +{ + assert(dest.is_ssa); + return dest.ssa.divergent; +} + +/* Are all components the same, ie. .xxxx */ +static inline bool +nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp) +{ + for (unsigned i = 1; i < nr_comp; i++) + if (swiz[i] != swiz[0]) + return false; + return true; +} + +/* Are all components sequential, ie. .yzw */ +static inline bool +nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp) +{ + for (unsigned i = 1; i < nr_comp; i++) + if (swiz[i] != (swiz[0] + i)) + return false; + return true; +} + void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if); void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr); @@ -882,7 +989,7 @@ typedef struct { * The values in this enum are carefully chosen so that the sized type is * just the unsized type OR the number of bits. */ -typedef enum { +typedef enum PACKED { nir_type_invalid = 0, /* Not a valid type */ nir_type_int = 2, nir_type_uint = 4, @@ -916,10 +1023,10 @@ nir_alu_type_get_type_size(nir_alu_type type) return type & NIR_ALU_TYPE_SIZE_MASK; } -static inline unsigned +static inline nir_alu_type nir_alu_type_get_base_type(nir_alu_type type) { - return type & NIR_ALU_TYPE_BASE_TYPE_MASK; + return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK); } static inline nir_alu_type @@ -994,10 +1101,28 @@ nir_op_vec(unsigned components) case 2: return nir_op_vec2; case 3: return nir_op_vec3; case 4: return nir_op_vec4; + case 8: return nir_op_vec8; + case 16: return nir_op_vec16; default: unreachable("bad component count"); } } +static inline bool +nir_op_is_vec(nir_op op) +{ + switch (op) { + case nir_op_mov: + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + case nir_op_vec8: + case nir_op_vec16: + return true; + default: + return false; + } +} + static inline bool nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size) { @@ -1094,7 +1219,7 @@ typedef enum { typedef struct { const char *name; - unsigned num_inputs; + uint8_t num_inputs; /** * The number of components in the output @@ -1113,7 +1238,7 @@ typedef struct { * though output_size is zero; in that case, the inputs with a zero * size act per-component, while the inputs with non-zero size don't. */ - unsigned output_size; + uint8_t output_size; /** * The type of vector that the instruction outputs. Note that the @@ -1125,7 +1250,7 @@ typedef struct { /** * The number of components in each input */ - unsigned input_sizes[NIR_MAX_VEC_COMPONENTS]; + uint8_t input_sizes[NIR_MAX_VEC_COMPONENTS]; /** * The type of vector that each input takes. Note that negate and @@ -1392,18 +1517,20 @@ typedef enum { /* Memory ordering. */ NIR_MEMORY_ACQUIRE = 1 << 0, NIR_MEMORY_RELEASE = 1 << 1, + NIR_MEMORY_ACQ_REL = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE, /* Memory visibility operations. */ - NIR_MEMORY_MAKE_AVAILABLE = 1 << 3, - NIR_MEMORY_MAKE_VISIBLE = 1 << 4, + NIR_MEMORY_MAKE_AVAILABLE = 1 << 2, + NIR_MEMORY_MAKE_VISIBLE = 1 << 3, } nir_memory_semantics; typedef enum { - NIR_SCOPE_DEVICE, - NIR_SCOPE_QUEUE_FAMILY, - NIR_SCOPE_WORKGROUP, - NIR_SCOPE_SUBGROUP, + NIR_SCOPE_NONE, NIR_SCOPE_INVOCATION, + NIR_SCOPE_SUBGROUP, + NIR_SCOPE_WORKGROUP, + NIR_SCOPE_QUEUE_FAMILY, + NIR_SCOPE_DEVICE, } nir_scope; /** @@ -1570,6 +1697,11 @@ typedef enum { */ NIR_INTRINSIC_MEMORY_SCOPE, + /** + * Value of nir_scope. + */ + NIR_INTRINSIC_EXECUTION_SCOPE, + NIR_INTRINSIC_NUM_INDEX_FLAGS, } nir_intrinsic_index_flag; @@ -1579,7 +1711,7 @@ typedef enum { typedef struct { const char *name; - unsigned num_srcs; /** < number of register/SSA inputs */ + uint8_t num_srcs; /** < number of register/SSA inputs */ /** number of components of each input register * @@ -1588,7 +1720,7 @@ typedef struct { * intrinsic consumes however many components are provided and it is not * validated at all. */ - int src_components[NIR_INTRINSIC_MAX_INPUTS]; + int8_t src_components[NIR_INTRINSIC_MAX_INPUTS]; bool has_dest; @@ -1597,16 +1729,16 @@ typedef struct { * If this value is 0, the number of components is given by the * num_components field of nir_intrinsic_instr. */ - unsigned dest_components; + uint8_t dest_components; /** bitfield of legal bit sizes */ - unsigned dest_bit_sizes; + uint8_t dest_bit_sizes; /** the number of constant indices used by the intrinsic */ - unsigned num_indices; + uint8_t num_indices; /** indicates the usage of intr->const_index[n] */ - unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; + uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; /** semantic flags for calls to this intrinsic */ nir_intrinsic_semantic_flag flags; @@ -1615,7 +1747,7 @@ typedef struct { extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; static inline unsigned -nir_intrinsic_src_components(nir_intrinsic_instr *intr, unsigned srcn) +nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn) { const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; assert(srcn < info->num_srcs); @@ -1639,6 +1771,33 @@ nir_intrinsic_dest_components(nir_intrinsic_instr *intr) return intr->num_components; } +/** + * Helper to copy const_index[] from src to dst, without assuming they + * match in order. + */ +static inline void +nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src) +{ + if (src->intrinsic == dst->intrinsic) { + memcpy(dst->const_index, src->const_index, sizeof(dst->const_index)); + return; + } + + const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic]; + const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic]; + + for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) { + if (src_info->index_map[i] == 0) + continue; + + /* require that dst instruction also uses the same const_index[]: */ + assert(dst_info->index_map[i] > 0); + + dst->const_index[dst_info->index_map[i] - 1] = + src->const_index[src_info->index_map[i] - 1]; + } +} + #define INTRINSIC_IDX_ACCESSORS(name, flag, type) \ static inline type \ nir_intrinsic_##name(const nir_intrinsic_instr *instr) \ @@ -1672,7 +1831,7 @@ INTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool) INTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier) INTRINSIC_IDX_ACCESSORS(src_access, SRC_ACCESS, enum gl_access_qualifier) INTRINSIC_IDX_ACCESSORS(dst_access, DST_ACCESS, enum gl_access_qualifier) -INTRINSIC_IDX_ACCESSORS(format, FORMAT, unsigned) +INTRINSIC_IDX_ACCESSORS(format, FORMAT, enum pipe_format) INTRINSIC_IDX_ACCESSORS(align_mul, ALIGN_MUL, unsigned) INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned) INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned) @@ -1682,6 +1841,7 @@ INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned) INTRINSIC_IDX_ACCESSORS(memory_semantics, MEMORY_SEMANTICS, nir_memory_semantics) INTRINSIC_IDX_ACCESSORS(memory_modes, MEMORY_MODES, nir_variable_mode) INTRINSIC_IDX_ACCESSORS(memory_scope, MEMORY_SCOPE, nir_scope) +INTRINSIC_IDX_ACCESSORS(execution_scope, EXECUTION_SCOPE, nir_scope) static inline void nir_intrinsic_set_align(nir_intrinsic_instr *intrin, @@ -1709,6 +1869,9 @@ nir_intrinsic_align(const nir_intrinsic_instr *intrin) return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; } +unsigned +nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr); + /* Converts a image_deref_* intrinsic into a image_* one */ void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr, nir_ssa_def *handle, bool bindless); @@ -1783,6 +1946,8 @@ typedef enum { * identical. */ nir_texop_tex_prefetch, /**< Regular texture look-up, eligible for pre-dispatch */ + nir_texop_fragment_fetch, /**< Multisample fragment color texture fetch */ + nir_texop_fragment_mask_fetch,/**< Multisample fragment mask texture fetch */ } nir_texop; typedef struct { @@ -1822,9 +1987,6 @@ typedef struct { */ unsigned texture_index; - /** The size of the texture array or 0 if it's not an array */ - unsigned texture_array_size; - /** The sampler index * * The following operations do not require a sampler and, as such, this @@ -1843,6 +2005,30 @@ typedef struct { unsigned sampler_index; } nir_tex_instr; +/* + * Returns true if the texture operation requires a sampler as a general rule, + * see the documentation of sampler_index. + * + * Note that the specific hw/driver backend could require to a sampler + * object/configuration packet in any case, for some other reason. + */ +static inline bool +nir_tex_instr_need_sampler(const nir_tex_instr *instr) +{ + switch (instr->op) { + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_query_levels: + case nir_texop_texture_samples: + case nir_texop_samples_identical: + return false; + default: + return true; + } +} + static inline unsigned nir_tex_instr_dest_size(const nir_tex_instr *instr) { @@ -1879,6 +2065,7 @@ nir_tex_instr_dest_size(const nir_tex_instr *instr) case nir_texop_texture_samples: case nir_texop_query_levels: case nir_texop_samples_identical: + case nir_texop_fragment_mask_fetch: return 1; default: @@ -2045,8 +2232,26 @@ typedef struct { } nir_load_const_instr; typedef enum { + /** Return from a function + * + * This instruction is a classic function return. It jumps to + * nir_function_impl::end_block. No return value is provided in this + * instruction. Instead, the function is expected to write any return + * data to a deref passed in from the caller. + */ nir_jump_return, + + /** Break out of the inner-most loop + * + * This has the same semantics as C's "break" statement. + */ nir_jump_break, + + /** Jump back to the top of the inner-most loop + * + * This has the same semantics as C's "continue" statement assuming that a + * NIR loop is implemented as "while (1) { body }". + */ nir_jump_continue, } nir_jump_type; @@ -2299,13 +2504,20 @@ typedef struct nir_block { * dom_pre_index and dom_post_index for this block, which makes testing if * a given block is dominated by another block an O(1) operation. */ - unsigned dom_pre_index, dom_post_index; + int16_t dom_pre_index, dom_post_index; /* live in and out for this block; used for liveness analysis */ BITSET_WORD *live_in; BITSET_WORD *live_out; } nir_block; +static inline bool +nir_block_is_reachable(nir_block *b) +{ + /* See also nir_block_dominates */ + return b->dom_post_index != -1; +} + static inline nir_instr * nir_block_first_instr(nir_block *block) { @@ -2431,11 +2643,71 @@ typedef struct { */ typedef enum { nir_metadata_none = 0x0, + + /** Indicates that nir_block::index values are valid. + * + * The start block has index 0 and they increase through a natural walk of + * the CFG. nir_function_impl::num_blocks is the number of blocks and + * every block index is in the range [0, nir_function_impl::num_blocks]. + * + * A pass can preserve this metadata type if it doesn't touch the CFG. + */ nir_metadata_block_index = 0x1, + + /** Indicates that block dominance information is valid + * + * This includes: + * + * - nir_block::num_dom_children + * - nir_block::dom_children + * - nir_block::dom_frontier + * - nir_block::dom_pre_index + * - nir_block::dom_post_index + * + * A pass can preserve this metadata type if it doesn't touch the CFG. + */ nir_metadata_dominance = 0x2, + + /** Indicates that SSA def data-flow liveness information is valid + * + * This includes: + * + * - nir_ssa_def::live_index + * - nir_block::live_in + * - nir_block::live_out + * + * A pass can preserve this metadata type if it never adds or removes any + * SSA defs (most passes shouldn't preserve this metadata type). + */ nir_metadata_live_ssa_defs = 0x4, + + /** A dummy metadata value to track when a pass forgot to call + * nir_metadata_preserve. + * + * A pass should always clear this value even if it doesn't make any + * progress to indicate that it thought about preserving metadata. + */ nir_metadata_not_properly_reset = 0x8, + + /** Indicates that loop analysis information is valid. + * + * This includes everything pointed to by nir_loop::info. + * + * A pass can preserve this metadata type if it is guaranteed to not affect + * any loop metadata. However, since loop metadata includes things like + * loop counts which depend on arithmetic in the loop, this is very hard to + * determine. Most passes shouldn't preserve this metadata type. + */ nir_metadata_loop_analysis = 0x10, + + /** All metadata + * + * This includes all nir_metadata flags except not_properly_reset. Passes + * which do not change the shader in any way should call + * + * nir_metadata_preserve(impl, nir_metadata_all); + */ + nir_metadata_all = ~nir_metadata_not_properly_reset, } nir_metadata; typedef struct { @@ -2616,6 +2888,7 @@ typedef enum { nir_lower_shift64 = (1 << 11), nir_lower_imul_2x32_64 = (1 << 12), nir_lower_extract64 = (1 << 13), + nir_lower_ufind_msb64 = (1 << 14), } nir_lower_int64_options; typedef enum { @@ -2734,6 +3007,8 @@ typedef struct nir_shader_compiler_options { bool lower_unpack_unorm_4x8; bool lower_unpack_snorm_4x8; + bool lower_pack_split; + bool lower_extract_byte; bool lower_extract_word; @@ -2779,9 +3054,47 @@ typedef struct nir_shader_compiler_options { /* Set if nir_lower_wpos_ytransform() should also invert gl_PointCoord. */ bool lower_wpos_pntc; + /** + * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be + * lowered to simple arithmetic. + * + * If this flag is set, the lowering will be applied to all bit-sizes of + * these instructions. + * + * \sa ::lower_hadd64 + */ bool lower_hadd; + + /** + * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions + * should be lowered to simple arithmetic. + * + * If this flag is set, the lowering will be applied to only 64-bit + * versions of these instructions. + * + * \sa ::lower_hadd + */ + bool lower_hadd64; + + /** + * Set if nir_op_add_sat and nir_op_usub_sat should be lowered to simple + * arithmetic. + * + * If this flag is set, the lowering will be applied to all bit-sizes of + * these instructions. + * + * \sa ::lower_usub_sat64 + */ bool lower_add_sat; + /** + * Set if only 64-bit nir_op_usub_sat should be lowered to simple + * arithmetic. + * + * \sa ::lower_add_sat + */ + bool lower_usub_sat64; + /** * Should IO be re-vectorized? Some scalar ISAs still operate on vec4's * for IO purposes and would prefer loads/stores be vectorized. @@ -2789,6 +3102,17 @@ typedef struct nir_shader_compiler_options { bool vectorize_io; bool lower_to_scalar; + /** + * Whether nir_opt_vectorize should only create 16-bit 2D vectors. + */ + bool vectorize_vec2_16bit; + + /** + * Should the linker unify inputs_read/outputs_written between adjacent + * shader stages which are linked into a single program? + */ + bool unify_interfaces; + /** * Should nir_lower_io() create load_interpolated_input intrinsics? * @@ -2811,6 +3135,19 @@ typedef struct nir_shader_compiler_options { */ bool has_imul24; + /** Backend supports umul24, if not set umul24 will automatically be lowered + * to imul with masked inputs */ + bool has_umul24; + + /** Backend supports umad24, if not set umad24 will automatically be lowered + * to imul with masked inputs and iadd */ + bool has_umad24; + + /* Whether to generate only scoped_barrier intrinsics instead of the set of + * memory and control barrier intrinsics based on GLSL. + */ + bool use_scoped_barrier; + /** * Is this the Intel vec4 backend? * @@ -2821,6 +3158,15 @@ typedef struct nir_shader_compiler_options { */ bool intel_vec4; + /** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */ + bool lower_bfe_with_two_constants; + + /** Whether 8-bit ALU is supported. */ + bool support_8bit_alu; + + /** Whether 16-bit ALU is supported. */ + bool support_16bit_alu; + unsigned max_unroll_iterations; nir_lower_int64_options lower_int64_options; @@ -2948,6 +3294,8 @@ nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...); /** dirties all but the preserved metadata */ void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); +/** Preserves all metadata for the given shader */ +void nir_shader_preserve_all_metadata(nir_shader *shader); /** creates an instruction with default swizzle/writemask/etc. with NULL registers */ nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); @@ -3231,6 +3579,9 @@ bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state); bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); +bool nir_foreach_phi_src_leaving_block(nir_block *instr, + nir_foreach_src_cb cb, + void *state); nir_const_value *nir_src_as_const_value(nir_src src); @@ -3340,6 +3691,8 @@ unsigned nir_index_instrs(nir_function_impl *impl); void nir_index_blocks(nir_function_impl *impl); +void nir_index_vars(nir_shader *shader, nir_function_impl *impl, nir_variable_mode modes); + void nir_print_shader(nir_shader *shader, FILE *fp); void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors); void nir_print_instr(const nir_instr *instr, FILE *fp); @@ -3575,6 +3928,8 @@ bool nir_lower_vars_to_scratch(nir_shader *shader, int size_threshold, glsl_type_size_align_func size_align); +void nir_lower_clip_halfz(nir_shader *shader); + void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); void nir_gather_ssa_types(nir_function_impl *impl, @@ -3601,6 +3956,15 @@ void nir_assign_io_var_locations(struct exec_list *var_list, unsigned *size, gl_shader_stage stage); +typedef struct { + uint8_t num_linked_io_vars; + uint8_t num_linked_patch_io_vars; +} nir_linked_io_var_info; + +nir_linked_io_var_info +nir_assign_linked_io_var_locations(nir_shader *producer, + nir_shader *consumer); + typedef enum { /* If set, this causes all 64-bit IO operations to be lowered on-the-fly * to 32-bit operations. This is only valid for nir_var_shader_in/out @@ -3653,6 +4017,12 @@ typedef enum { */ nir_address_format_32bit_index_offset, + /** + * An address format which is comprised of a vec3 where the first two + * components specify the buffer and the third is an offset. + */ + nir_address_format_vec2_index_32bit_offset, + /** * An address format which is a simple 32-bit offset. */ @@ -3672,12 +4042,13 @@ static inline unsigned nir_address_format_bit_size(nir_address_format addr_format) { switch (addr_format) { - case nir_address_format_32bit_global: return 32; - case nir_address_format_64bit_global: return 64; - case nir_address_format_64bit_bounded_global: return 32; - case nir_address_format_32bit_index_offset: return 32; - case nir_address_format_32bit_offset: return 32; - case nir_address_format_logical: return 32; + case nir_address_format_32bit_global: return 32; + case nir_address_format_64bit_global: return 64; + case nir_address_format_64bit_bounded_global: return 32; + case nir_address_format_32bit_index_offset: return 32; + case nir_address_format_vec2_index_32bit_offset: return 32; + case nir_address_format_32bit_offset: return 32; + case nir_address_format_logical: return 32; } unreachable("Invalid address format"); } @@ -3686,12 +4057,13 @@ static inline unsigned nir_address_format_num_components(nir_address_format addr_format) { switch (addr_format) { - case nir_address_format_32bit_global: return 1; - case nir_address_format_64bit_global: return 1; - case nir_address_format_64bit_bounded_global: return 4; - case nir_address_format_32bit_index_offset: return 2; - case nir_address_format_32bit_offset: return 1; - case nir_address_format_logical: return 1; + case nir_address_format_32bit_global: return 1; + case nir_address_format_64bit_global: return 1; + case nir_address_format_64bit_bounded_global: return 4; + case nir_address_format_32bit_index_offset: return 2; + case nir_address_format_vec2_index_32bit_offset: return 3; + case nir_address_format_32bit_offset: return 1; + case nir_address_format_logical: return 1; } unreachable("Invalid address format"); } @@ -3737,8 +4109,9 @@ bool nir_lower_vars_to_ssa(nir_shader *shader); bool nir_remove_dead_derefs(nir_shader *shader); bool nir_remove_dead_derefs_impl(nir_function_impl *impl); -bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes); -bool nir_lower_constant_initializers(nir_shader *shader, +bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes, + bool (*can_remove_var)(nir_variable *var)); +bool nir_lower_variable_initializers(nir_shader *shader, nir_variable_mode modes); bool nir_move_vec_src_uses_to_dest(nir_shader *shader); @@ -3752,6 +4125,7 @@ bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask, bool always_precise, bool have_ffma); bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data); +bool nir_lower_bool_to_bitsize(nir_shader *shader); bool nir_lower_bool_to_float(nir_shader *shader); bool nir_lower_bool_to_int32(nir_shader *shader); bool nir_lower_int_to_float(nir_shader *shader); @@ -3765,6 +4139,7 @@ void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask); void nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask); bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask); +bool nir_lower_fragcolor(nir_shader *shader); void nir_lower_fragcoord_wtrans(nir_shader *shader); void nir_lower_viewport_transform(nir_shader *shader); bool nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier); @@ -3778,7 +4153,10 @@ typedef struct nir_lower_subgroups_options { bool lower_subgroup_masks:1; bool lower_shuffle:1; bool lower_shuffle_to_32bit:1; + bool lower_shuffle_to_swizzle_amd:1; bool lower_quad:1; + bool lower_quad_broadcast_dynamic:1; + bool lower_quad_broadcast_dynamic_to_const:1; } nir_lower_subgroups_options; bool nir_lower_subgroups(nir_shader *shader, @@ -3989,13 +4367,14 @@ bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables, bool use_clipdist_array); bool nir_lower_clip_cull_distance_arrays(nir_shader *nir); +bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable); void nir_lower_point_size_mov(nir_shader *shader, const gl_state_index16 *pointsize_state_tokens); bool nir_lower_frexp(nir_shader *nir); -void nir_lower_two_sided_color(nir_shader *shader); +void nir_lower_two_sided_color(nir_shader *shader, bool face_sysval); bool nir_lower_clamp_color_outputs(nir_shader *shader); @@ -4017,6 +4396,8 @@ bool nir_lower_wpos_ytransform(nir_shader *shader, const nir_lower_wpos_ytransform_options *options); bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading); +bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data); + bool nir_lower_fb_read(nir_shader *shader); typedef struct nir_lower_drawpixels_options { @@ -4039,7 +4420,7 @@ typedef struct nir_lower_bitmap_options { void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options); -bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset); +bool nir_lower_atomics_to_ssbo(nir_shader *shader); typedef enum { nir_lower_int_source_mods = 1 << 0, @@ -4051,7 +4432,7 @@ typedef enum { bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options); -bool nir_lower_gs_intrinsics(nir_shader *shader); +bool nir_lower_gs_intrinsics(nir_shader *shader, bool per_stream); typedef unsigned (*nir_lower_bit_size_callback)(const nir_alu_instr *, void *); @@ -4067,6 +4448,8 @@ bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64, nir_lower_doubles_options options); bool nir_lower_pack(nir_shader *shader); +void nir_lower_mediump_outputs(nir_shader *nir); + bool nir_lower_point_size(nir_shader *shader, float min, float max); typedef enum { @@ -4080,6 +4463,8 @@ typedef enum { bool nir_lower_interpolation(nir_shader *shader, nir_lower_interpolation_options options); +bool nir_lower_discard_to_demote(nir_shader *shader); + bool nir_normalize_cubemap_coords(nir_shader *shader); void nir_live_ssa_defs_impl(nir_function_impl *impl); @@ -4094,7 +4479,7 @@ bool nir_repair_ssa(nir_shader *shader); void nir_convert_loop_to_lcssa(nir_loop *loop); bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants); -bool* nir_divergence_analysis(nir_shader *shader, nir_divergence_options options); +void nir_divergence_analysis(nir_shader *shader, nir_divergence_options options); /* If phi_webs_only is true, only convert SSA values involved in phi nodes to * registers. If false, convert all values (even those not involved in a phi @@ -4107,6 +4492,7 @@ bool nir_lower_ssa_defs_to_regs_block(nir_block *block); bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl); bool nir_lower_samplers(nir_shader *shader); +bool nir_lower_ssbo(nir_shader *shader); /* This is here for unit tests. */ bool nir_opt_comparison_pre_impl(nir_function_impl *impl); @@ -4117,8 +4503,20 @@ bool nir_opt_access(nir_shader *shader); bool nir_opt_algebraic(nir_shader *shader); bool nir_opt_algebraic_before_ffma(nir_shader *shader); bool nir_opt_algebraic_late(nir_shader *shader); +bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader); bool nir_opt_constant_folding(nir_shader *shader); +/* Try to combine a and b into a. Return true if combination was possible, + * which will result in b being removed by the pass. Return false if + * combination wasn't possible. + */ +typedef bool (*nir_combine_memory_barrier_cb)( + nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data); + +bool nir_opt_combine_memory_barriers(nir_shader *shader, + nir_combine_memory_barrier_cb combine_cb, + void *data); + bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes); bool nir_copy_prop(nir_shader *shader); @@ -4157,6 +4555,7 @@ typedef enum { nir_move_load_ubo = (1 << 1), nir_move_load_input = (1 << 2), nir_move_comparisons = (1 << 3), + nir_move_copies = (1 << 4), } nir_move_options; bool nir_can_move_instr(nir_instr *instr, nir_move_options options); @@ -4183,6 +4582,14 @@ bool nir_opt_vectorize(nir_shader *shader); bool nir_opt_conditional_discard(nir_shader *shader); +typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size, + unsigned num_components, unsigned high_offset, + nir_intrinsic_instr *low, nir_intrinsic_instr *high); + +bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes, + nir_should_vectorize_mem_func callback, + nir_variable_mode robust_modes); + void nir_strip(nir_shader *shader); void nir_sweep(nir_shader *shader); @@ -4214,6 +4621,26 @@ nir_variable_is_in_block(const nir_variable *var) return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var); } +typedef struct nir_unsigned_upper_bound_config { + unsigned min_subgroup_size; + unsigned max_subgroup_size; + unsigned max_work_group_invocations; + unsigned max_work_group_count[3]; + unsigned max_work_group_size[3]; + + uint32_t vertex_attrib_max[32]; +} nir_unsigned_upper_bound_config; + +uint32_t +nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, + nir_ssa_scalar scalar, + const nir_unsigned_upper_bound_config *config); + +bool +nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht, + nir_ssa_scalar ssa, unsigned const_val, + const nir_unsigned_upper_bound_config *config); + #ifdef __cplusplus } /* extern "C" */ #endif