X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir.h;h=e6edb72ecf207ab18d900981e7ae49ef989d5d55;hp=ce12410b79bd04da474c099332b7196d5bf23bb6;hb=15da98365388983eb9ef1a65c2e7da6ef2ab89e0;hpb=42c9bbaeed6c814981d7100afda05ab942d88bee diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index ce12410b79b..e6edb72ecf2 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -41,6 +41,8 @@ #include "compiler/nir_types.h" #include "compiler/shader_enums.h" #include "compiler/shader_info.h" +#define XXH_INLINE_ALL +#include "util/xxhash.h" #include #ifndef NDEBUG @@ -326,7 +328,7 @@ typedef struct nir_variable { * * \sa nir_variable_mode */ - nir_variable_mode mode:11; + unsigned mode:11; /** * Is the variable read-only? @@ -465,12 +467,12 @@ typedef struct nir_variable { unsigned per_view:1; /** - * \brief Layout qualifier for gl_FragDepth. + * \brief Layout qualifier for gl_FragDepth. See nir_depth_layout. * * This is not equal to \c ir_depth_layout_none if and only if this * variable is \c gl_FragDepth and a layout qualifier is specified. */ - nir_depth_layout depth_layout:3; + unsigned depth_layout:3; /** * Vertex stream output identifier. @@ -481,10 +483,12 @@ typedef struct nir_variable { unsigned stream:9; /** + * See gl_access_qualifier. + * * Access flags for memory variables (SSBO/global), image uniforms, and * bindless images in uniforms/inputs/outputs. */ - enum gl_access_qualifier access:8; + unsigned access:8; /** * Descriptor set binding for sampler or UBO. @@ -541,6 +545,16 @@ typedef struct nir_variable { enum pipe_format format; } image; + struct { + /** + * For OpenCL inline samplers. See cl_sampler_addressing_mode and cl_sampler_filter_mode + */ + unsigned is_inline_sampler : 1; + unsigned addressing_mode : 3; + unsigned normalized_coordinates : 1; + unsigned filter_mode : 1; + } sampler; + struct { /** * Transform feedback buffer. @@ -616,12 +630,52 @@ typedef struct nir_variable { struct nir_variable_data *members; } nir_variable; -#define nir_foreach_variable(var, var_list) \ +static inline bool +_nir_shader_variable_has_mode(nir_variable *var, unsigned modes) +{ + /* This isn't a shader variable */ + assert(!(modes & nir_var_function_temp)); + return var->data.mode & modes; +} + +#define nir_foreach_variable_in_list(var, var_list) \ foreach_list_typed(nir_variable, var, node, var_list) -#define nir_foreach_variable_safe(var, var_list) \ +#define nir_foreach_variable_in_list_safe(var, var_list) \ foreach_list_typed_safe(nir_variable, var, node, var_list) +#define nir_foreach_variable_in_shader(var, shader) \ + nir_foreach_variable_in_list(var, &(shader)->variables) + +#define nir_foreach_variable_in_shader_safe(var, shader) \ + nir_foreach_variable_in_list_safe(var, &(shader)->variables) + +#define nir_foreach_variable_with_modes(var, shader, modes) \ + nir_foreach_variable_in_shader(var, shader) \ + if (_nir_shader_variable_has_mode(var, modes)) + +#define nir_foreach_variable_with_modes_safe(var, shader, modes) \ + nir_foreach_variable_in_shader_safe(var, shader) \ + if (_nir_shader_variable_has_mode(var, modes)) + +#define nir_foreach_shader_in_variable(var, shader) \ + nir_foreach_variable_with_modes(var, shader, nir_var_shader_in) + +#define nir_foreach_shader_in_variable_safe(var, shader) \ + nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in) + +#define nir_foreach_shader_out_variable(var, shader) \ + nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) + +#define nir_foreach_shader_out_variable_safe(var, shader) \ + nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out) + +#define nir_foreach_uniform_variable(var, shader) \ + nir_foreach_variable_with_modes(var, shader, nir_var_uniform) + +#define nir_foreach_uniform_variable_safe(var, shader) \ + nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform) + static inline bool nir_variable_is_global(const nir_variable *var) { @@ -724,7 +778,7 @@ typedef struct nir_ssa_def { /** generic SSA definition index. */ unsigned index; - /** Index into the live_in and live_out bitfields */ + /** Ordered SSA definition index used by nir_liveness. */ unsigned live_index; /** Instruction which produces this SSA value. */ @@ -740,6 +794,12 @@ typedef struct nir_ssa_def { /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ uint8_t bit_size; + + /** + * True if this SSA value may have different values in different SIMD + * invocations of the shader. This is set by nir_divergence_analysis. + */ + bool divergent; } nir_ssa_def; struct nir_src; @@ -880,6 +940,13 @@ nir_src_is_const(nir_src src) src.ssa->parent_instr->type == nir_instr_type_load_const; } +static inline bool +nir_src_is_divergent(nir_src src) +{ + assert(src.is_ssa); + return src.ssa->divergent; +} + static inline unsigned nir_dest_bit_size(nir_dest dest) { @@ -892,6 +959,13 @@ nir_dest_num_components(nir_dest dest) return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components; } +static inline bool +nir_dest_is_divergent(nir_dest dest) +{ + assert(dest.is_ssa); + return dest.ssa.divergent; +} + /* Are all components the same, ie. .xxxx */ static inline bool nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp) @@ -967,7 +1041,7 @@ typedef struct { * The values in this enum are carefully chosen so that the sized type is * just the unsized type OR the number of bits. */ -typedef enum { +typedef enum PACKED { nir_type_invalid = 0, /* Not a valid type */ nir_type_int = 2, nir_type_uint = 4, @@ -1001,10 +1075,10 @@ nir_alu_type_get_type_size(nir_alu_type type) return type & NIR_ALU_TYPE_SIZE_MASK; } -static inline unsigned +static inline nir_alu_type nir_alu_type_get_base_type(nir_alu_type type) { - return type & NIR_ALU_TYPE_BASE_TYPE_MASK; + return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK); } static inline nir_alu_type @@ -1197,7 +1271,7 @@ typedef enum { typedef struct { const char *name; - unsigned num_inputs; + uint8_t num_inputs; /** * The number of components in the output @@ -1216,7 +1290,7 @@ typedef struct { * though output_size is zero; in that case, the inputs with a zero * size act per-component, while the inputs with non-zero size don't. */ - unsigned output_size; + uint8_t output_size; /** * The type of vector that the instruction outputs. Note that the @@ -1228,7 +1302,7 @@ typedef struct { /** * The number of components in each input */ - unsigned input_sizes[NIR_MAX_VEC_COMPONENTS]; + uint8_t input_sizes[NIR_MAX_VEC_COMPONENTS]; /** * The type of vector that each input takes. Note that negate and @@ -1318,7 +1392,7 @@ nir_alu_instr_is_comparison(const nir_alu_instr *instr) case nir_op_flt: case nir_op_fge: case nir_op_feq: - case nir_op_fne: + case nir_op_fneu: case nir_op_ilt: case nir_op_ult: case nir_op_ige: @@ -1435,7 +1509,7 @@ typedef struct { #include "nir_intrinsics.h" -#define NIR_INTRINSIC_MAX_CONST_INDEX 4 +#define NIR_INTRINSIC_MAX_CONST_INDEX 5 /** Represents an intrinsic * @@ -1503,6 +1577,7 @@ typedef enum { } nir_memory_semantics; typedef enum { + NIR_SCOPE_NONE, NIR_SCOPE_INVOCATION, NIR_SCOPE_SUBGROUP, NIR_SCOPE_WORKGROUP, @@ -1674,16 +1749,35 @@ typedef enum { */ NIR_INTRINSIC_MEMORY_SCOPE, + /** + * Value of nir_scope. + */ + NIR_INTRINSIC_EXECUTION_SCOPE, + + /** + * Value of nir_io_semantics. + */ + NIR_INTRINSIC_IO_SEMANTICS, + NIR_INTRINSIC_NUM_INDEX_FLAGS, } nir_intrinsic_index_flag; +typedef struct { + unsigned location:7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */ + unsigned num_slots:6; /* max 32, may be pessimistic with const indexing */ + unsigned dual_source_blend_index:1; + unsigned fb_fetch_output:1; /* for GL_KHR_blend_equation_advanced */ + unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */ + unsigned _pad:9; +} nir_io_semantics; + #define NIR_INTRINSIC_MAX_INPUTS 5 typedef struct { const char *name; - unsigned num_srcs; /** < number of register/SSA inputs */ + uint8_t num_srcs; /** < number of register/SSA inputs */ /** number of components of each input register * @@ -1692,7 +1786,7 @@ typedef struct { * intrinsic consumes however many components are provided and it is not * validated at all. */ - int src_components[NIR_INTRINSIC_MAX_INPUTS]; + int8_t src_components[NIR_INTRINSIC_MAX_INPUTS]; bool has_dest; @@ -1701,16 +1795,16 @@ typedef struct { * If this value is 0, the number of components is given by the * num_components field of nir_intrinsic_instr. */ - unsigned dest_components; + uint8_t dest_components; /** bitfield of legal bit sizes */ - unsigned dest_bit_sizes; + uint8_t dest_bit_sizes; /** the number of constant indices used by the intrinsic */ - unsigned num_indices; + uint8_t num_indices; /** indicates the usage of intr->const_index[n] */ - unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; + uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; /** semantic flags for calls to this intrinsic */ nir_intrinsic_semantic_flag flags; @@ -1719,7 +1813,7 @@ typedef struct { extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; static inline unsigned -nir_intrinsic_src_components(nir_intrinsic_instr *intr, unsigned srcn) +nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn) { const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; assert(srcn < info->num_srcs); @@ -1743,6 +1837,33 @@ nir_intrinsic_dest_components(nir_intrinsic_instr *intr) return intr->num_components; } +/** + * Helper to copy const_index[] from src to dst, without assuming they + * match in order. + */ +static inline void +nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src) +{ + if (src->intrinsic == dst->intrinsic) { + memcpy(dst->const_index, src->const_index, sizeof(dst->const_index)); + return; + } + + const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic]; + const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic]; + + for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) { + if (src_info->index_map[i] == 0) + continue; + + /* require that dst instruction also uses the same const_index[]: */ + assert(dst_info->index_map[i] > 0); + + dst->const_index[dst_info->index_map[i] - 1] = + src->const_index[src_info->index_map[i] - 1]; + } +} + #define INTRINSIC_IDX_ACCESSORS(name, flag, type) \ static inline type \ nir_intrinsic_##name(const nir_intrinsic_instr *instr) \ @@ -1757,6 +1878,12 @@ nir_intrinsic_set_##name(nir_intrinsic_instr *instr, type val) \ const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \ instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1] = val; \ +} \ +static inline bool \ +nir_intrinsic_has_##name(nir_intrinsic_instr *instr) \ +{ \ + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ + return info->index_map[NIR_INTRINSIC_##flag] > 0; \ } INTRINSIC_IDX_ACCESSORS(write_mask, WRMASK, unsigned) @@ -1786,6 +1913,7 @@ INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned) INTRINSIC_IDX_ACCESSORS(memory_semantics, MEMORY_SEMANTICS, nir_memory_semantics) INTRINSIC_IDX_ACCESSORS(memory_modes, MEMORY_MODES, nir_variable_mode) INTRINSIC_IDX_ACCESSORS(memory_scope, MEMORY_SCOPE, nir_scope) +INTRINSIC_IDX_ACCESSORS(execution_scope, EXECUTION_SCOPE, nir_scope) static inline void nir_intrinsic_set_align(nir_intrinsic_instr *intrin, @@ -1813,6 +1941,30 @@ nir_intrinsic_align(const nir_intrinsic_instr *intrin) return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul; } +static inline void +nir_intrinsic_set_io_semantics(nir_intrinsic_instr *intrin, + nir_io_semantics semantics) +{ + const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic]; + assert(info->index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0); + STATIC_ASSERT(sizeof(nir_io_semantics) == sizeof(intrin->const_index[0])); + semantics._pad = 0; /* clear padding bits */ + memcpy(&intrin->const_index[info->index_map[NIR_INTRINSIC_IO_SEMANTICS] - 1], + &semantics, sizeof(semantics)); +} + +static inline nir_io_semantics +nir_intrinsic_io_semantics(const nir_intrinsic_instr *intrin) +{ + const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic]; + assert(info->index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0); + nir_io_semantics semantics; + memcpy(&semantics, + &intrin->const_index[info->index_map[NIR_INTRINSIC_IO_SEMANTICS] - 1], + sizeof(semantics)); + return semantics; +} + unsigned nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr); @@ -2176,14 +2328,43 @@ typedef struct { } nir_load_const_instr; typedef enum { + /** Return from a function + * + * This instruction is a classic function return. It jumps to + * nir_function_impl::end_block. No return value is provided in this + * instruction. Instead, the function is expected to write any return + * data to a deref passed in from the caller. + */ nir_jump_return, + + /** Break out of the inner-most loop + * + * This has the same semantics as C's "break" statement. + */ nir_jump_break, + + /** Jump back to the top of the inner-most loop + * + * This has the same semantics as C's "continue" statement assuming that a + * NIR loop is implemented as "while (1) { body }". + */ nir_jump_continue, + + /** Jumps for unstructured CFG. + * + * As within an unstructured CFG we can't rely on block ordering we need to + * place explicit jumps at the end of every block. + */ + nir_jump_goto, + nir_jump_goto_if, } nir_jump_type; typedef struct { nir_instr instr; nir_jump_type type; + nir_src condition; + struct nir_block *target; + struct nir_block *else_target; } nir_jump_instr; /* creates a new SSA variable in an undefined state */ @@ -2432,7 +2613,9 @@ typedef struct nir_block { */ int16_t dom_pre_index, dom_post_index; - /* live in and out for this block; used for liveness analysis */ + /* SSA def live in and out for this block; used for liveness analysis. + * Indexed by ssa_def->index + */ BITSET_WORD *live_in; BITSET_WORD *live_out; } nir_block; @@ -2569,11 +2752,71 @@ typedef struct { */ typedef enum { nir_metadata_none = 0x0, + + /** Indicates that nir_block::index values are valid. + * + * The start block has index 0 and they increase through a natural walk of + * the CFG. nir_function_impl::num_blocks is the number of blocks and + * every block index is in the range [0, nir_function_impl::num_blocks]. + * + * A pass can preserve this metadata type if it doesn't touch the CFG. + */ nir_metadata_block_index = 0x1, + + /** Indicates that block dominance information is valid + * + * This includes: + * + * - nir_block::num_dom_children + * - nir_block::dom_children + * - nir_block::dom_frontier + * - nir_block::dom_pre_index + * - nir_block::dom_post_index + * + * A pass can preserve this metadata type if it doesn't touch the CFG. + */ nir_metadata_dominance = 0x2, + + /** Indicates that SSA def data-flow liveness information is valid + * + * This includes: + * + * - nir_ssa_def::live_index + * - nir_block::live_in + * - nir_block::live_out + * + * A pass can preserve this metadata type if it never adds or removes any + * SSA defs (most passes shouldn't preserve this metadata type). + */ nir_metadata_live_ssa_defs = 0x4, + + /** A dummy metadata value to track when a pass forgot to call + * nir_metadata_preserve. + * + * A pass should always clear this value even if it doesn't make any + * progress to indicate that it thought about preserving metadata. + */ nir_metadata_not_properly_reset = 0x8, + + /** Indicates that loop analysis information is valid. + * + * This includes everything pointed to by nir_loop::info. + * + * A pass can preserve this metadata type if it is guaranteed to not affect + * any loop metadata. However, since loop metadata includes things like + * loop counts which depend on arithmetic in the loop, this is very hard to + * determine. Most passes shouldn't preserve this metadata type. + */ nir_metadata_loop_analysis = 0x10, + + /** All metadata + * + * This includes all nir_metadata flags except not_properly_reset. Passes + * which do not change the shader in any way should call + * + * nir_metadata_preserve(impl, nir_metadata_all); + */ + nir_metadata_all = ~nir_metadata_not_properly_reset, } nir_metadata; typedef struct { @@ -2601,9 +2844,21 @@ typedef struct { /* total number of basic blocks, only valid when block_index_dirty = false */ unsigned num_blocks; + /** True if this nir_function_impl uses structured control-flow + * + * Structured nir_function_impls have different validation rules. + */ + bool structured; + nir_metadata valid_metadata; } nir_function_impl; +#define nir_foreach_function_temp_variable(var, impl) \ + foreach_list_typed(nir_variable, var, node, &(impl)->locals) + +#define nir_foreach_function_temp_variable_safe(var, impl) \ + foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals) + ATTRIBUTE_RETURNS_NONNULL static inline nir_block * nir_start_block(nir_function_impl *impl) { @@ -2755,6 +3010,7 @@ typedef enum { nir_lower_imul_2x32_64 = (1 << 12), nir_lower_extract64 = (1 << 13), nir_lower_ufind_msb64 = (1 << 14), + nir_lower_bit_count64 = (1 << 15), } nir_lower_int64_options; typedef enum { @@ -2819,7 +3075,7 @@ typedef struct nir_shader_compiler_options { /** lowers fsub and isub to fadd+fneg and iadd+ineg. */ bool lower_sub; - /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */ + /* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */ bool lower_scmp; /* lower fall_equalN/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */ @@ -2837,6 +3093,9 @@ typedef struct nir_shader_compiler_options { /** enables rules to lower fsign to fsub and flt */ bool lower_fsign; + /** enables rules to lower iabs to ineg+imax */ + bool lower_iabs; + /* lower fdph to fdot4 */ bool lower_fdph; @@ -2863,17 +3122,21 @@ typedef struct nir_shader_compiler_options { bool lower_ldexp; bool lower_pack_half_2x16; - bool lower_pack_half_2x16_split; bool lower_pack_unorm_2x16; bool lower_pack_snorm_2x16; bool lower_pack_unorm_4x8; bool lower_pack_snorm_4x8; + bool lower_pack_64_2x32_split; + bool lower_pack_32_2x16_split; bool lower_unpack_half_2x16; - bool lower_unpack_half_2x16_split; bool lower_unpack_unorm_2x16; bool lower_unpack_snorm_2x16; bool lower_unpack_unorm_4x8; bool lower_unpack_snorm_4x8; + bool lower_unpack_64_2x32_split; + bool lower_unpack_32_2x16_split; + + bool lower_pack_split; bool lower_extract_byte; bool lower_extract_word; @@ -2915,6 +3178,9 @@ typedef struct nir_shader_compiler_options { bool lower_cs_local_index_from_id; bool lower_cs_local_id_from_index; + /* Prevents lowering global_invocation_id to be in terms of work_group_id */ + bool has_cs_global_id; + bool lower_device_index_to_zero; /* Set if nir_lower_wpos_ytransform() should also invert gl_PointCoord. */ @@ -2968,6 +3234,11 @@ typedef struct nir_shader_compiler_options { bool vectorize_io; bool lower_to_scalar; + /** + * Whether nir_opt_vectorize should only create 16-bit 2D vectors. + */ + bool vectorize_vec2_16bit; + /** * Should the linker unify inputs_read/outputs_written between adjacent * shader stages which are linked into a single program? @@ -3004,10 +3275,10 @@ typedef struct nir_shader_compiler_options { * to imul with masked inputs and iadd */ bool has_umad24; - /* Whether to generate only scoped_memory_barrier intrinsics instead of the - * set of memory barrier intrinsics based on GLSL. + /* Whether to generate only scoped_barrier intrinsics instead of the set of + * memory and control barrier intrinsics based on GLSL. */ - bool use_scoped_memory_barrier; + bool use_scoped_barrier; /** * Is this the Intel vec4 backend? @@ -3019,6 +3290,9 @@ typedef struct nir_shader_compiler_options { */ bool intel_vec4; + /** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */ + bool lower_bfe_with_two_constants; + /** Whether 8-bit ALU is supported. */ bool support_8bit_alu; @@ -3033,16 +3307,7 @@ typedef struct nir_shader_compiler_options { typedef struct nir_shader { /** list of uniforms (nir_variable) */ - struct exec_list uniforms; - - /** list of inputs (nir_variable) */ - struct exec_list inputs; - - /** list of outputs (nir_variable) */ - struct exec_list outputs; - - /** list of shared compute variables (nir_variable) */ - struct exec_list shared; + struct exec_list variables; /** Set of driver-specific options for the shader. * @@ -3054,29 +3319,31 @@ typedef struct nir_shader { /** Various bits of compile-time information about a given shader */ struct shader_info info; - /** list of global variables in the shader (nir_variable) */ - struct exec_list globals; - - /** list of system value variables in the shader (nir_variable) */ - struct exec_list system_values; - struct exec_list functions; /** < list of nir_function */ /** - * the highest index a load_input_*, load_uniform_*, etc. intrinsic can - * access plus one + * The size of the variable space for load_input_*, load_uniform_*, etc. + * intrinsics. This is in back-end specific units which is likely one of + * bytes, dwords, or vec4s depending on context and back-end. */ - unsigned num_inputs, num_uniforms, num_outputs, num_shared; + unsigned num_inputs, num_uniforms, num_outputs; + + /** Size in bytes of required shared memory */ + unsigned shared_size; /** Size in bytes of required scratch space */ unsigned scratch_size; /** Constant data associated with this shader. * - * Constant data is loaded through load_constant intrinsics. See also - * nir_opt_large_constants. + * Constant data is loaded through load_constant intrinsics (as compared to + * the NIR load_const instructions which have the constant value inlined + * into them). This is usually generated by nir_opt_large_constants (so + * shaders don't have to load_const into a temporary array when they want + * to indirect on a const array). */ void *constant_data; + /** Size of the constant data associated with the shader, in bytes */ unsigned constant_data_size; } nir_shader; @@ -3135,6 +3402,14 @@ nir_variable *nir_local_variable_create(nir_function_impl *impl, const struct glsl_type *type, const char *name); +nir_variable *nir_find_variable_with_location(nir_shader *shader, + nir_variable_mode mode, + unsigned location); + +nir_variable *nir_find_variable_with_driver_location(nir_shader *shader, + nir_variable_mode mode, + unsigned location); + /** creates a function and adds it to the shader's list of functions */ nir_function *nir_function_create(nir_shader *shader, const char *name); @@ -3152,6 +3427,8 @@ nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...); /** dirties all but the preserved metadata */ void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); +/** Preserves all metadata for the given shader */ +void nir_shader_preserve_all_metadata(nir_shader *shader); /** creates an instruction with default swizzle/writemask/etc. with NULL registers */ nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); @@ -3435,6 +3712,9 @@ bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state); bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); +bool nir_foreach_phi_src_leaving_block(nir_block *instr, + nir_foreach_src_cb cb, + void *state); nir_const_value *nir_src_as_const_value(nir_src src); @@ -3481,6 +3761,25 @@ void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def); + +/** Returns the next block, disregarding structure + * + * The ordering is deterministic but has no guarantees beyond that. In + * particular, it is not guaranteed to be dominance-preserving. + */ +nir_block *nir_block_unstructured_next(nir_block *block); +nir_block *nir_unstructured_start_block(nir_function_impl *impl); + +#define nir_foreach_block_unstructured(block, impl) \ + for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \ + block = nir_block_unstructured_next(block)) + +#define nir_foreach_block_unstructured_safe(block, impl) \ + for (nir_block *block = nir_unstructured_start_block(impl), \ + *next = nir_block_unstructured_next(block); \ + block != NULL; \ + block = next, next = nir_block_unstructured_next(block)) + /* * finds the next basic block in source-code order, returns NULL if there is * none @@ -3544,7 +3843,8 @@ unsigned nir_index_instrs(nir_function_impl *impl); void nir_index_blocks(nir_function_impl *impl); -void nir_index_vars(nir_shader *shader, nir_function_impl *impl, nir_variable_mode modes); +unsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes); +unsigned nir_function_impl_index_vars(nir_function_impl *impl); void nir_print_shader(nir_shader *shader, FILE *fp); void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors); @@ -3744,7 +4044,8 @@ bool nir_lower_returns(nir_shader *shader); void nir_inline_function_impl(struct nir_builder *b, const nir_function_impl *impl, - nir_ssa_def **params); + nir_ssa_def **params, + struct hash_table *shader_var_remap); bool nir_inline_functions(nir_shader *shader); bool nir_propagate_invariant(nir_shader *shader); @@ -3789,12 +4090,13 @@ void nir_gather_ssa_types(nir_function_impl *impl, BITSET_WORD *float_types, BITSET_WORD *int_types); -void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, +void nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode, + unsigned *size, int (*type_size)(const struct glsl_type *, bool)); /* Some helpers to do very simple linking */ bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer); -bool nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list, +bool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode, uint64_t *used_by_other_stage, uint64_t *used_by_other_stage_patches); void nir_compact_varyings(nir_shader *producer, nir_shader *consumer, @@ -3805,10 +4107,22 @@ bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer); bool nir_lower_amul(nir_shader *shader, int (*type_size)(const struct glsl_type *, bool)); -void nir_assign_io_var_locations(struct exec_list *var_list, +bool nir_lower_ubo_vec4(nir_shader *shader); + +void nir_assign_io_var_locations(nir_shader *shader, + nir_variable_mode mode, unsigned *size, gl_shader_stage stage); +typedef struct { + uint8_t num_linked_io_vars; + uint8_t num_linked_patch_io_vars; +} nir_linked_io_var_info; + +nir_linked_io_var_info +nir_assign_linked_io_var_locations(nir_shader *producer, + nir_shader *consumer); + typedef enum { /* If set, this causes all 64-bit IO operations to be lowered on-the-fly * to 32-bit operations. This is only valid for nir_var_shader_in/out @@ -3861,11 +4175,28 @@ typedef enum { */ nir_address_format_32bit_index_offset, + /** + * An address format which is a 64-bit value, where the high 32 bits + * are a buffer index, and the low 32 bits are an offset. + */ + nir_address_format_32bit_index_offset_pack64, + + /** + * An address format which is comprised of a vec3 where the first two + * components specify the buffer and the third is an offset. + */ + nir_address_format_vec2_index_32bit_offset, + /** * An address format which is a simple 32-bit offset. */ nir_address_format_32bit_offset, + /** + * An address format which is a simple 32-bit offset cast to 64-bit. + */ + nir_address_format_32bit_offset_as_64bit, + /** * An address format representing a purely logical addressing model. In * this model, all deref chains must be complete from the dereference @@ -3880,12 +4211,15 @@ static inline unsigned nir_address_format_bit_size(nir_address_format addr_format) { switch (addr_format) { - case nir_address_format_32bit_global: return 32; - case nir_address_format_64bit_global: return 64; - case nir_address_format_64bit_bounded_global: return 32; - case nir_address_format_32bit_index_offset: return 32; - case nir_address_format_32bit_offset: return 32; - case nir_address_format_logical: return 32; + case nir_address_format_32bit_global: return 32; + case nir_address_format_64bit_global: return 64; + case nir_address_format_64bit_bounded_global: return 32; + case nir_address_format_32bit_index_offset: return 32; + case nir_address_format_32bit_index_offset_pack64: return 64; + case nir_address_format_vec2_index_32bit_offset: return 32; + case nir_address_format_32bit_offset: return 32; + case nir_address_format_32bit_offset_as_64bit: return 64; + case nir_address_format_logical: return 32; } unreachable("Invalid address format"); } @@ -3894,12 +4228,15 @@ static inline unsigned nir_address_format_num_components(nir_address_format addr_format) { switch (addr_format) { - case nir_address_format_32bit_global: return 1; - case nir_address_format_64bit_global: return 1; - case nir_address_format_64bit_bounded_global: return 4; - case nir_address_format_32bit_index_offset: return 2; - case nir_address_format_32bit_offset: return 1; - case nir_address_format_logical: return 1; + case nir_address_format_32bit_global: return 1; + case nir_address_format_64bit_global: return 1; + case nir_address_format_64bit_bounded_global: return 4; + case nir_address_format_32bit_index_offset: return 2; + case nir_address_format_32bit_index_offset_pack64: return 1; + case nir_address_format_vec2_index_32bit_offset: return 3; + case nir_address_format_32bit_offset: return 1; + case nir_address_format_32bit_offset_as_64bit: return 1; + case nir_address_format_logical: return 1; } unreachable("Invalid address format"); } @@ -3945,7 +4282,8 @@ bool nir_lower_vars_to_ssa(nir_shader *shader); bool nir_remove_dead_derefs(nir_shader *shader); bool nir_remove_dead_derefs_impl(nir_function_impl *impl); -bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes); +bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes, + bool (*can_remove_var)(nir_variable *var)); bool nir_lower_variable_initializers(nir_shader *shader, nir_variable_mode modes); @@ -3974,7 +4312,8 @@ void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask); void nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask); bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask); -void nir_lower_fragcoord_wtrans(nir_shader *shader); +bool nir_lower_fragcolor(nir_shader *shader); +bool nir_lower_fragcoord_wtrans(nir_shader *shader); void nir_lower_viewport_transform(nir_shader *shader); bool nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier); @@ -3987,6 +4326,7 @@ typedef struct nir_lower_subgroups_options { bool lower_subgroup_masks:1; bool lower_shuffle:1; bool lower_shuffle_to_32bit:1; + bool lower_shuffle_to_swizzle_amd:1; bool lower_quad:1; bool lower_quad_broadcast_dynamic:1; bool lower_quad_broadcast_dynamic_to_const:1; @@ -3997,6 +4337,14 @@ bool nir_lower_subgroups(nir_shader *shader, bool nir_lower_system_values(nir_shader *shader); +typedef struct nir_lower_compute_system_values_options { + bool has_base_global_invocation_id:1; + bool has_base_work_group_id:1; +} nir_lower_compute_system_values_options; + +bool nir_lower_compute_system_values(nir_shader *shader, + const nir_lower_compute_system_values_options *options); + enum PACKED nir_lower_tex_packing { nir_lower_tex_packing_none = 0, /* The sampler returns up to 2 32-bit words of half floats or 16-bit signed @@ -4040,6 +4388,8 @@ typedef struct nir_lower_tex_options { unsigned lower_xy_uxvx_external; unsigned lower_ayuv_external; unsigned lower_xyuv_external; + unsigned bt709_external; + unsigned bt2020_external; /** * To emulate certain texture wrap modes, this can be used @@ -4188,7 +4538,14 @@ enum nir_lower_idiv_path { bool nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path); -bool nir_lower_input_attachments(nir_shader *shader, bool use_fragcoord_sysval); +typedef struct nir_input_attachment_options { + bool use_fragcoord_sysval; + bool use_layer_id_sysval; + bool use_view_id_for_layer; +} nir_input_attachment_options; + +bool nir_lower_input_attachments(nir_shader *shader, + const nir_input_attachment_options *options); bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars, @@ -4200,13 +4557,14 @@ bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables, bool use_clipdist_array); bool nir_lower_clip_cull_distance_arrays(nir_shader *nir); +bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable); void nir_lower_point_size_mov(nir_shader *shader, const gl_state_index16 *pointsize_state_tokens); bool nir_lower_frexp(nir_shader *nir); -void nir_lower_two_sided_color(nir_shader *shader); +void nir_lower_two_sided_color(nir_shader *shader, bool face_sysval); bool nir_lower_clamp_color_outputs(nir_shader *shader); @@ -4228,6 +4586,8 @@ bool nir_lower_wpos_ytransform(nir_shader *shader, const nir_lower_wpos_ytransform_options *options); bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading); +bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data); + bool nir_lower_fb_read(nir_shader *shader); typedef struct nir_lower_drawpixels_options { @@ -4269,9 +4629,10 @@ typedef unsigned (*nir_lower_bit_size_callback)(const nir_alu_instr *, void *); bool nir_lower_bit_size(nir_shader *shader, nir_lower_bit_size_callback callback, void *callback_data); +bool nir_lower_64bit_phis(nir_shader *shader); nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode); -bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options options); +bool nir_lower_int64(nir_shader *shader); nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode); bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64, @@ -4295,6 +4656,10 @@ bool nir_lower_interpolation(nir_shader *shader, bool nir_lower_discard_to_demote(nir_shader *shader); +bool nir_lower_memory_model(nir_shader *shader); + +bool nir_lower_goto_ifs(nir_shader *shader); + bool nir_normalize_cubemap_coords(nir_shader *shader); void nir_live_ssa_defs_impl(nir_function_impl *impl); @@ -4309,7 +4674,7 @@ bool nir_repair_ssa(nir_shader *shader); void nir_convert_loop_to_lcssa(nir_loop *loop); bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants); -bool* nir_divergence_analysis(nir_shader *shader, nir_divergence_options options); +void nir_divergence_analysis(nir_shader *shader, nir_divergence_options options); /* If phi_webs_only is true, only convert SSA values involved in phi nodes to * registers. If false, convert all values (even those not involved in a phi @@ -4402,7 +4767,7 @@ bool nir_opt_rematerialize_compares(nir_shader *shader); bool nir_opt_remove_phis(nir_shader *shader); bool nir_opt_remove_phis_block(nir_block *block); -bool nir_opt_shrink_load(nir_shader *shader); +bool nir_opt_shrink_vectors(nir_shader *shader); bool nir_opt_trivial_continues(nir_shader *shader); @@ -4417,11 +4782,8 @@ typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high); bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes, - nir_should_vectorize_mem_func callback); - -void nir_schedule(nir_shader *shader, int threshold); - -void nir_strip(nir_shader *shader); + nir_should_vectorize_mem_func callback, + nir_variable_mode robust_modes); void nir_sweep(nir_shader *shader); @@ -4452,6 +4814,26 @@ nir_variable_is_in_block(const nir_variable *var) return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var); } +typedef struct nir_unsigned_upper_bound_config { + unsigned min_subgroup_size; + unsigned max_subgroup_size; + unsigned max_work_group_invocations; + unsigned max_work_group_count[3]; + unsigned max_work_group_size[3]; + + uint32_t vertex_attrib_max[32]; +} nir_unsigned_upper_bound_config; + +uint32_t +nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, + nir_ssa_scalar scalar, + const nir_unsigned_upper_bound_config *config); + +bool +nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht, + nir_ssa_scalar ssa, unsigned const_val, + const nir_unsigned_upper_bound_config *config); + #ifdef __cplusplus } /* extern "C" */ #endif