X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir.h;h=030589932436eb88bda47ae8103738765f93b562;hb=12e18d9e7aded72dbfa513bce010e793f0d31cf9;hp=9f6d2c5895b83c9884a560c34422bee89a73439a;hpb=7aa61c84fe47f139b96b29d39b3298f30b96c89c;p=mesa.git diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 9f6d2c5895b..03058993243 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -41,6 +41,8 @@ #include "compiler/nir_types.h" #include "compiler/shader_enums.h" #include "compiler/shader_info.h" +#define XXH_INLINE_ALL +#include "util/xxhash.h" #include #ifndef NDEBUG @@ -740,6 +742,12 @@ typedef struct nir_ssa_def { /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ uint8_t bit_size; + + /** + * True if this SSA value may have different values in different SIMD + * invocations of the shader. This is set by nir_divergence_analysis. + */ + bool divergent; } nir_ssa_def; struct nir_src; @@ -880,6 +888,13 @@ nir_src_is_const(nir_src src) src.ssa->parent_instr->type == nir_instr_type_load_const; } +static inline bool +nir_src_is_divergent(nir_src src) +{ + assert(src.is_ssa); + return src.ssa->divergent; +} + static inline unsigned nir_dest_bit_size(nir_dest dest) { @@ -892,6 +907,13 @@ nir_dest_num_components(nir_dest dest) return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components; } +static inline bool +nir_dest_is_divergent(nir_dest dest) +{ + assert(dest.is_ssa); + return dest.ssa.divergent; +} + /* Are all components the same, ie. .xxxx */ static inline bool nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp) @@ -967,7 +989,7 @@ typedef struct { * The values in this enum are carefully chosen so that the sized type is * just the unsized type OR the number of bits. 
*/ -typedef enum { +typedef enum PACKED { nir_type_invalid = 0, /* Not a valid type */ nir_type_int = 2, nir_type_uint = 4, @@ -1001,10 +1023,10 @@ nir_alu_type_get_type_size(nir_alu_type type) return type & NIR_ALU_TYPE_SIZE_MASK; } -static inline unsigned +static inline nir_alu_type nir_alu_type_get_base_type(nir_alu_type type) { - return type & NIR_ALU_TYPE_BASE_TYPE_MASK; + return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK); } static inline nir_alu_type @@ -1197,7 +1219,7 @@ typedef enum { typedef struct { const char *name; - unsigned num_inputs; + uint8_t num_inputs; /** * The number of components in the output @@ -1216,7 +1238,7 @@ typedef struct { * though output_size is zero; in that case, the inputs with a zero * size act per-component, while the inputs with non-zero size don't. */ - unsigned output_size; + uint8_t output_size; /** * The type of vector that the instruction outputs. Note that the @@ -1228,7 +1250,7 @@ typedef struct { /** * The number of components in each input */ - unsigned input_sizes[NIR_MAX_VEC_COMPONENTS]; + uint8_t input_sizes[NIR_MAX_VEC_COMPONENTS]; /** * The type of vector that each input takes. Note that negate and @@ -1503,6 +1525,7 @@ typedef enum { } nir_memory_semantics; typedef enum { + NIR_SCOPE_NONE, NIR_SCOPE_INVOCATION, NIR_SCOPE_SUBGROUP, NIR_SCOPE_WORKGROUP, @@ -1674,6 +1697,11 @@ typedef enum { */ NIR_INTRINSIC_MEMORY_SCOPE, + /** + * Value of nir_scope. + */ + NIR_INTRINSIC_EXECUTION_SCOPE, + NIR_INTRINSIC_NUM_INDEX_FLAGS, } nir_intrinsic_index_flag; @@ -1683,7 +1711,7 @@ typedef enum { typedef struct { const char *name; - unsigned num_srcs; /** < number of register/SSA inputs */ + uint8_t num_srcs; /** < number of register/SSA inputs */ /** number of components of each input register * @@ -1692,7 +1720,7 @@ typedef struct { * intrinsic consumes however many components are provided and it is not * validated at all. 
*/ - int src_components[NIR_INTRINSIC_MAX_INPUTS]; + int8_t src_components[NIR_INTRINSIC_MAX_INPUTS]; bool has_dest; @@ -1701,16 +1729,16 @@ typedef struct { * If this value is 0, the number of components is given by the * num_components field of nir_intrinsic_instr. */ - unsigned dest_components; + uint8_t dest_components; /** bitfield of legal bit sizes */ - unsigned dest_bit_sizes; + uint8_t dest_bit_sizes; /** the number of constant indices used by the intrinsic */ - unsigned num_indices; + uint8_t num_indices; /** indicates the usage of intr->const_index[n] */ - unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; + uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; /** semantic flags for calls to this intrinsic */ nir_intrinsic_semantic_flag flags; @@ -1719,7 +1747,7 @@ typedef struct { extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; static inline unsigned -nir_intrinsic_src_components(nir_intrinsic_instr *intr, unsigned srcn) +nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn) { const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; assert(srcn < info->num_srcs); @@ -1743,6 +1771,33 @@ nir_intrinsic_dest_components(nir_intrinsic_instr *intr) return intr->num_components; } +/** + * Helper to copy const_index[] from src to dst, without assuming they + * match in order. 
+ */ +static inline void +nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src) +{ + if (src->intrinsic == dst->intrinsic) { + memcpy(dst->const_index, src->const_index, sizeof(dst->const_index)); + return; + } + + const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic]; + const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic]; + + for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) { + if (src_info->index_map[i] == 0) + continue; + + /* require that dst instruction also uses the same const_index[]: */ + assert(dst_info->index_map[i] > 0); + + dst->const_index[dst_info->index_map[i] - 1] = + src->const_index[src_info->index_map[i] - 1]; + } +} + #define INTRINSIC_IDX_ACCESSORS(name, flag, type) \ static inline type \ nir_intrinsic_##name(const nir_intrinsic_instr *instr) \ @@ -1786,6 +1841,7 @@ INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned) INTRINSIC_IDX_ACCESSORS(memory_semantics, MEMORY_SEMANTICS, nir_memory_semantics) INTRINSIC_IDX_ACCESSORS(memory_modes, MEMORY_MODES, nir_variable_mode) INTRINSIC_IDX_ACCESSORS(memory_scope, MEMORY_SCOPE, nir_scope) +INTRINSIC_IDX_ACCESSORS(execution_scope, EXECUTION_SCOPE, nir_scope) static inline void nir_intrinsic_set_align(nir_intrinsic_instr *intrin, @@ -2176,8 +2232,26 @@ typedef struct { } nir_load_const_instr; typedef enum { + /** Return from a function + * + * This instruction is a classic function return. It jumps to + * nir_function_impl::end_block. No return value is provided in this + * instruction. Instead, the function is expected to write any return + * data to a deref passed in from the caller. + */ nir_jump_return, + + /** Break out of the inner-most loop + * + * This has the same semantics as C's "break" statement. 
+ */ nir_jump_break, + + /** Jump back to the top of the inner-most loop + * + * This has the same semantics as C's "continue" statement assuming that a + * NIR loop is implemented as "while (1) { body }". + */ nir_jump_continue, } nir_jump_type; @@ -2569,11 +2643,71 @@ typedef struct { */ typedef enum { nir_metadata_none = 0x0, + + /** Indicates that nir_block::index values are valid. + * + * The start block has index 0 and they increase through a natural walk of + * the CFG. nir_function_impl::num_blocks is the number of blocks and + * every block index is in the range [0, nir_function_impl::num_blocks]. + * + * A pass can preserve this metadata type if it doesn't touch the CFG. + */ nir_metadata_block_index = 0x1, + + /** Indicates that block dominance information is valid + * + * This includes: + * + * - nir_block::num_dom_children + * - nir_block::dom_children + * - nir_block::dom_frontier + * - nir_block::dom_pre_index + * - nir_block::dom_post_index + * + * A pass can preserve this metadata type if it doesn't touch the CFG. + */ nir_metadata_dominance = 0x2, + + /** Indicates that SSA def data-flow liveness information is valid + * + * This includes: + * + * - nir_ssa_def::live_index + * - nir_block::live_in + * - nir_block::live_out + * + * A pass can preserve this metadata type if it never adds or removes any + * SSA defs (most passes shouldn't preserve this metadata type). + */ nir_metadata_live_ssa_defs = 0x4, + + /** A dummy metadata value to track when a pass forgot to call + * nir_metadata_preserve. + * + * A pass should always clear this value even if it doesn't make any + * progress to indicate that it thought about preserving metadata. + */ nir_metadata_not_properly_reset = 0x8, + + /** Indicates that loop analysis information is valid. + * + * This includes everything pointed to by nir_loop::info. + * + * A pass can preserve this metadata type if it is guaranteed to not affect + * any loop metadata. 
However, since loop metadata includes things like + * loop counts which depend on arithmetic in the loop, this is very hard to + * determine. Most passes shouldn't preserve this metadata type. + */ nir_metadata_loop_analysis = 0x10, + + /** All metadata + * + * This includes all nir_metadata flags except not_properly_reset. Passes + * which do not change the shader in any way should call + * + * nir_metadata_preserve(impl, nir_metadata_all); + */ + nir_metadata_all = ~nir_metadata_not_properly_reset, } nir_metadata; typedef struct { @@ -2968,6 +3102,11 @@ typedef struct nir_shader_compiler_options { bool vectorize_io; bool lower_to_scalar; + /** + * Whether nir_opt_vectorize should only create 16-bit 2D vectors. + */ + bool vectorize_vec2_16bit; + /** * Should the linker unify inputs_read/outputs_written between adjacent * shader stages which are linked into a single program? @@ -3004,10 +3143,10 @@ typedef struct nir_shader_compiler_options { * to imul with masked inputs and iadd */ bool has_umad24; - /* Whether to generate only scoped_memory_barrier intrinsics instead of the - * set of memory barrier intrinsics based on GLSL. + /* Whether to generate only scoped_barrier intrinsics instead of the set of + * memory and control barrier intrinsics based on GLSL. */ - bool use_scoped_memory_barrier; + bool use_scoped_barrier; /** * Is this the Intel vec4 backend? @@ -3019,6 +3158,9 @@ typedef struct nir_shader_compiler_options { */ bool intel_vec4; + /** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */ + bool lower_bfe_with_two_constants; + /** Whether 8-bit ALU is supported. 
*/ bool support_8bit_alu; @@ -3152,6 +3294,8 @@ nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...); /** dirties all but the preserved metadata */ void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); +/** Preserves all metadata for the given shader */ +void nir_shader_preserve_all_metadata(nir_shader *shader); /** creates an instruction with default swizzle/writemask/etc. with NULL registers */ nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); @@ -3435,6 +3579,9 @@ bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state); bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); +bool nir_foreach_phi_src_leaving_block(nir_block *instr, + nir_foreach_src_cb cb, + void *state); nir_const_value *nir_src_as_const_value(nir_src src); @@ -3870,6 +4017,12 @@ typedef enum { */ nir_address_format_32bit_index_offset, + /** + * An address format which is comprised of a vec3 where the first two + * components specify the buffer and the third is an offset. + */ + nir_address_format_vec2_index_32bit_offset, + /** * An address format which is a simple 32-bit offset. 
*/ @@ -3889,12 +4042,13 @@ static inline unsigned nir_address_format_bit_size(nir_address_format addr_format) { switch (addr_format) { - case nir_address_format_32bit_global: return 32; - case nir_address_format_64bit_global: return 64; - case nir_address_format_64bit_bounded_global: return 32; - case nir_address_format_32bit_index_offset: return 32; - case nir_address_format_32bit_offset: return 32; - case nir_address_format_logical: return 32; + case nir_address_format_32bit_global: return 32; + case nir_address_format_64bit_global: return 64; + case nir_address_format_64bit_bounded_global: return 32; + case nir_address_format_32bit_index_offset: return 32; + case nir_address_format_vec2_index_32bit_offset: return 32; + case nir_address_format_32bit_offset: return 32; + case nir_address_format_logical: return 32; } unreachable("Invalid address format"); } @@ -3903,12 +4057,13 @@ static inline unsigned nir_address_format_num_components(nir_address_format addr_format) { switch (addr_format) { - case nir_address_format_32bit_global: return 1; - case nir_address_format_64bit_global: return 1; - case nir_address_format_64bit_bounded_global: return 4; - case nir_address_format_32bit_index_offset: return 2; - case nir_address_format_32bit_offset: return 1; - case nir_address_format_logical: return 1; + case nir_address_format_32bit_global: return 1; + case nir_address_format_64bit_global: return 1; + case nir_address_format_64bit_bounded_global: return 4; + case nir_address_format_32bit_index_offset: return 2; + case nir_address_format_vec2_index_32bit_offset: return 3; + case nir_address_format_32bit_offset: return 1; + case nir_address_format_logical: return 1; } unreachable("Invalid address format"); } @@ -3954,7 +4109,8 @@ bool nir_lower_vars_to_ssa(nir_shader *shader); bool nir_remove_dead_derefs(nir_shader *shader); bool nir_remove_dead_derefs_impl(nir_function_impl *impl); -bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes); +bool 
nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes, + bool (*can_remove_var)(nir_variable *var)); bool nir_lower_variable_initializers(nir_shader *shader, nir_variable_mode modes); @@ -4209,6 +4365,7 @@ bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables, bool use_clipdist_array); bool nir_lower_clip_cull_distance_arrays(nir_shader *nir); +bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable); void nir_lower_point_size_mov(nir_shader *shader, const gl_state_index16 *pointsize_state_tokens); @@ -4237,6 +4394,8 @@ bool nir_lower_wpos_ytransform(nir_shader *shader, const nir_lower_wpos_ytransform_options *options); bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading); +bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data); + bool nir_lower_fb_read(nir_shader *shader); typedef struct nir_lower_drawpixels_options { @@ -4318,7 +4477,7 @@ bool nir_repair_ssa(nir_shader *shader); void nir_convert_loop_to_lcssa(nir_loop *loop); bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants); -bool* nir_divergence_analysis(nir_shader *shader, nir_divergence_options options); +void nir_divergence_analysis(nir_shader *shader, nir_divergence_options options); /* If phi_webs_only is true, only convert SSA values involved in phi nodes to * registers. 
If false, convert all values (even those not involved in a phi @@ -4426,9 +4585,23 @@ typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size, nir_intrinsic_instr *low, nir_intrinsic_instr *high); bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes, - nir_should_vectorize_mem_func callback); + nir_should_vectorize_mem_func callback, + nir_variable_mode robust_modes); + +typedef struct nir_schedule_options { + /* On some hardware with some stages the inputs and outputs to the shader + * share the same memory. In that case the scheduler needs to ensure that all + * output writes are scheduled after all of the input reads to avoid + * overwriting them. This is a bitmask of stages that need that. + */ + unsigned stages_with_shared_io_memory; + /* The approximate amount of register pressure at which point the scheduler + * will try to reduce register usage. + */ + int threshold; +} nir_schedule_options; -void nir_schedule(nir_shader *shader, int threshold); +void nir_schedule(nir_shader *shader, const nir_schedule_options *options); void nir_strip(nir_shader *shader);