X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fcompiler%2Fnir%2Fnir.h;h=d32bbab5dfc8fdd6e91866dcf06176d1abd35ef6;hb=111b0a669979cf277f31c69f501982fee004e067;hp=0ed451b9d1a8cd62bbce236b38612f2ea3748eb4;hpb=336eab063009ebc96c3625ef1b7cffb2501448ce;p=mesa.git diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 0ed451b9d1a..d32bbab5dfc 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -37,6 +37,7 @@ #include "util/bitscan.h" #include "util/bitset.h" #include "util/macros.h" +#include "util/format/u_format.h" #include "compiler/nir_types.h" #include "compiler/shader_enums.h" #include "compiler/shader_info.h" @@ -58,9 +59,19 @@ extern "C" { #define NIR_FALSE 0u #define NIR_TRUE (~0u) -#define NIR_MAX_VEC_COMPONENTS 4 +#define NIR_MAX_VEC_COMPONENTS 16 #define NIR_MAX_MATRIX_COLUMNS 4 -typedef uint8_t nir_component_mask_t; +#define NIR_STREAM_PACKED (1 << 8) +typedef uint16_t nir_component_mask_t; + +static inline bool +nir_num_components_valid(unsigned num_components) +{ + return (num_components >= 1 && + num_components <= 4) || + num_components == 8 || + num_components == 16; +} /** Defines a cast function * @@ -92,7 +103,7 @@ struct nir_builder; */ typedef struct { gl_state_index16 tokens[STATE_LENGTH]; - int swizzle; + uint16_t swizzle; } nir_state_slot; typedef enum { @@ -106,7 +117,9 @@ typedef enum { nir_var_mem_ssbo = (1 << 7), nir_var_mem_shared = (1 << 8), nir_var_mem_global = (1 << 9), - nir_var_all = ~0, + nir_var_mem_push_const = (1 << 10), /* not actually used for variables */ + nir_num_variable_modes = 11, + nir_var_all = (1 << nir_num_variable_modes) - 1, } nir_variable_mode; /** @@ -140,6 +153,106 @@ typedef union { arr[i] = c[i].m; \ } while (false) +static inline nir_const_value +nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + switch (bit_size) { + case 1: v.b = x; break; + case 8: v.u8 = x; break; + case 16: v.u16 = x; break; + case 32: v.u32 = x; break; + case 64: v.u64 = x; break; + default: + unreachable("Invalid bit size"); + } + + return v; +} + +static inline nir_const_value +nir_const_value_for_int(int64_t i, unsigned bit_size) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + assert(bit_size <= 64); + if (bit_size < 64) { + assert(i >= (-(1ll << (bit_size - 1)))); + assert(i < (1ll << (bit_size - 1))); + } + + return nir_const_value_for_raw_uint(i, bit_size); +} + +static inline nir_const_value +nir_const_value_for_uint(uint64_t u, unsigned bit_size) +{ + nir_const_value v; + memset(&v, 0, sizeof(v)); + + assert(bit_size <= 64); + if (bit_size < 64) + assert(u < (1ull << bit_size)); + + return nir_const_value_for_raw_uint(u, bit_size); +} + +static inline nir_const_value +nir_const_value_for_bool(bool b, unsigned bit_size) +{ + /* Booleans use a 0/-1 convention */ + return nir_const_value_for_int(-(int)b, bit_size); +} + +/* This one isn't inline because it requires half-float conversion */ +nir_const_value nir_const_value_for_float(double b, unsigned bit_size); + +static inline int64_t +nir_const_value_as_int(nir_const_value value, unsigned bit_size) +{ + switch (bit_size) { + /* int1_t uses 0/-1 convention */ + case 1: return -(int)value.b; + case 8: return value.i8; + case 16: return value.i16; + case 32: return value.i32; + case 64: return value.i64; + default: + unreachable("Invalid bit size"); + } +} + +static inline uint64_t +nir_const_value_as_uint(nir_const_value value, unsigned bit_size) +{ + switch (bit_size) { + case 1: return value.b; 
+ case 8: return value.u8; + case 16: return value.u16; + case 32: return value.u32; + case 64: return value.u64; + default: + unreachable("Invalid bit size"); + } +} + +static inline bool +nir_const_value_as_bool(nir_const_value value, unsigned bit_size) +{ + int64_t i = nir_const_value_as_int(value, bit_size); + + /* Booleans of any size use 0/-1 convention */ + assert(i == 0 || i == -1); + + return i; +} + +/* This one isn't inline because it requires half-float conversion */ +double nir_const_value_as_float(nir_const_value value, unsigned bit_size); + typedef struct nir_constant { /** * Value of the constant. @@ -148,7 +261,7 @@ typedef struct nir_constant { * by the type associated with the \c nir_variable. Constants may be * scalars, vectors, or matrices. */ - nir_const_value values[NIR_MAX_MATRIX_COLUMNS][NIR_MAX_VEC_COMPONENTS]; + nir_const_value values[NIR_MAX_VEC_COMPONENTS]; /* we could get this from the var->type but makes clone *much* easier to * not have to care about the type. @@ -213,7 +326,7 @@ typedef struct nir_variable { * * \sa nir_variable_mode */ - nir_variable_mode mode; + nir_variable_mode mode:11; /** * Is the variable read-only? @@ -227,6 +340,30 @@ typedef struct nir_variable { unsigned patch:1; unsigned invariant:1; + /** + * Precision qualifier. + * + * In desktop GLSL we do not care about precision qualifiers at all, in + * fact, the spec says that precision qualifiers are ignored. + * + * To make things easy, we make it so that this field is always + * GLSL_PRECISION_NONE on desktop shaders. This way all the variables + * have the same precision value and the checks we add in the compiler + * for this field will never break a desktop shader compile. + */ + unsigned precision:2; + + /** + * Can this variable be coalesced with another? + * + * This is set by nir_lower_io_to_temporaries to say that any + * copies involving this variable should stay put. Propagating it can + * duplicate the resulting load/store, which is not wanted, and may + * result in a load/store of the variable with an indirect offset which + * the backend may not be able to handle. + */ + unsigned cannot_coalesce:1; + /** * When separate shader programs are enabled, only input/outputs between * the stages of a multi-stage separate program can be safely removed @@ -242,7 +379,7 @@ typedef struct nir_variable { * * \sa glsl_interp_mode */ - unsigned interpolation:2; + unsigned interpolation:3; /** * If non-zero, then this variable may be packed along with other variables @@ -278,6 +415,15 @@ typedef struct nir_variable { */ unsigned explicit_binding:1; + /** + * Was the location explicitly set in the shader? + * + * If the location is explicitly set in the shader, it \b cannot be changed + * by the linker or by the API (e.g., calls to \c glBindAttribLocation have + * no effect). + */ + unsigned explicit_location:1; + /** * Was a transfer feedback buffer set in the shader? */ @@ -294,99 +440,130 @@ typedef struct nir_variable { unsigned explicit_offset:1; /** - * \brief Layout qualifier for gl_FragDepth. - * - * This is not equal to \c ir_depth_layout_none if and only if this - * variable is \c gl_FragDepth and a layout qualifier is specified. + * Layout of the matrix. Uses glsl_matrix_layout values. */ - nir_depth_layout depth_layout; + unsigned matrix_layout:2; /** - * Storage location of the base of this variable - * - * The precise meaning of this field depends on the nature of the variable. - * - * - Vertex shader input: one of the values from \c gl_vert_attrib. 
- * - Vertex shader output: one of the values from \c gl_varying_slot. - * - Geometry shader input: one of the values from \c gl_varying_slot. - * - Geometry shader output: one of the values from \c gl_varying_slot. - * - Fragment shader input: one of the values from \c gl_varying_slot. - * - Fragment shader output: one of the values from \c gl_frag_result. - * - Uniforms: Per-stage uniform slot number for default uniform block. - * - Uniforms: Index within the uniform block definition for UBO members. - * - Non-UBO Uniforms: uniform slot number. - * - Other: This field is not currently used. + * Non-zero if this variable was created by lowering a named interface + * block. + */ + unsigned from_named_ifc_block:1; + + /** + * How the variable was declared. See nir_var_declaration_type. * - * If the variable is a uniform, shader input, or shader output, and the - * slot has not been assigned, the value will be -1. + * This is used to detect variables generated by the compiler, so should + * not be visible via the API. */ - int location; + unsigned how_declared:2; /** - * The actual location of the variable in the IR. Only valid for inputs - * and outputs. + * Is this variable per-view? If so, we know it must be an array with + * size corresponding to the number of views. */ - unsigned int driver_location; + unsigned per_view:1; /** - * Vertex stream output identifier. + * \brief Layout qualifier for gl_FragDepth. * - * For packed outputs, bit 31 is set and bits [2*i+1,2*i] indicate the - * stream of the i-th component. + * This is not equal to \c ir_depth_layout_none if and only if this + * variable is \c gl_FragDepth and a layout qualifier is specified. */ - unsigned stream; + nir_depth_layout depth_layout:3; /** - * output index for dual source blending. + * Vertex stream output identifier. + * + * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i] + * indicate the stream of the i-th component. */ - int index; + unsigned stream:9; /** - * Descriptor set binding for sampler or UBO. + * Access flags for memory variables (SSBO/global), image uniforms, and + * bindless images in uniforms/inputs/outputs. */ - int descriptor_set; + enum gl_access_qualifier access:8; /** - * Initial binding point for a sampler or UBO. - * - * For array types, this represents the binding point for the first element. + * Descriptor set binding for sampler or UBO. */ - int binding; + unsigned descriptor_set:5; /** - * Location an atomic counter or transform feedback is stored at. + * output index for dual source blending. */ - unsigned offset; + unsigned index; /** - * Transform feedback buffer. + * Initial binding point for a sampler or UBO. + * + * For array types, this represents the binding point for the first element. */ - unsigned xfb_buffer; + unsigned binding; /** - * Transform feedback stride. + * Storage location of the base of this variable + * + * The precise meaning of this field depends on the nature of the variable. + * + * - Vertex shader input: one of the values from \c gl_vert_attrib. + * - Vertex shader output: one of the values from \c gl_varying_slot. + * - Geometry shader input: one of the values from \c gl_varying_slot. + * - Geometry shader output: one of the values from \c gl_varying_slot. + * - Fragment shader input: one of the values from \c gl_varying_slot. + * - Fragment shader output: one of the values from \c gl_frag_result. + * - Uniforms: Per-stage uniform slot number for default uniform block. + * - Uniforms: Index within the uniform block definition for UBO members. 
+ * - Non-UBO Uniforms: uniform slot number. + * - Other: This field is not currently used. + * + * If the variable is a uniform, shader input, or shader output, and the + * slot has not been assigned, the value will be -1. */ - unsigned xfb_stride; + int location; /** - * How the variable was declared. See nir_var_declaration_type. - * - * This is used to detect variables generated by the compiler, so should - * not be visible via the API. + * The actual location of the variable in the IR. Only valid for inputs, + * outputs, and uniforms (including samplers and images). */ - unsigned how_declared:2; + unsigned driver_location; /** - * ARB_shader_image_load_store qualifiers. + * Location an atomic counter or transform feedback is stored at. */ - struct { - enum gl_access_qualifier access; + unsigned offset; - /** Image internal format if specified explicitly, otherwise GL_NONE. */ - GLenum format; - } image; + union { + struct { + /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */ + enum pipe_format format; + } image; + + struct { + /** + * Transform feedback buffer. + */ + uint16_t buffer:2; + + /** + * Transform feedback stride. + */ + uint16_t stride; + } xfb; + }; } data; + /** + * Identifier for this variable generated by nir_index_vars() that is unique + * among other variables in the same exec_list. + */ + unsigned index; + + /* Number of nir_variable_data members */ + uint16_t num_members; + /** * Built-in state that backs this uniform * @@ -399,7 +576,7 @@ typedef struct nir_variable { * \c state_slots will be \c NULL. */ /*@{*/ - unsigned num_state_slots; /**< Number of state slots used */ + uint16_t num_state_slots; /**< Number of state slots used */ nir_state_slot *state_slots; /**< State descriptors. */ /*@}*/ @@ -412,6 +589,14 @@ typedef struct nir_variable { */ nir_constant *constant_initializer; + /** + * Global variable assigned in the initializer of the variable + * This field should only be used temporarily by creators of NIR shaders + * and then lower_constant_initializers can be used to get rid of them. + * Most of the rest of NIR ignores this field or asserts that it's NULL. + */ + struct nir_variable *pointer_initializer; + /** * For variables that are in an interface block or are an instance of an * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. @@ -428,7 +613,6 @@ typedef struct nir_variable { * inputs each with their own layout specifier. This is only allowed on * variables with a struct or array of array of struct type. */ - unsigned num_members; struct nir_variable_data *members; } nir_variable; @@ -556,6 +740,12 @@ typedef struct nir_ssa_def { /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ uint8_t bit_size; + + /** + * True if this SSA value may have different values in different SIMD + * invocations of the shader. This is set by nir_divergence_analysis. 
+ */ + bool divergent; } nir_ssa_def; struct nir_src; @@ -696,14 +886,12 @@ nir_src_is_const(nir_src src) src.ssa->parent_instr->type == nir_instr_type_load_const; } -int64_t nir_src_as_int(nir_src src); -uint64_t nir_src_as_uint(nir_src src); -bool nir_src_as_bool(nir_src src); -double nir_src_as_float(nir_src src); -int64_t nir_src_comp_as_int(nir_src src, unsigned component); -uint64_t nir_src_comp_as_uint(nir_src src, unsigned component); -bool nir_src_comp_as_bool(nir_src src, unsigned component); -double nir_src_comp_as_float(nir_src src, unsigned component); +static inline bool +nir_src_is_divergent(nir_src src) +{ + assert(src.is_ssa); + return src.ssa->divergent; +} static inline unsigned nir_dest_bit_size(nir_dest dest) @@ -717,6 +905,33 @@ nir_dest_num_components(nir_dest dest) return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components; } +static inline bool +nir_dest_is_divergent(nir_dest dest) +{ + assert(dest.is_ssa); + return dest.ssa.divergent; +} + +/* Are all components the same, ie. .xxxx */ +static inline bool +nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp) +{ + for (unsigned i = 1; i < nr_comp; i++) + if (swiz[i] != swiz[0]) + return false; + return true; +} + +/* Are all components sequential, ie. .yzw */ +static inline bool +nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp) +{ + for (unsigned i = 1; i < nr_comp; i++) + if (swiz[i] != (swiz[0] + i)) + return false; + return true; +} + void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if); void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr); @@ -772,13 +987,15 @@ typedef struct { * The values in this enum are carefully chosen so that the sized type is * just the unsized type OR the number of bits. 
*/ -typedef enum { +typedef enum PACKED { nir_type_invalid = 0, /* Not a valid type */ nir_type_int = 2, nir_type_uint = 4, nir_type_bool = 6, nir_type_float = 128, nir_type_bool1 = 1 | nir_type_bool, + nir_type_bool8 = 8 | nir_type_bool, + nir_type_bool16 = 16 | nir_type_bool, nir_type_bool32 = 32 | nir_type_bool, nir_type_int1 = 1 | nir_type_int, nir_type_int8 = 8 | nir_type_int, @@ -848,9 +1065,21 @@ nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type) case GLSL_TYPE_DOUBLE: return nir_type_float64; break; - default: - unreachable("unknown type"); + + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_VOID: + case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_FUNCTION: + case GLSL_TYPE_ERROR: + return nir_type_invalid; } + + unreachable("unknown type"); } static inline nir_alu_type @@ -870,10 +1099,109 @@ nir_op_vec(unsigned components) case 2: return nir_op_vec2; case 3: return nir_op_vec3; case 4: return nir_op_vec4; + case 8: return nir_op_vec8; + case 16: return nir_op_vec16; default: unreachable("bad component count"); } } +static inline bool +nir_op_is_vec(nir_op op) +{ + switch (op) { + case nir_op_mov: + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + case nir_op_vec8: + case nir_op_vec16: + return true; + default: + return false; + } +} + +static inline bool +nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size) +{ + return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) || + (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) || + (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64); +} + +static inline bool +nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size) +{ + return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) || + (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) || + (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64); +} + +static inline bool +nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size) +{ + return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) || + (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) || + (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64); +} + +static inline bool +nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size) +{ + return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) || + (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) || + (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64); +} + +static inline bool +nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size) +{ + return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) || + (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) || + (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64); +} + +static inline bool +nir_has_any_rounding_mode_rtz(unsigned execution_mode) +{ + return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) || + (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) || + (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64); +} + +static inline bool +nir_has_any_rounding_mode_rtne(unsigned execution_mode) +{ + return 
(execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) || + (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) || + (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64); +} + +static inline nir_rounding_mode +nir_get_rounding_mode_from_float_controls(unsigned execution_mode, + nir_alu_type type) +{ + if (nir_alu_type_get_base_type(type) != nir_type_float) + return nir_rounding_mode_undef; + + unsigned bit_size = nir_alu_type_get_type_size(type); + + if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) + return nir_rounding_mode_rtz; + if (nir_is_rounding_mode_rtne(execution_mode, bit_size)) + return nir_rounding_mode_rtne; + return nir_rounding_mode_undef; +} + +static inline bool +nir_has_any_rounding_mode_enabled(unsigned execution_mode) +{ + bool result = + nir_has_any_rounding_mode_rtne(execution_mode) || + nir_has_any_rounding_mode_rtz(execution_mode); + return result; +} + typedef enum { /** * Operation where the first two sources are commutative. @@ -889,7 +1217,7 @@ typedef enum { typedef struct { const char *name; - unsigned num_inputs; + uint8_t num_inputs; /** * The number of components in the output @@ -908,7 +1236,7 @@ typedef struct { * though output_size is zero; in that case, the inputs with a zero * size act per-component, while the inputs with non-zero size don't. */ - unsigned output_size; + uint8_t output_size; /** * The type of vector that the instruction outputs. Note that the @@ -920,7 +1248,7 @@ typedef struct { /** * The number of components in each input */ - unsigned input_sizes[NIR_MAX_VEC_COMPONENTS]; + uint8_t input_sizes[NIR_MAX_VEC_COMPONENTS]; /** * The type of vector that each input takes. Note that negate and @@ -949,7 +1277,14 @@ typedef struct nir_alu_instr { * it must ensure that the resulting value is bit-for-bit identical to the * original. */ - bool exact; + bool exact:1; + + /** + * Indicates that this instruction do not cause wrapping to occur, in the + * form of overflow or underflow. 
+ */ + bool no_signed_wrap:1; + bool no_unsigned_wrap:1; nir_alu_dest dest; nir_alu_src src[]; @@ -984,26 +1319,43 @@ nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src) return read_mask; } -/* - * For instructions whose destinations are SSA, get the number of channels - * used for a source +/** + * Get the number of channels used for a source */ static inline unsigned nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src) { - assert(instr->dest.dest.is_ssa); - if (nir_op_infos[instr->op].input_sizes[src] > 0) return nir_op_infos[instr->op].input_sizes[src]; - return instr->dest.dest.ssa.num_components; + return nir_dest_num_components(instr->dest.dest); } -bool nir_const_value_negative_equal(const nir_const_value *c1, - const nir_const_value *c2, - unsigned components, - nir_alu_type base_type, - unsigned bits); +static inline bool +nir_alu_instr_is_comparison(const nir_alu_instr *instr) +{ + switch (instr->op) { + case nir_op_flt: + case nir_op_fge: + case nir_op_feq: + case nir_op_fne: + case nir_op_ilt: + case nir_op_ult: + case nir_op_ige: + case nir_op_uge: + case nir_op_ieq: + case nir_op_ine: + case nir_op_i2b1: + case nir_op_f2b1: + case nir_op_inot: + return true; + default: + return false; + } +} + +bool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2, + nir_alu_type full_type); bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, unsigned src1, unsigned src2); @@ -1085,6 +1437,7 @@ nir_deref_instr_get_variable(const nir_deref_instr *instr) } bool nir_deref_instr_has_indirect(nir_deref_instr *instr); +bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr); bool nir_deref_instr_has_complex_use(nir_deref_instr *instr); bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr); @@ -1158,6 +1511,25 @@ nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i) return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i])); } +typedef enum { + /* Memory ordering. */ + NIR_MEMORY_ACQUIRE = 1 << 0, + NIR_MEMORY_RELEASE = 1 << 1, + NIR_MEMORY_ACQ_REL = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE, + + /* Memory visibility operations. */ + NIR_MEMORY_MAKE_AVAILABLE = 1 << 2, + NIR_MEMORY_MAKE_VISIBLE = 1 << 3, +} nir_memory_semantics; + +typedef enum { + NIR_SCOPE_INVOCATION, + NIR_SCOPE_SUBGROUP, + NIR_SCOPE_WORKGROUP, + NIR_SCOPE_QUEUE_FAMILY, + NIR_SCOPE_DEVICE, +} nir_scope; + /** * \name NIR intrinsics semantic flags * @@ -1197,80 +1569,80 @@ typedef enum { /** * For store instructions, a writemask for the store. */ - NIR_INTRINSIC_WRMASK = 2, + NIR_INTRINSIC_WRMASK, /** * The stream-id for GS emit_vertex/end_primitive intrinsics. */ - NIR_INTRINSIC_STREAM_ID = 3, + NIR_INTRINSIC_STREAM_ID, /** * The clip-plane id for load_user_clip_plane intrinsic. */ - NIR_INTRINSIC_UCP_ID = 4, + NIR_INTRINSIC_UCP_ID, /** * The amount of data, starting from BASE, that this instruction may * access. This is used to provide bounds if the offset is not constant. */ - NIR_INTRINSIC_RANGE = 5, + NIR_INTRINSIC_RANGE, /** * The Vulkan descriptor set for vulkan_resource_index intrinsic. */ - NIR_INTRINSIC_DESC_SET = 6, + NIR_INTRINSIC_DESC_SET, /** * The Vulkan descriptor set binding for vulkan_resource_index intrinsic. */ - NIR_INTRINSIC_BINDING = 7, + NIR_INTRINSIC_BINDING, /** * Component offset. */ - NIR_INTRINSIC_COMPONENT = 8, + NIR_INTRINSIC_COMPONENT, /** * Interpolation mode (only meaningful for FS inputs). 
*/ - NIR_INTRINSIC_INTERP_MODE = 9, + NIR_INTRINSIC_INTERP_MODE, /** * A binary nir_op to use when performing a reduction or scan operation */ - NIR_INTRINSIC_REDUCTION_OP = 10, + NIR_INTRINSIC_REDUCTION_OP, /** * Cluster size for reduction operations */ - NIR_INTRINSIC_CLUSTER_SIZE = 11, + NIR_INTRINSIC_CLUSTER_SIZE, /** * Parameter index for a load_param intrinsic */ - NIR_INTRINSIC_PARAM_IDX = 12, + NIR_INTRINSIC_PARAM_IDX, /** * Image dimensionality for image intrinsics * * One of GLSL_SAMPLER_DIM_* */ - NIR_INTRINSIC_IMAGE_DIM = 13, + NIR_INTRINSIC_IMAGE_DIM, /** * Non-zero if we are accessing an array image */ - NIR_INTRINSIC_IMAGE_ARRAY = 14, + NIR_INTRINSIC_IMAGE_ARRAY, /** * Image format for image intrinsics */ - NIR_INTRINSIC_FORMAT = 15, + NIR_INTRINSIC_FORMAT, /** * Access qualifiers for image and memory access intrinsics */ - NIR_INTRINSIC_ACCESS = 16, + NIR_INTRINSIC_ACCESS, /** * Alignment for offsets and addresses @@ -1281,13 +1653,46 @@ typedef enum { * * (X - align_offset) % align_mul == 0 */ - NIR_INTRINSIC_ALIGN_MUL = 17, - NIR_INTRINSIC_ALIGN_OFFSET = 18, + NIR_INTRINSIC_ALIGN_MUL, + NIR_INTRINSIC_ALIGN_OFFSET, /** * The Vulkan descriptor type for a vulkan_resource_[re]index intrinsic. */ - NIR_INTRINSIC_DESC_TYPE = 19, + NIR_INTRINSIC_DESC_TYPE, + + /** + * The nir_alu_type of a uniform/input/output + */ + NIR_INTRINSIC_TYPE, + + /** + * The swizzle mask for the instructions + * SwizzleInvocationsAMD and SwizzleInvocationsMaskedAMD + */ + NIR_INTRINSIC_SWIZZLE_MASK, + + /* Separate source/dest access flags for copies */ + NIR_INTRINSIC_SRC_ACCESS, + NIR_INTRINSIC_DST_ACCESS, + + /* Driver location for nir_load_patch_location_ir3 */ + NIR_INTRINSIC_DRIVER_LOCATION, + + /** + * Mask of nir_memory_semantics, includes ordering and visibility. + */ + NIR_INTRINSIC_MEMORY_SEMANTICS, + + /** + * Mask of nir_variable_modes affected by the memory operation. + */ + NIR_INTRINSIC_MEMORY_MODES, + + /** + * Value of nir_scope. + */ + NIR_INTRINSIC_MEMORY_SCOPE, NIR_INTRINSIC_NUM_INDEX_FLAGS, @@ -1298,7 +1703,7 @@ typedef enum { typedef struct { const char *name; - unsigned num_srcs; /** < number of register/SSA inputs */ + uint8_t num_srcs; /** < number of register/SSA inputs */ /** number of components of each input register * @@ -1307,7 +1712,7 @@ typedef struct { * intrinsic consumes however many components are provided and it is not * validated at all. */ - int src_components[NIR_INTRINSIC_MAX_INPUTS]; + int8_t src_components[NIR_INTRINSIC_MAX_INPUTS]; bool has_dest; @@ -1316,16 +1721,16 @@ typedef struct { * If this value is 0, the number of components is given by the * num_components field of nir_intrinsic_instr. 
*/ - unsigned dest_components; + uint8_t dest_components; /** bitfield of legal bit sizes */ - unsigned dest_bit_sizes; + uint8_t dest_bit_sizes; /** the number of constant indices used by the intrinsic */ - unsigned num_indices; + uint8_t num_indices; /** indicates the usage of intr->const_index[n] */ - unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; + uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; /** semantic flags for calls to this intrinsic */ nir_intrinsic_semantic_flag flags; @@ -1334,7 +1739,7 @@ typedef struct { extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; static inline unsigned -nir_intrinsic_src_components(nir_intrinsic_instr *intr, unsigned srcn) +nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn) { const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; assert(srcn < info->num_srcs); @@ -1358,6 +1763,33 @@ nir_intrinsic_dest_components(nir_intrinsic_instr *intr) return intr->num_components; } +/** + * Helper to copy const_index[] from src to dst, without assuming they + * match in order. + */ +static inline void +nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src) +{ + if (src->intrinsic == dst->intrinsic) { + memcpy(dst->const_index, src->const_index, sizeof(dst->const_index)); + return; + } + + const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic]; + const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic]; + + for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) { + if (src_info->index_map[i] == 0) + continue; + + /* require that dst instruction also uses the same const_index[]: */ + assert(dst_info->index_map[i] > 0); + + dst->const_index[dst_info->index_map[i] - 1] = + src->const_index[src_info->index_map[i] - 1]; + } +} + #define INTRINSIC_IDX_ACCESSORS(name, flag, type) \ static inline type \ nir_intrinsic_##name(const nir_intrinsic_instr *instr) \ @@ -1389,10 +1821,18 @@ INTRINSIC_IDX_ACCESSORS(param_idx, PARAM_IDX, unsigned) INTRINSIC_IDX_ACCESSORS(image_dim, IMAGE_DIM, enum glsl_sampler_dim) INTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool) INTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier) -INTRINSIC_IDX_ACCESSORS(format, FORMAT, unsigned) +INTRINSIC_IDX_ACCESSORS(src_access, SRC_ACCESS, enum gl_access_qualifier) +INTRINSIC_IDX_ACCESSORS(dst_access, DST_ACCESS, enum gl_access_qualifier) +INTRINSIC_IDX_ACCESSORS(format, FORMAT, enum pipe_format) INTRINSIC_IDX_ACCESSORS(align_mul, ALIGN_MUL, unsigned) INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned) INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned) +INTRINSIC_IDX_ACCESSORS(type, TYPE, nir_alu_type) +INTRINSIC_IDX_ACCESSORS(swizzle_mask, SWIZZLE_MASK, unsigned) +INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned) +INTRINSIC_IDX_ACCESSORS(memory_semantics, MEMORY_SEMANTICS, nir_memory_semantics) +INTRINSIC_IDX_ACCESSORS(memory_modes, MEMORY_MODES, nir_variable_mode) +INTRINSIC_IDX_ACCESSORS(memory_scope, MEMORY_SCOPE, nir_scope) static inline void nir_intrinsic_set_align(nir_intrinsic_instr *intrin, @@ -1420,10 +1860,31 @@ nir_intrinsic_align(const nir_intrinsic_instr *intrin) return align_offset ? 
1 << (ffs(align_offset) - 1) : align_mul; } +unsigned +nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr); + /* Converts a image_deref_* intrinsic into a image_* one */ void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr, nir_ssa_def *handle, bool bindless); +/* Determine if an intrinsic can be arbitrarily reordered and eliminated. */ +static inline bool +nir_intrinsic_can_reorder(nir_intrinsic_instr *instr) +{ + if (instr->intrinsic == nir_intrinsic_load_deref || + instr->intrinsic == nir_intrinsic_load_ssbo || + instr->intrinsic == nir_intrinsic_bindless_image_load || + instr->intrinsic == nir_intrinsic_image_deref_load || + instr->intrinsic == nir_intrinsic_image_load) { + return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER; + } else { + const nir_intrinsic_info *info = + &nir_intrinsic_infos[instr->intrinsic]; + return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && + (info->flags & NIR_INTRINSIC_CAN_REORDER); + } +} + /** * \group texture information * @@ -1475,6 +1936,9 @@ typedef enum { nir_texop_samples_identical, /**< Query whether all samples are definitely * identical. */ + nir_texop_tex_prefetch, /**< Regular texture look-up, eligible for pre-dispatch */ + nir_texop_fragment_fetch, /**< Multisample fragment color texture fetch */ + nir_texop_fragment_mask_fetch,/**< Multisample fragment mask texture fetch */ } nir_texop; typedef struct { @@ -1514,9 +1978,6 @@ typedef struct { */ unsigned texture_index; - /** The size of the texture array or 0 if it's not an array */ - unsigned texture_array_size; - /** The sampler index * * The following operations do not require a sampler and, as such, this @@ -1535,6 +1996,30 @@ typedef struct { unsigned sampler_index; } nir_tex_instr; +/* + * Returns true if the texture operation requires a sampler as a general rule, + * see the documentation of sampler_index. + * + * Note that the specific hw/driver backend could require to a sampler + * object/configuration packet in any case, for some other reason. 
+ */ +static inline bool +nir_tex_instr_need_sampler(const nir_tex_instr *instr) +{ + switch (instr->op) { + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_query_levels: + case nir_texop_texture_samples: + case nir_texop_samples_identical: + return false; + default: + return true; + } +} + static inline unsigned nir_tex_instr_dest_size(const nir_tex_instr *instr) { @@ -1571,6 +2056,7 @@ nir_tex_instr_dest_size(const nir_tex_instr *instr) case nir_texop_texture_samples: case nir_texop_query_levels: case nir_texop_samples_identical: + case nir_texop_fragment_mask_fetch: return 1; default: @@ -1609,23 +2095,12 @@ nir_tex_instr_is_query(const nir_tex_instr *instr) } static inline bool -nir_alu_instr_is_comparison(const nir_alu_instr *instr) +nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr) { switch (instr->op) { - case nir_op_flt: - case nir_op_fge: - case nir_op_feq: - case nir_op_fne: - case nir_op_ilt: - case nir_op_ult: - case nir_op_ige: - case nir_op_uge: - case nir_op_ieq: - case nir_op_ine: - case nir_op_i2b1: - case nir_op_f2b1: - case nir_op_inot: - case nir_op_fnot: + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_lod: return true; default: return false; @@ -1662,19 +2137,30 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src) case nir_tex_src_projector: case nir_tex_src_comparator: case nir_tex_src_bias: + case nir_tex_src_min_lod: case nir_tex_src_ddx: case nir_tex_src_ddy: return nir_type_float; case nir_tex_src_offset: case nir_tex_src_ms_index: + case nir_tex_src_plane: + return nir_type_int; + + case nir_tex_src_ms_mcs: + case nir_tex_src_texture_deref: + case nir_tex_src_sampler_deref: case nir_tex_src_texture_offset: case nir_tex_src_sampler_offset: - return nir_type_int; + case nir_tex_src_texture_handle: + case nir_tex_src_sampler_handle: + return nir_type_uint; - default: - unreachable("Invalid texture source type"); + case nir_num_tex_src_types: + unreachable("nir_num_tex_src_types is not a valid source type"); } + + unreachable("Invalid texture source type"); } static inline unsigned @@ -1736,14 +2222,27 @@ typedef struct { nir_const_value value[]; } nir_load_const_instr; -#define nir_const_load_to_arr(arr, l, m) \ -{ \ - nir_const_value_to_array(arr, l->value, l->def.num_components, m); \ -} while (false); - typedef enum { + /** Return from a function + * + * This instruction is a classic function return. It jumps to + * nir_function_impl::end_block. No return value is provided in this + * instruction. Instead, the function is expected to write any return + * data to a deref passed in from the caller. + */ nir_jump_return, + + /** Break out of the inner-most loop + * + * This has the same semantics as C's "break" statement. + */ nir_jump_break, + + /** Jump back to the top of the inner-most loop + * + * This has the same semantics as C's "continue" statement assuming that a + * NIR loop is implemented as "while (1) { body }". 
+ */ nir_jump_continue, } nir_jump_type; @@ -1823,6 +2322,114 @@ NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, nir_parallel_copy_instr, instr, type, nir_instr_type_parallel_copy) + +#define NIR_DEFINE_SRC_AS_CONST(type, suffix) \ +static inline type \ +nir_src_comp_as_##suffix(nir_src src, unsigned comp) \ +{ \ + assert(nir_src_is_const(src)); \ + nir_load_const_instr *load = \ + nir_instr_as_load_const(src.ssa->parent_instr); \ + assert(comp < load->def.num_components); \ + return nir_const_value_as_##suffix(load->value[comp], \ + load->def.bit_size); \ +} \ + \ +static inline type \ +nir_src_as_##suffix(nir_src src) \ +{ \ + assert(nir_src_num_components(src) == 1); \ + return nir_src_comp_as_##suffix(src, 0); \ +} + +NIR_DEFINE_SRC_AS_CONST(int64_t, int) +NIR_DEFINE_SRC_AS_CONST(uint64_t, uint) +NIR_DEFINE_SRC_AS_CONST(bool, bool) +NIR_DEFINE_SRC_AS_CONST(double, float) + +#undef NIR_DEFINE_SRC_AS_CONST + + +typedef struct { + nir_ssa_def *def; + unsigned comp; +} nir_ssa_scalar; + +static inline bool +nir_ssa_scalar_is_const(nir_ssa_scalar s) +{ + return s.def->parent_instr->type == nir_instr_type_load_const; +} + +static inline nir_const_value +nir_ssa_scalar_as_const_value(nir_ssa_scalar s) +{ + assert(s.comp < s.def->num_components); + nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr); + return load->value[s.comp]; +} + +#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \ +static inline type \ +nir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \ +{ \ + return nir_const_value_as_##suffix( \ + nir_ssa_scalar_as_const_value(s), s.def->bit_size); \ +} + +NIR_DEFINE_SCALAR_AS_CONST(int64_t, int) +NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint) +NIR_DEFINE_SCALAR_AS_CONST(bool, bool) +NIR_DEFINE_SCALAR_AS_CONST(double, float) + +#undef NIR_DEFINE_SCALAR_AS_CONST + +static inline bool +nir_ssa_scalar_is_alu(nir_ssa_scalar s) +{ + return s.def->parent_instr->type == nir_instr_type_alu; +} + +static inline nir_op +nir_ssa_scalar_alu_op(nir_ssa_scalar s) +{ + return nir_instr_as_alu(s.def->parent_instr)->op; +} + +static inline nir_ssa_scalar +nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx) +{ + nir_ssa_scalar out = { NULL, 0 }; + + nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); + assert(alu_src_idx < nir_op_infos[alu->op].num_inputs); + + /* Our component must be written */ + assert(s.comp < s.def->num_components); + assert(alu->dest.write_mask & (1u << s.comp)); + + assert(alu->src[alu_src_idx].src.is_ssa); + out.def = alu->src[alu_src_idx].src.ssa; + + if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) { + /* The ALU src is unsized so the source component follows the + * destination component. + */ + out.comp = alu->src[alu_src_idx].swizzle[s.comp]; + } else { + /* This is a sized source so all source components work together to + * produce all the destination components. Since we need to return a + * scalar, this only works if the source is a scalar. + */ + assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1); + out.comp = alu->src[alu_src_idx].swizzle[0]; + } + assert(out.comp < out.def->num_components); + + return out; +} + + /* * Control flow * @@ -1888,13 +2495,20 @@ typedef struct nir_block { * dom_pre_index and dom_post_index for this block, which makes testing if * a given block is dominated by another block an O(1) operation. 
*/ - unsigned dom_pre_index, dom_post_index; + int16_t dom_pre_index, dom_post_index; /* live in and out for this block; used for liveness analysis */ BITSET_WORD *live_in; BITSET_WORD *live_out; } nir_block; +static inline bool +nir_block_is_reachable(nir_block *b) +{ + /* See also nir_block_dominates */ + return b->dom_post_index != -1; +} + static inline nir_instr * nir_block_first_instr(nir_block *block) { @@ -2020,10 +2634,61 @@ typedef struct { */ typedef enum { nir_metadata_none = 0x0, + + /** Indicates that nir_block::index values are valid. + * + * The start block has index 0 and they increase through a natural walk of + * the CFG. nir_function_impl::num_blocks is the number of blocks and + * every block index is in the range [0, nir_function_impl::num_blocks]. + * + * A pass can preserve this metadata type if it doesn't touch the CFG. + */ nir_metadata_block_index = 0x1, + + /** Indicates that block dominance information is valid + * + * This includes: + * + * - nir_block::num_dom_children + * - nir_block::dom_children + * - nir_block::dom_frontier + * - nir_block::dom_pre_index + * - nir_block::dom_post_index + * + * A pass can preserve this metadata type if it doesn't touch the CFG. + */ nir_metadata_dominance = 0x2, + + /** Indicates that SSA def data-flow liveness information is valid + * + * This includes: + * + * - nir_ssa_def::live_index + * - nir_block::live_in + * - nir_block::live_out + * + * A pass can preserve this metadata type if it never adds or removes any + * SSA defs (most passes shouldn't preserve this metadata type). + */ nir_metadata_live_ssa_defs = 0x4, + + /** A dummy metadata value to track when a pass forgot to call + * nir_metadata_preserve. + * + * A pass should always clear this value even if it doesn't make any + * progress to indicate that it thought about preserving metadata. + */ nir_metadata_not_properly_reset = 0x8, + + /** Indicates that loop analysis information is valid. + * + * This includes everything pointed to by nir_loop::info. + * + * A pass can preserve this metadata type if it is guaranteed to not affect + * any loop metadata. However, since loop metadata includes things like + * loop counts which depend on arithmetic in the loop, this is very hard to + * determine. Most passes shouldn't preserve this metadata type. + */ nir_metadata_loop_analysis = 0x10, } nir_metadata; @@ -2204,6 +2869,8 @@ typedef enum { nir_lower_minmax64 = (1 << 10), nir_lower_shift64 = (1 << 11), nir_lower_imul_2x32_64 = (1 << 12), + nir_lower_extract64 = (1 << 13), + nir_lower_ufind_msb64 = (1 << 14), } nir_lower_int64_options; typedef enum { @@ -2216,9 +2883,18 @@ typedef enum { nir_lower_dfract = (1 << 6), nir_lower_dround_even = (1 << 7), nir_lower_dmod = (1 << 8), - nir_lower_fp64_full_software = (1 << 9), + nir_lower_dsub = (1 << 9), + nir_lower_ddiv = (1 << 10), + nir_lower_fp64_full_software = (1 << 11), } nir_lower_doubles_options; +typedef enum { + nir_divergence_single_prim_per_subgroup = (1 << 0), + nir_divergence_single_patch_per_tcs_subgroup = (1 << 1), + nir_divergence_single_patch_per_tes_subgroup = (1 << 2), + nir_divergence_view_index_uniform = (1 << 3), +} nir_divergence_options; + typedef struct nir_shader_compiler_options { bool lower_fdiv; bool lower_ffma; @@ -2230,23 +2906,22 @@ typedef struct nir_shader_compiler_options { bool lower_fpow; bool lower_fsat; bool lower_fsqrt; - bool lower_fmod16; - bool lower_fmod32; - bool lower_fmod64; + bool lower_sincos; + bool lower_fmod; /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. 
*/ bool lower_bitfield_extract; - /** Lowers ibitfield_extract/ubitfield_extract to bfm, compares, shifts. */ + /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */ bool lower_bitfield_extract_to_shifts; /** Lowers bitfield_insert to bfi/bfm */ bool lower_bitfield_insert; - /** Lowers bitfield_insert to bfm, compares, and shifts. */ + /** Lowers bitfield_insert to compares, and shifts. */ bool lower_bitfield_insert_to_shifts; + /** Lowers bitfield_insert to bfm/bitfield_select. */ + bool lower_bitfield_insert_to_bitfield_select; /** Lowers bitfield_reverse to shifts. */ bool lower_bitfield_reverse; /** Lowers bit_count to shifts. */ bool lower_bit_count; - /** Lowers bfm to shifts and subtracts. */ - bool lower_bfm; /** Lowers ifind_msb to compare and ufind_msb */ bool lower_ifind_msb; /** Lowers find_lsb to ufind_msb and logic ops */ @@ -2263,15 +2938,27 @@ typedef struct nir_shader_compiler_options { /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */ bool lower_scmp; + /* lower fall_equalN/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */ + bool lower_vector_cmp; + /** enables rules to lower idiv by power-of-two: */ bool lower_idiv; + /** enable rules to avoid bit ops */ + bool lower_bitops; + /** enables rules to lower isign to imin+imax */ bool lower_isign; /** enables rules to lower fsign to fsub and flt */ bool lower_fsign; + /* lower fdph to fdot4 */ + bool lower_fdph; + + /** lower fdot to fmul and fsum/fadd. */ + bool lower_fdot; + /* Does the native fdot instruction replicate its result for four * components? If so, then opt_algebraic_late will turn all fdotN * instructions into fdot_replicatedN instructions. @@ -2302,6 +2989,8 @@ typedef struct nir_shader_compiler_options { bool lower_unpack_unorm_4x8; bool lower_unpack_snorm_4x8; + bool lower_pack_split; + bool lower_extract_byte; bool lower_extract_word; @@ -2347,14 +3036,59 @@ typedef struct nir_shader_compiler_options { /* Set if nir_lower_wpos_ytransform() should also invert gl_PointCoord. */ bool lower_wpos_pntc; + /** + * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be + * lowered to simple arithmetic. + * + * If this flag is set, the lowering will be applied to all bit-sizes of + * these instructions. + * + * \sa ::lower_hadd64 + */ bool lower_hadd; + + /** + * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions + * should be lowered to simple arithmetic. + * + * If this flag is set, the lowering will be applied to only 64-bit + * versions of these instructions. + * + * \sa ::lower_hadd + */ + bool lower_hadd64; + + /** + * Set if nir_op_add_sat and nir_op_usub_sat should be lowered to simple + * arithmetic. + * + * If this flag is set, the lowering will be applied to all bit-sizes of + * these instructions. + * + * \sa ::lower_usub_sat64 + */ bool lower_add_sat; + /** + * Set if only 64-bit nir_op_usub_sat should be lowered to simple + * arithmetic. + * + * \sa ::lower_add_sat + */ + bool lower_usub_sat64; + /** * Should IO be re-vectorized? Some scalar ISAs still operate on vec4's * for IO purposes and would prefer loads/stores be vectorized. */ bool vectorize_io; + bool lower_to_scalar; + + /** + * Should the linker unify inputs_read/outputs_written between adjacent + * shader stages which are linked into a single program? + */ + bool unify_interfaces; /** * Should nir_lower_io() create load_interpolated_input intrinsics? 
@@ -2367,6 +3101,49 @@ typedef struct nir_shader_compiler_options { /* Lowers when 32x32->64 bit multiplication is not supported */ bool lower_mul_2x32_64; + /* Lowers when rotate instruction is not supported */ + bool lower_rotate; + + /** + * Backend supports imul24, and would like to use it (when possible) + * for address/offset calculation. If true, driver should call + * nir_lower_amul(). (If not set, amul will automatically be lowered + * to imul.) + */ + bool has_imul24; + + /** Backend supports umul24, if not set umul24 will automatically be lowered + * to imul with masked inputs */ + bool has_umul24; + + /** Backend supports umad24, if not set umad24 will automatically be lowered + * to imul with masked inputs and iadd */ + bool has_umad24; + + /* Whether to generate only scoped_memory_barrier intrinsics instead of the + * set of memory barrier intrinsics based on GLSL. + */ + bool use_scoped_memory_barrier; + + /** + * Is this the Intel vec4 backend? + * + * Used to inhibit algebraic optimizations that are known to be harmful on + * the Intel vec4 backend. This is generally applicable to any + * optimization that might cause more immediate values to be used in + * 3-source (e.g., ffma and flrp) instructions. + */ + bool intel_vec4; + + /** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */ + bool lower_bfe_with_two_constants; + + /** Whether 8-bit ALU is supported. */ + bool support_8bit_alu; + + /** Whether 16-bit ALU is supported. */ + bool support_16bit_alu; + unsigned max_unroll_iterations; nir_lower_int64_options lower_int64_options; @@ -2768,6 +3545,8 @@ nir_instr_remove(nir_instr *instr) /** @} */ +nir_ssa_def *nir_instr_ssa_def(nir_instr *instr); + typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state); typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state); typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state); @@ -2775,6 +3554,9 @@ bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state); bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); +bool nir_foreach_phi_src_leaving_block(nir_block *instr, + nir_foreach_src_cb cb, + void *state); nir_const_value *nir_src_as_const_value(nir_src src); @@ -2793,6 +3575,7 @@ NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref) bool nir_src_is_dynamically_uniform(nir_src src); bool nir_srcs_equal(nir_src src1, nir_src src2); +bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2); void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); @@ -2883,6 +3666,8 @@ unsigned nir_index_instrs(nir_function_impl *impl); void nir_index_blocks(nir_function_impl *impl); +void nir_index_vars(nir_shader *shader, nir_function_impl *impl, nir_variable_mode modes); + void nir_print_shader(nir_shader *shader, FILE *fp); void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors); void nir_print_instr(const nir_instr *instr, FILE *fp); @@ -2897,7 +3682,9 @@ nir_function_impl *nir_function_impl_clone(nir_shader *shader, nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var); nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader); -nir_shader *nir_shader_serialize_deserialize(void *mem_ctx, nir_shader *s); +void 
nir_shader_replace(nir_shader *dest, nir_shader *src); + +void nir_shader_serialize_deserialize(nir_shader *s); #ifndef NDEBUG void nir_validate_shader(nir_shader *shader, const char *when); @@ -2969,12 +3756,10 @@ static inline bool should_print_nir(void) { return false; } nir_validate_shader(nir, "after " #pass); \ if (should_clone_nir()) { \ nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ - ralloc_free(nir); \ - nir = clone; \ + nir_shader_replace(nir, clone); \ } \ if (should_serialize_deserialize_nir()) { \ - void *mem_ctx = ralloc_parent(nir); \ - nir = nir_shader_serialize_deserialize(mem_ctx, nir); \ + nir_shader_serialize_deserialize(nir); \ } \ } while (0) @@ -3000,11 +3785,64 @@ static inline bool should_print_nir(void) { return false; } #define NIR_SKIP(name) should_skip_nir(#name) +/** An instruction filtering callback + * + * Returns true if the instruction should be processed and false otherwise. + */ +typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *); + +/** A simple instruction lowering callback + * + * Many instruction lowering passes can be written as a simple function which + * takes an instruction as its input and returns a sequence of instructions + * that implement the consumed instruction. This function type represents + * such a lowering function. When called, a function with this prototype + * should either return NULL indicating that no lowering needs to be done or + * emit a sequence of instructions using the provided builder (whose cursor + * will already be placed after the instruction to be lowered) and return the + * resulting nir_ssa_def. + */ +typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *, + nir_instr *, void *); + +/** + * Special return value for nir_lower_instr_cb when some progress occurred + * (like changing an input to the instr) that didn't result in a replacement + * SSA def being generated. + */ +#define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1) + +/** Iterate over all the instructions in a nir_function_impl and lower them + * using the provided callbacks + * + * This function implements the guts of a standard lowering pass for you. It + * iterates over all of the instructions in a nir_function_impl and calls the + * filter callback on each one. If the filter callback returns true, it then + * calls the lowering call back on the instruction. (Splitting it this way + * allows us to avoid some save/restore work for instructions we know won't be + * lowered.) If the instruction is dead after the lowering is complete, it + * will be removed. If new instructions are added, the lowering callback will + * also be called on them in case multiple lowerings are required. + * + * The metadata for the nir_function_impl will also be updated. If any blocks + * are added (they cannot be removed), dominance and block indices will be + * invalidated. 
+ */ +bool nir_function_impl_lower_instructions(nir_function_impl *impl, + nir_instr_filter_cb filter, + nir_lower_instr_cb lower, + void *cb_data); +bool nir_shader_lower_instructions(nir_shader *shader, + nir_instr_filter_cb filter, + nir_lower_instr_cb lower, + void *cb_data); + void nir_calc_dominance_impl(nir_function_impl *impl); void nir_calc_dominance(nir_shader *shader); nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); bool nir_block_dominates(nir_block *parent, nir_block *child); +bool nir_block_is_unreachable(nir_block *block); void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); void nir_dump_dom_tree(nir_shader *shader, FILE *fp); @@ -3065,6 +3903,8 @@ bool nir_lower_vars_to_scratch(nir_shader *shader, int size_threshold, glsl_type_size_align_func size_align); +void nir_lower_clip_halfz(nir_shader *shader); + void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); void nir_gather_ssa_types(nir_function_impl *impl, @@ -3084,7 +3924,29 @@ void nir_compact_varyings(nir_shader *producer, nir_shader *consumer, void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer); bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer); +bool nir_lower_amul(nir_shader *shader, + int (*type_size)(const struct glsl_type *, bool)); + +void nir_assign_io_var_locations(struct exec_list *var_list, + unsigned *size, + gl_shader_stage stage); + +typedef struct { + uint8_t num_linked_io_vars; + uint8_t num_linked_patch_io_vars; +} nir_linked_io_var_info; + +nir_linked_io_var_info +nir_assign_linked_io_var_locations(nir_shader *producer, + nir_shader *consumer); + typedef enum { + /* If set, this causes all 64-bit IO operations to be lowered on-the-fly + * to 32-bit operations. This is only valid for nir_var_shader_in/out + * modes. + */ + nir_lower_io_lower_64bit_to_32 = (1 << 0), + /* If set, this forces all non-flat fragment shader inputs to be * interpolated as if with the "sample" qualifier. This requires * nir_shader_compiler_options::use_interpolated_input_intrinsics. @@ -3096,6 +3958,13 @@ bool nir_lower_io(nir_shader *shader, int (*type_size)(const struct glsl_type *, bool), nir_lower_io_options); +bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode); + +bool +nir_lower_vars_to_explicit_types(nir_shader *shader, + nir_variable_mode modes, + glsl_type_size_align_func type_info); + typedef enum { /** * An address format which is a simple 32-bit global GPU address. 
@@ -3177,6 +4046,12 @@ nir_address_format_to_glsl_type(nir_address_format addr_format) const nir_const_value *nir_address_format_null_value(nir_address_format addr_format); +nir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, + nir_address_format addr_format); + +nir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1, + nir_address_format addr_format); + nir_ssa_def * nir_explicit_io_address_from_deref(struct nir_builder *b, nir_deref_instr *deref, nir_ssa_def *base_addr, @@ -3202,20 +4077,21 @@ bool nir_lower_vars_to_ssa(nir_shader *shader); bool nir_remove_dead_derefs(nir_shader *shader); bool nir_remove_dead_derefs_impl(nir_function_impl *impl); bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes); -bool nir_lower_constant_initializers(nir_shader *shader, +bool nir_lower_variable_initializers(nir_shader *shader, nir_variable_mode modes); -bool nir_move_load_const(nir_shader *shader); bool nir_move_vec_src_uses_to_dest(nir_shader *shader); bool nir_lower_vec_to_movs(nir_shader *shader); void nir_lower_alpha_test(nir_shader *shader, enum compare_func func, - bool alpha_to_one); + bool alpha_to_one, + const gl_state_index16 *alpha_ref_state_tokens); bool nir_lower_alu(nir_shader *shader); bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask, bool always_precise, bool have_ffma); -bool nir_lower_alu_to_scalar(nir_shader *shader, BITSET_WORD *lower_set); +bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data); +bool nir_lower_bool_to_bitsize(nir_shader *shader); bool nir_lower_bool_to_float(nir_shader *shader); bool nir_lower_bool_to_int32(nir_shader *shader); bool nir_lower_int_to_float(nir_shader *shader); @@ -3243,6 +4119,8 @@ typedef struct nir_lower_subgroups_options { bool lower_shuffle:1; bool lower_shuffle_to_32bit:1; bool lower_quad:1; + bool lower_quad_broadcast_dynamic:1; + bool lower_quad_broadcast_dynamic_to_const:1; } nir_lower_subgroups_options; bool nir_lower_subgroups(nir_shader *shader, @@ -3394,6 +4272,12 @@ typedef struct nir_lower_tex_options { */ bool lower_txd_clamp_if_sampler_index_not_lt_16; + /** + * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with + * 0-lod followed by a nir_ishr. + */ + bool lower_txs_lod; + /** * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's * mixed-up tg4 locations. @@ -3421,18 +4305,44 @@ enum nir_lower_non_uniform_access_type { bool nir_lower_non_uniform_access(nir_shader *shader, enum nir_lower_non_uniform_access_type); -bool nir_lower_idiv(nir_shader *shader); +enum nir_lower_idiv_path { + /* This path is based on NV50LegalizeSSA::handleDIV(). It is the faster of + * the two but it is not exact in some cases (for example, 1091317713u / + * 1034u gives 5209173 instead of 1055432) */ + nir_lower_idiv_fast, + /* This path is based on AMDGPUTargetLowering::LowerUDIVREM() and + * AMDGPUTargetLowering::LowerSDIVREM(). It requires more instructions than + * the nv50 path and many of them are integer multiplications, so it is + * probably slower. It should always return the correct result, though. 
*/ + nir_lower_idiv_precise, +}; + +bool nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path); + +bool nir_lower_input_attachments(nir_shader *shader, bool use_fragcoord_sysval); -bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars); -bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); +bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, + bool use_vars, + bool use_clipdist_array, + const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]); +bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables, + bool use_clipdist_array, + const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]); +bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables, + bool use_clipdist_array); bool nir_lower_clip_cull_distance_arrays(nir_shader *nir); +void nir_lower_point_size_mov(nir_shader *shader, + const gl_state_index16 *pointsize_state_tokens); + bool nir_lower_frexp(nir_shader *nir); void nir_lower_two_sided_color(nir_shader *shader); bool nir_lower_clamp_color_outputs(nir_shader *shader); +bool nir_lower_flatshade(nir_shader *shader); + void nir_lower_passthrough_edgeflags(nir_shader *shader); bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count, const gl_state_index16 *uniform_state_tokens); @@ -3449,6 +4359,8 @@ bool nir_lower_wpos_ytransform(nir_shader *shader, const nir_lower_wpos_ytransform_options *options); bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading); +bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data); + bool nir_lower_fb_read(nir_shader *shader); typedef struct nir_lower_drawpixels_options { @@ -3471,7 +4383,7 @@ typedef struct nir_lower_bitmap_options { void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options); -bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset); +bool nir_lower_atomics_to_ssbo(nir_shader *shader); typedef enum { nir_lower_int_source_mods = 1 << 0, @@ -3483,7 +4395,7 @@ typedef enum { bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options); -bool nir_lower_gs_intrinsics(nir_shader *shader); +bool nir_lower_gs_intrinsics(nir_shader *shader, bool per_stream); typedef unsigned (*nir_lower_bit_size_callback)(const nir_alu_instr *, void *); @@ -3499,6 +4411,23 @@ bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64, nir_lower_doubles_options options); bool nir_lower_pack(nir_shader *shader); +void nir_lower_mediump_outputs(nir_shader *nir); + +bool nir_lower_point_size(nir_shader *shader, float min, float max); + +typedef enum { + nir_lower_interpolation_at_sample = (1 << 1), + nir_lower_interpolation_at_offset = (1 << 2), + nir_lower_interpolation_centroid = (1 << 3), + nir_lower_interpolation_pixel = (1 << 4), + nir_lower_interpolation_sample = (1 << 5), +} nir_lower_interpolation_options; + +bool nir_lower_interpolation(nir_shader *shader, + nir_lower_interpolation_options options); + +bool nir_lower_discard_to_demote(nir_shader *shader); + bool nir_normalize_cubemap_coords(nir_shader *shader); void nir_live_ssa_defs_impl(nir_function_impl *impl); @@ -3512,6 +4441,8 @@ bool nir_repair_ssa_impl(nir_function_impl *impl); bool nir_repair_ssa(nir_shader *shader); void nir_convert_loop_to_lcssa(nir_loop *loop); +bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants); +void nir_divergence_analysis(nir_shader *shader, nir_divergence_options options); /* If phi_webs_only is true, 
only convert SSA values involved in phi nodes to * registers. If false, convert all values (even those not involved in a phi @@ -3523,13 +4454,32 @@ bool nir_lower_phis_to_regs_block(nir_block *block); bool nir_lower_ssa_defs_to_regs_block(nir_block *block); bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl); +bool nir_lower_samplers(nir_shader *shader); +bool nir_lower_ssbo(nir_shader *shader); + +/* This is here for unit tests. */ +bool nir_opt_comparison_pre_impl(nir_function_impl *impl); + bool nir_opt_comparison_pre(nir_shader *shader); +bool nir_opt_access(nir_shader *shader); bool nir_opt_algebraic(nir_shader *shader); bool nir_opt_algebraic_before_ffma(nir_shader *shader); bool nir_opt_algebraic_late(nir_shader *shader); +bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader); bool nir_opt_constant_folding(nir_shader *shader); +/* Try to combine a and b into a. Return true if combination was possible, + * which will result in b being removed by the pass. Return false if + * combination wasn't possible. + */ +typedef bool (*nir_combine_memory_barrier_cb)( + nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data); + +bool nir_opt_combine_memory_barriers(nir_shader *shader, + nir_combine_memory_barrier_cb combine_cb, + void *data); + bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes); bool nir_copy_prop(nir_shader *shader); @@ -3563,14 +4513,27 @@ bool nir_opt_large_constants(nir_shader *shader, bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask); -bool nir_opt_move_comparisons(nir_shader *shader); +typedef enum { + nir_move_const_undef = (1 << 0), + nir_move_load_ubo = (1 << 1), + nir_move_load_input = (1 << 2), + nir_move_comparisons = (1 << 3), + nir_move_copies = (1 << 4), +} nir_move_options; + +bool nir_can_move_instr(nir_instr *instr, nir_move_options options); -bool nir_opt_move_load_ubo(nir_shader *shader); +bool nir_opt_sink(nir_shader *shader, nir_move_options options); + +bool nir_opt_move(nir_shader *shader, nir_move_options options); bool nir_opt_peephole_select(nir_shader *shader, unsigned limit, bool indirect_load_ok, bool expensive_alu_ok); +bool nir_opt_rematerialize_compares(nir_shader *shader); + bool nir_opt_remove_phis(nir_shader *shader); +bool nir_opt_remove_phis_block(nir_block *block); bool nir_opt_shrink_load(nir_shader *shader); @@ -3578,8 +4541,20 @@ bool nir_opt_trivial_continues(nir_shader *shader); bool nir_opt_undef(nir_shader *shader); +bool nir_opt_vectorize(nir_shader *shader); + bool nir_opt_conditional_discard(nir_shader *shader); +typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size, + unsigned num_components, unsigned high_offset, + nir_intrinsic_instr *low, nir_intrinsic_instr *high); + +bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes, + nir_should_vectorize_mem_func callback, + nir_variable_mode robust_modes); + +void nir_schedule(nir_shader *shader, int threshold); + void nir_strip(nir_shader *shader); void nir_sweep(nir_shader *shader); @@ -3591,7 +4566,25 @@ uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot); nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val); gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin); -bool nir_lower_sincos(nir_shader *shader); +static inline bool +nir_variable_is_in_ubo(const nir_variable *var) +{ + return (var->data.mode == nir_var_mem_ubo && + var->interface_type != NULL); +} + +static inline bool 
+nir_variable_is_in_ssbo(const nir_variable *var) +{ + return (var->data.mode == nir_var_mem_ssbo && + var->interface_type != NULL); +} + +static inline bool +nir_variable_is_in_block(const nir_variable *var) +{ + return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var); +} #ifdef __cplusplus } /* extern "C" */
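/* A minimal usage sketch for the new nir_lower_idiv() signature added above.
 * It assumes a hypothetical backend flag "exact_idiv"; only the
 * nir_lower_idiv_path enum and the nir_lower_idiv() declaration come from
 * this header, the surrounding driver code is illustrative.  The fast path
 * mirrors nv50's legalization and can be slightly off for some inputs
 * (e.g. 1091317713u / 1034u), while the precise path always returns the
 * exact quotient at the cost of extra integer multiplies. */
static void
example_lower_division(nir_shader *shader, bool exact_idiv)
{
   nir_lower_idiv(shader, exact_idiv ? nir_lower_idiv_precise
                                     : nir_lower_idiv_fast);
}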
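/* A minimal sketch of a nir_combine_memory_barrier_cb, assuming a backend
 * for which two barriers with the same intrinsic op are redundant.  Only the
 * callback typedef and nir_opt_combine_memory_barriers() come from this
 * header; the merging policy itself is illustrative.  Such a callback would
 * typically be registered as
 * nir_opt_combine_memory_barriers(shader, example_combine_barriers, NULL). */
static bool
example_combine_barriers(nir_intrinsic_instr *a, nir_intrinsic_instr *b,
                         void *data)
{
   (void)data;
   /* Keep 'a' and let the pass delete 'b' when both barriers use the same
    * intrinsic op; a real backend would also compare scopes and semantics. */
   return a->intrinsic == b->intrinsic;
}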
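/* Likewise, a hedged sketch of a nir_should_vectorize_mem_func for
 * nir_opt_load_store_vectorize(), assuming a hypothetical backend limited to
 * dword-aligned loads/stores of at most 128 bits.  Only the typedef and the
 * pass declaration come from this header.  A backend might then call
 * nir_opt_load_store_vectorize(shader, nir_var_mem_ubo | nir_var_mem_ssbo,
 * example_should_vectorize_mem, 0), where the mode masks are illustrative. */
static bool
example_should_vectorize_mem(unsigned align, unsigned bit_size,
                             unsigned num_components, unsigned high_offset,
                             nir_intrinsic_instr *low, nir_intrinsic_instr *high)
{
   (void)high_offset; (void)low; (void)high;
   /* Reject combined accesses wider than 128 bits or below dword alignment. */
   if (bit_size * num_components > 128)
      return false;
   return (align % 4) == 0;
}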