#include "compiler/shader_info.h"
#include <stdio.h>
-#ifdef DEBUG
+#ifndef NDEBUG
#include "util/debug.h"
-#endif /* DEBUG */
+#endif /* NDEBUG */
#include "nir_opcodes.h"
extern "C" {
#endif
-struct gl_program;
-struct gl_shader_program;
-
#define NIR_FALSE 0u
#define NIR_TRUE (~0u)
* \sa nir_variable::state_slots
*/
typedef struct {
- int tokens[5];
+ /* State tokens for this slot: STATE_LENGTH entries of gl_state_index16. */
+ gl_state_index16 tokens[STATE_LENGTH];
+ /* NOTE(review): presumably a packed component swizzle applied to the
+  * referenced state value -- encoding is not visible here; confirm. */
int swizzle;
} nir_state_slot;
nir_var_all = ~0,
} nir_variable_mode;
+/**
+ * Rounding modes.
+ *
+ * Taken by nir_type_conversion_op() as its third argument.
+ * nir_rounding_mode_undef has the value 0, so a zero-initialized
+ * nir_rounding_mode holds nir_rounding_mode_undef.  The enumerators carry
+ * explicit values; keep them stable when adding new modes.
+ */
+typedef enum {
+ nir_rounding_mode_undef = 0, /* undefined: no rounding mode given */
+ nir_rounding_mode_rtne = 1, /* round to nearest even */
+ nir_rounding_mode_ru = 2, /* round up */
+ nir_rounding_mode_rd = 3, /* round down */
+ nir_rounding_mode_rtz = 4, /* round towards zero */
+} nir_rounding_mode;
typedef union {
float f32[4];
*/
unsigned fb_fetch_output:1;
+ /**
+ * Non-zero if this variable is considered bindless as defined by
+ * ARB_bindless_texture.
+ */
+ unsigned bindless:1;
+
+ /**
+ * Was an explicit binding set in the shader?
+ */
+ unsigned explicit_binding:1;
+
/**
* \brief Layout qualifier for gl_FragDepth.
*
*/
unsigned int driver_location;
+ /**
+ * Vertex stream output identifier.
+ *
+ * For packed outputs, bit 31 is set and bits [2*i+1,2*i] indicate the
+ * stream of the i-th component.
+ */
+ unsigned stream;
+
/**
* output index for dual source blending.
*/
/** Index into the live_in and live_out bitfields */
unsigned live_index;
+ /** Instruction which produces this SSA value. */
nir_instr *parent_instr;
/** set of nir_instrs where this register is used (read from) */
typedef struct nir_src {
union {
+ /** Instruction that consumes this value as a source. */
nir_instr *parent_instr;
struct nir_if *parent_if;
};
return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size;
}
+/**
+ * Returns the component count of a source: read from the SSA def when
+ * src.is_ssa is set, otherwise from the register referenced by src.reg.reg.
+ */
+static inline unsigned
+nir_src_num_components(nir_src src)
+{
+   if (src.is_ssa)
+      return src.ssa->num_components;
+
+   return src.reg.reg->num_components;
+}
+
+/**
+ * Returns the bit size of a destination: dest.ssa.bit_size when dest.is_ssa
+ * is set, otherwise the bit size of the register referenced by dest.reg.reg.
+ */
static inline unsigned
nir_dest_bit_size(nir_dest dest)
{
return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size;
}
+/**
+ * Returns the component count of a destination: dest.ssa.num_components
+ * when dest.is_ssa is set, otherwise that of the register referenced by
+ * dest.reg.reg.
+ */
+static inline unsigned
+nir_dest_num_components(nir_dest dest)
+{
+   if (dest.is_ssa)
+      return dest.ssa.num_components;
+
+   return dest.reg.reg->num_components;
+}
+
void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
case GLSL_TYPE_INT:
return nir_type_int32;
break;
+ case GLSL_TYPE_UINT16:
+ return nir_type_uint16;
+ break;
+ case GLSL_TYPE_INT16:
+ return nir_type_int16;
+ break;
+ case GLSL_TYPE_UINT8:
+ return nir_type_uint8;
+ case GLSL_TYPE_INT8:
+ return nir_type_int8;
case GLSL_TYPE_UINT64:
return nir_type_uint64;
break;
case GLSL_TYPE_FLOAT:
return nir_type_float32;
break;
+ case GLSL_TYPE_FLOAT16:
+ return nir_type_float16;
+ break;
case GLSL_TYPE_DOUBLE:
return nir_type_float64;
break;
return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type));
}
-nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst);
+nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst,
+ nir_rounding_mode rnd);
typedef enum {
NIR_OP_IS_COMMUTATIVE = (1 << 0),
struct nir_function *callee;
} nir_call_instr;
-#define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \
- num_variables, num_indices, idx0, idx1, idx2, flags) \
- nir_intrinsic_##name,
-
-#define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name,
-
-typedef enum {
#include "nir_intrinsics.h"
- nir_num_intrinsics = nir_last_intrinsic + 1
-} nir_intrinsic_op;
#define NIR_INTRINSIC_MAX_CONST_INDEX 3
*/
NIR_INTRINSIC_INTERP_MODE = 9,
+ /**
+ * A binary nir_op to use when performing a reduction or scan operation
+ */
+ NIR_INTRINSIC_REDUCTION_OP = 10,
+
+ /**
+ * Cluster size for reduction operations
+ */
+ NIR_INTRINSIC_CLUSTER_SIZE = 11,
+
NIR_INTRINSIC_NUM_INDEX_FLAGS,
} nir_intrinsic_index_flag;
extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
+/**
+ * Returns the component count of source number srcn of an intrinsic.
+ *
+ * A non-zero src_components[srcn] entry in the intrinsic's info table is
+ * returned as-is; a zero entry means the count is taken from the
+ * instruction's own num_components.  srcn must be below the intrinsic's
+ * num_srcs (asserted).
+ */
+static inline unsigned
+nir_intrinsic_src_components(nir_intrinsic_instr *intr, unsigned srcn)
+{
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
+
+   assert(srcn < info->num_srcs);
+
+   return info->src_components[srcn] ? info->src_components[srcn]
+                                     : intr->num_components;
+}
+
+/**
+ * Returns the component count of an intrinsic's destination.
+ *
+ * Yields 0 when the intrinsic's info entry has no destination.  Otherwise a
+ * non-zero dest_components in the info entry is returned as-is, and a zero
+ * value means the count is taken from the instruction's num_components.
+ */
+static inline unsigned
+nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
+{
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
+
+   if (!info->has_dest)
+      return 0;
+
+   return info->dest_components ? info->dest_components
+                                : intr->num_components;
+}
#define INTRINSIC_IDX_ACCESSORS(name, flag, type) \
static inline type \
INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned)
INTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned)
INTRINSIC_IDX_ACCESSORS(interp_mode, INTERP_MODE, unsigned)
+INTRINSIC_IDX_ACCESSORS(reduction_op, REDUCTION_OP, unsigned)
+INTRINSIC_IDX_ACCESSORS(cluster_size, CLUSTER_SIZE, unsigned)
/**
* \group texture information
* - nir_texop_txf_ms
* - nir_texop_txs
* - nir_texop_lod
- * - nir_texop_tg4
* - nir_texop_query_levels
* - nir_texop_texture_samples
* - nir_texop_samples_identical
}
}
+/**
+ * Returns true when the ALU instruction's opcode belongs to the fixed set
+ * listed below, false for every other opcode.  Besides the relational
+ * opcodes, the set also contains i2b/f2b and inot/fnot.
+ */
+static inline bool
+nir_alu_instr_is_comparison(const nir_alu_instr *instr)
+{
+   bool in_set = false;
+
+   switch (instr->op) {
+   case nir_op_flt: case nir_op_fge: case nir_op_feq: case nir_op_fne:
+   case nir_op_ilt: case nir_op_ult:
+   case nir_op_ige: case nir_op_uge:
+   case nir_op_ieq: case nir_op_ine:
+   case nir_op_i2b: case nir_op_f2b:
+   case nir_op_inot: case nir_op_fnot:
+      in_set = true;
+      break;
+   default:
+      break;
+   }
+
+   return in_set;
+}
+
+
static inline nir_alu_type
nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
{
if (instr->src[src].src_type == nir_tex_src_ms_mcs)
return 4;
- if (instr->src[src].src_type == nir_tex_src_offset ||
- instr->src[src].src_type == nir_tex_src_ddx ||
+ if (instr->src[src].src_type == nir_tex_src_ddx ||
instr->src[src].src_type == nir_tex_src_ddy) {
if (instr->is_array)
return instr->coord_components - 1;
return instr->coord_components;
}
+ /* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for
+ * the offset, since a cube maps to a single face.
+ */
+ if (instr->src[src].src_type == nir_tex_src_offset) {
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
+ return 2;
+ else if (instr->is_array)
+ return instr->coord_components - 1;
+ else
+ return instr->coord_components;
+ }
+
return 1;
}
bool lower_fsqrt;
bool lower_fmod32;
bool lower_fmod64;
+ /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
bool lower_bitfield_extract;
+ /** Lowers ibitfield_extract/ubitfield_extract to bfm, compares, shifts. */
+ bool lower_bitfield_extract_to_shifts;
+ /** Lowers bitfield_insert to bfi/bfm */
bool lower_bitfield_insert;
+ /** Lowers bitfield_insert to bfm, compares, and shifts. */
+ bool lower_bitfield_insert_to_shifts;
+ /** Lowers bitfield_reverse to shifts. */
+ bool lower_bitfield_reverse;
+ /** Lowers bit_count to shifts. */
+ bool lower_bit_count;
+ /** Lowers bfm to shifts and subtracts. */
+ bool lower_bfm;
+ /** Lowers ifind_msb to compare and ufind_msb */
+ bool lower_ifind_msb;
+ /** Lowers find_lsb to ufind_msb and logic ops */
+ bool lower_find_lsb;
bool lower_uadd_carry;
bool lower_usub_borrow;
+ /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */
+ bool lower_mul_high;
/** lowers fneg and ineg to fsub and isub. */
bool lower_negate;
/** lowers fsub and isub to fadd+fneg and iadd+ineg. */
/** enables rules to lower idiv by power-of-two: */
bool lower_idiv;
+ /* lower b2f to iand */
+ bool lower_b2f;
+
/* Does the native fdot instruction replicate its result for four
* components? If so, then opt_algebraic_late will turn all fdotN
* instructions into fdot_replicatedN instructions.
/** lowers ffract to fsub+ffloor: */
bool lower_ffract;
+ bool lower_ldexp;
+
bool lower_pack_half_2x16;
bool lower_pack_unorm_2x16;
bool lower_pack_snorm_2x16;
bool lower_extract_byte;
bool lower_extract_word;
+ bool lower_all_io_to_temps;
+
/**
* Does the driver support real 32-bit integers? (Otherwise, integers
* are simulated by floats.)
/* Indicates that the driver only has zero-based vertex id */
bool vertex_id_zero_based;
+ /**
+ * If enabled, gl_BaseVertex will be lowered as:
+ * is_indexed_draw (~0/0) & firstvertex
+ */
+ bool lower_base_vertex;
+
bool lower_cs_local_index_from_id;
+ bool lower_device_index_to_zero;
+
/**
* Should nir_lower_io() create load_interpolated_input intrinsics?
*
*/
bool use_interpolated_input_intrinsics;
+ /**
+ * Do vertex shader double inputs use two locations? The Vulkan spec
+ * requires two locations to be used, OpenGL allows a single location.
+ */
+ bool vs_inputs_dual_locations;
+
unsigned max_unroll_iterations;
} nir_shader_compiler_options;
nir_load_const_instr *
nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref);
+nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size);
+
/**
* NIR Cursors and Instruction Insertion API
* @{
nir_instr_insert(nir_after_cf_list(list), after);
}
-void nir_instr_remove(nir_instr *instr);
+void nir_instr_remove_v(nir_instr *instr);
+
+/**
+ * Removes instr through nir_instr_remove_v() and returns a cursor for the
+ * position it occupied: nir_after_instr() of the preceding instruction when
+ * there is one, otherwise nir_before_block() of instr's block.
+ *
+ * The cursor is computed before the removal, while instr is still linked.
+ */
+static inline nir_cursor
+nir_instr_remove(nir_instr *instr)
+{
+   nir_instr *before = nir_instr_prev(instr);
+   nir_cursor where = before ? nir_after_instr(before)
+                             : nir_before_block(instr->block);
+
+   nir_instr_remove_v(instr);
+   return where;
+}
/** @} */
void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
nir_instr *after_me);
-uint8_t nir_ssa_def_components_read(nir_ssa_def *def);
+uint8_t nir_ssa_def_components_read(const nir_ssa_def *def);
/*
* finds the next basic block in source-code order, returns NULL if there is
nir_shader *nir_shader_serialize_deserialize(void *mem_ctx, nir_shader *s);
-#ifdef DEBUG
+#ifndef NDEBUG
void nir_validate_shader(nir_shader *shader);
void nir_metadata_set_validation_flag(nir_shader *shader);
void nir_metadata_check_validation_flag(nir_shader *shader);
static inline bool should_clone_nir(void) { return false; }
static inline bool should_serialize_deserialize_nir(void) { return false; }
static inline bool should_print_nir(void) { return false; }
-#endif /* DEBUG */
+#endif /* NDEBUG */
#define _PASS(nir, do_pass) do { \
do_pass \
/* Some helpers to do very simple linking */
bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
+void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
+ bool default_to_smooth_interp);
typedef enum {
/* If set, this forces all non-flat fragment shader inputs to be
bool nir_lower_constant_initializers(nir_shader *shader,
nir_variable_mode modes);
+bool nir_move_load_const(nir_shader *shader);
bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
bool nir_lower_vec_to_movs(nir_shader *shader);
void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
bool alpha_to_one);
+bool nir_lower_alu(nir_shader *shader);
bool nir_lower_alu_to_scalar(nir_shader *shader);
bool nir_lower_load_const_to_scalar(nir_shader *shader);
bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
bool nir_lower_phis_to_scalar(nir_shader *shader);
+void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer);
+void nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader,
+ bool outputs_only);
void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask);
void nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask);
-bool nir_lower_samplers(nir_shader *shader,
- const struct gl_shader_program *shader_program);
-bool nir_lower_samplers_as_deref(nir_shader *shader,
- const struct gl_shader_program *shader_program);
-
+/* Option set for subgroup lowering.  Two 8-bit sizes followed by lower_*
+ * switches, each stored as a 1-bit bool bitfield.
+ * NOTE(review): presumably consumed by nir_lower_subgroups(), whose
+ * parameter list is not fully visible in this hunk -- confirm. */
typedef struct nir_lower_subgroups_options {
uint8_t subgroup_size;
uint8_t ballot_bit_size;
bool lower_to_scalar:1;
bool lower_vote_trivial:1;
+ bool lower_vote_eq_to_ballot:1;
bool lower_subgroup_masks:1;
+ bool lower_shuffle:1;
+ bool lower_shuffle_to_32bit:1;
+ bool lower_quad:1;
} nir_lower_subgroups_options;
bool nir_lower_subgroups(nir_shader *shader,
* with lower_txd_cube_map.
*/
bool lower_txd_shadow;
+
+ /**
+ * If true, lower nir_texop_txd on all samplers to a nir_texop_txl.
+ * Implies lower_txd_cube_map and lower_txd_shadow.
+ */
+ bool lower_txd;
} nir_lower_tex_options;
bool nir_lower_tex(nir_shader *shader,
void nir_lower_tes_patch_vertices(nir_shader *tes, unsigned patch_vertices);
typedef struct nir_lower_wpos_ytransform_options {
- int state_tokens[5];
+ gl_state_index16 state_tokens[STATE_LENGTH];
bool fs_coord_origin_upper_left :1;
bool fs_coord_origin_lower_left :1;
bool fs_coord_pixel_center_integer :1;
bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading);
typedef struct nir_lower_drawpixels_options {
- int texcoord_state_tokens[5];
- int scale_state_tokens[5];
- int bias_state_tokens[5];
+ gl_state_index16 texcoord_state_tokens[STATE_LENGTH];
+ gl_state_index16 scale_state_tokens[STATE_LENGTH];
+ gl_state_index16 bias_state_tokens[STATE_LENGTH];
unsigned drawpix_sampler;
unsigned pixelmap_sampler;
bool pixel_maps :1;
void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);
-bool nir_lower_atomics(nir_shader *shader,
- const struct gl_shader_program *shader_program);
bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset);
-bool nir_lower_uniforms_to_ubo(nir_shader *shader);
bool nir_lower_to_source_mods(nir_shader *shader);
bool nir_lower_gs_intrinsics(nir_shader *shader);
+/* Callback type taken by nir_lower_bit_size(): receives a const ALU
+ * instruction and an opaque pointer, and returns an unsigned value.
+ * NOTE(review): presumably the return value is the bit size the instruction
+ * should be lowered to -- the pass body is not visible here; confirm. */
+typedef unsigned (*nir_lower_bit_size_callback)(const nir_alu_instr *, void *);
+
+/* Bit-size lowering entry point; takes the shader, a callback, and a
+ * caller-owned callback_data pointer.
+ * NOTE(review): presumably callback_data is forwarded to the callback as
+ * its second argument and the bool reports progress -- confirm. */
+bool nir_lower_bit_size(nir_shader *shader,
+ nir_lower_bit_size_callback callback,
+ void *callback_data);
+
typedef enum {
nir_lower_imul64 = (1 << 0),
nir_lower_isign64 = (1 << 1),
} nir_lower_doubles_options;
bool nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options);
-bool nir_lower_64bit_pack(nir_shader *shader);
+bool nir_lower_pack(nir_shader *shader);
bool nir_normalize_cubemap_coords(nir_shader *shader);
bool nir_opt_move_comparisons(nir_shader *shader);
+bool nir_opt_move_load_ubo(nir_shader *shader);
+
bool nir_opt_peephole_select(nir_shader *shader, unsigned limit);
bool nir_opt_remove_phis(nir_shader *shader);
+bool nir_opt_shrink_load(nir_shader *shader);
+
bool nir_opt_trivial_continues(nir_shader *shader);
bool nir_opt_undef(nir_shader *shader);