nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
- gl_shader_stage stage = b->shader->stage;
+ gl_shader_stage stage = b->shader->info.stage;
if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
(stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {
void
brw_nir_lower_vs_inputs(nir_shader *nir,
- bool use_legacy_snorm_formula,
const uint8_t *vs_attrib_wa_flags)
{
/* Start with the location of the variable's base. */
add_const_offset_to_base(nir, nir_var_shader_in);
- brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula,
- vs_attrib_wa_flags);
+ brw_nir_apply_attribute_workarounds(nir, vs_attrib_wa_flags);
/* The last step is to remap VERT_ATTRIB_* to actual registers */
*/
const bool has_sgvs =
nir->info.system_values_read &
- (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
+ (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
- case nir_intrinsic_load_base_vertex:
+ case nir_intrinsic_load_first_vertex:
case nir_intrinsic_load_base_instance:
case nir_intrinsic_load_vertex_id_zero_base:
case nir_intrinsic_load_instance_id:
+ case nir_intrinsic_load_is_indexed_draw:
case nir_intrinsic_load_draw_id: {
b.cursor = nir_after_instr(&intrin->instr);
nir_intrinsic_set_base(load, num_inputs);
switch (intrin->intrinsic) {
- case nir_intrinsic_load_base_vertex:
+ case nir_intrinsic_load_first_vertex:
nir_intrinsic_set_component(load, 0);
break;
case nir_intrinsic_load_base_instance:
nir_intrinsic_set_component(load, 3);
break;
case nir_intrinsic_load_draw_id:
- /* gl_DrawID is stored right after gl_VertexID and friends
- * if any of them exist.
+ case nir_intrinsic_load_is_indexed_draw:
+ /* gl_DrawID and IsIndexedDraw are stored right after
+ * gl_VertexID and friends if any of them exist.
*/
nir_intrinsic_set_base(load, num_inputs + has_sgvs);
- nir_intrinsic_set_component(load, 0);
+ if (intrin->intrinsic == nir_intrinsic_load_draw_id)
+ nir_intrinsic_set_component(load, 0);
+ else
+ nir_intrinsic_set_component(load, 1);
break;
default:
unreachable("Invalid system value intrinsic");
}
void
-brw_nir_lower_vue_outputs(nir_shader *nir,
- bool is_scalar)
+brw_nir_lower_vue_outputs(nir_shader *nir)
{
nir_foreach_variable(var, &nir->outputs) {
var->data.driver_location = var->data.location;
nir_lower_io(nir, nir_var_shader_out, type_size_dvec4, 0);
}
-void
-brw_nir_lower_cs_shared(nir_shader *nir)
-{
- nir_assign_var_locations(&nir->shared, &nir->num_shared,
- type_size_scalar_bytes);
- nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes, 0);
-}
-
#define OPT(pass, ...) ({ \
bool this_progress = false; \
NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
this_progress; \
})
-static nir_shader *
-nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
- bool is_scalar)
+/* Build a nir_variable_mode mask from the per-stage EmitNoIndirect* options.
+ *
+ * Reads compiler->glsl_compiler_options[stage] and sets nir_var_shader_in,
+ * nir_var_shader_out and nir_var_local when EmitNoIndirectInput,
+ * EmitNoIndirectOutput and EmitNoIndirectTemp are set, respectively.  The
+ * result starts at 0, so it is empty when none of the options is set.
+ *
+ * Elsewhere in this file the returned mask is passed as the mode argument
+ * of nir_lower_indirect_derefs.
+ */
+static nir_variable_mode
+brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
+ gl_shader_stage stage)
{
nir_variable_mode indirect_mask = 0;
- if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
+
+ if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
indirect_mask |= nir_var_shader_in;
- if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput)
+ if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput)
indirect_mask |= nir_var_shader_out;
- if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp)
+ if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
indirect_mask |= nir_var_local;
+ return indirect_mask;
+}
+
+nir_shader *
+brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
+ bool is_scalar)
+{
+ nir_variable_mode indirect_mask =
+ brw_nir_no_indirect_mask(compiler, nir->info.stage);
+
bool progress;
do {
progress = false;
nir_lower_dfract |
nir_lower_dround_even |
nir_lower_dmod);
- OPT(nir_lower_64bit_pack);
+ OPT(nir_lower_pack);
} while (progress);
return nir;
}
+/* Callback handed to nir_lower_bit_size.
+ *
+ * Returns 32 for ALU instructions whose SSA destination is 16 bits wide and
+ * whose opcode is one of idiv, imod, irem, udiv or umod; returns 0 for every
+ * other instruction.  The data pointer is unused.
+ *
+ * NOTE(review): presumably a non-zero return is the bit size the pass should
+ * lower the instruction to and 0 means "leave it alone" -- confirm against
+ * the nir_lower_bit_size documentation.
+ */
+static unsigned
+lower_bit_size_callback(const nir_alu_instr *alu, UNUSED void *data)
+{
+ assert(alu->dest.dest.is_ssa);
+ /* Only 16-bit destinations are candidates. */
+ if (alu->dest.dest.ssa.bit_size != 16)
+ return 0;
+
+ switch (alu->op) {
+ case nir_op_idiv:
+ case nir_op_imod:
+ case nir_op_irem:
+ case nir_op_udiv:
+ case nir_op_umod:
+ return 32;
+ default:
+ return 0;
+ }
+}
+
/* Does some simple lowering and runs the standard suite of optimizations
*
* This is intended to be called more-or-less directly after you get the
const struct gen_device_info *devinfo = compiler->devinfo;
UNUSED bool progress; /* Written by OPT */
- const bool is_scalar = compiler->scalar_stage[nir->stage];
+ const bool is_scalar = compiler->scalar_stage[nir->info.stage];
- if (nir->stage == MESA_SHADER_GEOMETRY)
+ if (nir->info.stage == MESA_SHADER_GEOMETRY)
OPT(nir_lower_gs_intrinsics);
/* See also brw_nir_trig_workarounds.py */
OPT(nir_lower_tex, &tex_options);
OPT(nir_normalize_cubemap_coords);
- OPT(nir_lower_read_invocation_to_scalar);
OPT(nir_lower_global_vars_to_local);
OPT(nir_split_var_copies);
- nir = nir_optimize(nir, compiler, is_scalar);
+ /* Run opt_algebraic before int64 lowering so we can hopefully get rid
+ * of some int64 instructions.
+ */
+ OPT(nir_opt_algebraic);
+
+ /* Lower int64 instructions before brw_nir_optimize so that loop unrolling
+ * sees their actual cost.
+ */
+ nir_lower_int64(nir, nir_lower_imul64 |
+ nir_lower_isign64 |
+ nir_lower_divmod64);
+
+ nir = brw_nir_optimize(nir, compiler, is_scalar);
+
+ /* This needs to be run after the first optimization pass but before we
+ * lower indirect derefs away
+ */
+ if (compiler->supports_shader_constants) {
+ OPT(nir_opt_large_constants, NULL, 32);
+ }
+
+ nir_lower_bit_size(nir, lower_bit_size_callback, NULL);
if (is_scalar) {
OPT(nir_lower_load_const_to_scalar);
/* Lower a bunch of stuff */
OPT(nir_lower_var_copies);
- OPT(nir_lower_clip_cull_distance_arrays);
+ OPT(nir_lower_system_values);
- nir_variable_mode indirect_mask = 0;
- if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
- indirect_mask |= nir_var_shader_in;
- if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput)
- indirect_mask |= nir_var_shader_out;
- if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp)
- indirect_mask |= nir_var_local;
+ const nir_lower_subgroups_options subgroups_options = {
+ .subgroup_size = BRW_SUBGROUP_SIZE,
+ .ballot_bit_size = 32,
+ .lower_to_scalar = true,
+ .lower_subgroup_masks = true,
+ .lower_vote_trivial = !is_scalar,
+ .lower_shuffle = true,
+ };
+ OPT(nir_lower_subgroups, &subgroups_options);
- nir_lower_indirect_derefs(nir, indirect_mask);
+ OPT(nir_lower_clip_cull_distance_arrays);
- nir_lower_int64(nir, nir_lower_imul64 |
- nir_lower_isign64 |
- nir_lower_divmod64);
+ nir_variable_mode indirect_mask =
+ brw_nir_no_indirect_mask(compiler, nir->info.stage);
+ nir_lower_indirect_derefs(nir, indirect_mask);
/* Get rid of split copies */
- nir = nir_optimize(nir, compiler, is_scalar);
+ nir = brw_nir_optimize(nir, compiler, is_scalar);
OPT(nir_remove_dead_variables, nir_var_local);
return nir;
}
+/* Run cross-stage cleanup on a producer/consumer shader pair.
+ *
+ * Both shaders are passed by pointer-to-pointer because brw_nir_optimize
+ * returns a nir_shader pointer that is stored back into *producer and
+ * *consumer.
+ *
+ * Steps, in order:
+ *  - nir_lower_io_arrays_to_elements on the pair, then validate both shaders;
+ *  - remove dead shader_out variables from the producer and dead shader_in
+ *    variables from the consumer;
+ *  - if nir_remove_unused_varyings reports a change, lower global variables
+ *    to locals, lower indirect derefs using each stage's
+ *    brw_nir_no_indirect_mask, and re-run brw_nir_optimize on both shaders
+ *    with the scalar setting taken from compiler->scalar_stage for each stage.
+ */
+void
+brw_nir_link_shaders(const struct brw_compiler *compiler,
+ nir_shader **producer, nir_shader **consumer)
+{
+ nir_lower_io_arrays_to_elements(*producer, *consumer);
+ nir_validate_shader(*producer);
+ nir_validate_shader(*consumer);
+
+ NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
+ NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);
+
+ if (nir_remove_unused_varyings(*producer, *consumer)) {
+ NIR_PASS_V(*producer, nir_lower_global_vars_to_local);
+ NIR_PASS_V(*consumer, nir_lower_global_vars_to_local);
+
+ /* The backend might not be able to handle indirects on
+ * temporaries so we need to lower indirects on any of the
+ * varyings we have demoted here.
+ */
+ NIR_PASS_V(*producer, nir_lower_indirect_derefs,
+ brw_nir_no_indirect_mask(compiler, (*producer)->info.stage));
+ NIR_PASS_V(*consumer, nir_lower_indirect_derefs,
+ brw_nir_no_indirect_mask(compiler, (*consumer)->info.stage));
+
+ /* Re-optimize each shader with its own stage's scalar/vector setting. */
+ const bool p_is_scalar =
+ compiler->scalar_stage[(*producer)->info.stage];
+ *producer = brw_nir_optimize(*producer, compiler, p_is_scalar);
+
+ const bool c_is_scalar =
+ compiler->scalar_stage[(*consumer)->info.stage];
+ *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar);
+ }
+}
+
/* Prepare the given shader for codegen
*
* This function is intended to be called right before going into the actual
{
const struct gen_device_info *devinfo = compiler->devinfo;
bool debug_enabled =
- (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));
+ (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->info.stage));
UNUSED bool progress; /* Written by OPT */
OPT(nir_opt_algebraic_before_ffma);
} while (progress);
- nir = nir_optimize(nir, compiler, is_scalar);
+ nir = brw_nir_optimize(nir, compiler, is_scalar);
if (devinfo->gen >= 6) {
/* Try and fuse multiply-adds */
}
fprintf(stderr, "NIR (SSA form) for %s shader:\n",
- _mesa_shader_stage_to_string(nir->stage));
+ _mesa_shader_stage_to_string(nir->info.stage));
nir_print_shader(nir, stderr);
}
OPT(nir_lower_vec_to_movs);
}
+ OPT(nir_opt_dce);
+
/* This is the last pass we run before we start emitting stuff. It
* determines when we need to insert boolean resolves on Gen <= 5. We
* run it last because it stashes data in instr->pass_flags and we don't
if (unlikely(debug_enabled)) {
fprintf(stderr, "NIR (final form) for %s shader:\n",
- _mesa_shader_stage_to_string(nir->stage));
+ _mesa_shader_stage_to_string(nir->info.stage));
nir_print_shader(nir, stderr);
}
if (nir_lower_tex(nir, &tex_options)) {
nir_validate_shader(nir);
- nir = nir_optimize(nir, compiler, is_scalar);
+ nir = brw_nir_optimize(nir, compiler, is_scalar);
}
return nir;
case nir_type_float:
case nir_type_float32:
return BRW_REGISTER_TYPE_F;
+ case nir_type_float16:
+ return BRW_REGISTER_TYPE_HF;
case nir_type_float64:
return BRW_REGISTER_TYPE_DF;
case nir_type_int64:
return devinfo->gen < 8 ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_Q;
case nir_type_uint64:
return devinfo->gen < 8 ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_UQ;
+ case nir_type_int16:
+ return BRW_REGISTER_TYPE_W;
+ case nir_type_uint16:
+ return BRW_REGISTER_TYPE_UW;
+ case nir_type_int8:
+ return BRW_REGISTER_TYPE_B;
+ case nir_type_uint8:
+ return BRW_REGISTER_TYPE_UB;
default:
unreachable("unknown type");
}
case nir_type_float32:
return GLSL_TYPE_FLOAT;
+ case nir_type_float16:
+ return GLSL_TYPE_FLOAT16;
+
case nir_type_float64:
return GLSL_TYPE_DOUBLE;
case nir_type_uint32:
return GLSL_TYPE_UINT;
+ case nir_type_int16:
+ return GLSL_TYPE_INT16;
+
+ case nir_type_uint16:
+ return GLSL_TYPE_UINT16;
+
default:
unreachable("bad type");
}