.lower_rotate = true,
.lower_to_scalar = true,
.has_imul24 = true,
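+ /* assumption: setting this asks the common wpos/pntc lowering to
+  * handle the gl_PointCoord y-flip instead of the backend:
+  */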
+ .lower_wpos_pntc = true,
};
/* we don't want to lower vertex_id to _zero_based on newer gpus: */
.lower_to_scalar = true,
.has_imul24 = true,
.max_unroll_iterations = 32,
+ .lower_wpos_pntc = true,
};
const nir_shader_compiler_options *
}
void
-ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s)
+ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
{
struct nir_lower_tex_options tex_options = {
.lower_rect = 0,
.lower_tg4_offsets = true,
};
- if (shader->compiler->gpu_id >= 400) {
+ if (compiler->gpu_id >= 400) {
/* a4xx seems to have *no* sam.p */
tex_options.lower_txp = ~0; /* lower all txp */
} else {
/* a3xx just needs to avoid sam.p for 3d tex: */
tex_options.lower_txp = 1 << GLSL_SAMPLER_DIM_3D;
}
- OPT_V(s, nir_lower_regs_to_ssa);
- OPT_V(s, nir_lower_wrmasks, should_split_wrmask, s);
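+ /* GS emit_vertex/end_primitive style intrinsics presumably get
+  * rewritten here into explicit output stores that the backend
+  * understands:
+  */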
+ if (s->info.stage == MESA_SHADER_GEOMETRY)
+ NIR_PASS_V(s, ir3_nir_lower_gs);
- OPT_V(s, ir3_nir_apply_trig_workarounds);
+ NIR_PASS_V(s, nir_lower_io_arrays_to_elements_no_indirects, false);
- if (shader->type == MESA_SHADER_FRAGMENT)
- OPT_V(s, nir_lower_fb_read);
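+ /* nir_lower_amul uses ir3_glsl_type_size to decide which address
+  * multiplies are guaranteed small enough to become imul24:
+  */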
+ NIR_PASS_V(s, nir_lower_amul, ir3_glsl_type_size);
+
+ OPT_V(s, nir_lower_regs_to_ssa);
+ OPT_V(s, nir_lower_wrmasks, should_split_wrmask, s);
OPT_V(s, nir_lower_tex, &tex_options);
OPT_V(s, nir_lower_load_const_to_scalar);
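+ /* earlier gens seem to lack a native tg4, so emulate gather with
+  * plain tex instructions:
+  */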
- if (shader->compiler->gpu_id < 500)
+ if (compiler->gpu_id < 500)
OPT_V(s, ir3_nir_lower_tg4_to_tex);
ir3_optimize_loop(s);
- /* do ubo load and idiv lowering after first opt loop to get a chance to
- * propagate constants for divide by immed power-of-two and constant ubo
- * block/offsets:
- *
- * NOTE that UBO analysis pass should only be done once, before variants
+ /* do idiv lowering after first opt loop to get a chance to propagate
+ * constants for divide by immed power-of-two:
*/
- const bool ubo_progress = OPT(s, ir3_nir_analyze_ubo_ranges, shader);
const bool idiv_progress = OPT(s, nir_lower_idiv, nir_lower_idiv_fast);
- /* UBO offset lowering has to come after we've decided what will be left as load_ubo */
- OPT_V(s, ir3_nir_lower_io_offsets, shader->compiler->gpu_id);
- if (ubo_progress || idiv_progress)
+ if (idiv_progress)
ir3_optimize_loop(s);
OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
}
nir_sweep(s);
+}
+
+/**
+ * Late passes that need to be done after pscreen->finalize_nir()
+ */
+void
+ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s)
+{
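+ /* lower input/output derefs to offset based load_input/store_output
+  * intrinsics, using ir3's slot sizing:
+  */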
+ NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
+ ir3_glsl_type_size, (nir_lower_io_options)0);
+
+ if (s->info.stage == MESA_SHADER_FRAGMENT) {
+ /* NOTE: lower load_barycentric_at_sample first, since it
+ * produces load_barycentric_at_offset:
+ */
+ NIR_PASS_V(s, ir3_nir_lower_load_barycentric_at_sample);
+ NIR_PASS_V(s, ir3_nir_lower_load_barycentric_at_offset);
+ NIR_PASS_V(s, ir3_nir_move_varying_inputs);
+ NIR_PASS_V(s, nir_lower_fb_read);
+ }
- /* The first time thru, when not creating variant, do the one-time
- * const_state layout setup. This should be done after ubo range
- * analysis.
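+ /* a6xx supports fp16 ALU; lowering mediump fragment outputs to 16b
+  * presumably lets more of the shader run at half precision:
+  */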
+ if (compiler->gpu_id >= 600 &&
+ s->info.stage == MESA_SHADER_FRAGMENT &&
+ !(ir3_shader_debug & IR3_DBG_NOFP16)) {
+ NIR_PASS_V(s, nir_lower_mediump_outputs);
+ }
+
+ /* we cannot ensure that ir3_finalize_nir() is only called once, so
+ * we also need to do trig workarounds here:
*/
- ir3_setup_const_state(shader, s, shader->const_state);
+ OPT_V(s, ir3_nir_apply_trig_workarounds);
+
+ ir3_optimize_loop(s);
}
void
if (so->key.has_gs || so->key.tessellation) {
switch (so->shader->type) {
case MESA_SHADER_VERTEX:
- NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so->shader, so->key.tessellation);
+ NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so, so->key.tessellation);
progress = true;
break;
case MESA_SHADER_TESS_CTRL:
- NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, so->shader, so->key.tessellation);
+ NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, so, so->key.tessellation);
NIR_PASS_V(s, ir3_nir_lower_to_explicit_input);
progress = true;
break;
case MESA_SHADER_TESS_EVAL:
NIR_PASS_V(s, ir3_nir_lower_tess_eval, so->key.tessellation);
if (so->key.has_gs)
- NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so->shader, so->key.tessellation);
+ NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so, so->key.tessellation);
progress = true;
break;
case MESA_SHADER_GEOMETRY:
progress |= OPT(s, nir_lower_tex, &tex_options);
}
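+ /* UBO range analysis is done only for the draw-pass variant; the
+  * binning pass shares its const_state (see below):
+  */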
+ if (!so->binning_pass)
+ OPT_V(s, ir3_nir_analyze_ubo_ranges, so);
+
+ progress |= OPT(s, ir3_nir_lower_ubo_loads, so);
+
+ /* UBO offset lowering has to come after we've decided what will
+ * be left as load_ubo
+ */
+ OPT_V(s, ir3_nir_lower_io_offsets, so->shader->compiler->gpu_id);
+
if (progress)
ir3_optimize_loop(s);
}
nir_sweep(s);
+
+ /* Binning pass variants re-use the const_state of the corresponding
+  * draw-pass shader, so that the same const emit can be re-used for
+  * both passes:
+  */
+ if (!so->binning_pass)
+ ir3_setup_const_state(s, so, ir3_const_state(so));
}
static void
MAX2(layout->num_driver_params, IR3_DP_INSTID_BASE + 1);
break;
case nir_intrinsic_load_user_clip_plane:
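+ /* each enabled clip plane consumes one vec4 of driver params: */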
+ idx = nir_intrinsic_ucp_id(intr);
layout->num_driver_params =
- MAX2(layout->num_driver_params, IR3_DP_UCP7_W + 1);
+ MAX2(layout->num_driver_params, IR3_DP_UCP0_X + (idx + 1) * 4);
break;
case nir_intrinsic_load_num_work_groups:
layout->num_driver_params =
}
}
-/* Sets up the non-variant-dependent constant state for the ir3_shader. Note
+/* Sets up the variant-dependent constant state for the ir3_shader. Note
* that it is also used from ir3_nir_analyze_ubo_ranges() to figure out the
* maximum number of driver params that would eventually be used, to leave
* space for this function to allocate the driver params.
*/
void
-ir3_setup_const_state(struct ir3_shader *shader, nir_shader *nir,
- struct ir3_const_state *const_state)
+ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
+ struct ir3_const_state *const_state)
{
- struct ir3_compiler *compiler = shader->compiler;
+ struct ir3_compiler *compiler = v->shader->compiler;
memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
ir3_nir_scan_driver_consts(nir, const_state);
if ((compiler->gpu_id < 500) &&
- (shader->stream_output.num_outputs > 0)) {
+ (v->shader->stream_output.num_outputs > 0)) {
const_state->num_driver_params =
MAX2(const_state->num_driver_params, IR3_DP_VTXCNT_MAX + 1);
}
constoff += align(cnt, 4) / 4;
}
- if (const_state->num_driver_params > 0)
+ if (const_state->num_driver_params > 0) {
+ /* offset cannot be 0 for vs params loaded by CP_DRAW_INDIRECT_MULTI */
+ if (v->type == MESA_SHADER_VERTEX && compiler->gpu_id >= 600)
+ constoff = MAX2(constoff, 1);
const_state->offsets.driver_param = constoff;
+ }
constoff += const_state->num_driver_params / 4;
- if ((shader->type == MESA_SHADER_VERTEX) &&
+ if ((v->type == MESA_SHADER_VERTEX) &&
(compiler->gpu_id < 500) &&
- shader->stream_output.num_outputs > 0) {
+ v->shader->stream_output.num_outputs > 0) {
const_state->offsets.tfbo = constoff;
constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
}
- switch (shader->type) {
+ switch (v->type) {
case MESA_SHADER_VERTEX:
const_state->offsets.primitive_param = constoff;
constoff += 1;
const_state->offsets.immediate = constoff;
- assert(constoff <= compiler->max_const);
+ assert(constoff <= ir3_max_const(v));
}