From a2dc11a7818c04d8dc0324e8fcba98d60baea529 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sun, 17 Jul 2016 18:44:58 -0700 Subject: [PATCH] i965: Move load_interpolated_input/barycentric_* intrinsics to the top. Currently, i965 interpolates all FS inputs at the top of the program. This has advantages and disadvantages, but I'd like to keep that policy while reworking this code. We can consider changing it independently. The next patch will make the compiler generate PLN instructions "on the fly", when it encounters an input load intrinsic, rather than doing it for all inputs at the start of the program. To emulate this behavior, we introduce an ugly pass to move all NIR load_interpolated_input and payload-based (not interpolator message) load_barycentric_* intrinsics to the shader's start block. This helps avoid regressions in shader-db for cases such as: if (...) { ...load some input... } else { ...load that same input... } which CSE can't handle, because there's no dominance relationship between the two loads. Because the start block dominates all others, we can CSE all inputs and emit PLNs exactly once, as we did before. Ideally, global value numbering would eliminate these redundant loads, while not forcing them all the way to the start block. When that lands, we should consider dropping this hacky pass. Again, this pass currently does nothing, as i965 doesn't generate these intrinsics yet. But it will shortly, and I figured I'd separate this code as it's relatively self-contained. v2: Dramatically simplify pass - instead of creating new instructions, just remove/re-insert their list nodes (suggested by Jason Ekstrand). Signed-off-by: Kenneth Graunke Reviewed-by: Chris Forbes [v1] Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs.cpp | 64 ++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 3aaf843bc97..fc91bbcfa46 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -6402,6 +6402,69 @@ computed_depth_mode(const nir_shader *shader) return BRW_PSCDEPTH_OFF; } +/** + * Move load_interpolated_input with simple (payload-based) barycentric modes + * to the top of the program so we don't emit multiple PLNs for the same input. + * + * This works around CSE not being able to handle non-dominating cases + * such as: + * + * if (...) { + * interpolate input + * } else { + * interpolate the same exact input + * } + * + * This should be replaced by global value numbering someday. + */ +void +move_interpolation_to_top(nir_shader *nir) +{ + nir_foreach_function(f, nir) { + if (!f->impl) + continue; + + nir_block *top = nir_start_block(f->impl); + + nir_foreach_block(block, f->impl) { + if (block == top) + continue; + + nir_foreach_instr_reverse_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_centroid: + case nir_intrinsic_load_barycentric_sample: + break; + case nir_intrinsic_load_interpolated_input: { + nir_intrinsic_instr *bary_intrinsic = + nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr); + nir_intrinsic_op op = bary_intrinsic->intrinsic; + + /* Leave interpolateAtSample/Offset() where it is. */ + if (op == nir_intrinsic_load_barycentric_at_sample || + op == nir_intrinsic_load_barycentric_at_offset) + continue; + } + default: + continue; + } + + exec_node_remove(&instr->node); + exec_list_push_head(&top->instr_list, &instr->node); + instr->block = top; + } + } + nir_metadata_preserve(f->impl, (nir_metadata) + ((unsigned) nir_metadata_block_index | + (unsigned) nir_metadata_dominance)); + } +} + /** * Apply default interpolation settings to FS inputs which don't specify any. */ @@ -6509,6 +6572,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, brw_nir_lower_fs_outputs(shader); if (!key->multisample_fbo) NIR_PASS_V(shader, demote_sample_qualifiers); + NIR_PASS_V(shader, move_interpolation_to_top); shader = brw_postprocess_nir(shader, compiler->devinfo, true); /* key->alpha_test_func means simulating alpha testing via discards, -- 2.30.2