* are ok too.
*/
return nir_op_infos[src_alu->op].output_size == 0 ||
- src_alu->op == nir_op_vec2 ||
- src_alu->op == nir_op_vec3 ||
- src_alu->op == nir_op_vec4;
+ nir_op_is_vec(src_alu->op);
}
case nir_instr_type_phi:
return should_lower_phi(nir_instr_as_phi(src_instr), state);
case nir_instr_type_load_const:
- case nir_instr_type_ssa_undef:
/* These are trivially scalarizable */
return true;
+ case nir_instr_type_ssa_undef:
+ /* The caller of this function is going to OR the results, and we don't
+  * want undefs to count, so we return false.
+  */
+ return false;
+
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *src_intrin = nir_instr_as_intrinsic(src_instr);
switch (src_intrin->intrinsic) {
- case nir_intrinsic_load_var:
- return src_intrin->variables[0]->var->data.mode == nir_var_shader_in ||
- src_intrin->variables[0]->var->data.mode == nir_var_uniform;
+ case nir_intrinsic_load_deref: {
+ nir_deref_instr *deref = nir_src_as_deref(src_intrin->src[0]);
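+ /* With the deref-based intrinsics the variable mode lives on the deref
+  * chain rather than on the intrinsic itself.
+  */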
+ return deref->mode == nir_var_shader_in ||
+ deref->mode == nir_var_uniform ||
+ deref->mode == nir_var_mem_ubo ||
+ deref->mode == nir_var_mem_ssbo ||
+ deref->mode == nir_var_mem_global;
+ }
- case nir_intrinsic_interp_var_at_centroid:
- case nir_intrinsic_interp_var_at_sample:
- case nir_intrinsic_interp_var_at_offset:
+ case nir_intrinsic_interp_deref_at_centroid:
+ case nir_intrinsic_interp_deref_at_sample:
+ case nir_intrinsic_interp_deref_at_offset:
+ case nir_intrinsic_interp_deref_at_vertex:
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ssbo:
+ case nir_intrinsic_load_global:
case nir_intrinsic_load_input:
return true;
default:
*/
entry = _mesa_hash_table_insert(state->phi_table, phi, (void *)(intptr_t)1);
- bool scalarizable = true;
+ bool scalarizable = false;
nir_foreach_phi_src(src, phi) {
+ /* This loop ignores srcs that are not scalarizable because it's likely
+  * still worth copying to temps if another phi source is scalarizable.
+  * This reduces register spilling by a huge amount in the i965 driver for
+  * Deus Ex: MD.
+  */
scalarizable = is_phi_src_scalarizable(src, state);
- if (!scalarizable)
+ if (scalarizable)
break;
}
lower_phis_to_scalar_block(nir_block *block,
struct lower_phis_to_scalar_state *state)
{
+ bool progress = false;
+
/* Find the last phi node in the block */
nir_phi_instr *last_phi = NULL;
nir_foreach_instr(instr, block) {
* will be redundant, but copy propagation should clean them up for
* us. No need to add the complexity here.
*/
- nir_op vec_op;
- switch (phi->dest.ssa.num_components) {
- case 2: vec_op = nir_op_vec2; break;
- case 3: vec_op = nir_op_vec3; break;
- case 4: vec_op = nir_op_vec4; break;
- default: unreachable("Invalid number of components");
- }
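+ /* nir_op_vec() maps a component count to the matching vecN opcode. */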
+ nir_op vec_op = nir_op_vec(phi->dest.ssa.num_components);
nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op);
nir_ssa_dest_init(&vec->instr, &vec->dest.dest,
nir_foreach_phi_src(src, phi) {
/* We need to insert a mov to grab the i'th component of src */
nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
- nir_op_imov);
+ nir_op_mov);
nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, bit_size, NULL);
mov->dest.write_mask = 1;
nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx);
ralloc_steal(state->dead_ctx, phi);
nir_instr_remove(&phi->instr);
+ progress = true;
+
/* We're using the safe iterator and inserting all the newly
* scalarized phi nodes before their non-scalarized version so that's
* ok. However, we are also inserting vec operations after all of
break;
}
- return true;
+ return progress;
}
-static void
+static bool
lower_phis_to_scalar_impl(nir_function_impl *impl)
{
struct lower_phis_to_scalar_state state;
+ bool progress = false;
state.mem_ctx = ralloc_parent(impl);
state.dead_ctx = ralloc_context(NULL);
- state.phi_table = _mesa_hash_table_create(state.dead_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
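+ /* _mesa_pointer_hash_table_create() is the convenience wrapper for a
+  * pointer-keyed table, so the behavior is unchanged.
+  */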
+ state.phi_table = _mesa_pointer_hash_table_create(state.dead_ctx);
nir_foreach_block(block, impl) {
- lower_phis_to_scalar_block(block, &state);
+ progress = lower_phis_to_scalar_block(block, &state) || progress;
}
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
ralloc_free(state.dead_ctx);
+ return progress;
}
/** A pass that lowers vector phi nodes to scalar
- * instance, if one of the sources is a non-scalarizable vector, then we
- * don't bother lowering because that would generate hard-to-coalesce movs.
+ * instance, if none of the sources is scalarizable, then we don't bother
+ * lowering because that would just generate hard-to-coalesce movs.
*/
-void
+bool
nir_lower_phis_to_scalar(nir_shader *shader)
{
- nir_foreach_function(shader, function) {
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
if (function->impl)
- lower_phis_to_scalar_impl(function->impl);
+ progress = lower_phis_to_scalar_impl(function->impl) || progress;
}
+
+ return progress;
}
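
With the pass now reporting whether it changed anything, callers can feed the
result into the usual NIR progress loop. A minimal sketch, assuming the
single-argument signature from this patch and a caller that already holds a
nir_shader *shader (not part of this patch):

   bool progress = false;
   NIR_PASS(progress, shader, nir_lower_phis_to_scalar);
   /* equivalent without the macro: */
   progress |= nir_lower_phis_to_scalar(shader);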