* are ok too.
*/
return nir_op_infos[src_alu->op].output_size == 0 ||
- src_alu->op == nir_op_vec2 ||
- src_alu->op == nir_op_vec3 ||
- src_alu->op == nir_op_vec4;
+ nir_op_is_vec(src_alu->op);
}
case nir_instr_type_phi:
return should_lower_phi(nir_instr_as_phi(src_instr), state);
case nir_instr_type_load_const:
- case nir_instr_type_ssa_undef:
/* These are trivially scalarizable */
return true;
+ case nir_instr_type_ssa_undef:
+ /* The caller of this function is going to OR the results and we don't
+ * want undefs to count so we return false.
+ */
+ return false;
+
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *src_intrin = nir_instr_as_intrinsic(src_instr);
case nir_intrinsic_interp_deref_at_centroid:
case nir_intrinsic_interp_deref_at_sample:
case nir_intrinsic_interp_deref_at_offset:
+ case nir_intrinsic_interp_deref_at_vertex:
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ssbo:
break;
}
}
+ /* fallthrough */
default:
/* We can't scalarize this type of instruction */
*/
entry = _mesa_hash_table_insert(state->phi_table, phi, (void *)(intptr_t)1);
- bool scalarizable = true;
+ bool scalarizable = false;
nir_foreach_phi_src(src, phi) {
+ /* This loop ignores srcs that are not scalarizable because it's likely
+ * still worth copying to temps if another phi source is scalarizable.
+ * This reduces register spilling by a huge amount in the i965 driver for
+ * Deus Ex: MD.
+ */
scalarizable = is_phi_src_scalarizable(src, state);
- if (!scalarizable)
+ if (scalarizable)
break;
}
* will be redundant, but copy propagation should clean them up for
* us. No need to add the complexity here.
*/
- nir_op vec_op;
- switch (phi->dest.ssa.num_components) {
- case 2: vec_op = nir_op_vec2; break;
- case 3: vec_op = nir_op_vec3; break;
- case 4: vec_op = nir_op_vec4; break;
- default: unreachable("Invalid number of components");
- }
+ nir_op vec_op = nir_op_vec(phi->dest.ssa.num_components);
nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op);
nir_ssa_dest_init(&vec->instr, &vec->dest.dest,
nir_foreach_phi_src(src, phi) {
/* We need to insert a mov to grab the i'th component of src */
nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
- nir_op_imov);
+ nir_op_mov);
nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, bit_size, NULL);
mov->dest.write_mask = 1;
nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx);