+/**
+ * Rewrite flrp(a, b, c) as (a ± c) + b*c.
+ *
+ * This is the expansion (b*c ± c) + a with the additions reassociated so
+ * that the ±c term is combined with a first.
+ *
+ * \param bld         Builder used to emit the replacement instructions.
+ * \param dead_flrp   List that receives the replaced flrp.  The instruction
+ *                    is not removed from the shader here.
+ * \param alu         The flrp instruction being replaced.
+ * \param subtract_c  When true, a + (-c) is emitted; otherwise a + c.
+ *
+ * \note: This only works if a = ±1.
+ */
+static void
+replace_with_expanded_ffma_and_add(struct nir_builder *bld,
+                                   struct u_vector *dead_flrp,
+                                   struct nir_alu_instr *alu, bool subtract_c)
+{
+   nir_ssa_def *const src_a = nir_ssa_for_alu_src(bld, alu, 0);
+   nir_ssa_def *const src_b = nir_ssa_for_alu_src(bld, alu, 1);
+   nir_ssa_def *const src_c = nir_ssa_for_alu_src(bld, alu, 2);
+
+   /* Every instruction emitted below copies the "exact" flag of the flrp. */
+   nir_ssa_def *const product = nir_fmul(bld, src_b, src_c);
+   nir_instr_as_alu(product->parent_instr)->exact = alu->exact;
+
+   /* The term added to a is either c itself or a newly emitted -c. */
+   nir_ssa_def *addend = src_c;
+
+   if (subtract_c) {
+      addend = nir_fneg(bld, src_c);
+      nir_instr_as_alu(addend->parent_instr)->exact = alu->exact;
+   }
+
+   nir_ssa_def *const a_plus_addend = nir_fadd(bld, src_a, addend);
+   nir_instr_as_alu(a_plus_addend->parent_instr)->exact = alu->exact;
+
+   nir_ssa_def *const replacement = nir_fadd(bld, a_plus_addend, product);
+   nir_instr_as_alu(replacement->parent_instr)->exact = alu->exact;
+
+   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
+                            nir_src_for_ssa(replacement));
+
+   /* The original flrp is deliberately left in place and only queued on the
+    * dead list.  Many of the lowering choices are based on other uses of
+    * the sources, so removing the flrp now may cause the last flrp in a
+    * sequence to make a different, incorrect choice.
+    */
+   append_flrp_to_dead_list(dead_flrp, alu);
+}
+
+/**
+ * Checks whether a swizzled source is a constant that has the same value in
+ * every component selected by the swizzle.
+ *
+ * Components are compared as 32-bit floats when the destination bit size is
+ * 32, and as 64-bit doubles for every other bit size.
+ *
+ * \param instr   ALU instruction that owns the source.
+ * \param src     Index of the source to examine.
+ * \param result  Receives the common value on success.  It is not written
+ *                when false is returned.
+ *
+ * \return
+ * True if all components of the swizzled source are the same constant.
+ * Otherwise false is returned.
+ */
+static bool
+all_same_constant(const nir_alu_instr *instr, unsigned src, double *result)
+{
+   nir_const_value *const values =
+      nir_src_as_const_value(instr->src[src].src);
+
+   /* A non-constant source can never qualify. */
+   if (values == NULL)
+      return false;
+
+   const uint8_t *const swz = instr->src[src].swizzle;
+   const unsigned count = nir_dest_num_components(instr->dest.dest);
+   unsigned i = 1;
+
+   if (instr->dest.dest.ssa.bit_size == 32) {
+      const float reference = values[swz[0]].f32;
+
+      /* Advance until the first component that differs from component 0. */
+      while (i < count && values[swz[i]].f32 == reference)
+         i++;
+
+      if (i < count)
+         return false;
+
+      *result = reference;
+      return true;
+   }
+
+   const double reference = values[swz[0]].f64;
+
+   /* Advance until the first component that differs from component 0. */
+   while (i < count && values[swz[i]].f64 == reference)
+      i++;
+
+   if (i < count)
+      return false;
+
+   *result = reference;
+   return true;
+}
+
+/**
+ * Checks whether sources 0 and 1 are both constants whose binary exponents
+ * are close to each other in every component.
+ *
+ * Components are read as 32-bit floats when the destination bit size is 32,
+ * and as 64-bit doubles for every other bit size.
+ *
+ * \return
+ * False if either source is not a constant, or if any pair of components has
+ * exponents that differ by more than the limit for the bit size.  Otherwise
+ * true is returned.
+ */
+static bool
+sources_are_constants_with_similar_magnitudes(const nir_alu_instr *instr)
+{
+   nir_const_value *const consts0 = nir_src_as_const_value(instr->src[0].src);
+   nir_const_value *const consts1 = nir_src_as_const_value(instr->src[1].src);
+
+   if (!consts0 || !consts1)
+      return false;
+
+   const uint8_t *const swz0 = instr->src[0].swizzle;
+   const uint8_t *const swz1 = instr->src[1].swizzle;
+   const unsigned count = nir_dest_num_components(instr->dest.dest);
+   const bool is_32bit = instr->dest.dest.ssa.bit_size == 32;
+
+   /* If the difference between the exponents is >= 24 (32-bit) or >= 53
+    * (64-bit), then A+B will always have the value of whichever of A and B
+    * has the larger absolute value.  So, [0, 23] and [0, 52] are the valid
+    * ranges.  The smaller the limit value, the more precision will be
+    * maintained at a potential performance cost.  Somewhat arbitrarily
+    * split each range in half.
+    */
+   const int max_exponent_gap = is_32bit ? (23 / 2) : (52 / 2);
+
+   for (unsigned comp = 0; comp < count; comp++) {
+      int exponent0;
+      int exponent1;
+
+      /* Only the exponent is of interest; the returned mantissa is
+       * discarded.
+       */
+      if (is_32bit) {
+         frexpf(consts0[swz0[comp]].f32, &exponent0);
+         frexpf(consts1[swz1[comp]].f32, &exponent1);
+      } else {
+         frexp(consts0[swz0[comp]].f64, &exponent0);
+         frexp(consts1[swz1[comp]].f64, &exponent1);
+      }
+
+      if (abs(exponent0 - exponent1) > max_exponent_gap)
+         return false;
+   }
+
+   return true;
+}
+
+/**
+ * Counts of similar types of nir_op_flrp instructions
+ *
+ * The counters are filled in by get_similar_flrp_stats(), which examines the
+ * other nir_op_flrp instructions that have the same src2 as a given flrp and
+ * adds each of them to exactly one of the counters below.
+ *
+ * If a similar instruction fits into more than one category, it will only be
+ * counted once. The assumption is that no other instruction will have all
+ * sources the same, or CSE would have removed one of the instructions.
+ */
+struct similar_flrp_stats {
+ unsigned src2; /**< Other flrps with equal src2, but different src0 and src1. */
+ unsigned src0_and_src2; /**< Other flrps with equal src2 and equal src0. */
+ unsigned src1_and_src2; /**< Other flrps with equal src2 and src1, but different src0. */
+};
+
+/**
+ * Collect counts of similar FLRP instructions.
+ *
+ * This function only cares about similar instructions that have src2 in
+ * common.
+ *
+ * \param alu  The flrp instruction whose src2 users are examined.
+ * \param st   Receives the counts.  It is cleared before counting starts.
+ */
+static void
+get_similar_flrp_stats(nir_alu_instr *alu, struct similar_flrp_stats *st)
+{
+   memset(st, 0, sizeof(*st));
+
+   nir_foreach_use(use, alu->src[2].src.ssa) {
+      nir_instr *const user = use->parent_instr;
+
+      /* Only ALU instructions other than the flrp itself are candidates. */
+      if (user->type != nir_instr_type_alu || user == &alu->instr)
+         continue;
+
+      nir_alu_instr *const candidate = nir_instr_as_alu(user);
+
+      /* The candidate has to be a flrp whose source 2 equals this flrp's
+       * source 2.  Using the same value in some other operand position does
+       * not count.
+       */
+      if (candidate->op != nir_op_flrp ||
+          !nir_alu_srcs_equal(alu, candidate, 2, 2))
+         continue;
+
+      /* Pick exactly one counter.  A match on source 0 takes priority over a
+       * match on source 1.
+       */
+      unsigned *counter;
+
+      if (nir_alu_srcs_equal(alu, candidate, 0, 0))
+         counter = &st->src0_and_src2;
+      else if (nir_alu_srcs_equal(alu, candidate, 1, 1))
+         counter = &st->src1_and_src2;
+      else
+         counter = &st->src2;
+
+      (*counter)++;
+   }
+}
+