nir: Move nir_lower_mediump_outputs from ir3

[mesa.git] / src / compiler / nir / nir_lower_double_ops.c
diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c

index e8ae884d12c10cfd3f2ed8806234a74e01f23e35..bb2476523ec8e48f5778a631c6576ab6e6725ecd 100644 (file)
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -26,6 +26,8 @@
  #include "nir_builder.h"
  #include "c99_math.h"
  
+#include <float.h>
+
  /*
   * Lowers some unsupported double operations, using only:
   *
@@ -41,22 +43,22 @@ static nir_ssa_def *
  set_exponent(nir_builder *b, nir_ssa_def *src, nir_ssa_def *exp)
  {
     /* Split into bits 0-31 and 32-63 */
-   nir_ssa_def *lo = nir_unpack_double_2x32_split_x(b, src);
-   nir_ssa_def *hi = nir_unpack_double_2x32_split_y(b, src);
+   nir_ssa_def *lo = nir_unpack_64_2x32_split_x(b, src);
+   nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, src);
  
     /* The exponent is bits 52-62, or 20-30 of the high word, so set the exponent
      * to 1023
      */
     nir_ssa_def *new_hi = nir_bfi(b, nir_imm_int(b, 0x7ff00000), exp, hi);
     /* recombine */
-   return nir_pack_double_2x32_split(b, lo, new_hi);
+   return nir_pack_64_2x32_split(b, lo, new_hi);
  }
  
  static nir_ssa_def *
  get_exponent(nir_builder *b, nir_ssa_def *src)
  {
     /* get bits 32-63 */
-   nir_ssa_def *hi = nir_unpack_double_2x32_split_y(b, src);
+   nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, src);
  
     /* extract bits 20-30 of the high word */
     return nir_ubitfield_extract(b, hi, nir_imm_int(b, 20), nir_imm_int(b, 11));
@@ -67,7 +69,7 @@ get_exponent(nir_builder *b, nir_ssa_def *src)
  static nir_ssa_def *
  get_signed_inf(nir_builder *b, nir_ssa_def *zero)
  {
-   nir_ssa_def *zero_hi = nir_unpack_double_2x32_split_y(b, zero);
+   nir_ssa_def *zero_hi = nir_unpack_64_2x32_split_y(b, zero);
  
     /* The bit pattern for infinity is 0x7ff0000000000000, where the sign bit
      * is the highest bit. Only the sign bit can be non-zero in the passed in
@@ -76,7 +78,7 @@ get_signed_inf(nir_builder *b, nir_ssa_def *zero)
      * bits and then pack it together with zero low 32 bits.
      */
     nir_ssa_def *inf_hi = nir_ior(b, nir_imm_int(b, 0x7ff00000), zero_hi);
-   return nir_pack_double_2x32_split(b, nir_imm_int(b, 0), inf_hi);
+   return nir_pack_64_2x32_split(b, nir_imm_int(b, 0), inf_hi);
  }
  
  /*
@@ -116,7 +118,7 @@ lower_rcp(nir_builder *b, nir_ssa_def *src)
     /* cast to float, do an rcp, and then cast back to get an approximate
      * result
      */
-   nir_ssa_def *ra = nir_f2d(b, nir_frcp(b, nir_d2f(b, src_norm)));
+   nir_ssa_def *ra = nir_f2f64(b, nir_frcp(b, nir_f2f32(b, src_norm)));
  
     /* Fixup the exponent of the result - note that we check if this is too
      * small below.
@@ -142,8 +144,8 @@ lower_rcp(nir_builder *b, nir_ssa_def *src)
      * See https://en.wikipedia.org/wiki/Division_algorithm for more details.
      */
  
-   ra = nir_ffma(b, ra, nir_ffma(b, ra, src, nir_imm_double(b, -1)), ra);
-   ra = nir_ffma(b, ra, nir_ffma(b, ra, src, nir_imm_double(b, -1)), ra);
+   ra = nir_ffma(b, nir_fneg(b, ra), nir_ffma(b, ra, src, nir_imm_double(b, -1)), ra);
+   ra = nir_ffma(b, nir_fneg(b, ra), nir_ffma(b, ra, src, nir_imm_double(b, -1)), ra);
  
     return fix_inv_result(b, ra, src, new_exp);
  }
@@ -180,7 +182,7 @@ lower_sqrt_rsq(nir_builder *b, nir_ssa_def *src, bool sqrt)
                                          nir_iadd(b, nir_imm_int(b, 1023),
                                                   even));
  
-   nir_ssa_def *ra = nir_f2d(b, nir_frsq(b, nir_d2f(b, src_norm)));
+   nir_ssa_def *ra = nir_f2f64(b, nir_frsq(b, nir_f2f32(b, src_norm)));
     nir_ssa_def *new_exp = nir_isub(b, get_exponent(b, ra), half);
     ra = set_exponent(b, ra, new_exp);
  
@@ -267,36 +269,47 @@ lower_sqrt_rsq(nir_builder *b, nir_ssa_def *src, bool sqrt)
      * (https://en.wikipedia.org/wiki/Methods_of_computing_square_roots).
      */
  
-    nir_ssa_def *one_half = nir_imm_double(b, 0.5);
-    nir_ssa_def *h_0 = nir_fmul(b, one_half, ra);
-    nir_ssa_def *g_0 = nir_fmul(b, src, ra);
-    nir_ssa_def *r_0 = nir_ffma(b, nir_fneg(b, h_0), g_0, one_half);
-    nir_ssa_def *h_1 = nir_ffma(b, h_0, r_0, h_0);
-    nir_ssa_def *res;
-    if (sqrt) {
-       nir_ssa_def *g_1 = nir_ffma(b, g_0, r_0, g_0);
-       nir_ssa_def *r_1 = nir_ffma(b, nir_fneg(b, g_1), g_1, src);
-       res = nir_ffma(b, h_1, r_1, g_1);
-    } else {
-       nir_ssa_def *y_1 = nir_fmul(b, nir_imm_double(b, 2.0), h_1);
-       nir_ssa_def *r_1 = nir_ffma(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src),
-                                   one_half);
-       res = nir_ffma(b, y_1, r_1, y_1);
-    }
-
-    if (sqrt) {
-       /* Here, the special cases we need to handle are
-        * 0 -> 0 and
-        * +inf -> +inf
-        */
-       res = nir_bcsel(b, nir_ior(b, nir_feq(b, src, nir_imm_double(b, 0.0)),
-                                  nir_feq(b, src, nir_imm_double(b, INFINITY))),
-                       src, res);
-    } else {
-       res = fix_inv_result(b, res, src, new_exp);
-    }
-
-    return res;
+   nir_ssa_def *one_half = nir_imm_double(b, 0.5);
+   nir_ssa_def *h_0 = nir_fmul(b, one_half, ra);
+   nir_ssa_def *g_0 = nir_fmul(b, src, ra);
+   nir_ssa_def *r_0 = nir_ffma(b, nir_fneg(b, h_0), g_0, one_half);
+   nir_ssa_def *h_1 = nir_ffma(b, h_0, r_0, h_0);
+   nir_ssa_def *res;
+   if (sqrt) {
+      nir_ssa_def *g_1 = nir_ffma(b, g_0, r_0, g_0);
+      nir_ssa_def *r_1 = nir_ffma(b, nir_fneg(b, g_1), g_1, src);
+      res = nir_ffma(b, h_1, r_1, g_1);
+   } else {
+      nir_ssa_def *y_1 = nir_fmul(b, nir_imm_double(b, 2.0), h_1);
+      nir_ssa_def *r_1 = nir_ffma(b, nir_fneg(b, y_1), nir_fmul(b, h_1, src),
+                                  one_half);
+      res = nir_ffma(b, y_1, r_1, y_1);
+   }
+
+   if (sqrt) {
+      /* Here, the special cases we need to handle are
+       * 0 -> 0 and
+       * +inf -> +inf
+       */
+      const bool preserve_denorms =
+         b->shader->info.float_controls_execution_mode &
+         FLOAT_CONTROLS_DENORM_PRESERVE_FP64;
+      nir_ssa_def *src_flushed = src;
+      if (!preserve_denorms) {
+         src_flushed = nir_bcsel(b,
+                                 nir_flt(b, nir_fabs(b, src),
+                                         nir_imm_double(b, DBL_MIN)),
+                                 nir_imm_double(b, 0.0),
+                                 src);
+      }
+      res = nir_bcsel(b, nir_ior(b, nir_feq(b, src_flushed, nir_imm_double(b, 0.0)),
+                                 nir_feq(b, src, nir_imm_double(b, INFINITY))),
+                                 src_flushed, res);
+   } else {
+      res = fix_inv_result(b, res, src, new_exp);
+   }
+
+   return res;
  }
  
  static nir_ssa_def *
@@ -337,8 +350,8 @@ lower_trunc(nir_builder *b, nir_ssa_def *src)
                           nir_imm_int(b, ~0),
                           nir_isub(b, frac_bits, nir_imm_int(b, 32))));
  
-   nir_ssa_def *src_lo = nir_unpack_double_2x32_split_x(b, src);
-   nir_ssa_def *src_hi = nir_unpack_double_2x32_split_y(b, src);
+   nir_ssa_def *src_lo = nir_unpack_64_2x32_split_x(b, src);
+   nir_ssa_def *src_hi = nir_unpack_64_2x32_split_y(b, src);
  
     return
        nir_bcsel(b,
@@ -346,99 +359,417 @@ lower_trunc(nir_builder *b, nir_ssa_def *src)
                  nir_imm_double(b, 0.0),
                  nir_bcsel(b, nir_ige(b, unbiased_exp, nir_imm_int(b, 53)),
                            src,
-                          nir_pack_double_2x32_split(b,
-                                                     nir_iand(b, mask_lo, src_lo),
-                                                     nir_iand(b, mask_hi, src_hi))));
+                          nir_pack_64_2x32_split(b,
+                                                 nir_iand(b, mask_lo, src_lo),
+                                                 nir_iand(b, mask_hi, src_hi))));
  }
  
-static void
-lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
+static nir_ssa_def *
+lower_floor(nir_builder *b, nir_ssa_def *src)
  {
-   assert(instr->dest.dest.is_ssa);
-   if (instr->dest.dest.ssa.bit_size != 64)
-      return;
+   /* floor() only differs from trunc() for negative, non-integral inputs:
+    *
+    *    x >= 0 or x == trunc(x)  ->  floor(x) = trunc(x)
+    *    otherwise                ->  floor(x) = trunc(x) - 1
+    */
+   nir_ssa_def *truncated = nir_ftrunc(b, src);
+   nir_ssa_def *non_negative = nir_fge(b, src, nir_imm_double(b, 0.0));
+   nir_ssa_def *is_integral = nir_feq(b, src, truncated);
+   nir_ssa_def *keep_trunc = nir_ior(b, non_negative, is_integral);
+   nir_ssa_def *trunc_minus_one =
+      nir_fsub(b, truncated, nir_imm_double(b, 1.0));
+   return nir_bcsel(b, keep_trunc, truncated, trunc_minus_one);
+}
  
-   switch (instr->op) {
-   case nir_op_frcp:
-      if (!(options & nir_lower_drcp))
-         return;
-      break;
+static nir_ssa_def *
+lower_ceil(nir_builder *b, nir_ssa_def *src)
+{
+   /* ceil(x) = trunc(x)      when x < 0 or x is already integral,
+    *         = trunc(x) + 1  otherwise.
+    */
+   nir_ssa_def *truncated = nir_ftrunc(b, src);
+   nir_ssa_def *is_negative = nir_flt(b, src, nir_imm_double(b, 0.0));
+   nir_ssa_def *is_integral = nir_feq(b, src, truncated);
+   nir_ssa_def *keep_trunc = nir_ior(b, is_negative, is_integral);
+   nir_ssa_def *trunc_plus_one =
+      nir_fadd(b, truncated, nir_imm_double(b, 1.0));
+   return nir_bcsel(b, keep_trunc, truncated, trunc_plus_one);
+}
  
-   case nir_op_fsqrt:
-      if (!(options & nir_lower_dsqrt))
-         return;
-      break;
+static nir_ssa_def *
+lower_fract(nir_builder *b, nir_ssa_def *src)
+{
+   return nir_fsub(b, src, nir_ffloor(b, src)); /* fract(x) = x - floor(x) */
+}
  
-   case nir_op_frsq:
-      if (!(options & nir_lower_drsq))
-         return;
-      break;
+static nir_ssa_def *
+lower_round_even(nir_builder *b, nir_ssa_def *src)
+{
+   /* Add 2**52 to |src| and subtract it again to round off fractional bits. */
+   nir_ssa_def *two52 = nir_imm_double(b, (double)(1ull << 52));
+   nir_ssa_def *sign = nir_iand(b, nir_unpack_64_2x32_split_y(b, src),
+                                nir_imm_int(b, 1ull << 31)); /* sign bit */
+   /* exact: presumably stops the add/sub from being folded away (confirm). */
+   b->exact = true;
+   nir_ssa_def *res = nir_fsub(b, nir_fadd(b, nir_fabs(b, src), two52), two52);
+   b->exact = false;
+   /* Use the rounded value (sign re-applied) only when |src| < 2**52. */
+   return nir_bcsel(b, nir_flt(b, nir_fabs(b, src), two52),
+                    nir_pack_64_2x32_split(b, nir_unpack_64_2x32_split_x(b, res),
+                                           nir_ior(b, nir_unpack_64_2x32_split_y(b, res), sign)), src);
+}
  
-   case nir_op_ftrunc:
-      if (!(options & nir_lower_dtrunc))
-         return;
-      break;
+static nir_ssa_def *
+lower_mod(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
+{
+   /* mod(x, y) = x - y * floor(x / y)
+    *
+    * If the division is itself lowered, its rounding error can make floor()
+    * return the quotient minus one when x = N * y. Strictly, the result
+    * should then be zero, because mod(x, y) is supposed to lie in [0, y).
+    * Fortunately the Vulkan spec tolerates this kind of error; quoting
+    * appendix A (Precision and Operation of SPIR-V Instructions):
+    *
+    *   "The OpFRem and OpFMod instructions use cheap approximations of
+    *   remainder, and the error can be large due to the discontinuity in
+    *   trunc() and floor(). This can produce mathematically unexpected
+    *   results in some cases, such as FMod(x,x) computing x rather than 0,
+    *   and can also cause the result to have a different sign than the
+    *   infinitely precise result."
+    *
+    * In practice this means the result actually lies in the closed interval
+    * [0, y].
+    *
+    * OpenGL, unlike Vulkan, does not state this explicitly, so nominally the
+    * result must lie in [0, y). On the other hand, mod(a, b) is defined as
+    * "a - b * floor(a / b)" and OpenGL allows some error in division, so
+    * a / a may evaluate to 1.0 - 1ULP; floor(a / a) is then 0, and hence
+    * mod(a, a) == a.
+    *
+    * In short, mod(a, a) may legitimately return "a" on both OpenGL and
+    * Vulkan.
+    */
+   nir_ssa_def *quot_floor = nir_ffloor(b, nir_fdiv(b, src0, src1));
  
-   default:
-      return;
-   }
+   return nir_fsub(b, src0, nir_fmul(b, src1, quot_floor));
+}
  
-   nir_builder bld;
-   nir_builder_init(&bld, nir_cf_node_get_function(&instr->instr.block->cf_node));
-   bld.cursor = nir_before_instr(&instr->instr);
+/*
+ * Lowers a 64-bit ALU op to an inlined call of the softfp64 function that
+ * implements it (looked up by name in the softfp64 shader).
+ *
+ * Returns the value loaded from the call's return temporary, or NULL if
+ * software lowering is not enabled, the opcode has no software version
+ * here, or the library function (or its implementation) is missing.
+ */
+static nir_ssa_def *
+lower_doubles_instr_to_soft(nir_builder *b, nir_alu_instr *instr,
+                            const nir_shader *softfp64,
+                            nir_lower_doubles_options options)
+{
+   if (!(options & nir_lower_fp64_full_software))
+      return NULL;
  
-   nir_ssa_def *src = nir_fmov_alu(&bld, instr->src[0],
-                                   instr->dest.dest.ssa.num_components);
+   assert(instr->dest.dest.is_ssa);
  
-   nir_ssa_def *result;
+   const char *name;
+   const struct glsl_type *return_type = glsl_uint64_t_type();
  
     switch (instr->op) {
-   case nir_op_frcp:
-      result = lower_rcp(&bld, src);
+   case nir_op_f2i64:
+      name = instr->src[0].src.ssa->bit_size == 64 ?
+         "__fp64_to_int64" : "__fp32_to_int64";
+      return_type = glsl_int64_t_type();
        break;
-   case nir_op_fsqrt:
-      result = lower_sqrt_rsq(&bld, src, true);
+   case nir_op_f2u64:
+      name = instr->src[0].src.ssa->bit_size == 64 ?
+         "__fp64_to_uint64" : "__fp32_to_uint64";
        break;
-   case nir_op_frsq:
-      result = lower_sqrt_rsq(&bld, src, false);
+   case nir_op_f2f64:
+      name = "__fp32_to_fp64";
+      break;
+   case nir_op_f2f32:
+      name = "__fp64_to_fp32";
+      return_type = glsl_float_type();
+      break;
+   case nir_op_f2i32:
+      name = "__fp64_to_int";
+      return_type = glsl_int_type();
+      break;
+   case nir_op_f2u32:
+      name = "__fp64_to_uint";
+      return_type = glsl_uint_type();
+      break;
+   case nir_op_f2b1:
+   case nir_op_f2b32:
+      name = "__fp64_to_bool";
+      return_type = glsl_bool_type();
+      break;
+   case nir_op_b2f64:
+      name = "__bool_to_fp64";
+      break;
+   case nir_op_i2f32:
+      if (instr->src[0].src.ssa->bit_size != 64)
+         return NULL; /* only the 64-bit integer source is handled here */
+      name = "__int64_to_fp32";
+      return_type = glsl_float_type();
+      break;
+   case nir_op_u2f32:
+      if (instr->src[0].src.ssa->bit_size != 64)
+         return NULL; /* only the 64-bit integer source is handled here */
+      name = "__uint64_to_fp32";
+      return_type = glsl_float_type();
+      break;
+   case nir_op_i2f64:
+      name = instr->src[0].src.ssa->bit_size == 64 ?
+         "__int64_to_fp64" : "__int_to_fp64";
+      break;
+   case nir_op_u2f64:
+      name = instr->src[0].src.ssa->bit_size == 64 ?
+         "__uint64_to_fp64" : "__uint_to_fp64";
+      break;
+   case nir_op_fabs:
+      name = "__fabs64";
+      break;
+   case nir_op_fneg:
+      name = "__fneg64";
+      break;
+   case nir_op_fround_even:
+      name = "__fround64";
        break;
     case nir_op_ftrunc:
-      result = lower_trunc(&bld, src);
+      name = "__ftrunc64";
+      break;
+   case nir_op_ffloor:
+      name = "__ffloor64";
+      break;
+   case nir_op_ffract:
+      name = "__ffract64";
+      break;
+   case nir_op_fsign:
+      name = "__fsign64";
+      break;
+   case nir_op_feq:
+      name = "__feq64";
+      return_type = glsl_bool_type();
+      break;
+   case nir_op_fne:
+      name = "__fne64";
+      return_type = glsl_bool_type();
+      break;
+   case nir_op_flt:
+      name = "__flt64";
+      return_type = glsl_bool_type();
+      break;
+   case nir_op_fge:
+      name = "__fge64";
+      return_type = glsl_bool_type();
+      break;
+   case nir_op_fmin:
+      name = "__fmin64";
+      break;
+   case nir_op_fmax:
+      name = "__fmax64";
+      break;
+   case nir_op_fadd:
+      name = "__fadd64";
+      break;
+   case nir_op_fmul:
+      name = "__fmul64";
+      break;
+   case nir_op_ffma:
+      name = "__ffma64";
+      break;
+   case nir_op_fsat:
+      name = "__fsat64";
        break;
     default:
-      unreachable("unhandled opcode");
+      return NULL; /* no software implementation selected for this opcode */
+   }
+   /* Look up the softfp64 function that implements this opcode by name. */
+   nir_function *func = NULL;
+   nir_foreach_function(function, softfp64) {
+      if (strcmp(function->name, name) == 0) {
+         func = function;
+         break;
+      }
+   }
+   if (!func || !func->impl) {
+      fprintf(stderr, "Cannot find function \"%s\"\n", name);
+      assert(func && func->impl);
+      return NULL; /* with asserts compiled out: leave the op unlowered */
+   }
+   nir_ssa_def *params[4] = { NULL, };
+   /* params[0] is a deref of the temporary the result is loaded from. */
+   nir_variable *ret_tmp =
+      nir_local_variable_create(b->impl, return_type, "return_tmp");
+   nir_deref_instr *ret_deref = nir_build_deref_var(b, ret_tmp);
+   params[0] = &ret_deref->dest.ssa;
+   /* The remaining parameters are the ALU sources, in order. */
+   assert(nir_op_infos[instr->op].num_inputs + 1 == func->num_params);
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      assert(i + 1 < ARRAY_SIZE(params));
+      params[i + 1] = nir_mov_alu(b, instr->src[i], 1);
     }
  
-   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(result));
-   nir_instr_remove(&instr->instr);
+   nir_inline_function_impl(b, func->impl, params);
+
+   return nir_load_deref(b, ret_deref);
  }
  
+nir_lower_doubles_options
+nir_lower_doubles_op_to_options_mask(nir_op opcode)
+{
+   switch (opcode) { /* opcode -> the option bit that requests its lowering */
+   case nir_op_frcp:          return nir_lower_drcp;
+   case nir_op_fsqrt:         return nir_lower_dsqrt;
+   case nir_op_frsq:          return nir_lower_drsq;
+   case nir_op_ftrunc:        return nir_lower_dtrunc;
+   case nir_op_ffloor:        return nir_lower_dfloor;
+   case nir_op_fceil:         return nir_lower_dceil;
+   case nir_op_ffract:        return nir_lower_dfract;
+   case nir_op_fround_even:   return nir_lower_dround_even;
+   case nir_op_fmod:          return nir_lower_dmod;
+   case nir_op_fsub:          return nir_lower_dsub;
+   case nir_op_fdiv:          return nir_lower_ddiv;
+   default:                   return 0; /* no dedicated lowering option */
+   }
+}
+
+struct lower_doubles_data { /* state handed to the filter/lower callbacks */
+   const nir_shader *softfp64; /* shader searched by name for fp64 helpers */
+   nir_lower_doubles_options options; /* bitmask of requested lowerings */
+};
+
  static bool
-lower_doubles_block(nir_block *block, void *ctx)
+should_lower_double_instr(const nir_instr *instr, const void *_data)
  {
-   nir_lower_doubles_options options = *((nir_lower_doubles_options *) ctx);
+   const struct lower_doubles_data *data = _data;
+   const nir_lower_doubles_options options = data->options;
+
+   if (instr->type != nir_instr_type_alu)
+      return false;
+
+   const nir_alu_instr *alu = nir_instr_as_alu(instr);
  
-   nir_foreach_instr_safe(block, instr) {
-      if (instr->type != nir_instr_type_alu)
-         continue;
+   assert(alu->dest.dest.is_ssa);
+   bool is_64 = alu->dest.dest.ssa.bit_size == 64;
  
-      lower_doubles_instr(nir_instr_as_alu(instr), options);
+   unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
+   for (unsigned i = 0; i < num_srcs; i++) {
+      is_64 |= (nir_src_bit_size(alu->src[i].src) == 64);
     }
  
-   return true;
+   if (!is_64)
+      return false;
+
+   if (options & nir_lower_fp64_full_software)
+      return true;
+
+   return options & nir_lower_doubles_op_to_options_mask(alu->op);
  }
  
-static void
-lower_doubles_impl(nir_function_impl *impl, nir_lower_doubles_options options)
+static nir_ssa_def *
+lower_doubles_instr(nir_builder *b, nir_instr *instr, void *_data)
  {
-   nir_foreach_block_call(impl, lower_doubles_block, &options);
+   const struct lower_doubles_data *cfg = _data;
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+   /* Full software lowering takes precedence; it yields NULL when it is not
+    * enabled or does not handle this opcode.
+    */
+   nir_ssa_def *lowered =
+      lower_doubles_instr_to_soft(b, alu, cfg->softfp64, cfg->options);
+   if (lowered != NULL)
+      return lowered;
+
+   /* Otherwise only lower opcodes whose option bit was requested. */
+   if ((cfg->options & nir_lower_doubles_op_to_options_mask(alu->op)) == 0)
+      return NULL;
+
+   const unsigned num_comps = alu->dest.dest.ssa.num_components;
+   nir_ssa_def *src0 = nir_mov_alu(b, alu->src[0], num_comps);
+
+   /* Single-source lowerings. */
+   switch (alu->op) {
+   case nir_op_frcp:
+      return lower_rcp(b, src0);
+   case nir_op_fsqrt:
+      return lower_sqrt_rsq(b, src0, true);
+   case nir_op_frsq:
+      return lower_sqrt_rsq(b, src0, false);
+   case nir_op_ftrunc:
+      return lower_trunc(b, src0);
+   case nir_op_ffloor:
+      return lower_floor(b, src0);
+   case nir_op_fceil:
+      return lower_ceil(b, src0);
+   case nir_op_ffract:
+      return lower_fract(b, src0);
+   case nir_op_fround_even:
+      return lower_round_even(b, src0);
+   default:
+      break;
+   }
+
+   /* Two-source lowerings, rewritten in terms of other operations. */
+   nir_ssa_def *src1 = nir_mov_alu(b, alu->src[1], num_comps);
+   switch (alu->op) {
+   case nir_op_fdiv:
+      return nir_fmul(b, src0, nir_frcp(b, src1));
+   case nir_op_fsub:
+      return nir_fadd(b, src0, nir_fneg(b, src1));
+   case nir_op_fmod:
+      return lower_mod(b, src0, src1);
+   default:
+      unreachable("unhandled opcode");
+   }
  }
  
-void
-nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options)
+static bool
+nir_lower_doubles_impl(nir_function_impl *impl,
+                       const nir_shader *softfp64,
+                       nir_lower_doubles_options options)
  {
-   nir_foreach_function(shader, function) {
-      if (function->impl)
-         lower_doubles_impl(function->impl, options);
+   struct lower_doubles_data pass_state = {
+      .options = options,
+      .softfp64 = softfp64,
+   };
+
+   /* Run the filter/lower callbacks over every instruction in impl. */
+   const bool changed =
+      nir_function_impl_lower_instructions(impl, should_lower_double_instr,
+                                           lower_doubles_instr, &pass_state);
+
+   /* The cleanup below is only needed after software lowering, which
+    * inlines library functions into impl. */
+   if (changed && (options & nir_lower_fp64_full_software)) {
+      /* Inlining leaves SSA and register indices stale: renumber them. */
+      nir_index_ssa_defs(impl);
+      nir_index_local_regs(impl);
+
+      /* Declare that no metadata is preserved by this pass. */
+      nir_metadata_preserve(impl, nir_metadata_none);
+
+      /* Function inlining introduces deref casts; clean them up. */
+      nir_opt_deref_impl(impl);
     }
+
+   return changed;
+}
+
+bool
+nir_lower_doubles(nir_shader *shader,
+                  const nir_shader *softfp64,
+                  nir_lower_doubles_options options)
+{
+   /* Run the lowering on every function that has an implementation and
+    * report whether any of them changed. */
+   bool any_changed = false;
+   nir_foreach_function(func, shader) {
+      if (!func->impl)
+         continue;
+      any_changed |= nir_lower_doubles_impl(func->impl, softfp64, options);
+   }
+   return any_changed;
+}