src/compiler/nir/nir_lower_frexp.c

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  * Copyright © 2019 Valve Corporation
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22  * IN THE SOFTWARE.
  23  *
  24  * Authors:
  25  *    Jason Ekstrand (jason@jlekstrand.net)
  26  *    Samuel Pitoiset (samuel.pitoiset@gmail.com>
  27  */
  28
  29 #include "nir.h"
  30 #include "nir_builder.h"
  31
  32 static nir_ssa_def *
  33 lower_frexp_sig(nir_builder *b, nir_ssa_def *x)
  34 {
  35    nir_ssa_def *abs_x = nir_fabs(b, x);
  36    nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size);
  37    nir_ssa_def *sign_mantissa_mask, *exponent_value;
  38    nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);
  39
  40    switch (x->bit_size) {
  41    case 16:
  42       /* Half-precision floating-point values are stored as
  43        *   1 sign bit;
  44        *   5 exponent bits;
  45        *   10 mantissa bits.
  46        *
  47        * An exponent shift of 10 will shift the mantissa out, leaving only the
  48        * exponent and sign bit (which itself may be zero, if the absolute value
  49        * was taken before the bitcast and shift).
  50        */
  51       sign_mantissa_mask = nir_imm_intN_t(b, 0x83ffu, 16);
  52       /* Exponent of floating-point values in the range [0.5, 1.0). */
  53       exponent_value = nir_imm_intN_t(b, 0x3800u, 16);
  54       break;
  55    case 32:
  56       /* Single-precision floating-point values are stored as
  57        *   1 sign bit;
  58        *   8 exponent bits;
  59        *   23 mantissa bits.
  60        *
  61        * An exponent shift of 23 will shift the mantissa out, leaving only the
  62        * exponent and sign bit (which itself may be zero, if the absolute value
  63        * was taken before the bitcast and shift.
  64        */
  65       sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);
  66       /* Exponent of floating-point values in the range [0.5, 1.0). */
  67       exponent_value = nir_imm_int(b, 0x3f000000u);
  68       break;
  69    case 64:
  70       /* Double-precision floating-point values are stored as
  71        *   1 sign bit;
  72        *   11 exponent bits;
  73        *   52 mantissa bits.
  74        *
  75        * An exponent shift of 20 will shift the remaining mantissa bits out,
  76        * leaving only the exponent and sign bit (which itself may be zero, if
  77        * the absolute value was taken before the bitcast and shift.
  78        */
  79       sign_mantissa_mask = nir_imm_int(b, 0x800fffffu);
  80       /* Exponent of floating-point values in the range [0.5, 1.0). */
  81       exponent_value = nir_imm_int(b, 0x3fe00000u);
  82       break;
  83    default:
  84       unreachable("Invalid bitsize");
  85    }
  86
  87    if (x->bit_size == 64) {
  88       /* We only need to deal with the exponent so first we extract the upper
  89        * 32 bits using nir_unpack_64_2x32_split_y.
  90        */
  91       nir_ssa_def *upper_x = nir_unpack_64_2x32_split_y(b, x);
  92       nir_ssa_def *zero32 = nir_imm_int(b, 0);
  93
  94       nir_ssa_def *new_upper =
  95          nir_ior(b, nir_iand(b, upper_x, sign_mantissa_mask),
  96                     nir_bcsel(b, is_not_zero, exponent_value, zero32));
  97
  98       nir_ssa_def *lower_x = nir_unpack_64_2x32_split_x(b, x);
  99
 100       return nir_pack_64_2x32_split(b, lower_x, new_upper);
 101    } else {
 102       return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
 103                         nir_bcsel(b, is_not_zero, exponent_value, zero));
 104    }
 105 }
 106
 107 static nir_ssa_def *
 108 lower_frexp_exp(nir_builder *b, nir_ssa_def *x)
 109 {
 110    nir_ssa_def *abs_x = nir_fabs(b, x);
 111    nir_ssa_def *zero = nir_imm_floatN_t(b, 0, x->bit_size);
 112    nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);
 113    nir_ssa_def *exponent;
 114
 115    switch (x->bit_size) {
 116    case 16: {
 117       nir_ssa_def *exponent_shift = nir_imm_int(b, 10);
 118       nir_ssa_def *exponent_bias = nir_imm_intN_t(b, -14, 16);
 119
 120       /* Significand return must be of the same type as the input, but the
 121        * exponent must be a 32-bit integer.
 122        */
 123       exponent = nir_i2i32(b, nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
 124                               nir_bcsel(b, is_not_zero, exponent_bias, zero)));
 125       break;
 126    }
 127    case 32: {
 128       nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
 129       nir_ssa_def *exponent_bias = nir_imm_int(b, -126);
 130
 131       exponent = nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
 132                              nir_bcsel(b, is_not_zero, exponent_bias, zero));
 133       break;
 134    }
 135    case 64: {
 136       nir_ssa_def *exponent_shift = nir_imm_int(b, 20);
 137       nir_ssa_def *exponent_bias = nir_imm_int(b, -1022);
 138
 139       nir_ssa_def *zero32 = nir_imm_int(b, 0);
 140       nir_ssa_def *abs_upper_x = nir_unpack_64_2x32_split_y(b, abs_x);
 141
 142       exponent = nir_iadd(b, nir_ushr(b, abs_upper_x, exponent_shift),
 143                              nir_bcsel(b, is_not_zero, exponent_bias, zero32));
 144       break;
 145    }
 146    default:
 147       unreachable("Invalid bitsize");
 148    }
 149
 150    return exponent;
 151 }
 152
 153 static bool
 154 lower_frexp_impl(nir_function_impl *impl)
 155 {
 156    bool progress = false;
 157
 158    nir_builder b;
 159    nir_builder_init(&b, impl);
 160
 161    nir_foreach_block(block, impl) {
 162       nir_foreach_instr_safe(instr, block) {
 163          if (instr->type != nir_instr_type_alu)
 164             continue;
 165
 166          nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
 167          nir_ssa_def *lower;
 168
 169          b.cursor = nir_before_instr(instr);
 170
 171          switch (alu_instr->op) {
 172          case nir_op_frexp_sig:
 173             lower = lower_frexp_sig(&b, nir_ssa_for_alu_src(&b, alu_instr, 0));
 174             break;
 175          case nir_op_frexp_exp:
 176             lower = lower_frexp_exp(&b, nir_ssa_for_alu_src(&b, alu_instr, 0));
 177             break;
 178          default:
 179             continue;
 180          }
 181
 182          nir_ssa_def_rewrite_uses(&alu_instr->dest.dest.ssa,
 183                                   nir_src_for_ssa(lower));
 184          nir_instr_remove(instr);
 185          progress = true;
 186       }
 187    }
 188
 189    if (progress) {
 190       nir_metadata_preserve(impl, nir_metadata_block_index |
 191                                   nir_metadata_dominance);
 192    }
 193
 194    return progress;
 195 }
 196
 197 bool
 198 nir_lower_frexp(nir_shader *shader)
 199 {
 200    bool progress = false;
 201
 202    nir_foreach_function(function, shader) {
 203       if (function->impl)
 204          progress |= lower_frexp_impl(function->impl);
 205    }
 206
 207    return progress;
 208 }