2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2019 Valve Corporation
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * Jason Ekstrand (jason@jlekstrand.net)
26 * Samuel Pitoiset (samuel.pitoiset@gmail.com)
30 #include "nir_builder.h"
33 lower_frexp_sig(nir_builder
*b
, nir_ssa_def
*x
)
35 nir_ssa_def
*abs_x
= nir_fabs(b
, x
);
36 nir_ssa_def
*zero
= nir_imm_floatN_t(b
, 0, x
->bit_size
);
37 nir_ssa_def
*sign_mantissa_mask
, *exponent_value
;
38 nir_ssa_def
*is_not_zero
= nir_fne(b
, abs_x
, zero
);
40 switch (x
->bit_size
) {
42 /* Half-precision floating-point values are stored as
47 * An exponent shift of 10 will shift the mantissa out, leaving only the
48 * exponent and sign bit (which itself may be zero, if the absolute value
49 * was taken before the bitcast and shift).
51 sign_mantissa_mask
= nir_imm_intN_t(b
, 0x83ffu
, 16);
52 /* Exponent of floating-point values in the range [0.5, 1.0). */
53 exponent_value
= nir_imm_intN_t(b
, 0x3800u
, 16);
56 /* Single-precision floating-point values are stored as
61 * An exponent shift of 23 will shift the mantissa out, leaving only the
62 * exponent and sign bit (which itself may be zero, if the absolute value
63 * was taken before the bitcast and shift.
65 sign_mantissa_mask
= nir_imm_int(b
, 0x807fffffu
);
66 /* Exponent of floating-point values in the range [0.5, 1.0). */
67 exponent_value
= nir_imm_int(b
, 0x3f000000u
);
70 /* Double-precision floating-point values are stored as
75 * An exponent shift of 20 will shift the remaining mantissa bits out,
76 * leaving only the exponent and sign bit (which itself may be zero, if
77 * the absolute value was taken before the bitcast and shift.
79 sign_mantissa_mask
= nir_imm_int(b
, 0x800fffffu
);
80 /* Exponent of floating-point values in the range [0.5, 1.0). */
81 exponent_value
= nir_imm_int(b
, 0x3fe00000u
);
84 unreachable("Invalid bitsize");
87 if (x
->bit_size
== 64) {
88 /* We only need to deal with the exponent so first we extract the upper
89 * 32 bits using nir_unpack_64_2x32_split_y.
91 nir_ssa_def
*upper_x
= nir_unpack_64_2x32_split_y(b
, x
);
92 nir_ssa_def
*zero32
= nir_imm_int(b
, 0);
94 nir_ssa_def
*new_upper
=
95 nir_ior(b
, nir_iand(b
, upper_x
, sign_mantissa_mask
),
96 nir_bcsel(b
, is_not_zero
, exponent_value
, zero32
));
98 nir_ssa_def
*lower_x
= nir_unpack_64_2x32_split_x(b
, x
);
100 return nir_pack_64_2x32_split(b
, lower_x
, new_upper
);
102 return nir_ior(b
, nir_iand(b
, x
, sign_mantissa_mask
),
103 nir_bcsel(b
, is_not_zero
, exponent_value
, zero
));
108 lower_frexp_exp(nir_builder
*b
, nir_ssa_def
*x
)
110 nir_ssa_def
*abs_x
= nir_fabs(b
, x
);
111 nir_ssa_def
*zero
= nir_imm_floatN_t(b
, 0, x
->bit_size
);
112 nir_ssa_def
*is_not_zero
= nir_fne(b
, abs_x
, zero
);
113 nir_ssa_def
*exponent
;
115 switch (x
->bit_size
) {
117 nir_ssa_def
*exponent_shift
= nir_imm_int(b
, 10);
118 nir_ssa_def
*exponent_bias
= nir_imm_intN_t(b
, -14, 16);
120 /* Significand return must be of the same type as the input, but the
121 * exponent must be a 32-bit integer.
123 exponent
= nir_i2i32(b
, nir_iadd(b
, nir_ushr(b
, abs_x
, exponent_shift
),
124 nir_bcsel(b
, is_not_zero
, exponent_bias
, zero
)));
128 nir_ssa_def
*exponent_shift
= nir_imm_int(b
, 23);
129 nir_ssa_def
*exponent_bias
= nir_imm_int(b
, -126);
131 exponent
= nir_iadd(b
, nir_ushr(b
, abs_x
, exponent_shift
),
132 nir_bcsel(b
, is_not_zero
, exponent_bias
, zero
));
136 nir_ssa_def
*exponent_shift
= nir_imm_int(b
, 20);
137 nir_ssa_def
*exponent_bias
= nir_imm_int(b
, -1022);
139 nir_ssa_def
*zero32
= nir_imm_int(b
, 0);
140 nir_ssa_def
*abs_upper_x
= nir_unpack_64_2x32_split_y(b
, abs_x
);
142 exponent
= nir_iadd(b
, nir_ushr(b
, abs_upper_x
, exponent_shift
),
143 nir_bcsel(b
, is_not_zero
, exponent_bias
, zero32
));
147 unreachable("Invalid bitsize");
154 lower_frexp_impl(nir_function_impl
*impl
)
156 bool progress
= false;
159 nir_builder_init(&b
, impl
);
161 nir_foreach_block(block
, impl
) {
162 nir_foreach_instr_safe(instr
, block
) {
163 if (instr
->type
!= nir_instr_type_alu
)
166 nir_alu_instr
*alu_instr
= nir_instr_as_alu(instr
);
169 b
.cursor
= nir_before_instr(instr
);
171 switch (alu_instr
->op
) {
172 case nir_op_frexp_sig
:
173 lower
= lower_frexp_sig(&b
, nir_ssa_for_alu_src(&b
, alu_instr
, 0));
175 case nir_op_frexp_exp
:
176 lower
= lower_frexp_exp(&b
, nir_ssa_for_alu_src(&b
, alu_instr
, 0));
182 nir_ssa_def_rewrite_uses(&alu_instr
->dest
.dest
.ssa
,
183 nir_src_for_ssa(lower
));
184 nir_instr_remove(instr
);
190 nir_metadata_preserve(impl
, nir_metadata_block_index
|
191 nir_metadata_dominance
);
198 nir_lower_frexp(nir_shader
*shader
)
200 bool progress
= false;
202 nir_foreach_function(function
, shader
) {
204 progress
|= lower_frexp_impl(function
->impl
);