/*
 * Copyright (c) 2014 Scott Mansell
 * Copyright © 2014 Broadcom
 * Copyright (c) 2019 Vasily Khoruzhick <anarsoul@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

26 #include "util/u_math.h"
28 #include "nir_builder.h"
31 shrink_input(nir_builder
*b
, nir_ssa_def
*x
)
33 nir_ssa_def
*scaled_x
= nir_fmul_imm(b
, x
, 1.0 / (M_PI
* 2));
35 nir_ssa_def
*xfrac
= nir_ffract(b
, scaled_x
);
36 /* Map [0.5, 1] to [-0.5, 0] */
37 nir_ssa_def
*xfrac_0_5_1
= nir_fadd_imm(b
, xfrac
, -1.0);
38 /* Map [-1, -0.5] to [0, 0.5] */
39 nir_ssa_def
*xfrac_n1_n0_5
= nir_fadd_imm(b
, xfrac
, 1.0);
41 nir_ssa_def
*geq_0_5
= nir_build_alu(b
, nir_op_fge
, xfrac
, nir_imm_float(b
, 0.5), NULL
, NULL
);
42 nir_ssa_def
*less_m0_5
= nir_build_alu(b
, nir_op_flt
, xfrac
, nir_imm_float(b
, -0.5), NULL
, NULL
);
44 nir_ssa_def
*sel1
= nir_build_alu(b
, nir_op_bcsel
, geq_0_5
, xfrac_0_5_1
, xfrac
, NULL
);
45 nir_ssa_def
*sel2
= nir_build_alu(b
, nir_op_bcsel
, less_m0_5
, xfrac_n1_n0_5
, sel1
, NULL
);
51 lower_sincos(nir_builder
*b
, nir_ssa_def
*src
, bool do_cos
)
53 /* Fast sin/cos implementation, see
54 * https://web.archive.org/web/20180105155939/http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
56 const float B
= 8.0; //(M_PI * 2.0) * (4.0 / M_PI);
57 const float C
= -16.0; //(M_PI * 2.0) * (M_PI * 2.0) * (-4.0 / (M_PI * M_PI));
58 const float P
= 0.225;
61 src
= nir_fadd_imm(b
, src
, M_PI
/ 2.0);
63 nir_ssa_def
*x
= shrink_input(b
, src
);
65 nir_ssa_def
*bx
= nir_fmul_imm(b
, x
, B
);
66 nir_ssa_def
*cx
= nir_fmul_imm(b
, x
, C
);
67 nir_ssa_def
*absx
= nir_fabs(b
, x
);
68 nir_ssa_def
*cxabsx
= nir_fmul(b
, cx
, absx
);
70 /* Y1 = B * x + C * x * fabs(x) */
71 nir_ssa_def
*y1
= nir_fadd(b
, bx
, cxabsx
);
73 /* Precision step: Y = P * (Y1 * fabs(Y1) - Y1) + Y1 */
74 nir_ssa_def
*y
= nir_fabs(b
, y1
);
75 y
= nir_fmul(b
, y
, y1
);
76 y
= nir_fsub(b
, y
, y1
);
77 y
= nir_fmul_imm(b
, y
, P
);
78 y
= nir_fadd(b
, y
, y1
);
84 lower_sincos_impl(nir_function_impl
*impl
)
86 bool progress
= false;
89 nir_builder_init(&b
, impl
);
91 nir_foreach_block(block
, impl
) {
92 nir_foreach_instr_safe(instr
, block
) {
93 if (instr
->type
!= nir_instr_type_alu
)
96 nir_alu_instr
*alu_instr
= nir_instr_as_alu(instr
);
99 b
.cursor
= nir_before_instr(instr
);
101 switch (alu_instr
->op
) {
103 lower
= lower_sincos(&b
, nir_ssa_for_alu_src(&b
, alu_instr
, 0), false);
106 lower
= lower_sincos(&b
, nir_ssa_for_alu_src(&b
, alu_instr
, 0), true);
112 nir_ssa_def_rewrite_uses(&alu_instr
->dest
.dest
.ssa
,
113 nir_src_for_ssa(lower
));
114 nir_instr_remove(instr
);
120 nir_metadata_preserve(impl
, nir_metadata_block_index
|
121 nir_metadata_dominance
);
128 nir_lower_sincos(nir_shader
*shader
)
130 bool progress
= false;
132 nir_foreach_function(function
, shader
) {
134 progress
|= lower_sincos_impl(function
->impl
);