nir: support lowering clipdist to arrays
[mesa.git] / src / compiler / nir / nir_lower_idiv.c
1 /*
2 * Copyright © 2015 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "nir.h"
28 #include "nir_builder.h"
29
30 /* Lowers idiv/udiv/imod/umod/irem
31 * Based on NV50LegalizeSSA::handleDIV()
32 *
33 * Note that this is probably not enough precision for compute shaders.
34 * Perhaps we want a second higher precision (looping) version of this?
35 * Or perhaps we assume if you can do compute shaders you can also
36 * branch out to a pre-optimized shader library routine..
37 */
38
39 static bool
40 convert_instr(nir_builder *bld, nir_alu_instr *alu)
41 {
42 nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r, *rt;
43 nir_op op = alu->op;
44 bool is_signed;
45
46 if ((op != nir_op_idiv) &&
47 (op != nir_op_udiv) &&
48 (op != nir_op_imod) &&
49 (op != nir_op_umod) &&
50 (op != nir_op_irem))
51 return false;
52
53 is_signed = (op == nir_op_idiv ||
54 op == nir_op_imod ||
55 op == nir_op_irem);
56
57 bld->cursor = nir_before_instr(&alu->instr);
58
59 numer = nir_ssa_for_alu_src(bld, alu, 0);
60 denom = nir_ssa_for_alu_src(bld, alu, 1);
61
62 if (is_signed) {
63 af = nir_i2f32(bld, numer);
64 bf = nir_i2f32(bld, denom);
65 af = nir_fabs(bld, af);
66 bf = nir_fabs(bld, bf);
67 a = nir_iabs(bld, numer);
68 b = nir_iabs(bld, denom);
69 } else {
70 af = nir_u2f32(bld, numer);
71 bf = nir_u2f32(bld, denom);
72 a = numer;
73 b = denom;
74 }
75
76 /* get first result: */
77 bf = nir_frcp(bld, bf);
78 bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really */
79 q = nir_fmul(bld, af, bf);
80
81 if (is_signed) {
82 q = nir_f2i32(bld, q);
83 } else {
84 q = nir_f2u32(bld, q);
85 }
86
87 /* get error of first result: */
88 r = nir_imul(bld, q, b);
89 r = nir_isub(bld, a, r);
90 r = nir_u2f32(bld, r);
91 r = nir_fmul(bld, r, bf);
92 r = nir_f2u32(bld, r);
93
94 /* add quotients: */
95 q = nir_iadd(bld, q, r);
96
97 /* correction: if modulus >= divisor, add 1 */
98 r = nir_imul(bld, q, b);
99 r = nir_isub(bld, a, r);
100 rt = nir_uge(bld, r, b);
101
102 if (op == nir_op_umod) {
103 q = nir_bcsel(bld, rt, nir_isub(bld, r, b), r);
104 } else {
105 r = nir_b2i32(bld, rt);
106
107 q = nir_iadd(bld, q, r);
108 if (is_signed) {
109 /* fix the sign: */
110 r = nir_ixor(bld, numer, denom);
111 r = nir_ilt(bld, r, nir_imm_int(bld, 0));
112 b = nir_ineg(bld, q);
113 q = nir_bcsel(bld, r, b, q);
114
115 if (op == nir_op_imod || op == nir_op_irem) {
116 q = nir_imul(bld, q, denom);
117 q = nir_isub(bld, numer, q);
118 if (op == nir_op_imod) {
119 q = nir_bcsel(bld, nir_ieq(bld, q, nir_imm_int(bld, 0)),
120 nir_imm_int(bld, 0),
121 nir_bcsel(bld, r, nir_iadd(bld, q, denom), q));
122 }
123 }
124 }
125 }
126
127 assert(alu->dest.dest.is_ssa);
128 nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(q));
129
130 return true;
131 }
132
133 static bool
134 convert_impl(nir_function_impl *impl)
135 {
136 nir_builder b;
137 nir_builder_init(&b, impl);
138 bool progress = false;
139
140 nir_foreach_block(block, impl) {
141 nir_foreach_instr_safe(instr, block) {
142 if (instr->type == nir_instr_type_alu)
143 progress |= convert_instr(&b, nir_instr_as_alu(instr));
144 }
145 }
146
147 nir_metadata_preserve(impl, nir_metadata_block_index |
148 nir_metadata_dominance);
149
150 return progress;
151 }
152
153 bool
154 nir_lower_idiv(nir_shader *shader)
155 {
156 bool progress = false;
157
158 nir_foreach_function(function, shader) {
159 if (function->impl)
160 progress |= convert_impl(function->impl);
161 }
162
163 return progress;
164 }