nir: add nir_var_shader_storage
[mesa.git] / src / glsl / nir / nir_lower_idiv.c
1 /*
2 * Copyright © 2015 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "nir.h"
28 #include "nir_builder.h"
29
30 /* Lowers idiv/udiv/umod
31 * Based on NV50LegalizeSSA::handleDIV()
32 *
33 * Note that this is probably not enough precision for compute shaders.
34 * Perhaps we want a second higher precision (looping) version of this?
35 * Or perhaps we assume if you can do compute shaders you can also
36 * branch out to a pre-optimized shader library routine..
37 */
38
39 static void
40 convert_instr(nir_builder *bld, nir_alu_instr *alu)
41 {
42 nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r;
43 nir_op op = alu->op;
44 bool is_signed;
45
46 if ((op != nir_op_idiv) &&
47 (op != nir_op_udiv) &&
48 (op != nir_op_umod))
49 return;
50
51 is_signed = (op == nir_op_idiv);
52
53 nir_builder_insert_before_instr(bld, &alu->instr);
54
55 numer = nir_ssa_for_src(bld, alu->src[0].src,
56 nir_ssa_alu_instr_src_components(alu, 0));
57 denom = nir_ssa_for_src(bld, alu->src[1].src,
58 nir_ssa_alu_instr_src_components(alu, 1));
59
60 if (is_signed) {
61 af = nir_i2f(bld, numer);
62 bf = nir_i2f(bld, denom);
63 af = nir_fabs(bld, af);
64 bf = nir_fabs(bld, bf);
65 a = nir_iabs(bld, numer);
66 b = nir_iabs(bld, denom);
67 } else {
68 af = nir_u2f(bld, numer);
69 bf = nir_u2f(bld, denom);
70 a = numer;
71 b = denom;
72 }
73
74 /* get first result: */
75 bf = nir_frcp(bld, bf);
76 bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really */
77 q = nir_fmul(bld, af, bf);
78
79 if (is_signed) {
80 q = nir_f2i(bld, q);
81 } else {
82 q = nir_f2u(bld, q);
83 }
84
85 /* get error of first result: */
86 r = nir_imul(bld, q, b);
87 r = nir_isub(bld, a, r);
88 r = nir_u2f(bld, r);
89 r = nir_fmul(bld, r, bf);
90 r = nir_f2u(bld, r);
91
92 /* add quotients: */
93 q = nir_iadd(bld, q, r);
94
95 /* correction: if modulus >= divisor, add 1 */
96 r = nir_imul(bld, q, b);
97 r = nir_isub(bld, a, r);
98
99 r = nir_ige(bld, r, b);
100 r = nir_b2i(bld, r);
101
102 q = nir_iadd(bld, q, r);
103 if (is_signed) {
104 /* fix the sign: */
105 r = nir_ixor(bld, numer, denom);
106 r = nir_ushr(bld, r, nir_imm_int(bld, 31));
107 r = nir_i2b(bld, r);
108 b = nir_ineg(bld, q);
109 q = nir_bcsel(bld, r, b, q);
110 }
111
112 if (op == nir_op_umod) {
113 /* division result in q */
114 r = nir_imul(bld, q, b);
115 q = nir_isub(bld, a, r);
116 }
117
118 assert(alu->dest.dest.is_ssa);
119 nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
120 nir_src_for_ssa(q),
121 ralloc_parent(alu));
122 }
123
124 static bool
125 convert_block(nir_block *block, void *state)
126 {
127 nir_builder *b = state;
128
129 nir_foreach_instr_safe(block, instr) {
130 if (instr->type == nir_instr_type_alu)
131 convert_instr(b, nir_instr_as_alu(instr));
132 }
133
134 return true;
135 }
136
137 static void
138 convert_impl(nir_function_impl *impl)
139 {
140 nir_builder b;
141 nir_builder_init(&b, impl);
142
143 nir_foreach_block(impl, convert_block, &b);
144 nir_metadata_preserve(impl, nir_metadata_block_index |
145 nir_metadata_dominance);
146 }
147
148 void
149 nir_lower_idiv(nir_shader *shader)
150 {
151 nir_foreach_overload(shader, overload) {
152 if (overload->impl)
153 convert_impl(overload->impl);
154 }
155 }