nir: Add a lowering pass to split 64bit phis
[mesa.git] / src / compiler / nir / nir_lower_bit_size.c
1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_builder.h"
25
26 /**
27 * Some ALU operations may not be supported in hardware in specific bit-sizes.
28 * This pass allows implementations to selectively lower such operations to
29 * a bit-size that is supported natively and then converts the result back to
30 * the original bit-size.
31 */
32
33 static void
34 lower_instr(nir_builder *bld, nir_alu_instr *alu, unsigned bit_size)
35 {
36 const nir_op op = alu->op;
37 unsigned dst_bit_size = alu->dest.dest.ssa.bit_size;
38
39 bld->cursor = nir_before_instr(&alu->instr);
40
41 /* Convert each source to the requested bit-size */
42 nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS] = { NULL };
43 for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
44 nir_ssa_def *src = nir_ssa_for_alu_src(bld, alu, i);
45
46 nir_alu_type type = nir_op_infos[op].input_types[i];
47 if (nir_alu_type_get_type_size(type) == 0)
48 src = nir_convert_to_bit_size(bld, src, type, bit_size);
49
50 if (i == 1 && (op == nir_op_ishl || op == nir_op_ishr || op == nir_op_ushr)) {
51 assert(util_is_power_of_two_nonzero(dst_bit_size));
52 src = nir_iand(bld, src, nir_imm_int(bld, dst_bit_size - 1));
53 }
54
55 srcs[i] = src;
56 }
57
58 /* Emit the lowered ALU instruction */
59 nir_ssa_def *lowered_dst = NULL;
60 if (op == nir_op_imul_high || op == nir_op_umul_high) {
61 assert(dst_bit_size * 2 <= bit_size);
62 nir_ssa_def *lowered_dst = nir_imul(bld, srcs[0], srcs[1]);
63 if (nir_op_infos[op].output_type & nir_type_uint)
64 lowered_dst = nir_ushr_imm(bld, lowered_dst, dst_bit_size);
65 else
66 lowered_dst = nir_ishr_imm(bld, lowered_dst, dst_bit_size);
67 } else {
68 lowered_dst = nir_build_alu_src_arr(bld, op, srcs);
69 }
70
71
72 /* Convert result back to the original bit-size */
73 nir_alu_type type = nir_op_infos[op].output_type;
74 nir_ssa_def *dst = nir_convert_to_bit_size(bld, lowered_dst, type, dst_bit_size);
75 nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(dst));
76 }
77
78 static bool
79 lower_impl(nir_function_impl *impl,
80 nir_lower_bit_size_callback callback,
81 void *callback_data)
82 {
83 nir_builder b;
84 nir_builder_init(&b, impl);
85 bool progress = false;
86
87 nir_foreach_block(block, impl) {
88 nir_foreach_instr_safe(instr, block) {
89 if (instr->type != nir_instr_type_alu)
90 continue;
91
92 nir_alu_instr *alu = nir_instr_as_alu(instr);
93 assert(alu->dest.dest.is_ssa);
94
95 unsigned lower_bit_size = callback(alu, callback_data);
96 if (lower_bit_size == 0)
97 continue;
98
99 assert(lower_bit_size != alu->dest.dest.ssa.bit_size);
100
101 lower_instr(&b, alu, lower_bit_size);
102 progress = true;
103 }
104 }
105
106 if (progress) {
107 nir_metadata_preserve(impl, nir_metadata_block_index |
108 nir_metadata_dominance);
109 } else {
110 nir_metadata_preserve(impl, nir_metadata_all);
111 }
112
113 return progress;
114 }
115
116 bool
117 nir_lower_bit_size(nir_shader *shader,
118 nir_lower_bit_size_callback callback,
119 void *callback_data)
120 {
121 bool progress = false;
122
123 nir_foreach_function(function, shader) {
124 if (function->impl)
125 progress |= lower_impl(function->impl, callback, callback_data);
126 }
127
128 return progress;
129 }
130
131 static void
132 split_phi(nir_builder *b, nir_phi_instr *phi)
133 {
134 nir_phi_instr *lowered[2] = {
135 nir_phi_instr_create(b->shader),
136 nir_phi_instr_create(b->shader)
137 };
138 int num_components = phi->dest.ssa.num_components;
139 assert(phi->dest.ssa.bit_size == 64);
140
141 nir_foreach_phi_src(src, phi) {
142 assert(num_components == src->src.ssa->num_components);
143
144 b->cursor = nir_before_src(&src->src, false);
145
146 nir_ssa_def *x = nir_unpack_64_2x32_split_x(b, src->src.ssa);
147 nir_ssa_def *y = nir_unpack_64_2x32_split_y(b, src->src.ssa);
148
149 nir_phi_src *xsrc = rzalloc(lowered[0], nir_phi_src);
150 xsrc->pred = src->pred;
151 xsrc->src = nir_src_for_ssa(x);
152 exec_list_push_tail(&lowered[0]->srcs, &xsrc->node);
153
154 nir_phi_src *ysrc = rzalloc(lowered[1], nir_phi_src);
155 ysrc->pred = src->pred;
156 ysrc->src = nir_src_for_ssa(y);
157 exec_list_push_tail(&lowered[1]->srcs, &ysrc->node);
158 }
159
160 nir_ssa_dest_init(&lowered[0]->instr, &lowered[0]->dest,
161 num_components, 32, NULL);
162 nir_ssa_dest_init(&lowered[1]->instr, &lowered[1]->dest,
163 num_components, 32, NULL);
164
165 b->cursor = nir_before_instr(&phi->instr);
166 nir_builder_instr_insert(b, &lowered[0]->instr);
167 nir_builder_instr_insert(b, &lowered[1]->instr);
168
169 b->cursor = nir_after_phis(nir_cursor_current_block(b->cursor));
170 nir_ssa_def *merged = nir_pack_64_2x32_split(b, &lowered[0]->dest.ssa, &lowered[1]->dest.ssa);
171 nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(merged));
172 nir_instr_remove(&phi->instr);
173 }
174
175 static bool
176 lower_64bit_phi_impl(nir_function_impl *impl)
177 {
178 nir_builder b;
179 nir_builder_init(&b, impl);
180 bool progress = false;
181
182 nir_foreach_block(block, impl) {
183 nir_foreach_instr_safe(instr, block) {
184 if (instr->type != nir_instr_type_phi)
185 break;
186
187 nir_phi_instr *phi = nir_instr_as_phi(instr);
188 assert(phi->dest.is_ssa);
189
190 if (phi->dest.ssa.bit_size <= 32)
191 continue;
192
193 split_phi(&b, phi);
194 progress = true;
195 }
196 }
197
198 if (progress) {
199 nir_metadata_preserve(impl, nir_metadata_block_index |
200 nir_metadata_dominance);
201 } else {
202 nir_metadata_preserve(impl, nir_metadata_all);
203 }
204
205 return progress;
206 }
207
208 bool
209 nir_lower_64bit_phis(nir_shader *shader)
210 {
211 bool progress = false;
212
213 nir_foreach_function(function, shader) {
214 if (function->impl)
215 progress |= lower_64bit_phi_impl(function->impl);
216 }
217
218 return progress;
219 }