[mesa.git] / src / compiler / nir / nir_lower_int64.c
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26
27 #define COND_LOWER_OP(b, name, ...) \
28 (b->shader->options->lower_int64_options & \
29 nir_lower_int64_op_to_options_mask(nir_op_##name)) ? \
30 lower_##name##64(b, __VA_ARGS__) : nir_##name(b, __VA_ARGS__)
31
32 #define COND_LOWER_CMP(b, name, ...) \
33 (b->shader->options->lower_int64_options & \
34 nir_lower_int64_op_to_options_mask(nir_op_##name)) ? \
35 lower_int64_compare(b, nir_op_##name, __VA_ARGS__) : \
36 nir_##name(b, __VA_ARGS__)
37
38 #define COND_LOWER_CAST(b, name, ...) \
39 (b->shader->options->lower_int64_options & \
40 nir_lower_int64_op_to_options_mask(nir_op_##name)) ? \
41 lower_##name(b, __VA_ARGS__) : \
42 nir_##name(b, __VA_ARGS__)
43
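/* For example (an illustrative expansion only, not extra code in the pass),
 *
 *    COND_LOWER_OP(b, iadd, x, y)
 *
 * becomes
 *
 *    (b->shader->options->lower_int64_options &
 *     nir_lower_int64_op_to_options_mask(nir_op_iadd)) ?
 *       lower_iadd64(b, x, y) : nir_iadd(b, x, y)
 *
 * so the helpers below can be built on top of each other: every 64-bit
 * building block is emitted either as its lowered sequence or as the native
 * op, depending on which options the backend has set.
 */
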
44 static nir_ssa_def *
45 lower_b2i64(nir_builder *b, nir_ssa_def *x)
46 {
47 return nir_pack_64_2x32_split(b, nir_b2i32(b, x), nir_imm_int(b, 0));
48 }
49
50 static nir_ssa_def *
51 lower_i2b(nir_builder *b, nir_ssa_def *x)
52 {
53 return nir_ine(b, nir_ior(b, nir_unpack_64_2x32_split_x(b, x),
54 nir_unpack_64_2x32_split_y(b, x)),
55 nir_imm_int(b, 0));
56 }
57
58 static nir_ssa_def *
59 lower_i2i8(nir_builder *b, nir_ssa_def *x)
60 {
61 return nir_i2i8(b, nir_unpack_64_2x32_split_x(b, x));
62 }
63
64 static nir_ssa_def *
65 lower_i2i16(nir_builder *b, nir_ssa_def *x)
66 {
67 return nir_i2i16(b, nir_unpack_64_2x32_split_x(b, x));
68 }
69
70
71 static nir_ssa_def *
72 lower_i2i32(nir_builder *b, nir_ssa_def *x)
73 {
74 return nir_unpack_64_2x32_split_x(b, x);
75 }
76
77 static nir_ssa_def *
78 lower_i2i64(nir_builder *b, nir_ssa_def *x)
79 {
80 nir_ssa_def *x32 = x->bit_size == 32 ? x : nir_i2i32(b, x);
81 return nir_pack_64_2x32_split(b, x32, nir_ishr(b, x32, nir_imm_int(b, 31)));
82 }
83
84 static nir_ssa_def *
85 lower_u2u8(nir_builder *b, nir_ssa_def *x)
86 {
87 return nir_u2u8(b, nir_unpack_64_2x32_split_x(b, x));
88 }
89
90 static nir_ssa_def *
91 lower_u2u16(nir_builder *b, nir_ssa_def *x)
92 {
93 return nir_u2u16(b, nir_unpack_64_2x32_split_x(b, x));
94 }
95
96 static nir_ssa_def *
97 lower_u2u32(nir_builder *b, nir_ssa_def *x)
98 {
99 return nir_unpack_64_2x32_split_x(b, x);
100 }
101
102 static nir_ssa_def *
103 lower_u2u64(nir_builder *b, nir_ssa_def *x)
104 {
105 nir_ssa_def *x32 = x->bit_size == 32 ? x : nir_u2u32(b, x);
106 return nir_pack_64_2x32_split(b, x32, nir_imm_int(b, 0));
107 }
108
109 static nir_ssa_def *
110 lower_bcsel64(nir_builder *b, nir_ssa_def *cond, nir_ssa_def *x, nir_ssa_def *y)
111 {
112 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
113 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
114 nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
115 nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
116
117 return nir_pack_64_2x32_split(b, nir_bcsel(b, cond, x_lo, y_lo),
118 nir_bcsel(b, cond, x_hi, y_hi));
119 }
120
121 static nir_ssa_def *
122 lower_inot64(nir_builder *b, nir_ssa_def *x)
123 {
124 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
125 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
126
127 return nir_pack_64_2x32_split(b, nir_inot(b, x_lo), nir_inot(b, x_hi));
128 }
129
130 static nir_ssa_def *
131 lower_iand64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
132 {
133 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
134 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
135 nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
136 nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
137
138 return nir_pack_64_2x32_split(b, nir_iand(b, x_lo, y_lo),
139 nir_iand(b, x_hi, y_hi));
140 }
141
142 static nir_ssa_def *
143 lower_ior64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
144 {
145 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
146 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
147 nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
148 nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
149
150 return nir_pack_64_2x32_split(b, nir_ior(b, x_lo, y_lo),
151 nir_ior(b, x_hi, y_hi));
152 }
153
154 static nir_ssa_def *
155 lower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
156 {
157 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
158 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
159 nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
160 nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
161
162 return nir_pack_64_2x32_split(b, nir_ixor(b, x_lo, y_lo),
163 nir_ixor(b, x_hi, y_hi));
164 }
165
166 static nir_ssa_def *
167 lower_ishl64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
168 {
169 /* Implemented as
170 *
171 * uint64_t lshift(uint64_t x, int c)
172 * {
173 * if (c == 0) return x;
174 *
175 * uint32_t lo = LO(x), hi = HI(x);
176 *
177 * if (c < 32) {
178 * uint32_t lo_shifted = lo << c;
179 * uint32_t hi_shifted = hi << c;
180 * uint32_t lo_shifted_hi = lo >> abs(32 - c);
181 * return pack_64(lo_shifted, hi_shifted | lo_shifted_hi);
182 * } else {
183 * uint32_t lo_shifted_hi = lo << abs(32 - c);
184 * return pack_64(0, lo_shifted_hi);
185 * }
186 * }
187 */
188 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
189 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
190
191 nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32)));
192 nir_ssa_def *lo_shifted = nir_ishl(b, x_lo, y);
193 nir_ssa_def *hi_shifted = nir_ishl(b, x_hi, y);
194 nir_ssa_def *lo_shifted_hi = nir_ushr(b, x_lo, reverse_count);
195
196 nir_ssa_def *res_if_lt_32 =
197 nir_pack_64_2x32_split(b, lo_shifted,
198 nir_ior(b, hi_shifted, lo_shifted_hi));
199 nir_ssa_def *res_if_ge_32 =
200 nir_pack_64_2x32_split(b, nir_imm_int(b, 0),
201 nir_ishl(b, x_lo, reverse_count));
202
203 return nir_bcsel(b,
204 nir_ieq(b, y, nir_imm_int(b, 0)), x,
205 nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
206 res_if_ge_32, res_if_lt_32));
207 }
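
/* The same selection as self-contained host C, for reference only (a sketch;
 * the helper name is made up and the shift count is assumed to already be in
 * [0, 63]).  It also shows why c == 0 is singled out above: reverse_count
 * would be 32 in that case, and a 32-bit shift by 32 bits is not well-defined.
 *
 *    static uint64_t
 *    ishl64_ref(uint64_t x, unsigned c)
 *    {
 *       uint32_t lo = (uint32_t)x, hi = (uint32_t)(x >> 32);
 *       unsigned reverse_count = c >= 32 ? c - 32 : 32 - c;
 *
 *       if (c == 0)
 *          return x;
 *       if (c < 32)
 *          return ((uint64_t)((hi << c) | (lo >> reverse_count)) << 32) |
 *                 (lo << c);
 *       return (uint64_t)(lo << reverse_count) << 32;
 *    }
 */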
208
209 static nir_ssa_def *
210 lower_ishr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
211 {
212 /* Implemented as
213 *
214 * uint64_t arshift(uint64_t x, int c)
215 * {
216 * if (c == 0) return x;
217 *
218 * uint32_t lo = LO(x);
219 * int32_t hi = HI(x);
220 *
221 * if (c < 32) {
222 * uint32_t lo_shifted = lo >> c;
223 * uint32_t hi_shifted = hi >> c;
224 * uint32_t hi_shifted_lo = hi << abs(32 - c);
225 * return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
226 * } else {
227 * uint32_t hi_shifted = hi >> 31;
228 * uint32_t hi_shifted_lo = hi >> abs(32 - c);
229 * return pack_64(hi_shifted, hi_shifted_lo);
230 * }
231 * }
232 */
233 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
234 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
235
236 nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32)));
237 nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
238 nir_ssa_def *hi_shifted = nir_ishr(b, x_hi, y);
239 nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);
240
241 nir_ssa_def *res_if_lt_32 =
242 nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
243 hi_shifted);
244 nir_ssa_def *res_if_ge_32 =
245 nir_pack_64_2x32_split(b, nir_ishr(b, x_hi, reverse_count),
246 nir_ishr(b, x_hi, nir_imm_int(b, 31)));
247
248 return nir_bcsel(b,
249 nir_ieq(b, y, nir_imm_int(b, 0)), x,
250 nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
251 res_if_ge_32, res_if_lt_32));
252 }
253
254 static nir_ssa_def *
255 lower_ushr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
256 {
257 /* Implemented as
258 *
259 * uint64_t rshift(uint64_t x, int c)
260 * {
261 * if (c == 0) return x;
262 *
263 * uint32_t lo = LO(x), hi = HI(x);
264 *
265 * if (c < 32) {
266 * uint32_t lo_shifted = lo >> c;
267 * uint32_t hi_shifted = hi >> c;
268 * uint32_t hi_shifted_lo = hi << abs(32 - c);
269 * return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
270 * } else {
271 * uint32_t hi_shifted_lo = hi >> abs(32 - c);
272 * return pack_64(0, hi_shifted_lo);
273 * }
274 * }
275 */
276
277 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
278 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
279
280 nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32)));
281 nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
282 nir_ssa_def *hi_shifted = nir_ushr(b, x_hi, y);
283 nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);
284
285 nir_ssa_def *res_if_lt_32 =
286 nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
287 hi_shifted);
288 nir_ssa_def *res_if_ge_32 =
289 nir_pack_64_2x32_split(b, nir_ushr(b, x_hi, reverse_count),
290 nir_imm_int(b, 0));
291
292 return nir_bcsel(b,
293 nir_ieq(b, y, nir_imm_int(b, 0)), x,
294 nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
295 res_if_ge_32, res_if_lt_32));
296 }
297
298 static nir_ssa_def *
299 lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
300 {
301 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
302 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
303 nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
304 nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
305
306 nir_ssa_def *res_lo = nir_iadd(b, x_lo, y_lo);
307 nir_ssa_def *carry = nir_b2i32(b, nir_ult(b, res_lo, x_lo));
308 nir_ssa_def *res_hi = nir_iadd(b, carry, nir_iadd(b, x_hi, y_hi));
309
310 return nir_pack_64_2x32_split(b, res_lo, res_hi);
311 }
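
/* Host-side sketch of the same carry computation (reference only; the helper
 * is hypothetical):
 *
 *    static uint64_t
 *    iadd64_ref(uint64_t x, uint64_t y)
 *    {
 *       uint32_t x_lo = (uint32_t)x, x_hi = (uint32_t)(x >> 32);
 *       uint32_t y_lo = (uint32_t)y, y_hi = (uint32_t)(y >> 32);
 *
 *       uint32_t res_lo = x_lo + y_lo;
 *       uint32_t carry  = res_lo < x_lo;   // unsigned wrap means a carry out
 *       uint32_t res_hi = x_hi + y_hi + carry;
 *       return ((uint64_t)res_hi << 32) | res_lo;
 *    }
 *
 * The same bits are correct for signed and unsigned addition, since the low
 * 64 bits of a two's complement sum do not depend on signedness.
 */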
312
313 static nir_ssa_def *
314 lower_isub64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
315 {
316 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
317 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
318 nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
319 nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
320
321 nir_ssa_def *res_lo = nir_isub(b, x_lo, y_lo);
322 nir_ssa_def *borrow = nir_ineg(b, nir_b2i32(b, nir_ult(b, x_lo, y_lo)));
323 nir_ssa_def *res_hi = nir_iadd(b, nir_isub(b, x_hi, y_hi), borrow);
324
325 return nir_pack_64_2x32_split(b, res_lo, res_hi);
326 }
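
/* Subtraction mirror of the sketch above: the borrow is materialized as 0 or
 * -1 (ineg of the compare result), so a single extra iadd folds it into the
 * high word:
 *
 *    uint32_t res_lo = x_lo - y_lo;
 *    uint32_t borrow = -(uint32_t)(x_lo < y_lo);   // 0 or 0xffffffff
 *    uint32_t res_hi = x_hi - y_hi + borrow;
 */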
327
328 static nir_ssa_def *
329 lower_ineg64(nir_builder *b, nir_ssa_def *x)
330 {
331 /* Since isub is the same number of instructions (with better dependencies)
332 * as iadd, subtraction is actually more efficient for ineg than the usual
333 * 2's complement "flip the bits and add one".
334 */
335 return lower_isub64(b, nir_imm_int64(b, 0), x);
336 }
337
338 static nir_ssa_def *
339 lower_iabs64(nir_builder *b, nir_ssa_def *x)
340 {
341 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
342 nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
343 return nir_bcsel(b, x_is_neg, nir_ineg(b, x), x);
344 }
345
346 static nir_ssa_def *
347 lower_int64_compare(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *y)
348 {
349 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
350 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
351 nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
352 nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
353
354 switch (op) {
355 case nir_op_ieq:
356 return nir_iand(b, nir_ieq(b, x_hi, y_hi), nir_ieq(b, x_lo, y_lo));
357 case nir_op_ine:
358 return nir_ior(b, nir_ine(b, x_hi, y_hi), nir_ine(b, x_lo, y_lo));
359 case nir_op_ult:
360 return nir_ior(b, nir_ult(b, x_hi, y_hi),
361 nir_iand(b, nir_ieq(b, x_hi, y_hi),
362 nir_ult(b, x_lo, y_lo)));
363 case nir_op_ilt:
364 return nir_ior(b, nir_ilt(b, x_hi, y_hi),
365 nir_iand(b, nir_ieq(b, x_hi, y_hi),
366 nir_ult(b, x_lo, y_lo)));
367 break;
368 case nir_op_uge:
369 /* Lower as !(x < y) in the hopes of better CSE */
370 return nir_inot(b, lower_int64_compare(b, nir_op_ult, x, y));
371 case nir_op_ige:
372 /* Lower as !(x < y) in the hopes of better CSE */
373 return nir_inot(b, lower_int64_compare(b, nir_op_ilt, x, y));
374 default:
375 unreachable("Invalid comparison");
376 }
377 }
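
/* Reference behaviour for the unsigned case, as a host C sketch (hypothetical
 * helper): the high words decide the result unless they are equal, in which
 * case the low words do; the low words are compared unsigned even for ilt,
 * since they only hold magnitude bits.
 *
 *    static bool
 *    ult64_ref(uint64_t x, uint64_t y)
 *    {
 *       uint32_t x_lo = (uint32_t)x, x_hi = (uint32_t)(x >> 32);
 *       uint32_t y_lo = (uint32_t)y, y_hi = (uint32_t)(y >> 32);
 *       return x_hi < y_hi || (x_hi == y_hi && x_lo < y_lo);
 *    }
 *
 * For ilt only the high-word comparison becomes signed.
 */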
378
379 static nir_ssa_def *
380 lower_umax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
381 {
382 return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), y, x);
383 }
384
385 static nir_ssa_def *
386 lower_imax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
387 {
388 return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), y, x);
389 }
390
391 static nir_ssa_def *
392 lower_umin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
393 {
394 return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), x, y);
395 }
396
397 static nir_ssa_def *
398 lower_imin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
399 {
400 return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), x, y);
401 }
402
403 static nir_ssa_def *
404 lower_mul_2x32_64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y,
405 bool sign_extend)
406 {
407 nir_ssa_def *res_hi = sign_extend ? nir_imul_high(b, x, y)
408 : nir_umul_high(b, x, y);
409
410 return nir_pack_64_2x32_split(b, nir_imul(b, x, y), res_hi);
411 }
412
413 static nir_ssa_def *
414 lower_imul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
415 {
416 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
417 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
418 nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
419 nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
420
421 nir_ssa_def *mul_lo = nir_umul_2x32_64(b, x_lo, y_lo);
422 nir_ssa_def *res_hi = nir_iadd(b, nir_unpack_64_2x32_split_y(b, mul_lo),
423 nir_iadd(b, nir_imul(b, x_lo, y_hi),
424 nir_imul(b, x_hi, y_lo)));
425
426 return nir_pack_64_2x32_split(b, nir_unpack_64_2x32_split_x(b, mul_lo),
427 res_hi);
428 }
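
/* Host C sketch of the same computation (hypothetical helper).  Only three
 * 32-bit multiplies are needed because every term of x_hi * y_hi lands above
 * bit 63 and is discarded:
 *
 *    static uint64_t
 *    imul64_ref(uint64_t x, uint64_t y)
 *    {
 *       uint32_t x_lo = (uint32_t)x, x_hi = (uint32_t)(x >> 32);
 *       uint32_t y_lo = (uint32_t)y, y_hi = (uint32_t)(y >> 32);
 *
 *       uint64_t mul_lo = (uint64_t)x_lo * y_lo;
 *       uint32_t res_hi = (uint32_t)(mul_lo >> 32) +
 *                         x_lo * y_hi + x_hi * y_lo;   // 32-bit wrap is fine
 *       return ((uint64_t)res_hi << 32) | (uint32_t)mul_lo;
 *    }
 *
 * As with iadd, the low 64 bits are the same for signed and unsigned inputs.
 */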
429
430 static nir_ssa_def *
431 lower_mul_high64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y,
432 bool sign_extend)
433 {
434 nir_ssa_def *x32[4], *y32[4];
435 x32[0] = nir_unpack_64_2x32_split_x(b, x);
436 x32[1] = nir_unpack_64_2x32_split_y(b, x);
437 if (sign_extend) {
438 x32[2] = x32[3] = nir_ishr(b, x32[1], nir_imm_int(b, 31));
439 } else {
440 x32[2] = x32[3] = nir_imm_int(b, 0);
441 }
442
443 y32[0] = nir_unpack_64_2x32_split_x(b, y);
444 y32[1] = nir_unpack_64_2x32_split_y(b, y);
445 if (sign_extend) {
446 y32[2] = y32[3] = nir_ishr(b, y32[1], nir_imm_int(b, 31));
447 } else {
448 y32[2] = y32[3] = nir_imm_int(b, 0);
449 }
450
451 nir_ssa_def *res[8] = { NULL, };
452
453 /* Yes, the following generates a pile of code. However, we throw res[0]
454 * and res[1] away in the end and, if we're in the umul case, four of our
455 * eight dword operands will be constant zero and opt_algebraic will clean
456 * this up nicely.
457 */
458 for (unsigned i = 0; i < 4; i++) {
459 nir_ssa_def *carry = NULL;
460 for (unsigned j = 0; j < 4; j++) {
461 /* The maximum values of x32[i] and y32[i] are UINT32_MAX so the
462 * maximum value of tmp is UINT32_MAX * UINT32_MAX. The maximum
463 * value that will fit in tmp is
464 *
465 * UINT64_MAX = UINT32_MAX << 32 + UINT32_MAX
466 * = UINT32_MAX * (UINT32_MAX + 1) + UINT32_MAX
467 * = UINT32_MAX * UINT32_MAX + 2 * UINT32_MAX
468 *
469 * so we're guaranteed that we can add in two more 32-bit values
470 * without overflowing tmp.
471 */
472 nir_ssa_def *tmp = nir_umul_2x32_64(b, x32[i], y32[j]);
473
474 if (res[i + j])
475 tmp = nir_iadd(b, tmp, nir_u2u64(b, res[i + j]));
476 if (carry)
477 tmp = nir_iadd(b, tmp, carry);
478 res[i + j] = nir_u2u32(b, tmp);
479 carry = nir_ushr(b, tmp, nir_imm_int(b, 32));
480 }
481 res[i + 4] = nir_u2u32(b, carry);
482 }
483
484 return nir_pack_64_2x32_split(b, res[2], res[3]);
485 }
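
/* For reference, the unsigned variant of the loops above on host integers
 * (a sketch only; the signed case differs just in how the two upper digits
 * are filled in):
 *
 *    static uint64_t
 *    umul_high64_ref(uint64_t x, uint64_t y)
 *    {
 *       uint32_t x32[4] = { (uint32_t)x, (uint32_t)(x >> 32), 0, 0 };
 *       uint32_t y32[4] = { (uint32_t)y, (uint32_t)(y >> 32), 0, 0 };
 *       uint32_t res[8] = { 0 };
 *
 *       for (int i = 0; i < 4; i++) {
 *          uint64_t carry = 0;
 *          for (int j = 0; j < 4; j++) {
 *             uint64_t tmp = (uint64_t)x32[i] * y32[j] + res[i + j] + carry;
 *             res[i + j] = (uint32_t)tmp;
 *             carry = tmp >> 32;
 *          }
 *          res[i + 4] = (uint32_t)carry;
 *       }
 *
 *       // res[0..3] holds the full 128-bit product; return its upper half
 *       return ((uint64_t)res[3] << 32) | res[2];
 *    }
 */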
486
487 static nir_ssa_def *
488 lower_isign64(nir_builder *b, nir_ssa_def *x)
489 {
490 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
491 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
492
493 nir_ssa_def *is_non_zero = nir_i2b(b, nir_ior(b, x_lo, x_hi));
494 nir_ssa_def *res_hi = nir_ishr(b, x_hi, nir_imm_int(b, 31));
495 nir_ssa_def *res_lo = nir_ior(b, res_hi, nir_b2i32(b, is_non_zero));
496
497 return nir_pack_64_2x32_split(b, res_lo, res_hi);
498 }
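
/* The trick above, spelled out on host integers (a sketch; hypothetical
 * helper): x_hi >> 31 replicates the sign into all 32 bits, and OR-ing in a
 * 1 whenever any bit is set turns that into -1, 0 or +1.
 *
 *    static int64_t
 *    isign64_ref(int64_t v)
 *    {
 *       uint32_t lo = (uint32_t)v;
 *       int32_t  hi = (int32_t)((uint64_t)v >> 32);
 *
 *       int32_t res_hi = hi >> 31;              // 0 or -1 (arithmetic shift)
 *       int32_t res_lo = res_hi | ((lo | (uint32_t)hi) != 0);
 *       return ((int64_t)res_hi << 32) | (uint32_t)res_lo;
 *    }
 */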
499
500 static void
501 lower_udiv64_mod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d,
502 nir_ssa_def **q, nir_ssa_def **r)
503 {
504 /* TODO: We should specially handle the case where the denominator is a
505 * constant. In that case, we should be able to reduce it to a multiply by
506 * a constant, some shifts, and an add.
507 */
508 nir_ssa_def *n_lo = nir_unpack_64_2x32_split_x(b, n);
509 nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
510 nir_ssa_def *d_lo = nir_unpack_64_2x32_split_x(b, d);
511 nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);
512
513 nir_ssa_def *q_lo = nir_imm_zero(b, n->num_components, 32);
514 nir_ssa_def *q_hi = nir_imm_zero(b, n->num_components, 32);
515
516 nir_ssa_def *n_hi_before_if = n_hi;
517 nir_ssa_def *q_hi_before_if = q_hi;
518
519 /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
520 * greater than 32 bits to occur. If the upper 32 bits of the numerator
521 * are zero, it is impossible for (denom << [63, 32]) <= numer unless
522 * denom == 0.
523 */
524 nir_ssa_def *need_high_div =
525 nir_iand(b, nir_ieq(b, d_hi, nir_imm_int(b, 0)), nir_uge(b, n_hi, d_lo));
526 nir_push_if(b, nir_bany(b, need_high_div));
527 {
528 /* If we only have one component, then the bany above goes away and
529 * this is always true within the if statement.
530 */
531 if (n->num_components == 1)
532 need_high_div = nir_imm_true(b);
533
534 nir_ssa_def *log2_d_lo = nir_ufind_msb(b, d_lo);
535
536 for (int i = 31; i >= 0; i--) {
537 /* if ((d.x << i) <= n.y) {
538 * n.y -= d.x << i;
539 * quot.y |= 1U << i;
540 * }
541 */
542 nir_ssa_def *d_shift = nir_ishl(b, d_lo, nir_imm_int(b, i));
543 nir_ssa_def *new_n_hi = nir_isub(b, n_hi, d_shift);
544 nir_ssa_def *new_q_hi = nir_ior(b, q_hi, nir_imm_int(b, 1u << i));
545 nir_ssa_def *cond = nir_iand(b, need_high_div,
546 nir_uge(b, n_hi, d_shift));
547 if (i != 0) {
548 /* log2_d_lo is always <= 31, so we don't need to bother with it
549 * in the last iteration.
550 */
551 cond = nir_iand(b, cond,
552 nir_ige(b, nir_imm_int(b, 31 - i), log2_d_lo));
553 }
554 n_hi = nir_bcsel(b, cond, new_n_hi, n_hi);
555 q_hi = nir_bcsel(b, cond, new_q_hi, q_hi);
556 }
557 }
558 nir_pop_if(b, NULL);
559 n_hi = nir_if_phi(b, n_hi, n_hi_before_if);
560 q_hi = nir_if_phi(b, q_hi, q_hi_before_if);
561
562 nir_ssa_def *log2_denom = nir_ufind_msb(b, d_hi);
563
564 n = nir_pack_64_2x32_split(b, n_lo, n_hi);
565 d = nir_pack_64_2x32_split(b, d_lo, d_hi);
566 for (int i = 31; i >= 0; i--) {
567 /* if ((d64 << i) <= n64) {
568 * n64 -= d64 << i;
569 * quot.x |= 1U << i;
570 * }
571 */
572 nir_ssa_def *d_shift = nir_ishl(b, d, nir_imm_int(b, i));
573 nir_ssa_def *new_n = nir_isub(b, n, d_shift);
574 nir_ssa_def *new_q_lo = nir_ior(b, q_lo, nir_imm_int(b, 1u << i));
575 nir_ssa_def *cond = nir_uge(b, n, d_shift);
576 if (i != 0) {
577 /* log2_denom is always <= 31, so we don't need to bother with it
578 * in the last iteration.
579 */
580 cond = nir_iand(b, cond,
581 nir_ige(b, nir_imm_int(b, 31 - i), log2_denom));
582 }
583 n = nir_bcsel(b, cond, new_n, n);
584 q_lo = nir_bcsel(b, cond, new_q_lo, q_lo);
585 }
586
587 *q = nir_pack_64_2x32_split(b, q_lo, q_hi);
588 *r = n;
589 }
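
/* The two unrolled loops above are classic restoring long division, one
 * quotient bit per step: first the high 32 quotient bits using only 32-bit
 * arithmetic (only possible, and only needed, when need_high_div holds),
 * then the low 32 bits on full 64-bit values.  The core idea as a
 * single-phase host C sketch (hypothetical helper; d != 0 assumed,
 * __builtin_clzll assumes GCC/Clang):
 *
 *    static void
 *    udivmod64_ref(uint64_t n, uint64_t d, uint64_t *q, uint64_t *r)
 *    {
 *       uint64_t quot = 0;
 *
 *       for (int i = __builtin_clzll(d); i >= 0; i--) {
 *          // same purpose as the log2 guards above: never shift the
 *          // divisor's top bit out of the word
 *          if ((d << i) <= n) {
 *             n -= d << i;
 *             quot |= 1ull << i;
 *          }
 *       }
 *       *q = quot;
 *       *r = n;
 *    }
 */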
590
591 static nir_ssa_def *
592 lower_udiv64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
593 {
594 nir_ssa_def *q, *r;
595 lower_udiv64_mod64(b, n, d, &q, &r);
596 return q;
597 }
598
599 static nir_ssa_def *
600 lower_idiv64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
601 {
602 nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
603 nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);
604
605 nir_ssa_def *negate = nir_ine(b, nir_ilt(b, n_hi, nir_imm_int(b, 0)),
606 nir_ilt(b, d_hi, nir_imm_int(b, 0)));
607 nir_ssa_def *q, *r;
608 lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r);
609 return nir_bcsel(b, negate, nir_ineg(b, q), q);
610 }
611
612 static nir_ssa_def *
613 lower_umod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
614 {
615 nir_ssa_def *q, *r;
616 lower_udiv64_mod64(b, n, d, &q, &r);
617 return r;
618 }
619
620 static nir_ssa_def *
621 lower_imod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
622 {
623 nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
624 nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);
625 nir_ssa_def *n_is_neg = nir_ilt(b, n_hi, nir_imm_int(b, 0));
626 nir_ssa_def *d_is_neg = nir_ilt(b, d_hi, nir_imm_int(b, 0));
627
628 nir_ssa_def *q, *r;
629 lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r);
630
631 nir_ssa_def *rem = nir_bcsel(b, n_is_neg, nir_ineg(b, r), r);
632
633 return nir_bcsel(b, nir_ieq(b, r, nir_imm_int64(b, 0)), nir_imm_int64(b, 0),
634 nir_bcsel(b, nir_ieq(b, n_is_neg, d_is_neg), rem,
635 nir_iadd(b, rem, d)));
636 }
637
638 static nir_ssa_def *
639 lower_irem64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
640 {
641 nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
642 nir_ssa_def *n_is_neg = nir_ilt(b, n_hi, nir_imm_int(b, 0));
643
644 nir_ssa_def *q, *r;
645 lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r);
646 return nir_bcsel(b, n_is_neg, nir_ineg(b, r), r);
647 }
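
/* imod and irem above both reuse the unsigned divide on |n| and |d| and
 * differ only in how the sign is reapplied: imod follows the divisor's sign,
 * irem the dividend's.  Expected results, for illustration:
 *
 *    imod(-7,  3) ==  2      irem(-7,  3) == -1
 *    imod( 7, -3) == -2      irem( 7, -3) ==  1
 */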
648
649 static nir_ssa_def *
650 lower_extract(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *c)
651 {
652 assert(op == nir_op_extract_u8 || op == nir_op_extract_i8 ||
653 op == nir_op_extract_u16 || op == nir_op_extract_i16);
654
655 const int chunk = nir_src_as_uint(nir_src_for_ssa(c));
656 const int chunk_bits =
657 (op == nir_op_extract_u8 || op == nir_op_extract_i8) ? 8 : 16;
658 const int num_chunks_in_32 = 32 / chunk_bits;
659
660 nir_ssa_def *extract32;
661 if (chunk < num_chunks_in_32) {
662 extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_x(b, x),
663 nir_imm_int(b, chunk),
664 NULL, NULL);
665 } else {
666 extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_y(b, x),
667 nir_imm_int(b, chunk - num_chunks_in_32),
668 NULL, NULL);
669 }
670
671 if (op == nir_op_extract_i8 || op == nir_op_extract_i16)
672 return lower_i2i64(b, extract32);
673 else
674 return lower_u2u64(b, extract32);
675 }
676
677 static nir_ssa_def *
678 lower_ufind_msb64(nir_builder *b, nir_ssa_def *x)
679 {
680
681 nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
682 nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
683 nir_ssa_def *lo_count = nir_ufind_msb(b, x_lo);
684 nir_ssa_def *hi_count = nir_ufind_msb(b, x_hi);
685 nir_ssa_def *valid_hi_bits = nir_ine(b, x_hi, nir_imm_int(b, 0));
686 nir_ssa_def *hi_res = nir_iadd(b, nir_imm_intN_t(b, 32, 32), hi_count);
687 return nir_bcsel(b, valid_hi_bits, hi_res, lo_count);
688 }
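
/* Equivalent host C (a sketch; __builtin_clz assumes GCC/Clang, and the
 * helper, like nir_ufind_msb, returns -1 for zero):
 *
 *    static int32_t
 *    ufind_msb64_ref(uint64_t x)
 *    {
 *       uint32_t lo = (uint32_t)x, hi = (uint32_t)(x >> 32);
 *       int32_t lo_count = lo ? 31 - __builtin_clz(lo) : -1;
 *       int32_t hi_count = hi ? 31 - __builtin_clz(hi) : -1;
 *       return hi ? 32 + hi_count : lo_count;
 *    }
 */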
689
690 static nir_ssa_def *
691 lower_2f(nir_builder *b, nir_ssa_def *x, unsigned dest_bit_size,
692 bool src_is_signed)
693 {
694 nir_ssa_def *x_sign = NULL;
695
696 if (src_is_signed) {
697 x_sign = nir_bcsel(b, COND_LOWER_CMP(b, ilt, x, nir_imm_int64(b, 0)),
698 nir_imm_floatN_t(b, -1, dest_bit_size),
699 nir_imm_floatN_t(b, 1, dest_bit_size));
700 x = COND_LOWER_OP(b, iabs, x);
701 }
702
703 nir_ssa_def *exp = COND_LOWER_OP(b, ufind_msb, x);
704 unsigned significand_bits;
705
706 switch (dest_bit_size) {
707 case 32:
708 significand_bits = 23;
709 break;
710 case 16:
711 significand_bits = 10;
712 break;
713 default:
714 unreachable("Invalid dest_bit_size");
715 }
716
717 /* We keep one more bit than can fit in the significand field to let the
718 * u2f32 conversion do the rounding for us.
719 */
720 nir_ssa_def *discard =
721 nir_imax(b, nir_isub(b, exp, nir_imm_int(b, significand_bits + 1)),
722 nir_imm_int(b, 0));
723
724 /* Part of the "round to nearest" has to be taken care of before we discard
725 * the LSB, and that's what this extra iadd is for.
726 * "Round to nearest even" is handled by u2f. That works because the
727 * shifted value either fits in the significand field (which means no
728 * rounding is required) or contains one extra bit that forces the
729 * conversion op to round things properly.
730 */
731 nir_ssa_def *add = COND_LOWER_OP(b, ishl, nir_imm_int64(b, 1), discard);
732 add = COND_LOWER_OP(b, isub, add, nir_imm_int64(b, 1));
733 nir_ssa_def *rounded_x = COND_LOWER_OP(b, iadd, x, add);
734
735 /* Signed values can't overflow because we've saved the sign and promoted
736 * them to unsigned values.
737 */
738 if (!src_is_signed) {
739 nir_ssa_def *overflow = COND_LOWER_CMP(b, ult, rounded_x, x);
740 rounded_x = COND_LOWER_OP(b, bcsel, overflow,
741 nir_imm_int64(b, UINT64_MAX), rounded_x);
742 }
743
744 nir_ssa_def *significand = COND_LOWER_OP(b, ushr, rounded_x, discard);
745 significand = COND_LOWER_CAST(b, u2u32, significand);
746
747 nir_ssa_def *res;
748
749 if (dest_bit_size == 32)
750 res = nir_fmul(b, nir_u2f32(b, significand),
751 nir_fexp2(b, nir_u2f32(b, discard)));
752 else
753 res = nir_fmul(b, nir_u2f16(b, significand),
754 nir_fexp2(b, nir_u2f16(b, discard)));
755
756 if (src_is_signed)
757 res = nir_fmul(b, res, x_sign);
758
759 return res;
760 }
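
/* The unsigned, 32-bit-destination case above as host C (a sketch;
 * hypothetical helper, __builtin_clzll and exp2f assume GCC/Clang with libm).
 * The value is reduced to at most 25 significant bits, one more than a float
 * can hold exactly, so the final conversion does the round-to-nearest-even
 * step; the pre-added (1 << discard) - 1 accounts for the low bits that are
 * about to be dropped:
 *
 *    static float
 *    u64_to_f32_ref(uint64_t x)
 *    {
 *       if (x == 0)
 *          return 0.0f;
 *
 *       int exp = 63 - __builtin_clzll(x);        // ufind_msb
 *       int discard = exp > 24 ? exp - 24 : 0;    // 24 == significand_bits + 1
 *       uint64_t rounded = x + (((uint64_t)1 << discard) - 1);
 *       if (rounded < x)                          // unsigned overflow clamp
 *          rounded = UINT64_MAX;
 *
 *       uint32_t significand = (uint32_t)(rounded >> discard);
 *       return (float)significand * exp2f((float)discard);
 *    }
 */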
761
762 static nir_ssa_def *
763 lower_f2(nir_builder *b, nir_ssa_def *x, bool dst_is_signed)
764 {
765 assert(x->bit_size == 16 || x->bit_size == 32);
766 nir_ssa_def *x_sign = NULL;
767
768 if (dst_is_signed)
769 x_sign = nir_fsign(b, x);
770 else
771 x = nir_fmin(b, x, nir_imm_floatN_t(b, UINT64_MAX, x->bit_size));
772
773 x = nir_ftrunc(b, x);
774
775 if (dst_is_signed) {
776 x = nir_fmin(b, x, nir_imm_floatN_t(b, INT64_MAX, x->bit_size));
777 x = nir_fmax(b, x, nir_imm_floatN_t(b, INT64_MIN, x->bit_size));
778 x = nir_fabs(b, x);
779 }
780
781 nir_ssa_def *div = nir_imm_floatN_t(b, 1ULL << 32, x->bit_size);
782 nir_ssa_def *res_hi = nir_f2u32(b, nir_fdiv(b, x, div));
783 nir_ssa_def *res_lo = nir_f2u32(b, nir_frem(b, x, div));
784 nir_ssa_def *res = nir_pack_64_2x32_split(b, res_lo, res_hi);
785
786 if (dst_is_signed)
787 res = nir_bcsel(b, nir_flt(b, x_sign, nir_imm_float(b, 0)),
788 nir_ineg(b, res), res);
789
790 return res;
791 }
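
/* The unsigned direction above as host C for a 32-bit float input (a sketch;
 * hypothetical helper, NaN and out-of-range inputs are not handled, just as
 * in the lowering).  Dividing by 2^32 is exact since it is a power of two,
 * and fmodf is exact as well, so the two 32-bit conversions recover the two
 * halves directly:
 *
 *    static uint64_t
 *    f32_to_u64_ref(float x)
 *    {
 *       const float div = 4294967296.0f;   // 2^32
 *
 *       x = truncf(fminf(x, (float)UINT64_MAX));
 *       uint32_t hi = (uint32_t)(x / div);
 *       uint32_t lo = (uint32_t)fmodf(x, div);
 *       return ((uint64_t)hi << 32) | lo;
 *    }
 */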
792
793 nir_lower_int64_options
794 nir_lower_int64_op_to_options_mask(nir_op opcode)
795 {
796 switch (opcode) {
797 case nir_op_imul:
798 case nir_op_amul:
799 return nir_lower_imul64;
800 case nir_op_imul_2x32_64:
801 case nir_op_umul_2x32_64:
802 return nir_lower_imul_2x32_64;
803 case nir_op_imul_high:
804 case nir_op_umul_high:
805 return nir_lower_imul_high64;
806 case nir_op_isign:
807 return nir_lower_isign64;
808 case nir_op_udiv:
809 case nir_op_idiv:
810 case nir_op_umod:
811 case nir_op_imod:
812 case nir_op_irem:
813 return nir_lower_divmod64;
814 case nir_op_b2i64:
815 case nir_op_i2b1:
816 case nir_op_i2i8:
817 case nir_op_i2i16:
818 case nir_op_i2i32:
819 case nir_op_i2i64:
820 case nir_op_u2u8:
821 case nir_op_u2u16:
822 case nir_op_u2u32:
823 case nir_op_u2u64:
824 case nir_op_i2f32:
825 case nir_op_u2f32:
826 case nir_op_i2f16:
827 case nir_op_u2f16:
828 case nir_op_f2i64:
829 case nir_op_f2u64:
830 case nir_op_bcsel:
831 return nir_lower_mov64;
832 case nir_op_ieq:
833 case nir_op_ine:
834 case nir_op_ult:
835 case nir_op_ilt:
836 case nir_op_uge:
837 case nir_op_ige:
838 return nir_lower_icmp64;
839 case nir_op_iadd:
840 case nir_op_isub:
841 return nir_lower_iadd64;
842 case nir_op_imin:
843 case nir_op_imax:
844 case nir_op_umin:
845 case nir_op_umax:
846 case nir_op_imin3:
847 case nir_op_imax3:
848 case nir_op_umin3:
849 case nir_op_umax3:
850 case nir_op_imed3:
851 case nir_op_umed3:
852 return nir_lower_minmax64;
853 case nir_op_iabs:
854 return nir_lower_iabs64;
855 case nir_op_ineg:
856 return nir_lower_ineg64;
857 case nir_op_iand:
858 case nir_op_ior:
859 case nir_op_ixor:
860 case nir_op_inot:
861 return nir_lower_logic64;
862 case nir_op_ishl:
863 case nir_op_ishr:
864 case nir_op_ushr:
865 return nir_lower_shift64;
866 case nir_op_extract_u8:
867 case nir_op_extract_i8:
868 case nir_op_extract_u16:
869 case nir_op_extract_i16:
870 return nir_lower_extract64;
871 case nir_op_ufind_msb:
872 return nir_lower_ufind_msb64;
873 default:
874 return 0;
875 }
876 }
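
/* How a backend typically opts in (an illustrative sketch; the struct and
 * field are the real NIR compiler options, the particular flag choice is
 * made up):
 *
 *    static const nir_shader_compiler_options demo_options = {
 *       .lower_int64_options = nir_lower_imul64 |
 *                              nir_lower_divmod64 |
 *                              nir_lower_shift64,
 *    };
 *
 * nir_lower_int64() then rewrites exactly those 64-bit ALU ops whose mask
 * bit, as computed above, appears in lower_int64_options.
 */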
877
878 static nir_ssa_def *
879 lower_int64_alu_instr(nir_builder *b, nir_instr *instr, void *_state)
880 {
881 nir_alu_instr *alu = nir_instr_as_alu(instr);
882
883 nir_ssa_def *src[4];
884 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
885 src[i] = nir_ssa_for_alu_src(b, alu, i);
886
887 switch (alu->op) {
888 case nir_op_imul:
889 case nir_op_amul:
890 return lower_imul64(b, src[0], src[1]);
891 case nir_op_imul_2x32_64:
892 return lower_mul_2x32_64(b, src[0], src[1], true);
893 case nir_op_umul_2x32_64:
894 return lower_mul_2x32_64(b, src[0], src[1], false);
895 case nir_op_imul_high:
896 return lower_mul_high64(b, src[0], src[1], true);
897 case nir_op_umul_high:
898 return lower_mul_high64(b, src[0], src[1], false);
899 case nir_op_isign:
900 return lower_isign64(b, src[0]);
901 case nir_op_udiv:
902 return lower_udiv64(b, src[0], src[1]);
903 case nir_op_idiv:
904 return lower_idiv64(b, src[0], src[1]);
905 case nir_op_umod:
906 return lower_umod64(b, src[0], src[1]);
907 case nir_op_imod:
908 return lower_imod64(b, src[0], src[1]);
909 case nir_op_irem:
910 return lower_irem64(b, src[0], src[1]);
911 case nir_op_b2i64:
912 return lower_b2i64(b, src[0]);
913 case nir_op_i2b1:
914 return lower_i2b(b, src[0]);
915 case nir_op_i2i8:
916 return lower_i2i8(b, src[0]);
917 case nir_op_i2i16:
918 return lower_i2i16(b, src[0]);
919 case nir_op_i2i32:
920 return lower_i2i32(b, src[0]);
921 case nir_op_i2i64:
922 return lower_i2i64(b, src[0]);
923 case nir_op_u2u8:
924 return lower_u2u8(b, src[0]);
925 case nir_op_u2u16:
926 return lower_u2u16(b, src[0]);
927 case nir_op_u2u32:
928 return lower_u2u32(b, src[0]);
929 case nir_op_u2u64:
930 return lower_u2u64(b, src[0]);
931 case nir_op_bcsel:
932 return lower_bcsel64(b, src[0], src[1], src[2]);
933 case nir_op_ieq:
934 case nir_op_ine:
935 case nir_op_ult:
936 case nir_op_ilt:
937 case nir_op_uge:
938 case nir_op_ige:
939 return lower_int64_compare(b, alu->op, src[0], src[1]);
940 case nir_op_iadd:
941 return lower_iadd64(b, src[0], src[1]);
942 case nir_op_isub:
943 return lower_isub64(b, src[0], src[1]);
944 case nir_op_imin:
945 return lower_imin64(b, src[0], src[1]);
946 case nir_op_imax:
947 return lower_imax64(b, src[0], src[1]);
948 case nir_op_umin:
949 return lower_umin64(b, src[0], src[1]);
950 case nir_op_umax:
951 return lower_umax64(b, src[0], src[1]);
952 case nir_op_imin3:
953 return lower_imin64(b, src[0], lower_imin64(b, src[1], src[2]));
954 case nir_op_imax3:
955 return lower_imax64(b, src[0], lower_imax64(b, src[1], src[2]));
956 case nir_op_umin3:
957 return lower_umin64(b, src[0], lower_umin64(b, src[1], src[2]));
958 case nir_op_umax3:
959 return lower_umax64(b, src[0], lower_umax64(b, src[1], src[2]));
960 case nir_op_imed3:
961 return lower_imax64(b, lower_imin64(b, lower_imax64(b, src[0], src[1]), src[2]), lower_imin64(b, src[0], src[1]));
962 case nir_op_umed3:
963 return lower_umax64(b, lower_umin64(b, lower_umax64(b, src[0], src[1]), src[2]), lower_umin64(b, src[0], src[1]));
964 case nir_op_iabs:
965 return lower_iabs64(b, src[0]);
966 case nir_op_ineg:
967 return lower_ineg64(b, src[0]);
968 case nir_op_iand:
969 return lower_iand64(b, src[0], src[1]);
970 case nir_op_ior:
971 return lower_ior64(b, src[0], src[1]);
972 case nir_op_ixor:
973 return lower_ixor64(b, src[0], src[1]);
974 case nir_op_inot:
975 return lower_inot64(b, src[0]);
976 case nir_op_ishl:
977 return lower_ishl64(b, src[0], src[1]);
978 case nir_op_ishr:
979 return lower_ishr64(b, src[0], src[1]);
980 case nir_op_ushr:
981 return lower_ushr64(b, src[0], src[1]);
982 case nir_op_extract_u8:
983 case nir_op_extract_i8:
984 case nir_op_extract_u16:
985 case nir_op_extract_i16:
986 return lower_extract(b, alu->op, src[0], src[1]);
987 case nir_op_ufind_msb:
988 return lower_ufind_msb64(b, src[0]);
989 case nir_op_i2f64:
990 case nir_op_i2f32:
991 case nir_op_i2f16:
992 return lower_2f(b, src[0], nir_dest_bit_size(alu->dest.dest), true);
993 case nir_op_u2f64:
994 case nir_op_u2f32:
995 case nir_op_u2f16:
996 return lower_2f(b, src[0], nir_dest_bit_size(alu->dest.dest), false);
997 case nir_op_f2i64:
998 case nir_op_f2u64:
999 /* We don't support f64toi64 (yet?). */
1000 if (src[0]->bit_size > 32)
1001 return NULL;
1002
1003 return lower_f2(b, src[0], alu->op == nir_op_f2i64);
1004 default:
1005 unreachable("Invalid ALU opcode to lower");
1006 }
1007 }
1008
1009 static bool
1010 should_lower_int64_alu_instr(const nir_instr *instr, const void *_data)
1011 {
1012 const nir_shader_compiler_options *options =
1013 (const nir_shader_compiler_options *)_data;
1014
1015 if (instr->type != nir_instr_type_alu)
1016 return false;
1017
1018 const nir_alu_instr *alu = nir_instr_as_alu(instr);
1019
1020 switch (alu->op) {
1021 case nir_op_i2b1:
1022 case nir_op_i2i8:
1023 case nir_op_i2i16:
1024 case nir_op_i2i32:
1025 case nir_op_u2u8:
1026 case nir_op_u2u16:
1027 case nir_op_u2u32:
1028 assert(alu->src[0].src.is_ssa);
1029 if (alu->src[0].src.ssa->bit_size != 64)
1030 return false;
1031 break;
1032 case nir_op_bcsel:
1033 assert(alu->src[1].src.is_ssa);
1034 assert(alu->src[2].src.is_ssa);
1035 assert(alu->src[1].src.ssa->bit_size ==
1036 alu->src[2].src.ssa->bit_size);
1037 if (alu->src[1].src.ssa->bit_size != 64)
1038 return false;
1039 break;
1040 case nir_op_ieq:
1041 case nir_op_ine:
1042 case nir_op_ult:
1043 case nir_op_ilt:
1044 case nir_op_uge:
1045 case nir_op_ige:
1046 assert(alu->src[0].src.is_ssa);
1047 assert(alu->src[1].src.is_ssa);
1048 assert(alu->src[0].src.ssa->bit_size ==
1049 alu->src[1].src.ssa->bit_size);
1050 if (alu->src[0].src.ssa->bit_size != 64)
1051 return false;
1052 break;
1053 case nir_op_ufind_msb:
1054 assert(alu->src[0].src.is_ssa);
1055 if (alu->src[0].src.ssa->bit_size != 64)
1056 return false;
1057 break;
1058 case nir_op_amul:
1059 assert(alu->dest.dest.is_ssa);
1060 if (options->has_imul24)
1061 return false;
1062 if (alu->dest.dest.ssa.bit_size != 64)
1063 return false;
1064 break;
1065 case nir_op_i2f64:
1066 case nir_op_u2f64:
1067 case nir_op_i2f32:
1068 case nir_op_u2f32:
1069 case nir_op_i2f16:
1070 case nir_op_u2f16:
1071 assert(alu->src[0].src.is_ssa);
1072 if (alu->src[0].src.ssa->bit_size != 64)
1073 return false;
1074 break;
1075 case nir_op_f2u64:
1076 case nir_op_f2i64:
1077 /* fall-through */
1078 default:
1079 assert(alu->dest.dest.is_ssa);
1080 if (alu->dest.dest.ssa.bit_size != 64)
1081 return false;
1082 break;
1083 }
1084
1085 unsigned mask = nir_lower_int64_op_to_options_mask(alu->op);
1086 return (options->lower_int64_options & mask) != 0;
1087 }
1088
1089 bool
1090 nir_lower_int64(nir_shader *shader)
1091 {
1092 return nir_shader_lower_instructions(shader,
1093 should_lower_int64_alu_instr,
1094 lower_int64_alu_instr,
1095 (void *)shader->options);
1096 }