/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "nir_builder.h"
28 lower_b2i64(nir_builder
*b
, nir_ssa_def
*x
)
30 return nir_pack_64_2x32_split(b
, nir_b2i32(b
, x
), nir_imm_int(b
, 0));
34 lower_i2b(nir_builder
*b
, nir_ssa_def
*x
)
36 return nir_ine(b
, nir_ior(b
, nir_unpack_64_2x32_split_x(b
, x
),
37 nir_unpack_64_2x32_split_y(b
, x
)),
42 lower_i2i8(nir_builder
*b
, nir_ssa_def
*x
)
44 return nir_i2i8(b
, nir_unpack_64_2x32_split_x(b
, x
));
48 lower_i2i16(nir_builder
*b
, nir_ssa_def
*x
)
50 return nir_i2i16(b
, nir_unpack_64_2x32_split_x(b
, x
));
55 lower_i2i32(nir_builder
*b
, nir_ssa_def
*x
)
57 return nir_unpack_64_2x32_split_x(b
, x
);
61 lower_i2i64(nir_builder
*b
, nir_ssa_def
*x
)
63 nir_ssa_def
*x32
= x
->bit_size
== 32 ? x
: nir_i2i32(b
, x
);
64 return nir_pack_64_2x32_split(b
, x32
, nir_ishr(b
, x32
, nir_imm_int(b
, 31)));
68 lower_u2u8(nir_builder
*b
, nir_ssa_def
*x
)
70 return nir_u2u8(b
, nir_unpack_64_2x32_split_x(b
, x
));
74 lower_u2u16(nir_builder
*b
, nir_ssa_def
*x
)
76 return nir_u2u16(b
, nir_unpack_64_2x32_split_x(b
, x
));
80 lower_u2u32(nir_builder
*b
, nir_ssa_def
*x
)
82 return nir_unpack_64_2x32_split_x(b
, x
);
86 lower_u2u64(nir_builder
*b
, nir_ssa_def
*x
)
88 nir_ssa_def
*x32
= x
->bit_size
== 32 ? x
: nir_u2u32(b
, x
);
89 return nir_pack_64_2x32_split(b
, x32
, nir_imm_int(b
, 0));
93 lower_bcsel64(nir_builder
*b
, nir_ssa_def
*cond
, nir_ssa_def
*x
, nir_ssa_def
*y
)
95 nir_ssa_def
*x_lo
= nir_unpack_64_2x32_split_x(b
, x
);
96 nir_ssa_def
*x_hi
= nir_unpack_64_2x32_split_y(b
, x
);
97 nir_ssa_def
*y_lo
= nir_unpack_64_2x32_split_x(b
, y
);
98 nir_ssa_def
*y_hi
= nir_unpack_64_2x32_split_y(b
, y
);
100 return nir_pack_64_2x32_split(b
, nir_bcsel(b
, cond
, x_lo
, y_lo
),
101 nir_bcsel(b
, cond
, x_hi
, y_hi
));
105 lower_inot64(nir_builder
*b
, nir_ssa_def
*x
)
107 nir_ssa_def
*x_lo
= nir_unpack_64_2x32_split_x(b
, x
);
108 nir_ssa_def
*x_hi
= nir_unpack_64_2x32_split_y(b
, x
);
110 return nir_pack_64_2x32_split(b
, nir_inot(b
, x_lo
), nir_inot(b
, x_hi
));
114 lower_iand64(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
116 nir_ssa_def
*x_lo
= nir_unpack_64_2x32_split_x(b
, x
);
117 nir_ssa_def
*x_hi
= nir_unpack_64_2x32_split_y(b
, x
);
118 nir_ssa_def
*y_lo
= nir_unpack_64_2x32_split_x(b
, y
);
119 nir_ssa_def
*y_hi
= nir_unpack_64_2x32_split_y(b
, y
);
121 return nir_pack_64_2x32_split(b
, nir_iand(b
, x_lo
, y_lo
),
122 nir_iand(b
, x_hi
, y_hi
));
126 lower_ior64(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
128 nir_ssa_def
*x_lo
= nir_unpack_64_2x32_split_x(b
, x
);
129 nir_ssa_def
*x_hi
= nir_unpack_64_2x32_split_y(b
, x
);
130 nir_ssa_def
*y_lo
= nir_unpack_64_2x32_split_x(b
, y
);
131 nir_ssa_def
*y_hi
= nir_unpack_64_2x32_split_y(b
, y
);
133 return nir_pack_64_2x32_split(b
, nir_ior(b
, x_lo
, y_lo
),
134 nir_ior(b
, x_hi
, y_hi
));
138 lower_ixor64(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
140 nir_ssa_def
*x_lo
= nir_unpack_64_2x32_split_x(b
, x
);
141 nir_ssa_def
*x_hi
= nir_unpack_64_2x32_split_y(b
, x
);
142 nir_ssa_def
*y_lo
= nir_unpack_64_2x32_split_x(b
, y
);
143 nir_ssa_def
*y_hi
= nir_unpack_64_2x32_split_y(b
, y
);
145 return nir_pack_64_2x32_split(b
, nir_ixor(b
, x_lo
, y_lo
),
146 nir_ixor(b
, x_hi
, y_hi
));
150 lower_iadd64(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
152 nir_ssa_def
*x_lo
= nir_unpack_64_2x32_split_x(b
, x
);
153 nir_ssa_def
*x_hi
= nir_unpack_64_2x32_split_y(b
, x
);
154 nir_ssa_def
*y_lo
= nir_unpack_64_2x32_split_x(b
, y
);
155 nir_ssa_def
*y_hi
= nir_unpack_64_2x32_split_y(b
, y
);
157 nir_ssa_def
*res_lo
= nir_iadd(b
, x_lo
, y_lo
);
158 nir_ssa_def
*carry
= nir_b2i32(b
, nir_ult(b
, res_lo
, x_lo
));
159 nir_ssa_def
*res_hi
= nir_iadd(b
, carry
, nir_iadd(b
, x_hi
, y_hi
));
161 return nir_pack_64_2x32_split(b
, res_lo
, res_hi
);
165 lower_isub64(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
167 nir_ssa_def
*x_lo
= nir_unpack_64_2x32_split_x(b
, x
);
168 nir_ssa_def
*x_hi
= nir_unpack_64_2x32_split_y(b
, x
);
169 nir_ssa_def
*y_lo
= nir_unpack_64_2x32_split_x(b
, y
);
170 nir_ssa_def
*y_hi
= nir_unpack_64_2x32_split_y(b
, y
);
172 nir_ssa_def
*res_lo
= nir_isub(b
, x_lo
, y_lo
);
173 nir_ssa_def
*borrow
= nir_ineg(b
, nir_b2i32(b
, nir_ult(b
, x_lo
, y_lo
)));
174 nir_ssa_def
*res_hi
= nir_iadd(b
, nir_isub(b
, x_hi
, y_hi
), borrow
);
176 return nir_pack_64_2x32_split(b
, res_lo
, res_hi
);
180 lower_ineg64(nir_builder
*b
, nir_ssa_def
*x
)
182 /* Since isub is the same number of instructions (with better dependencies)
183 * as iadd, subtraction is actually more efficient for ineg than the usual
184 * 2's complement "flip the bits and add one".
186 return lower_isub64(b
, nir_imm_int64(b
, 0), x
);
190 lower_iabs64(nir_builder
*b
, nir_ssa_def
*x
)
192 nir_ssa_def
*x_hi
= nir_unpack_64_2x32_split_y(b
, x
);
193 nir_ssa_def
*x_is_neg
= nir_ilt(b
, x_hi
, nir_imm_int(b
, 0));
194 return nir_bcsel(b
, x_is_neg
, nir_ineg(b
, x
), x
);
/* Lower a 64-bit integer comparison (op) of x and y to 32-bit operations.
 *
 * Equality is per-half; ordered comparisons compare the high dwords first
 * and fall back to an unsigned comparison of the low dwords on a tie.  Only
 * the signedness of the high-dword comparison differs between ult and ilt.
 * Any op other than ieq/ine/ult/ilt/uge/ige is a caller bug.
 */
static nir_ssa_def *
lower_int64_compare(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *y)
{
   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);

   switch (op) {
   case nir_op_ieq:
      return nir_iand(b, nir_ieq(b, x_hi, y_hi), nir_ieq(b, x_lo, y_lo));
   case nir_op_ine:
      return nir_ior(b, nir_ine(b, x_hi, y_hi), nir_ine(b, x_lo, y_lo));
   case nir_op_ult:
      return nir_ior(b, nir_ult(b, x_hi, y_hi),
                     nir_iand(b, nir_ieq(b, x_hi, y_hi),
                              nir_ult(b, x_lo, y_lo)));
   case nir_op_ilt:
      return nir_ior(b, nir_ilt(b, x_hi, y_hi),
                     nir_iand(b, nir_ieq(b, x_hi, y_hi),
                              nir_ult(b, x_lo, y_lo)));
   case nir_op_uge:
      /* Lower as !(x < y) in the hopes of better CSE */
      return nir_inot(b, lower_int64_compare(b, nir_op_ult, x, y));
   case nir_op_ige:
      /* Lower as !(x < y) in the hopes of better CSE */
      return nir_inot(b, lower_int64_compare(b, nir_op_ilt, x, y));
   default:
      unreachable("Invalid comparison");
   }
}
231 lower_umax64(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
233 return nir_bcsel(b
, lower_int64_compare(b
, nir_op_ult
, x
, y
), y
, x
);
237 lower_imax64(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
239 return nir_bcsel(b
, lower_int64_compare(b
, nir_op_ilt
, x
, y
), y
, x
);
243 lower_umin64(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
245 return nir_bcsel(b
, lower_int64_compare(b
, nir_op_ult
, x
, y
), x
, y
);
249 lower_imin64(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
251 return nir_bcsel(b
, lower_int64_compare(b
, nir_op_ilt
, x
, y
), x
, y
);
255 lower_imul64(nir_builder
*b
, nir_ssa_def
*x
, nir_ssa_def
*y
)
257 nir_ssa_def
*x_lo
= nir_unpack_64_2x32_split_x(b
, x
);
258 nir_ssa_def
*x_hi
= nir_unpack_64_2x32_split_y(b
, x
);
259 nir_ssa_def
*y_lo
= nir_unpack_64_2x32_split_x(b
, y
);
260 nir_ssa_def
*y_hi
= nir_unpack_64_2x32_split_y(b
, y
);
262 nir_ssa_def
*res_lo
= nir_imul(b
, x_lo
, y_lo
);
263 nir_ssa_def
*res_hi
= nir_iadd(b
, nir_umul_high(b
, x_lo
, y_lo
),
264 nir_iadd(b
, nir_imul(b
, x_lo
, y_hi
),
265 nir_imul(b
, x_hi
, y_lo
)));
267 return nir_pack_64_2x32_split(b
, res_lo
, res_hi
);
/* Lower 64-bit imul_high/umul_high to 32-bit operations.
 *
 * Performs a schoolbook multiply on 32-bit digits: each operand is split
 * into four dwords (the upper two being the sign- or zero-extension) and the
 * partial products are accumulated with carries into res[0..7].  The result
 * is the upper half of the 64x64 product, i.e. res[2]:res[3].
 *
 * When sign_extend is true the operands are treated as signed (imul_high);
 * when false, as unsigned (umul_high).
 */
static nir_ssa_def *
lower_mul_high64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y,
                 bool sign_extend)
{
   nir_ssa_def *x32[4], *y32[4];
   x32[0] = nir_unpack_64_2x32_split_x(b, x);
   x32[1] = nir_unpack_64_2x32_split_y(b, x);
   if (sign_extend) {
      /* Replicate the sign bit of the high dword into the upper digits. */
      x32[2] = x32[3] = nir_ishr(b, x32[1], nir_imm_int(b, 31));
   } else {
      x32[2] = x32[3] = nir_imm_int(b, 0);
   }

   y32[0] = nir_unpack_64_2x32_split_x(b, y);
   y32[1] = nir_unpack_64_2x32_split_y(b, y);
   if (sign_extend) {
      y32[2] = y32[3] = nir_ishr(b, y32[1], nir_imm_int(b, 31));
   } else {
      y32[2] = y32[3] = nir_imm_int(b, 0);
   }

   nir_ssa_def *res[8] = { NULL, };

   /* Yes, the following generates a pile of code.  However, we throw res[0]
    * and res[1] away in the end and, if we're in the umul case, four of our
    * eight dword operands will be constant zero and opt_algebraic will clean
    * this up nicely.
    */
   for (unsigned i = 0; i < 4; i++) {
      nir_ssa_def *carry = NULL;
      for (unsigned j = 0; j < 4; j++) {
         /* The maximum values of x32[i] and y32[i] are UINT32_MAX so the
          * maximum value of tmp is UINT32_MAX * UINT32_MAX.  The maximum
          * value that will fit in tmp is
          *
          *    UINT64_MAX = UINT32_MAX << 32 + UINT32_MAX
          *               = UINT32_MAX * (UINT32_MAX + 1) + UINT32_MAX
          *               = UINT32_MAX * UINT32_MAX + 2 * UINT32_MAX
          *
          * so we're guaranteed that we can add in two more 32-bit values
          * without overflowing tmp.
          */
         nir_ssa_def *tmp =
            nir_pack_64_2x32_split(b, nir_imul(b, x32[i], y32[j]),
                                      nir_umul_high(b, x32[i], y32[j]));

         if (res[i + j])
            tmp = nir_iadd(b, tmp, nir_u2u64(b, res[i + j]));
         if (carry)
            tmp = nir_iadd(b, tmp, carry);
         res[i + j] = nir_u2u32(b, tmp);
         carry = nir_ushr(b, tmp, nir_imm_int(b, 32));
      }
      res[i + 4] = nir_u2u32(b, carry);
   }

   return nir_pack_64_2x32_split(b, res[2], res[3]);
}
329 lower_isign64(nir_builder
*b
, nir_ssa_def
*x
)
331 nir_ssa_def
*x_lo
= nir_unpack_64_2x32_split_x(b
, x
);
332 nir_ssa_def
*x_hi
= nir_unpack_64_2x32_split_y(b
, x
);
334 nir_ssa_def
*is_non_zero
= nir_i2b(b
, nir_ior(b
, x_lo
, x_hi
));
335 nir_ssa_def
*res_hi
= nir_ishr(b
, x_hi
, nir_imm_int(b
, 31));
336 nir_ssa_def
*res_lo
= nir_ior(b
, res_hi
, nir_b2i32(b
, is_non_zero
));
338 return nir_pack_64_2x32_split(b
, res_lo
, res_hi
);
/* Lower unsigned 64-bit division: computes both n / d (*q) and n % d (*r)
 * with long division on the 32-bit halves.
 *
 * The quotient's high dword is computed first (only needed when the shifted
 * divisor can still fit under the numerator's high dword), then the low
 * dword with full 64-bit compares.  The numerator left over at the end is
 * the remainder.  Division by zero follows whatever the 32-bit compares
 * produce — NOTE(review): presumably the usual GPU all-ones convention;
 * confirm against callers.
 */
static void
lower_udiv64_mod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d,
                   nir_ssa_def **q, nir_ssa_def **r)
{
   /* TODO: We should specially handle the case where the denominator is a
    * constant.  In that case, we should be able to reduce it to a multiply by
    * a constant, some shifts, and an add.
    */
   nir_ssa_def *n_lo = nir_unpack_64_2x32_split_x(b, n);
   nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
   nir_ssa_def *d_lo = nir_unpack_64_2x32_split_x(b, d);
   nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);

   /* The quotient starts at zero and gets bits OR-ed in as we go. */
   nir_const_value v = { .u32 = { 0, 0, 0, 0 } };
   nir_ssa_def *q_lo = nir_build_imm(b, n->num_components, 32, v);
   nir_ssa_def *q_hi = nir_build_imm(b, n->num_components, 32, v);

   nir_ssa_def *n_hi_before_if = n_hi;
   nir_ssa_def *q_hi_before_if = q_hi;

   /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
    * greater than 32 bits to occur.  If the upper 32 bits of the numerator
    * are zero, it is impossible for (denom << [63, 32]) <= numer unless
    * denom == 0.
    */
   nir_ssa_def *need_high_div =
      nir_iand(b, nir_ieq(b, d_hi, nir_imm_int(b, 0)), nir_uge(b, n_hi, d_lo));
   nir_push_if(b, nir_bany(b, need_high_div));
   {
      /* If we only have one component, then the bany above goes away and
       * this is always true within the if statement.
       */
      if (n->num_components == 1)
         need_high_div = nir_imm_true(b);

      nir_ssa_def *log2_d_lo = nir_ufind_msb(b, d_lo);

      for (int i = 31; i >= 0; i--) {
         /* if ((d.x << i) <= n.y) {
          *    n.y -= d.x << i;
          *    quot.y |= 1U << i;
          * }
          */
         nir_ssa_def *d_shift = nir_ishl(b, d_lo, nir_imm_int(b, i));
         nir_ssa_def *new_n_hi = nir_isub(b, n_hi, d_shift);
         nir_ssa_def *new_q_hi = nir_ior(b, q_hi, nir_imm_int(b, 1u << i));
         nir_ssa_def *cond = nir_iand(b, need_high_div,
                                      nir_uge(b, n_hi, d_shift));
         if (i != 0) {
            /* log2_d_lo is always <= 31, so we don't need to bother with it
             * in the last iteration.
             */
            cond = nir_iand(b, cond,
                            nir_ige(b, nir_imm_int(b, 31 - i), log2_d_lo));
         }
         n_hi = nir_bcsel(b, cond, new_n_hi, n_hi);
         q_hi = nir_bcsel(b, cond, new_q_hi, q_hi);
      }
   }
   nir_pop_if(b, NULL);
   /* Merge the if/else values of n_hi and q_hi with phis. */
   n_hi = nir_if_phi(b, n_hi, n_hi_before_if);
   q_hi = nir_if_phi(b, q_hi, q_hi_before_if);

   nir_ssa_def *log2_denom = nir_ufind_msb(b, d_hi);

   /* From here on we work with full 64-bit values. */
   n = nir_pack_64_2x32_split(b, n_lo, n_hi);
   d = nir_pack_64_2x32_split(b, d_lo, d_hi);
   for (int i = 31; i >= 0; i--) {
      /* if ((d64 << i) <= n64) {
       *    n64 -= d64 << i;
       *    quot.x |= 1U << i;
       * }
       */
      nir_ssa_def *d_shift = nir_ishl(b, d, nir_imm_int(b, i));
      nir_ssa_def *new_n = nir_isub(b, n, d_shift);
      nir_ssa_def *new_q_lo = nir_ior(b, q_lo, nir_imm_int(b, 1u << i));
      nir_ssa_def *cond = nir_uge(b, n, d_shift);
      if (i != 0) {
         /* log2_denom is always <= 31, so we don't need to bother with it
          * in the last iteration.
          */
         cond = nir_iand(b, cond,
                         nir_ige(b, nir_imm_int(b, 31 - i), log2_denom));
      }
      n = nir_bcsel(b, cond, new_n, n);
      q_lo = nir_bcsel(b, cond, new_q_lo, q_lo);
   }

   *q = nir_pack_64_2x32_split(b, q_lo, q_hi);
   /* Whatever is left of the numerator is the remainder. */
   *r = n;
}
434 lower_udiv64(nir_builder
*b
, nir_ssa_def
*n
, nir_ssa_def
*d
)
437 lower_udiv64_mod64(b
, n
, d
, &q
, &r
);
442 lower_idiv64(nir_builder
*b
, nir_ssa_def
*n
, nir_ssa_def
*d
)
444 nir_ssa_def
*n_hi
= nir_unpack_64_2x32_split_y(b
, n
);
445 nir_ssa_def
*d_hi
= nir_unpack_64_2x32_split_y(b
, d
);
447 nir_ssa_def
*negate
= nir_ine(b
, nir_ilt(b
, n_hi
, nir_imm_int(b
, 0)),
448 nir_ilt(b
, d_hi
, nir_imm_int(b
, 0)));
450 lower_udiv64_mod64(b
, nir_iabs(b
, n
), nir_iabs(b
, d
), &q
, &r
);
451 return nir_bcsel(b
, negate
, nir_ineg(b
, q
), q
);
455 lower_umod64(nir_builder
*b
, nir_ssa_def
*n
, nir_ssa_def
*d
)
458 lower_udiv64_mod64(b
, n
, d
, &q
, &r
);
/* Lower 64-bit signed modulo (sign follows the divisor, GLSL-style).
 *
 * Divide the magnitudes unsigned, give the remainder the numerator's sign,
 * then fix up: a zero remainder stays zero, matching signs keep the
 * remainder as-is, and differing signs add the divisor to flip the result
 * into the divisor's sign range.
 */
static nir_ssa_def *
lower_imod64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
{
   nir_ssa_def *n_hi = nir_unpack_64_2x32_split_y(b, n);
   nir_ssa_def *d_hi = nir_unpack_64_2x32_split_y(b, d);
   nir_ssa_def *n_is_neg = nir_ilt(b, n_hi, nir_imm_int(b, 0));
   nir_ssa_def *d_is_neg = nir_ilt(b, d_hi, nir_imm_int(b, 0));

   nir_ssa_def *q, *r;
   lower_udiv64_mod64(b, nir_iabs(b, n), nir_iabs(b, d), &q, &r);

   /* Remainder of the magnitudes, signed like the numerator. */
   nir_ssa_def *rem = nir_bcsel(b, n_is_neg, nir_ineg(b, r), r);

   return nir_bcsel(b, nir_ieq(b, r, nir_imm_int64(b, 0)), nir_imm_int64(b, 0),
                    nir_bcsel(b, nir_ieq(b, n_is_neg, d_is_neg), rem,
                              nir_iadd(b, rem, d)));
}
481 lower_irem64(nir_builder
*b
, nir_ssa_def
*n
, nir_ssa_def
*d
)
483 nir_ssa_def
*n_hi
= nir_unpack_64_2x32_split_y(b
, n
);
484 nir_ssa_def
*n_is_neg
= nir_ilt(b
, n_hi
, nir_imm_int(b
, 0));
487 lower_udiv64_mod64(b
, nir_iabs(b
, n
), nir_iabs(b
, d
), &q
, &r
);
488 return nir_bcsel(b
, n_is_neg
, nir_ineg(b
, r
), r
);
/* Map an ALU opcode to the option bit that requests its lowering.
 *
 * Returns 0 for opcodes this pass does not handle, which makes the
 * options-mask test in lower_int64_impl skip them.
 */
static nir_lower_int64_options
opcode_to_options_mask(nir_op opcode)
{
   switch (opcode) {
   case nir_op_imul:
      return nir_lower_imul64;
   case nir_op_imul_high:
   case nir_op_umul_high:
      return nir_lower_imul_high64;
   case nir_op_isign:
      return nir_lower_isign64;
   case nir_op_udiv:
   case nir_op_idiv:
   case nir_op_umod:
   case nir_op_imod:
   case nir_op_irem:
      return nir_lower_divmod64;
   default:
      return 0;
   }
}
/* Dispatch a single 64-bit ALU instruction to its lowering helper.
 *
 * Gathers the instruction's sources as SSA defs (resolving swizzles etc.
 * via nir_ssa_for_alu_src) and returns the replacement SSA value.  Must only
 * be called for opcodes that opcode_to_options_mask knows about.
 */
static nir_ssa_def *
lower_int64_alu_instr(nir_builder *b, nir_alu_instr *alu)
{
   nir_ssa_def *src[4];
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
      src[i] = nir_ssa_for_alu_src(b, alu, i);

   switch (alu->op) {
   case nir_op_imul:
      return lower_imul64(b, src[0], src[1]);
   case nir_op_imul_high:
      return lower_mul_high64(b, src[0], src[1], true);
   case nir_op_umul_high:
      return lower_mul_high64(b, src[0], src[1], false);
   case nir_op_isign:
      return lower_isign64(b, src[0]);
   case nir_op_udiv:
      return lower_udiv64(b, src[0], src[1]);
   case nir_op_idiv:
      return lower_idiv64(b, src[0], src[1]);
   case nir_op_umod:
      return lower_umod64(b, src[0], src[1]);
   case nir_op_imod:
      return lower_imod64(b, src[0], src[1]);
   case nir_op_irem:
      return lower_irem64(b, src[0], src[1]);
   default:
      unreachable("Invalid ALU opcode to lower");
   }
}
/* Run the lowering over one function implementation.
 *
 * Walks every instruction, and for each 64-bit-destination ALU op whose
 * lowering is requested in `options`, builds the replacement code before the
 * instruction, rewrites all uses, and removes the original.  Returns whether
 * anything changed; metadata is invalidated on progress since new control
 * flow may have been inserted.
 */
static bool
lower_int64_impl(nir_function_impl *impl, nir_lower_int64_options options)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool progress = false;
   nir_foreach_block(block, impl) {
      /* _safe iteration: we remove instructions as we go. */
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_alu)
            continue;

         nir_alu_instr *alu = nir_instr_as_alu(instr);
         assert(alu->dest.dest.is_ssa);
         if (alu->dest.dest.ssa.bit_size != 64)
            continue;

         if (!(options & opcode_to_options_mask(alu->op)))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *lowered = lower_int64_alu_instr(&b, alu);
         nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
                                  nir_src_for_ssa(lowered));
         nir_instr_remove(&alu->instr);
         progress = true;
      }
   }

   if (progress)
      nir_metadata_preserve(impl, nir_metadata_none);

   return progress;
}
581 nir_lower_int64(nir_shader
*shader
, nir_lower_int64_options options
)
583 bool progress
= false;
585 nir_foreach_function(function
, shader
) {
587 progress
|= lower_int64_impl(function
->impl
, options
);