nir_lower_logic64 = (1 << 9),
nir_lower_minmax64 = (1 << 10),
nir_lower_shift64 = (1 << 11),
+ nir_lower_imul_2x32_64 = (1 << 12),
} nir_lower_int64_options;
typedef enum {
*/
bool use_interpolated_input_intrinsics;
+ /* Lower imul_2x32_64/umul_2x32_64 when 32x32->64 bit multiplication is not supported */
+ bool lower_mul_2x32_64;
+
unsigned max_unroll_iterations;
nir_lower_int64_options lower_int64_options;
return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), x, y);
}
+/**
+ * Lower a 32x32->64 bit multiply into two 32-bit multiplies.
+ *
+ * The high 32 bits are produced by nir_imul_high() when sign_extend is true
+ * and by nir_umul_high() otherwise; the low 32 bits are produced by
+ * nir_imul().  The two halves are combined with nir_pack_64_2x32_split().
+ */
+static nir_ssa_def *
+lower_mul_2x32_64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y,
+                  bool sign_extend)
+{
+   /* Build the high half before the low half. */
+   nir_ssa_def *hi;
+   if (sign_extend)
+      hi = nir_imul_high(b, x, y);
+   else
+      hi = nir_umul_high(b, x, y);
+
+   nir_ssa_def *lo = nir_imul(b, x, y);
+
+   return nir_pack_64_2x32_split(b, lo, hi);
+}
+
static nir_ssa_def *
lower_imul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
- nir_ssa_def *res_lo = nir_imul(b, x_lo, y_lo);
- nir_ssa_def *res_hi = nir_iadd(b, nir_umul_high(b, x_lo, y_lo),
+ nir_ssa_def *mul_lo = nir_umul_2x32_64(b, x_lo, y_lo);
+ nir_ssa_def *res_hi = nir_iadd(b, nir_unpack_64_2x32_split_y(b, mul_lo),
nir_iadd(b, nir_imul(b, x_lo, y_hi),
nir_imul(b, x_hi, y_lo)));
- return nir_pack_64_2x32_split(b, res_lo, res_hi);
+ return nir_pack_64_2x32_split(b, nir_unpack_64_2x32_split_x(b, mul_lo),
+ res_hi);
}
static nir_ssa_def *
* so we're guaranteed that we can add in two more 32-bit values
* without overflowing tmp.
*/
- nir_ssa_def *tmp =
- nir_pack_64_2x32_split(b, nir_imul(b, x32[i], y32[j]),
- nir_umul_high(b, x32[i], y32[j]));
+ /* Partial product of 32-bit piece i of x and piece j of y (not y32[i]);
+  * it is accumulated into res[i + j] below.
+  */
+ nir_ssa_def *tmp = nir_umul_2x32_64(b, x32[i], y32[j]);
+
if (res[i + j])
tmp = nir_iadd(b, tmp, nir_u2u64(b, res[i + j]));
if (carry)
switch (opcode) {
case nir_op_imul:
return nir_lower_imul64;
+ case nir_op_imul_2x32_64:
+ case nir_op_umul_2x32_64:
+ return nir_lower_imul_2x32_64;
case nir_op_imul_high:
case nir_op_umul_high:
return nir_lower_imul_high64;
switch (alu->op) {
case nir_op_imul:
return lower_imul64(b, src[0], src[1]);
+ case nir_op_imul_2x32_64:
+ return lower_mul_2x32_64(b, src[0], src[1], true);
+ case nir_op_umul_2x32_64:
+ return lower_mul_2x32_64(b, src[0], src[1], false);
case nir_op_imul_high:
return lower_mul_high64(b, src[0], src[1], true);
case nir_op_umul_high:
# low 32-bits of signed/unsigned integer multiply
binop("imul", tint, commutative + associative, "src0 * src1")
+# Generate a 64-bit result from two 32-bit quantities
+binop_convert("imul_2x32_64", tint64, tint32, commutative,
+ "(int64_t)src0 * (int64_t)src1")
+binop_convert("umul_2x32_64", tuint64, tuint32, commutative,
+ "(uint64_t)src0 * (uint64_t)src1")
+
# high 32-bits of signed integer multiply
binop("imul_high", tint, commutative, """
if (bit_size == 64) {
(('imul', a, '#b@32(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b))),
(('imul', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b))))),
+ (('imul_2x32_64', a, b), ('pack_64_2x32_split', ('imul', a, b), ('imul_high', a, b)), 'options->lower_mul_2x32_64'),
+ (('umul_2x32_64', a, b), ('pack_64_2x32_split', ('imul', a, b), ('umul_high', a, b)), 'options->lower_mul_2x32_64'),
(('udiv', a, 1), a),
(('idiv', a, 1), a),
(('umod', a, 1), 0),