From 3460d05a718f3859a77fe100f3972095d194be26 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Thu, 1 Sep 2016 15:21:04 -0700
Subject: [PATCH] nir: Add 64-bit integer support for conversions and bitcasts

v2 (idr): "cut them down later" => Remove ir_unop_b2u64 and
ir_unop_u642b.  Handle these with extra i2u or u2i casts, just as
uint(bool) and bool(uint) conversions are done.

v3 (idr): Make the "from" type in a cast unsized.  This reduces the
number of required cast operations at the expense of slightly more
complex code.  However, this will be a dramatic improvement when other
sized integer types are added.  Suggested by Connor.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/compiler/glsl/glsl_to_nir.cpp          | 37 ++++++++++++++++++++++
 src/compiler/nir/nir_lower_alu_to_scalar.c |  1 +
 src/compiler/nir/nir_opcodes.py            | 30 +++++++++++++++++-
 3 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
index f4c3d01e723..7a1c1f4c8a2 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1480,6 +1480,7 @@ nir_visitor::visit(ir_expression *ir)
    case ir_unop_f2b:  result = nir_f2b(&b, srcs[0]);   break;
    case ir_unop_i2b:  result = nir_i2b(&b, srcs[0]);   break;
    case ir_unop_b2i:  result = nir_b2i(&b, srcs[0]);   break;
+   case ir_unop_b2i64:result = nir_b2i64(&b, srcs[0]); break;
    case ir_unop_d2f:  result = nir_d2f(&b, srcs[0]);   break;
    case ir_unop_f2d:  result = nir_f2d(&b, srcs[0]);   break;
    case ir_unop_d2i:  result = nir_d2i(&b, srcs[0]);   break;
@@ -1493,12 +1494,40 @@ nir_visitor::visit(ir_expression *ir)
       assert(supports_ints);
       result = nir_u2d(&b, srcs[0]);
       break;
+   case ir_unop_i642i: result = nir_i2i32(&b, srcs[0]);   break;
+   case ir_unop_i642u: result = nir_i2u32(&b, srcs[0]);   break;
+   case ir_unop_i642f: result = nir_i642f(&b, srcs[0]);   break;
+   case ir_unop_i642d: result = nir_i642d(&b, srcs[0]);   break;
+
+   case ir_unop_u642i: result = nir_u2i32(&b, srcs[0]);   break;
+   case ir_unop_u642u: result = nir_u2u32(&b, srcs[0]);   break;
+   case ir_unop_u642f: result = nir_u642f(&b, srcs[0]);   break;
+   case ir_unop_u642d: result = nir_u642d(&b, srcs[0]);   break;
+
+   case ir_unop_i2i64: result = nir_i2i64(&b, srcs[0]);   break;
+   case ir_unop_u2i64: result = nir_u2i64(&b, srcs[0]);   break;
+   case ir_unop_f2i64:
+   case ir_unop_d2i64:
+      result = nir_f2i64(&b, srcs[0]);
+      break;
+   case ir_unop_i2u64: result = nir_i2u64(&b, srcs[0]);   break;
+   case ir_unop_u2u64: result = nir_u2u64(&b, srcs[0]);   break;
+   case ir_unop_f2u64:
+   case ir_unop_d2u64:
+      result = nir_f2u64(&b, srcs[0]);
+      break;
    case ir_unop_i2u:
    case ir_unop_u2i:
+   case ir_unop_i642u64:
+   case ir_unop_u642i64:
    case ir_unop_bitcast_i2f:
    case ir_unop_bitcast_f2i:
    case ir_unop_bitcast_u2f:
    case ir_unop_bitcast_f2u:
+   case ir_unop_bitcast_i642d:
+   case ir_unop_bitcast_d2i64:
+   case ir_unop_bitcast_u642d:
+   case ir_unop_bitcast_d2u64:
    case ir_unop_subroutine_to_int:
       /* no-op */
       result = nir_imov(&b, srcs[0]);
@@ -1552,6 +1581,14 @@ nir_visitor::visit(ir_expression *ir)
    case ir_unop_unpack_double_2x32:
       result = nir_unpack_double_2x32(&b, srcs[0]);
       break;
+   case ir_unop_pack_int_2x32:
+   case ir_unop_pack_uint_2x32:
+      result = nir_pack_int_2x32(&b, srcs[0]);
+      break;
+   case ir_unop_unpack_int_2x32:
+   case ir_unop_unpack_uint_2x32:
+      result = nir_unpack_int_2x32(&b, srcs[0]);
+      break;
    case ir_unop_bitfield_reverse:
       result = nir_bitfield_reverse(&b, srcs[0]);
       break;
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index fa18debd850..8a967c56831 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -189,6 +189,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
    }
 
    case nir_op_unpack_double_2x32:
+   case nir_op_unpack_int_2x32:
       return false;
 
       LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 7045c953bef..2e5a665fb39 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -95,6 +95,7 @@ tuint = "uint"
 tfloat32 = "float32"
 tint32 = "int32"
 tuint32 = "uint32"
+tint64 = "int64"
 tuint64 = "uint64"
 tfloat64 = "float64"
 
@@ -171,14 +172,30 @@ unop_convert("d2i", tint32, tfloat64, "src0") # Double-to-integer conversion.
 unop_convert("d2u", tuint32, tfloat64, "src0") # Double-to-unsigned conversion.
 unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion.
 unop_convert("i2d", tfloat64, tint32, "src0") # Integer-to-double conversion.
+unop_convert("i2i32", tint32, tint, "src0")    # General int (int8_t, int64_t, etc.) to int32_t conversion
+unop_convert("u2i32", tint32, tuint, "src0")   # General uint (uint8_t, uint64_t, etc.) to int32_t conversion
+unop_convert("i2u32", tuint32, tint, "src0")   # General int (int8_t, int64_t, etc.) to uint32_t conversion
+unop_convert("u2u32", tuint32, tuint, "src0")  # General uint (uint8_t, uint64_t, etc.) to uint32_t conversion
+unop_convert("i2i64", tint64, tint, "src0")    # General int (int8_t, int32_t, etc.) to int64_t conversion
+unop_convert("u2i64", tint64, tuint, "src0")   # General uint (uint8_t, uint64_t, etc.) to int64_t conversion
+unop_convert("f2i64", tint64, tfloat, "src0")  # General float (float or double) to int64_t conversion
+unop_convert("i2u64", tuint64, tint,  "src0")  # General int (int8_t, int64_t, etc.) to uint64_t conversion
+unop_convert("u2u64", tuint64, tuint, "src0")  # General uint (uint8_t, uint32_t, etc.) to uint64_t conversion
+unop_convert("f2u64", tuint64, tfloat, "src0") # General float (float or double) to uint64_t conversion
+unop_convert("i642f", tfloat32, tint64, "src0")  # int64_t-to-float conversion.
+unop_convert("i642d", tfloat64, tint64, "src0")  # int64_t-to-double conversion.
+unop_convert("u642f", tfloat32, tuint64, "src0") # uint64_t-to-float conversion.
+unop_convert("u642d", tfloat64, tuint64, "src0") # uint64_t-to-double conversion.
+
 # Float-to-boolean conversion
 unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f")
 unop_convert("d2b", tbool, tfloat64, "src0 != 0.0")
 # Boolean-to-float conversion
 unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
 # Int-to-boolean conversion
-unop_convert("i2b", tbool, tint32, "src0 != 0")
+unop_convert("i2b", tbool, tint, "src0 != 0")
 unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
+unop_convert("b2i64", tint64, tbool, "src0 ? 1 : 0")  # Boolean-to-int64_t conversion.
 unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion.
 unop_convert("u2d", tfloat64, tuint32, "src0") # Unsigned-to-double conversion.
 # double-to-float conversion
@@ -270,9 +287,15 @@ dst.x = (src0.x <<  0) |
 unop_horiz("pack_double_2x32", 1, tuint64, 2, tuint32,
            "dst.x = src0.x | ((uint64_t)src0.y << 32);")
 
+unop_horiz("pack_int_2x32", 1, tint64, 2, tint32,  # unsigned casts: a negative src0.x must not sign-extend into the high word
+           "dst.x = (uint32_t)src0.x | ((uint64_t)src0.y << 32);")
+
 unop_horiz("unpack_double_2x32", 2, tuint32, 1, tuint64,
            "dst.x = src0.x; dst.y = src0.x >> 32;")
 
+unop_horiz("unpack_int_2x32", 2, tint32, 1, tint64,
+           "dst.x = src0.x; dst.y = src0.x >> 32;")
+
 # Lowered floating point unpacking operations.
 
 
@@ -283,6 +306,8 @@ unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32,
 
 unop_convert("unpack_double_2x32_split_x", tuint32, tuint64, "src0")
 unop_convert("unpack_double_2x32_split_y", tuint32, tuint64, "src0 >> 32")
+unop_convert("unpack_int_2x32_split_x", tuint32, tuint64, "src0")
+unop_convert("unpack_int_2x32_split_y", tuint32, tuint64, "src0 >> 32")
 
 # Bit operations, part of ARB_gpu_shader5.
 
@@ -565,6 +590,9 @@ binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
 binop_convert("pack_double_2x32_split", tuint64, tuint32, "",
               "src0 | ((uint64_t)src1 << 32)")
 
+binop_convert("pack_int_2x32_split", tuint64, tuint32, "",
+              "src0 | ((uint64_t)src1 << 32)")
+
 # bfm implements the behavior of the first operation of the SM5 "bfi" assembly
 # and that of the "bfi1" i965 instruction. That is, it has undefined behavior
 # if either of its arguments are 32.
-- 
2.30.2