nir,intel: Add support for lowering 64-bit nir_opt_extract_*
author Jason Ekstrand <jason@jlekstrand.net>
Mon, 15 Jul 2019 15:31:49 +0000 (10:31 -0500)
committer Jason Ekstrand <jason@jlekstrand.net>
Mon, 15 Jul 2019 21:08:37 +0000 (16:08 -0500)
We need this when doing full software 64-bit emulation.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110309
Fixes: cbad201c2b3 "nir/algebraic: Add missing 64-bit extract_[iu]8..."
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
src/compiler/nir/nir.h
src/compiler/nir/nir_lower_int64.c
src/intel/compiler/brw_compiler.c

index 2719087e9a45aa521d34070d9be28e7172ebf2f5..c7a1bf357c83804c3d4c9fdbbfa450ba88a161fe 100644 (file)
@@ -2463,6 +2463,7 @@ typedef enum {
    nir_lower_minmax64 = (1 << 10),
    nir_lower_shift64 = (1 << 11),
    nir_lower_imul_2x32_64 = (1 << 12),
+   nir_lower_extract64 = (1 << 13),
 } nir_lower_int64_options;
 
 typedef enum {
index b3b78c6649a14de1c5a35cbdafd7d99c0cd9f303..84ec2a77f1e36aec680bbedbeeb70b80751b380f 100644 (file)
@@ -629,6 +629,34 @@ lower_irem64(nir_builder *b, nir_ssa_def *n, nir_ssa_def *d)
    return nir_bcsel(b, n_is_neg, nir_ineg(b, r), r);
 }
 
+/* Lower a 64-bit extract_[iu]8 / extract_[iu]16 to the same opcode applied
+ * to one 32-bit half of the source, then widen the 32-bit result to 64 bits.
+ *
+ * b:  builder used to emit the replacement instructions.
+ * op: one of nir_op_extract_{u8,i8,u16,i16}; anything else trips the assert.
+ * x:  the 64-bit value being extracted from.
+ * c:  the chunk index; it is read with nir_src_as_uint(), so presumably it
+ *     must be a constant -- TODO confirm against nir_src_as_uint().
+ *
+ * Returns the result of lower_i2i64() for the signed opcodes and of
+ * lower_u2u64() for the unsigned ones.
+ */
+static nir_ssa_def *
+lower_extract(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *c)
+{
+   assert(op == nir_op_extract_u8 || op == nir_op_extract_i8 ||
+          op == nir_op_extract_u16 || op == nir_op_extract_i16);
+
+   const int is_byte_op =
+      (op == nir_op_extract_u8 || op == nir_op_extract_i8);
+   const int is_signed_op =
+      (op == nir_op_extract_i8 || op == nir_op_extract_i16);
+
+   const int index = nir_src_as_uint(nir_src_for_ssa(c));
+
+   /* Chunks per 32-bit half: four for the 8-bit ops, two for the 16-bit. */
+   const int chunks_per_half = 32 / (is_byte_op ? 8 : 16);
+
+   nir_ssa_def *narrow;
+   if (index >= chunks_per_half) {
+      /* The chunk lives in the split_y half; rebase the index into it. */
+      narrow = nir_build_alu(b, op, nir_unpack_64_2x32_split_y(b, x),
+                                nir_imm_int(b, index - chunks_per_half),
+                                NULL, NULL);
+   } else {
+      /* The chunk lives in the split_x half; the index is used unchanged. */
+      narrow = nir_build_alu(b, op, nir_unpack_64_2x32_split_x(b, x),
+                                nir_imm_int(b, index),
+                                NULL, NULL);
+   }
+
+   /* Signed extracts widen via lower_i2i64, unsigned via lower_u2u64. */
+   return is_signed_op ? lower_i2i64(b, narrow) : lower_u2u64(b, narrow);
+}
+
 nir_lower_int64_options
 nir_lower_int64_op_to_options_mask(nir_op opcode)
 {
@@ -685,6 +713,11 @@ nir_lower_int64_op_to_options_mask(nir_op opcode)
    case nir_op_ishr:
    case nir_op_ushr:
       return nir_lower_shift64;
+   case nir_op_extract_u8:
+   case nir_op_extract_i8:
+   case nir_op_extract_u16:
+   case nir_op_extract_i16:
+      return nir_lower_extract64;
    default:
       return 0;
    }
@@ -779,6 +812,11 @@ lower_int64_alu_instr(nir_builder *b, nir_alu_instr *alu)
       return lower_ishr64(b, src[0], src[1]);
    case nir_op_ushr:
       return lower_ushr64(b, src[0], src[1]);
+   case nir_op_extract_u8:
+   case nir_op_extract_i8:
+   case nir_op_extract_u16:
+   case nir_op_extract_i16:
+      return lower_extract(b, alu->op, src[0], src[1]);
    default:
       unreachable("Invalid ALU opcode to lower");
    }
index 3a80f807b8777133039c104117b98b61124ce14b..6d9dac6c3ca9ebb6f1443027449573768e455c4a 100644 (file)
@@ -143,7 +143,8 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
                        nir_lower_ineg64 |
                        nir_lower_logic64 |
                        nir_lower_minmax64 |
-                       nir_lower_shift64;
+                       nir_lower_shift64 |
+                       nir_lower_extract64;
       fp64_options |= nir_lower_fp64_full_software;
    }