i965: Use LZD to implement nir_op_ufind_msb
author Ian Romanick <ian.d.romanick@intel.com>
Tue, 21 Jun 2016 22:14:03 +0000 (15:14 -0700)
committer Ian Romanick <ian.d.romanick@intel.com>
Tue, 19 Jul 2016 19:19:29 +0000 (12:19 -0700)
This uses one fewer instruction than the FBH-based sequence it replaces.

v2: Move emit_find_msb_using_lzd out of the visitor classes.  Suggested
by Curro.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_fs_generator.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp

index 1e9c7da9e278a203d9c1fa6082d4b05f2d627017..f69e744d306e0f0d6a5182d5aabe894f0eb66064 100644 (file)
@@ -1761,6 +1761,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          /* FBL only supports UD type for dst. */
          brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
          break;
+      case BRW_OPCODE_LZD:
+         brw_LZD(p, dst, src[0]);
+         break;
       case BRW_OPCODE_CBIT:
          assert(devinfo->gen >= 7);
          /* CBIT only supports UD type for dst. */
index 6265dc67be091908d966bf0aaad1db076c7cf40d..112155627382474623b847a522d3b7a1eb80df94 100644 (file)
@@ -614,6 +614,25 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
    return true;
 }
 
+static void
+emit_find_msb_using_lzd(const fs_builder &bld,
+                        const fs_reg &result,
+                        const fs_reg &src,
+                        bool is_signed) /* not read anywhere in this body */
+{
+   fs_inst *inst;
+
+   bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), src);
+
+   /* LZD counts leading zeros from the MSB side, whereas GLSL's findMSB()
+    * wants the bit index counted from the LSB side.  The ADD below negates
+    * its first source, so it computes 31 - LZD(src).  If no bits are set,
+    * LZD returns 32 and 31 - 32 = -1, which is what findMSB() must return.
+    */
+   inst = bld.ADD(result, retype(result, BRW_REGISTER_TYPE_D), brw_imm_d(31));
+   inst->src[0].negate = true;
+}
+
 void
 fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
 {
@@ -1310,7 +1329,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       bld.CBIT(result, op[0]);
       break;
 
-   case nir_op_ufind_msb:
+   case nir_op_ufind_msb: {
+      assert(nir_dest_bit_size(instr->dest.dest) < 64);
+      emit_find_msb_using_lzd(bld, result, op[0], false);
+      break;
+   }
+
    case nir_op_ifind_msb: {
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
       bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
index 3878c4ad268bede23a12da401a377510e30c922f..7ad4f86aebd209027626516b5b2355a5f3199b3e 100644 (file)
@@ -1637,6 +1637,9 @@ generate_code(struct brw_codegen *p,
          /* FBL only supports UD type for dst. */
          brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
          break;
+      case BRW_OPCODE_LZD:
+         brw_LZD(p, dst, src[0]);
+         break;
       case BRW_OPCODE_CBIT:
          assert(devinfo->gen >= 7);
          /* CBIT only supports UD type for dst. */
index f3b4528559fc81b2a18362f4789f775eefa54fd4..352d88af4791b25528de55701783d961e8262d77 100644 (file)
@@ -993,6 +993,26 @@ vec4_visitor::optimize_predicate(nir_alu_instr *instr,
    return true;
 }
 
+static void
+emit_find_msb_using_lzd(const vec4_builder &bld,
+                        const dst_reg &dst,
+                        const src_reg &src,
+                        bool is_signed) /* not read anywhere in this body */
+{
+   vec4_instruction *inst;
+
+   bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD), src);
+
+   /* LZD counts leading zeros from the MSB side, whereas GLSL's findMSB()
+    * wants the bit index counted from the LSB side.  The ADD below negates
+    * its first source, so it computes 31 - LZD(src).  If no bits are set,
+    * LZD returns 32 and 31 - 32 = -1, which is what findMSB() must return.
+    */
+   inst = bld.ADD(dst, retype(src_reg(dst), BRW_REGISTER_TYPE_D),
+                  brw_imm_d(31));
+   inst->src[0].negate = true;
+}
+
 void
 vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
 {
@@ -1461,6 +1481,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       break;
 
    case nir_op_ufind_msb:
+      emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0], false);
+      break;
+
    case nir_op_ifind_msb: {
       emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));