S/390: arch13: Support new popcount instruction
author Andreas Krebbel <krebbel@linux.ibm.com>
Tue, 2 Apr 2019 10:55:26 +0000 (10:55 +0000)
committer Andreas Krebbel <krebbel@gcc.gnu.org>
Tue, 2 Apr 2019 10:55:26 +0000 (10:55 +0000)
S/390: arch13: Support new popcount instruction variant.

The new arch13 popcount instruction counts bits in the entire 64 bit
register instead of just in 8 bit portions.

gcc/ChangeLog:

2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>

* config/s390/s390.md ("*popcountdi_arch13_cc")
("*popcountdi_arch13_cconly", "*popcountdi_arch13"): New insn
definition.
("*popcount<mode>", "popcountdi2", "popcountsi2", "popcounthi2"):
Append _z196 to make it ...
("*popcount<mode>_z196", "popcountdi2_z196", "popcountsi2_z196")
("popcounthi2_z196"): ... this.
("popcountdi2_z196"): Remove TARGET_64BIT from the insn condition.
("popcountdi2", "popcountsi2", "popcounthi2"): New expanders.

gcc/testsuite/ChangeLog:

2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>

* gcc.target/s390/arch13/popcount-1.c: New test.

From-SVN: r270079

gcc/ChangeLog
gcc/config/s390/s390.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/s390/arch13/popcount-1.c [new file with mode: 0644]

index f9ed2b0a38ca21ce9ef4b3a1277b22d4c7ca0bfa..6601061c39abf58c60d8680f3c855cdca7d9afe3 100644 (file)
@@ -1,3 +1,15 @@
+2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>
+
+       * config/s390/s390.md ("*popcountdi_arch13_cc")
+       ("*popcountdi_arch13_cconly", "*popcountdi_arch13"): New insn
+       definition.
+       ("*popcount<mode>", "popcountdi2", "popcountsi2", "popcounthi2"):
+       Append _z196 to make it ...
+       ("*popcount<mode>_z196", "popcountdi2_z196", "popcountsi2_z196")
+       ("popcounthi2_z196"): ... this.
+       ("popcountdi2_z196"): Remove TARGET_64BIT from the insn condition.
+       ("popcountdi2", "popcountsi2", "popcounthi2"): New expanders.
+
 2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>
 
        * config/s390/s390.c (s390_canonicalize_comparison): Convert
index d635f849f18517d108bfdb30e2c7435d6b25c30f..018020cd19de1d6b993a8dcebe3c136f358097f3 100644 (file)
    ; Test Data Class (TDC)
    UNSPEC_TDC_INSN
 
-   ; Population Count
+   ; Byte-wise Population Count
    UNSPEC_POPCNT
    UNSPEC_COPYSIGN
 
 ; Population count instruction
 ;
 
-; The S/390 popcount instruction counts the bits of op1 in 8 byte
+; arch13 popcount (emitted as "popcnt %0,%1,8") counting the bits of
+; the entire 64 bit register.  This variant matches when both the
+; count in op0 and the condition code of comparing the count against
+; zero are used.  Both sets must describe the popcount of op1, since
+; that is what the instruction writes to op0.
+(define_insn "*popcountdi_arch13_cc"
+  [(set (reg CC_REGNUM)
+       (compare (popcount:DI (match_operand:DI 1 "register_operand" "d"))
+                (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=d")
+       (popcount:DI (match_dup 1)))]
+  "TARGET_ARCH13 && s390_match_ccmode (insn, CCTmode)"
+  "popcnt\t%0,%1,8"
+  [(set_attr "op_type" "RRF")])
+
+; arch13 popcount where only the condition code of comparing the
+; count against zero is used.  The count itself is written to a
+; scratch register.
+(define_insn "*popcountdi_arch13_cconly"
+  [(set (reg CC_REGNUM)
+       (compare (popcount:DI (match_operand:DI 1 "register_operand" "d"))
+                (const_int 0)))
+   (clobber (match_scratch:DI 0 "=d"))]
+  "TARGET_ARCH13 && s390_match_ccmode (insn, CCTmode)"
+  "popcnt\t%0,%1,8"
+  [(set_attr "op_type" "RRF")])
+
+; arch13 popcount where only the count in op0 is used.  The condition
+; code register is declared as clobbered.
+(define_insn "*popcountdi_arch13"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+       (popcount:DI (match_operand:DI 1 "register_operand" "d")))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_ARCH13"
+  "popcnt\t%0,%1,8"
+  [(set_attr "op_type" "RRF")])
+
+; The pre-arch13 popcount instruction counts the bits of op1 in 8 bit
 ; portions and stores the result in the corresponding bytes in op0.
-(define_insn "*popcount<mode>"
+(define_insn "*popcount<mode>_z196"
   [(set (match_operand:INT 0 "register_operand" "=d")
        (unspec:INT [(match_operand:INT 1 "register_operand" "d")] UNSPEC_POPCNT))
    (clobber (reg:CC CC_REGNUM))]
   "popcnt\t%0,%1"
   [(set_attr "op_type" "RRE")])
 
-(define_expand "popcountdi2"
+(define_expand "popcountdi2_z196"
   [; popcnt op0, op1
    (parallel [(set (match_operand:DI 0 "register_operand" "")
                   (unspec:DI [(match_operand:DI 1 "register_operand")]
              (clobber (reg:CC CC_REGNUM))])
    ; srlg op0, op0, 56
    (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
-  "TARGET_Z196 && TARGET_64BIT"
+  "TARGET_Z196"
   "operands[2] = gen_reg_rtx (DImode);")
 
-(define_expand "popcountsi2"
+; DImode popcount expander.  With arch13 the popcount RTL from the
+; template below is emitted unchanged.  Without arch13 the expansion
+; is delegated to popcountdi2_z196.
+(define_expand "popcountdi2"
+  [(parallel
+    [(set (match_operand:DI 0 "register_operand" "")
+         (popcount:DI (match_operand:DI 1 "register_operand")))
+     (clobber (reg:CC CC_REGNUM))])]
+  "TARGET_Z196"
+{
+  if (!TARGET_ARCH13)
+    {
+      /* Use the byte-wise z196 expansion instead of the template.  */
+      emit_insn (gen_popcountdi2_z196 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_expand "popcountsi2_z196"
   [; popcnt op0, op1
    (parallel [(set (match_operand:SI 0 "register_operand" "")
                   (unspec:SI [(match_operand:SI 1 "register_operand")]
   "TARGET_Z196"
   "operands[2] = gen_reg_rtx (SImode);")
 
-(define_expand "popcounthi2"
+; The popcount instruction always operates on the full 64 bit
+; register.  The z196 version counts bits per byte, so the upper 4
+; bytes can simply be ignored.  The arch13 version counts across the
+; whole register, so the upper 32 bits have to be zeroed out first.
+(define_expand "popcountsi2"
+  [(set (match_dup 2)
+       (zero_extend:DI (match_operand:SI 1 "register_operand")))
+   (parallel [(set (match_dup 3) (popcount:DI (match_dup 2)))
+             (clobber (reg:CC CC_REGNUM))])
+   (set (match_operand:SI 0 "register_operand")
+       (subreg:SI (match_dup 3) 4))]
+  "TARGET_Z196"
+{
+  if (!TARGET_ARCH13)
+    {
+      /* Use the z196 expansion instead of the template above.  */
+      emit_insn (gen_popcountsi2_z196 (operands[0], operands[1]));
+      DONE;
+    }
+  else
+    {
+      /* operands[2]: zero extended input, operands[3]: 64 bit count.  */
+      operands[2] = gen_reg_rtx (DImode);
+      operands[3] = gen_reg_rtx (DImode);
+    }
+})
+
+(define_expand "popcounthi2_z196"
   [; popcnt op0, op1
    (parallel [(set (match_operand:HI 0 "register_operand" "")
                   (unspec:HI [(match_operand:HI 1 "register_operand")]
   "TARGET_Z196"
   "operands[2] = gen_reg_rtx (SImode);")
 
+; HImode popcount expander.  With arch13 the input is zero extended to
+; DImode, counted with the 64 bit popcount, and the result is taken
+; from the HImode subreg at byte offset 6 of the DImode count.
+; Without arch13 the expansion is delegated to popcounthi2_z196.
+(define_expand "popcounthi2"
+  [(set (match_dup 2)
+       (zero_extend:DI (match_operand:HI 1 "register_operand")))
+   (parallel [(set (match_dup 3) (popcount:DI (match_dup 2)))
+             (clobber (reg:CC CC_REGNUM))])
+   (set (match_operand:HI 0 "register_operand")
+       (subreg:HI (match_dup 3) 6))]
+  "TARGET_Z196"
+{
+  if (!TARGET_ARCH13)
+    {
+      /* Use the z196 expansion instead of the template above.  */
+      emit_insn (gen_popcounthi2_z196 (operands[0], operands[1]));
+      DONE;
+    }
+  else
+    {
+      /* operands[2]: zero extended input, operands[3]: 64 bit count.  */
+      operands[2] = gen_reg_rtx (DImode);
+      operands[3] = gen_reg_rtx (DImode);
+    }
+})
+
+; For popcount on a single byte the old z196 style popcount
+; instruction is ideal.  Since it performs a byte-wise popcount
+; anyway, we just use it instead of zero extending the QImode input
+; to DImode and using the arch13 popcount variant.
 (define_expand "popcountqi2"
   [; popcnt op0, op1
    (parallel [(set (match_operand:QI 0 "register_operand" "")
index f7f1883c1fec64d56cb304bf689980837c5f48c0..8bc2b862b095302c09675885285c172cc017dffc 100644 (file)
@@ -1,3 +1,7 @@
+2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>
+
+       * gcc.target/s390/arch13/popcount-1.c: New test.
+
 2019-04-02  Andreas Krebbel  <krebbel@linux.ibm.com>
 
        * gcc.target/s390/arch13/bitops-1.c: New test.
diff --git a/gcc/testsuite/gcc.target/s390/arch13/popcount-1.c b/gcc/testsuite/gcc.target/s390/arch13/popcount-1.c
new file mode 100644 (file)
index 0000000..c98a327
--- /dev/null
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* Check that the 64 bit popcnt form is emitted for each builtin below.  */
+unsigned int ui;
+unsigned long ul;
+unsigned long long ull;
+
+int
+f1 ()
+{
+  return __builtin_popcount (ui);
+}
+
+int
+f2 ()
+{
+  return __builtin_popcountl (ul);
+}
+
+int
+f3 ()
+{
+  return __builtin_popcountll (ull);
+}
+
+/* { dg-final { scan-assembler-times "popcnt\t%r2,%r2,8" 3 } } */