gallivm: Altivec vector max/min intrinsics
author Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Thu, 22 Nov 2012 17:03:11 +0000 (11:03 -0600)
committer José Fonseca <jfonseca@vmware.com>
Thu, 29 Nov 2012 11:51:46 +0000 (11:51 +0000)
This patch adds the PPC Altivec intrinsics for the max/min instructions on
the supported Altivec vector types (16xi8, 8xi16, 4xi32, 4xf32).

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_arit.c

index ca96a6b1df755a14271d54bf5fbcc6bcf2ded940..a4d7d98cc94439b460207067d8ea0d23fe3b396f 100644 (file)
@@ -116,7 +116,12 @@ lp_build_min_simple(struct lp_build_context *bld,
          }
       }
    }
-   else if (util_cpu_caps.has_sse2 && type.length >= 2) {
+   else if (type.floating && util_cpu_caps.has_altivec) {
+      if (type.width == 32 && type.length == 4) {
+         intrinsic = "llvm.ppc.altivec.vminfp";
+         intr_size = 128;
+      }
+   } else if (util_cpu_caps.has_sse2 && type.length >= 2) {
       intr_size = 128;
       if ((type.width == 8 || type.width == 16) &&
           (type.width * type.length <= 64) &&
@@ -144,6 +149,27 @@ lp_build_min_simple(struct lp_build_context *bld,
             intrinsic = "llvm.x86.sse41.pminsd";
          }
       }
+   } else if (util_cpu_caps.has_altivec) {
+     intr_size = 128;
+     if (type.width == 8) {
+       if (!type.sign) {
+         intrinsic = "llvm.ppc.altivec.vminub";
+       } else {
+         intrinsic = "llvm.ppc.altivec.vminsb";
+       }
+     } else if (type.width == 16) {
+       if (!type.sign) {
+         intrinsic = "llvm.ppc.altivec.vminuh";
+       } else {
+         intrinsic = "llvm.ppc.altivec.vminsh";
+       }
+     } else if (type.width == 32) {
+       if (!type.sign) {
+         intrinsic = "llvm.ppc.altivec.vminuw";
+       } else {
+         intrinsic = "llvm.ppc.altivec.vminsw";
+       }
+     }
    }
 
    if(intrinsic) {
@@ -206,7 +232,12 @@ lp_build_max_simple(struct lp_build_context *bld,
          }
       }
    }
-   else if (util_cpu_caps.has_sse2 && type.length >= 2) {
+   else if (type.floating && util_cpu_caps.has_altivec) {
+      if (type.width == 32 && type.length == 4) { /* 4 x f32 only, same guard as the vminfp path */
+         intrinsic = "llvm.ppc.altivec.vmaxfp";
+         intr_size = 128;
+      }
+   } else if (util_cpu_caps.has_sse2 && type.length >= 2) {
       intr_size = 128;
       if ((type.width == 8 || type.width == 16) &&
           (type.width * type.length <= 64) &&
@@ -235,6 +266,27 @@ lp_build_max_simple(struct lp_build_context *bld,
             intrinsic = "llvm.x86.sse41.pmaxsd";
          }
       }
+   } else if (util_cpu_caps.has_altivec) {
+     intr_size = 128;
+     if (type.width == 8) {
+       if (!type.sign) {
+         intrinsic = "llvm.ppc.altivec.vmaxub";
+       } else {
+         intrinsic = "llvm.ppc.altivec.vmaxsb";
+       }
+     } else if (type.width == 16) {
+       if (!type.sign) {
+         intrinsic = "llvm.ppc.altivec.vmaxuh";
+       } else {
+         intrinsic = "llvm.ppc.altivec.vmaxsh";
+       }
+     } else if (type.width == 32) {
+       if (!type.sign) {
+         intrinsic = "llvm.ppc.altivec.vmaxuw";
+       } else {
+         intrinsic = "llvm.ppc.altivec.vmaxsw";
+       }
+     }
    }
 
    if(intrinsic) {