gallivm: clear Altivec NJ bit
author Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Thu, 22 Nov 2012 17:55:35 +0000 (11:55 -0600)
committer José Fonseca <jfonseca@vmware.com>
Thu, 29 Nov 2012 11:52:05 +0000 (11:52 +0000)
This patch enforces clearing the NJ bit in the Altivec VSCR register so
that denormal numbers are handled as expected by the IEEE standard.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_init.c

index 0065bb49a4bd89e064103f28c19ba75c746f57bb..050eba7b2b387e1c02d1c7a3376d0eabb86afba2 100644 (file)
@@ -468,6 +468,25 @@ lp_build_init(void)
       util_cpu_caps.has_avx = 0;
    }
 
+#ifdef PIPE_ARCH_PPC_64
+   /* Set the NJ bit in VSCR to 0 so denormalized values are handled as
+    * specified by IEEE standard (PowerISA 2.06 - Section 6.3). This guarantees
+    * that some rounding and half-float to float handling does not round
+    * incorrectly to 0.
+    */
+   if (util_cpu_caps.has_altivec) {
+      unsigned short mask[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+                                0xFFFF, 0xFFFF, 0xFFFE, 0xFFFF };
+      __asm (
+        "mfvscr %%v1\n"
+        "vand   %0,%%v1,%0\n"
+        "mtvscr %0"
+        :
+        : "r" (*mask)
+      );
+   }
+#endif
+
    gallivm_initialized = TRUE;
 
 #if 0