gallium/auxiliary: optimize rgb9e5 helper some more

author Roland Scheidegger <sroland@vmware.com>

Sun, 9 Aug 2015 00:50:10 +0000 (02:50 +0200)

committer Roland Scheidegger <sroland@vmware.com>

Wed, 26 Aug 2015 00:57:38 +0000 (02:57 +0200)
author Roland Scheidegger <sroland@vmware.com>
Sun, 9 Aug 2015 00:50:10 +0000 (02:50 +0200)
committer Roland Scheidegger <sroland@vmware.com>
Wed, 26 Aug 2015 00:57:38 +0000 (02:57 +0200)
diff --git a/src/gallium/auxiliary/util/u_format_rgb9e5.h b/src/gallium/auxiliary/util/u_format_rgb9e5.h

index d11bfa833f10c15ccec7779e8aa6a6ab3ac99ebf..21feba7b7108c4c0566010d45411a098219f12f6 100644 (file)
--- a/src/gallium/auxiliary/util/u_format_rgb9e5.h
+++ b/src/gallium/auxiliary/util/u_format_rgb9e5.h
@@ -74,62 +74,59 @@ typedef union {
     } field;
  } rgb9e5;
  
-static inline float rgb9e5_ClampRange(float x)
-{
-   if (x > 0.0f) {
-      if (x >= MAX_RGB9E5) {
-         return MAX_RGB9E5;
-      } else {
-         return x;
-      }
-   } else {
-      /* NaN gets here too since comparisons with NaN always fail! */
-      return 0.0f;
-   }
-}
  
-/* Ok, FloorLog2 is not correct for the denorm and zero values, but we
-   are going to do a max of this value with the minimum rgb9e5 exponent
-   that will hide these problem cases. */
-static inline int rgb9e5_FloorLog2(float x)
+static inline int rgb9e5_ClampRange(float x)
  {
     float754 f;
-
+   float754 max;
     f.value = x;
-   return (f.field.biasedexponent - 127);
+   max.value = MAX_RGB9E5;
+
+   if (f.raw > 0x7f800000)
+  /* catches neg, NaNs */
+      return 0;
+   else if (f.raw >= max.raw)
+      return max.raw;
+   else
+      return f.raw;
  }
  
  static inline unsigned float3_to_rgb9e5(const float rgb[3])
  {
     rgb9e5 retval;
-   float maxrgb;
-   int rm, gm, bm;
-   float rc, gc, bc;
-   int exp_shared, maxm;
+   int rm, gm, bm, exp_shared;
     float754 revdenom = {0};
-
-   rc = rgb9e5_ClampRange(rgb[0]);
-   gc = rgb9e5_ClampRange(rgb[1]);
-   bc = rgb9e5_ClampRange(rgb[2]);
-
-   maxrgb = MAX3(rc, gc, bc);
-   exp_shared = MAX2(-RGB9E5_EXP_BIAS - 1, rgb9e5_FloorLog2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
+   float754 rc, bc, gc, maxrgb;
+
+   rc.raw = rgb9e5_ClampRange(rgb[0]);
+   gc.raw = rgb9e5_ClampRange(rgb[1]);
+   bc.raw = rgb9e5_ClampRange(rgb[2]);
+   maxrgb.raw = MAX3(rc.raw, gc.raw, bc.raw);
+
+   /*
+    * Compared to what the spec suggests, instead of conditionally adjusting
+    * the exponent after the fact do it here by doing the equivalent of +0.5 -
+    * the int add will spill over into the exponent in this case.
+    */
+   maxrgb.raw += maxrgb.raw & (1 << (23-9));
+   exp_shared = MAX2((maxrgb.raw >> 23), -RGB9E5_EXP_BIAS - 1 + 127) +
+                1 + RGB9E5_EXP_BIAS - 127;
+   revdenom.field.biasedexponent = 127 - (exp_shared - RGB9E5_EXP_BIAS -
+                                          RGB9E5_MANTISSA_BITS) + 1;
     assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
-   assert(exp_shared >= 0);
-   revdenom.field.biasedexponent = 127 - (exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS);
-
-   maxm = (int) (maxrgb * revdenom.value + 0.5);
-   if (maxm == MAX_RGB9E5_MANTISSA + 1) {
-      revdenom.value *= 0.5f;
-      exp_shared += 1;
-      assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
-   } else {
-      assert(maxm <= MAX_RGB9E5_MANTISSA);
-   }
-
-   rm = (int) (rc * revdenom.value + 0.5);
-   gm = (int) (gc * revdenom.value + 0.5);
-   bm = (int) (bc * revdenom.value + 0.5);
+
+   /*
+    * The spec uses strict round-up behavior (d3d10 disagrees, but in any case
+    * must match what is done above for figuring out exponent).
+    * We avoid the doubles ((int) rc * revdenom + 0.5) by doing the rounding
+    * ourselves (revdenom was adjusted by +1, above).
+    */
+   rm = (int) (rc.value * revdenom.value);
+   gm = (int) (gc.value * revdenom.value);
+   bm = (int) (bc.value * revdenom.value);
+   rm = (rm & 1) + (rm >> 1);
+   gm = (gm & 1) + (gm >> 1);
+   bm = (bm & 1) + (bm >> 1);
  
     assert(rm <= MAX_RGB9E5_MANTISSA);
     assert(gm <= MAX_RGB9E5_MANTISSA);
author	Roland Scheidegger <sroland@vmware.com>
	Sun, 9 Aug 2015 00:50:10 +0000 (02:50 +0200)
committer	Roland Scheidegger <sroland@vmware.com>
	Wed, 26 Aug 2015 00:57:38 +0000 (02:57 +0200)