#include "pipe/p_compiler.h"
+#include "u_pack_color.h"
#include "u_math.h"
extern const uint8_t
util_format_linear_to_srgb_8unorm_table[256];
+extern const unsigned
+util_format_linear_to_srgb_helper_table[104]; /* 13 exponents x 8 mantissa buckets; each entry packs bias (high 16 bits) and scale (low 16 bits) */
+
/**
* Convert an unclamped linear float to an sRGB value in the range [0,255].
- * XXX this hasn't been tested (render to srgb surface).
- * XXX this needs optimization.
+ * The live code path is a table-driven piecewise-linear approximation
+ * rather than a direct powf() evaluation. NaN and inputs not greater than
+ * 2^-13 are clamped to 2^-13; inputs above the largest float below 1.0
+ * are clamped to that value.
*/
static INLINE uint8_t
util_format_linear_float_to_srgb_8unorm(float x)
{
- if (x >= 1.0f)
- return 255;
- else if (x >= 0.0031308f)
- return float_to_ubyte(1.055f * powf(x, 0.41666f) - 0.055f);
- else if (x > 0.0f)
- return float_to_ubyte(12.92f * x);
- else
- return 0;
+ /*
+ * Reference implementation, deliberately compiled out with if (0): it
+ * evaluates the sRGB transfer function directly with powf(), which
+ * would be exact but (probably much) slower than the lookup below.
+ */
+ if (0) {
+ if (x >= 1.0f)
+ return 255;
+ else if (x >= 0.0031308f)
+ return float_to_ubyte(1.055f * powf(x, 0.41666666f) - 0.055f);
+ else if (x > 0.0f)
+ return float_to_ubyte(12.92f * x);
+ else
+ return 0;
+ }
+ else {
+ /*
+ * This is taken from https://gist.github.com/rygorous/2203834
+ * Use LUT and do linear interpolation: the float's exponent and top 3
+ * mantissa bits pick a table entry, and the next 8 mantissa bits
+ * interpolate within that entry.
+ */
+ union fi almostone, minval, f;
+ unsigned tab, bias, scale, t;
+
+ almostone.ui = 0x3f7fffff; /* bit pattern of the largest float below 1.0f */
+ minval.ui = (127-13) << 23; /* bit pattern of 2^-13 */
+
+ /*
+ * Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
+ * The tests are carefully written so that NaNs map to 0, same as in the
+ * reference implementation: !(x > minval.f) is true for NaN, whereas
+ * (x <= minval.f) would be false.
+ */
+ if (!(x > minval.f))
+ x = minval.f;
+ if (x > almostone.f)
+ x = almostone.f;
+
+ /*
+ * Do the table lookup and unpack bias, scale.
+ * Shifting the offset from minval right by 20 keeps the exponent
+ * difference plus the top 3 mantissa bits, so after the clamp above the
+ * index lies in [0, 103]. bias is the entry's high 16 bits shifted left
+ * by 9; scale is its low 16 bits.
+ */
+ f.f = x;
+ tab = util_format_linear_to_srgb_helper_table[(f.ui - minval.ui) >> 20];
+ bias = (tab >> 16) << 9;
+ scale = tab & 0xffff;
+
+ /*
+ * Grab next-highest mantissa bits and perform linear interpolation.
+ * t is the 8 mantissa bits directly below those used for the table
+ * index; bias + scale*t carries 16 fractional bits, which the final
+ * shift discards to leave the 8-bit result.
+ */
+ t = (f.ui >> 12) & 0xff;
+ return (uint8_t) ((bias + scale*t) >> 16);
+ }
}
import math
+import struct
def srgb_to_linear(x):
def linear_to_srgb(x):
if x >= 0.0031308:
- return 1.055 * math.pow(x, 0.41666) - 0.055
+ return 1.055 * math.pow(x, 0.41666666) - 0.055  # exponent approximates 1/2.4 more closely than the old 0.41666
else:
return 12.92 * x
+
def generate_srgb_tables():
print 'const float'
print 'util_format_srgb_8unorm_to_linear_float_table[256] = {'
print '};'
print
+# Calculate the per-bucket interpolation constants (bias, scale) used by the float linear -> unorm8 sRGB conversion.
+ numexp = 13  # number of float exponents covered, starting at 2^-13
+ mantissa_msb = 3  # top mantissa bits that split each exponent range into buckets
+# stepshift only subsamples the floats (every 2^stepshift-th one) to make generation faster;
+# 5 is the largest value which still gives exactly the same table as 0.
+ stepshift = 5
+ nbuckets = numexp << mantissa_msb  # 13 * 8 = 104 table entries
+ bucketsize = (1 << (23 - mantissa_msb)) >> stepshift  # number of samples evaluated per bucket
+ mantshift = 12  # reduces the in-bucket mantissa offset to the 8-bit interpolation factor j
+ valtable = []
+ sum_aa = float(bucketsize)  # least-squares normal-equation term: sum of 1*1 over the samples
+ sum_ab = 0.0  # sum of j
+ sum_bb = 0.0  # sum of j*j
+ for i in range(0, bucketsize):
+ j = (i << stepshift) >> mantshift
+ sum_ab += j
+ sum_bb += j*j
+ inv_det = 1.0 / (sum_aa * sum_bb - sum_ab * sum_ab)  # same for every bucket, so computed once
+
+ for bucket in range(0, nbuckets):
+ start = ((127 - numexp) << 23) + bucket*(bucketsize << stepshift)  # float bit pattern of the bucket's first value
+ sum_a = 0.0
+ sum_b = 0.0
+
+ for i in range(0, bucketsize):
+ j = (i << stepshift) >> mantshift
+ fint = start + (i << stepshift)
+ ffloat = struct.unpack('f', struct.pack('I', fint))[0]  # reinterpret the integer bit pattern as a float
+ val = linear_to_srgb(ffloat) * 255.0 + 0.5  # +0.5 folds round-to-nearest into the fit, since the C reader truncates
+ sum_a += val
+ sum_b += j*val
+
+ solved_a = inv_det * (sum_bb*sum_a - sum_ab*sum_b)  # fitted intercept of val ~= a + b*j
+ solved_b = inv_det * (sum_aa*sum_b - sum_ab*sum_a)  # fitted slope
+
+ scaled_a = solved_a * 65536.0 / 512.0  # 16 fractional bits, pre-divided by 512 (the C reader shifts bias left by 9)
+ scaled_b = solved_b * 65536.0  # 16 fractional bits
+
+ int_a = int(scaled_a + 0.5)
+ int_b = int(scaled_b + 0.5)
+
+ valtable.append((int_a << 16) + int_b)  # pack bias into the high 16 bits, scale into the low 16 bits
+
+ print 'const unsigned'
+ print 'util_format_linear_to_srgb_helper_table[104] = {'
+
+ for j in range(0, nbuckets, 4):  # four entries per output row
+ print ' ',
+ for i in range(j, j + 4):
+ print '0x%08x,' % (valtable[i],),
+ print
+ print '};'
+ print
def main():
print '/* This file is autogenerated by u_format_srgb.py. Do not edit directly. */'