From e3b5e2db1b189092522c43ba789c4f244ddb7c60 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 6 Aug 2013 16:55:47 +0200 Subject: [PATCH] util: implement table-based + linear interpolation linear-to-srgb conversion Should be much faster, seems to work in softpipe. While here (although it's now disabled) fix up the pow factor - the former value is what is in GL core; it is, however, not actually accurate to fp32 standard (as it is 1.0/2.4), and if someone would do all the accurate math there's no reason to waste 8 mantissa bits or so... v2: use real table generating function instead of just printing the values (might take a bit longer as it does calculations on some 3+ million floats but much more descriptive obviously). Also fix up another inaccurate pow factor (this time in the python code) - was wondering where the couple of one-bit errors came from :-(. Reviewed-by: Jose Fonseca Reviewed-by: Zack Rusin --- src/gallium/auxiliary/util/u_format_srgb.h | 56 ++++++++++++++++---- src/gallium/auxiliary/util/u_format_srgb.py | 57 ++++++++++++++++++++- 2 files changed, 102 insertions(+), 11 deletions(-) diff --git a/src/gallium/auxiliary/util/u_format_srgb.h b/src/gallium/auxiliary/util/u_format_srgb.h index 82ed9575d96..740a9197460 100644 --- a/src/gallium/auxiliary/util/u_format_srgb.h +++ b/src/gallium/auxiliary/util/u_format_srgb.h @@ -39,6 +39,7 @@ #include "pipe/p_compiler.h" +#include "u_pack_color.h" #include "u_math.h" @@ -51,23 +52,58 @@ util_format_srgb_to_linear_8unorm_table[256]; extern const uint8_t util_format_linear_to_srgb_8unorm_table[256]; +extern const unsigned +util_format_linear_to_srgb_helper_table[104]; + /** * Convert a unclamped linear float to srgb value in the [0,255]. - * XXX this hasn't been tested (render to srgb surface). - * XXX this needs optimization. 
*/ static INLINE uint8_t util_format_linear_float_to_srgb_8unorm(float x) { - if (x >= 1.0f) - return 255; - else if (x >= 0.0031308f) - return float_to_ubyte(1.055f * powf(x, 0.41666f) - 0.055f); - else if (x > 0.0f) - return float_to_ubyte(12.92f * x); - else - return 0; + /* this would be exact but (probably much) slower */ + if (0) { + if (x >= 1.0f) + return 255; + else if (x >= 0.0031308f) + return float_to_ubyte(1.055f * powf(x, 0.41666666f) - 0.055f); + else if (x > 0.0f) + return float_to_ubyte(12.92f * x); + else + return 0; + } + else { + /* + * This is taken from https://gist.github.com/rygorous/2203834 + * Use LUT and do linear interpolation. + */ + union fi almostone, minval, f; + unsigned tab, bias, scale, t; + + almostone.ui = 0x3f7fffff; + minval.ui = (127-13) << 23; + + /* + * Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively. + * The tests are carefully written so that NaNs map to 0, same as in the + * reference implementation. + */ + if (!(x > minval.f)) + x = minval.f; + if (x > almostone.f) + x = almostone.f; + + /* Do the table lookup and unpack bias, scale */ + f.f = x; + tab = util_format_linear_to_srgb_helper_table[(f.ui - minval.ui) >> 20]; + bias = (tab >> 16) << 9; + scale = tab & 0xffff; + + /* Grab next-highest mantissa bits and perform linear interpolation */ + t = (f.ui >> 12) & 0xff; + return (uint8_t) ((bias + scale*t) >> 16); + } } diff --git a/src/gallium/auxiliary/util/u_format_srgb.py b/src/gallium/auxiliary/util/u_format_srgb.py index cd63ae78919..c6c02f0538e 100644 --- a/src/gallium/auxiliary/util/u_format_srgb.py +++ b/src/gallium/auxiliary/util/u_format_srgb.py @@ -40,6 +40,7 @@ CopyRight = ''' import math +import struct def srgb_to_linear(x): @@ -51,10 +52,11 @@ def srgb_to_linear(x): def linear_to_srgb(x): if x >= 0.0031308: - return 1.055 * math.pow(x, 0.41666) - 0.055 + return 1.055 * math.pow(x, 0.41666666) - 0.055 else: return 12.92 * x + def generate_srgb_tables(): print 'const float' print 
'util_format_srgb_8unorm_to_linear_float_table[256] = {' @@ -84,6 +86,59 @@ def generate_srgb_tables(): print '};' print +# calculate the table interpolation values used in float linear to unorm8 srgb + numexp = 13 + mantissa_msb = 3 +# stepshift is just used to only use every x-th float to make things faster, +# 5 is largest value which still gives exact same table as 0 + stepshift = 5 + nbuckets = numexp << mantissa_msb + bucketsize = (1 << (23 - mantissa_msb)) >> stepshift + mantshift = 12 + valtable = [] + sum_aa = float(bucketsize) + sum_ab = 0.0 + sum_bb = 0.0 + for i in range(0, bucketsize): + j = (i << stepshift) >> mantshift + sum_ab += j + sum_bb += j*j + inv_det = 1.0 / (sum_aa * sum_bb - sum_ab * sum_ab) + + for bucket in range(0, nbuckets): + start = ((127 - numexp) << 23) + bucket*(bucketsize << stepshift) + sum_a = 0.0 + sum_b = 0.0 + + for i in range(0, bucketsize): + j = (i << stepshift) >> mantshift + fint = start + (i << stepshift) + ffloat = struct.unpack('f', struct.pack('I', fint))[0] + val = linear_to_srgb(ffloat) * 255.0 + 0.5 + sum_a += val + sum_b += j*val + + solved_a = inv_det * (sum_bb*sum_a - sum_ab*sum_b) + solved_b = inv_det * (sum_aa*sum_b - sum_ab*sum_a) + + scaled_a = solved_a * 65536.0 / 512.0 + scaled_b = solved_b * 65536.0 + + int_a = int(scaled_a + 0.5) + int_b = int(scaled_b + 0.5) + + valtable.append((int_a << 16) + int_b) + + print 'const unsigned' + print 'util_format_linear_to_srgb_helper_table[104] = {' + + for j in range(0, nbuckets, 4): + print ' ', + for i in range(j, j + 4): + print '0x%08x,' % (valtable[i],), + print + print '};' + print def main(): print '/* This file is autogenerated by u_format_srgb.py. Do not edit directly. */' -- 2.30.2