X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Futil%2Fu_format_pack.py;h=6d0016c0ad8c60fc529d3fd6d8116e1ca5dc836d;hb=69faf5b48fde9d89b554c75fba7b431ddcd591da;hp=26f87486046c5fb3df3c1cde763d2fd5234e3087;hpb=b8012643e1f5f6e49593ec8f04d3721df53e6afb;p=mesa.git diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py index 26f87486046..6d0016c0ad8 100644 --- a/src/gallium/auxiliary/util/u_format_pack.py +++ b/src/gallium/auxiliary/util/u_format_pack.py @@ -3,7 +3,7 @@ ''' /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -37,69 +37,38 @@ ''' -import sys -import math - from u_format_parse import * -def generate_f16_to_f32(): - '''Naive implementation, need something faster that operates on bits''' - - print ''' -static float -f16_to_f32(uint16_t h) -{ - unsigned mantissa = h & 0x3ff; - unsigned exponent = (h >> 10) & 0x1f; - float sign = (h & 0x8000) ? -1.0f : 1.0f; - - if (exponent == 0) { - if (mantissa == 0) { - return sign * 0.0f; - } - return sign * powf(2.0f, -14.0f) * (float)mantissa / 1024.0f; - } - if (exponent == 31) { - if (mantissa == 0) { - /* XXX: infinity */ - return sign * 100000.0f; - } - /* XXX: NaN */ - return 1000.0f; - } - return sign * powf(2.0f, (float)exponent - 15.0f) * (1.0f + (float)mantissa / 1024.0f); -} -''' - -def generate_f32_to_f16(): - print ''' -static uint16_t -f32_to_f16(float f) -{ - /* TODO */ - return 0; -} -''' - def generate_format_type(format): '''Generate a structure that describes the format.''' + assert format.layout == PLAIN + print 'union util_format_%s {' % format.short_name() - if format.is_bitmask() or format.short_name() == "r11g11b10_float": + + if format.block_size() in (8, 16, 32, 64): print ' uint%u_t value;' % (format.block_size(),) + + use_bitfields = False + for channel in format.channels: + if channel.size % 8 or not is_pot(channel.size): + use_bitfields = True + print ' struct {' for channel in format.channels: - if (format.is_bitmask() or format.is_mixed()) and not format.is_array() or format.short_name() == "r11g11b10_float": + if use_bitfields: if channel.type == VOID: if channel.size: print ' unsigned %s:%u;' % (channel.name, channel.size) elif channel.type == UNSIGNED: print ' unsigned %s:%u;' % (channel.name, channel.size) - elif channel.type == SIGNED: + elif channel.type in (SIGNED, FIXED): print ' int %s:%u;' % (channel.name, channel.size) elif channel.type == FLOAT: - if channel.size == 32: + if channel.size == 64: + print ' double %s;' % (channel.name) + elif channel.size == 32: print ' float %s;' % (channel.name) else: print ' unsigned %s:%u;' % (channel.name, channel.size) @@ -130,24 +99,10 @@ def generate_format_type(format): print -def generate_srgb_tables(): - print 'static ubyte srgb_to_linear[256] = {' - for i in range(256): - print ' %s,' % (int(math.pow((i / 255.0 + 0.055) / 1.055, 2.4) * 255)) - print '};' - print - print 'static ubyte linear_to_srgb[256] = {' - print ' 0,' - for i in range(1, 256): - print ' %s,' % (int((1.055 * math.pow(i / 255.0, 0.41666) - 0.055) * 255)) - print '};' - print - - def bswap_format(format): '''Generate a structure that describes the format.''' - if format.is_bitmask() and not format.is_array(): + if format.is_bitmask() and not format.is_array() and format.block_size() > 8: print '#ifdef PIPE_ARCH_BIG_ENDIAN' print ' pixel.value = util_bswap%u(pixel.value);' % format.block_size() print '#endif' @@ -164,12 +119,10 @@ def is_format_supported(format): for i in range(4): channel = format.channels[i] - if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT): + if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT, FIXED): + return False + if channel.type == FLOAT and channel.size not in (16, 32, 64): return False - - # We can only read a color from a depth/stencil format if the depth channel is present - if format.colorspace == 'zs' and format.swizzles[0] == SWIZZLE_NONE: - return False return True @@ -186,7 +139,7 @@ def native_type(format): channel = format.channels[0] if channel.type in (UNSIGNED, VOID): return 'uint%u_t' % channel.size - elif channel.type == SIGNED: + elif channel.type in (SIGNED, FIXED): return 'int%u_t' % channel.size elif channel.type == FLOAT: if channel.size == 16: @@ -232,12 +185,35 @@ def get_one_shift(type): assert False -def get_one(type): +def value_to_native(type, value): + '''Get the value of unity for this type.''' + if type.type == FLOAT: + return value + if type.type == FIXED: + return int(value * (1 << (type.size/2))) + if not type.norm: + return int(value) + if type.type == UNSIGNED: + return int(value * ((1 << type.size) - 1)) + if type.type == SIGNED: + return int(value * ((1 << (type.size - 1)) - 1)) + assert False + + +def native_to_constant(type, value): '''Get the value of unity for this type.''' - if type.type == 'FLOAT' or not type.norm: - return 1 + if type.type == FLOAT: + if type.size <= 32: + return "%ff" % value + else: + return "%ff" % value else: - return (1 << get_one_shift(type)) - 1 + return str(int(value)) + + +def get_one(type): + '''Get the value of unity for this type.''' + return value_to_native(type, 1) def clamp_expr(src_channel, dst_channel, dst_native_type, value): @@ -251,85 +227,153 @@ def clamp_expr(src_channel, dst_channel, dst_native_type, value): src_max = src_channel.max() dst_min = dst_channel.min() dst_max = dst_channel.max() + + # Translate the destination range to the src native value + dst_min_native = value_to_native(src_channel, dst_min) + dst_max_native = value_to_native(src_channel, dst_max) if src_min < dst_min and src_max > dst_max: - return 'CLAMP(%s, %s, %s)' % (value, dst_min, dst_max) + return 'CLAMP(%s, %s, %s)' % (value, dst_min_native, dst_max_native) if src_max > dst_max: - return 'MIN2(%s, %s)' % (value, dst_max) + return 'MIN2(%s, %s)' % (value, dst_max_native) if src_min < dst_min: - return 'MAX2(%s, %s)' % (value, dst_min) + return 'MAX2(%s, %s)' % (value, dst_min_native) return value -def conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=True): +def conversion_expr(src_channel, + dst_channel, dst_native_type, + value, + clamp=True, + src_colorspace = RGB, + dst_colorspace = RGB): '''Generate the expression to convert a value between two types.''' + if src_colorspace != dst_colorspace: + if src_colorspace == SRGB: + assert src_channel.type == UNSIGNED + assert src_channel.norm + assert src_channel.size == 8 + assert dst_colorspace == RGB + if dst_channel.type == FLOAT: + return 'util_format_srgb_8unorm_to_linear_float(%s)' % value + else: + assert dst_channel.type == UNSIGNED + assert dst_channel.norm + assert dst_channel.size == 8 + return 'util_format_srgb_to_linear_8unorm(%s)' % value + elif dst_colorspace == SRGB: + assert dst_channel.type == UNSIGNED + assert dst_channel.norm + assert dst_channel.size == 8 + assert src_colorspace == RGB + if src_channel.type == FLOAT: + return 'util_format_linear_float_to_srgb_8unorm(%s)' % value + else: + assert src_channel.type == UNSIGNED + assert src_channel.norm + assert src_channel.size == 8 + return 'util_format_linear_to_srgb_8unorm(%s)' % value + elif src_colorspace == ZS: + pass + elif dst_colorspace == ZS: + pass + else: + assert 0 + if src_channel == dst_channel: return value - if src_channel.type == FLOAT and dst_channel.type == FLOAT: - if src_channel.size == dst_channel.size: - return value - if src_channel.size == 64: - value = '(float)%s' % (value) - elif src_channel.size == 16: - value = 'f16_to_f32(%s)' % (value) - if dst_channel.size == 16: - value = 'f32_to_f16(%s)' % (value) - elif dst_channel.size == 64: - value = '(double)%s' % (value) - return value - - if clamp: - value = clamp_expr(src_channel, dst_channel, dst_native_type, value) + src_type = src_channel.type + src_size = src_channel.size + src_norm = src_channel.norm - if dst_channel.type == FLOAT: - if src_channel.norm: - one = get_one(src_channel) - if src_channel.size <= 23: - scale = '(1.0f/0x%x)' % one - else: - # bigger than single precision mantissa, use double - scale = '(1.0/0x%x)' % one - value = '(%s * %s)' % (value, scale) - return '(%s)%s' % (dst_native_type, value) + # Promote half to float + if src_type == FLOAT and src_size == 16: + value = 'util_half_to_float(%s)' % value + src_size = 32 - if src_channel.type == FLOAT: - if dst_channel.norm: - dst_one = get_one(dst_channel) - if dst_channel.size <= 23: - scale = '0x%x' % dst_one - else: - # bigger than single precision mantissa, use double - scale = '(double)0x%x' % dst_one - value = '(%s * %s)' % (value, scale) - return '(%s)%s' % (dst_native_type, value) + # Special case for float <-> ubytes for more accurate results + # Done before clamping since these functions already take care of that + if src_type == UNSIGNED and src_norm and src_size == 8 and dst_channel.type == FLOAT and dst_channel.size == 32: + return 'ubyte_to_float(%s)' % value + if src_type == FLOAT and src_size == 32 and dst_channel.type == UNSIGNED and dst_channel.norm and dst_channel.size == 8: + return 'float_to_ubyte(%s)' % value + + if clamp: + if dst_channel.type != FLOAT or src_type != FLOAT: + value = clamp_expr(src_channel, dst_channel, dst_native_type, value) - if src_channel.type in (SIGNED, UNSIGNED) and dst_channel.type in (SIGNED, UNSIGNED): - if not src_channel.norm and not dst_channel.norm: + if src_type in (SIGNED, UNSIGNED) and dst_channel.type in (SIGNED, UNSIGNED): + if not src_norm and not dst_channel.norm: # neither is normalized -- just cast return '(%s)%s' % (dst_native_type, value) src_one = get_one(src_channel) dst_one = get_one(dst_channel) - if src_one > dst_one and src_channel.norm and dst_channel.norm: + if src_one > dst_one and src_norm and dst_channel.norm: # We can just bitshift src_shift = get_one_shift(src_channel) dst_shift = get_one_shift(dst_channel) value = '(%s >> %s)' % (value, src_shift - dst_shift) else: # We need to rescale using an intermediate type big enough to hold the multiplication of both - tmp_native_type = intermediate_native_type(src_channel.size + dst_channel.size, src_channel.sign and dst_channel.sign) + tmp_native_type = intermediate_native_type(src_size + dst_channel.size, src_channel.sign and dst_channel.sign) value = '((%s)%s)' % (tmp_native_type, value) value = '(%s * 0x%x / 0x%x)' % (value, dst_one, src_one) value = '(%s)%s' % (dst_native_type, value) return value - assert False + # Promote to either float or double + if src_type != FLOAT: + if src_norm or src_type == FIXED: + one = get_one(src_channel) + if src_size <= 23: + value = '(%s * (1.0f/0x%x))' % (value, one) + if dst_channel.size <= 32: + value = '(float)%s' % value + src_size = 32 + else: + # bigger than single precision mantissa, use double + value = '(%s * (1.0/0x%x))' % (value, one) + src_size = 64 + src_norm = False + else: + if src_size <= 23 or dst_channel.size <= 32: + value = '(float)%s' % value + src_size = 32 + else: + # bigger than single precision mantissa, use double + value = '(double)%s' % value + src_size = 64 + src_type = FLOAT + + # Convert double or float to non-float + if dst_channel.type != FLOAT: + if dst_channel.norm or dst_channel.type == FIXED: + dst_one = get_one(dst_channel) + if dst_channel.size <= 23: + value = '(%s * 0x%x)' % (value, dst_one) + else: + # bigger than single precision mantissa, use double + value = '(%s * (double)0x%x)' % (value, dst_one) + value = '(%s)%s' % (dst_native_type, value) + else: + # Cast double to float when converting to either half or float + if dst_channel.size <= 32 and src_size > 32: + value = '(float)%s' % value + src_size = 32 + + if dst_channel.size == 16: + value = 'util_float_to_half(%s)' % value + elif dst_channel.size == 64 and src_size < 64: + value = '(double)%s' % value + + return value def generate_unpack_kernel(format, dst_channel, dst_native_type): @@ -343,19 +387,20 @@ def generate_unpack_kernel(format, dst_channel, dst_native_type): if format.is_bitmask(): depth = format.block_size() - print ' uint%u_t value = *(uint%u_t *)src;' % (depth, depth) + print ' uint%u_t value = *(const uint%u_t *)src;' % (depth, depth) # Declare the intermediate variables for i in range(format.nr_channels()): src_channel = format.channels[i] if src_channel.type == UNSIGNED: - print ' uint%u_t %s;' % (depth, src_channel.name) + print ' uint%u_t %s;' % (depth, src_channel.name) elif src_channel.type == SIGNED: - print ' int%u_t %s;' % (depth, src_channel.name) + print ' int%u_t %s;' % (depth, src_channel.name) - print ' #ifdef PIPE_ARCH_BIG_ENDIAN' - print ' value = util_bswap%u(value);' % depth - print ' #endif' + if depth > 8: + print '#ifdef PIPE_ARCH_BIG_ENDIAN' + print ' value = util_bswap%u(value);' % depth + print '#endif' # Compute the intermediate unshifted values shift = 0 @@ -382,7 +427,7 @@ def generate_unpack_kernel(format, dst_channel, dst_native_type): value = None if value is not None: - print ' %s = %s;' % (src_channel.name, value) + print ' %s = %s;' % (src_channel.name, value) shift += src_channel.size @@ -391,8 +436,15 @@ def generate_unpack_kernel(format, dst_channel, dst_native_type): swizzle = format.swizzles[i] if swizzle < 4: src_channel = format.channels[swizzle] + src_colorspace = format.colorspace + if src_colorspace == SRGB and i == 3: + # Alpha channel is linear + src_colorspace = RGB value = src_channel.name - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + value = conversion_expr(src_channel, + dst_channel, dst_native_type, + value, + src_colorspace = src_colorspace) elif swizzle == SWIZZLE_0: value = '0' elif swizzle == SWIZZLE_1: @@ -401,24 +453,26 @@ def generate_unpack_kernel(format, dst_channel, dst_native_type): value = '0' else: assert False - if format.colorspace == ZS: - if i == 3: - value = get_one(dst_channel) - elif i >= 1: - value = 'dst[0]' - print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i]) + print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i]) else: - print ' union util_format_%s pixel;' % format.short_name() - print ' memcpy(&pixel, src, sizeof pixel);' + print ' union util_format_%s pixel;' % format.short_name() + print ' memcpy(&pixel, src, sizeof pixel);' bswap_format(format) for i in range(4): swizzle = format.swizzles[i] if swizzle < 4: src_channel = format.channels[swizzle] + src_colorspace = format.colorspace + if src_colorspace == SRGB and i == 3: + # Alpha channel is linear + src_colorspace = RGB value = 'pixel.chan.%s' % src_channel.name - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) + value = conversion_expr(src_channel, + dst_channel, dst_native_type, + value, + src_colorspace = src_colorspace) elif swizzle == SWIZZLE_0: value = '0' elif swizzle == SWIZZLE_1: @@ -427,12 +481,7 @@ def generate_unpack_kernel(format, dst_channel, dst_native_type): value = '0' else: assert False - if format.colorspace == ZS: - if i == 3: - value = get_one(dst_channel) - elif i >= 1: - value = 'dst[0]' - print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i]) + print ' dst[%u] = %s; /* %s */' % (i, value, 'rgba'[i]) def generate_pack_kernel(format, src_channel, src_native_type): @@ -448,19 +497,21 @@ def generate_pack_kernel(format, src_channel, src_native_type): if format.is_bitmask(): depth = format.block_size() - print ' uint%u_t value = 0;' % depth + print ' uint%u_t value = 0;' % depth shift = 0 for i in range(4): dst_channel = format.channels[i] if inv_swizzle[i] is not None: value ='src[%u]' % inv_swizzle[i] - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) - if format.colorspace == ZS: - if i == 3: - value = get_one(dst_channel) - elif i >= 1: - value = '0' + dst_colorspace = format.colorspace + if dst_colorspace == SRGB and inv_swizzle[i] == 3: + # Alpha channel is linear + dst_colorspace = RGB + value = conversion_expr(src_channel, + dst_channel, dst_native_type, + value, + dst_colorspace = dst_colorspace) if dst_channel.type in (UNSIGNED, SIGNED): if shift + dst_channel.size < depth: value = '(%s) & 0x%x' % (value, (1 << dst_channel.size) - 1) @@ -472,35 +523,38 @@ def generate_pack_kernel(format, src_channel, src_native_type): else: value = None if value is not None: - print ' value |= %s;' % (value) + print ' value |= %s;' % (value) shift += dst_channel.size - print '#ifdef PIPE_ARCH_BIG_ENDIAN' - print ' value = util_bswap%u(value);' % depth - print '#endif' + if depth > 8: + print '#ifdef PIPE_ARCH_BIG_ENDIAN' + print ' value = util_bswap%u(value);' % depth + print '#endif' - print ' *(uint%u_t *)dst = value;' % depth + print ' *(uint%u_t *)dst = value;' % depth else: - print ' union util_format_%s pixel;' % format.short_name() + print ' union util_format_%s pixel;' % format.short_name() for i in range(4): dst_channel = format.channels[i] width = dst_channel.size if inv_swizzle[i] is None: continue + dst_colorspace = format.colorspace + if dst_colorspace == SRGB and inv_swizzle[i] == 3: + # Alpha channel is linear + dst_colorspace = RGB value ='src[%u]' % inv_swizzle[i] - value = conversion_expr(src_channel, dst_channel, dst_native_type, value) - if format.colorspace == ZS: - if i == 3: - value = get_one(dst_channel) - elif i >= 1: - value = '0' - print ' pixel.chan.%s = %s;' % (dst_channel.name, value) + value = conversion_expr(src_channel, + dst_channel, dst_native_type, + value, + dst_colorspace = dst_colorspace) + print ' pixel.chan.%s = %s;' % (dst_channel.name, value) bswap_format(format) - print ' memcpy(dst, &pixel, sizeof pixel);' + print ' memcpy(dst, &pixel, sizeof pixel);' def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix): @@ -509,16 +563,23 @@ def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix): name = format.short_name() print 'static INLINE void' - print 'util_format_%s_unpack_%s(%s *dst, const uint8_t *src, unsigned length)' % (name, dst_suffix, dst_native_type) + print 'util_format_%s_unpack_%s(%s *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)' % (name, dst_suffix, dst_native_type) print '{' if is_format_supported(format): - print ' while(length--) {' - + print ' unsigned x, y;' + print ' for(y = 0; y < height; y += %u) {' % (format.block_height,) + print ' %s *dst = dst_row;' % (dst_native_type) + print ' const uint8_t *src = src_row;' + print ' for(x = 0; x < width; x += %u) {' % (format.block_width,) + generate_unpack_kernel(format, dst_channel, dst_native_type) - print ' src += %u;' % (format.block_size() / 8,) - print ' dst += 4;' + print ' src += %u;' % (format.block_size() / 8,) + print ' dst += 4;' + print ' }' + print ' src_row += src_stride;' + print ' dst_row += dst_stride/sizeof(*dst_row);' print ' }' print '}' @@ -531,18 +592,25 @@ def generate_format_pack(format, src_channel, src_native_type, src_suffix): name = format.short_name() print 'static INLINE void' - print 'util_format_%s_pack_%s(uint8_t *dst, const %s *src, unsigned length)' % (name, src_suffix, src_native_type) + print 'util_format_%s_pack_%s(uint8_t *dst_row, unsigned dst_stride, const %s *src_row, unsigned src_stride, unsigned width, unsigned height)' % (name, src_suffix, src_native_type) print '{' if is_format_supported(format): - print ' while(length--) {' + print ' unsigned x, y;' + print ' for(y = 0; y < height; y += %u) {' % (format.block_height,) + print ' const %s *src = src_row;' % (src_native_type) + print ' uint8_t *dst = dst_row;' + print ' for(x = 0; x < width; x += %u) {' % (format.block_width,) generate_pack_kernel(format, src_channel, src_native_type) - print ' src += 4;' - print ' dst += %u;' % (format.block_size() / 8,) + print ' src += 4;' + print ' dst += %u;' % (format.block_size() / 8,) + print ' }' + print ' dst_row += dst_stride;' + print ' src_row += src_stride/sizeof(*src_row);' print ' }' - + print '}' print @@ -563,34 +631,40 @@ def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix): print +def is_format_hand_written(format): + return format.layout in ('s3tc', 'subsampled', 'other') or format.colorspace == ZS + + def generate(formats): print print '#include "pipe/p_compiler.h"' print '#include "u_math.h"' + print '#include "u_half.h"' print '#include "u_format.h"' + print '#include "u_format_other.h"' + print '#include "u_format_srgb.h"' + print '#include "u_format_yuv.h"' + print '#include "u_format_zs.h"' print - generate_f16_to_f32() - generate_f32_to_f16() - for format in formats: - if is_format_supported(format): - generate_format_type(format) + if not is_format_hand_written(format): + + if is_format_supported(format): + generate_format_type(format) - channel = Channel(FLOAT, False, 32) - native_type = 'float' - suffix = 'float' + channel = Channel(FLOAT, False, 32) + native_type = 'float' + suffix = 'rgba_float' - for format in formats: - generate_format_unpack(format, channel, native_type, suffix) - generate_format_pack(format, channel, native_type, suffix) - generate_format_fetch(format, channel, native_type, suffix) + generate_format_unpack(format, channel, native_type, suffix) + generate_format_pack(format, channel, native_type, suffix) + generate_format_fetch(format, channel, native_type, suffix) - channel = Channel(UNSIGNED, True, 8) - native_type = 'uint8_t' - suffix = '8unorm' + channel = Channel(UNSIGNED, True, 8) + native_type = 'uint8_t' + suffix = 'rgba_8unorm' - for format in formats: - generate_format_unpack(format, channel, native_type, suffix) - generate_format_pack(format, channel, native_type, suffix) + generate_format_unpack(format, channel, native_type, suffix) + generate_format_pack(format, channel, native_type, suffix)