From: Denis Pauk Date: Tue, 26 Jun 2018 20:30:49 +0000 (+0300) Subject: mesa: add header for share bptc decompress functions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=bf4871f9e83def8a38bbecd0b1fe0b909075523d;p=mesa.git mesa: add header for share bptc decompress functions Move shared bptc functions to texcompress_bptc_tmp.h: * fetch_rgba_unorm_from_block * fetch_rgb_float_from_block * compress_rgba_unorm * compress_rgb_float Create decompress functions: * decompress_rgba_unorm * decompress_rgb_float Functions will be reused in gallium/auxiliary code. v2: Add block decompress function v3: Move all shared code to header Suggested-by: Marek Olšák Signed-off-by: Denis Pauk CC: Marek Olšák Signed-off-by: Marek Olšák --- diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 00aba0a2f78..63f3734c322 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -216,6 +216,7 @@ MAIN_FILES = \ main/texcompress.c \ main/texcompress_bptc.c \ main/texcompress_bptc.h \ + main/texcompress_bptc_tmp.h \ main/texcompress_cpal.c \ main/texcompress_cpal.h \ main/texcompress_etc.c \ diff --git a/src/mesa/main/texcompress_bptc.c b/src/mesa/main/texcompress_bptc.c index fd37be97f36..46279f144ff 100644 --- a/src/mesa/main/texcompress_bptc.c +++ b/src/mesa/main/texcompress_bptc.c @@ -29,632 +29,38 @@ #include #include "texcompress.h" #include "texcompress_bptc.h" -#include "util/format_srgb.h" -#include "util/half_float.h" +#include "texcompress_bptc_tmp.h" #include "texstore.h" -#include "macros.h" #include "image.h" #include "mtypes.h" -#define BLOCK_SIZE 4 -#define N_PARTITIONS 64 -#define BLOCK_BYTES 16 - -struct bptc_unorm_mode { - int n_subsets; - int n_partition_bits; - bool has_rotation_bits; - bool has_index_selection_bit; - int n_color_bits; - int n_alpha_bits; - bool has_endpoint_pbits; - bool has_shared_pbits; - int n_index_bits; - int n_secondary_index_bits; -}; - -struct bptc_float_bitfield { - int8_t endpoint; - uint8_t 
component; - uint8_t offset; - uint8_t n_bits; - bool reverse; -}; - -struct bptc_float_mode { - bool reserved; - bool transformed_endpoints; - int n_partition_bits; - int n_endpoint_bits; - int n_index_bits; - int n_delta_bits[3]; - struct bptc_float_bitfield bitfields[24]; -}; - -struct bit_writer { - uint8_t buf; - int pos; - uint8_t *dst; -}; - -static const struct bptc_unorm_mode -bptc_unorm_modes[] = { - /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 }, - /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 }, - /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 }, - /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 }, - /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 }, - /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 }, - /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 }, - /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 } -}; - -static const struct bptc_float_mode -bptc_float_modes[] = { - /* 00 */ - { false, true, 5, 10, 3, { 5, 5, 5 }, - { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false }, - { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, - { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false }, - { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, - { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, - { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, - { 3, 2, 3, 1, false }, - { -1 } } - }, - /* 01 */ - { false, true, 5, 7, 3, { 6, 6, 6 }, - { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false }, - { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false }, - { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false }, - { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false }, - { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false }, - { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false }, - { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, 
{ 2, 2, 0, 4, false }, - { 2, 0, 0, 6, false }, - { 3, 0, 0, 6, false }, - { -1 } } - }, - /* 00010 */ - { false, true, 5, 11, 3, { 5, 4, 4 }, - { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, - { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false }, - { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false }, - { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false }, - { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false }, - { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false }, - { -1 } } - }, - /* 00011 */ - { false, false, 0, 10, 4, { 10, 10, 10 }, - { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, - { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false }, - { -1 } } - }, - /* 00110 */ - { false, true, 5, 11, 3, { 4, 5, 4 }, - { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, - { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false }, - { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false }, - { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false }, - { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false }, - { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false }, - { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false }, - { -1 } } - }, - /* 00111 */ - { false, true, 0, 11, 4, { 9, 9, 9 }, - { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, - { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false }, - { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false }, - { -1 } } - }, - /* 01010 */ - { false, true, 5, 11, 3, { 4, 4, 5 }, - { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, - { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false }, - { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, - { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, 
false }, - { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false }, - { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false }, - { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false }, - { -1 } } - }, - /* 01011 */ - { false, true, 0, 12, 4, { 8, 8, 8 }, - { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, - { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false }, - { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true }, - { -1 } } - }, - /* 01110 */ - { false, true, 5, 9, 3, { 5, 5, 5 }, - { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false }, - { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false }, - { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false }, - { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, - { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, - { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, - { 3, 2, 3, 1, false }, - { -1 } } - }, - /* 01111 */ - { false, true, 0, 16, 4, { 4, 4, 4 }, - { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, - { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false }, - { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true }, - { -1 } } - }, - /* 10010 */ - { false, true, 5, 8, 3, { 6, 5, 5 }, - { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false }, - { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, - { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false }, - { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, - { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false }, - { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false }, - { 3, 0, 0, 6, false }, - { -1 } } - }, - /* 10011 */ - { true /* reserved */ }, - /* 10110 */ - { false, true, 5, 8, 3, { 5, 6, 5 }, - { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false 
}, - { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false }, - { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false }, - { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false }, - { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false }, - { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false }, - { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false }, - { -1 } } - }, - /* 10111 */ - { true /* reserved */ }, - /* 11010 */ - { false, true, 5, 8, 3, { 5, 5, 6 }, - { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, - { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false }, - { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false }, - { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false }, - { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, - { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false }, - { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false }, - { -1 } } - }, - /* 11011 */ - { true /* reserved */ }, - /* 11110 */ - { false, false, 5, 6, 3, { 6, 6, 6 }, - { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false }, - { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false }, - { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false }, - { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false }, - { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false }, - { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false }, - { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, - { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false }, - { -1 } } - }, - /* 11111 */ - { true /* reserved */ }, -}; - -/* This partition table is used when the mode has two subsets. Each - * partition is represented by a 32-bit value which gives 2 bits per texel - * within the block. The value of the two bits represents which subset to use - * (0 or 1). 
- */ -static const uint32_t -partition_table1[N_PARTITIONS] = { - 0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U, - 0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U, - 0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U, - 0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U, - 0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U, - 0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U, - 0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U, - 0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U, - 0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U, - 0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U, - 0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U, - 0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U, - 0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U, - 0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U, - 0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U, - 0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U, -}; - -/* This partition table is used when the mode has three subsets. In this case - * the values can be 0, 1 or 2. 
- */ -static const uint32_t -partition_table2[N_PARTITIONS] = { - 0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U, - 0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U, - 0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U, - 0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U, - 0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U, - 0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U, - 0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U, - 0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U, - 0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U, - 0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U, - 0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U, - 0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U, - 0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U, - 0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U, - 0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U, - 0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U -}; - -static const uint8_t -anchor_indices[][N_PARTITIONS] = { - /* Anchor index values for the second subset of two-subset partitioning */ - { - 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, - 0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2, - 0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6, - 0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf - }, - - /* Anchor index values for the second subset of three-subset partitioning */ - { - 0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3, - 0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf, - 0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf, - 0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3 - }, - - /* Anchor index values for the third subset of three-subset - * partitioning - */ - { - 0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8, - 0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8, - 0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8, - 
0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8 - } -}; - -static int -extract_bits(const uint8_t *block, - int offset, - int n_bits) -{ - int byte_index = offset / 8; - int bit_index = offset % 8; - int n_bits_in_byte = MIN2(n_bits, 8 - bit_index); - int result = 0; - int bit = 0; - - while (true) { - result |= ((block[byte_index] >> bit_index) & - ((1 << n_bits_in_byte) - 1)) << bit; - - n_bits -= n_bits_in_byte; - - if (n_bits <= 0) - return result; - - bit += n_bits_in_byte; - byte_index++; - bit_index = 0; - n_bits_in_byte = MIN2(n_bits, 8); - } -} - -static uint8_t -expand_component(uint8_t byte, - int n_bits) -{ - /* Expands a n-bit quantity into a byte by copying the most-significant - * bits into the unused least-significant bits. - */ - return byte << (8 - n_bits) | (byte >> (2 * n_bits - 8)); -} - -static int -extract_unorm_endpoints(const struct bptc_unorm_mode *mode, - const uint8_t *block, - int bit_offset, - uint8_t endpoints[][4]) -{ - int component; - int subset; - int endpoint; - int pbit; - int n_components; - - /* Extract each color component */ - for (component = 0; component < 3; component++) { - for (subset = 0; subset < mode->n_subsets; subset++) { - for (endpoint = 0; endpoint < 2; endpoint++) { - endpoints[subset * 2 + endpoint][component] = - extract_bits(block, bit_offset, mode->n_color_bits); - bit_offset += mode->n_color_bits; - } - } - } - - /* Extract the alpha values */ - if (mode->n_alpha_bits > 0) { - for (subset = 0; subset < mode->n_subsets; subset++) { - for (endpoint = 0; endpoint < 2; endpoint++) { - endpoints[subset * 2 + endpoint][3] = - extract_bits(block, bit_offset, mode->n_alpha_bits); - bit_offset += mode->n_alpha_bits; - } - } - - n_components = 4; - } else { - for (subset = 0; subset < mode->n_subsets; subset++) - for (endpoint = 0; endpoint < 2; endpoint++) - endpoints[subset * 2 + endpoint][3] = 255; - - n_components = 3; - } - - /* Add in the p-bits */ - if (mode->has_endpoint_pbits) { - for 
(subset = 0; subset < mode->n_subsets; subset++) { - for (endpoint = 0; endpoint < 2; endpoint++) { - pbit = extract_bits(block, bit_offset, 1); - bit_offset += 1; - - for (component = 0; component < n_components; component++) { - endpoints[subset * 2 + endpoint][component] <<= 1; - endpoints[subset * 2 + endpoint][component] |= pbit; - } - } - } - } else if (mode->has_shared_pbits) { - for (subset = 0; subset < mode->n_subsets; subset++) { - pbit = extract_bits(block, bit_offset, 1); - bit_offset += 1; - - for (endpoint = 0; endpoint < 2; endpoint++) { - for (component = 0; component < n_components; component++) { - endpoints[subset * 2 + endpoint][component] <<= 1; - endpoints[subset * 2 + endpoint][component] |= pbit; - } - } - } - } - - /* Expand the n-bit values to a byte */ - for (subset = 0; subset < mode->n_subsets; subset++) { - for (endpoint = 0; endpoint < 2; endpoint++) { - for (component = 0; component < 3; component++) { - endpoints[subset * 2 + endpoint][component] = - expand_component(endpoints[subset * 2 + endpoint][component], - mode->n_color_bits + - mode->has_endpoint_pbits + - mode->has_shared_pbits); - } - - if (mode->n_alpha_bits > 0) { - endpoints[subset * 2 + endpoint][3] = - expand_component(endpoints[subset * 2 + endpoint][3], - mode->n_alpha_bits + - mode->has_endpoint_pbits + - mode->has_shared_pbits); - } - } - } - - return bit_offset; -} - -static bool -is_anchor(int n_subsets, - int partition_num, - int texel) -{ - if (texel == 0) - return true; - - switch (n_subsets) { - case 1: - return false; - case 2: - return anchor_indices[0][partition_num] == texel; - case 3: - return (anchor_indices[1][partition_num] == texel || - anchor_indices[2][partition_num] == texel); - default: - assert(false); - return false; - } -} - -static int -count_anchors_before_texel(int n_subsets, - int partition_num, - int texel) -{ - int count = 1; - - if (texel == 0) - return 0; - - switch (n_subsets) { - case 1: - break; - case 2: - if (texel > 
anchor_indices[0][partition_num]) - count++; - break; - case 3: - if (texel > anchor_indices[1][partition_num]) - count++; - if (texel > anchor_indices[2][partition_num]) - count++; - break; - default: - assert(false); - return 0; - } - - return count; -} - -static int32_t -interpolate(int32_t a, int32_t b, - int index, - int index_bits) +static void +fetch_bptc_rgb_float(const GLubyte *map, + GLint rowStride, GLint i, GLint j, + GLfloat *texel, + bool is_signed) { - static const uint8_t weights2[] = { 0, 21, 43, 64 }; - static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 }; - static const uint8_t weights4[] = - { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; - static const uint8_t *weights[] = { - NULL, NULL, weights2, weights3, weights4 - }; - int weight; + const GLubyte *block; - weight = weights[index_bits][index]; + block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16; - return ((64 - weight) * a + weight * b + 32) >> 6; + fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed); } static void -apply_rotation(int rotation, - uint8_t *result) +fetch_bptc_rgb_signed_float(const GLubyte *map, + GLint rowStride, GLint i, GLint j, + GLfloat *texel) { - uint8_t t; - - if (rotation == 0) - return; - - rotation--; - - t = result[rotation]; - result[rotation] = result[3]; - result[3] = t; + fetch_bptc_rgb_float(map, rowStride, i, j, texel, true); } static void -fetch_rgba_unorm_from_block(const uint8_t *block, - uint8_t *result, - int texel) +fetch_bptc_rgb_unsigned_float(const GLubyte *map, + GLint rowStride, GLint i, GLint j, + GLfloat *texel) { - int mode_num = ffs(block[0]); - const struct bptc_unorm_mode *mode; - int bit_offset, secondary_bit_offset; - int partition_num; - int subset_num; - int rotation; - int index_selection; - int index_bits; - int indices[2]; - int index; - int anchors_before_texel; - bool anchor; - uint8_t endpoints[3 * 2][4]; - uint32_t subsets; - int component; - - if (mode_num == 0) 
{ - /* According to the spec this mode is reserved and shouldn't be used. */ - memset(result, 0, 3); - result[3] = 0xff; - return; - } - - mode = bptc_unorm_modes + mode_num - 1; - bit_offset = mode_num; - - partition_num = extract_bits(block, bit_offset, mode->n_partition_bits); - bit_offset += mode->n_partition_bits; - - switch (mode->n_subsets) { - case 1: - subsets = 0; - break; - case 2: - subsets = partition_table1[partition_num]; - break; - case 3: - subsets = partition_table2[partition_num]; - break; - default: - assert(false); - return; - } - - if (mode->has_rotation_bits) { - rotation = extract_bits(block, bit_offset, 2); - bit_offset += 2; - } else { - rotation = 0; - } - - if (mode->has_index_selection_bit) { - index_selection = extract_bits(block, bit_offset, 1); - bit_offset++; - } else { - index_selection = 0; - } - - bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints); - - anchors_before_texel = count_anchors_before_texel(mode->n_subsets, - partition_num, texel); - - /* Calculate the offset to the secondary index */ - secondary_bit_offset = (bit_offset + - BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits - - mode->n_subsets + - mode->n_secondary_index_bits * texel - - anchors_before_texel); - - /* Calculate the offset to the primary index for this texel */ - bit_offset += mode->n_index_bits * texel - anchors_before_texel; - - subset_num = (subsets >> (texel * 2)) & 3; - - anchor = is_anchor(mode->n_subsets, partition_num, texel); - - index_bits = mode->n_index_bits; - if (anchor) - index_bits--; - indices[0] = extract_bits(block, bit_offset, index_bits); - - if (mode->n_secondary_index_bits) { - index_bits = mode->n_secondary_index_bits; - if (anchor) - index_bits--; - indices[1] = extract_bits(block, secondary_bit_offset, index_bits); - } - - index = indices[index_selection]; - index_bits = (index_selection ? 
- mode->n_secondary_index_bits : - mode->n_index_bits); - - for (component = 0; component < 3; component++) - result[component] = interpolate(endpoints[subset_num * 2][component], - endpoints[subset_num * 2 + 1][component], - index, - index_bits); - - /* Alpha uses the opposite index from the color components */ - if (mode->n_secondary_index_bits && !index_selection) { - index = indices[1]; - index_bits = mode->n_secondary_index_bits; - } else { - index = indices[0]; - index_bits = mode->n_index_bits; - } - - result[3] = interpolate(endpoints[subset_num * 2][3], - endpoints[subset_num * 2 + 1][3], - index, - index_bits); - - apply_rotation(rotation, result); + fetch_bptc_rgb_float(map, rowStride, i, j, texel, false); } static void @@ -699,257 +105,6 @@ fetch_bptc_srgb_alpha_unorm(const GLubyte *map, texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]); } -static int32_t -sign_extend(int32_t value, - int n_bits) -{ - if ((value & (1 << (n_bits - 1)))) { - value |= (~(int32_t) 0) << n_bits; - } - - return value; -} - -static int -signed_unquantize(int value, int n_endpoint_bits) -{ - bool sign; - - if (n_endpoint_bits >= 16) - return value; - - if (value == 0) - return 0; - - sign = false; - - if (value < 0) { - sign = true; - value = -value; - } - - if (value >= (1 << (n_endpoint_bits - 1)) - 1) - value = 0x7fff; - else - value = ((value << 15) + 0x4000) >> (n_endpoint_bits - 1); - - if (sign) - value = -value; - - return value; -} - -static int -unsigned_unquantize(int value, int n_endpoint_bits) -{ - if (n_endpoint_bits >= 15) - return value; - - if (value == 0) - return 0; - - if (value == (1 << n_endpoint_bits) - 1) - return 0xffff; - - return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1); -} - -static int -extract_float_endpoints(const struct bptc_float_mode *mode, - const uint8_t *block, - int bit_offset, - int32_t endpoints[][3], - bool is_signed) -{ - const struct bptc_float_bitfield *bitfield; - int endpoint, component; - int n_endpoints; - int value; - int i; 
- - if (mode->n_partition_bits) - n_endpoints = 4; - else - n_endpoints = 2; - - memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3); - - for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) { - value = extract_bits(block, bit_offset, bitfield->n_bits); - bit_offset += bitfield->n_bits; - - if (bitfield->reverse) { - for (i = 0; i < bitfield->n_bits; i++) { - if (value & (1 << i)) - endpoints[bitfield->endpoint][bitfield->component] |= - 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset); - } - } else { - endpoints[bitfield->endpoint][bitfield->component] |= - value << bitfield->offset; - } - } - - if (mode->transformed_endpoints) { - /* The endpoints are specified as signed offsets from e0 */ - for (endpoint = 1; endpoint < n_endpoints; endpoint++) { - for (component = 0; component < 3; component++) { - value = sign_extend(endpoints[endpoint][component], - mode->n_delta_bits[component]); - endpoints[endpoint][component] = - ((endpoints[0][component] + value) & - ((1 << mode->n_endpoint_bits) - 1)); - } - } - } - - if (is_signed) { - for (endpoint = 0; endpoint < n_endpoints; endpoint++) { - for (component = 0; component < 3; component++) { - value = sign_extend(endpoints[endpoint][component], - mode->n_endpoint_bits); - endpoints[endpoint][component] = - signed_unquantize(value, mode->n_endpoint_bits); - } - } - } else { - for (endpoint = 0; endpoint < n_endpoints; endpoint++) { - for (component = 0; component < 3; component++) { - endpoints[endpoint][component] = - unsigned_unquantize(endpoints[endpoint][component], - mode->n_endpoint_bits); - } - } - } - - return bit_offset; -} - -static int32_t -finish_unsigned_unquantize(int32_t value) -{ - return value * 31 / 64; -} - -static int32_t -finish_signed_unquantize(int32_t value) -{ - if (value < 0) - return (-value * 31 / 32) | 0x8000; - else - return value * 31 / 32; -} - -static void -fetch_rgb_float_from_block(const uint8_t *block, - float *result, - int texel, - bool is_signed) 
-{ - int mode_num; - const struct bptc_float_mode *mode; - int bit_offset; - int partition_num; - int subset_num; - int index_bits; - int index; - int anchors_before_texel; - int32_t endpoints[2 * 2][3]; - uint32_t subsets; - int n_subsets; - int component; - int32_t value; - - if (block[0] & 0x2) { - mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2; - bit_offset = 5; - } else { - mode_num = block[0] & 3; - bit_offset = 2; - } - - mode = bptc_float_modes + mode_num; - - if (mode->reserved) { - memset(result, 0, sizeof result[0] * 3); - result[3] = 1.0f; - return; - } - - bit_offset = extract_float_endpoints(mode, block, bit_offset, - endpoints, is_signed); - - if (mode->n_partition_bits) { - partition_num = extract_bits(block, bit_offset, mode->n_partition_bits); - bit_offset += mode->n_partition_bits; - - subsets = partition_table1[partition_num]; - n_subsets = 2; - } else { - partition_num = 0; - subsets = 0; - n_subsets = 1; - } - - anchors_before_texel = - count_anchors_before_texel(n_subsets, partition_num, texel); - - /* Calculate the offset to the primary index for this texel */ - bit_offset += mode->n_index_bits * texel - anchors_before_texel; - - subset_num = (subsets >> (texel * 2)) & 3; - - index_bits = mode->n_index_bits; - if (is_anchor(n_subsets, partition_num, texel)) - index_bits--; - index = extract_bits(block, bit_offset, index_bits); - - for (component = 0; component < 3; component++) { - value = interpolate(endpoints[subset_num * 2][component], - endpoints[subset_num * 2 + 1][component], - index, - mode->n_index_bits); - - if (is_signed) - value = finish_signed_unquantize(value); - else - value = finish_unsigned_unquantize(value); - - result[component] = _mesa_half_to_float(value); - } - - result[3] = 1.0f; -} - -static void -fetch_bptc_rgb_float(const GLubyte *map, - GLint rowStride, GLint i, GLint j, - GLfloat *texel, - bool is_signed) -{ - const GLubyte *block; - - block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16; - - 
fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed); -} - -static void -fetch_bptc_rgb_signed_float(const GLubyte *map, - GLint rowStride, GLint i, GLint j, - GLfloat *texel) -{ - fetch_bptc_rgb_float(map, rowStride, i, j, texel, true); -} - -static void -fetch_bptc_rgb_unsigned_float(const GLubyte *map, - GLint rowStride, GLint i, GLint j, - GLfloat *texel) -{ - fetch_bptc_rgb_float(map, rowStride, i, j, texel, false); -} - compressed_fetch_func _mesa_get_bptc_fetch_func(mesa_format format) { @@ -967,312 +122,6 @@ _mesa_get_bptc_fetch_func(mesa_format format) } } -static void -write_bits(struct bit_writer *writer, int n_bits, int value) -{ - do { - if (n_bits + writer->pos >= 8) { - *(writer->dst++) = writer->buf | (value << writer->pos); - writer->buf = 0; - value >>= (8 - writer->pos); - n_bits -= (8 - writer->pos); - writer->pos = 0; - } else { - writer->buf |= value << writer->pos; - writer->pos += n_bits; - break; - } - } while (n_bits > 0); -} - -static void -get_average_luminance_alpha_unorm(int width, int height, - const uint8_t *src, int src_rowstride, - int *average_luminance, int *average_alpha) -{ - int luminance_sum = 0, alpha_sum = 0; - int y, x; - - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - luminance_sum += src[0] + src[1] + src[2]; - alpha_sum += src[3]; - src += 4; - } - src += src_rowstride - width * 4; - } - - *average_luminance = luminance_sum / (width * height); - *average_alpha = alpha_sum / (width * height); -} - -static void -get_rgba_endpoints_unorm(int width, int height, - const uint8_t *src, int src_rowstride, - int average_luminance, int average_alpha, - uint8_t endpoints[][4]) -{ - int endpoint_luminances[2]; - int midpoint; - int sums[2][4]; - int endpoint; - int luminance; - uint8_t temp[3]; - const uint8_t *p = src; - int rgb_left_endpoint_count = 0; - int alpha_left_endpoint_count = 0; - int y, x, i; - - memset(sums, 0, sizeof sums); - - for (y = 0; y < height; y++) { - for (x = 0; x < 
width; x++) { - luminance = p[0] + p[1] + p[2]; - if (luminance < average_luminance) { - endpoint = 0; - rgb_left_endpoint_count++; - } else { - endpoint = 1; - } - for (i = 0; i < 3; i++) - sums[endpoint][i] += p[i]; - - if (p[2] < average_alpha) { - endpoint = 0; - alpha_left_endpoint_count++; - } else { - endpoint = 1; - } - sums[endpoint][3] += p[3]; - - p += 4; - } - - p += src_rowstride - width * 4; - } - - if (rgb_left_endpoint_count == 0 || - rgb_left_endpoint_count == width * height) { - for (i = 0; i < 3; i++) - endpoints[0][i] = endpoints[1][i] = - (sums[0][i] + sums[1][i]) / (width * height); - } else { - for (i = 0; i < 3; i++) { - endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count; - endpoints[1][i] = (sums[1][i] / - (width * height - rgb_left_endpoint_count)); - } - } - - if (alpha_left_endpoint_count == 0 || - alpha_left_endpoint_count == width * height) { - endpoints[0][3] = endpoints[1][3] = - (sums[0][3] + sums[1][3]) / (width * height); - } else { - endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count; - endpoints[1][3] = (sums[1][3] / - (width * height - alpha_left_endpoint_count)); - } - - /* We may need to swap the endpoints to ensure the most-significant bit of - * the first index is zero */ - - for (endpoint = 0; endpoint < 2; endpoint++) { - endpoint_luminances[endpoint] = - endpoints[endpoint][0] + - endpoints[endpoint][1] + - endpoints[endpoint][2]; - } - midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2; - - if ((src[0] + src[1] + src[2] <= midpoint) != - (endpoint_luminances[0] <= midpoint)) { - memcpy(temp, endpoints[0], 3); - memcpy(endpoints[0], endpoints[1], 3); - memcpy(endpoints[1], temp, 3); - } - - /* Same for the alpha endpoints */ - - midpoint = (endpoints[0][3] + endpoints[1][3]) / 2; - - if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) { - temp[0] = endpoints[0][3]; - endpoints[0][3] = endpoints[1][3]; - endpoints[1][3] = temp[0]; - } -} - -static void -write_rgb_indices_unorm(struct 
bit_writer *writer, - int src_width, int src_height, - const uint8_t *src, int src_rowstride, - uint8_t endpoints[][4]) -{ - int luminance; - int endpoint_luminances[2]; - int endpoint; - int index; - int y, x; - - for (endpoint = 0; endpoint < 2; endpoint++) { - endpoint_luminances[endpoint] = - endpoints[endpoint][0] + - endpoints[endpoint][1] + - endpoints[endpoint][2]; - } - - /* If the endpoints have the same luminance then we'll just use index 0 for - * all of the texels */ - if (endpoint_luminances[0] == endpoint_luminances[1]) { - write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0); - return; - } - - for (y = 0; y < src_height; y++) { - for (x = 0; x < src_width; x++) { - luminance = src[0] + src[1] + src[2]; - - index = ((luminance - endpoint_luminances[0]) * 3 / - (endpoint_luminances[1] - endpoint_luminances[0])); - if (index < 0) - index = 0; - else if (index > 3) - index = 3; - - assert(x != 0 || y != 0 || index < 2); - - write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index); - - src += 4; - } - - /* Pad the indices out to the block size */ - if (src_width < BLOCK_SIZE) - write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0); - - src += src_rowstride - src_width * 4; - } - - /* Pad the indices out to the block size */ - if (src_height < BLOCK_SIZE) - write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0); -} - -static void -write_alpha_indices_unorm(struct bit_writer *writer, - int src_width, int src_height, - const uint8_t *src, int src_rowstride, - uint8_t endpoints[][4]) -{ - int index; - int y, x; - - /* If the endpoints have the same alpha then we'll just use index 0 for - * all of the texels */ - if (endpoints[0][3] == endpoints[1][3]) { - write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0); - return; - } - - for (y = 0; y < src_height; y++) { - for (x = 0; x < src_width; x++) { - index = (((int) src[3] - (int) endpoints[0][3]) * 7 / - ((int) endpoints[1][3] - endpoints[0][3])); - if (index < 0) - index = 0; - else if (index > 7) - 
index = 7; - - assert(x != 0 || y != 0 || index < 4); - - /* The first index has one less bit */ - write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index); - - src += 4; - } - - /* Pad the indices out to the block size */ - if (src_width < BLOCK_SIZE) - write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0); - - src += src_rowstride - src_width * 4; - } - - /* Pad the indices out to the block size */ - if (src_height < BLOCK_SIZE) - write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0); -} - -static void -compress_rgba_unorm_block(int src_width, int src_height, - const uint8_t *src, int src_rowstride, - uint8_t *dst) -{ - int average_luminance, average_alpha; - uint8_t endpoints[2][4]; - struct bit_writer writer; - int component, endpoint; - - get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride, - &average_luminance, &average_alpha); - get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride, - average_luminance, average_alpha, - endpoints); - - writer.dst = dst; - writer.pos = 0; - writer.buf = 0; - - write_bits(&writer, 5, 0x10); /* mode 4 */ - write_bits(&writer, 2, 0); /* rotation 0 */ - write_bits(&writer, 1, 0); /* index selection bit */ - - /* Write the color endpoints */ - for (component = 0; component < 3; component++) - for (endpoint = 0; endpoint < 2; endpoint++) - write_bits(&writer, 5, endpoints[endpoint][component] >> 3); - - /* Write the alpha endpoints */ - for (endpoint = 0; endpoint < 2; endpoint++) - write_bits(&writer, 6, endpoints[endpoint][3] >> 2); - - write_rgb_indices_unorm(&writer, - src_width, src_height, - src, src_rowstride, - endpoints); - write_alpha_indices_unorm(&writer, - src_width, src_height, - src, src_rowstride, - endpoints); -} - -static void -compress_rgba_unorm(int width, int height, - const uint8_t *src, int src_rowstride, - uint8_t *dst, int dst_rowstride) -{ - int dst_row_diff; - int y, x; - - if (dst_rowstride >= width * 4) - dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4; 
- else - dst_row_diff = 0; - - for (y = 0; y < height; y += BLOCK_SIZE) { - for (x = 0; x < width; x += BLOCK_SIZE) { - compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE), - MIN2(height - y, BLOCK_SIZE), - src + x * 4 + y * src_rowstride, - src_rowstride, - dst); - dst += BLOCK_BYTES; - } - dst += dst_row_diff; - } -} - GLboolean _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS) { @@ -1318,272 +167,6 @@ _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS) return GL_TRUE; } -static float -get_average_luminance_float(int width, int height, - const float *src, int src_rowstride) -{ - float luminance_sum = 0; - int y, x; - - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - luminance_sum += src[0] + src[1] + src[2]; - src += 3; - } - src += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float); - } - - return luminance_sum / (width * height); -} - -static float -clamp_value(float value, bool is_signed) -{ - if (value > 65504.0f) - return 65504.0f; - - if (is_signed) { - if (value < -65504.0f) - return -65504.0f; - else - return value; - } - - if (value < 0.0f) - return 0.0f; - - return value; -} - -static void -get_endpoints_float(int width, int height, - const float *src, int src_rowstride, - float average_luminance, float endpoints[][3], - bool is_signed) -{ - float endpoint_luminances[2]; - float midpoint; - float sums[2][3]; - int endpoint, component; - float luminance; - float temp[3]; - const float *p = src; - int left_endpoint_count = 0; - int y, x, i; - - memset(sums, 0, sizeof sums); - - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - luminance = p[0] + p[1] + p[2]; - if (luminance < average_luminance) { - endpoint = 0; - left_endpoint_count++; - } else { - endpoint = 1; - } - for (i = 0; i < 3; i++) - sums[endpoint][i] += p[i]; - - p += 3; - } - - p += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float); - } - - if (left_endpoint_count == 0 || - left_endpoint_count == width * height) { - for (i = 0; i < 3; i++) - 
endpoints[0][i] = endpoints[1][i] = - (sums[0][i] + sums[1][i]) / (width * height); - } else { - for (i = 0; i < 3; i++) { - endpoints[0][i] = sums[0][i] / left_endpoint_count; - endpoints[1][i] = sums[1][i] / (width * height - left_endpoint_count); - } - } - - /* Clamp the endpoints to the range of a half float and strip out - * infinities */ - for (endpoint = 0; endpoint < 2; endpoint++) { - for (component = 0; component < 3; component++) { - endpoints[endpoint][component] = - clamp_value(endpoints[endpoint][component], is_signed); - } - } - - /* We may need to swap the endpoints to ensure the most-significant bit of - * the first index is zero */ - - for (endpoint = 0; endpoint < 2; endpoint++) { - endpoint_luminances[endpoint] = - endpoints[endpoint][0] + - endpoints[endpoint][1] + - endpoints[endpoint][2]; - } - midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f; - - if ((src[0] + src[1] + src[2] <= midpoint) != - (endpoint_luminances[0] <= midpoint)) { - memcpy(temp, endpoints[0], sizeof temp); - memcpy(endpoints[0], endpoints[1], sizeof temp); - memcpy(endpoints[1], temp, sizeof temp); - } -} - -static void -write_rgb_indices_float(struct bit_writer *writer, - int src_width, int src_height, - const float *src, int src_rowstride, - float endpoints[][3]) -{ - float luminance; - float endpoint_luminances[2]; - int endpoint; - int index; - int y, x; - - for (endpoint = 0; endpoint < 2; endpoint++) { - endpoint_luminances[endpoint] = - endpoints[endpoint][0] + - endpoints[endpoint][1] + - endpoints[endpoint][2]; - } - - /* If the endpoints have the same luminance then we'll just use index 0 for - * all of the texels */ - if (endpoint_luminances[0] == endpoint_luminances[1]) { - write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0); - return; - } - - for (y = 0; y < src_height; y++) { - for (x = 0; x < src_width; x++) { - luminance = src[0] + src[1] + src[2]; - - index = ((luminance - endpoint_luminances[0]) * 15 / - (endpoint_luminances[1] - 
endpoint_luminances[0])); - if (index < 0) - index = 0; - else if (index > 15) - index = 15; - - assert(x != 0 || y != 0 || index < 8); - - write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index); - - src += 3; - } - - /* Pad the indices out to the block size */ - if (src_width < BLOCK_SIZE) - write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0); - - src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float); - } - - /* Pad the indices out to the block size */ - if (src_height < BLOCK_SIZE) - write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0); -} - -static int -get_endpoint_value(float value, bool is_signed) -{ - bool sign = false; - int half; - - if (is_signed) { - half = _mesa_float_to_half(value); - - if (half & 0x8000) { - half &= 0x7fff; - sign = true; - } - - half = (32 * half / 31) >> 6; - - if (sign) - half = -half & ((1 << 10) - 1); - - return half; - } else { - if (value <= 0.0f) - return 0; - - half = _mesa_float_to_half(value); - - return (64 * half / 31) >> 6; - } -} - -static void -compress_rgb_float_block(int src_width, int src_height, - const float *src, int src_rowstride, - uint8_t *dst, - bool is_signed) -{ - float average_luminance; - float endpoints[2][3]; - struct bit_writer writer; - int component, endpoint; - int endpoint_value; - - average_luminance = - get_average_luminance_float(src_width, src_height, src, src_rowstride); - get_endpoints_float(src_width, src_height, src, src_rowstride, - average_luminance, endpoints, is_signed); - - writer.dst = dst; - writer.pos = 0; - writer.buf = 0; - - write_bits(&writer, 5, 3); /* mode 3 */ - - /* Write the endpoints */ - for (endpoint = 0; endpoint < 2; endpoint++) { - for (component = 0; component < 3; component++) { - endpoint_value = - get_endpoint_value(endpoints[endpoint][component], is_signed); - write_bits(&writer, 10, endpoint_value); - } - } - - write_rgb_indices_float(&writer, - src_width, src_height, - src, src_rowstride, - endpoints); -} - -static void 
-compress_rgb_float(int width, int height, - const float *src, int src_rowstride, - uint8_t *dst, int dst_rowstride, - bool is_signed) -{ - int dst_row_diff; - int y, x; - - if (dst_rowstride >= width * 4) - dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4; - else - dst_row_diff = 0; - - for (y = 0; y < height; y += BLOCK_SIZE) { - for (x = 0; x < width; x += BLOCK_SIZE) { - compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE), - MIN2(height - y, BLOCK_SIZE), - src + x * 3 + - y * src_rowstride / sizeof (float), - src_rowstride, - dst, - is_signed); - dst += BLOCK_BYTES; - } - dst += dst_row_diff; - } -} - static GLboolean texstore_bptc_rgb_float(TEXSTORE_PARAMS, bool is_signed) diff --git a/src/mesa/main/texcompress_bptc_tmp.h b/src/mesa/main/texcompress_bptc_tmp.h new file mode 100644 index 00000000000..3c4ea2c0131 --- /dev/null +++ b/src/mesa/main/texcompress_bptc_tmp.h @@ -0,0 +1,1743 @@ +/* + * Copyright (C) 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* + * Included by texcompress_bptc and gallium to define BPTC decoding routines. + */ + +#include "util/format_srgb.h" +#include "util/half_float.h" +#include "macros.h" + +#define BLOCK_SIZE 4 +#define N_PARTITIONS 64 +#define BLOCK_BYTES 16 + +struct bptc_unorm_mode { + int n_subsets; + int n_partition_bits; + bool has_rotation_bits; + bool has_index_selection_bit; + int n_color_bits; + int n_alpha_bits; + bool has_endpoint_pbits; + bool has_shared_pbits; + int n_index_bits; + int n_secondary_index_bits; +}; + +struct bptc_float_bitfield { + int8_t endpoint; + uint8_t component; + uint8_t offset; + uint8_t n_bits; + bool reverse; +}; + +struct bptc_float_mode { + bool reserved; + bool transformed_endpoints; + int n_partition_bits; + int n_endpoint_bits; + int n_index_bits; + int n_delta_bits[3]; + struct bptc_float_bitfield bitfields[24]; +}; + +struct bit_writer { + uint8_t buf; + int pos; + uint8_t *dst; +}; + +static const struct bptc_unorm_mode +bptc_unorm_modes[] = { + /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 }, + /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 }, + /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 }, + /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 }, + /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 }, + /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 }, + /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 }, + /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 } +}; + +static const struct bptc_float_mode +bptc_float_modes[] = { + /* 00 */ + { false, true, 5, 10, 3, { 5, 5, 5 }, + { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false }, + { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, 
false }, + { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false }, + { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, + { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, + { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, + { 3, 2, 3, 1, false }, + { -1 } } + }, + /* 01 */ + { false, true, 5, 7, 3, { 6, 6, 6 }, + { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false }, + { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false }, + { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false }, + { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false }, + { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false }, + { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false }, + { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, + { 2, 0, 0, 6, false }, + { 3, 0, 0, 6, false }, + { -1 } } + }, + /* 00010 */ + { false, true, 5, 11, 3, { 5, 4, 4 }, + { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, + { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false }, + { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false }, + { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false }, + { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false }, + { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false }, + { -1 } } + }, + /* 00011 */ + { false, false, 0, 10, 4, { 10, 10, 10 }, + { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, + { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false }, + { -1 } } + }, + /* 00110 */ + { false, true, 5, 11, 3, { 4, 5, 4 }, + { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, + { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false }, + { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false }, + { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, 
{ 0, 2, 10, 1, false }, + { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false }, + { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false }, + { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false }, + { -1 } } + }, + /* 00111 */ + { false, true, 0, 11, 4, { 9, 9, 9 }, + { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, + { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false }, + { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false }, + { -1 } } + }, + /* 01010 */ + { false, true, 5, 11, 3, { 4, 4, 5 }, + { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, + { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false }, + { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, + { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false }, + { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false }, + { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false }, + { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false }, + { -1 } } + }, + /* 01011 */ + { false, true, 0, 12, 4, { 8, 8, 8 }, + { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false }, + { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false }, + { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true }, + { -1 } } + }, + /* 01110 */ + { false, true, 5, 9, 3, { 5, 5, 5 }, + { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false }, + { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false }, + { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false }, + { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, + { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, + { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, + { 3, 2, 3, 1, false }, + { -1 } } + }, + /* 01111 */ + { false, true, 0, 16, 4, { 4, 4, 4 }, + { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 
10, false }, + { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false }, + { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true }, + { -1 } } + }, + /* 10010 */ + { false, true, 5, 8, 3, { 6, 5, 5 }, + { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false }, + { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, + { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false }, + { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, + { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false }, + { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false }, + { 3, 0, 0, 6, false }, + { -1 } } + }, + /* 10011 */ + { true /* reserved */ }, + /* 10110 */ + { false, true, 5, 8, 3, { 5, 6, 5 }, + { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false }, + { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false }, + { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false }, + { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false }, + { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false }, + { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false }, + { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false }, + { -1 } } + }, + /* 10111 */ + { true /* reserved */ }, + /* 11010 */ + { false, true, 5, 8, 3, { 5, 5, 6 }, + { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, + { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false }, + { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false }, + { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false }, + { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, + { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false }, + { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false }, + { -1 } } + }, + /* 11011 */ + { true /* reserved */ }, + /* 11110 */ + { false, false, 5, 6, 3, { 6, 
6, 6 }, + { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false }, + { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false }, + { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false }, + { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false }, + { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false }, + { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false }, + { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, + { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false }, + { -1 } } + }, + /* 11111 */ + { true /* reserved */ }, +}; + +/* This partition table is used when the mode has two subsets. Each + * partition is represented by a 32-bit value which gives 2 bits per texel + * within the block. The value of the two bits represents which subset to use + * (0 or 1). + */ +static const uint32_t +partition_table1[N_PARTITIONS] = { + 0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U, + 0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U, + 0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U, + 0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U, + 0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U, + 0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U, + 0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U, + 0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U, + 0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U, + 0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U, + 0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U, + 0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U, + 0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U, + 0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U, + 0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U, + 0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U, +}; + +/* This partition table is used when the mode has three subsets. In this case + * the values can be 0, 1 or 2. 
+ */ +static const uint32_t +partition_table2[N_PARTITIONS] = { + 0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U, + 0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U, + 0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U, + 0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U, + 0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U, + 0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U, + 0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U, + 0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U, + 0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U, + 0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U, + 0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U, + 0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U, + 0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U, + 0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U, + 0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U, + 0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U +}; + +static const uint8_t +anchor_indices[][N_PARTITIONS] = { + /* Anchor index values for the second subset of two-subset partitioning */ + { + 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, + 0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2, + 0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6, + 0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf + }, + + /* Anchor index values for the second subset of three-subset partitioning */ + { + 0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3, + 0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf, + 0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf, + 0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3 + }, + + /* Anchor index values for the third subset of three-subset + * partitioning + */ + { + 0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8, + 0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8, + 0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8, + 
0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8 + } +}; + +static int +extract_bits(const uint8_t *block, + int offset, + int n_bits) +{ + int byte_index = offset / 8; + int bit_index = offset % 8; + int n_bits_in_byte = MIN2(n_bits, 8 - bit_index); + int result = 0; + int bit = 0; + + while (true) { + result |= ((block[byte_index] >> bit_index) & + ((1 << n_bits_in_byte) - 1)) << bit; + + n_bits -= n_bits_in_byte; + + if (n_bits <= 0) + return result; + + bit += n_bits_in_byte; + byte_index++; + bit_index = 0; + n_bits_in_byte = MIN2(n_bits, 8); + } +} + +static uint8_t +expand_component(uint8_t byte, + int n_bits) +{ + /* Expands a n-bit quantity into a byte by copying the most-significant + * bits into the unused least-significant bits. + */ + return byte << (8 - n_bits) | (byte >> (2 * n_bits - 8)); +} + +static int +extract_unorm_endpoints(const struct bptc_unorm_mode *mode, + const uint8_t *block, + int bit_offset, + uint8_t endpoints[][4]) +{ + int component; + int subset; + int endpoint; + int pbit; + int n_components; + + /* Extract each color component */ + for (component = 0; component < 3; component++) { + for (subset = 0; subset < mode->n_subsets; subset++) { + for (endpoint = 0; endpoint < 2; endpoint++) { + endpoints[subset * 2 + endpoint][component] = + extract_bits(block, bit_offset, mode->n_color_bits); + bit_offset += mode->n_color_bits; + } + } + } + + /* Extract the alpha values */ + if (mode->n_alpha_bits > 0) { + for (subset = 0; subset < mode->n_subsets; subset++) { + for (endpoint = 0; endpoint < 2; endpoint++) { + endpoints[subset * 2 + endpoint][3] = + extract_bits(block, bit_offset, mode->n_alpha_bits); + bit_offset += mode->n_alpha_bits; + } + } + + n_components = 4; + } else { + for (subset = 0; subset < mode->n_subsets; subset++) + for (endpoint = 0; endpoint < 2; endpoint++) + endpoints[subset * 2 + endpoint][3] = 255; + + n_components = 3; + } + + /* Add in the p-bits */ + if (mode->has_endpoint_pbits) { + for 
(subset = 0; subset < mode->n_subsets; subset++) { + for (endpoint = 0; endpoint < 2; endpoint++) { + pbit = extract_bits(block, bit_offset, 1); + bit_offset += 1; + + for (component = 0; component < n_components; component++) { + endpoints[subset * 2 + endpoint][component] <<= 1; + endpoints[subset * 2 + endpoint][component] |= pbit; + } + } + } + } else if (mode->has_shared_pbits) { + for (subset = 0; subset < mode->n_subsets; subset++) { + pbit = extract_bits(block, bit_offset, 1); + bit_offset += 1; + + for (endpoint = 0; endpoint < 2; endpoint++) { + for (component = 0; component < n_components; component++) { + endpoints[subset * 2 + endpoint][component] <<= 1; + endpoints[subset * 2 + endpoint][component] |= pbit; + } + } + } + } + + /* Expand the n-bit values to a byte */ + for (subset = 0; subset < mode->n_subsets; subset++) { + for (endpoint = 0; endpoint < 2; endpoint++) { + for (component = 0; component < 3; component++) { + endpoints[subset * 2 + endpoint][component] = + expand_component(endpoints[subset * 2 + endpoint][component], + mode->n_color_bits + + mode->has_endpoint_pbits + + mode->has_shared_pbits); + } + + if (mode->n_alpha_bits > 0) { + endpoints[subset * 2 + endpoint][3] = + expand_component(endpoints[subset * 2 + endpoint][3], + mode->n_alpha_bits + + mode->has_endpoint_pbits + + mode->has_shared_pbits); + } + } + } + + return bit_offset; +} + +static bool +is_anchor(int n_subsets, + int partition_num, + int texel) +{ + if (texel == 0) + return true; + + switch (n_subsets) { + case 1: + return false; + case 2: + return anchor_indices[0][partition_num] == texel; + case 3: + return (anchor_indices[1][partition_num] == texel || + anchor_indices[2][partition_num] == texel); + default: + assert(false); + return false; + } +} + +static int +count_anchors_before_texel(int n_subsets, + int partition_num, + int texel) +{ + int count = 1; + + if (texel == 0) + return 0; + + switch (n_subsets) { + case 1: + break; + case 2: + if (texel > 
anchor_indices[0][partition_num]) + count++; + break; + case 3: + if (texel > anchor_indices[1][partition_num]) + count++; + if (texel > anchor_indices[2][partition_num]) + count++; + break; + default: + assert(false); + return 0; + } + + return count; +} + +static int32_t +interpolate(int32_t a, int32_t b, + int index, + int index_bits) +{ + static const uint8_t weights2[] = { 0, 21, 43, 64 }; + static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 }; + static const uint8_t weights4[] = + { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + static const uint8_t *weights[] = { + NULL, NULL, weights2, weights3, weights4 + }; + int weight; + + weight = weights[index_bits][index]; + + return ((64 - weight) * a + weight * b + 32) >> 6; +} + +static void +apply_rotation(int rotation, + uint8_t *result) +{ + uint8_t t; + + if (rotation == 0) + return; + + rotation--; + + t = result[rotation]; + result[rotation] = result[3]; + result[3] = t; +} + +static void +fetch_rgba_unorm_from_block(const uint8_t *block, + uint8_t *result, + int texel) +{ + int mode_num = ffs(block[0]); + const struct bptc_unorm_mode *mode; + int bit_offset, secondary_bit_offset; + int partition_num; + int subset_num; + int rotation; + int index_selection; + int index_bits; + int indices[2]; + int index; + int anchors_before_texel; + bool anchor; + uint8_t endpoints[3 * 2][4]; + uint32_t subsets; + int component; + + if (mode_num == 0) { + /* According to the spec this mode is reserved and shouldn't be used. 
*/ + memset(result, 0, 3); + result[3] = 0xff; + return; + } + + mode = bptc_unorm_modes + mode_num - 1; + bit_offset = mode_num; + + partition_num = extract_bits(block, bit_offset, mode->n_partition_bits); + bit_offset += mode->n_partition_bits; + + switch (mode->n_subsets) { + case 1: + subsets = 0; + break; + case 2: + subsets = partition_table1[partition_num]; + break; + case 3: + subsets = partition_table2[partition_num]; + break; + default: + assert(false); + return; + } + + if (mode->has_rotation_bits) { + rotation = extract_bits(block, bit_offset, 2); + bit_offset += 2; + } else { + rotation = 0; + } + + if (mode->has_index_selection_bit) { + index_selection = extract_bits(block, bit_offset, 1); + bit_offset++; + } else { + index_selection = 0; + } + + bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints); + + anchors_before_texel = count_anchors_before_texel(mode->n_subsets, + partition_num, texel); + + /* Calculate the offset to the secondary index */ + secondary_bit_offset = (bit_offset + + BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits - + mode->n_subsets + + mode->n_secondary_index_bits * texel - + anchors_before_texel); + + /* Calculate the offset to the primary index for this texel */ + bit_offset += mode->n_index_bits * texel - anchors_before_texel; + + subset_num = (subsets >> (texel * 2)) & 3; + + anchor = is_anchor(mode->n_subsets, partition_num, texel); + + index_bits = mode->n_index_bits; + if (anchor) + index_bits--; + indices[0] = extract_bits(block, bit_offset, index_bits); + + if (mode->n_secondary_index_bits) { + index_bits = mode->n_secondary_index_bits; + if (anchor) + index_bits--; + indices[1] = extract_bits(block, secondary_bit_offset, index_bits); + } + + index = indices[index_selection]; + index_bits = (index_selection ? 
+ mode->n_secondary_index_bits : + mode->n_index_bits); + + for (component = 0; component < 3; component++) + result[component] = interpolate(endpoints[subset_num * 2][component], + endpoints[subset_num * 2 + 1][component], + index, + index_bits); + + /* Alpha uses the opposite index from the color components */ + if (mode->n_secondary_index_bits && !index_selection) { + index = indices[1]; + index_bits = mode->n_secondary_index_bits; + } else { + index = indices[0]; + index_bits = mode->n_index_bits; + } + + result[3] = interpolate(endpoints[subset_num * 2][3], + endpoints[subset_num * 2 + 1][3], + index, + index_bits); + + apply_rotation(rotation, result); +} + +#ifdef BPTC_BLOCK_DECODE +static void +decompress_rgba_unorm_block(int src_width, int src_height, + const uint8_t *block, + uint8_t *dst_row, int dst_rowstride) +{ + int mode_num = ffs(block[0]); + const struct bptc_unorm_mode *mode; + int bit_offset, secondary_bit_offset; + int partition_num; + int subset_num; + int rotation; + int index_selection; + int index_bits; + int indices[2]; + int index; + int anchors_before_texel; + bool anchor; + uint8_t endpoints[3 * 2][4]; + uint32_t subsets; + int component; + unsigned x, y; + + if (mode_num == 0) { + /* According to the spec this mode is reserved and shouldn't be used. 
*/ + for(y = 0; y < src_height; y += 1) { + uint8_t *result = dst_row; + memset(result, 0, 4 * src_width); + for(x = 0; x < src_width; x += 1) { + result[3] = 0xff; + result += 4; + } + dst_row += dst_rowstride; + } + return; + } + + mode = bptc_unorm_modes + mode_num - 1; + bit_offset = mode_num; + + partition_num = extract_bits(block, bit_offset, mode->n_partition_bits); + bit_offset += mode->n_partition_bits; + + switch (mode->n_subsets) { + case 1: + subsets = 0; + break; + case 2: + subsets = partition_table1[partition_num]; + break; + case 3: + subsets = partition_table2[partition_num]; + break; + default: + assert(false); + return; + } + + if (mode->has_rotation_bits) { + rotation = extract_bits(block, bit_offset, 2); + bit_offset += 2; + } else { + rotation = 0; + } + + if (mode->has_index_selection_bit) { + index_selection = extract_bits(block, bit_offset, 1); + bit_offset++; + } else { + index_selection = 0; + } + + /* After this call bit_offset is the position of the first index bit of + * the block. It is the base for every texel below and must not be + * modified inside the loop. + */ + bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints); + + for(y = 0; y < src_height; y += 1) { + uint8_t *result = dst_row; + for(x = 0; x < src_width; x += 1) { + int texel; + int index_bit_offset; + texel = x + y * 4; + + anchors_before_texel = count_anchors_before_texel(mode->n_subsets, + partition_num, + texel); + + /* Calculate the offset to the secondary index */ + secondary_bit_offset = (bit_offset + + BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits - + mode->n_subsets + + mode->n_secondary_index_bits * texel - + anchors_before_texel); + + /* Calculate the offset to the primary index for this texel. The + * result goes into a per-texel variable: adding it to bit_offset + * would accumulate across iterations and corrupt the offsets of + * every following texel. + */ + index_bit_offset = (bit_offset + + mode->n_index_bits * texel - + anchors_before_texel); + + subset_num = (subsets >> (texel * 2)) & 3; + + anchor = is_anchor(mode->n_subsets, partition_num, texel); + + /* Anchor texels store one bit less; the top bit is implicitly 0 */ + index_bits = mode->n_index_bits; + if (anchor) + index_bits--; + indices[0] = extract_bits(block, index_bit_offset, index_bits); + + if (mode->n_secondary_index_bits) { + index_bits = mode->n_secondary_index_bits; + if (anchor) + index_bits--; + indices[1] =
extract_bits(block, secondary_bit_offset, index_bits); + } + + index = indices[index_selection]; + index_bits = (index_selection ? + mode->n_secondary_index_bits : + mode->n_index_bits); + + for (component = 0; component < 3; component++) + result[component] = interpolate(endpoints[subset_num * 2][component], + endpoints[subset_num * 2 + 1][component], + index, + index_bits); + + /* Alpha uses the opposite index from the color components */ + if (mode->n_secondary_index_bits && !index_selection) { + index = indices[1]; + index_bits = mode->n_secondary_index_bits; + } else { + index = indices[0]; + index_bits = mode->n_index_bits; + } + + result[3] = interpolate(endpoints[subset_num * 2][3], + endpoints[subset_num * 2 + 1][3], + index, + index_bits); + + apply_rotation(rotation, result); + result += 4; + } + dst_row += dst_rowstride; + } +} + +static void +decompress_rgba_unorm(int width, int height, + const uint8_t *src, int src_rowstride, + uint8_t *dst, int dst_rowstride) +{ + int src_row_diff; + int y, x; + + if (src_rowstride >= width * 4) + src_row_diff = src_rowstride - ((width + 3) & ~3) * 4; + else + src_row_diff = 0; + + for (y = 0; y < height; y += BLOCK_SIZE) { + for (x = 0; x < width; x += BLOCK_SIZE) { + decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE), + MIN2(height - y, BLOCK_SIZE), + src, + dst + x * 4 + y * dst_rowstride, + dst_rowstride); + src += BLOCK_BYTES; + } + src += src_row_diff; + } +} +#endif // BPTC_BLOCK_DECODE + +static int32_t +sign_extend(int32_t value, + int n_bits) +{ + if ((value & (1 << (n_bits - 1)))) { + value |= (~(int32_t) 0) << n_bits; + } + + return value; +} + +static int +signed_unquantize(int value, int n_endpoint_bits) +{ + bool sign; + + if (n_endpoint_bits >= 16) + return value; + + if (value == 0) + return 0; + + sign = false; + + if (value < 0) { + sign = true; + value = -value; + } + + if (value >= (1 << (n_endpoint_bits - 1)) - 1) + value = 0x7fff; + else + value = ((value << 15) + 0x4000) >> 
(n_endpoint_bits - 1); + + if (sign) + value = -value; + + return value; +} + +static int +unsigned_unquantize(int value, int n_endpoint_bits) +{ + if (n_endpoint_bits >= 15) + return value; + + if (value == 0) + return 0; + + if (value == (1 << n_endpoint_bits) - 1) + return 0xffff; + + return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1); +} + +static int +extract_float_endpoints(const struct bptc_float_mode *mode, + const uint8_t *block, + int bit_offset, + int32_t endpoints[][3], + bool is_signed) +{ + const struct bptc_float_bitfield *bitfield; + int endpoint, component; + int n_endpoints; + int value; + int i; + + if (mode->n_partition_bits) + n_endpoints = 4; + else + n_endpoints = 2; + + memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3); + + for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) { + value = extract_bits(block, bit_offset, bitfield->n_bits); + bit_offset += bitfield->n_bits; + + if (bitfield->reverse) { + for (i = 0; i < bitfield->n_bits; i++) { + if (value & (1 << i)) + endpoints[bitfield->endpoint][bitfield->component] |= + 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset); + } + } else { + endpoints[bitfield->endpoint][bitfield->component] |= + value << bitfield->offset; + } + } + + if (mode->transformed_endpoints) { + /* The endpoints are specified as signed offsets from e0 */ + for (endpoint = 1; endpoint < n_endpoints; endpoint++) { + for (component = 0; component < 3; component++) { + value = sign_extend(endpoints[endpoint][component], + mode->n_delta_bits[component]); + endpoints[endpoint][component] = + ((endpoints[0][component] + value) & + ((1 << mode->n_endpoint_bits) - 1)); + } + } + } + + if (is_signed) { + for (endpoint = 0; endpoint < n_endpoints; endpoint++) { + for (component = 0; component < 3; component++) { + value = sign_extend(endpoints[endpoint][component], + mode->n_endpoint_bits); + endpoints[endpoint][component] = + signed_unquantize(value, mode->n_endpoint_bits); + } + } + } 
else { + for (endpoint = 0; endpoint < n_endpoints; endpoint++) { + for (component = 0; component < 3; component++) { + endpoints[endpoint][component] = + unsigned_unquantize(endpoints[endpoint][component], + mode->n_endpoint_bits); + } + } + } + + return bit_offset; +} + +static int32_t +finish_unsigned_unquantize(int32_t value) +{ + return value * 31 / 64; +} + +static int32_t +finish_signed_unquantize(int32_t value) +{ + if (value < 0) + return (-value * 31 / 32) | 0x8000; + else + return value * 31 / 32; +} + +static void +fetch_rgb_float_from_block(const uint8_t *block, + float *result, + int texel, + bool is_signed) +{ + int mode_num; + const struct bptc_float_mode *mode; + int bit_offset; + int partition_num; + int subset_num; + int index_bits; + int index; + int anchors_before_texel; + int32_t endpoints[2 * 2][3]; + uint32_t subsets; + int n_subsets; + int component; + int32_t value; + + if (block[0] & 0x2) { + mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2; + bit_offset = 5; + } else { + mode_num = block[0] & 3; + bit_offset = 2; + } + + mode = bptc_float_modes + mode_num; + + if (mode->reserved) { + memset(result, 0, sizeof result[0] * 3); + result[3] = 1.0f; + return; + } + + bit_offset = extract_float_endpoints(mode, block, bit_offset, + endpoints, is_signed); + + if (mode->n_partition_bits) { + partition_num = extract_bits(block, bit_offset, mode->n_partition_bits); + bit_offset += mode->n_partition_bits; + + subsets = partition_table1[partition_num]; + n_subsets = 2; + } else { + partition_num = 0; + subsets = 0; + n_subsets = 1; + } + + anchors_before_texel = + count_anchors_before_texel(n_subsets, partition_num, texel); + + /* Calculate the offset to the primary index for this texel */ + bit_offset += mode->n_index_bits * texel - anchors_before_texel; + + subset_num = (subsets >> (texel * 2)) & 3; + + index_bits = mode->n_index_bits; + if (is_anchor(n_subsets, partition_num, texel)) + index_bits--; + index = extract_bits(block, 
bit_offset, index_bits); + + for (component = 0; component < 3; component++) { + value = interpolate(endpoints[subset_num * 2][component], + endpoints[subset_num * 2 + 1][component], + index, + mode->n_index_bits); + + if (is_signed) + value = finish_signed_unquantize(value); + else + value = finish_unsigned_unquantize(value); + + result[component] = _mesa_half_to_float(value); + } + + result[3] = 1.0f; +} + +#ifdef BPTC_BLOCK_DECODE +static void +decompress_rgb_float_block(unsigned src_width, unsigned src_height, + const uint8_t *block, + float *dst_row, unsigned dst_rowstride, + bool is_signed) +{ + int mode_num; + const struct bptc_float_mode *mode; + int bit_offset; + int partition_num; + int subset_num; + int index_bits; + int index; + int anchors_before_texel; + int32_t endpoints[2 * 2][3]; + uint32_t subsets; + int n_subsets; + int component; + int32_t value; + unsigned x, y; + + if (block[0] & 0x2) { + mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2; + bit_offset = 5; + } else { + mode_num = block[0] & 3; + bit_offset = 2; + } + + mode = bptc_float_modes + mode_num; + + if (mode->reserved) { + for(y = 0; y < src_height; y += 1) { + float *result = dst_row; + memset(result, 0, sizeof result[0] * 4 * src_width); + for(x = 0; x < src_width; x += 1) { + result[3] = 1.0f; + result += 4; + } + dst_row += dst_rowstride / sizeof dst_row[0]; + } + return; + } + + bit_offset = extract_float_endpoints(mode, block, bit_offset, + endpoints, is_signed); + + if (mode->n_partition_bits) { + partition_num = extract_bits(block, bit_offset, mode->n_partition_bits); + bit_offset += mode->n_partition_bits; + + subsets = partition_table1[partition_num]; + n_subsets = 2; + } else { + partition_num = 0; + subsets = 0; + n_subsets = 1; + } + + for(y = 0; y < src_height; y += 1) { + float *result = dst_row; + for(x = 0; x < src_width; x += 1) { + int texel; + + texel = x + y * 4; + + anchors_before_texel = + count_anchors_before_texel(n_subsets, partition_num, texel); + + 
/* Calculate the offset to the primary index for this texel */ + bit_offset += mode->n_index_bits * texel - anchors_before_texel; + + subset_num = (subsets >> (texel * 2)) & 3; + + index_bits = mode->n_index_bits; + if (is_anchor(n_subsets, partition_num, texel)) + index_bits--; + index = extract_bits(block, bit_offset, index_bits); + + for (component = 0; component < 3; component++) { + value = interpolate(endpoints[subset_num * 2][component], + endpoints[subset_num * 2 + 1][component], + index, + mode->n_index_bits); + + if (is_signed) + value = finish_signed_unquantize(value); + else + value = finish_unsigned_unquantize(value); + + result[component] = _mesa_half_to_float(value); + } + + result[3] = 1.0f; + result += 4; + } + dst_row += dst_rowstride / sizeof dst_row[0]; + } +} + +static void +decompress_rgb_float(int width, int height, + const uint8_t *src, int src_rowstride, + float *dst, int dst_rowstride, bool is_signed) +{ + int src_row_diff; + int y, x; + + if (src_rowstride >= width * 4) + src_row_diff = src_rowstride - ((width + 3) & ~3) * 4; + else + src_row_diff = 0; + + for (y = 0; y < height; y += BLOCK_SIZE) { + for (x = 0; x < width; x += BLOCK_SIZE) { + decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE), + MIN2(height - y, BLOCK_SIZE), + src, + (dst + x * 4 + + (y * dst_rowstride / sizeof dst[0])), + dst_rowstride, is_signed); + src += BLOCK_BYTES; + } + src += src_row_diff; + } +} +#endif // BPTC_BLOCK_DECODE + +static void +write_bits(struct bit_writer *writer, int n_bits, int value) +{ + do { + if (n_bits + writer->pos >= 8) { + *(writer->dst++) = writer->buf | (value << writer->pos); + writer->buf = 0; + value >>= (8 - writer->pos); + n_bits -= (8 - writer->pos); + writer->pos = 0; + } else { + writer->buf |= value << writer->pos; + writer->pos += n_bits; + break; + } + } while (n_bits > 0); +} + +static void +get_average_luminance_alpha_unorm(int width, int height, + const uint8_t *src, int src_rowstride, + int *average_luminance, int 
*average_alpha) +{ + int luminance_sum = 0, alpha_sum = 0; + int y, x; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + luminance_sum += src[0] + src[1] + src[2]; + alpha_sum += src[3]; + src += 4; + } + src += src_rowstride - width * 4; + } + + *average_luminance = luminance_sum / (width * height); + *average_alpha = alpha_sum / (width * height); +} + +static void +get_rgba_endpoints_unorm(int width, int height, + const uint8_t *src, int src_rowstride, + int average_luminance, int average_alpha, + uint8_t endpoints[][4]) +{ + int endpoint_luminances[2]; + int midpoint; + int sums[2][4]; + int endpoint; + int luminance; + uint8_t temp[3]; + const uint8_t *p = src; + int rgb_left_endpoint_count = 0; + int alpha_left_endpoint_count = 0; + int y, x, i; + + memset(sums, 0, sizeof sums); + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + luminance = p[0] + p[1] + p[2]; + if (luminance < average_luminance) { + endpoint = 0; + rgb_left_endpoint_count++; + } else { + endpoint = 1; + } + for (i = 0; i < 3; i++) + sums[endpoint][i] += p[i]; + + if (p[2] < average_alpha) { + endpoint = 0; + alpha_left_endpoint_count++; + } else { + endpoint = 1; + } + sums[endpoint][3] += p[3]; + + p += 4; + } + + p += src_rowstride - width * 4; + } + + if (rgb_left_endpoint_count == 0 || + rgb_left_endpoint_count == width * height) { + for (i = 0; i < 3; i++) + endpoints[0][i] = endpoints[1][i] = + (sums[0][i] + sums[1][i]) / (width * height); + } else { + for (i = 0; i < 3; i++) { + endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count; + endpoints[1][i] = (sums[1][i] / + (width * height - rgb_left_endpoint_count)); + } + } + + if (alpha_left_endpoint_count == 0 || + alpha_left_endpoint_count == width * height) { + endpoints[0][3] = endpoints[1][3] = + (sums[0][3] + sums[1][3]) / (width * height); + } else { + endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count; + endpoints[1][3] = (sums[1][3] / + (width * height - alpha_left_endpoint_count)); + } 
+ + /* We may need to swap the endpoints to ensure the most-significant bit of + * the first index is zero */ + + for (endpoint = 0; endpoint < 2; endpoint++) { + endpoint_luminances[endpoint] = + endpoints[endpoint][0] + + endpoints[endpoint][1] + + endpoints[endpoint][2]; + } + midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2; + + if ((src[0] + src[1] + src[2] <= midpoint) != + (endpoint_luminances[0] <= midpoint)) { + memcpy(temp, endpoints[0], 3); + memcpy(endpoints[0], endpoints[1], 3); + memcpy(endpoints[1], temp, 3); + } + + /* Same for the alpha endpoints */ + + midpoint = (endpoints[0][3] + endpoints[1][3]) / 2; + + if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) { + temp[0] = endpoints[0][3]; + endpoints[0][3] = endpoints[1][3]; + endpoints[1][3] = temp[0]; + } +} + +static void +write_rgb_indices_unorm(struct bit_writer *writer, + int src_width, int src_height, + const uint8_t *src, int src_rowstride, + uint8_t endpoints[][4]) +{ + int luminance; + int endpoint_luminances[2]; + int endpoint; + int index; + int y, x; + + for (endpoint = 0; endpoint < 2; endpoint++) { + endpoint_luminances[endpoint] = + endpoints[endpoint][0] + + endpoints[endpoint][1] + + endpoints[endpoint][2]; + } + + /* If the endpoints have the same luminance then we'll just use index 0 for + * all of the texels */ + if (endpoint_luminances[0] == endpoint_luminances[1]) { + write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0); + return; + } + + for (y = 0; y < src_height; y++) { + for (x = 0; x < src_width; x++) { + luminance = src[0] + src[1] + src[2]; + + index = ((luminance - endpoint_luminances[0]) * 3 / + (endpoint_luminances[1] - endpoint_luminances[0])); + if (index < 0) + index = 0; + else if (index > 3) + index = 3; + + assert(x != 0 || y != 0 || index < 2); + + write_bits(writer, (x == 0 && y == 0) ? 
1 : 2, index); + + src += 4; + } + + /* Pad the indices out to the block size */ + if (src_width < BLOCK_SIZE) + write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0); + + src += src_rowstride - src_width * 4; + } + + /* Pad the indices out to the block size */ + if (src_height < BLOCK_SIZE) + write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0); +} + +static void +write_alpha_indices_unorm(struct bit_writer *writer, + int src_width, int src_height, + const uint8_t *src, int src_rowstride, + uint8_t endpoints[][4]) +{ + int index; + int y, x; + + /* If the endpoints have the same alpha then we'll just use index 0 for + * all of the texels */ + if (endpoints[0][3] == endpoints[1][3]) { + write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0); + return; + } + + for (y = 0; y < src_height; y++) { + for (x = 0; x < src_width; x++) { + index = (((int) src[3] - (int) endpoints[0][3]) * 7 / + ((int) endpoints[1][3] - endpoints[0][3])); + if (index < 0) + index = 0; + else if (index > 7) + index = 7; + + assert(x != 0 || y != 0 || index < 4); + + /* The first index has one less bit */ + write_bits(writer, (x == 0 && y == 0) ? 
2 : 3, index); + + src += 4; + } + + /* Pad the indices out to the block size */ + if (src_width < BLOCK_SIZE) + write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0); + + src += src_rowstride - src_width * 4; + } + + /* Pad the indices out to the block size */ + if (src_height < BLOCK_SIZE) + write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0); +} + +static void +compress_rgba_unorm_block(int src_width, int src_height, + const uint8_t *src, int src_rowstride, + uint8_t *dst) +{ + int average_luminance, average_alpha; + uint8_t endpoints[2][4]; + struct bit_writer writer; + int component, endpoint; + + get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride, + &average_luminance, &average_alpha); + get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride, + average_luminance, average_alpha, + endpoints); + + writer.dst = dst; + writer.pos = 0; + writer.buf = 0; + + write_bits(&writer, 5, 0x10); /* mode 4 */ + write_bits(&writer, 2, 0); /* rotation 0 */ + write_bits(&writer, 1, 0); /* index selection bit */ + + /* Write the color endpoints */ + for (component = 0; component < 3; component++) + for (endpoint = 0; endpoint < 2; endpoint++) + write_bits(&writer, 5, endpoints[endpoint][component] >> 3); + + /* Write the alpha endpoints */ + for (endpoint = 0; endpoint < 2; endpoint++) + write_bits(&writer, 6, endpoints[endpoint][3] >> 2); + + write_rgb_indices_unorm(&writer, + src_width, src_height, + src, src_rowstride, + endpoints); + write_alpha_indices_unorm(&writer, + src_width, src_height, + src, src_rowstride, + endpoints); +} + +static void +compress_rgba_unorm(int width, int height, + const uint8_t *src, int src_rowstride, + uint8_t *dst, int dst_rowstride) +{ + int dst_row_diff; + int y, x; + + if (dst_rowstride >= width * 4) + dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4; + else + dst_row_diff = 0; + + for (y = 0; y < height; y += BLOCK_SIZE) { + for (x = 0; x < width; x += BLOCK_SIZE) { + 
compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE), + MIN2(height - y, BLOCK_SIZE), + src + x * 4 + y * src_rowstride, + src_rowstride, + dst); + dst += BLOCK_BYTES; + } + dst += dst_row_diff; + } +} + +static float +get_average_luminance_float(int width, int height, + const float *src, int src_rowstride) +{ + float luminance_sum = 0; + int y, x; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + luminance_sum += src[0] + src[1] + src[2]; + src += 3; + } + src += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float); + } + + return luminance_sum / (width * height); +} + +static float +clamp_value(float value, bool is_signed) +{ + if (value > 65504.0f) + return 65504.0f; + + if (is_signed) { + if (value < -65504.0f) + return -65504.0f; + else + return value; + } + + if (value < 0.0f) + return 0.0f; + + return value; +} + +static void +get_endpoints_float(int width, int height, + const float *src, int src_rowstride, + float average_luminance, float endpoints[][3], + bool is_signed) +{ + float endpoint_luminances[2]; + float midpoint; + float sums[2][3]; + int endpoint, component; + float luminance; + float temp[3]; + const float *p = src; + int left_endpoint_count = 0; + int y, x, i; + + memset(sums, 0, sizeof sums); + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + luminance = p[0] + p[1] + p[2]; + if (luminance < average_luminance) { + endpoint = 0; + left_endpoint_count++; + } else { + endpoint = 1; + } + for (i = 0; i < 3; i++) + sums[endpoint][i] += p[i]; + + p += 3; + } + + p += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float); + } + + if (left_endpoint_count == 0 || + left_endpoint_count == width * height) { + for (i = 0; i < 3; i++) + endpoints[0][i] = endpoints[1][i] = + (sums[0][i] + sums[1][i]) / (width * height); + } else { + for (i = 0; i < 3; i++) { + endpoints[0][i] = sums[0][i] / left_endpoint_count; + endpoints[1][i] = sums[1][i] / (width * height - left_endpoint_count); + } + } + + /* 
Clamp the endpoints to the range of a half float and strip out + * infinities */ + for (endpoint = 0; endpoint < 2; endpoint++) { + for (component = 0; component < 3; component++) { + endpoints[endpoint][component] = + clamp_value(endpoints[endpoint][component], is_signed); + } + } + + /* We may need to swap the endpoints to ensure the most-significant bit of + * the first index is zero */ + + for (endpoint = 0; endpoint < 2; endpoint++) { + endpoint_luminances[endpoint] = + endpoints[endpoint][0] + + endpoints[endpoint][1] + + endpoints[endpoint][2]; + } + midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f; + + if ((src[0] + src[1] + src[2] <= midpoint) != + (endpoint_luminances[0] <= midpoint)) { + memcpy(temp, endpoints[0], sizeof temp); + memcpy(endpoints[0], endpoints[1], sizeof temp); + memcpy(endpoints[1], temp, sizeof temp); + } +} + +static void +write_rgb_indices_float(struct bit_writer *writer, + int src_width, int src_height, + const float *src, int src_rowstride, + float endpoints[][3]) +{ + float luminance; + float endpoint_luminances[2]; + int endpoint; + int index; + int y, x; + + for (endpoint = 0; endpoint < 2; endpoint++) { + endpoint_luminances[endpoint] = + endpoints[endpoint][0] + + endpoints[endpoint][1] + + endpoints[endpoint][2]; + } + + /* If the endpoints have the same luminance then we'll just use index 0 for + * all of the texels */ + if (endpoint_luminances[0] == endpoint_luminances[1]) { + write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0); + return; + } + + for (y = 0; y < src_height; y++) { + for (x = 0; x < src_width; x++) { + luminance = src[0] + src[1] + src[2]; + + index = ((luminance - endpoint_luminances[0]) * 15 / + (endpoint_luminances[1] - endpoint_luminances[0])); + if (index < 0) + index = 0; + else if (index > 15) + index = 15; + + assert(x != 0 || y != 0 || index < 8); + + write_bits(writer, (x == 0 && y == 0) ? 
3 : 4, index); + + src += 3; + } + + /* Pad the indices out to the block size */ + if (src_width < BLOCK_SIZE) + write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0); + + src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float); + } + + /* Pad the indices out to the block size */ + if (src_height < BLOCK_SIZE) + write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0); +} + +static int +get_endpoint_value(float value, bool is_signed) +{ + bool sign = false; + int half; + + if (is_signed) { + half = _mesa_float_to_half(value); + + if (half & 0x8000) { + half &= 0x7fff; + sign = true; + } + + half = (32 * half / 31) >> 6; + + if (sign) + half = -half & ((1 << 10) - 1); + + return half; + } else { + if (value <= 0.0f) + return 0; + + half = _mesa_float_to_half(value); + + return (64 * half / 31) >> 6; + } +} + +static void +compress_rgb_float_block(int src_width, int src_height, + const float *src, int src_rowstride, + uint8_t *dst, + bool is_signed) +{ + float average_luminance; + float endpoints[2][3]; + struct bit_writer writer; + int component, endpoint; + int endpoint_value; + + average_luminance = + get_average_luminance_float(src_width, src_height, src, src_rowstride); + get_endpoints_float(src_width, src_height, src, src_rowstride, + average_luminance, endpoints, is_signed); + + writer.dst = dst; + writer.pos = 0; + writer.buf = 0; + + write_bits(&writer, 5, 3); /* mode 3 */ + + /* Write the endpoints */ + for (endpoint = 0; endpoint < 2; endpoint++) { + for (component = 0; component < 3; component++) { + endpoint_value = + get_endpoint_value(endpoints[endpoint][component], is_signed); + write_bits(&writer, 10, endpoint_value); + } + } + + write_rgb_indices_float(&writer, + src_width, src_height, + src, src_rowstride, + endpoints); +} + +static void +compress_rgb_float(int width, int height, + const float *src, int src_rowstride, + uint8_t *dst, int dst_rowstride, + bool is_signed) +{ + int dst_row_diff; + int y, x; + + if 
(dst_rowstride >= width * 4) + dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4; + else + dst_row_diff = 0; + + for (y = 0; y < height; y += BLOCK_SIZE) { + for (x = 0; x < width; x += BLOCK_SIZE) { + compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE), + MIN2(height - y, BLOCK_SIZE), + src + x * 3 + + y * src_rowstride / sizeof (float), + src_rowstride, + dst, + is_signed); + dst += BLOCK_BYTES; + } + dst += dst_row_diff; + } +}