src/mesa/main/texcompress_bptc_tmp.h

   1 /*
   2  * Copyright (C) 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /*
  25  * Included by texcompress_bptc and gallium to define BPTC decoding routines.
  26  */
  27
  28 #ifndef TEXCOMPRESS_BPTC_TMP_H
  29 #define TEXCOMPRESS_BPTC_TMP_H
  30
  31 #include "util/format_srgb.h"
  32 #include "util/half_float.h"
  33 #include "macros.h"
  34
/* Width and height, in texels, of one compressed block. */
#define BLOCK_SIZE 4
/* Number of entries in each partition table and each anchor-index row. */
#define N_PARTITIONS 64
/* Size, in bytes, of one compressed block in the source data. */
#define BLOCK_BYTES 16
  38
/*
 * Describes the bit layout of one UNORM block mode. The field order must not
 * change: bptc_unorm_modes[] initialises these fields positionally.
 */
struct bptc_unorm_mode {
   /* Number of endpoint pairs in the block (1, 2 or 3). */
   int n_subsets;
   /* Width of the partition-number field that follows the mode bits. */
   int n_partition_bits;
   /* True if a 2-bit rotation field is present. */
   bool has_rotation_bits;
   /* True if a 1-bit index-selection field is present. */
   bool has_index_selection_bit;
   /* Stored bits per colour component of each endpoint (p-bit excluded). */
   int n_color_bits;
   /* Stored bits per alpha endpoint; 0 means alpha is forced to 255. */
   int n_alpha_bits;
   /* True if every endpoint carries its own p-bit. */
   bool has_endpoint_pbits;
   /* True if both endpoints of a subset share a single p-bit. */
   bool has_shared_pbits;
   /* Bits per primary index (anchor texels store one bit less). */
   int n_index_bits;
   /* Bits per secondary index, or 0 if the mode has no secondary indices. */
   int n_secondary_index_bits;
};
  51
/*
 * One entry of the per-mode bitfield list in struct bptc_float_mode. The
 * field order must not change: bptc_float_modes[] initialises these fields
 * positionally.
 *
 * NOTE(review): the code that consumes these entries is mostly outside the
 * part of the file visible here, so the per-field notes below other than the
 * terminator are inferred from the names and the table contents -- confirm
 * against extract_float_endpoints().
 */
struct bptc_float_bitfield {
   /* Endpoint number; -1 marks the end of a mode's bitfield list. */
   int8_t endpoint;
   /* Presumably the colour component (0..2) the bits belong to. */
   uint8_t component;
   /* Presumably the position of the first bit within the endpoint value. */
   uint8_t offset;
   /* Presumably the number of bits in this field. */
   uint8_t n_bits;
   /* Presumably set when the bits are stored in reversed order. */
   bool reverse;
};
  59
/*
 * Describes the bit layout of one float block mode. The field order must not
 * change: bptc_float_modes[] initialises these fields positionally.
 */
struct bptc_float_mode {
   /* True for the table entries marked as reserved. */
   bool reserved;
   /* NOTE(review): presumably set when endpoints are stored as deltas from
    * the first endpoint -- confirm against the float decoding code.
    */
   bool transformed_endpoints;
   /* Non-zero when the mode is partitioned; extract_float_endpoints() then
    * decodes four endpoints instead of two.
    */
   int n_partition_bits;
   /* NOTE(review): presumably the precision of the base endpoint. */
   int n_endpoint_bits;
   /* NOTE(review): presumably the number of bits per texel index. */
   int n_index_bits;
   /* NOTE(review): presumably the delta precision for each of the three
    * colour components.
    */
   int n_delta_bits[3];
   /* Bitfield list, terminated by an entry whose endpoint is -1. */
   struct bptc_float_bitfield bitfields[24];
};
  69
/*
 * State for writing a bit stream.
 *
 * NOTE(review): this struct is not referenced in the part of the file visible
 * here; the field notes are inferred from the names only -- confirm against
 * the code that uses it.
 */
struct bit_writer {
   /* Presumably the byte currently being assembled. */
   uint8_t buf;
   /* Presumably the number of bits already placed in buf. */
   int pos;
   /* Presumably the next output byte to be written. */
   uint8_t *dst;
};
  75
/*
 * Layout of the eight UNORM block modes, indexed by mode number. The columns
 * follow the field order of struct bptc_unorm_mode:
 *
 *   n_subsets, n_partition_bits, has_rotation_bits, has_index_selection_bit,
 *   n_color_bits, n_alpha_bits, has_endpoint_pbits, has_shared_pbits,
 *   n_index_bits, n_secondary_index_bits
 *
 * The decoders look a mode up as bptc_unorm_modes[ffs(block[0]) - 1].
 */
static const struct bptc_unorm_mode
bptc_unorm_modes[] = {
   /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
   /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
   /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
   /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
   /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
   /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
   /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
};
  87
/*
 * Layout of the float block modes. The comment beside each entry gives the
 * bit pattern that entry is labelled with. The scalar columns follow the
 * field order of struct bptc_float_mode:
 *
 *   reserved, transformed_endpoints, n_partition_bits, n_endpoint_bits,
 *   n_index_bits, { n_delta_bits[0..2] }
 *
 * followed by the bitfield list, whose tuples follow the field order of
 * struct bptc_float_bitfield:
 *
 *   { endpoint, component, offset, n_bits, reverse }
 *
 * Every list ends with { -1 }, the endpoint value that stops the loop in
 * extract_float_endpoints(). Reserved entries set only the first field.
 */
static const struct bptc_float_mode
bptc_float_modes[] = {
   /* 00 */
   { false, true, 5, 10, 3, { 5, 5, 5 },
     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01 */
   { false, true, 5, 7, 3, { 6, 6, 6 },
     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 00010 */
   { false, true, 5, 11, 3, { 5, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00011 */
   { false, false, 0, 10, 4, { 10, 10, 10 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
       { -1 } }
   },
   /* 00110 */
   { false, true, 5, 11, 3, { 4, 5, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00111 */
   { false, true, 0, 11, 4, { 9, 9, 9 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
       { -1 } }
   },
   /* 01010 */
   { false, true, 5, 11, 3, { 4, 4, 5 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01011 */
   { false, true, 0, 12, 4, { 8, 8, 8 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
       { -1 } }
   },
   /* 01110 */
   { false, true, 5, 9, 3, { 5, 5, 5 },
     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01111 */
   { false, true, 0, 16, 4, { 4, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
       { -1 } }
   },
   /* 10010 */
   { false, true, 5, 8, 3, { 6, 5, 5 },
     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 10011 */
   { true /* reserved */ },
   /* 10110 */
   { false, true, 5, 8, 3, { 5, 6, 5 },
     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 10111 */
   { true /* reserved */ },
   /* 11010 */
   { false, true, 5, 8, 3, { 5, 5, 6 },
     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 11011 */
   { true /* reserved */ },
   /* 11110 */
   { false, false, 5, 6, 3, { 6, 6, 6 },
     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 11111 */
   { true /* reserved */ },
};
 238
/* This partition table is used when the mode has two subsets. It is indexed
 * by the partition number read from the block. Each partition is represented
 * by a 32-bit value which gives 2 bits per texel within the block, with
 * texel 0 in the two least-significant bits. The value of the two bits
 * represents which subset to use (0 or 1).
 */
static const uint32_t
partition_table1[N_PARTITIONS] = {
   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
};
 263
/* This partition table is used when the mode has three subsets. It uses the
 * same encoding as partition_table1 (2 bits per texel, texel 0 in the two
 * least-significant bits, indexed by partition number), but in this case the
 * values can be 0, 1 or 2.
 */
static const uint32_t
partition_table2[N_PARTITIONS] = {
   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
};
 286
/*
 * Texel numbers of the anchor texels, indexed as [row][partition number].
 * Row 0 is used by two-subset modes; rows 1 and 2 are used together by
 * three-subset modes. Texel 0 is always treated as an anchor by the callers
 * and therefore is not listed here. The index of an anchor texel is stored
 * with one bit less than the other indices.
 */
static const uint8_t
anchor_indices[][N_PARTITIONS] = {
   /* Anchor index values for the second subset of two-subset partitioning */
   {
      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
   },

   /* Anchor index values for the second subset of three-subset partitioning */
   {
      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
   },

   /* Anchor index values for the third subset of three-subset
    * partitioning
    */
   {
      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
   }
};
 315
 316 static int
 317 extract_bits(const uint8_t *block,
 318              int offset,
 319              int n_bits)
 320 {
 321    int byte_index = offset / 8;
 322    int bit_index = offset % 8;
 323    int n_bits_in_byte = MIN2(n_bits, 8 - bit_index);
 324    int result = 0;
 325    int bit = 0;
 326
 327    while (true) {
 328       result |= ((block[byte_index] >> bit_index) &
 329                  ((1 << n_bits_in_byte) - 1)) << bit;
 330
 331       n_bits -= n_bits_in_byte;
 332
 333       if (n_bits <= 0)
 334          return result;
 335
 336       bit += n_bits_in_byte;
 337       byte_index++;
 338       bit_index = 0;
 339       n_bits_in_byte = MIN2(n_bits, 8);
 340    }
 341 }
 342
 343 static uint8_t
 344 expand_component(uint8_t byte,
 345                  int n_bits)
 346 {
 347    /* Expands a n-bit quantity into a byte by copying the most-significant
 348     * bits into the unused least-significant bits.
 349     */
 350    return byte << (8 - n_bits) | (byte >> (2 * n_bits - 8));
 351 }
 352
 353 static int
 354 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
 355                         const uint8_t *block,
 356                         int bit_offset,
 357                         uint8_t endpoints[][4])
 358 {
 359    int component;
 360    int subset;
 361    int endpoint;
 362    int pbit;
 363    int n_components;
 364
 365    /* Extract each color component */
 366    for (component = 0; component < 3; component++) {
 367       for (subset = 0; subset < mode->n_subsets; subset++) {
 368          for (endpoint = 0; endpoint < 2; endpoint++) {
 369             endpoints[subset * 2 + endpoint][component] =
 370                extract_bits(block, bit_offset, mode->n_color_bits);
 371             bit_offset += mode->n_color_bits;
 372          }
 373       }
 374    }
 375
 376    /* Extract the alpha values */
 377    if (mode->n_alpha_bits > 0) {
 378       for (subset = 0; subset < mode->n_subsets; subset++) {
 379          for (endpoint = 0; endpoint < 2; endpoint++) {
 380             endpoints[subset * 2 + endpoint][3] =
 381                extract_bits(block, bit_offset, mode->n_alpha_bits);
 382             bit_offset += mode->n_alpha_bits;
 383          }
 384       }
 385
 386       n_components = 4;
 387    } else {
 388       for (subset = 0; subset < mode->n_subsets; subset++)
 389          for (endpoint = 0; endpoint < 2; endpoint++)
 390             endpoints[subset * 2 + endpoint][3] = 255;
 391
 392       n_components = 3;
 393    }
 394
 395    /* Add in the p-bits */
 396    if (mode->has_endpoint_pbits) {
 397       for (subset = 0; subset < mode->n_subsets; subset++) {
 398          for (endpoint = 0; endpoint < 2; endpoint++) {
 399             pbit = extract_bits(block, bit_offset, 1);
 400             bit_offset += 1;
 401
 402             for (component = 0; component < n_components; component++) {
 403                endpoints[subset * 2 + endpoint][component] <<= 1;
 404                endpoints[subset * 2 + endpoint][component] |= pbit;
 405             }
 406          }
 407       }
 408    } else if (mode->has_shared_pbits) {
 409       for (subset = 0; subset < mode->n_subsets; subset++) {
 410          pbit = extract_bits(block, bit_offset, 1);
 411          bit_offset += 1;
 412
 413          for (endpoint = 0; endpoint < 2; endpoint++) {
 414             for (component = 0; component < n_components; component++) {
 415                endpoints[subset * 2 + endpoint][component] <<= 1;
 416                endpoints[subset * 2 + endpoint][component] |= pbit;
 417             }
 418          }
 419       }
 420    }
 421
 422    /* Expand the n-bit values to a byte */
 423    for (subset = 0; subset < mode->n_subsets; subset++) {
 424       for (endpoint = 0; endpoint < 2; endpoint++) {
 425          for (component = 0; component < 3; component++) {
 426             endpoints[subset * 2 + endpoint][component] =
 427                expand_component(endpoints[subset * 2 + endpoint][component],
 428                                 mode->n_color_bits +
 429                                 mode->has_endpoint_pbits +
 430                                 mode->has_shared_pbits);
 431          }
 432
 433          if (mode->n_alpha_bits > 0) {
 434             endpoints[subset * 2 + endpoint][3] =
 435                expand_component(endpoints[subset * 2 + endpoint][3],
 436                                 mode->n_alpha_bits +
 437                                 mode->has_endpoint_pbits +
 438                                 mode->has_shared_pbits);
 439          }
 440       }
 441    }
 442
 443    return bit_offset;
 444 }
 445
 446 static bool
 447 is_anchor(int n_subsets,
 448           int partition_num,
 449           int texel)
 450 {
 451    if (texel == 0)
 452       return true;
 453
 454    switch (n_subsets) {
 455    case 1:
 456       return false;
 457    case 2:
 458       return anchor_indices[0][partition_num] == texel;
 459    case 3:
 460       return (anchor_indices[1][partition_num] == texel ||
 461               anchor_indices[2][partition_num] == texel);
 462    default:
 463       assert(false);
 464       return false;
 465    }
 466 }
 467
 468 static int
 469 count_anchors_before_texel(int n_subsets,
 470                            int partition_num,
 471                            int texel)
 472 {
 473    int count = 1;
 474
 475    if (texel == 0)
 476       return 0;
 477
 478    switch (n_subsets) {
 479    case 1:
 480       break;
 481    case 2:
 482       if (texel > anchor_indices[0][partition_num])
 483          count++;
 484       break;
 485    case 3:
 486       if (texel > anchor_indices[1][partition_num])
 487          count++;
 488       if (texel > anchor_indices[2][partition_num])
 489          count++;
 490       break;
 491    default:
 492       assert(false);
 493       return 0;
 494    }
 495
 496    return count;
 497 }
 498
 499 static int32_t
 500 interpolate(int32_t a, int32_t b,
 501             int index,
 502             int index_bits)
 503 {
 504    static const uint8_t weights2[] = { 0, 21, 43, 64 };
 505    static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
 506    static const uint8_t weights4[] =
 507       { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
 508    static const uint8_t *weights[] = {
 509       NULL, NULL, weights2, weights3, weights4
 510    };
 511    int weight;
 512
 513    weight = weights[index_bits][index];
 514
 515    return ((64 - weight) * a + weight * b + 32) >> 6;
 516 }
 517
 518 static void
 519 apply_rotation(int rotation,
 520                uint8_t *result)
 521 {
 522    uint8_t t;
 523
 524    if (rotation == 0)
 525       return;
 526
 527    rotation--;
 528
 529    t = result[rotation];
 530    result[rotation] = result[3];
 531    result[3] = t;
 532 }
 533
 534 static void
 535 fetch_rgba_unorm_from_block(const uint8_t *block,
 536                             uint8_t *result,
 537                             int texel)
 538 {
 539    int mode_num = ffs(block[0]);
 540    const struct bptc_unorm_mode *mode;
 541    int bit_offset, secondary_bit_offset;
 542    int partition_num;
 543    int subset_num;
 544    int rotation;
 545    int index_selection;
 546    int index_bits;
 547    int indices[2];
 548    int index;
 549    int anchors_before_texel;
 550    bool anchor;
 551    uint8_t endpoints[3 * 2][4];
 552    uint32_t subsets;
 553    int component;
 554
 555    if (mode_num == 0) {
 556       /* According to the spec this mode is reserved and shouldn't be used. */
 557       memset(result, 0, 3);
 558       result[3] = 0xff;
 559       return;
 560    }
 561
 562    mode = bptc_unorm_modes + mode_num - 1;
 563    bit_offset = mode_num;
 564
 565    partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
 566    bit_offset += mode->n_partition_bits;
 567
 568    switch (mode->n_subsets) {
 569    case 1:
 570       subsets = 0;
 571       break;
 572    case 2:
 573       subsets = partition_table1[partition_num];
 574       break;
 575    case 3:
 576       subsets = partition_table2[partition_num];
 577       break;
 578    default:
 579       assert(false);
 580       return;
 581    }
 582
 583    if (mode->has_rotation_bits) {
 584       rotation = extract_bits(block, bit_offset, 2);
 585       bit_offset += 2;
 586    } else {
 587       rotation = 0;
 588    }
 589
 590    if (mode->has_index_selection_bit) {
 591       index_selection = extract_bits(block, bit_offset, 1);
 592       bit_offset++;
 593    } else {
 594       index_selection = 0;
 595    }
 596
 597    bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
 598
 599    anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
 600                                                      partition_num, texel);
 601
 602    /* Calculate the offset to the secondary index */
 603    secondary_bit_offset = (bit_offset +
 604                            BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
 605                            mode->n_subsets +
 606                            mode->n_secondary_index_bits * texel -
 607                            anchors_before_texel);
 608
 609    /* Calculate the offset to the primary index for this texel */
 610    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
 611
 612    subset_num = (subsets >> (texel * 2)) & 3;
 613
 614    anchor = is_anchor(mode->n_subsets, partition_num, texel);
 615
 616    index_bits = mode->n_index_bits;
 617    if (anchor)
 618       index_bits--;
 619    indices[0] = extract_bits(block, bit_offset, index_bits);
 620
 621    if (mode->n_secondary_index_bits) {
 622       index_bits = mode->n_secondary_index_bits;
 623       if (anchor)
 624          index_bits--;
 625       indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
 626    }
 627
 628    index = indices[index_selection];
 629    index_bits = (index_selection ?
 630                  mode->n_secondary_index_bits :
 631                  mode->n_index_bits);
 632
 633    for (component = 0; component < 3; component++)
 634       result[component] = interpolate(endpoints[subset_num * 2][component],
 635                                       endpoints[subset_num * 2 + 1][component],
 636                                       index,
 637                                       index_bits);
 638
 639    /* Alpha uses the opposite index from the color components */
 640    if (mode->n_secondary_index_bits && !index_selection) {
 641       index = indices[1];
 642       index_bits = mode->n_secondary_index_bits;
 643    } else {
 644       index = indices[0];
 645       index_bits = mode->n_index_bits;
 646    }
 647
 648    result[3] = interpolate(endpoints[subset_num * 2][3],
 649                            endpoints[subset_num * 2 + 1][3],
 650                            index,
 651                            index_bits);
 652
 653    apply_rotation(rotation, result);
 654 }
 655
 656 #ifdef BPTC_BLOCK_DECODE
 657 static void
 658 decompress_rgba_unorm_block(int src_width, int src_height,
 659                             const uint8_t *block,
 660                             uint8_t *dst_row, int dst_rowstride)
 661 {
 662    int mode_num = ffs(block[0]);
 663    const struct bptc_unorm_mode *mode;
 664    int bit_offset, secondary_bit_offset;
 665    int partition_num;
 666    int subset_num;
 667    int rotation;
 668    int index_selection;
 669    int index_bits;
 670    int indices[2];
 671    int index;
 672    int anchors_before_texel;
 673    bool anchor;
 674    uint8_t endpoints[3 * 2][4];
 675    uint32_t subsets;
 676    int component;
 677    unsigned x, y;
 678
 679    if (mode_num == 0) {
 680       /* According to the spec this mode is reserved and shouldn't be used. */
 681       for(y = 0; y < src_height; y += 1) {
 682          uint8_t *result = dst_row;
 683          memset(result, 0, 4 * src_width);
 684          for(x = 0; x < src_width; x += 1) {
 685             result[3] = 0xff;
 686             result += 4;
 687          }
 688          dst_row += dst_rowstride;
 689       }
 690       return;
 691    }
 692
 693    mode = bptc_unorm_modes + mode_num - 1;
 694    bit_offset = mode_num;
 695
 696    partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
 697    bit_offset += mode->n_partition_bits;
 698
 699    switch (mode->n_subsets) {
 700    case 1:
 701       subsets = 0;
 702       break;
 703    case 2:
 704       subsets = partition_table1[partition_num];
 705       break;
 706    case 3:
 707       subsets = partition_table2[partition_num];
 708       break;
 709    default:
 710       assert(false);
 711       return;
 712    }
 713
 714    if (mode->has_rotation_bits) {
 715       rotation = extract_bits(block, bit_offset, 2);
 716       bit_offset += 2;
 717    } else {
 718       rotation = 0;
 719    }
 720
 721    if (mode->has_index_selection_bit) {
 722       index_selection = extract_bits(block, bit_offset, 1);
 723       bit_offset++;
 724    } else {
 725       index_selection = 0;
 726    }
 727
 728    bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
 729
 730    for(y = 0; y < src_height; y += 1) {
 731       uint8_t *result = dst_row;
 732       for(x = 0; x < src_width; x += 1) {
 733          int texel;
 734          texel = x + y * 4;
 735
 736          anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
 737                                                            partition_num,
 738                                                            texel);
 739
 740          /* Calculate the offset to the secondary index */
 741          secondary_bit_offset = (bit_offset +
 742                                  BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
 743                                  mode->n_subsets +
 744                                  mode->n_secondary_index_bits * texel -
 745                                  anchors_before_texel);
 746
 747          /* Calculate the offset to the primary index for this texel */
 748          bit_offset += mode->n_index_bits * texel - anchors_before_texel;
 749
 750          subset_num = (subsets >> (texel * 2)) & 3;
 751
 752          anchor = is_anchor(mode->n_subsets, partition_num, texel);
 753
 754          index_bits = mode->n_index_bits;
 755          if (anchor)
 756             index_bits--;
 757          indices[0] = extract_bits(block, bit_offset, index_bits);
 758
 759          if (mode->n_secondary_index_bits) {
 760             index_bits = mode->n_secondary_index_bits;
 761             if (anchor)
 762                index_bits--;
 763             indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
 764          }
 765
 766          index = indices[index_selection];
 767          index_bits = (index_selection ?
 768                        mode->n_secondary_index_bits :
 769                        mode->n_index_bits);
 770
 771          for (component = 0; component < 3; component++)
 772             result[component] = interpolate(endpoints[subset_num * 2][component],
 773                                             endpoints[subset_num * 2 + 1][component],
 774                                             index,
 775                                             index_bits);
 776
 777          /* Alpha uses the opposite index from the color components */
 778          if (mode->n_secondary_index_bits && !index_selection) {
 779             index = indices[1];
 780             index_bits = mode->n_secondary_index_bits;
 781          } else {
 782             index = indices[0];
 783             index_bits = mode->n_index_bits;
 784          }
 785
 786          result[3] = interpolate(endpoints[subset_num * 2][3],
 787                                  endpoints[subset_num * 2 + 1][3],
 788                                  index,
 789                                  index_bits);
 790
 791          apply_rotation(rotation, result);
 792          result += 4;
 793       }
 794       dst_row += dst_rowstride;
 795    }
 796 }
 797
 798 static void
 799 decompress_rgba_unorm(int width, int height,
 800                       const uint8_t *src, int src_rowstride,
 801                       uint8_t *dst, int dst_rowstride)
 802 {
 803    int src_row_diff;
 804    int y, x;
 805
 806    if (src_rowstride >= width * 4)
 807       src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
 808    else
 809       src_row_diff = 0;
 810
 811    for (y = 0; y < height; y += BLOCK_SIZE) {
 812       for (x = 0; x < width; x += BLOCK_SIZE) {
 813          decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
 814                                      MIN2(height - y, BLOCK_SIZE),
 815                                      src,
 816                                      dst + x * 4 + y * dst_rowstride,
 817                                      dst_rowstride);
 818          src += BLOCK_BYTES;
 819       }
 820       src += src_row_diff;
 821    }
 822 }
 823 #endif // BPTC_BLOCK_DECODE
 824
 825 static int32_t
 826 sign_extend(int32_t value,
 827             int n_bits)
 828 {
 829    if ((value & (1 << (n_bits - 1)))) {
 830       value |= (~(int32_t) 0) << n_bits;
 831    }
 832
 833    return value;
 834 }
 835
 836 static int
 837 signed_unquantize(int value, int n_endpoint_bits)
 838 {
 839    bool sign;
 840
 841    if (n_endpoint_bits >= 16)
 842       return value;
 843
 844    if (value == 0)
 845       return 0;
 846
 847    sign = false;
 848
 849    if (value < 0) {
 850       sign = true;
 851       value = -value;
 852    }
 853
 854    if (value >= (1 << (n_endpoint_bits - 1)) - 1)
 855       value = 0x7fff;
 856    else
 857       value = ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
 858
 859    if (sign)
 860       value = -value;
 861
 862    return value;
 863 }
 864
 865 static int
 866 unsigned_unquantize(int value, int n_endpoint_bits)
 867 {
 868    if (n_endpoint_bits >= 15)
 869       return value;
 870
 871    if (value == 0)
 872       return 0;
 873
 874    if (value == (1 << n_endpoint_bits) - 1)
 875       return 0xffff;
 876
 877    return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
 878 }
 879
 880 static int
 881 extract_float_endpoints(const struct bptc_float_mode *mode,
 882                         const uint8_t *block,
 883                         int bit_offset,
 884                         int32_t endpoints[][3],
 885                         bool is_signed)
 886 {
 887    const struct bptc_float_bitfield *bitfield;
 888    int endpoint, component;
 889    int n_endpoints;
 890    int value;
 891    int i;
 892
 893    if (mode->n_partition_bits)
 894       n_endpoints = 4;
 895    else
 896       n_endpoints = 2;
 897
 898    memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
 899
 900    for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
 901       value = extract_bits(block, bit_offset, bitfield->n_bits);
 902       bit_offset += bitfield->n_bits;
 903
 904       if (bitfield->reverse) {
 905          for (i = 0; i < bitfield->n_bits; i++) {
 906             if (value & (1 << i))
 907                endpoints[bitfield->endpoint][bitfield->component] |=
 908                   1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
 909          }
 910       } else {
 911          endpoints[bitfield->endpoint][bitfield->component] |=
 912             value << bitfield->offset;
 913       }
 914    }
 915
 916    if (mode->transformed_endpoints) {
 917       /* The endpoints are specified as signed offsets from e0 */
 918       for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
 919          for (component = 0; component < 3; component++) {
 920             value = sign_extend(endpoints[endpoint][component],
 921                                 mode->n_delta_bits[component]);
 922             endpoints[endpoint][component] =
 923                ((endpoints[0][component] + value) &
 924                 ((1 << mode->n_endpoint_bits) - 1));
 925          }
 926       }
 927    }
 928
 929    if (is_signed) {
 930       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
 931          for (component = 0; component < 3; component++) {
 932             value = sign_extend(endpoints[endpoint][component],
 933                                 mode->n_endpoint_bits);
 934             endpoints[endpoint][component] =
 935                signed_unquantize(value, mode->n_endpoint_bits);
 936          }
 937       }
 938    } else {
 939       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
 940          for (component = 0; component < 3; component++) {
 941             endpoints[endpoint][component] =
 942                unsigned_unquantize(endpoints[endpoint][component],
 943                                    mode->n_endpoint_bits);
 944          }
 945       }
 946    }
 947
 948    return bit_offset;
 949 }
 950
 951 static int32_t
 952 finish_unsigned_unquantize(int32_t value)
 953 {
 954    return value * 31 / 64;
 955 }
 956
 957 static int32_t
 958 finish_signed_unquantize(int32_t value)
 959 {
 960    if (value < 0)
 961       return (-value * 31 / 32) | 0x8000;
 962    else
 963       return value * 31 / 32;
 964 }
 965
 966 static void
 967 fetch_rgb_float_from_block(const uint8_t *block,
 968                            float *result,
 969                            int texel,
 970                            bool is_signed)
 971 {
 972    int mode_num;
 973    const struct bptc_float_mode *mode;
 974    int bit_offset;
 975    int partition_num;
 976    int subset_num;
 977    int index_bits;
 978    int index;
 979    int anchors_before_texel;
 980    int32_t endpoints[2 * 2][3];
 981    uint32_t subsets;
 982    int n_subsets;
 983    int component;
 984    int32_t value;
 985
 986    if (block[0] & 0x2) {
 987       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
 988       bit_offset = 5;
 989    } else {
 990       mode_num = block[0] & 3;
 991       bit_offset = 2;
 992    }
 993
 994    mode = bptc_float_modes + mode_num;
 995
 996    if (mode->reserved) {
 997       memset(result, 0, sizeof result[0] * 3);
 998       result[3] = 1.0f;
 999       return;
1000    }
1001
1002    bit_offset = extract_float_endpoints(mode, block, bit_offset,
1003                                         endpoints, is_signed);
1004
1005    if (mode->n_partition_bits) {
1006       partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1007       bit_offset += mode->n_partition_bits;
1008
1009       subsets = partition_table1[partition_num];
1010       n_subsets = 2;
1011    } else {
1012       partition_num = 0;
1013       subsets = 0;
1014       n_subsets = 1;
1015    }
1016
1017    anchors_before_texel =
1018       count_anchors_before_texel(n_subsets, partition_num, texel);
1019
1020    /* Calculate the offset to the primary index for this texel */
1021    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1022
1023    subset_num = (subsets >> (texel * 2)) & 3;
1024
1025    index_bits = mode->n_index_bits;
1026    if (is_anchor(n_subsets, partition_num, texel))
1027       index_bits--;
1028    index = extract_bits(block, bit_offset, index_bits);
1029
1030    for (component = 0; component < 3; component++) {
1031       value = interpolate(endpoints[subset_num * 2][component],
1032                           endpoints[subset_num * 2 + 1][component],
1033                           index,
1034                           mode->n_index_bits);
1035
1036       if (is_signed)
1037          value = finish_signed_unquantize(value);
1038       else
1039          value = finish_unsigned_unquantize(value);
1040
1041       result[component] = _mesa_half_to_float(value);
1042    }
1043
1044    result[3] = 1.0f;
1045 }
1046
1047 #ifdef BPTC_BLOCK_DECODE
1048 static void
1049 decompress_rgb_float_block(unsigned src_width, unsigned src_height,
1050                            const uint8_t *block,
1051                            float *dst_row, unsigned dst_rowstride,
1052                            bool is_signed)
1053 {
1054    int mode_num;
1055    const struct bptc_float_mode *mode;
1056    int bit_offset;
1057    int partition_num;
1058    int subset_num;
1059    int index_bits;
1060    int index;
1061    int anchors_before_texel;
1062    int32_t endpoints[2 * 2][3];
1063    uint32_t subsets;
1064    int n_subsets;
1065    int component;
1066    int32_t value;
1067    unsigned x, y;
1068
1069    if (block[0] & 0x2) {
1070       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1071       bit_offset = 5;
1072    } else {
1073       mode_num = block[0] & 3;
1074       bit_offset = 2;
1075    }
1076
1077    mode = bptc_float_modes + mode_num;
1078
1079    if (mode->reserved) {
1080       for(y = 0; y < src_height; y += 1) {
1081          float *result = dst_row;
1082          memset(result, 0, sizeof result[0] * 4 * src_width);
1083          for(x = 0; x < src_width; x += 1) {
1084             result[3] = 1.0f;
1085             result += 4;
1086          }
1087          dst_row += dst_rowstride / sizeof dst_row[0];
1088       }
1089       return;
1090    }
1091
1092    bit_offset = extract_float_endpoints(mode, block, bit_offset,
1093                                         endpoints, is_signed);
1094
1095    if (mode->n_partition_bits) {
1096       partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1097       bit_offset += mode->n_partition_bits;
1098
1099       subsets = partition_table1[partition_num];
1100       n_subsets = 2;
1101    } else {
1102       partition_num = 0;
1103       subsets = 0;
1104       n_subsets = 1;
1105    }
1106
1107    for(y = 0; y < src_height; y += 1) {
1108       float *result = dst_row;
1109       for(x = 0; x < src_width; x += 1) {
1110          int texel;
1111
1112          texel = x + y * 4;
1113
1114          anchors_before_texel =
1115             count_anchors_before_texel(n_subsets, partition_num, texel);
1116
1117          /* Calculate the offset to the primary index for this texel */
1118          bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1119
1120          subset_num = (subsets >> (texel * 2)) & 3;
1121
1122          index_bits = mode->n_index_bits;
1123          if (is_anchor(n_subsets, partition_num, texel))
1124             index_bits--;
1125          index = extract_bits(block, bit_offset, index_bits);
1126
1127          for (component = 0; component < 3; component++) {
1128             value = interpolate(endpoints[subset_num * 2][component],
1129                                 endpoints[subset_num * 2 + 1][component],
1130                                 index,
1131                                 mode->n_index_bits);
1132
1133             if (is_signed)
1134                value = finish_signed_unquantize(value);
1135             else
1136                value = finish_unsigned_unquantize(value);
1137
1138             result[component] = _mesa_half_to_float(value);
1139          }
1140
1141          result[3] = 1.0f;
1142          result += 4;
1143       }
1144       dst_row += dst_rowstride / sizeof dst_row[0];
1145    }
1146 }
1147
1148 static void
1149 decompress_rgb_float(int width, int height,
1150                       const uint8_t *src, int src_rowstride,
1151                       float *dst, int dst_rowstride, bool is_signed)
1152 {
1153    int src_row_diff;
1154    int y, x;
1155
1156    if (src_rowstride >= width * 4)
1157       src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1158    else
1159       src_row_diff = 0;
1160
1161    for (y = 0; y < height; y += BLOCK_SIZE) {
1162       for (x = 0; x < width; x += BLOCK_SIZE) {
1163          decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1164                                     MIN2(height - y, BLOCK_SIZE),
1165                                     src,
1166                                     (dst + x * 4 +
1167                                      (y * dst_rowstride / sizeof dst[0])),
1168                                     dst_rowstride, is_signed);
1169          src += BLOCK_BYTES;
1170       }
1171       src += src_row_diff;
1172    }
1173 }
1174 #endif // BPTC_BLOCK_DECODE
1175
1176 static void
1177 write_bits(struct bit_writer *writer, int n_bits, int value)
1178 {
1179    do {
1180       if (n_bits + writer->pos >= 8) {
1181          *(writer->dst++) = writer->buf | (value << writer->pos);
1182          writer->buf = 0;
1183          value >>= (8 - writer->pos);
1184          n_bits -= (8 - writer->pos);
1185          writer->pos = 0;
1186       } else {
1187          writer->buf |= value << writer->pos;
1188          writer->pos += n_bits;
1189          break;
1190       }
1191    } while (n_bits > 0);
1192 }
1193
/*
 * Computes the average luminance and the average alpha of a rectangle of
 * 4-byte texels.
 *
 * Luminance here is simply the sum of the first three bytes of a texel.
 * src_rowstride is the distance in bytes between the starts of two rows.
 * Both averages use integer division by width * height.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int row_gap = src_rowstride - width * 4;
   const uint8_t *texel = src;
   int total_luminance = 0;
   int total_alpha = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 4) {
         total_luminance += texel[0] + texel[1] + texel[2];
         total_alpha += texel[3];
      }

      /* Skip whatever lies between the end of this row and the next one */
      texel += row_gap;
   }

   *average_luminance = total_luminance / (width * height);
   *average_alpha = total_alpha / (width * height);
}
1214
/*
 * Picks two RGBA endpoints for a rectangle of 4-byte texels.
 *
 * Colour and alpha are handled independently:
 *  - Texels whose luminance (sum of the first three bytes) is below
 *    average_luminance contribute to colour endpoint 0, the others to colour
 *    endpoint 1. Each endpoint is the per-component average of its texels.
 *  - Texels whose alpha is below average_alpha contribute to alpha endpoint
 *    0, the others to alpha endpoint 1.
 * If every texel falls on the same side, both endpoints are set to the
 * average of the whole rectangle.
 *
 * Afterwards the endpoints are swapped where necessary so that the first
 * texel lies on the same side of the midpoint as endpoint 0; the index
 * writers rely on the first index having a zero most-significant bit.
 *
 * src_rowstride is the distance in bytes between the starts of two rows.
 * endpoints must have room for two entries.
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Partition on the alpha channel itself (p[3]), not on blue */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      /* All texels on one side: use the overall average for both */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1311
1312 static void
1313 write_rgb_indices_unorm(struct bit_writer *writer,
1314                         int src_width, int src_height,
1315                         const uint8_t *src, int src_rowstride,
1316                         uint8_t endpoints[][4])
1317 {
1318    int luminance;
1319    int endpoint_luminances[2];
1320    int endpoint;
1321    int index;
1322    int y, x;
1323
1324    for (endpoint = 0; endpoint < 2; endpoint++) {
1325       endpoint_luminances[endpoint] =
1326          endpoints[endpoint][0] +
1327          endpoints[endpoint][1] +
1328          endpoints[endpoint][2];
1329    }
1330
1331    /* If the endpoints have the same luminance then we'll just use index 0 for
1332     * all of the texels */
1333    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1334       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1335       return;
1336    }
1337
1338    for (y = 0; y < src_height; y++) {
1339       for (x = 0; x < src_width; x++) {
1340          luminance = src[0] + src[1] + src[2];
1341
1342          index = ((luminance - endpoint_luminances[0]) * 3 /
1343                   (endpoint_luminances[1] - endpoint_luminances[0]));
1344          if (index < 0)
1345             index = 0;
1346          else if (index > 3)
1347             index = 3;
1348
1349          assert(x != 0 || y != 0 || index < 2);
1350
1351          write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1352
1353          src += 4;
1354       }
1355
1356       /* Pad the indices out to the block size */
1357       if (src_width < BLOCK_SIZE)
1358          write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1359
1360       src += src_rowstride - src_width * 4;
1361    }
1362
1363    /* Pad the indices out to the block size */
1364    if (src_height < BLOCK_SIZE)
1365       write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1366 }
1367
1368 static void
1369 write_alpha_indices_unorm(struct bit_writer *writer,
1370                           int src_width, int src_height,
1371                           const uint8_t *src, int src_rowstride,
1372                           uint8_t endpoints[][4])
1373 {
1374    int index;
1375    int y, x;
1376
1377    /* If the endpoints have the same alpha then we'll just use index 0 for
1378     * all of the texels */
1379    if (endpoints[0][3] == endpoints[1][3]) {
1380       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1381       return;
1382    }
1383
1384    for (y = 0; y < src_height; y++) {
1385       for (x = 0; x < src_width; x++) {
1386          index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1387                   ((int) endpoints[1][3] - endpoints[0][3]));
1388          if (index < 0)
1389             index = 0;
1390          else if (index > 7)
1391             index = 7;
1392
1393          assert(x != 0 || y != 0 || index < 4);
1394
1395          /* The first index has one less bit */
1396          write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1397
1398          src += 4;
1399       }
1400
1401       /* Pad the indices out to the block size */
1402       if (src_width < BLOCK_SIZE)
1403          write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1404
1405       src += src_rowstride - src_width * 4;
1406    }
1407
1408    /* Pad the indices out to the block size */
1409    if (src_height < BLOCK_SIZE)
1410       write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1411 }
1412
1413 static void
1414 compress_rgba_unorm_block(int src_width, int src_height,
1415                           const uint8_t *src, int src_rowstride,
1416                           uint8_t *dst)
1417 {
1418    int average_luminance, average_alpha;
1419    uint8_t endpoints[2][4];
1420    struct bit_writer writer;
1421    int component, endpoint;
1422
1423    get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1424                                      &average_luminance, &average_alpha);
1425    get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1426                             average_luminance, average_alpha,
1427                             endpoints);
1428
1429    writer.dst = dst;
1430    writer.pos = 0;
1431    writer.buf = 0;
1432
1433    write_bits(&writer, 5, 0x10); /* mode 4 */
1434    write_bits(&writer, 2, 0); /* rotation 0 */
1435    write_bits(&writer, 1, 0); /* index selection bit */
1436
1437    /* Write the color endpoints */
1438    for (component = 0; component < 3; component++)
1439       for (endpoint = 0; endpoint < 2; endpoint++)
1440          write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1441
1442    /* Write the alpha endpoints */
1443    for (endpoint = 0; endpoint < 2; endpoint++)
1444       write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1445
1446    write_rgb_indices_unorm(&writer,
1447                            src_width, src_height,
1448                            src, src_rowstride,
1449                            endpoints);
1450    write_alpha_indices_unorm(&writer,
1451                              src_width, src_height,
1452                              src, src_rowstride,
1453                              endpoints);
1454 }
1455
1456 static void
1457 compress_rgba_unorm(int width, int height,
1458                     const uint8_t *src, int src_rowstride,
1459                     uint8_t *dst, int dst_rowstride)
1460 {
1461    int dst_row_diff;
1462    int y, x;
1463
1464    if (dst_rowstride >= width * 4)
1465       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1466    else
1467       dst_row_diff = 0;
1468
1469    for (y = 0; y < height; y += BLOCK_SIZE) {
1470       for (x = 0; x < width; x += BLOCK_SIZE) {
1471          compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1472                                    MIN2(height - y, BLOCK_SIZE),
1473                                    src + x * 4 + y * src_rowstride,
1474                                    src_rowstride,
1475                                    dst);
1476          dst += BLOCK_BYTES;
1477       }
1478       dst += dst_row_diff;
1479    }
1480 }
1481
/*
 * Returns the average luminance of a rectangle of texels made of three
 * floats each.
 *
 * Luminance here is simply the sum of a texel's three components.
 * src_rowstride is the distance in bytes between the starts of two rows.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Skip whatever lies between the end of this row and the next one */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1499
/*
 * Clamps value to [-65504, 65504] when is_signed is set and to [0, 65504]
 * otherwise. Values inside the range are returned unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1518
/*
 * Picks two RGB endpoints for a rectangle of texels made of three floats
 * each.
 *
 * Texels whose luminance (sum of the three components) is below
 * average_luminance contribute to endpoint 0, the others to endpoint 1; each
 * endpoint is the per-component average of its texels. If every texel falls
 * on the same side, both endpoints are the average of the whole rectangle.
 *
 * The endpoints are then clamped with clamp_value() and, if needed, swapped
 * so that the first texel lies on the same side of the midpoint as endpoint
 * 0.
 *
 * src_rowstride is the distance in bytes between the starts of two rows.
 * endpoints must have room for two entries.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   float totals[2][3];
   float endpoint_luminances[2];
   float spare[3];
   float midpoint;
   float luminance;
   const float *texel = src;
   int n_below_average = 0;
   int row, col, side, c;

   memset(totals, 0, sizeof totals);

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         luminance = texel[0] + texel[1] + texel[2];

         side = 1;
         if (luminance < average_luminance) {
            side = 0;
            n_below_average++;
         }

         for (c = 0; c < 3; c++)
            totals[side][c] += texel[c];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   if (n_below_average != 0 && n_below_average != width * height) {
      for (c = 0; c < 3; c++) {
         endpoints[0][c] = totals[0][c] / n_below_average;
         endpoints[1][c] = totals[1][c] / (width * height - n_below_average);
      }
   } else {
      /* All texels on one side: use the overall average for both */
      for (c = 0; c < 3; c++)
         endpoints[0][c] = endpoints[1][c] =
            (totals[0][c] + totals[1][c]) / (width * height);
   }

   for (side = 0; side < 2; side++) {
      /* Clamp the endpoints to the range of a half float and strip out
       * infinities */
      for (c = 0; c < 3; c++)
         endpoints[side][c] = clamp_value(endpoints[side][c], is_signed);

      endpoint_luminances[side] =
         endpoints[side][0] +
         endpoints[side][1] +
         endpoints[side][2];
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(spare, endpoints[0], sizeof spare);
      memcpy(endpoints[0], endpoints[1], sizeof spare);
      memcpy(endpoints[1], spare, sizeof spare);
   }
}
1594
1595 static void
1596 write_rgb_indices_float(struct bit_writer *writer,
1597                         int src_width, int src_height,
1598                         const float *src, int src_rowstride,
1599                         float endpoints[][3])
1600 {
1601    float luminance;
1602    float endpoint_luminances[2];
1603    int endpoint;
1604    int index;
1605    int y, x;
1606
1607    for (endpoint = 0; endpoint < 2; endpoint++) {
1608       endpoint_luminances[endpoint] =
1609          endpoints[endpoint][0] +
1610          endpoints[endpoint][1] +
1611          endpoints[endpoint][2];
1612    }
1613
1614    /* If the endpoints have the same luminance then we'll just use index 0 for
1615     * all of the texels */
1616    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1617       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1618       return;
1619    }
1620
1621    for (y = 0; y < src_height; y++) {
1622       for (x = 0; x < src_width; x++) {
1623          luminance = src[0] + src[1] + src[2];
1624
1625          index = ((luminance - endpoint_luminances[0]) * 15 /
1626                   (endpoint_luminances[1] - endpoint_luminances[0]));
1627          if (index < 0)
1628             index = 0;
1629          else if (index > 15)
1630             index = 15;
1631
1632          assert(x != 0 || y != 0 || index < 8);
1633
1634          write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1635
1636          src += 3;
1637       }
1638
1639       /* Pad the indices out to the block size */
1640       if (src_width < BLOCK_SIZE)
1641          write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1642
1643       src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1644    }
1645
1646    /* Pad the indices out to the block size */
1647    if (src_height < BLOCK_SIZE)
1648       write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1649 }
1650
/*
 * Converts a float endpoint component into the 10-bit value stored in the
 * block.
 *
 * The float is first converted to half-float bits with
 * _mesa_float_to_half(). For unsigned data, values at or below zero give 0
 * and the rest are scaled by 64/31 and shifted right by 6. For signed data
 * the magnitude bits are scaled by 32/31 and shifted right by 6, and a
 * negative result is stored in two's complement masked to 10 bits.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   int half;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   if (half & 0x8000) {
      /* Sign bit set: scale the magnitude, then negate within 10 bits */
      int magnitude = (32 * (half & 0x7fff) / 31) >> 6;

      return -magnitude & ((1 << 10) - 1);
   }

   return (32 * half / 31) >> 6;
}
1680
1681 static void
1682 compress_rgb_float_block(int src_width, int src_height,
1683                          const float *src, int src_rowstride,
1684                          uint8_t *dst,
1685                          bool is_signed)
1686 {
1687    float average_luminance;
1688    float endpoints[2][3];
1689    struct bit_writer writer;
1690    int component, endpoint;
1691    int endpoint_value;
1692
1693    average_luminance =
1694       get_average_luminance_float(src_width, src_height, src, src_rowstride);
1695    get_endpoints_float(src_width, src_height, src, src_rowstride,
1696                        average_luminance, endpoints, is_signed);
1697
1698    writer.dst = dst;
1699    writer.pos = 0;
1700    writer.buf = 0;
1701
1702    write_bits(&writer, 5, 3); /* mode 3 */
1703
1704    /* Write the endpoints */
1705    for (endpoint = 0; endpoint < 2; endpoint++) {
1706       for (component = 0; component < 3; component++) {
1707          endpoint_value =
1708             get_endpoint_value(endpoints[endpoint][component], is_signed);
1709          write_bits(&writer, 10, endpoint_value);
1710       }
1711    }
1712
1713    write_rgb_indices_float(&writer,
1714                            src_width, src_height,
1715                            src, src_rowstride,
1716                            endpoints);
1717 }
1718
1719 static void
1720 compress_rgb_float(int width, int height,
1721                    const float *src, int src_rowstride,
1722                    uint8_t *dst, int dst_rowstride,
1723                    bool is_signed)
1724 {
1725    int dst_row_diff;
1726    int y, x;
1727
1728    if (dst_rowstride >= width * 4)
1729       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1730    else
1731       dst_row_diff = 0;
1732
1733    for (y = 0; y < height; y += BLOCK_SIZE) {
1734       for (x = 0; x < width; x += BLOCK_SIZE) {
1735          compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1736                                   MIN2(height - y, BLOCK_SIZE),
1737                                   src + x * 3 +
1738                                   y * src_rowstride / sizeof (float),
1739                                   src_rowstride,
1740                                   dst,
1741                                   is_signed);
1742          dst += BLOCK_BYTES;
1743       }
1744       dst += dst_row_diff;
1745    }
1746 }
1747
1748 #endif