src/mesa/main/texcompress_bptc.c

   1 /*
   2  * Copyright (C) 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file texcompress_bptc.c
  26  * GL_ARB_texture_compression_bptc support.
  27  */
  28
  29 #include <stdbool.h>
  30 #include "texcompress.h"
  31 #include "texcompress_bptc.h"
  32 #include "util/format_srgb.h"
  33 #include "texstore.h"
  34 #include "macros.h"
  35 #include "image.h"
  36
/* Width and height of a compressed block in texels. A block therefore holds
 * BLOCK_SIZE * BLOCK_SIZE texels, each of which has its own index.
 */
#define BLOCK_SIZE 4
/* Number of entries in each partition table and in each row of
 * anchor_indices.
 */
#define N_PARTITIONS 64
/* NOTE(review): presumably the size of one compressed block in bytes. The
 * texel fetch code visible here uses a literal 16 for the block stride
 * instead of this macro - confirm they are meant to be the same quantity.
 */
#define BLOCK_BYTES 16
  40
/* Describes the bit layout of one BPTC UNORM block mode. The decoder reads
 * the fields of a block in the order dictated by these values. The members
 * are initialised positionally in bptc_unorm_modes so their order matters.
 */
struct bptc_unorm_mode {
   /* Number of subsets the texels of the block are divided into (1, 2 or 3).
    * Each subset has its own pair of endpoints.
    */
   int n_subsets;
   /* Width of the partition number field, 0 if the mode has none */
   int n_partition_bits;
   /* Whether a 2-bit rotation field follows the partition number */
   bool has_rotation_bits;
   /* Whether a 1-bit index selection field follows the rotation field */
   bool has_index_selection_bit;
   /* Bits stored per endpoint for each of the three color components */
   int n_color_bits;
   /* Bits stored per endpoint for alpha. When 0 the decoded endpoints get an
    * alpha of 255.
    */
   int n_alpha_bits;
   /* Whether every endpoint has its own p-bit which is appended as the new
    * least-significant bit of each of its components
    */
   bool has_endpoint_pbits;
   /* Whether each subset has one p-bit shared by both of its endpoints */
   bool has_shared_pbits;
   /* Bits per texel in the primary index data. Anchor texels store one bit
    * fewer.
    */
   int n_index_bits;
   /* Bits per texel in the secondary index data, 0 if the mode has no
    * secondary indices
    */
   int n_secondary_index_bits;
};
  53
/* Describes one run of consecutive bits in a BPTC float block and where those
 * bits end up in the endpoint values. A mode's endpoint data is described by
 * a list of these which is terminated by an entry whose endpoint is -1.
 */
struct bptc_float_bitfield {
   /* Index of the endpoint that receives the bits, or -1 to end the list */
   int8_t endpoint;
   /* Index of the component (0 to 2) within that endpoint */
   uint8_t component;
   /* Position within the component value of the lowest bit of this field */
   uint8_t offset;
   /* Number of bits to read from the block for this field */
   uint8_t n_bits;
   /* If true the bits are stored in the block in reverse order, ie, the
    * first bit read becomes the highest bit of the field
    */
   bool reverse;
};
  61
/* Describes the bit layout of one BPTC float block mode. The members are
 * initialised positionally in bptc_float_modes so their order matters.
 */
struct bptc_float_mode {
   /* True for mode numbers that have no layout. The remaining members are
    * left zero-initialised for these entries.
    */
   bool reserved;
   /* If true, every endpoint other than the first is stored as a signed
    * offset from the first endpoint
    */
   bool transformed_endpoints;
   /* Width of the partition number field. When non-zero the block has four
    * endpoints, otherwise it has two.
    */
   int n_partition_bits;
   /* Precision in bits of the endpoint values before unquantization */
   int n_endpoint_bits;
   /* Bits per texel in the index data. Anchor texels store one bit fewer. */
   int n_index_bits;
   /* Width used to sign-extend the offsets of each of the three components
    * when transformed_endpoints is set
    */
   int n_delta_bits[3];
   /* The fields that make up the endpoint data in the order that they are
    * stored in the block, terminated by an entry with an endpoint of -1
    */
   struct bptc_float_bitfield bitfields[24];
};
  71
/* NOTE(review): this struct is not referenced by any of the decoding code in
 * this part of the file. The descriptions below are inferred from the member
 * names only - confirm them against the code that uses the struct.
 */
struct bit_writer {
   /* Presumably the bits accumulated so far for the byte being built */
   uint8_t buf;
   /* Presumably the number of bits currently held in buf */
   int pos;
   /* Presumably where the next completed byte will be stored */
   uint8_t *dst;
};
  77
/* Layouts of the eight UNORM block modes. The decoder indexes this table with
 * the position of the lowest set bit in the first byte of the block.
 *
 * The columns are, in order: n_subsets, n_partition_bits, has_rotation_bits,
 * has_index_selection_bit, n_color_bits, n_alpha_bits, has_endpoint_pbits,
 * has_shared_pbits, n_index_bits, n_secondary_index_bits.
 */
static const struct bptc_unorm_mode
bptc_unorm_modes[] = {
   /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
   /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
   /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
   /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
   /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
   /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
   /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
};
  89
/* Layouts of the float block modes. The comment above each entry is the bit
 * pattern of the mode field as it appears in the low bits of the first byte
 * of the block. The two-bit modes 00 and 01 are entries 0 and 1. For a
 * five-bit mode the decoder calculates the entry number from the first byte b
 * as (((b >> 1) & 0xe) | (b & 1)) + 2.
 *
 * Each entry is { reserved, transformed_endpoints, n_partition_bits,
 * n_endpoint_bits, n_index_bits, { n_delta_bits }, { bitfields } } and each
 * bitfield is { endpoint, component, offset, n_bits, reverse }. The bitfields
 * are listed in the order that they are stored in the block, starting
 * immediately after the mode field, and the list ends with { -1 }.
 */
static const struct bptc_float_mode
bptc_float_modes[] = {
   /* 00 */
   { false, true, 5, 10, 3, { 5, 5, 5 },
     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01 */
   { false, true, 5, 7, 3, { 6, 6, 6 },
     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 00010 */
   { false, true, 5, 11, 3, { 5, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00011 */
   { false, false, 0, 10, 4, { 10, 10, 10 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
       { -1 } }
   },
   /* 00110 */
   { false, true, 5, 11, 3, { 4, 5, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00111 */
   { false, true, 0, 11, 4, { 9, 9, 9 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
       { -1 } }
   },
   /* 01010 */
   { false, true, 5, 11, 3, { 4, 4, 5 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01011 */
   { false, true, 0, 12, 4, { 8, 8, 8 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
       { -1 } }
   },
   /* 01110 */
   { false, true, 5, 9, 3, { 5, 5, 5 },
     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01111 */
   { false, true, 0, 16, 4, { 4, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
       { -1 } }
   },
   /* 10010 */
   { false, true, 5, 8, 3, { 6, 5, 5 },
     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 10011 */
   { true /* reserved */ },
   /* 10110 */
   { false, true, 5, 8, 3, { 5, 6, 5 },
     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 10111 */
   { true /* reserved */ },
   /* 11010 */
   { false, true, 5, 8, 3, { 5, 5, 6 },
     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 11011 */
   { true /* reserved */ },
   /* 11110 */
   { false, false, 5, 6, 3, { 6, 6, 6 },
     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 11111 */
   { true /* reserved */ },
};
 240
/* This partition table is used when the mode has two subsets. Each
 * partition is represented by a 32-bit value which gives 2 bits per texel
 * within the block. The value of the two bits represents which subset to use
 * (0 or 1).
 *
 * The table is indexed by the partition number read from the block. The
 * subset for texel number t is (value >> (t * 2)) & 3, so texel 0 is in the
 * two least-significant bits. The float decoder also uses this table for the
 * modes that have a partition number.
 */
static const uint32_t
partition_table1[N_PARTITIONS] = {
   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
};
 265
/* This partition table is used when the mode has three subsets. In this case
 * the values can be 0, 1 or 2.
 *
 * The encoding is the same as for the two-subset table: the entry is
 * selected by the partition number read from the block and the subset for
 * texel number t is (value >> (t * 2)) & 3.
 */
static const uint32_t
partition_table2[N_PARTITIONS] = {
   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
};
 288
/* Texel numbers of the anchor texels for each partition. The index of an
 * anchor texel is stored in the block with one bit fewer than the indices of
 * the other texels. Texel 0 is treated as an anchor for every partition so
 * it does not need a row here. Each row is indexed by the partition number.
 */
static const uint8_t
anchor_indices[][N_PARTITIONS] = {
   /* Anchor index values for the second subset of two-subset partitioning */
   {
      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
   },

   /* Anchor index values for the second subset of three-subset partitioning */
   {
      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
   },

   /* Anchor index values for the third subset of three-subset
    * partitioning
    */
   {
      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
   }
};
 317
 318 static int
 319 extract_bits(const uint8_t *block,
 320              int offset,
 321              int n_bits)
 322 {
 323    int byte_index = offset / 8;
 324    int bit_index = offset % 8;
 325    int n_bits_in_byte = MIN2(n_bits, 8 - bit_index);
 326    int result = 0;
 327    int bit = 0;
 328
 329    while (true) {
 330       result |= ((block[byte_index] >> bit_index) &
 331                  ((1 << n_bits_in_byte) - 1)) << bit;
 332
 333       n_bits -= n_bits_in_byte;
 334
 335       if (n_bits <= 0)
 336          return result;
 337
 338       bit += n_bits_in_byte;
 339       byte_index++;
 340       bit_index = 0;
 341       n_bits_in_byte = MIN2(n_bits, 8);
 342    }
 343 }
 344
 345 static uint8_t
 346 expand_component(uint8_t byte,
 347                  int n_bits)
 348 {
 349    /* Expands a n-bit quantity into a byte by copying the most-significant
 350     * bits into the unused least-significant bits.
 351     */
 352    return byte << (8 - n_bits) | (byte >> (2 * n_bits - 8));
 353 }
 354
 355 static int
 356 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
 357                         const uint8_t *block,
 358                         int bit_offset,
 359                         uint8_t endpoints[][4])
 360 {
 361    int component;
 362    int subset;
 363    int endpoint;
 364    int pbit;
 365    int n_components;
 366
 367    /* Extract each color component */
 368    for (component = 0; component < 3; component++) {
 369       for (subset = 0; subset < mode->n_subsets; subset++) {
 370          for (endpoint = 0; endpoint < 2; endpoint++) {
 371             endpoints[subset * 2 + endpoint][component] =
 372                extract_bits(block, bit_offset, mode->n_color_bits);
 373             bit_offset += mode->n_color_bits;
 374          }
 375       }
 376    }
 377
 378    /* Extract the alpha values */
 379    if (mode->n_alpha_bits > 0) {
 380       for (subset = 0; subset < mode->n_subsets; subset++) {
 381          for (endpoint = 0; endpoint < 2; endpoint++) {
 382             endpoints[subset * 2 + endpoint][3] =
 383                extract_bits(block, bit_offset, mode->n_alpha_bits);
 384             bit_offset += mode->n_alpha_bits;
 385          }
 386       }
 387
 388       n_components = 4;
 389    } else {
 390       for (subset = 0; subset < mode->n_subsets; subset++)
 391          for (endpoint = 0; endpoint < 2; endpoint++)
 392             endpoints[subset * 2 + endpoint][3] = 255;
 393
 394       n_components = 3;
 395    }
 396
 397    /* Add in the p-bits */
 398    if (mode->has_endpoint_pbits) {
 399       for (subset = 0; subset < mode->n_subsets; subset++) {
 400          for (endpoint = 0; endpoint < 2; endpoint++) {
 401             pbit = extract_bits(block, bit_offset, 1);
 402             bit_offset += 1;
 403
 404             for (component = 0; component < n_components; component++) {
 405                endpoints[subset * 2 + endpoint][component] <<= 1;
 406                endpoints[subset * 2 + endpoint][component] |= pbit;
 407             }
 408          }
 409       }
 410    } else if (mode->has_shared_pbits) {
 411       for (subset = 0; subset < mode->n_subsets; subset++) {
 412          pbit = extract_bits(block, bit_offset, 1);
 413          bit_offset += 1;
 414
 415          for (endpoint = 0; endpoint < 2; endpoint++) {
 416             for (component = 0; component < n_components; component++) {
 417                endpoints[subset * 2 + endpoint][component] <<= 1;
 418                endpoints[subset * 2 + endpoint][component] |= pbit;
 419             }
 420          }
 421       }
 422    }
 423
 424    /* Expand the n-bit values to a byte */
 425    for (subset = 0; subset < mode->n_subsets; subset++) {
 426       for (endpoint = 0; endpoint < 2; endpoint++) {
 427          for (component = 0; component < 3; component++) {
 428             endpoints[subset * 2 + endpoint][component] =
 429                expand_component(endpoints[subset * 2 + endpoint][component],
 430                                 mode->n_color_bits +
 431                                 mode->has_endpoint_pbits +
 432                                 mode->has_shared_pbits);
 433          }
 434
 435          if (mode->n_alpha_bits > 0) {
 436             endpoints[subset * 2 + endpoint][3] =
 437                expand_component(endpoints[subset * 2 + endpoint][3],
 438                                 mode->n_alpha_bits +
 439                                 mode->has_endpoint_pbits +
 440                                 mode->has_shared_pbits);
 441          }
 442       }
 443    }
 444
 445    return bit_offset;
 446 }
 447
 448 static bool
 449 is_anchor(int n_subsets,
 450           int partition_num,
 451           int texel)
 452 {
 453    if (texel == 0)
 454       return true;
 455
 456    switch (n_subsets) {
 457    case 1:
 458       return false;
 459    case 2:
 460       return anchor_indices[0][partition_num] == texel;
 461    case 3:
 462       return (anchor_indices[1][partition_num] == texel ||
 463               anchor_indices[2][partition_num] == texel);
 464    default:
 465       assert(false);
 466       return false;
 467    }
 468 }
 469
 470 static int
 471 count_anchors_before_texel(int n_subsets,
 472                            int partition_num,
 473                            int texel)
 474 {
 475    int count = 1;
 476
 477    if (texel == 0)
 478       return 0;
 479
 480    switch (n_subsets) {
 481    case 1:
 482       break;
 483    case 2:
 484       if (texel > anchor_indices[0][partition_num])
 485          count++;
 486       break;
 487    case 3:
 488       if (texel > anchor_indices[1][partition_num])
 489          count++;
 490       if (texel > anchor_indices[2][partition_num])
 491          count++;
 492       break;
 493    default:
 494       assert(false);
 495       return 0;
 496    }
 497
 498    return count;
 499 }
 500
 501 static int32_t
 502 interpolate(int32_t a, int32_t b,
 503             int index,
 504             int index_bits)
 505 {
 506    static const uint8_t weights2[] = { 0, 21, 43, 64 };
 507    static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
 508    static const uint8_t weights4[] =
 509       { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
 510    static const uint8_t *weights[] = {
 511       NULL, NULL, weights2, weights3, weights4
 512    };
 513    int weight;
 514
 515    weight = weights[index_bits][index];
 516
 517    return ((64 - weight) * a + weight * b + 32) >> 6;
 518 }
 519
 520 static void
 521 apply_rotation(int rotation,
 522                uint8_t *result)
 523 {
 524    uint8_t t;
 525
 526    if (rotation == 0)
 527       return;
 528
 529    rotation--;
 530
 531    t = result[rotation];
 532    result[rotation] = result[3];
 533    result[3] = t;
 534 }
 535
/**
 * Decodes a single texel from a UNORM block.
 *
 * The fields of the block are read in order: the mode, the partition number,
 * the rotation, the index selection bit, the endpoints and finally the
 * indices. The texel is the result of interpolating between the two
 * endpoints of the subset that it belongs to using its index as the weight.
 *
 * \param block   The compressed block.
 * \param result  Receives the four components of the texel.
 * \param texel   Number of the texel within the block. The callers pass
 *                x + y * 4.
 */
static void
fetch_rgba_unorm_from_block(const uint8_t *block,
                            uint8_t *result,
                            int texel)
{
   /* The mode is encoded as the position of the lowest bit that is set in
    * the first byte. ffs numbers the bits from 1 and returns 0 if no bit is
    * set.
    */
   int mode_num = ffs(block[0]);
   const struct bptc_unorm_mode *mode;
   int bit_offset, secondary_bit_offset;
   int partition_num;
   int subset_num;
   int rotation;
   int index_selection;
   int index_bits;
   int indices[2];
   int index;
   int anchors_before_texel;
   bool anchor;
   uint8_t endpoints[3 * 2][4];
   uint32_t subsets;
   int component;

   if (mode_num == 0) {
      /* According to the spec this mode is reserved and shouldn't be used. */
      memset(result, 0, 3);
      result[3] = 0xff;
      return;
   }

   mode = bptc_unorm_modes + mode_num - 1;
   /* The mode field is mode_num bits long: the bit that is set and all of
    * the zero bits below it
    */
   bit_offset = mode_num;

   /* This reads as 0 for the modes that have no partition bits */
   partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
   bit_offset += mode->n_partition_bits;

   /* Get the map that assigns each texel to a subset */
   switch (mode->n_subsets) {
   case 1:
      subsets = 0;
      break;
   case 2:
      subsets = partition_table1[partition_num];
      break;
   case 3:
      subsets = partition_table2[partition_num];
      break;
   default:
      assert(false);
      return;
   }

   if (mode->has_rotation_bits) {
      rotation = extract_bits(block, bit_offset, 2);
      bit_offset += 2;
   } else {
      rotation = 0;
   }

   if (mode->has_index_selection_bit) {
      index_selection = extract_bits(block, bit_offset, 1);
      bit_offset++;
   } else {
      index_selection = 0;
   }

   /* After this bit_offset is at the start of the primary index data */
   bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);

   anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
                                                     partition_num, texel);

   /* Calculate the offset to the secondary index */
   /* The secondary indices follow the primary ones. The primary data is
    * shorter than 16 full indices by one bit for each subset because each
    * subset has an anchor. The position within the secondary data is then
    * worked out in the same way as for the primary index below.
    */
   secondary_bit_offset = (bit_offset +
                           BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
                           mode->n_subsets +
                           mode->n_secondary_index_bits * texel -
                           anchors_before_texel);

   /* Calculate the offset to the primary index for this texel */
   /* Every anchor before this texel used one bit less than a normal index */
   bit_offset += mode->n_index_bits * texel - anchors_before_texel;

   subset_num = (subsets >> (texel * 2)) & 3;

   anchor = is_anchor(mode->n_subsets, partition_num, texel);

   index_bits = mode->n_index_bits;
   if (anchor)
      index_bits--;
   indices[0] = extract_bits(block, bit_offset, index_bits);

   /* indices[1] is only set for the modes that have secondary indices. It
    * is not read otherwise because the only mode in bptc_unorm_modes with an
    * index selection bit also has secondary indices.
    */
   if (mode->n_secondary_index_bits) {
      index_bits = mode->n_secondary_index_bits;
      if (anchor)
         index_bits--;
      indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
   }

   /* The index selection bit picks which of the two indices is used for the
    * color components. The weights depend on the full width of the index
    * even if this texel is an anchor.
    */
   index = indices[index_selection];
   index_bits = (index_selection ?
                 mode->n_secondary_index_bits :
                 mode->n_index_bits);

   for (component = 0; component < 3; component++)
      result[component] = interpolate(endpoints[subset_num * 2][component],
                                      endpoints[subset_num * 2 + 1][component],
                                      index,
                                      index_bits);

   /* Alpha uses the opposite index from the color components */
   /* If there is no secondary index then alpha uses the primary index just
    * like the colors
    */
   if (mode->n_secondary_index_bits && !index_selection) {
      index = indices[1];
      index_bits = mode->n_secondary_index_bits;
   } else {
      index = indices[0];
      index_bits = mode->n_index_bits;
   }

   result[3] = interpolate(endpoints[subset_num * 2][3],
                           endpoints[subset_num * 2 + 1][3],
                           index,
                           index_bits);

   /* This is a no-op for the modes without rotation bits */
   apply_rotation(rotation, result);
}
 657
 658 static void
 659 fetch_bptc_rgba_unorm_bytes(const GLubyte *map,
 660                             GLint rowStride, GLint i, GLint j,
 661                             GLubyte *texel)
 662 {
 663    const GLubyte *block;
 664
 665    block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
 666
 667    fetch_rgba_unorm_from_block(block, texel, (i % 4) + (j % 4) * 4);
 668 }
 669
 670 static void
 671 fetch_bptc_rgba_unorm(const GLubyte *map,
 672                       GLint rowStride, GLint i, GLint j,
 673                       GLfloat *texel)
 674 {
 675    GLubyte texel_bytes[4];
 676
 677    fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
 678
 679    texel[RCOMP] = UBYTE_TO_FLOAT(texel_bytes[0]);
 680    texel[GCOMP] = UBYTE_TO_FLOAT(texel_bytes[1]);
 681    texel[BCOMP] = UBYTE_TO_FLOAT(texel_bytes[2]);
 682    texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
 683 }
 684
 685 static void
 686 fetch_bptc_srgb_alpha_unorm(const GLubyte *map,
 687                             GLint rowStride, GLint i, GLint j,
 688                             GLfloat *texel)
 689 {
 690    GLubyte texel_bytes[4];
 691
 692    fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
 693
 694    texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[0]);
 695    texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[1]);
 696    texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[2]);
 697    texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
 698 }
 699
 700 static int32_t
 701 sign_extend(int32_t value,
 702             int n_bits)
 703 {
 704    if ((value & (1 << (n_bits - 1)))) {
 705       value |= (~(int32_t) 0) << n_bits;
 706    }
 707
 708    return value;
 709 }
 710
 711 static int
 712 signed_unquantize(int value, int n_endpoint_bits)
 713 {
 714    bool sign;
 715
 716    if (n_endpoint_bits >= 16)
 717       return value;
 718
 719    if (value == 0)
 720       return 0;
 721
 722    sign = false;
 723
 724    if (value < 0) {
 725       sign = true;
 726       value = -value;
 727    }
 728
 729    if (value >= (1 << (n_endpoint_bits - 1)) - 1)
 730       value = 0x7fff;
 731    else
 732       value = ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
 733
 734    if (sign)
 735       value = -value;
 736
 737    return value;
 738 }
 739
 740 static int
 741 unsigned_unquantize(int value, int n_endpoint_bits)
 742 {
 743    if (n_endpoint_bits >= 15)
 744       return value;
 745
 746    if (value == 0)
 747       return 0;
 748
 749    if (value == (1 << n_endpoint_bits) - 1)
 750       return 0xffff;
 751
 752    return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
 753 }
 754
 755 static int
 756 extract_float_endpoints(const struct bptc_float_mode *mode,
 757                         const uint8_t *block,
 758                         int bit_offset,
 759                         int32_t endpoints[][3],
 760                         bool is_signed)
 761 {
 762    const struct bptc_float_bitfield *bitfield;
 763    int endpoint, component;
 764    int n_endpoints;
 765    int value;
 766    int i;
 767
 768    if (mode->n_partition_bits)
 769       n_endpoints = 4;
 770    else
 771       n_endpoints = 2;
 772
 773    memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
 774
 775    for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
 776       value = extract_bits(block, bit_offset, bitfield->n_bits);
 777       bit_offset += bitfield->n_bits;
 778
 779       if (bitfield->reverse) {
 780          for (i = 0; i < bitfield->n_bits; i++) {
 781             if (value & (1 << i))
 782                endpoints[bitfield->endpoint][bitfield->component] |=
 783                   1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
 784          }
 785       } else {
 786          endpoints[bitfield->endpoint][bitfield->component] |=
 787             value << bitfield->offset;
 788       }
 789    }
 790
 791    if (mode->transformed_endpoints) {
 792       /* The endpoints are specified as signed offsets from e0 */
 793       for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
 794          for (component = 0; component < 3; component++) {
 795             value = sign_extend(endpoints[endpoint][component],
 796                                 mode->n_delta_bits[component]);
 797             endpoints[endpoint][component] =
 798                ((endpoints[0][component] + value) &
 799                 ((1 << mode->n_endpoint_bits) - 1));
 800          }
 801       }
 802    }
 803
 804    if (is_signed) {
 805       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
 806          for (component = 0; component < 3; component++) {
 807             value = sign_extend(endpoints[endpoint][component],
 808                                 mode->n_endpoint_bits);
 809             endpoints[endpoint][component] =
 810                signed_unquantize(value, mode->n_endpoint_bits);
 811          }
 812       }
 813    } else {
 814       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
 815          for (component = 0; component < 3; component++) {
 816             endpoints[endpoint][component] =
 817                unsigned_unquantize(endpoints[endpoint][component],
 818                                    mode->n_endpoint_bits);
 819          }
 820       }
 821    }
 822
 823    return bit_offset;
 824 }
 825
 826 static int32_t
 827 finish_unsigned_unquantize(int32_t value)
 828 {
 829    return value * 31 / 64;
 830 }
 831
 832 static int32_t
 833 finish_signed_unquantize(int32_t value)
 834 {
 835    if (value < 0)
 836       return (-value * 31 / 32) | 0x8000;
 837    else
 838       return value * 31 / 32;
 839 }
 840
 841 static void
 842 fetch_rgb_float_from_block(const uint8_t *block,
 843                            float *result,
 844                            int texel,
 845                            bool is_signed)
 846 {
 847    int mode_num;
 848    const struct bptc_float_mode *mode;
 849    int bit_offset;
 850    int partition_num;
 851    int subset_num;
 852    int index_bits;
 853    int index;
 854    int anchors_before_texel;
 855    int32_t endpoints[2 * 2][3];
 856    uint32_t subsets;
 857    int n_subsets;
 858    int component;
 859    int32_t value;
 860
 861    if (block[0] & 0x2) {
 862       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
 863       bit_offset = 5;
 864    } else {
 865       mode_num = block[0] & 3;
 866       bit_offset = 2;
 867    }
 868
 869    mode = bptc_float_modes + mode_num;
 870
 871    if (mode->reserved) {
 872       memset(result, 0, sizeof result[0] * 3);
 873       result[3] = 1.0f;
 874       return;
 875    }
 876
 877    bit_offset = extract_float_endpoints(mode, block, bit_offset,
 878                                         endpoints, is_signed);
 879
 880    if (mode->n_partition_bits) {
 881       partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
 882       bit_offset += mode->n_partition_bits;
 883
 884       subsets = partition_table1[partition_num];
 885       n_subsets = 2;
 886    } else {
 887       partition_num = 0;
 888       subsets = 0;
 889       n_subsets = 1;
 890    }
 891
 892    anchors_before_texel =
 893       count_anchors_before_texel(n_subsets, partition_num, texel);
 894
 895    /* Calculate the offset to the primary index for this texel */
 896    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
 897
 898    subset_num = (subsets >> (texel * 2)) & 3;
 899
 900    index_bits = mode->n_index_bits;
 901    if (is_anchor(n_subsets, partition_num, texel))
 902       index_bits--;
 903    index = extract_bits(block, bit_offset, index_bits);
 904
 905    for (component = 0; component < 3; component++) {
 906       value = interpolate(endpoints[subset_num * 2][component],
 907                           endpoints[subset_num * 2 + 1][component],
 908                           index,
 909                           mode->n_index_bits);
 910
 911       if (is_signed)
 912          value = finish_signed_unquantize(value);
 913       else
 914          value = finish_unsigned_unquantize(value);
 915
 916       result[component] = _mesa_half_to_float(value);
 917    }
 918
 919    result[3] = 1.0f;
 920 }
 921
 922 static void
 923 fetch_bptc_rgb_float(const GLubyte *map,
 924                      GLint rowStride, GLint i, GLint j,
 925                      GLfloat *texel,
 926                      bool is_signed)
 927 {
 928    const GLubyte *block;
 929
 930    block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
 931
 932    fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed);
 933 }
 934
 935 static void
 936 fetch_bptc_rgb_signed_float(const GLubyte *map,
 937                             GLint rowStride, GLint i, GLint j,
 938                             GLfloat *texel)
 939 {
 940    fetch_bptc_rgb_float(map, rowStride, i, j, texel, true);
 941 }
 942
 943 static void
 944 fetch_bptc_rgb_unsigned_float(const GLubyte *map,
 945                               GLint rowStride, GLint i, GLint j,
 946                               GLfloat *texel)
 947 {
 948    fetch_bptc_rgb_float(map, rowStride, i, j, texel, false);
 949 }
 950
 951 compressed_fetch_func
 952 _mesa_get_bptc_fetch_func(mesa_format format)
 953 {
 954    switch (format) {
 955    case MESA_FORMAT_BPTC_RGBA_UNORM:
 956       return fetch_bptc_rgba_unorm;
 957    case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM:
 958       return fetch_bptc_srgb_alpha_unorm;
 959    case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT:
 960       return fetch_bptc_rgb_signed_float;
 961    case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT:
 962       return fetch_bptc_rgb_unsigned_float;
 963    default:
 964       return NULL;
 965    }
 966 }
 967
 968 static void
 969 write_bits(struct bit_writer *writer, int n_bits, int value)
 970 {
 971    do {
 972       if (n_bits + writer->pos >= 8) {
 973          *(writer->dst++) = writer->buf | (value << writer->pos);
 974          writer->buf = 0;
 975          value >>= (8 - writer->pos);
 976          n_bits -= (8 - writer->pos);
 977          writer->pos = 0;
 978       } else {
 979          writer->buf |= value << writer->pos;
 980          writer->pos += n_bits;
 981          break;
 982       }
 983    } while (n_bits > 0);
 984 }
 985
 986 static void
 987 get_average_luminance_alpha_unorm(int width, int height,
 988                                   const uint8_t *src, int src_rowstride,
 989                                   int *average_luminance, int *average_alpha)
 990 {
 991    int luminance_sum = 0, alpha_sum = 0;
 992    int y, x;
 993
 994    for (y = 0; y < height; y++) {
 995       for (x = 0; x < width; x++) {
 996          luminance_sum += src[0] + src[1] + src[2];
 997          alpha_sum += src[3];
 998          src += 4;
 999       }
1000       src += src_rowstride - width * 4;
1001    }
1002
1003    *average_luminance = luminance_sum / (width * height);
1004    *average_alpha = alpha_sum / (width * height);
1005 }
1006
/**
 * Chooses two RGBA endpoints for a region of 4-byte RGBA texels.
 *
 * The colour and the alpha endpoints are chosen independently. Texels are
 * split into a "left" group (value below the supplied average) and a
 * "right" group; each endpoint is the average of one group. Colour is
 * split on R + G + B, alpha on the alpha value itself. If one group turns
 * out to be empty both endpoints are set to the average of all texels.
 *
 * Finally the endpoints may be swapped so that the first texel of the
 * region lies on the endpoint 0 side of the midpoint, which keeps the
 * most-significant bit of its index zero.
 *
 * \param width, height      size of the region in texels
 * \param src                first texel of the region
 * \param src_rowstride      distance in bytes between successive rows
 * \param average_luminance  average of R + G + B over the region
 * \param average_alpha      average alpha over the region
 * \param endpoints          receives endpoints[0] and endpoints[1] (RGBA)
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   const int n_texels = width * height;
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         /* Assign the colour to the left or right group */
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Assign the alpha to the left or right group. The comparison
          * must use the alpha component (p[3]); comparing any colour
          * component against the average alpha would group the alpha
          * values arbitrarily. */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      /* Skip whatever lies between the end of this row and the next */
      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == n_texels) {
      /* One group is empty: use the overall average for both endpoints */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / n_texels;
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (n_texels - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == n_texels) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / n_texels;
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (n_texels - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1103
1104 static void
1105 write_rgb_indices_unorm(struct bit_writer *writer,
1106                         int src_width, int src_height,
1107                         const uint8_t *src, int src_rowstride,
1108                         uint8_t endpoints[][4])
1109 {
1110    int luminance;
1111    int endpoint_luminances[2];
1112    int endpoint;
1113    int index;
1114    int y, x;
1115
1116    for (endpoint = 0; endpoint < 2; endpoint++) {
1117       endpoint_luminances[endpoint] =
1118          endpoints[endpoint][0] +
1119          endpoints[endpoint][1] +
1120          endpoints[endpoint][2];
1121    }
1122
1123    /* If the endpoints have the same luminance then we'll just use index 0 for
1124     * all of the texels */
1125    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1126       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1127       return;
1128    }
1129
1130    for (y = 0; y < src_height; y++) {
1131       for (x = 0; x < src_width; x++) {
1132          luminance = src[0] + src[1] + src[2];
1133
1134          index = ((luminance - endpoint_luminances[0]) * 3 /
1135                   (endpoint_luminances[1] - endpoint_luminances[0]));
1136          if (index < 0)
1137             index = 0;
1138          else if (index > 3)
1139             index = 3;
1140
1141          assert(x != 0 || y != 0 || index < 2);
1142
1143          write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1144
1145          src += 4;
1146       }
1147
1148       /* Pad the indices out to the block size */
1149       if (src_width < BLOCK_SIZE)
1150          write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1151
1152       src += src_rowstride - src_width * 4;
1153    }
1154
1155    /* Pad the indices out to the block size */
1156    if (src_height < BLOCK_SIZE)
1157       write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1158 }
1159
1160 static void
1161 write_alpha_indices_unorm(struct bit_writer *writer,
1162                           int src_width, int src_height,
1163                           const uint8_t *src, int src_rowstride,
1164                           uint8_t endpoints[][4])
1165 {
1166    int index;
1167    int y, x;
1168
1169    /* If the endpoints have the same alpha then we'll just use index 0 for
1170     * all of the texels */
1171    if (endpoints[0][3] == endpoints[1][3]) {
1172       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1173       return;
1174    }
1175
1176    for (y = 0; y < src_height; y++) {
1177       for (x = 0; x < src_width; x++) {
1178          index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1179                   ((int) endpoints[1][3] - endpoints[0][3]));
1180          if (index < 0)
1181             index = 0;
1182          else if (index > 7)
1183             index = 7;
1184
1185          assert(x != 0 || y != 0 || index < 4);
1186
1187          /* The first index has one less bit */
1188          write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1189
1190          src += 4;
1191       }
1192
1193       /* Pad the indices out to the block size */
1194       if (src_width < BLOCK_SIZE)
1195          write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1196
1197       src += src_rowstride - src_width * 4;
1198    }
1199
1200    /* Pad the indices out to the block size */
1201    if (src_height < BLOCK_SIZE)
1202       write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1203 }
1204
1205 static void
1206 compress_rgba_unorm_block(int src_width, int src_height,
1207                           const uint8_t *src, int src_rowstride,
1208                           uint8_t *dst)
1209 {
1210    int average_luminance, average_alpha;
1211    uint8_t endpoints[2][4];
1212    struct bit_writer writer;
1213    int component, endpoint;
1214
1215    get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1216                                      &average_luminance, &average_alpha);
1217    get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1218                             average_luminance, average_alpha,
1219                             endpoints);
1220
1221    writer.dst = dst;
1222    writer.pos = 0;
1223    writer.buf = 0;
1224
1225    write_bits(&writer, 5, 0x10); /* mode 4 */
1226    write_bits(&writer, 2, 0); /* rotation 0 */
1227    write_bits(&writer, 1, 0); /* index selection bit */
1228
1229    /* Write the color endpoints */
1230    for (component = 0; component < 3; component++)
1231       for (endpoint = 0; endpoint < 2; endpoint++)
1232          write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1233
1234    /* Write the alpha endpoints */
1235    for (endpoint = 0; endpoint < 2; endpoint++)
1236       write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1237
1238    write_rgb_indices_unorm(&writer,
1239                            src_width, src_height,
1240                            src, src_rowstride,
1241                            endpoints);
1242    write_alpha_indices_unorm(&writer,
1243                              src_width, src_height,
1244                              src, src_rowstride,
1245                              endpoints);
1246 }
1247
1248 static void
1249 compress_rgba_unorm(int width, int height,
1250                     const uint8_t *src, int src_rowstride,
1251                     uint8_t *dst, int dst_rowstride)
1252 {
1253    int dst_row_diff;
1254    int y, x;
1255
1256    if (dst_rowstride >= width * 4)
1257       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1258    else
1259       dst_row_diff = 0;
1260
1261    for (y = 0; y < height; y += BLOCK_SIZE) {
1262       for (x = 0; x < width; x += BLOCK_SIZE) {
1263          compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1264                                    MIN2(height - y, BLOCK_SIZE),
1265                                    src + x * 4 + y * src_rowstride,
1266                                    src_rowstride,
1267                                    dst);
1268          dst += BLOCK_BYTES;
1269       }
1270       dst += dst_row_diff;
1271    }
1272 }
1273
1274 GLboolean
1275 _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
1276 {
1277    const GLubyte *pixels;
1278    const GLubyte *tempImage = NULL;
1279    int rowstride;
1280
1281    if (srcFormat != GL_RGBA ||
1282        srcType != GL_UNSIGNED_BYTE ||
1283        ctx->_ImageTransferState ||
1284        srcPacking->SwapBytes) {
1285       /* convert image to RGBA/ubyte */
1286       GLubyte *tempImageSlices[1];
1287       int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
1288       tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
1289       if (!tempImage)
1290          return GL_FALSE; /* out of memory */
1291       tempImageSlices[0] = (GLubyte *) tempImage;
1292       _mesa_texstore(ctx, dims,
1293                      baseInternalFormat,
1294                      MESA_FORMAT_R8G8B8A8_UNORM,
1295                      rgbaRowStride, tempImageSlices,
1296                      srcWidth, srcHeight, srcDepth,
1297                      srcFormat, srcType, srcAddr,
1298                      srcPacking);
1299
1300       pixels = tempImage;
1301       rowstride = srcWidth * 4;
1302    } else {
1303       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
1304                                      srcFormat, srcType, 0, 0);
1305       rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
1306                                          srcFormat, srcType);
1307    }
1308
1309    compress_rgba_unorm(srcWidth, srcHeight,
1310                        pixels, rowstride,
1311                        dstSlices[0], dstRowStride);
1312
1313    free((void *) tempImage);
1314
1315    return GL_TRUE;
1316 }
1317
/**
 * Returns the average of R + G + B over a region of 3-float RGB texels.
 *
 * \param width, height  size of the region in texels
 * \param src            first texel of the region
 * \param src_rowstride  distance in bytes between successive rows
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Step over whatever separates the end of this row from the next;
       * the stride is in bytes, the pointer counts floats. */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1335
/**
 * Clamps \p value to the range [-65504, 65504] when \p is_signed is true
 * and to [0, 65504] otherwise.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1354
/**
 * Chooses two RGB endpoints for a region of 3-float RGB texels.
 *
 * Texels whose R + G + B is below \p average_luminance form the "left"
 * group and the others the "right" group; each endpoint is the average of
 * one group. If one group is empty both endpoints become the average of
 * all texels. The endpoints are then clamped with clamp_value() and may be
 * swapped so that the first texel lies on the endpoint 0 side of the
 * midpoint.
 *
 * \param width, height      size of the region in texels
 * \param src                first texel of the region
 * \param src_rowstride      distance in bytes between successive rows
 * \param average_luminance  average of R + G + B over the region
 * \param endpoints          receives endpoints[0] and endpoints[1]
 * \param is_signed          passed on to clamp_value()
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *p = src;
   float sums[2][3];
   float lum0, lum1;
   float midpoint;
   int left_endpoint_count = 0;
   int row, col, e, c;

   memset(sums, 0, sizeof sums);

   /* Accumulate each texel into the sum of its group */
   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, p += 3) {
         const float luminance = p[0] + p[1] + p[2];
         float *group_sum;

         if (luminance < average_luminance) {
            group_sum = sums[0];
            left_endpoint_count++;
         } else {
            group_sum = sums[1];
         }

         for (c = 0; c < 3; c++)
            group_sum[c] += p[c];
      }

      p += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   if (left_endpoint_count == 0 || left_endpoint_count == n_texels) {
      /* One group is empty: use the overall average for both endpoints */
      for (c = 0; c < 3; c++)
         endpoints[0][c] = endpoints[1][c] =
            (sums[0][c] + sums[1][c]) / n_texels;
   } else {
      for (c = 0; c < 3; c++) {
         endpoints[0][c] = sums[0][c] / left_endpoint_count;
         endpoints[1][c] = sums[1][c] / (n_texels - left_endpoint_count);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities */
   for (e = 0; e < 2; e++)
      for (c = 0; c < 3; c++)
         endpoints[e][c] = clamp_value(endpoints[e][c], is_signed);

   /* Swap the endpoints if needed so that the most-significant bit of the
    * first index is zero */
   lum0 = endpoints[0][0] + endpoints[0][1] + endpoints[0][2];
   lum1 = endpoints[1][0] + endpoints[1][1] + endpoints[1][2];
   midpoint = (lum0 + lum1) / 2.0f;

   if ((src[0] + src[1] + src[2] <= midpoint) != (lum0 <= midpoint)) {
      for (c = 0; c < 3; c++) {
         const float held = endpoints[0][c];

         endpoints[0][c] = endpoints[1][c];
         endpoints[1][c] = held;
      }
   }
}
1430
1431 static void
1432 write_rgb_indices_float(struct bit_writer *writer,
1433                         int src_width, int src_height,
1434                         const float *src, int src_rowstride,
1435                         float endpoints[][3])
1436 {
1437    float luminance;
1438    float endpoint_luminances[2];
1439    int endpoint;
1440    int index;
1441    int y, x;
1442
1443    for (endpoint = 0; endpoint < 2; endpoint++) {
1444       endpoint_luminances[endpoint] =
1445          endpoints[endpoint][0] +
1446          endpoints[endpoint][1] +
1447          endpoints[endpoint][2];
1448    }
1449
1450    /* If the endpoints have the same luminance then we'll just use index 0 for
1451     * all of the texels */
1452    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1453       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1454       return;
1455    }
1456
1457    for (y = 0; y < src_height; y++) {
1458       for (x = 0; x < src_width; x++) {
1459          luminance = src[0] + src[1] + src[2];
1460
1461          index = ((luminance - endpoint_luminances[0]) * 15 /
1462                   (endpoint_luminances[1] - endpoint_luminances[0]));
1463          if (index < 0)
1464             index = 0;
1465          else if (index > 15)
1466             index = 15;
1467
1468          assert(x != 0 || y != 0 || index < 8);
1469
1470          write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1471
1472          src += 3;
1473       }
1474
1475       /* Pad the indices out to the block size */
1476       if (src_width < BLOCK_SIZE)
1477          write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1478
1479       src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1480    }
1481
1482    /* Pad the indices out to the block size */
1483    if (src_height < BLOCK_SIZE)
1484       write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1485 }
1486
/**
 * Converts a float endpoint component into the 10-bit value stored in the
 * block.
 *
 * The value is converted with _mesa_float_to_half() and rescaled (by 64/31
 * for unsigned, 32/31 for signed) before the low six bits are dropped. In
 * the signed case a set sign bit (0x8000) is stripped first and the result
 * is negated and masked to ten bits.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   int half;

   if (!is_signed) {
      /* Zero and negative values all map to zero */
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   if (half & 0x8000) {
      const int magnitude = (32 * (half & 0x7fff) / 31) >> 6;

      /* Ten-bit two's complement of the magnitude */
      return -magnitude & ((1 << 10) - 1);
   }

   return (32 * half / 31) >> 6;
}
1516
1517 static void
1518 compress_rgb_float_block(int src_width, int src_height,
1519                          const float *src, int src_rowstride,
1520                          uint8_t *dst,
1521                          bool is_signed)
1522 {
1523    float average_luminance;
1524    float endpoints[2][3];
1525    struct bit_writer writer;
1526    int component, endpoint;
1527    int endpoint_value;
1528
1529    average_luminance =
1530       get_average_luminance_float(src_width, src_height, src, src_rowstride);
1531    get_endpoints_float(src_width, src_height, src, src_rowstride,
1532                        average_luminance, endpoints, is_signed);
1533
1534    writer.dst = dst;
1535    writer.pos = 0;
1536    writer.buf = 0;
1537
1538    write_bits(&writer, 5, 3); /* mode 3 */
1539
1540    /* Write the endpoints */
1541    for (endpoint = 0; endpoint < 2; endpoint++) {
1542       for (component = 0; component < 3; component++) {
1543          endpoint_value =
1544             get_endpoint_value(endpoints[endpoint][component], is_signed);
1545          write_bits(&writer, 10, endpoint_value);
1546       }
1547    }
1548
1549    write_rgb_indices_float(&writer,
1550                            src_width, src_height,
1551                            src, src_rowstride,
1552                            endpoints);
1553 }
1554
1555 static void
1556 compress_rgb_float(int width, int height,
1557                    const float *src, int src_rowstride,
1558                    uint8_t *dst, int dst_rowstride,
1559                    bool is_signed)
1560 {
1561    int dst_row_diff;
1562    int y, x;
1563
1564    if (dst_rowstride >= width * 4)
1565       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1566    else
1567       dst_row_diff = 0;
1568
1569    for (y = 0; y < height; y += BLOCK_SIZE) {
1570       for (x = 0; x < width; x += BLOCK_SIZE) {
1571          compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1572                                   MIN2(height - y, BLOCK_SIZE),
1573                                   src + x * 3 +
1574                                   y * src_rowstride / sizeof (float),
1575                                   src_rowstride,
1576                                   dst,
1577                                   is_signed);
1578          dst += BLOCK_BYTES;
1579       }
1580       dst += dst_row_diff;
1581    }
1582 }
1583
1584 static GLboolean
1585 texstore_bptc_rgb_float(TEXSTORE_PARAMS,
1586                         bool is_signed)
1587 {
1588    const float *pixels;
1589    const float *tempImage = NULL;
1590    int rowstride;
1591
1592    if (srcFormat != GL_RGB ||
1593        srcType != GL_FLOAT ||
1594        ctx->_ImageTransferState ||
1595        srcPacking->SwapBytes) {
1596       /* convert image to RGB/float */
1597       GLfloat *tempImageSlices[1];
1598       int rgbRowStride = 3 * srcWidth * sizeof(GLfloat);
1599       tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLfloat));
1600       if (!tempImage)
1601          return GL_FALSE; /* out of memory */
1602       tempImageSlices[0] = (GLfloat *) tempImage;
1603       _mesa_texstore(ctx, dims,
1604                      baseInternalFormat,
1605                      MESA_FORMAT_RGB_FLOAT32,
1606                      rgbRowStride, (GLubyte **)tempImageSlices,
1607                      srcWidth, srcHeight, srcDepth,
1608                      srcFormat, srcType, srcAddr,
1609                      srcPacking);
1610
1611       pixels = tempImage;
1612       rowstride = srcWidth * sizeof(float) * 3;
1613    } else {
1614       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
1615                                      srcFormat, srcType, 0, 0);
1616       rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
1617                                          srcFormat, srcType);
1618    }
1619
1620    compress_rgb_float(srcWidth, srcHeight,
1621                       pixels, rowstride,
1622                       dstSlices[0], dstRowStride,
1623                       is_signed);
1624
1625    free((void *) tempImage);
1626
1627    return GL_TRUE;
1628 }
1629
1630 GLboolean
1631 _mesa_texstore_bptc_rgb_signed_float(TEXSTORE_PARAMS)
1632 {
1633    assert(dstFormat == MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT);
1634
1635    return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
1636                                   dstFormat, dstRowStride, dstSlices,
1637                                   srcWidth, srcHeight, srcDepth,
1638                                   srcFormat, srcType,
1639                                   srcAddr, srcPacking,
1640                                   true /* signed */);
1641 }
1642
1643 GLboolean
1644 _mesa_texstore_bptc_rgb_unsigned_float(TEXSTORE_PARAMS)
1645 {
1646    assert(dstFormat == MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT);
1647
1648    return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
1649                                   dstFormat, dstRowStride, dstSlices,
1650                                   srcWidth, srcHeight, srcDepth,
1651                                   srcFormat, srcType,
1652                                   srcAddr, srcPacking,
1653                                   false /* unsigned */);
1654 }