src/mesa/main/texcompress_bptc.c

   1 /*
   2  * Copyright (C) 2014 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file texcompress_bptc.c
  26  * GL_ARB_texture_compression_bptc support.
  27  */
  28
  29 #include <stdbool.h>
  30 #include "texcompress.h"
  31 #include "texcompress_bptc.h"
  32 #include "util/format_srgb.h"
  33 #include "util/half_float.h"
  34 #include "texstore.h"
  35 #include "macros.h"
  36 #include "image.h"
  37
  38 #define BLOCK_SIZE 4
  39 #define N_PARTITIONS 64
  40 #define BLOCK_BYTES 16
  41
  /* Layout parameters of one BPTC UNORM block mode.  bptc_unorm_modes
   * initializes these fields positionally, so their order must not change.
   */
  42 struct bptc_unorm_mode {
  43    int n_subsets;                /* endpoint pairs in the block; the decoder handles 1, 2 or 3 */
  44    int n_partition_bits;         /* width of the partition number field (0 if absent) */
  45    bool has_rotation_bits;       /* a 2-bit rotation field follows the partition number */
  46    bool has_index_selection_bit; /* a 1-bit selector chooses which index set colors use */
  47    int n_color_bits;             /* stored bits per R/G/B endpoint value, excluding any p-bit */
  48    int n_alpha_bits;             /* stored bits per alpha endpoint value; 0 forces alpha to 255 */
  49    bool has_endpoint_pbits;      /* one extra low bit is stored per endpoint */
  50    bool has_shared_pbits;        /* one extra low bit is stored per subset, shared by both endpoints */
  51    int n_index_bits;             /* bits per texel in the primary index set */
  52    int n_secondary_index_bits;   /* bits per texel in the secondary index set (0 if absent) */
  53 };
  54
  /* One run of consecutive block bits and the place it lands in the
   * endpoint array.  Lists of these are terminated by endpoint == -1.
   */
  55 struct bptc_float_bitfield {
  56    int8_t endpoint;   /* destination endpoint index; -1 marks the end of the list */
  57    uint8_t component; /* destination component index within that endpoint */
  58    uint8_t offset;    /* bit position in the destination value where the field is placed */
  59    uint8_t n_bits;    /* number of bits consumed from the block */
  60    bool reverse;      /* the field's bits are stored in reversed order */
  61 };
  62
  /* Layout parameters of one BPTC float block mode.  bptc_float_modes
   * initializes these fields positionally, so their order must not change.
   */
  63 struct bptc_float_mode {
  64    bool reserved;              /* reserved mode: the fetch code returns early without decoding */
  65    bool transformed_endpoints; /* endpoints after the first are signed deltas from endpoint 0 */
  66    int n_partition_bits;       /* partition field width; non-zero selects two subsets (4 endpoints) */
  67    int n_endpoint_bits;        /* precision of the endpoint values before unquantizing */
  68    int n_index_bits;           /* bits per texel index */
  69    int n_delta_bits[3];        /* per-component width used to sign-extend the deltas */
  70    struct bptc_float_bitfield bitfields[24]; /* endpoint bit layout, terminated by endpoint == -1 */
  71 };
  72
  /* State for writing a bit stream.
   * NOTE(review): not referenced in this part of the file; presumably used by
   * the block encoder further down -- confirm field semantics there.
   */
  73 struct bit_writer {
  74    uint8_t buf;  /* presumably the partially filled byte being assembled -- TODO confirm */
  75    int pos;      /* presumably the number of bits already held in buf -- TODO confirm */
  76    uint8_t *dst; /* presumably where completed bytes are written -- TODO confirm */
  77 };
  78
  /* Layout of every BPTC UNORM mode, indexed by mode number (the decoder uses
   * ffs(block[0]) - 1).  Columns follow struct bptc_unorm_mode in order:
   * n_subsets, n_partition_bits, has_rotation_bits, has_index_selection_bit,
   * n_color_bits, n_alpha_bits, has_endpoint_pbits, has_shared_pbits,
   * n_index_bits, n_secondary_index_bits.
   */
  79 static const struct bptc_unorm_mode
  80 bptc_unorm_modes[] = {
  81    /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
  82    /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
  83    /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
  84    /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
  85    /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
  86    /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
  87    /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
  88    /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
  89 };
  90
  /* Layout of every BPTC float mode.  The index is the mode number computed
   * by the fetch code: the two 2-bit modes come first, followed by the 5-bit
   * modes; the comment above each entry shows the mode's bit pattern.
   *
   * Each entry lists, in struct bptc_float_mode order: reserved,
   * transformed_endpoints, n_partition_bits, n_endpoint_bits, n_index_bits,
   * n_delta_bits[3] and then the bitfield list.  Every bitfield is
   * { endpoint, component, offset, n_bits, reverse }; the fields are consumed
   * from the block in the order listed and the list ends at { -1 }.
   */
  91 static const struct bptc_float_mode
  92 bptc_float_modes[] = {
  93    /* 00 */
  94    { false, true, 5, 10, 3, { 5, 5, 5 },
  95      { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
  96        { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
  97        { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
  98        { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
  99        { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
 100        { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
 101        { 3, 2, 3, 1, false },
 102        { -1 } }
 103    },
 104    /* 01 */
 105    { false, true, 5, 7, 3, { 6, 6, 6 },
 106      { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
 107        { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
 108        { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
 109        { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
 110        { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
 111        { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
 112        { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
 113        { 2, 0, 0, 6, false },
 114        { 3, 0, 0, 6, false },
 115        { -1 } }
 116    },
 117    /* 00010 */
 118    { false, true, 5, 11, 3, { 5, 4, 4 },
 119      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
 120        { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
 121        { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
 122        { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
 123        { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
 124        { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
 125        { -1 } }
 126    },
 127    /* 00011 */
 128    { false, false, 0, 10, 4, { 10, 10, 10 },
 129      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
 130        { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
 131        { -1 } }
 132    },
 133    /* 00110 */
 134    { false, true, 5, 11, 3, { 4, 5, 4 },
 135      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
 136        { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
 137        { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
 138        { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
 139        { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
 140        { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
 141        { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
 142        { -1 } }
 143    },
 144    /* 00111 */
 145    { false, true, 0, 11, 4, { 9, 9, 9 },
 146      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
 147        { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
 148        { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
 149        { -1 } }
 150    },
 151    /* 01010 */
 152    { false, true, 5, 11, 3, { 4, 4, 5 },
 153      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
 154        { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
 155        { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
 156        { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
 157        { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
 158        { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
 159        { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
 160        { -1 } }
 161    },
 162    /* 01011 */
 163    { false, true, 0, 12, 4, { 8, 8, 8 },
 164      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
 165        { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
 166        { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
 167        { -1 } }
 168    },
 169    /* 01110 */
 170    { false, true, 5, 9, 3, { 5, 5, 5 },
 171      { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
 172        { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
 173        { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
 174        { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
 175        { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
 176        { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
 177        { 3, 2, 3, 1, false },
 178        { -1 } }
 179    },
 180    /* 01111 */
 181    { false, true, 0, 16, 4, { 4, 4, 4 },
 182      { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
 183        { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
 184        { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
 185        { -1 } }
 186    },
 187    /* 10010 */
 188    { false, true, 5, 8, 3, { 6, 5, 5 },
 189      { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
 190        { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
 191        { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
 192        { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
 193        { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
 194        { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
 195        { 3, 0, 0, 6, false },
 196        { -1 } }
 197    },
 198    /* 10011 */
 199    { true /* reserved */ },
 200    /* 10110 */
 201    { false, true, 5, 8, 3, { 5, 6, 5 },
 202      { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
 203        { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
 204        { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
 205        { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
 206        { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
 207        { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
 208        { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
 209        { -1 } }
 210    },
 211    /* 10111 */
 212    { true /* reserved */ },
 213    /* 11010 */
 214    { false, true, 5, 8, 3, { 5, 5, 6 },
 215      { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
 216        { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
 217        { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
 218        { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
 219        { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
 220        { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
 221        { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
 222        { -1 } }
 223    },
 224    /* 11011 */
 225    { true /* reserved */ },
 226    /* 11110 */
 227    { false, false, 5, 6, 3, { 6, 6, 6 },
 228      { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
 229        { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
 230        { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
 231        { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
 232        { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
 233        { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
 234        { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
 235        { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
 236        { -1 } }
 237    },
 238    /* 11111 */
 239    { true /* reserved */ },
 240 };
 241
 242 /* This partition table is used when the mode has two subsets. Each
 243  * partition is represented by a 32-bit value which gives 2 bits per texel
 244  * within the block. The value of the two bits represents which subset to use
 245  * (0 or 1).
 246  */
 /* The table is indexed by the partition number read from the block.  The
  * decoder reads texel t's subset as (value >> (t * 2)) & 3, so texel 0
  * occupies the two least-significant bits.
  */
 247 static const uint32_t
 248 partition_table1[N_PARTITIONS] = {
 249    0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
 250    0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
 251    0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
 252    0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
 253    0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
 254    0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
 255    0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
 256    0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
 257    0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
 258    0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
 259    0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
 260    0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
 261    0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
 262    0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
 263    0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
 264    0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
 265 };
 266
 267 /* This partition table is used when the mode has three subsets. In this case
 268  * the values can be 0, 1 or 2.
 269  */
 /* The table is indexed by the partition number read from the block.  As
  * with the two-subset table, the decoder reads texel t's subset as
  * (value >> (t * 2)) & 3.
  */
 270 static const uint32_t
 271 partition_table2[N_PARTITIONS] = {
 272    0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
 273    0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
 274    0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
 275    0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
 276    0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
 277    0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
 278    0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
 279    0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
 280    0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
 281    0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
 282    0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
 283    0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
 284    0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
 285    0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
 286    0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
 287    0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
 288 };
 289
 /* Texel position of the anchor index of every subset other than the first,
  * for each partition number.  Row 0 serves two-subset modes; rows 1 and 2
  * serve three-subset modes.  The first subset's anchor is always texel 0
  * and is handled in code rather than stored here.  The decoder reads the
  * index of an anchor texel with one bit fewer than the other indices.
  */
 290 static const uint8_t
 291 anchor_indices[][N_PARTITIONS] = {
 292    /* Anchor index values for the second subset of two-subset partitioning */
 293    {
 294       0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
 295       0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
 296       0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
 297       0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
 298    },
 299
 300    /* Anchor index values for the second subset of three-subset partitioning */
 301    {
 302       0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
 303       0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
 304       0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
 305       0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
 306    },
 307
 308    /* Anchor index values for the third subset of three-subset
 309     * partitioning
 310     */
 311    {
 312       0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
 313       0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
 314       0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
 315       0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
 316    }
 317 };
 318
 319 static int
 320 extract_bits(const uint8_t *block,
 321              int offset,
 322              int n_bits)
 323 {
 324    int byte_index = offset / 8;
 325    int bit_index = offset % 8;
 326    int n_bits_in_byte = MIN2(n_bits, 8 - bit_index);
 327    int result = 0;
 328    int bit = 0;
 329
 330    while (true) {
 331       result |= ((block[byte_index] >> bit_index) &
 332                  ((1 << n_bits_in_byte) - 1)) << bit;
 333
 334       n_bits -= n_bits_in_byte;
 335
 336       if (n_bits <= 0)
 337          return result;
 338
 339       bit += n_bits_in_byte;
 340       byte_index++;
 341       bit_index = 0;
 342       n_bits_in_byte = MIN2(n_bits, 8);
 343    }
 344 }
 345
 346 static uint8_t
 347 expand_component(uint8_t byte,
 348                  int n_bits)
 349 {
 350    /* Expands a n-bit quantity into a byte by copying the most-significant
 351     * bits into the unused least-significant bits.
 352     */
 353    return byte << (8 - n_bits) | (byte >> (2 * n_bits - 8));
 354 }
 355
 356 static int
 357 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
 358                         const uint8_t *block,
 359                         int bit_offset,
 360                         uint8_t endpoints[][4])
 361 {
 362    int component;
 363    int subset;
 364    int endpoint;
 365    int pbit;
 366    int n_components;
 367
 368    /* Extract each color component */
 369    for (component = 0; component < 3; component++) {
 370       for (subset = 0; subset < mode->n_subsets; subset++) {
 371          for (endpoint = 0; endpoint < 2; endpoint++) {
 372             endpoints[subset * 2 + endpoint][component] =
 373                extract_bits(block, bit_offset, mode->n_color_bits);
 374             bit_offset += mode->n_color_bits;
 375          }
 376       }
 377    }
 378
 379    /* Extract the alpha values */
 380    if (mode->n_alpha_bits > 0) {
 381       for (subset = 0; subset < mode->n_subsets; subset++) {
 382          for (endpoint = 0; endpoint < 2; endpoint++) {
 383             endpoints[subset * 2 + endpoint][3] =
 384                extract_bits(block, bit_offset, mode->n_alpha_bits);
 385             bit_offset += mode->n_alpha_bits;
 386          }
 387       }
 388
 389       n_components = 4;
 390    } else {
 391       for (subset = 0; subset < mode->n_subsets; subset++)
 392          for (endpoint = 0; endpoint < 2; endpoint++)
 393             endpoints[subset * 2 + endpoint][3] = 255;
 394
 395       n_components = 3;
 396    }
 397
 398    /* Add in the p-bits */
 399    if (mode->has_endpoint_pbits) {
 400       for (subset = 0; subset < mode->n_subsets; subset++) {
 401          for (endpoint = 0; endpoint < 2; endpoint++) {
 402             pbit = extract_bits(block, bit_offset, 1);
 403             bit_offset += 1;
 404
 405             for (component = 0; component < n_components; component++) {
 406                endpoints[subset * 2 + endpoint][component] <<= 1;
 407                endpoints[subset * 2 + endpoint][component] |= pbit;
 408             }
 409          }
 410       }
 411    } else if (mode->has_shared_pbits) {
 412       for (subset = 0; subset < mode->n_subsets; subset++) {
 413          pbit = extract_bits(block, bit_offset, 1);
 414          bit_offset += 1;
 415
 416          for (endpoint = 0; endpoint < 2; endpoint++) {
 417             for (component = 0; component < n_components; component++) {
 418                endpoints[subset * 2 + endpoint][component] <<= 1;
 419                endpoints[subset * 2 + endpoint][component] |= pbit;
 420             }
 421          }
 422       }
 423    }
 424
 425    /* Expand the n-bit values to a byte */
 426    for (subset = 0; subset < mode->n_subsets; subset++) {
 427       for (endpoint = 0; endpoint < 2; endpoint++) {
 428          for (component = 0; component < 3; component++) {
 429             endpoints[subset * 2 + endpoint][component] =
 430                expand_component(endpoints[subset * 2 + endpoint][component],
 431                                 mode->n_color_bits +
 432                                 mode->has_endpoint_pbits +
 433                                 mode->has_shared_pbits);
 434          }
 435
 436          if (mode->n_alpha_bits > 0) {
 437             endpoints[subset * 2 + endpoint][3] =
 438                expand_component(endpoints[subset * 2 + endpoint][3],
 439                                 mode->n_alpha_bits +
 440                                 mode->has_endpoint_pbits +
 441                                 mode->has_shared_pbits);
 442          }
 443       }
 444    }
 445
 446    return bit_offset;
 447 }
 448
 449 static bool
 450 is_anchor(int n_subsets,
 451           int partition_num,
 452           int texel)
 453 {
 454    if (texel == 0)
 455       return true;
 456
 457    switch (n_subsets) {
 458    case 1:
 459       return false;
 460    case 2:
 461       return anchor_indices[0][partition_num] == texel;
 462    case 3:
 463       return (anchor_indices[1][partition_num] == texel ||
 464               anchor_indices[2][partition_num] == texel);
 465    default:
 466       assert(false);
 467       return false;
 468    }
 469 }
 470
 471 static int
 472 count_anchors_before_texel(int n_subsets,
 473                            int partition_num,
 474                            int texel)
 475 {
 476    int count = 1;
 477
 478    if (texel == 0)
 479       return 0;
 480
 481    switch (n_subsets) {
 482    case 1:
 483       break;
 484    case 2:
 485       if (texel > anchor_indices[0][partition_num])
 486          count++;
 487       break;
 488    case 3:
 489       if (texel > anchor_indices[1][partition_num])
 490          count++;
 491       if (texel > anchor_indices[2][partition_num])
 492          count++;
 493       break;
 494    default:
 495       assert(false);
 496       return 0;
 497    }
 498
 499    return count;
 500 }
 501
 502 static int32_t
 503 interpolate(int32_t a, int32_t b,
 504             int index,
 505             int index_bits)
 506 {
 507    static const uint8_t weights2[] = { 0, 21, 43, 64 };
 508    static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
 509    static const uint8_t weights4[] =
 510       { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
 511    static const uint8_t *weights[] = {
 512       NULL, NULL, weights2, weights3, weights4
 513    };
 514    int weight;
 515
 516    weight = weights[index_bits][index];
 517
 518    return ((64 - weight) * a + weight * b + 32) >> 6;
 519 }
 520
 521 static void
 522 apply_rotation(int rotation,
 523                uint8_t *result)
 524 {
 525    uint8_t t;
 526
 527    if (rotation == 0)
 528       return;
 529
 530    rotation--;
 531
 532    t = result[rotation];
 533    result[rotation] = result[3];
 534    result[3] = t;
 535 }
 536
 537 static void
 538 fetch_rgba_unorm_from_block(const uint8_t *block,
 539                             uint8_t *result,
 540                             int texel)
 541 {
 542    int mode_num = ffs(block[0]);
 543    const struct bptc_unorm_mode *mode;
 544    int bit_offset, secondary_bit_offset;
 545    int partition_num;
 546    int subset_num;
 547    int rotation;
 548    int index_selection;
 549    int index_bits;
 550    int indices[2];
 551    int index;
 552    int anchors_before_texel;
 553    bool anchor;
 554    uint8_t endpoints[3 * 2][4];
 555    uint32_t subsets;
 556    int component;
 557
 558    if (mode_num == 0) {
 559       /* According to the spec this mode is reserved and shouldn't be used. */
 560       memset(result, 0, 3);
 561       result[3] = 0xff;
 562       return;
 563    }
 564
 565    mode = bptc_unorm_modes + mode_num - 1;
 566    bit_offset = mode_num;
 567
 568    partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
 569    bit_offset += mode->n_partition_bits;
 570
 571    switch (mode->n_subsets) {
 572    case 1:
 573       subsets = 0;
 574       break;
 575    case 2:
 576       subsets = partition_table1[partition_num];
 577       break;
 578    case 3:
 579       subsets = partition_table2[partition_num];
 580       break;
 581    default:
 582       assert(false);
 583       return;
 584    }
 585
 586    if (mode->has_rotation_bits) {
 587       rotation = extract_bits(block, bit_offset, 2);
 588       bit_offset += 2;
 589    } else {
 590       rotation = 0;
 591    }
 592
 593    if (mode->has_index_selection_bit) {
 594       index_selection = extract_bits(block, bit_offset, 1);
 595       bit_offset++;
 596    } else {
 597       index_selection = 0;
 598    }
 599
 600    bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
 601
 602    anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
 603                                                      partition_num, texel);
 604
 605    /* Calculate the offset to the secondary index */
 606    secondary_bit_offset = (bit_offset +
 607                            BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
 608                            mode->n_subsets +
 609                            mode->n_secondary_index_bits * texel -
 610                            anchors_before_texel);
 611
 612    /* Calculate the offset to the primary index for this texel */
 613    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
 614
 615    subset_num = (subsets >> (texel * 2)) & 3;
 616
 617    anchor = is_anchor(mode->n_subsets, partition_num, texel);
 618
 619    index_bits = mode->n_index_bits;
 620    if (anchor)
 621       index_bits--;
 622    indices[0] = extract_bits(block, bit_offset, index_bits);
 623
 624    if (mode->n_secondary_index_bits) {
 625       index_bits = mode->n_secondary_index_bits;
 626       if (anchor)
 627          index_bits--;
 628       indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
 629    }
 630
 631    index = indices[index_selection];
 632    index_bits = (index_selection ?
 633                  mode->n_secondary_index_bits :
 634                  mode->n_index_bits);
 635
 636    for (component = 0; component < 3; component++)
 637       result[component] = interpolate(endpoints[subset_num * 2][component],
 638                                       endpoints[subset_num * 2 + 1][component],
 639                                       index,
 640                                       index_bits);
 641
 642    /* Alpha uses the opposite index from the color components */
 643    if (mode->n_secondary_index_bits && !index_selection) {
 644       index = indices[1];
 645       index_bits = mode->n_secondary_index_bits;
 646    } else {
 647       index = indices[0];
 648       index_bits = mode->n_index_bits;
 649    }
 650
 651    result[3] = interpolate(endpoints[subset_num * 2][3],
 652                            endpoints[subset_num * 2 + 1][3],
 653                            index,
 654                            index_bits);
 655
 656    apply_rotation(rotation, result);
 657 }
 658
 659 static void
 660 fetch_bptc_rgba_unorm_bytes(const GLubyte *map,
 661                             GLint rowStride, GLint i, GLint j,
 662                             GLubyte *texel)
 663 {
 664    const GLubyte *block;
 665
 666    block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
 667
 668    fetch_rgba_unorm_from_block(block, texel, (i % 4) + (j % 4) * 4);
 669 }
 670
 671 static void
 672 fetch_bptc_rgba_unorm(const GLubyte *map,
 673                       GLint rowStride, GLint i, GLint j,
 674                       GLfloat *texel)
 675 {
 676    GLubyte texel_bytes[4];
 677
 678    fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
 679
 680    texel[RCOMP] = UBYTE_TO_FLOAT(texel_bytes[0]);
 681    texel[GCOMP] = UBYTE_TO_FLOAT(texel_bytes[1]);
 682    texel[BCOMP] = UBYTE_TO_FLOAT(texel_bytes[2]);
 683    texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
 684 }
 685
 686 static void
 687 fetch_bptc_srgb_alpha_unorm(const GLubyte *map,
 688                             GLint rowStride, GLint i, GLint j,
 689                             GLfloat *texel)
 690 {
 691    GLubyte texel_bytes[4];
 692
 693    fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
 694
 695    texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[0]);
 696    texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[1]);
 697    texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[2]);
 698    texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
 699 }
 700
 701 static int32_t
 702 sign_extend(int32_t value,
 703             int n_bits)
 704 {
 705    if ((value & (1 << (n_bits - 1)))) {
 706       value |= (~(int32_t) 0) << n_bits;
 707    }
 708
 709    return value;
 710 }
 711
 712 static int
 713 signed_unquantize(int value, int n_endpoint_bits)
 714 {
 715    bool sign;
 716
 717    if (n_endpoint_bits >= 16)
 718       return value;
 719
 720    if (value == 0)
 721       return 0;
 722
 723    sign = false;
 724
 725    if (value < 0) {
 726       sign = true;
 727       value = -value;
 728    }
 729
 730    if (value >= (1 << (n_endpoint_bits - 1)) - 1)
 731       value = 0x7fff;
 732    else
 733       value = ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
 734
 735    if (sign)
 736       value = -value;
 737
 738    return value;
 739 }
 740
 741 static int
 742 unsigned_unquantize(int value, int n_endpoint_bits)
 743 {
 744    if (n_endpoint_bits >= 15)
 745       return value;
 746
 747    if (value == 0)
 748       return 0;
 749
 750    if (value == (1 << n_endpoint_bits) - 1)
 751       return 0xffff;
 752
 753    return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
 754 }
 755
 756 static int
 757 extract_float_endpoints(const struct bptc_float_mode *mode,
 758                         const uint8_t *block,
 759                         int bit_offset,
 760                         int32_t endpoints[][3],
 761                         bool is_signed)
 762 {
 763    const struct bptc_float_bitfield *bitfield;
 764    int endpoint, component;
 765    int n_endpoints;
 766    int value;
 767    int i;
 768
 769    if (mode->n_partition_bits)
 770       n_endpoints = 4;
 771    else
 772       n_endpoints = 2;
 773
 774    memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
 775
 776    for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
 777       value = extract_bits(block, bit_offset, bitfield->n_bits);
 778       bit_offset += bitfield->n_bits;
 779
 780       if (bitfield->reverse) {
 781          for (i = 0; i < bitfield->n_bits; i++) {
 782             if (value & (1 << i))
 783                endpoints[bitfield->endpoint][bitfield->component] |=
 784                   1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
 785          }
 786       } else {
 787          endpoints[bitfield->endpoint][bitfield->component] |=
 788             value << bitfield->offset;
 789       }
 790    }
 791
 792    if (mode->transformed_endpoints) {
 793       /* The endpoints are specified as signed offsets from e0 */
 794       for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
 795          for (component = 0; component < 3; component++) {
 796             value = sign_extend(endpoints[endpoint][component],
 797                                 mode->n_delta_bits[component]);
 798             endpoints[endpoint][component] =
 799                ((endpoints[0][component] + value) &
 800                 ((1 << mode->n_endpoint_bits) - 1));
 801          }
 802       }
 803    }
 804
 805    if (is_signed) {
 806       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
 807          for (component = 0; component < 3; component++) {
 808             value = sign_extend(endpoints[endpoint][component],
 809                                 mode->n_endpoint_bits);
 810             endpoints[endpoint][component] =
 811                signed_unquantize(value, mode->n_endpoint_bits);
 812          }
 813       }
 814    } else {
 815       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
 816          for (component = 0; component < 3; component++) {
 817             endpoints[endpoint][component] =
 818                unsigned_unquantize(endpoints[endpoint][component],
 819                                    mode->n_endpoint_bits);
 820          }
 821       }
 822    }
 823
 824    return bit_offset;
 825 }
 826
 827 static int32_t
 828 finish_unsigned_unquantize(int32_t value)
 829 {
 830    return value * 31 / 64;
 831 }
 832
 833 static int32_t
 834 finish_signed_unquantize(int32_t value)
 835 {
 836    if (value < 0)
 837       return (-value * 31 / 32) | 0x8000;
 838    else
 839       return value * 31 / 32;
 840 }
 841
 842 static void
 843 fetch_rgb_float_from_block(const uint8_t *block,
 844                            float *result,
 845                            int texel,
 846                            bool is_signed)
 847 {
 848    int mode_num;
 849    const struct bptc_float_mode *mode;
 850    int bit_offset;
 851    int partition_num;
 852    int subset_num;
 853    int index_bits;
 854    int index;
 855    int anchors_before_texel;
 856    int32_t endpoints[2 * 2][3];
 857    uint32_t subsets;
 858    int n_subsets;
 859    int component;
 860    int32_t value;
 861
 862    if (block[0] & 0x2) {
 863       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
 864       bit_offset = 5;
 865    } else {
 866       mode_num = block[0] & 3;
 867       bit_offset = 2;
 868    }
 869
 870    mode = bptc_float_modes + mode_num;
 871
 872    if (mode->reserved) {
 873       memset(result, 0, sizeof result[0] * 3);
 874       result[3] = 1.0f;
 875       return;
 876    }
 877
 878    bit_offset = extract_float_endpoints(mode, block, bit_offset,
 879                                         endpoints, is_signed);
 880
 881    if (mode->n_partition_bits) {
 882       partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
 883       bit_offset += mode->n_partition_bits;
 884
 885       subsets = partition_table1[partition_num];
 886       n_subsets = 2;
 887    } else {
 888       partition_num = 0;
 889       subsets = 0;
 890       n_subsets = 1;
 891    }
 892
 893    anchors_before_texel =
 894       count_anchors_before_texel(n_subsets, partition_num, texel);
 895
 896    /* Calculate the offset to the primary index for this texel */
 897    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
 898
 899    subset_num = (subsets >> (texel * 2)) & 3;
 900
 901    index_bits = mode->n_index_bits;
 902    if (is_anchor(n_subsets, partition_num, texel))
 903       index_bits--;
 904    index = extract_bits(block, bit_offset, index_bits);
 905
 906    for (component = 0; component < 3; component++) {
 907       value = interpolate(endpoints[subset_num * 2][component],
 908                           endpoints[subset_num * 2 + 1][component],
 909                           index,
 910                           mode->n_index_bits);
 911
 912       if (is_signed)
 913          value = finish_signed_unquantize(value);
 914       else
 915          value = finish_unsigned_unquantize(value);
 916
 917       result[component] = _mesa_half_to_float(value);
 918    }
 919
 920    result[3] = 1.0f;
 921 }
 922
 923 static void
 924 fetch_bptc_rgb_float(const GLubyte *map,
 925                      GLint rowStride, GLint i, GLint j,
 926                      GLfloat *texel,
 927                      bool is_signed)
 928 {
 929    const GLubyte *block;
 930
 931    block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
 932
 933    fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed);
 934 }
 935
 936 static void
 937 fetch_bptc_rgb_signed_float(const GLubyte *map,
 938                             GLint rowStride, GLint i, GLint j,
 939                             GLfloat *texel)
 940 {
 941    fetch_bptc_rgb_float(map, rowStride, i, j, texel, true);
 942 }
 943
 944 static void
 945 fetch_bptc_rgb_unsigned_float(const GLubyte *map,
 946                               GLint rowStride, GLint i, GLint j,
 947                               GLfloat *texel)
 948 {
 949    fetch_bptc_rgb_float(map, rowStride, i, j, texel, false);
 950 }
 951
 952 compressed_fetch_func
 953 _mesa_get_bptc_fetch_func(mesa_format format)
 954 {
 955    switch (format) {
 956    case MESA_FORMAT_BPTC_RGBA_UNORM:
 957       return fetch_bptc_rgba_unorm;
 958    case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM:
 959       return fetch_bptc_srgb_alpha_unorm;
 960    case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT:
 961       return fetch_bptc_rgb_signed_float;
 962    case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT:
 963       return fetch_bptc_rgb_unsigned_float;
 964    default:
 965       return NULL;
 966    }
 967 }
 968
 969 static void
 970 write_bits(struct bit_writer *writer, int n_bits, int value)
 971 {
 972    do {
 973       if (n_bits + writer->pos >= 8) {
 974          *(writer->dst++) = writer->buf | (value << writer->pos);
 975          writer->buf = 0;
 976          value >>= (8 - writer->pos);
 977          n_bits -= (8 - writer->pos);
 978          writer->pos = 0;
 979       } else {
 980          writer->buf |= value << writer->pos;
 981          writer->pos += n_bits;
 982          break;
 983       }
 984    } while (n_bits > 0);
 985 }
 986
 987 static void
 988 get_average_luminance_alpha_unorm(int width, int height,
 989                                   const uint8_t *src, int src_rowstride,
 990                                   int *average_luminance, int *average_alpha)
 991 {
 992    int luminance_sum = 0, alpha_sum = 0;
 993    int y, x;
 994
 995    for (y = 0; y < height; y++) {
 996       for (x = 0; x < width; x++) {
 997          luminance_sum += src[0] + src[1] + src[2];
 998          alpha_sum += src[3];
 999          src += 4;
1000       }
1001       src += src_rowstride - width * 4;
1002    }
1003
1004    *average_luminance = luminance_sum / (width * height);
1005    *average_alpha = alpha_sum / (width * height);
1006 }
1007
/**
 * Chooses the two RGBA endpoints used to encode a region of 4-byte RGBA
 * texels.
 *
 * The texels are split into two groups around the average luminance
 * (R + G + B) and each colour endpoint is the mean colour of one group.
 * The alpha endpoints are chosen in the same way, independently of the
 * colour, by splitting the texels around the average alpha.  If either
 * split leaves one group empty, both endpoints are set to the mean over
 * the whole region.
 *
 * The endpoints are then swapped where necessary so that the first texel
 * lies on endpoint 0's side of the midpoint, which keeps the
 * most-significant bit of its index zero.
 *
 * \param width, height      size of the region in texels
 * \param src                first texel of the region
 * \param src_rowstride      distance in bytes between consecutive rows
 * \param average_luminance  mean of R + G + B over the region
 * \param average_alpha      mean alpha over the region
 * \param endpoints          receives endpoints[0] and endpoints[1]
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   const int n_texels = width * height;
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         /* Group the colour by luminance */
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Group the alpha by the alpha channel itself (p[3]).  Comparing
          * any other channel against the average alpha would average the
          * alpha values over an unrelated partition. */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == n_texels) {
      /* One group is empty: use the mean of the whole region */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / n_texels;
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (n_texels - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == n_texels) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / n_texels;
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (n_texels - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1104
1105 static void
1106 write_rgb_indices_unorm(struct bit_writer *writer,
1107                         int src_width, int src_height,
1108                         const uint8_t *src, int src_rowstride,
1109                         uint8_t endpoints[][4])
1110 {
1111    int luminance;
1112    int endpoint_luminances[2];
1113    int endpoint;
1114    int index;
1115    int y, x;
1116
1117    for (endpoint = 0; endpoint < 2; endpoint++) {
1118       endpoint_luminances[endpoint] =
1119          endpoints[endpoint][0] +
1120          endpoints[endpoint][1] +
1121          endpoints[endpoint][2];
1122    }
1123
1124    /* If the endpoints have the same luminance then we'll just use index 0 for
1125     * all of the texels */
1126    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1127       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1128       return;
1129    }
1130
1131    for (y = 0; y < src_height; y++) {
1132       for (x = 0; x < src_width; x++) {
1133          luminance = src[0] + src[1] + src[2];
1134
1135          index = ((luminance - endpoint_luminances[0]) * 3 /
1136                   (endpoint_luminances[1] - endpoint_luminances[0]));
1137          if (index < 0)
1138             index = 0;
1139          else if (index > 3)
1140             index = 3;
1141
1142          assert(x != 0 || y != 0 || index < 2);
1143
1144          write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1145
1146          src += 4;
1147       }
1148
1149       /* Pad the indices out to the block size */
1150       if (src_width < BLOCK_SIZE)
1151          write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1152
1153       src += src_rowstride - src_width * 4;
1154    }
1155
1156    /* Pad the indices out to the block size */
1157    if (src_height < BLOCK_SIZE)
1158       write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1159 }
1160
1161 static void
1162 write_alpha_indices_unorm(struct bit_writer *writer,
1163                           int src_width, int src_height,
1164                           const uint8_t *src, int src_rowstride,
1165                           uint8_t endpoints[][4])
1166 {
1167    int index;
1168    int y, x;
1169
1170    /* If the endpoints have the same alpha then we'll just use index 0 for
1171     * all of the texels */
1172    if (endpoints[0][3] == endpoints[1][3]) {
1173       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1174       return;
1175    }
1176
1177    for (y = 0; y < src_height; y++) {
1178       for (x = 0; x < src_width; x++) {
1179          index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1180                   ((int) endpoints[1][3] - endpoints[0][3]));
1181          if (index < 0)
1182             index = 0;
1183          else if (index > 7)
1184             index = 7;
1185
1186          assert(x != 0 || y != 0 || index < 4);
1187
1188          /* The first index has one less bit */
1189          write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1190
1191          src += 4;
1192       }
1193
1194       /* Pad the indices out to the block size */
1195       if (src_width < BLOCK_SIZE)
1196          write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1197
1198       src += src_rowstride - src_width * 4;
1199    }
1200
1201    /* Pad the indices out to the block size */
1202    if (src_height < BLOCK_SIZE)
1203       write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1204 }
1205
1206 static void
1207 compress_rgba_unorm_block(int src_width, int src_height,
1208                           const uint8_t *src, int src_rowstride,
1209                           uint8_t *dst)
1210 {
1211    int average_luminance, average_alpha;
1212    uint8_t endpoints[2][4];
1213    struct bit_writer writer;
1214    int component, endpoint;
1215
1216    get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1217                                      &average_luminance, &average_alpha);
1218    get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1219                             average_luminance, average_alpha,
1220                             endpoints);
1221
1222    writer.dst = dst;
1223    writer.pos = 0;
1224    writer.buf = 0;
1225
1226    write_bits(&writer, 5, 0x10); /* mode 4 */
1227    write_bits(&writer, 2, 0); /* rotation 0 */
1228    write_bits(&writer, 1, 0); /* index selection bit */
1229
1230    /* Write the color endpoints */
1231    for (component = 0; component < 3; component++)
1232       for (endpoint = 0; endpoint < 2; endpoint++)
1233          write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1234
1235    /* Write the alpha endpoints */
1236    for (endpoint = 0; endpoint < 2; endpoint++)
1237       write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1238
1239    write_rgb_indices_unorm(&writer,
1240                            src_width, src_height,
1241                            src, src_rowstride,
1242                            endpoints);
1243    write_alpha_indices_unorm(&writer,
1244                              src_width, src_height,
1245                              src, src_rowstride,
1246                              endpoints);
1247 }
1248
1249 static void
1250 compress_rgba_unorm(int width, int height,
1251                     const uint8_t *src, int src_rowstride,
1252                     uint8_t *dst, int dst_rowstride)
1253 {
1254    int dst_row_diff;
1255    int y, x;
1256
1257    if (dst_rowstride >= width * 4)
1258       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1259    else
1260       dst_row_diff = 0;
1261
1262    for (y = 0; y < height; y += BLOCK_SIZE) {
1263       for (x = 0; x < width; x += BLOCK_SIZE) {
1264          compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1265                                    MIN2(height - y, BLOCK_SIZE),
1266                                    src + x * 4 + y * src_rowstride,
1267                                    src_rowstride,
1268                                    dst);
1269          dst += BLOCK_BYTES;
1270       }
1271       dst += dst_row_diff;
1272    }
1273 }
1274
1275 GLboolean
1276 _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
1277 {
1278    const GLubyte *pixels;
1279    const GLubyte *tempImage = NULL;
1280    int rowstride;
1281
1282    if (srcFormat != GL_RGBA ||
1283        srcType != GL_UNSIGNED_BYTE ||
1284        ctx->_ImageTransferState ||
1285        srcPacking->SwapBytes) {
1286       /* convert image to RGBA/ubyte */
1287       GLubyte *tempImageSlices[1];
1288       int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
1289       tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
1290       if (!tempImage)
1291          return GL_FALSE; /* out of memory */
1292       tempImageSlices[0] = (GLubyte *) tempImage;
1293       _mesa_texstore(ctx, dims,
1294                      baseInternalFormat,
1295                      _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
1296                                            : MESA_FORMAT_A8B8G8R8_UNORM,
1297                      rgbaRowStride, tempImageSlices,
1298                      srcWidth, srcHeight, srcDepth,
1299                      srcFormat, srcType, srcAddr,
1300                      srcPacking);
1301
1302       pixels = tempImage;
1303       rowstride = srcWidth * 4;
1304    } else {
1305       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
1306                                      srcFormat, srcType, 0, 0);
1307       rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
1308                                          srcFormat, srcType);
1309    }
1310
1311    compress_rgba_unorm(srcWidth, srcHeight,
1312                        pixels, rowstride,
1313                        dstSlices[0], dstRowStride);
1314
1315    free((void *) tempImage);
1316
1317    return GL_TRUE;
1318 }
1319
/**
 * Returns the average luminance (R + G + B) of a region of RGB float
 * texels.
 *
 * \param width, height  size of the region in texels
 * \param src            first texel of the region, three floats per texel
 * \param src_rowstride  distance in bytes between consecutive rows
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const int n_texels = width * height;
   float total = 0.0f;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, src += 3)
         total += src[0] + src[1] + src[2];

      /* Skip whatever separates the end of this row from the next one;
       * the stride is in bytes but src counts floats */
      src += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / n_texels;
}
1337
/**
 * Clamps \p value to [-65504, 65504] when \p is_signed is set and to
 * [0, 65504] otherwise.  Values already inside the range are returned
 * unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1356
/**
 * Chooses the two RGB endpoints used to encode a region of RGB float
 * texels.
 *
 * The texels are split into two groups around \p average_luminance and
 * each endpoint is the mean colour of one group.  If one group is empty,
 * both endpoints are set to the mean of the whole region.  The endpoints
 * are then clamped with clamp_value() and finally swapped if necessary so
 * that the first texel lies on endpoint 0's side of the midpoint.
 *
 * \param width, height      size of the region in texels
 * \param src                first texel of the region, three floats each
 * \param src_rowstride      distance in bytes between consecutive rows
 * \param average_luminance  mean of R + G + B over the region
 * \param endpoints          receives endpoints[0] and endpoints[1]
 * \param is_signed          selects the clamping range
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float sums[2][3];
   float group_luminance[2];
   float midpoint;
   float temp[3];
   int n_left = 0;
   int row, col, e, c;

   memset(sums, 0, sizeof sums);

   /* Accumulate the colour of every texel into the group it belongs to */
   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         const float luminance = texel[0] + texel[1] + texel[2];
         const int group = (luminance < average_luminance) ? 0 : 1;

         if (group == 0)
            n_left++;

         for (c = 0; c < 3; c++)
            sums[group][c] += texel[c];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   for (c = 0; c < 3; c++) {
      if (n_left == 0 || n_left == n_texels) {
         /* One group is empty: use the mean of the whole region */
         endpoints[0][c] = endpoints[1][c] =
            (sums[0][c] + sums[1][c]) / n_texels;
      } else {
         endpoints[0][c] = sums[0][c] / n_left;
         endpoints[1][c] = sums[1][c] / (n_texels - n_left);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities, then work out the luminance of each clamped endpoint */
   for (e = 0; e < 2; e++) {
      for (c = 0; c < 3; c++)
         endpoints[e][c] = clamp_value(endpoints[e][c], is_signed);

      group_luminance[e] =
         endpoints[e][0] + endpoints[e][1] + endpoints[e][2];
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */
   midpoint = (group_luminance[0] + group_luminance[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (group_luminance[0] <= midpoint)) {
      memcpy(temp, endpoints[0], sizeof temp);
      memcpy(endpoints[0], endpoints[1], sizeof temp);
      memcpy(endpoints[1], temp, sizeof temp);
   }
}
1432
1433 static void
1434 write_rgb_indices_float(struct bit_writer *writer,
1435                         int src_width, int src_height,
1436                         const float *src, int src_rowstride,
1437                         float endpoints[][3])
1438 {
1439    float luminance;
1440    float endpoint_luminances[2];
1441    int endpoint;
1442    int index;
1443    int y, x;
1444
1445    for (endpoint = 0; endpoint < 2; endpoint++) {
1446       endpoint_luminances[endpoint] =
1447          endpoints[endpoint][0] +
1448          endpoints[endpoint][1] +
1449          endpoints[endpoint][2];
1450    }
1451
1452    /* If the endpoints have the same luminance then we'll just use index 0 for
1453     * all of the texels */
1454    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1455       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1456       return;
1457    }
1458
1459    for (y = 0; y < src_height; y++) {
1460       for (x = 0; x < src_width; x++) {
1461          luminance = src[0] + src[1] + src[2];
1462
1463          index = ((luminance - endpoint_luminances[0]) * 15 /
1464                   (endpoint_luminances[1] - endpoint_luminances[0]));
1465          if (index < 0)
1466             index = 0;
1467          else if (index > 15)
1468             index = 15;
1469
1470          assert(x != 0 || y != 0 || index < 8);
1471
1472          write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1473
1474          src += 3;
1475       }
1476
1477       /* Pad the indices out to the block size */
1478       if (src_width < BLOCK_SIZE)
1479          write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1480
1481       src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1482    }
1483
1484    /* Pad the indices out to the block size */
1485    if (src_height < BLOCK_SIZE)
1486       write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1487 }
1488
/**
 * Converts a float endpoint component into the 10-bit value stored in the
 * block.
 *
 * The value is first converted with _mesa_float_to_half().  For the
 * unsigned format, values at or below zero encode as 0 and everything else
 * as (64 * half / 31) >> 6.  For the signed format the magnitude is encoded
 * as (32 * |half| / 31) >> 6 and negative values are stored in 10-bit
 * two's complement.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   int half;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   if (half & 0x8000) {
      /* Drop the sign bit, quantize the magnitude and negate the result
       * within 10 bits */
      const int magnitude = (32 * (half & 0x7fff) / 31) >> 6;

      return -magnitude & ((1 << 10) - 1);
   }

   return (32 * half / 31) >> 6;
}
1518
1519 static void
1520 compress_rgb_float_block(int src_width, int src_height,
1521                          const float *src, int src_rowstride,
1522                          uint8_t *dst,
1523                          bool is_signed)
1524 {
1525    float average_luminance;
1526    float endpoints[2][3];
1527    struct bit_writer writer;
1528    int component, endpoint;
1529    int endpoint_value;
1530
1531    average_luminance =
1532       get_average_luminance_float(src_width, src_height, src, src_rowstride);
1533    get_endpoints_float(src_width, src_height, src, src_rowstride,
1534                        average_luminance, endpoints, is_signed);
1535
1536    writer.dst = dst;
1537    writer.pos = 0;
1538    writer.buf = 0;
1539
1540    write_bits(&writer, 5, 3); /* mode 3 */
1541
1542    /* Write the endpoints */
1543    for (endpoint = 0; endpoint < 2; endpoint++) {
1544       for (component = 0; component < 3; component++) {
1545          endpoint_value =
1546             get_endpoint_value(endpoints[endpoint][component], is_signed);
1547          write_bits(&writer, 10, endpoint_value);
1548       }
1549    }
1550
1551    write_rgb_indices_float(&writer,
1552                            src_width, src_height,
1553                            src, src_rowstride,
1554                            endpoints);
1555 }
1556
1557 static void
1558 compress_rgb_float(int width, int height,
1559                    const float *src, int src_rowstride,
1560                    uint8_t *dst, int dst_rowstride,
1561                    bool is_signed)
1562 {
1563    int dst_row_diff;
1564    int y, x;
1565
1566    if (dst_rowstride >= width * 4)
1567       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1568    else
1569       dst_row_diff = 0;
1570
1571    for (y = 0; y < height; y += BLOCK_SIZE) {
1572       for (x = 0; x < width; x += BLOCK_SIZE) {
1573          compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1574                                   MIN2(height - y, BLOCK_SIZE),
1575                                   src + x * 3 +
1576                                   y * src_rowstride / sizeof (float),
1577                                   src_rowstride,
1578                                   dst,
1579                                   is_signed);
1580          dst += BLOCK_BYTES;
1581       }
1582       dst += dst_row_diff;
1583    }
1584 }
1585
1586 static GLboolean
1587 texstore_bptc_rgb_float(TEXSTORE_PARAMS,
1588                         bool is_signed)
1589 {
1590    const float *pixels;
1591    const float *tempImage = NULL;
1592    int rowstride;
1593
1594    if (srcFormat != GL_RGB ||
1595        srcType != GL_FLOAT ||
1596        ctx->_ImageTransferState ||
1597        srcPacking->SwapBytes) {
1598       /* convert image to RGB/float */
1599       GLfloat *tempImageSlices[1];
1600       int rgbRowStride = 3 * srcWidth * sizeof(GLfloat);
1601       tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLfloat));
1602       if (!tempImage)
1603          return GL_FALSE; /* out of memory */
1604       tempImageSlices[0] = (GLfloat *) tempImage;
1605       _mesa_texstore(ctx, dims,
1606                      baseInternalFormat,
1607                      MESA_FORMAT_RGB_FLOAT32,
1608                      rgbRowStride, (GLubyte **)tempImageSlices,
1609                      srcWidth, srcHeight, srcDepth,
1610                      srcFormat, srcType, srcAddr,
1611                      srcPacking);
1612
1613       pixels = tempImage;
1614       rowstride = srcWidth * sizeof(float) * 3;
1615    } else {
1616       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
1617                                      srcFormat, srcType, 0, 0);
1618       rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
1619                                          srcFormat, srcType);
1620    }
1621
1622    compress_rgb_float(srcWidth, srcHeight,
1623                       pixels, rowstride,
1624                       dstSlices[0], dstRowStride,
1625                       is_signed);
1626
1627    free((void *) tempImage);
1628
1629    return GL_TRUE;
1630 }
1631
1632 GLboolean
1633 _mesa_texstore_bptc_rgb_signed_float(TEXSTORE_PARAMS)
1634 {
1635    assert(dstFormat == MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT);
1636
1637    return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
1638                                   dstFormat, dstRowStride, dstSlices,
1639                                   srcWidth, srcHeight, srcDepth,
1640                                   srcFormat, srcType,
1641                                   srcAddr, srcPacking,
1642                                   true /* signed */);
1643 }
1644
1645 GLboolean
1646 _mesa_texstore_bptc_rgb_unsigned_float(TEXSTORE_PARAMS)
1647 {
1648    assert(dstFormat == MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT);
1649
1650    return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
1651                                   dstFormat, dstRowStride, dstSlices,
1652                                   srcWidth, srcHeight, srcDepth,
1653                                   srcFormat, srcType,
1654                                   srcAddr, srcPacking,
1655                                   false /* unsigned */);
1656 }