2 * Copyright (C) 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * Included by texcompress_bptc and gallium to define BPTC decoding routines.
28 #include "util/format_srgb.h"
29 #include "util/half_float.h"
33 #define N_PARTITIONS 64
34 #define BLOCK_BYTES 16
36 struct bptc_unorm_mode
{
39 bool has_rotation_bits
;
40 bool has_index_selection_bit
;
43 bool has_endpoint_pbits
;
44 bool has_shared_pbits
;
46 int n_secondary_index_bits
;
49 struct bptc_float_bitfield
{
57 struct bptc_float_mode
{
59 bool transformed_endpoints
;
64 struct bptc_float_bitfield bitfields
[24];
73 static const struct bptc_unorm_mode
74 bptc_unorm_modes
[] = {
75 /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
76 /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
77 /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
78 /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
79 /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
80 /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
81 /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
82 /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
85 static const struct bptc_float_mode
86 bptc_float_modes
[] = {
88 { false, true, 5, 10, 3, { 5, 5, 5 },
89 { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
90 { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
91 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
92 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
93 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
94 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
95 { 3, 2, 3, 1, false },
99 { false, true, 5, 7, 3, { 6, 6, 6 },
100 { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
101 { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
102 { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
103 { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
104 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
105 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
106 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
107 { 2, 0, 0, 6, false },
108 { 3, 0, 0, 6, false },
112 { false, true, 5, 11, 3, { 5, 4, 4 },
113 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
114 { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
115 { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
116 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
117 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
118 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
122 { false, false, 0, 10, 4, { 10, 10, 10 },
123 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
124 { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
128 { false, true, 5, 11, 3, { 4, 5, 4 },
129 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
130 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
131 { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
132 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
133 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
134 { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
135 { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
139 { false, true, 0, 11, 4, { 9, 9, 9 },
140 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
141 { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
142 { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
146 { false, true, 5, 11, 3, { 4, 4, 5 },
147 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
148 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
149 { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
150 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
151 { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
152 { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
153 { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
157 { false, true, 0, 12, 4, { 8, 8, 8 },
158 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
159 { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
160 { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
164 { false, true, 5, 9, 3, { 5, 5, 5 },
165 { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
166 { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
167 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
168 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
169 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
170 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
171 { 3, 2, 3, 1, false },
175 { false, true, 0, 16, 4, { 4, 4, 4 },
176 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
177 { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
178 { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
182 { false, true, 5, 8, 3, { 6, 5, 5 },
183 { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
184 { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
185 { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
186 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
187 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
188 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
189 { 3, 0, 0, 6, false },
193 { true /* reserved */ },
195 { false, true, 5, 8, 3, { 5, 6, 5 },
196 { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
197 { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
198 { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
199 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
200 { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
201 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
202 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
206 { true /* reserved */ },
208 { false, true, 5, 8, 3, { 5, 5, 6 },
209 { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
210 { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
211 { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
212 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
213 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
214 { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
215 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
219 { true /* reserved */ },
221 { false, false, 5, 6, 3, { 6, 6, 6 },
222 { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
223 { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
224 { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
225 { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
226 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
227 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
228 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
229 { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
233 { true /* reserved */ },
236 /* This partition table is used when the mode has two subsets. Each
237 * partition is represented by a 32-bit value which gives 2 bits per texel
238 * within the block. The value of the two bits represents which subset to use
241 static const uint32_t
242 partition_table1
[N_PARTITIONS
] = {
243 0x50505050U
, 0x40404040U
, 0x54545454U
, 0x54505040U
,
244 0x50404000U
, 0x55545450U
, 0x55545040U
, 0x54504000U
,
245 0x50400000U
, 0x55555450U
, 0x55544000U
, 0x54400000U
,
246 0x55555440U
, 0x55550000U
, 0x55555500U
, 0x55000000U
,
247 0x55150100U
, 0x00004054U
, 0x15010000U
, 0x00405054U
,
248 0x00004050U
, 0x15050100U
, 0x05010000U
, 0x40505054U
,
249 0x00404050U
, 0x05010100U
, 0x14141414U
, 0x05141450U
,
250 0x01155440U
, 0x00555500U
, 0x15014054U
, 0x05414150U
,
251 0x44444444U
, 0x55005500U
, 0x11441144U
, 0x05055050U
,
252 0x05500550U
, 0x11114444U
, 0x41144114U
, 0x44111144U
,
253 0x15055054U
, 0x01055040U
, 0x05041050U
, 0x05455150U
,
254 0x14414114U
, 0x50050550U
, 0x41411414U
, 0x00141400U
,
255 0x00041504U
, 0x00105410U
, 0x10541000U
, 0x04150400U
,
256 0x50410514U
, 0x41051450U
, 0x05415014U
, 0x14054150U
,
257 0x41050514U
, 0x41505014U
, 0x40011554U
, 0x54150140U
,
258 0x50505500U
, 0x00555050U
, 0x15151010U
, 0x54540404U
,
261 /* This partition table is used when the mode has three subsets. In this case
262 * the values can be 0, 1 or 2.
264 static const uint32_t
265 partition_table2
[N_PARTITIONS
] = {
266 0xaa685050U
, 0x6a5a5040U
, 0x5a5a4200U
, 0x5450a0a8U
,
267 0xa5a50000U
, 0xa0a05050U
, 0x5555a0a0U
, 0x5a5a5050U
,
268 0xaa550000U
, 0xaa555500U
, 0xaaaa5500U
, 0x90909090U
,
269 0x94949494U
, 0xa4a4a4a4U
, 0xa9a59450U
, 0x2a0a4250U
,
270 0xa5945040U
, 0x0a425054U
, 0xa5a5a500U
, 0x55a0a0a0U
,
271 0xa8a85454U
, 0x6a6a4040U
, 0xa4a45000U
, 0x1a1a0500U
,
272 0x0050a4a4U
, 0xaaa59090U
, 0x14696914U
, 0x69691400U
,
273 0xa08585a0U
, 0xaa821414U
, 0x50a4a450U
, 0x6a5a0200U
,
274 0xa9a58000U
, 0x5090a0a8U
, 0xa8a09050U
, 0x24242424U
,
275 0x00aa5500U
, 0x24924924U
, 0x24499224U
, 0x50a50a50U
,
276 0x500aa550U
, 0xaaaa4444U
, 0x66660000U
, 0xa5a0a5a0U
,
277 0x50a050a0U
, 0x69286928U
, 0x44aaaa44U
, 0x66666600U
,
278 0xaa444444U
, 0x54a854a8U
, 0x95809580U
, 0x96969600U
,
279 0xa85454a8U
, 0x80959580U
, 0xaa141414U
, 0x96960000U
,
280 0xaaaa1414U
, 0xa05050a0U
, 0xa0a5a5a0U
, 0x96000000U
,
281 0x40804080U
, 0xa9a8a9a8U
, 0xaaaaaa44U
, 0x2a4a5254U
285 anchor_indices
[][N_PARTITIONS
] = {
286 /* Anchor index values for the second subset of two-subset partitioning */
288 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
289 0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
290 0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
291 0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
294 /* Anchor index values for the second subset of three-subset partitioning */
296 0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
297 0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
298 0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
299 0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
302 /* Anchor index values for the third subset of three-subset
306 0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
307 0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
308 0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
309 0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
314 extract_bits(const uint8_t *block
,
318 int byte_index
= offset
/ 8;
319 int bit_index
= offset
% 8;
320 int n_bits_in_byte
= MIN2(n_bits
, 8 - bit_index
);
325 result
|= ((block
[byte_index
] >> bit_index
) &
326 ((1 << n_bits_in_byte
) - 1)) << bit
;
328 n_bits
-= n_bits_in_byte
;
333 bit
+= n_bits_in_byte
;
336 n_bits_in_byte
= MIN2(n_bits
, 8);
341 expand_component(uint8_t byte
,
344 /* Expands a n-bit quantity into a byte by copying the most-significant
345 * bits into the unused least-significant bits.
347 return byte
<< (8 - n_bits
) | (byte
>> (2 * n_bits
- 8));
351 extract_unorm_endpoints(const struct bptc_unorm_mode
*mode
,
352 const uint8_t *block
,
354 uint8_t endpoints
[][4])
362 /* Extract each color component */
363 for (component
= 0; component
< 3; component
++) {
364 for (subset
= 0; subset
< mode
->n_subsets
; subset
++) {
365 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
366 endpoints
[subset
* 2 + endpoint
][component
] =
367 extract_bits(block
, bit_offset
, mode
->n_color_bits
);
368 bit_offset
+= mode
->n_color_bits
;
373 /* Extract the alpha values */
374 if (mode
->n_alpha_bits
> 0) {
375 for (subset
= 0; subset
< mode
->n_subsets
; subset
++) {
376 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
377 endpoints
[subset
* 2 + endpoint
][3] =
378 extract_bits(block
, bit_offset
, mode
->n_alpha_bits
);
379 bit_offset
+= mode
->n_alpha_bits
;
385 for (subset
= 0; subset
< mode
->n_subsets
; subset
++)
386 for (endpoint
= 0; endpoint
< 2; endpoint
++)
387 endpoints
[subset
* 2 + endpoint
][3] = 255;
392 /* Add in the p-bits */
393 if (mode
->has_endpoint_pbits
) {
394 for (subset
= 0; subset
< mode
->n_subsets
; subset
++) {
395 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
396 pbit
= extract_bits(block
, bit_offset
, 1);
399 for (component
= 0; component
< n_components
; component
++) {
400 endpoints
[subset
* 2 + endpoint
][component
] <<= 1;
401 endpoints
[subset
* 2 + endpoint
][component
] |= pbit
;
405 } else if (mode
->has_shared_pbits
) {
406 for (subset
= 0; subset
< mode
->n_subsets
; subset
++) {
407 pbit
= extract_bits(block
, bit_offset
, 1);
410 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
411 for (component
= 0; component
< n_components
; component
++) {
412 endpoints
[subset
* 2 + endpoint
][component
] <<= 1;
413 endpoints
[subset
* 2 + endpoint
][component
] |= pbit
;
419 /* Expand the n-bit values to a byte */
420 for (subset
= 0; subset
< mode
->n_subsets
; subset
++) {
421 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
422 for (component
= 0; component
< 3; component
++) {
423 endpoints
[subset
* 2 + endpoint
][component
] =
424 expand_component(endpoints
[subset
* 2 + endpoint
][component
],
426 mode
->has_endpoint_pbits
+
427 mode
->has_shared_pbits
);
430 if (mode
->n_alpha_bits
> 0) {
431 endpoints
[subset
* 2 + endpoint
][3] =
432 expand_component(endpoints
[subset
* 2 + endpoint
][3],
434 mode
->has_endpoint_pbits
+
435 mode
->has_shared_pbits
);
444 is_anchor(int n_subsets
,
455 return anchor_indices
[0][partition_num
] == texel
;
457 return (anchor_indices
[1][partition_num
] == texel
||
458 anchor_indices
[2][partition_num
] == texel
);
466 count_anchors_before_texel(int n_subsets
,
479 if (texel
> anchor_indices
[0][partition_num
])
483 if (texel
> anchor_indices
[1][partition_num
])
485 if (texel
> anchor_indices
[2][partition_num
])
497 interpolate(int32_t a
, int32_t b
,
501 static const uint8_t weights2
[] = { 0, 21, 43, 64 };
502 static const uint8_t weights3
[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
503 static const uint8_t weights4
[] =
504 { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
505 static const uint8_t *weights
[] = {
506 NULL
, NULL
, weights2
, weights3
, weights4
510 weight
= weights
[index_bits
][index
];
512 return ((64 - weight
) * a
+ weight
* b
+ 32) >> 6;
516 apply_rotation(int rotation
,
526 t
= result
[rotation
];
527 result
[rotation
] = result
[3];
532 fetch_rgba_unorm_from_block(const uint8_t *block
,
536 int mode_num
= ffs(block
[0]);
537 const struct bptc_unorm_mode
*mode
;
538 int bit_offset
, secondary_bit_offset
;
546 int anchors_before_texel
;
548 uint8_t endpoints
[3 * 2][4];
553 /* According to the spec this mode is reserved and shouldn't be used. */
554 memset(result
, 0, 3);
559 mode
= bptc_unorm_modes
+ mode_num
- 1;
560 bit_offset
= mode_num
;
562 partition_num
= extract_bits(block
, bit_offset
, mode
->n_partition_bits
);
563 bit_offset
+= mode
->n_partition_bits
;
565 switch (mode
->n_subsets
) {
570 subsets
= partition_table1
[partition_num
];
573 subsets
= partition_table2
[partition_num
];
580 if (mode
->has_rotation_bits
) {
581 rotation
= extract_bits(block
, bit_offset
, 2);
587 if (mode
->has_index_selection_bit
) {
588 index_selection
= extract_bits(block
, bit_offset
, 1);
594 bit_offset
= extract_unorm_endpoints(mode
, block
, bit_offset
, endpoints
);
596 anchors_before_texel
= count_anchors_before_texel(mode
->n_subsets
,
597 partition_num
, texel
);
599 /* Calculate the offset to the secondary index */
600 secondary_bit_offset
= (bit_offset
+
601 BLOCK_SIZE
* BLOCK_SIZE
* mode
->n_index_bits
-
603 mode
->n_secondary_index_bits
* texel
-
604 anchors_before_texel
);
606 /* Calculate the offset to the primary index for this texel */
607 bit_offset
+= mode
->n_index_bits
* texel
- anchors_before_texel
;
609 subset_num
= (subsets
>> (texel
* 2)) & 3;
611 anchor
= is_anchor(mode
->n_subsets
, partition_num
, texel
);
613 index_bits
= mode
->n_index_bits
;
616 indices
[0] = extract_bits(block
, bit_offset
, index_bits
);
618 if (mode
->n_secondary_index_bits
) {
619 index_bits
= mode
->n_secondary_index_bits
;
622 indices
[1] = extract_bits(block
, secondary_bit_offset
, index_bits
);
625 index
= indices
[index_selection
];
626 index_bits
= (index_selection
?
627 mode
->n_secondary_index_bits
:
630 for (component
= 0; component
< 3; component
++)
631 result
[component
] = interpolate(endpoints
[subset_num
* 2][component
],
632 endpoints
[subset_num
* 2 + 1][component
],
636 /* Alpha uses the opposite index from the color components */
637 if (mode
->n_secondary_index_bits
&& !index_selection
) {
639 index_bits
= mode
->n_secondary_index_bits
;
642 index_bits
= mode
->n_index_bits
;
645 result
[3] = interpolate(endpoints
[subset_num
* 2][3],
646 endpoints
[subset_num
* 2 + 1][3],
650 apply_rotation(rotation
, result
);
653 #ifdef BPTC_BLOCK_DECODE
655 decompress_rgba_unorm_block(int src_width
, int src_height
,
656 const uint8_t *block
,
657 uint8_t *dst_row
, int dst_rowstride
)
659 int mode_num
= ffs(block
[0]);
660 const struct bptc_unorm_mode
*mode
;
661 int bit_offset
, secondary_bit_offset
;
669 int anchors_before_texel
;
671 uint8_t endpoints
[3 * 2][4];
677 /* According to the spec this mode is reserved and shouldn't be used. */
678 for(y
= 0; y
< src_height
; y
+= 1) {
679 uint8_t *result
= dst_row
;
680 memset(result
, 0, 4 * src_width
);
681 for(x
= 0; x
< src_width
; x
+= 1) {
685 dst_row
+= dst_rowstride
;
690 mode
= bptc_unorm_modes
+ mode_num
- 1;
691 bit_offset
= mode_num
;
693 partition_num
= extract_bits(block
, bit_offset
, mode
->n_partition_bits
);
694 bit_offset
+= mode
->n_partition_bits
;
696 switch (mode
->n_subsets
) {
701 subsets
= partition_table1
[partition_num
];
704 subsets
= partition_table2
[partition_num
];
711 if (mode
->has_rotation_bits
) {
712 rotation
= extract_bits(block
, bit_offset
, 2);
718 if (mode
->has_index_selection_bit
) {
719 index_selection
= extract_bits(block
, bit_offset
, 1);
725 bit_offset
= extract_unorm_endpoints(mode
, block
, bit_offset
, endpoints
);
727 for(y
= 0; y
< src_height
; y
+= 1) {
728 uint8_t *result
= dst_row
;
729 for(x
= 0; x
< src_width
; x
+= 1) {
733 anchors_before_texel
= count_anchors_before_texel(mode
->n_subsets
,
737 /* Calculate the offset to the secondary index */
738 secondary_bit_offset
= (bit_offset
+
739 BLOCK_SIZE
* BLOCK_SIZE
* mode
->n_index_bits
-
741 mode
->n_secondary_index_bits
* texel
-
742 anchors_before_texel
);
744 /* Calculate the offset to the primary index for this texel */
745 bit_offset
+= mode
->n_index_bits
* texel
- anchors_before_texel
;
747 subset_num
= (subsets
>> (texel
* 2)) & 3;
749 anchor
= is_anchor(mode
->n_subsets
, partition_num
, texel
);
751 index_bits
= mode
->n_index_bits
;
754 indices
[0] = extract_bits(block
, bit_offset
, index_bits
);
756 if (mode
->n_secondary_index_bits
) {
757 index_bits
= mode
->n_secondary_index_bits
;
760 indices
[1] = extract_bits(block
, secondary_bit_offset
, index_bits
);
763 index
= indices
[index_selection
];
764 index_bits
= (index_selection
?
765 mode
->n_secondary_index_bits
:
768 for (component
= 0; component
< 3; component
++)
769 result
[component
] = interpolate(endpoints
[subset_num
* 2][component
],
770 endpoints
[subset_num
* 2 + 1][component
],
774 /* Alpha uses the opposite index from the color components */
775 if (mode
->n_secondary_index_bits
&& !index_selection
) {
777 index_bits
= mode
->n_secondary_index_bits
;
780 index_bits
= mode
->n_index_bits
;
783 result
[3] = interpolate(endpoints
[subset_num
* 2][3],
784 endpoints
[subset_num
* 2 + 1][3],
788 apply_rotation(rotation
, result
);
791 dst_row
+= dst_rowstride
;
796 decompress_rgba_unorm(int width
, int height
,
797 const uint8_t *src
, int src_rowstride
,
798 uint8_t *dst
, int dst_rowstride
)
803 if (src_rowstride
>= width
* 4)
804 src_row_diff
= src_rowstride
- ((width
+ 3) & ~3) * 4;
808 for (y
= 0; y
< height
; y
+= BLOCK_SIZE
) {
809 for (x
= 0; x
< width
; x
+= BLOCK_SIZE
) {
810 decompress_rgba_unorm_block(MIN2(width
- x
, BLOCK_SIZE
),
811 MIN2(height
- y
, BLOCK_SIZE
),
813 dst
+ x
* 4 + y
* dst_rowstride
,
820 #endif // BPTC_BLOCK_DECODE
/**
 * Sign-extends the low n_bits bits of value to a 32-bit signed integer.
 *
 * If bit (n_bits - 1) of value is set, every bit from position n_bits
 * upwards is set as well; otherwise value is returned unchanged.
 *
 * \param value   quantity whose low n_bits bits hold a two's complement number
 * \param n_bits  width of that number in bits; the shifts below are only
 *                defined for 1 <= n_bits <= 31
 * \return the sign-extended value
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   if ((value & (1 << (n_bits - 1)))) {
      /* Build the fill mask in unsigned arithmetic: left-shifting a negative
       * signed value (such as ~0) is undefined behaviour in C.
       */
      value |= (int32_t) (~(uint32_t) 0 << n_bits);
   }

   return value;
}
834 signed_unquantize(int value
, int n_endpoint_bits
)
838 if (n_endpoint_bits
>= 16)
851 if (value
>= (1 << (n_endpoint_bits
- 1)) - 1)
854 value
= ((value
<< 15) + 0x4000) >> (n_endpoint_bits
- 1);
863 unsigned_unquantize(int value
, int n_endpoint_bits
)
865 if (n_endpoint_bits
>= 15)
871 if (value
== (1 << n_endpoint_bits
) - 1)
874 return ((value
<< 15) + 0x4000) >> (n_endpoint_bits
- 1);
878 extract_float_endpoints(const struct bptc_float_mode
*mode
,
879 const uint8_t *block
,
881 int32_t endpoints
[][3],
884 const struct bptc_float_bitfield
*bitfield
;
885 int endpoint
, component
;
890 if (mode
->n_partition_bits
)
895 memset(endpoints
, 0, sizeof endpoints
[0][0] * n_endpoints
* 3);
897 for (bitfield
= mode
->bitfields
; bitfield
->endpoint
!= -1; bitfield
++) {
898 value
= extract_bits(block
, bit_offset
, bitfield
->n_bits
);
899 bit_offset
+= bitfield
->n_bits
;
901 if (bitfield
->reverse
) {
902 for (i
= 0; i
< bitfield
->n_bits
; i
++) {
903 if (value
& (1 << i
))
904 endpoints
[bitfield
->endpoint
][bitfield
->component
] |=
905 1 << ((bitfield
->n_bits
- 1 - i
) + bitfield
->offset
);
908 endpoints
[bitfield
->endpoint
][bitfield
->component
] |=
909 value
<< bitfield
->offset
;
913 if (mode
->transformed_endpoints
) {
914 /* The endpoints are specified as signed offsets from e0 */
915 for (endpoint
= 1; endpoint
< n_endpoints
; endpoint
++) {
916 for (component
= 0; component
< 3; component
++) {
917 value
= sign_extend(endpoints
[endpoint
][component
],
918 mode
->n_delta_bits
[component
]);
919 endpoints
[endpoint
][component
] =
920 ((endpoints
[0][component
] + value
) &
921 ((1 << mode
->n_endpoint_bits
) - 1));
927 for (endpoint
= 0; endpoint
< n_endpoints
; endpoint
++) {
928 for (component
= 0; component
< 3; component
++) {
929 value
= sign_extend(endpoints
[endpoint
][component
],
930 mode
->n_endpoint_bits
);
931 endpoints
[endpoint
][component
] =
932 signed_unquantize(value
, mode
->n_endpoint_bits
);
936 for (endpoint
= 0; endpoint
< n_endpoints
; endpoint
++) {
937 for (component
= 0; component
< 3; component
++) {
938 endpoints
[endpoint
][component
] =
939 unsigned_unquantize(endpoints
[endpoint
][component
],
940 mode
->n_endpoint_bits
);
949 finish_unsigned_unquantize(int32_t value
)
951 return value
* 31 / 64;
955 finish_signed_unquantize(int32_t value
)
958 return (-value
* 31 / 32) | 0x8000;
960 return value
* 31 / 32;
964 fetch_rgb_float_from_block(const uint8_t *block
,
970 const struct bptc_float_mode
*mode
;
976 int anchors_before_texel
;
977 int32_t endpoints
[2 * 2][3];
983 if (block
[0] & 0x2) {
984 mode_num
= (((block
[0] >> 1) & 0xe) | (block
[0] & 1)) + 2;
987 mode_num
= block
[0] & 3;
991 mode
= bptc_float_modes
+ mode_num
;
993 if (mode
->reserved
) {
994 memset(result
, 0, sizeof result
[0] * 3);
999 bit_offset
= extract_float_endpoints(mode
, block
, bit_offset
,
1000 endpoints
, is_signed
);
1002 if (mode
->n_partition_bits
) {
1003 partition_num
= extract_bits(block
, bit_offset
, mode
->n_partition_bits
);
1004 bit_offset
+= mode
->n_partition_bits
;
1006 subsets
= partition_table1
[partition_num
];
1014 anchors_before_texel
=
1015 count_anchors_before_texel(n_subsets
, partition_num
, texel
);
1017 /* Calculate the offset to the primary index for this texel */
1018 bit_offset
+= mode
->n_index_bits
* texel
- anchors_before_texel
;
1020 subset_num
= (subsets
>> (texel
* 2)) & 3;
1022 index_bits
= mode
->n_index_bits
;
1023 if (is_anchor(n_subsets
, partition_num
, texel
))
1025 index
= extract_bits(block
, bit_offset
, index_bits
);
1027 for (component
= 0; component
< 3; component
++) {
1028 value
= interpolate(endpoints
[subset_num
* 2][component
],
1029 endpoints
[subset_num
* 2 + 1][component
],
1031 mode
->n_index_bits
);
1034 value
= finish_signed_unquantize(value
);
1036 value
= finish_unsigned_unquantize(value
);
1038 result
[component
] = _mesa_half_to_float(value
);
1044 #ifdef BPTC_BLOCK_DECODE
1046 decompress_rgb_float_block(unsigned src_width
, unsigned src_height
,
1047 const uint8_t *block
,
1048 float *dst_row
, unsigned dst_rowstride
,
1052 const struct bptc_float_mode
*mode
;
1058 int anchors_before_texel
;
1059 int32_t endpoints
[2 * 2][3];
1066 if (block
[0] & 0x2) {
1067 mode_num
= (((block
[0] >> 1) & 0xe) | (block
[0] & 1)) + 2;
1070 mode_num
= block
[0] & 3;
1074 mode
= bptc_float_modes
+ mode_num
;
1076 if (mode
->reserved
) {
1077 for(y
= 0; y
< src_height
; y
+= 1) {
1078 float *result
= dst_row
;
1079 memset(result
, 0, sizeof result
[0] * 4 * src_width
);
1080 for(x
= 0; x
< src_width
; x
+= 1) {
1084 dst_row
+= dst_rowstride
/ sizeof dst_row
[0];
1089 bit_offset
= extract_float_endpoints(mode
, block
, bit_offset
,
1090 endpoints
, is_signed
);
1092 if (mode
->n_partition_bits
) {
1093 partition_num
= extract_bits(block
, bit_offset
, mode
->n_partition_bits
);
1094 bit_offset
+= mode
->n_partition_bits
;
1096 subsets
= partition_table1
[partition_num
];
1104 for(y
= 0; y
< src_height
; y
+= 1) {
1105 float *result
= dst_row
;
1106 for(x
= 0; x
< src_width
; x
+= 1) {
1111 anchors_before_texel
=
1112 count_anchors_before_texel(n_subsets
, partition_num
, texel
);
1114 /* Calculate the offset to the primary index for this texel */
1115 bit_offset
+= mode
->n_index_bits
* texel
- anchors_before_texel
;
1117 subset_num
= (subsets
>> (texel
* 2)) & 3;
1119 index_bits
= mode
->n_index_bits
;
1120 if (is_anchor(n_subsets
, partition_num
, texel
))
1122 index
= extract_bits(block
, bit_offset
, index_bits
);
1124 for (component
= 0; component
< 3; component
++) {
1125 value
= interpolate(endpoints
[subset_num
* 2][component
],
1126 endpoints
[subset_num
* 2 + 1][component
],
1128 mode
->n_index_bits
);
1131 value
= finish_signed_unquantize(value
);
1133 value
= finish_unsigned_unquantize(value
);
1135 result
[component
] = _mesa_half_to_float(value
);
1141 dst_row
+= dst_rowstride
/ sizeof dst_row
[0];
1146 decompress_rgb_float(int width
, int height
,
1147 const uint8_t *src
, int src_rowstride
,
1148 float *dst
, int dst_rowstride
, bool is_signed
)
1153 if (src_rowstride
>= width
* 4)
1154 src_row_diff
= src_rowstride
- ((width
+ 3) & ~3) * 4;
1158 for (y
= 0; y
< height
; y
+= BLOCK_SIZE
) {
1159 for (x
= 0; x
< width
; x
+= BLOCK_SIZE
) {
1160 decompress_rgb_float_block(MIN2(width
- x
, BLOCK_SIZE
),
1161 MIN2(height
- y
, BLOCK_SIZE
),
1164 (y
* dst_rowstride
/ sizeof dst
[0])),
1165 dst_rowstride
, is_signed
);
1168 src
+= src_row_diff
;
1171 #endif // BPTC_BLOCK_DECODE
1174 write_bits(struct bit_writer
*writer
, int n_bits
, int value
)
1177 if (n_bits
+ writer
->pos
>= 8) {
1178 *(writer
->dst
++) = writer
->buf
| (value
<< writer
->pos
);
1180 value
>>= (8 - writer
->pos
);
1181 n_bits
-= (8 - writer
->pos
);
1184 writer
->buf
|= value
<< writer
->pos
;
1185 writer
->pos
+= n_bits
;
1188 } while (n_bits
> 0);
/**
 * Computes the average luminance and average alpha of a region of texels.
 *
 * Each texel is read as four bytes; the luminance of a texel is taken to be
 * the plain sum of its first three bytes and the alpha is the fourth byte.
 *
 * \param width              number of texels per row in the region
 * \param height             number of rows in the region
 * \param src                first texel of the region
 * \param src_rowstride      distance in bytes between the starts of two rows
 * \param average_luminance  receives the luminance sum divided by the number
 *                           of texels
 * \param average_alpha      receives the alpha sum divided by the number of
 *                           texels
 *
 * An empty region (width or height not positive) yields 0 for both averages
 * instead of dividing by zero.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   int luminance_sum = 0, alpha_sum = 0;
   int y, x;

   /* Nothing to average: avoid the division by zero below. */
   if (width <= 0 || height <= 0) {
      *average_luminance = 0;
      *average_alpha = 0;
      return;
   }

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance_sum += src[0] + src[1] + src[2];
         alpha_sum += src[3];
         src += 4;
      }
      /* Skip whatever padding separates this row from the next one */
      src += src_rowstride - width * 4;
   }

   *average_luminance = luminance_sum / (width * height);
   *average_alpha = alpha_sum / (width * height);
}
/**
 * Picks two RGBA endpoints for a region of texels.
 *
 * The texels are split into two groups twice: once by comparing their
 * luminance (sum of the three colour bytes) against average_luminance, and
 * once by comparing their alpha against average_alpha. The colour part of
 * each endpoint is the mean colour of one luminance group and the alpha part
 * is the mean alpha of one alpha group. When one of the groups is empty both
 * endpoints get the mean over the whole region.
 *
 * Finally the colour parts and the alpha parts are each swapped if needed so
 * that the first texel of the region lies on the endpoints[0] side of the
 * midpoint between the two endpoints.
 *
 * \param width              number of texels per row in the region
 * \param height             number of rows in the region
 * \param src                first texel of the region, four bytes per texel
 * \param src_rowstride      distance in bytes between the starts of two rows
 * \param average_luminance  threshold separating the two colour groups
 * \param average_alpha      threshold separating the two alpha groups
 * \param endpoints          receives the two endpoints in endpoints[0] and
 *                           endpoints[1]
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* The alpha grouping must look at the alpha byte (p[3]); testing
          * the blue byte here would sort texels by an unrelated channel.
          */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      /* One colour group is empty: use the overall mean for both endpoints */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      /* One alpha group is empty: use the overall mean for both endpoints */
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1310 write_rgb_indices_unorm(struct bit_writer
*writer
,
1311 int src_width
, int src_height
,
1312 const uint8_t *src
, int src_rowstride
,
1313 uint8_t endpoints
[][4])
1316 int endpoint_luminances
[2];
1321 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
1322 endpoint_luminances
[endpoint
] =
1323 endpoints
[endpoint
][0] +
1324 endpoints
[endpoint
][1] +
1325 endpoints
[endpoint
][2];
1328 /* If the endpoints have the same luminance then we'll just use index 0 for
1329 * all of the texels */
1330 if (endpoint_luminances
[0] == endpoint_luminances
[1]) {
1331 write_bits(writer
, BLOCK_SIZE
* BLOCK_SIZE
* 2 - 1, 0);
1335 for (y
= 0; y
< src_height
; y
++) {
1336 for (x
= 0; x
< src_width
; x
++) {
1337 luminance
= src
[0] + src
[1] + src
[2];
1339 index
= ((luminance
- endpoint_luminances
[0]) * 3 /
1340 (endpoint_luminances
[1] - endpoint_luminances
[0]));
1346 assert(x
!= 0 || y
!= 0 || index
< 2);
1348 write_bits(writer
, (x
== 0 && y
== 0) ? 1 : 2, index
);
1353 /* Pad the indices out to the block size */
1354 if (src_width
< BLOCK_SIZE
)
1355 write_bits(writer
, 2 * (BLOCK_SIZE
- src_width
), 0);
1357 src
+= src_rowstride
- src_width
* 4;
1360 /* Pad the indices out to the block size */
1361 if (src_height
< BLOCK_SIZE
)
1362 write_bits(writer
, 2 * BLOCK_SIZE
* (BLOCK_SIZE
- src_height
), 0);
1366 write_alpha_indices_unorm(struct bit_writer
*writer
,
1367 int src_width
, int src_height
,
1368 const uint8_t *src
, int src_rowstride
,
1369 uint8_t endpoints
[][4])
1374 /* If the endpoints have the same alpha then we'll just use index 0 for
1375 * all of the texels */
1376 if (endpoints
[0][3] == endpoints
[1][3]) {
1377 write_bits(writer
, BLOCK_SIZE
* BLOCK_SIZE
* 3 - 1, 0);
1381 for (y
= 0; y
< src_height
; y
++) {
1382 for (x
= 0; x
< src_width
; x
++) {
1383 index
= (((int) src
[3] - (int) endpoints
[0][3]) * 7 /
1384 ((int) endpoints
[1][3] - endpoints
[0][3]));
1390 assert(x
!= 0 || y
!= 0 || index
< 4);
1392 /* The first index has one less bit */
1393 write_bits(writer
, (x
== 0 && y
== 0) ? 2 : 3, index
);
1398 /* Pad the indices out to the block size */
1399 if (src_width
< BLOCK_SIZE
)
1400 write_bits(writer
, 3 * (BLOCK_SIZE
- src_width
), 0);
1402 src
+= src_rowstride
- src_width
* 4;
1405 /* Pad the indices out to the block size */
1406 if (src_height
< BLOCK_SIZE
)
1407 write_bits(writer
, 3 * BLOCK_SIZE
* (BLOCK_SIZE
- src_height
), 0);
1411 compress_rgba_unorm_block(int src_width
, int src_height
,
1412 const uint8_t *src
, int src_rowstride
,
1415 int average_luminance
, average_alpha
;
1416 uint8_t endpoints
[2][4];
1417 struct bit_writer writer
;
1418 int component
, endpoint
;
1420 get_average_luminance_alpha_unorm(src_width
, src_height
, src
, src_rowstride
,
1421 &average_luminance
, &average_alpha
);
1422 get_rgba_endpoints_unorm(src_width
, src_height
, src
, src_rowstride
,
1423 average_luminance
, average_alpha
,
1430 write_bits(&writer
, 5, 0x10); /* mode 4 */
1431 write_bits(&writer
, 2, 0); /* rotation 0 */
1432 write_bits(&writer
, 1, 0); /* index selection bit */
1434 /* Write the color endpoints */
1435 for (component
= 0; component
< 3; component
++)
1436 for (endpoint
= 0; endpoint
< 2; endpoint
++)
1437 write_bits(&writer
, 5, endpoints
[endpoint
][component
] >> 3);
1439 /* Write the alpha endpoints */
1440 for (endpoint
= 0; endpoint
< 2; endpoint
++)
1441 write_bits(&writer
, 6, endpoints
[endpoint
][3] >> 2);
1443 write_rgb_indices_unorm(&writer
,
1444 src_width
, src_height
,
1447 write_alpha_indices_unorm(&writer
,
1448 src_width
, src_height
,
1454 compress_rgba_unorm(int width
, int height
,
1455 const uint8_t *src
, int src_rowstride
,
1456 uint8_t *dst
, int dst_rowstride
)
1461 if (dst_rowstride
>= width
* 4)
1462 dst_row_diff
= dst_rowstride
- ((width
+ 3) & ~3) * 4;
1466 for (y
= 0; y
< height
; y
+= BLOCK_SIZE
) {
1467 for (x
= 0; x
< width
; x
+= BLOCK_SIZE
) {
1468 compress_rgba_unorm_block(MIN2(width
- x
, BLOCK_SIZE
),
1469 MIN2(height
- y
, BLOCK_SIZE
),
1470 src
+ x
* 4 + y
* src_rowstride
,
1475 dst
+= dst_row_diff
;
/**
 * Return the mean of (c0 + c1 + c2) over a rectangle of 3-float texels.
 *
 * \param width          number of texels per row
 * \param height         number of rows
 * \param src            first texel; texels are three floats apart
 * \param src_rowstride  distance in bytes between the starts of two rows
 *
 * Row addresses are computed from \p src with signed arithmetic. Mixing the
 * int stride with sizeof (an unsigned type) would turn any stride smaller
 * than a packed row, including a negative one, into a huge unsigned offset.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const int stride_floats = src_rowstride / (int) sizeof (float);
   float luminance_sum = 0;
   int y, x;

   for (y = 0; y < height; y++) {
      const float *texel = src + y * stride_floats;

      for (x = 0; x < width; x++) {
         luminance_sum += texel[0] + texel[1] + texel[2];
         texel += 3;
      }
   }

   return luminance_sum / (width * height);
}
/**
 * Limit \p value to the interval [-65504, 65504] when \p is_signed is set
 * and to [0, 65504] otherwise. Values already inside the interval are
 * returned unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
/**
 * Pick the two endpoints for a block of 3-float texels.
 *
 * Texels whose summed components are below \p average_luminance are
 * averaged into endpoints[0], all others into endpoints[1]. If every texel
 * lands on the same side, both endpoints are set to the mean of the whole
 * block. The endpoints are then passed through clamp_value() and swapped if
 * necessary so that the first texel lies on the endpoints[0] side of the
 * midpoint.
 *
 * \param width              number of texels per row
 * \param height             number of rows
 * \param src                first texel; texels are three floats apart
 * \param src_rowstride      distance in bytes between the starts of two rows
 * \param average_luminance  threshold used to split the texels
 * \param endpoints          receives the two endpoints
 * \param is_signed          forwarded to clamp_value()
 *
 * Row addresses are computed from \p src with signed arithmetic. Mixing the
 * int stride with sizeof (an unsigned type) would turn any stride smaller
 * than a packed row, including a negative one, into a huge unsigned offset.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int stride_floats = src_rowstride / (int) sizeof (float);
   const int n_texels = width * height;
   float endpoint_luminances[2];
   float midpoint;
   float sums[2][3];
   float temp[3];
   int left_endpoint_count = 0;
   int endpoint, component;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      const float *p = src + y * stride_floats;

      for (x = 0; x < width; x++) {
         const float luminance = p[0] + p[1] + p[2];

         if (luminance < average_luminance) {
            endpoint = 0;
            left_endpoint_count++;
         } else {
            endpoint = 1;
         }

         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         p += 3;
      }
   }

   if (left_endpoint_count == 0 || left_endpoint_count == n_texels) {
      /* One side is empty: use the overall mean for both endpoints rather
       * than dividing by a zero count */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / n_texels;
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / left_endpoint_count;
         endpoints[1][i] = sums[1][i] / (n_texels - left_endpoint_count);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * negative values if this is unsigned */
   for (endpoint = 0; endpoint < 2; endpoint++) {
      for (component = 0; component < 3; component++) {
         endpoints[endpoint][component] =
            clamp_value(endpoints[endpoint][component], is_signed);
      }
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], sizeof temp);
      memcpy(endpoints[0], endpoints[1], sizeof temp);
      memcpy(endpoints[1], temp, sizeof temp);
   }
}
1593 write_rgb_indices_float(struct bit_writer
*writer
,
1594 int src_width
, int src_height
,
1595 const float *src
, int src_rowstride
,
1596 float endpoints
[][3])
1599 float endpoint_luminances
[2];
1604 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
1605 endpoint_luminances
[endpoint
] =
1606 endpoints
[endpoint
][0] +
1607 endpoints
[endpoint
][1] +
1608 endpoints
[endpoint
][2];
1611 /* If the endpoints have the same luminance then we'll just use index 0 for
1612 * all of the texels */
1613 if (endpoint_luminances
[0] == endpoint_luminances
[1]) {
1614 write_bits(writer
, BLOCK_SIZE
* BLOCK_SIZE
* 4 - 1, 0);
1618 for (y
= 0; y
< src_height
; y
++) {
1619 for (x
= 0; x
< src_width
; x
++) {
1620 luminance
= src
[0] + src
[1] + src
[2];
1622 index
= ((luminance
- endpoint_luminances
[0]) * 15 /
1623 (endpoint_luminances
[1] - endpoint_luminances
[0]));
1626 else if (index
> 15)
1629 assert(x
!= 0 || y
!= 0 || index
< 8);
1631 write_bits(writer
, (x
== 0 && y
== 0) ? 3 : 4, index
);
1636 /* Pad the indices out to the block size */
1637 if (src_width
< BLOCK_SIZE
)
1638 write_bits(writer
, 4 * (BLOCK_SIZE
- src_width
), 0);
1640 src
+= (src_rowstride
- src_width
* 3 * sizeof (float)) / sizeof (float);
1643 /* Pad the indices out to the block size */
1644 if (src_height
< BLOCK_SIZE
)
1645 write_bits(writer
, 4 * BLOCK_SIZE
* (BLOCK_SIZE
- src_height
), 0);
/**
 * Convert a float endpoint component into the integer that is written to the
 * block.
 *
 * The value is converted with _mesa_float_to_half() and the result rescaled:
 * by 64/31 then >> 6 for unsigned data, by 32/31 then >> 6 on the magnitude
 * for signed data. A signed negative value is returned as the negated
 * magnitude masked to 10 bits. Unsigned values that are <= 0 yield 0.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   const int ten_bit_mask = (1 << 10) - 1;
   bool negative;
   int half;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   /* Bit 15 is tested as the sign; scale only the remaining magnitude bits */
   negative = (half & 0x8000) != 0;
   if (negative)
      half &= 0x7fff;

   half = (32 * half / 31) >> 6;

   return negative ? (-half & ten_bit_mask) : half;
}
1679 compress_rgb_float_block(int src_width
, int src_height
,
1680 const float *src
, int src_rowstride
,
1684 float average_luminance
;
1685 float endpoints
[2][3];
1686 struct bit_writer writer
;
1687 int component
, endpoint
;
1691 get_average_luminance_float(src_width
, src_height
, src
, src_rowstride
);
1692 get_endpoints_float(src_width
, src_height
, src
, src_rowstride
,
1693 average_luminance
, endpoints
, is_signed
);
1699 write_bits(&writer
, 5, 3); /* mode 3 */
1701 /* Write the endpoints */
1702 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
1703 for (component
= 0; component
< 3; component
++) {
1705 get_endpoint_value(endpoints
[endpoint
][component
], is_signed
);
1706 write_bits(&writer
, 10, endpoint_value
);
1710 write_rgb_indices_float(&writer
,
1711 src_width
, src_height
,
1717 compress_rgb_float(int width
, int height
,
1718 const float *src
, int src_rowstride
,
1719 uint8_t *dst
, int dst_rowstride
,
1725 if (dst_rowstride
>= width
* 4)
1726 dst_row_diff
= dst_rowstride
- ((width
+ 3) & ~3) * 4;
1730 for (y
= 0; y
< height
; y
+= BLOCK_SIZE
) {
1731 for (x
= 0; x
< width
; x
+= BLOCK_SIZE
) {
1732 compress_rgb_float_block(MIN2(width
- x
, BLOCK_SIZE
),
1733 MIN2(height
- y
, BLOCK_SIZE
),
1735 y
* src_rowstride
/ sizeof (float),
1741 dst
+= dst_row_diff
;