2 * Copyright (C) 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * Included by texcompress_bptc and gallium to define BPTC decoding routines.
28 #ifndef TEXCOMPRESS_BPTC_TMP_H
29 #define TEXCOMPRESS_BPTC_TMP_H
31 #include "util/format_srgb.h"
32 #include "util/half_float.h"
36 #define N_PARTITIONS 64
37 #define BLOCK_BYTES 16
39 struct bptc_unorm_mode
{
42 bool has_rotation_bits
;
43 bool has_index_selection_bit
;
46 bool has_endpoint_pbits
;
47 bool has_shared_pbits
;
49 int n_secondary_index_bits
;
52 struct bptc_float_bitfield
{
60 struct bptc_float_mode
{
62 bool transformed_endpoints
;
67 struct bptc_float_bitfield bitfields
[24];
76 static const struct bptc_unorm_mode
77 bptc_unorm_modes
[] = {
78 /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
79 /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
80 /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
81 /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
82 /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
83 /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
84 /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
85 /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
88 static const struct bptc_float_mode
89 bptc_float_modes
[] = {
91 { false, true, 5, 10, 3, { 5, 5, 5 },
92 { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
93 { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
94 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
95 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
96 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
97 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
98 { 3, 2, 3, 1, false },
102 { false, true, 5, 7, 3, { 6, 6, 6 },
103 { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
104 { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
105 { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
106 { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
107 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
108 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
109 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
110 { 2, 0, 0, 6, false },
111 { 3, 0, 0, 6, false },
115 { false, true, 5, 11, 3, { 5, 4, 4 },
116 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
117 { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
118 { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
119 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
120 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
121 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
125 { false, false, 0, 10, 4, { 10, 10, 10 },
126 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
127 { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
131 { false, true, 5, 11, 3, { 4, 5, 4 },
132 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
133 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
134 { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
135 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
136 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
137 { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
138 { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
142 { false, true, 0, 11, 4, { 9, 9, 9 },
143 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
144 { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
145 { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
149 { false, true, 5, 11, 3, { 4, 4, 5 },
150 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
151 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
152 { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
153 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
154 { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
155 { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
156 { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
160 { false, true, 0, 12, 4, { 8, 8, 8 },
161 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
162 { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
163 { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
167 { false, true, 5, 9, 3, { 5, 5, 5 },
168 { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
169 { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
170 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
171 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
172 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
173 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
174 { 3, 2, 3, 1, false },
178 { false, true, 0, 16, 4, { 4, 4, 4 },
179 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
180 { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
181 { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
185 { false, true, 5, 8, 3, { 6, 5, 5 },
186 { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
187 { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
188 { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
189 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
190 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
191 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
192 { 3, 0, 0, 6, false },
196 { true /* reserved */ },
198 { false, true, 5, 8, 3, { 5, 6, 5 },
199 { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
200 { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
201 { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
202 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
203 { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
204 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
205 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
209 { true /* reserved */ },
211 { false, true, 5, 8, 3, { 5, 5, 6 },
212 { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
213 { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
214 { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
215 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
216 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
217 { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
218 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
222 { true /* reserved */ },
224 { false, false, 5, 6, 3, { 6, 6, 6 },
225 { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
226 { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
227 { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
228 { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
229 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
230 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
231 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
232 { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
236 { true /* reserved */ },
/* This partition table is used when the mode has two subsets. Each
 * partition is represented by a 32-bit value which gives 2 bits per texel
 * within the block. The value of the two bits represents which subset to use
 * (0 or 1).
 */
244 static const uint32_t
245 partition_table1
[N_PARTITIONS
] = {
246 0x50505050U
, 0x40404040U
, 0x54545454U
, 0x54505040U
,
247 0x50404000U
, 0x55545450U
, 0x55545040U
, 0x54504000U
,
248 0x50400000U
, 0x55555450U
, 0x55544000U
, 0x54400000U
,
249 0x55555440U
, 0x55550000U
, 0x55555500U
, 0x55000000U
,
250 0x55150100U
, 0x00004054U
, 0x15010000U
, 0x00405054U
,
251 0x00004050U
, 0x15050100U
, 0x05010000U
, 0x40505054U
,
252 0x00404050U
, 0x05010100U
, 0x14141414U
, 0x05141450U
,
253 0x01155440U
, 0x00555500U
, 0x15014054U
, 0x05414150U
,
254 0x44444444U
, 0x55005500U
, 0x11441144U
, 0x05055050U
,
255 0x05500550U
, 0x11114444U
, 0x41144114U
, 0x44111144U
,
256 0x15055054U
, 0x01055040U
, 0x05041050U
, 0x05455150U
,
257 0x14414114U
, 0x50050550U
, 0x41411414U
, 0x00141400U
,
258 0x00041504U
, 0x00105410U
, 0x10541000U
, 0x04150400U
,
259 0x50410514U
, 0x41051450U
, 0x05415014U
, 0x14054150U
,
260 0x41050514U
, 0x41505014U
, 0x40011554U
, 0x54150140U
,
261 0x50505500U
, 0x00555050U
, 0x15151010U
, 0x54540404U
,
/* This partition table is used when the mode has three subsets. In this case
 * the values can be 0, 1 or 2.
 */
267 static const uint32_t
268 partition_table2
[N_PARTITIONS
] = {
269 0xaa685050U
, 0x6a5a5040U
, 0x5a5a4200U
, 0x5450a0a8U
,
270 0xa5a50000U
, 0xa0a05050U
, 0x5555a0a0U
, 0x5a5a5050U
,
271 0xaa550000U
, 0xaa555500U
, 0xaaaa5500U
, 0x90909090U
,
272 0x94949494U
, 0xa4a4a4a4U
, 0xa9a59450U
, 0x2a0a4250U
,
273 0xa5945040U
, 0x0a425054U
, 0xa5a5a500U
, 0x55a0a0a0U
,
274 0xa8a85454U
, 0x6a6a4040U
, 0xa4a45000U
, 0x1a1a0500U
,
275 0x0050a4a4U
, 0xaaa59090U
, 0x14696914U
, 0x69691400U
,
276 0xa08585a0U
, 0xaa821414U
, 0x50a4a450U
, 0x6a5a0200U
,
277 0xa9a58000U
, 0x5090a0a8U
, 0xa8a09050U
, 0x24242424U
,
278 0x00aa5500U
, 0x24924924U
, 0x24499224U
, 0x50a50a50U
,
279 0x500aa550U
, 0xaaaa4444U
, 0x66660000U
, 0xa5a0a5a0U
,
280 0x50a050a0U
, 0x69286928U
, 0x44aaaa44U
, 0x66666600U
,
281 0xaa444444U
, 0x54a854a8U
, 0x95809580U
, 0x96969600U
,
282 0xa85454a8U
, 0x80959580U
, 0xaa141414U
, 0x96960000U
,
283 0xaaaa1414U
, 0xa05050a0U
, 0xa0a5a5a0U
, 0x96000000U
,
284 0x40804080U
, 0xa9a8a9a8U
, 0xaaaaaa44U
, 0x2a4a5254U
288 anchor_indices
[][N_PARTITIONS
] = {
289 /* Anchor index values for the second subset of two-subset partitioning */
291 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
292 0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
293 0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
294 0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
297 /* Anchor index values for the second subset of three-subset partitioning */
299 0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
300 0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
301 0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
302 0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
305 /* Anchor index values for the third subset of three-subset
309 0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
310 0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
311 0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
312 0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
317 extract_bits(const uint8_t *block
,
321 int byte_index
= offset
/ 8;
322 int bit_index
= offset
% 8;
323 int n_bits_in_byte
= MIN2(n_bits
, 8 - bit_index
);
328 result
|= ((block
[byte_index
] >> bit_index
) &
329 ((1 << n_bits_in_byte
) - 1)) << bit
;
331 n_bits
-= n_bits_in_byte
;
336 bit
+= n_bits_in_byte
;
339 n_bits_in_byte
= MIN2(n_bits
, 8);
/* Expands an n-bit quantity into a byte by copying the most-significant
 * bits into the unused least-significant bits.
 *
 * byte:   the value to expand, held in the low n_bits bits.
 * n_bits: number of significant bits in byte.
 *
 * Returns the 8-bit expansion. For n_bits in 4..8 the result is exactly
 * (byte << (8 - n_bits)) | (byte >> (2 * n_bits - 8)). For n_bits in 1..3
 * that expression would shift by a negative amount, so the pattern is
 * repeated as many times as needed to fill the byte instead. n_bits <= 0
 * yields 0 and n_bits >= 8 returns byte unchanged.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   unsigned aligned, filled;
   int shift;

   if (n_bits <= 0)
      return 0;

   if (n_bits >= 8)
      return byte;

   /* Move the value to the top of the byte ... */
   aligned = (unsigned) byte << (8 - n_bits);
   filled = aligned;

   /* ... then keep copying it downwards until all 8 bits are covered. A
    * single copy is enough when n_bits >= 4.
    */
   for (shift = n_bits; shift < 8; shift += n_bits)
      filled |= aligned >> shift;

   return (uint8_t) filled;
}
354 extract_unorm_endpoints(const struct bptc_unorm_mode
*mode
,
355 const uint8_t *block
,
357 uint8_t endpoints
[][4])
365 /* Extract each color component */
366 for (component
= 0; component
< 3; component
++) {
367 for (subset
= 0; subset
< mode
->n_subsets
; subset
++) {
368 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
369 endpoints
[subset
* 2 + endpoint
][component
] =
370 extract_bits(block
, bit_offset
, mode
->n_color_bits
);
371 bit_offset
+= mode
->n_color_bits
;
376 /* Extract the alpha values */
377 if (mode
->n_alpha_bits
> 0) {
378 for (subset
= 0; subset
< mode
->n_subsets
; subset
++) {
379 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
380 endpoints
[subset
* 2 + endpoint
][3] =
381 extract_bits(block
, bit_offset
, mode
->n_alpha_bits
);
382 bit_offset
+= mode
->n_alpha_bits
;
388 for (subset
= 0; subset
< mode
->n_subsets
; subset
++)
389 for (endpoint
= 0; endpoint
< 2; endpoint
++)
390 endpoints
[subset
* 2 + endpoint
][3] = 255;
395 /* Add in the p-bits */
396 if (mode
->has_endpoint_pbits
) {
397 for (subset
= 0; subset
< mode
->n_subsets
; subset
++) {
398 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
399 pbit
= extract_bits(block
, bit_offset
, 1);
402 for (component
= 0; component
< n_components
; component
++) {
403 endpoints
[subset
* 2 + endpoint
][component
] <<= 1;
404 endpoints
[subset
* 2 + endpoint
][component
] |= pbit
;
408 } else if (mode
->has_shared_pbits
) {
409 for (subset
= 0; subset
< mode
->n_subsets
; subset
++) {
410 pbit
= extract_bits(block
, bit_offset
, 1);
413 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
414 for (component
= 0; component
< n_components
; component
++) {
415 endpoints
[subset
* 2 + endpoint
][component
] <<= 1;
416 endpoints
[subset
* 2 + endpoint
][component
] |= pbit
;
422 /* Expand the n-bit values to a byte */
423 for (subset
= 0; subset
< mode
->n_subsets
; subset
++) {
424 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
425 for (component
= 0; component
< 3; component
++) {
426 endpoints
[subset
* 2 + endpoint
][component
] =
427 expand_component(endpoints
[subset
* 2 + endpoint
][component
],
429 mode
->has_endpoint_pbits
+
430 mode
->has_shared_pbits
);
433 if (mode
->n_alpha_bits
> 0) {
434 endpoints
[subset
* 2 + endpoint
][3] =
435 expand_component(endpoints
[subset
* 2 + endpoint
][3],
437 mode
->has_endpoint_pbits
+
438 mode
->has_shared_pbits
);
447 is_anchor(int n_subsets
,
458 return anchor_indices
[0][partition_num
] == texel
;
460 return (anchor_indices
[1][partition_num
] == texel
||
461 anchor_indices
[2][partition_num
] == texel
);
469 count_anchors_before_texel(int n_subsets
,
482 if (texel
> anchor_indices
[0][partition_num
])
486 if (texel
> anchor_indices
[1][partition_num
])
488 if (texel
> anchor_indices
[2][partition_num
])
/* Blends two endpoint values using a fixed weight table.
 *
 * a, b:       the two endpoint values.
 * index:      the texel's index; selects the weight.
 * index_bits: width of the index in bits; selects the weight table. Only
 *             2, 3 and 4 have a table, any other value must not be passed.
 *
 * The weights run from 0 to 64, so index 0 returns a and the largest index
 * for a given width returns b. The result is rounded by adding 32 before
 * the final shift by 6.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t weights2[] = { 0, 21, 43, 64 };
   static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t weights4[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed by index_bits; there are no tables for widths 0 and 1 */
   static const uint8_t *const weights[] = {
      NULL, NULL, weights2, weights3, weights4
   };
   int weight;

   weight = weights[index_bits][index];

   return ((64 - weight) * a + weight * b + 32) >> 6;
}
519 apply_rotation(int rotation
,
529 t
= result
[rotation
];
530 result
[rotation
] = result
[3];
535 fetch_rgba_unorm_from_block(const uint8_t *block
,
539 int mode_num
= ffs(block
[0]);
540 const struct bptc_unorm_mode
*mode
;
541 int bit_offset
, secondary_bit_offset
;
549 int anchors_before_texel
;
551 uint8_t endpoints
[3 * 2][4];
556 /* According to the spec this mode is reserved and shouldn't be used. */
557 memset(result
, 0, 3);
562 mode
= bptc_unorm_modes
+ mode_num
- 1;
563 bit_offset
= mode_num
;
565 partition_num
= extract_bits(block
, bit_offset
, mode
->n_partition_bits
);
566 bit_offset
+= mode
->n_partition_bits
;
568 switch (mode
->n_subsets
) {
573 subsets
= partition_table1
[partition_num
];
576 subsets
= partition_table2
[partition_num
];
583 if (mode
->has_rotation_bits
) {
584 rotation
= extract_bits(block
, bit_offset
, 2);
590 if (mode
->has_index_selection_bit
) {
591 index_selection
= extract_bits(block
, bit_offset
, 1);
597 bit_offset
= extract_unorm_endpoints(mode
, block
, bit_offset
, endpoints
);
599 anchors_before_texel
= count_anchors_before_texel(mode
->n_subsets
,
600 partition_num
, texel
);
602 /* Calculate the offset to the secondary index */
603 secondary_bit_offset
= (bit_offset
+
604 BLOCK_SIZE
* BLOCK_SIZE
* mode
->n_index_bits
-
606 mode
->n_secondary_index_bits
* texel
-
607 anchors_before_texel
);
609 /* Calculate the offset to the primary index for this texel */
610 bit_offset
+= mode
->n_index_bits
* texel
- anchors_before_texel
;
612 subset_num
= (subsets
>> (texel
* 2)) & 3;
614 anchor
= is_anchor(mode
->n_subsets
, partition_num
, texel
);
616 index_bits
= mode
->n_index_bits
;
619 indices
[0] = extract_bits(block
, bit_offset
, index_bits
);
621 if (mode
->n_secondary_index_bits
) {
622 index_bits
= mode
->n_secondary_index_bits
;
625 indices
[1] = extract_bits(block
, secondary_bit_offset
, index_bits
);
628 index
= indices
[index_selection
];
629 index_bits
= (index_selection
?
630 mode
->n_secondary_index_bits
:
633 for (component
= 0; component
< 3; component
++)
634 result
[component
] = interpolate(endpoints
[subset_num
* 2][component
],
635 endpoints
[subset_num
* 2 + 1][component
],
639 /* Alpha uses the opposite index from the color components */
640 if (mode
->n_secondary_index_bits
&& !index_selection
) {
642 index_bits
= mode
->n_secondary_index_bits
;
645 index_bits
= mode
->n_index_bits
;
648 result
[3] = interpolate(endpoints
[subset_num
* 2][3],
649 endpoints
[subset_num
* 2 + 1][3],
653 apply_rotation(rotation
, result
);
656 #ifdef BPTC_BLOCK_DECODE
658 decompress_rgba_unorm_block(int src_width
, int src_height
,
659 const uint8_t *block
,
660 uint8_t *dst_row
, int dst_rowstride
)
662 int mode_num
= ffs(block
[0]);
663 const struct bptc_unorm_mode
*mode
;
664 int bit_offset
, secondary_bit_offset
;
672 int anchors_before_texel
;
674 uint8_t endpoints
[3 * 2][4];
680 /* According to the spec this mode is reserved and shouldn't be used. */
681 for(y
= 0; y
< src_height
; y
+= 1) {
682 uint8_t *result
= dst_row
;
683 memset(result
, 0, 4 * src_width
);
684 for(x
= 0; x
< src_width
; x
+= 1) {
688 dst_row
+= dst_rowstride
;
693 mode
= bptc_unorm_modes
+ mode_num
- 1;
694 bit_offset
= mode_num
;
696 partition_num
= extract_bits(block
, bit_offset
, mode
->n_partition_bits
);
697 bit_offset
+= mode
->n_partition_bits
;
699 switch (mode
->n_subsets
) {
704 subsets
= partition_table1
[partition_num
];
707 subsets
= partition_table2
[partition_num
];
714 if (mode
->has_rotation_bits
) {
715 rotation
= extract_bits(block
, bit_offset
, 2);
721 if (mode
->has_index_selection_bit
) {
722 index_selection
= extract_bits(block
, bit_offset
, 1);
728 bit_offset
= extract_unorm_endpoints(mode
, block
, bit_offset
, endpoints
);
730 for(y
= 0; y
< src_height
; y
+= 1) {
731 uint8_t *result
= dst_row
;
732 for(x
= 0; x
< src_width
; x
+= 1) {
736 anchors_before_texel
= count_anchors_before_texel(mode
->n_subsets
,
740 /* Calculate the offset to the secondary index */
741 secondary_bit_offset
= (bit_offset
+
742 BLOCK_SIZE
* BLOCK_SIZE
* mode
->n_index_bits
-
744 mode
->n_secondary_index_bits
* texel
-
745 anchors_before_texel
);
747 /* Calculate the offset to the primary index for this texel */
748 bit_offset
+= mode
->n_index_bits
* texel
- anchors_before_texel
;
750 subset_num
= (subsets
>> (texel
* 2)) & 3;
752 anchor
= is_anchor(mode
->n_subsets
, partition_num
, texel
);
754 index_bits
= mode
->n_index_bits
;
757 indices
[0] = extract_bits(block
, bit_offset
, index_bits
);
759 if (mode
->n_secondary_index_bits
) {
760 index_bits
= mode
->n_secondary_index_bits
;
763 indices
[1] = extract_bits(block
, secondary_bit_offset
, index_bits
);
766 index
= indices
[index_selection
];
767 index_bits
= (index_selection
?
768 mode
->n_secondary_index_bits
:
771 for (component
= 0; component
< 3; component
++)
772 result
[component
] = interpolate(endpoints
[subset_num
* 2][component
],
773 endpoints
[subset_num
* 2 + 1][component
],
777 /* Alpha uses the opposite index from the color components */
778 if (mode
->n_secondary_index_bits
&& !index_selection
) {
780 index_bits
= mode
->n_secondary_index_bits
;
783 index_bits
= mode
->n_index_bits
;
786 result
[3] = interpolate(endpoints
[subset_num
* 2][3],
787 endpoints
[subset_num
* 2 + 1][3],
791 apply_rotation(rotation
, result
);
794 dst_row
+= dst_rowstride
;
799 decompress_rgba_unorm(int width
, int height
,
800 const uint8_t *src
, int src_rowstride
,
801 uint8_t *dst
, int dst_rowstride
)
806 if (src_rowstride
>= width
* 4)
807 src_row_diff
= src_rowstride
- ((width
+ 3) & ~3) * 4;
811 for (y
= 0; y
< height
; y
+= BLOCK_SIZE
) {
812 for (x
= 0; x
< width
; x
+= BLOCK_SIZE
) {
813 decompress_rgba_unorm_block(MIN2(width
- x
, BLOCK_SIZE
),
814 MIN2(height
- y
, BLOCK_SIZE
),
816 dst
+ x
* 4 + y
* dst_rowstride
,
823 #endif // BPTC_BLOCK_DECODE
/* Sign-extends an n_bits-wide two's complement field to a full int32_t.
 *
 * value:  holds the field in its low n_bits bits.
 * n_bits: width of the field.
 *
 * If bit (n_bits - 1) of value is set, every bit from n_bits upwards is set
 * as well; otherwise value is returned untouched. A width outside 1..31
 * leaves nothing to extend (and could not be shifted safely), so value is
 * returned as is.
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   uint32_t sign_bit, high_bits;

   if (n_bits <= 0 || n_bits >= 32)
      return value;

   /* Work on unsigned masks: left-shifting a negative signed value, as in
    * (~(int32_t) 0) << n_bits, is undefined behaviour in C.
    */
   sign_bit = UINT32_C(1) << (n_bits - 1);
   high_bits = ~(sign_bit | (sign_bit - 1));

   if ((uint32_t) value & sign_bit)
      value = (int32_t) ((uint32_t) value | high_bits);

   return value;
}
837 signed_unquantize(int value
, int n_endpoint_bits
)
841 if (n_endpoint_bits
>= 16)
854 if (value
>= (1 << (n_endpoint_bits
- 1)) - 1)
857 value
= ((value
<< 15) + 0x4000) >> (n_endpoint_bits
- 1);
866 unsigned_unquantize(int value
, int n_endpoint_bits
)
868 if (n_endpoint_bits
>= 15)
874 if (value
== (1 << n_endpoint_bits
) - 1)
877 return ((value
<< 15) + 0x4000) >> (n_endpoint_bits
- 1);
881 extract_float_endpoints(const struct bptc_float_mode
*mode
,
882 const uint8_t *block
,
884 int32_t endpoints
[][3],
887 const struct bptc_float_bitfield
*bitfield
;
888 int endpoint
, component
;
893 if (mode
->n_partition_bits
)
898 memset(endpoints
, 0, sizeof endpoints
[0][0] * n_endpoints
* 3);
900 for (bitfield
= mode
->bitfields
; bitfield
->endpoint
!= -1; bitfield
++) {
901 value
= extract_bits(block
, bit_offset
, bitfield
->n_bits
);
902 bit_offset
+= bitfield
->n_bits
;
904 if (bitfield
->reverse
) {
905 for (i
= 0; i
< bitfield
->n_bits
; i
++) {
906 if (value
& (1 << i
))
907 endpoints
[bitfield
->endpoint
][bitfield
->component
] |=
908 1 << ((bitfield
->n_bits
- 1 - i
) + bitfield
->offset
);
911 endpoints
[bitfield
->endpoint
][bitfield
->component
] |=
912 value
<< bitfield
->offset
;
916 if (mode
->transformed_endpoints
) {
917 /* The endpoints are specified as signed offsets from e0 */
918 for (endpoint
= 1; endpoint
< n_endpoints
; endpoint
++) {
919 for (component
= 0; component
< 3; component
++) {
920 value
= sign_extend(endpoints
[endpoint
][component
],
921 mode
->n_delta_bits
[component
]);
922 endpoints
[endpoint
][component
] =
923 ((endpoints
[0][component
] + value
) &
924 ((1 << mode
->n_endpoint_bits
) - 1));
930 for (endpoint
= 0; endpoint
< n_endpoints
; endpoint
++) {
931 for (component
= 0; component
< 3; component
++) {
932 value
= sign_extend(endpoints
[endpoint
][component
],
933 mode
->n_endpoint_bits
);
934 endpoints
[endpoint
][component
] =
935 signed_unquantize(value
, mode
->n_endpoint_bits
);
939 for (endpoint
= 0; endpoint
< n_endpoints
; endpoint
++) {
940 for (component
= 0; component
< 3; component
++) {
941 endpoints
[endpoint
][component
] =
942 unsigned_unquantize(endpoints
[endpoint
][component
],
943 mode
->n_endpoint_bits
);
/* Final scaling step for an interpolated value in the unsigned float
 * format: multiplies by 31/64 using truncating integer division.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   return value * 31 / 64;
}
/* Final scaling step for an interpolated value in the signed float format.
 *
 * The magnitude is multiplied by 31/32 with truncating integer division.
 * A negative input is returned as that scaled magnitude with bit 15
 * (0x8000) set, rather than as a negative integer.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   if (value < 0)
      return (-value * 31 / 32) | 0x8000;
   else
      return value * 31 / 32;
}
967 fetch_rgb_float_from_block(const uint8_t *block
,
973 const struct bptc_float_mode
*mode
;
979 int anchors_before_texel
;
980 int32_t endpoints
[2 * 2][3];
986 if (block
[0] & 0x2) {
987 mode_num
= (((block
[0] >> 1) & 0xe) | (block
[0] & 1)) + 2;
990 mode_num
= block
[0] & 3;
994 mode
= bptc_float_modes
+ mode_num
;
996 if (mode
->reserved
) {
997 memset(result
, 0, sizeof result
[0] * 3);
1002 bit_offset
= extract_float_endpoints(mode
, block
, bit_offset
,
1003 endpoints
, is_signed
);
1005 if (mode
->n_partition_bits
) {
1006 partition_num
= extract_bits(block
, bit_offset
, mode
->n_partition_bits
);
1007 bit_offset
+= mode
->n_partition_bits
;
1009 subsets
= partition_table1
[partition_num
];
1017 anchors_before_texel
=
1018 count_anchors_before_texel(n_subsets
, partition_num
, texel
);
1020 /* Calculate the offset to the primary index for this texel */
1021 bit_offset
+= mode
->n_index_bits
* texel
- anchors_before_texel
;
1023 subset_num
= (subsets
>> (texel
* 2)) & 3;
1025 index_bits
= mode
->n_index_bits
;
1026 if (is_anchor(n_subsets
, partition_num
, texel
))
1028 index
= extract_bits(block
, bit_offset
, index_bits
);
1030 for (component
= 0; component
< 3; component
++) {
1031 value
= interpolate(endpoints
[subset_num
* 2][component
],
1032 endpoints
[subset_num
* 2 + 1][component
],
1034 mode
->n_index_bits
);
1037 value
= finish_signed_unquantize(value
);
1039 value
= finish_unsigned_unquantize(value
);
1041 result
[component
] = _mesa_half_to_float(value
);
1047 #ifdef BPTC_BLOCK_DECODE
1049 decompress_rgb_float_block(unsigned src_width
, unsigned src_height
,
1050 const uint8_t *block
,
1051 float *dst_row
, unsigned dst_rowstride
,
1055 const struct bptc_float_mode
*mode
;
1061 int anchors_before_texel
;
1062 int32_t endpoints
[2 * 2][3];
1069 if (block
[0] & 0x2) {
1070 mode_num
= (((block
[0] >> 1) & 0xe) | (block
[0] & 1)) + 2;
1073 mode_num
= block
[0] & 3;
1077 mode
= bptc_float_modes
+ mode_num
;
1079 if (mode
->reserved
) {
1080 for(y
= 0; y
< src_height
; y
+= 1) {
1081 float *result
= dst_row
;
1082 memset(result
, 0, sizeof result
[0] * 4 * src_width
);
1083 for(x
= 0; x
< src_width
; x
+= 1) {
1087 dst_row
+= dst_rowstride
/ sizeof dst_row
[0];
1092 bit_offset
= extract_float_endpoints(mode
, block
, bit_offset
,
1093 endpoints
, is_signed
);
1095 if (mode
->n_partition_bits
) {
1096 partition_num
= extract_bits(block
, bit_offset
, mode
->n_partition_bits
);
1097 bit_offset
+= mode
->n_partition_bits
;
1099 subsets
= partition_table1
[partition_num
];
1107 for(y
= 0; y
< src_height
; y
+= 1) {
1108 float *result
= dst_row
;
1109 for(x
= 0; x
< src_width
; x
+= 1) {
1114 anchors_before_texel
=
1115 count_anchors_before_texel(n_subsets
, partition_num
, texel
);
1117 /* Calculate the offset to the primary index for this texel */
1118 bit_offset
+= mode
->n_index_bits
* texel
- anchors_before_texel
;
1120 subset_num
= (subsets
>> (texel
* 2)) & 3;
1122 index_bits
= mode
->n_index_bits
;
1123 if (is_anchor(n_subsets
, partition_num
, texel
))
1125 index
= extract_bits(block
, bit_offset
, index_bits
);
1127 for (component
= 0; component
< 3; component
++) {
1128 value
= interpolate(endpoints
[subset_num
* 2][component
],
1129 endpoints
[subset_num
* 2 + 1][component
],
1131 mode
->n_index_bits
);
1134 value
= finish_signed_unquantize(value
);
1136 value
= finish_unsigned_unquantize(value
);
1138 result
[component
] = _mesa_half_to_float(value
);
1144 dst_row
+= dst_rowstride
/ sizeof dst_row
[0];
1149 decompress_rgb_float(int width
, int height
,
1150 const uint8_t *src
, int src_rowstride
,
1151 float *dst
, int dst_rowstride
, bool is_signed
)
1156 if (src_rowstride
>= width
* 4)
1157 src_row_diff
= src_rowstride
- ((width
+ 3) & ~3) * 4;
1161 for (y
= 0; y
< height
; y
+= BLOCK_SIZE
) {
1162 for (x
= 0; x
< width
; x
+= BLOCK_SIZE
) {
1163 decompress_rgb_float_block(MIN2(width
- x
, BLOCK_SIZE
),
1164 MIN2(height
- y
, BLOCK_SIZE
),
1167 (y
* dst_rowstride
/ sizeof dst
[0])),
1168 dst_rowstride
, is_signed
);
1171 src
+= src_row_diff
;
1174 #endif // BPTC_BLOCK_DECODE
1177 write_bits(struct bit_writer
*writer
, int n_bits
, int value
)
1180 if (n_bits
+ writer
->pos
>= 8) {
1181 *(writer
->dst
++) = writer
->buf
| (value
<< writer
->pos
);
1183 value
>>= (8 - writer
->pos
);
1184 n_bits
-= (8 - writer
->pos
);
1187 writer
->buf
|= value
<< writer
->pos
;
1188 writer
->pos
+= n_bits
;
1191 } while (n_bits
> 0);
/* Computes the mean luminance and mean alpha over a region of 4-byte
 * texels.
 *
 * width, height:     size of the region in texels.
 * src:               first texel; each texel is 4 bytes, with alpha in
 *                    byte 3.
 * src_rowstride:     distance in bytes between the starts of two rows.
 * average_luminance: receives the mean of (byte 0 + byte 1 + byte 2), i.e.
 *                    the channel sum, not divided by three.
 * average_alpha:     receives the mean of byte 3.
 *
 * Both means use truncating integer division. An empty region (width or
 * height <= 0) reports 0 for both instead of dividing by zero.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   int luminance_sum = 0, alpha_sum = 0;
   int n_texels;
   int y, x;

   if (width <= 0 || height <= 0) {
      *average_luminance = 0;
      *average_alpha = 0;
      return;
   }

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance_sum += src[0] + src[1] + src[2];
         alpha_sum += src[3];
         src += 4;
      }
      /* Skip whatever lies between the end of this row and the next one */
      src += src_rowstride - width * 4;
   }

   n_texels = width * height;

   *average_luminance = luminance_sum / n_texels;
   *average_alpha = alpha_sum / n_texels;
}
/* Picks two RGBA endpoints for a region of 4-byte texels.
 *
 * width, height:     size of the region in texels.
 * src:               first texel; 4 bytes per texel, alpha in byte 3.
 * src_rowstride:     distance in bytes between the starts of two rows.
 * average_luminance: threshold for the colour split, compared against the
 *                    sum of bytes 0..2 of each texel.
 * average_alpha:     threshold for the alpha split, compared against
 *                    byte 3 of each texel.
 * endpoints:         receives endpoints[0] and endpoints[1].
 *
 * Colour and alpha are handled independently. Texels below the threshold
 * go into the "left" bucket (endpoint 0), the rest into the "right" bucket
 * (endpoint 1), and each endpoint is the per-channel mean of its bucket. If
 * one bucket is empty both endpoints get the mean over the whole region.
 *
 * The region must contain at least one texel.
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Bucket alpha by the alpha channel itself (byte 3). Testing
          * p[2] here would sort the alpha values by the blue channel.
          */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   /* The first texel must lie on the same side of the midpoint as
    * endpoint 0; if it does not, exchange the colour endpoints.
    */
   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1313 write_rgb_indices_unorm(struct bit_writer
*writer
,
1314 int src_width
, int src_height
,
1315 const uint8_t *src
, int src_rowstride
,
1316 uint8_t endpoints
[][4])
1319 int endpoint_luminances
[2];
1324 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
1325 endpoint_luminances
[endpoint
] =
1326 endpoints
[endpoint
][0] +
1327 endpoints
[endpoint
][1] +
1328 endpoints
[endpoint
][2];
1331 /* If the endpoints have the same luminance then we'll just use index 0 for
1332 * all of the texels */
1333 if (endpoint_luminances
[0] == endpoint_luminances
[1]) {
1334 write_bits(writer
, BLOCK_SIZE
* BLOCK_SIZE
* 2 - 1, 0);
1338 for (y
= 0; y
< src_height
; y
++) {
1339 for (x
= 0; x
< src_width
; x
++) {
1340 luminance
= src
[0] + src
[1] + src
[2];
1342 index
= ((luminance
- endpoint_luminances
[0]) * 3 /
1343 (endpoint_luminances
[1] - endpoint_luminances
[0]));
1349 assert(x
!= 0 || y
!= 0 || index
< 2);
1351 write_bits(writer
, (x
== 0 && y
== 0) ? 1 : 2, index
);
1356 /* Pad the indices out to the block size */
1357 if (src_width
< BLOCK_SIZE
)
1358 write_bits(writer
, 2 * (BLOCK_SIZE
- src_width
), 0);
1360 src
+= src_rowstride
- src_width
* 4;
1363 /* Pad the indices out to the block size */
1364 if (src_height
< BLOCK_SIZE
)
1365 write_bits(writer
, 2 * BLOCK_SIZE
* (BLOCK_SIZE
- src_height
), 0);
1369 write_alpha_indices_unorm(struct bit_writer
*writer
,
1370 int src_width
, int src_height
,
1371 const uint8_t *src
, int src_rowstride
,
1372 uint8_t endpoints
[][4])
1377 /* If the endpoints have the same alpha then we'll just use index 0 for
1378 * all of the texels */
1379 if (endpoints
[0][3] == endpoints
[1][3]) {
1380 write_bits(writer
, BLOCK_SIZE
* BLOCK_SIZE
* 3 - 1, 0);
1384 for (y
= 0; y
< src_height
; y
++) {
1385 for (x
= 0; x
< src_width
; x
++) {
1386 index
= (((int) src
[3] - (int) endpoints
[0][3]) * 7 /
1387 ((int) endpoints
[1][3] - endpoints
[0][3]));
1393 assert(x
!= 0 || y
!= 0 || index
< 4);
1395 /* The first index has one less bit */
1396 write_bits(writer
, (x
== 0 && y
== 0) ? 2 : 3, index
);
1401 /* Pad the indices out to the block size */
1402 if (src_width
< BLOCK_SIZE
)
1403 write_bits(writer
, 3 * (BLOCK_SIZE
- src_width
), 0);
1405 src
+= src_rowstride
- src_width
* 4;
1408 /* Pad the indices out to the block size */
1409 if (src_height
< BLOCK_SIZE
)
1410 write_bits(writer
, 3 * BLOCK_SIZE
* (BLOCK_SIZE
- src_height
), 0);
1414 compress_rgba_unorm_block(int src_width
, int src_height
,
1415 const uint8_t *src
, int src_rowstride
,
1418 int average_luminance
, average_alpha
;
1419 uint8_t endpoints
[2][4];
1420 struct bit_writer writer
;
1421 int component
, endpoint
;
1423 get_average_luminance_alpha_unorm(src_width
, src_height
, src
, src_rowstride
,
1424 &average_luminance
, &average_alpha
);
1425 get_rgba_endpoints_unorm(src_width
, src_height
, src
, src_rowstride
,
1426 average_luminance
, average_alpha
,
1433 write_bits(&writer
, 5, 0x10); /* mode 4 */
1434 write_bits(&writer
, 2, 0); /* rotation 0 */
1435 write_bits(&writer
, 1, 0); /* index selection bit */
1437 /* Write the color endpoints */
1438 for (component
= 0; component
< 3; component
++)
1439 for (endpoint
= 0; endpoint
< 2; endpoint
++)
1440 write_bits(&writer
, 5, endpoints
[endpoint
][component
] >> 3);
1442 /* Write the alpha endpoints */
1443 for (endpoint
= 0; endpoint
< 2; endpoint
++)
1444 write_bits(&writer
, 6, endpoints
[endpoint
][3] >> 2);
1446 write_rgb_indices_unorm(&writer
,
1447 src_width
, src_height
,
1450 write_alpha_indices_unorm(&writer
,
1451 src_width
, src_height
,
1457 compress_rgba_unorm(int width
, int height
,
1458 const uint8_t *src
, int src_rowstride
,
1459 uint8_t *dst
, int dst_rowstride
)
1464 if (dst_rowstride
>= width
* 4)
1465 dst_row_diff
= dst_rowstride
- ((width
+ 3) & ~3) * 4;
1469 for (y
= 0; y
< height
; y
+= BLOCK_SIZE
) {
1470 for (x
= 0; x
< width
; x
+= BLOCK_SIZE
) {
1471 compress_rgba_unorm_block(MIN2(width
- x
, BLOCK_SIZE
),
1472 MIN2(height
- y
, BLOCK_SIZE
),
1473 src
+ x
* 4 + y
* src_rowstride
,
1478 dst
+= dst_row_diff
;
/*
 * Return the mean of (red + green + blue) over a block of float texels.
 *
 * width, height: size of the block in texels
 * src:           first texel of the block, 3 floats per texel
 * src_rowstride: distance in bytes between consecutive rows; may be negative
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   /* Number of floats to skip after the last texel of a row. This is kept in
    * signed arithmetic: mixing the int stride with the unsigned result of
    * sizeof would turn a negative difference into a huge positive one. */
   const int row_padding =
      (src_rowstride - width * 3 * (int) sizeof (float)) /
      (int) sizeof (float);
   float luminance_sum = 0;
   int y, x;

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance_sum += src[0] + src[1] + src[2];
         src += 3;
      }
      src += row_padding;
   }

   return luminance_sum / (width * height);
}
/*
 * Clamp a value to the finite range of a half float.
 *
 * The upper limit is always 65504. The lower limit is -65504 when is_signed
 * is set and 0 otherwise. NaN compares false against both limits, so it is
 * handled explicitly and replaced with 0 instead of being passed on to the
 * encoder.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float max_half = 65504.0f;
   const float lower_limit = is_signed ? -max_half : 0.0f;

   /* Only NaN is different from itself */
   if (value != value)
      return 0.0f;

   if (value > max_half)
      return max_half;

   if (value < lower_limit)
      return lower_limit;

   return value;
}
/*
 * Choose the two RGB endpoints for a block of float texels.
 *
 * The texels are split into two groups depending on whether the sum of their
 * components is below average_luminance, and each endpoint is the mean colour
 * of one group. If one of the groups is empty both endpoints are set to the
 * mean of the whole block. The endpoints are then clamped with clamp_value()
 * and swapped if necessary so that the first texel of the block lies on the
 * same side of the midpoint as endpoint 0.
 *
 * width, height:     size of the block in texels
 * src:               first texel of the block, 3 floats per texel
 * src_rowstride:     distance in bytes between consecutive rows; may be
 *                    negative
 * average_luminance: threshold used to split the texels into two groups
 * endpoints:         receives the two endpoints
 * is_signed:         whether negative values are kept when clamping
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   /* Number of floats to skip after the last texel of a row. This is kept in
    * signed arithmetic: mixing the int stride with the unsigned result of
    * sizeof would turn a negative difference into a huge positive one. */
   const int row_padding =
      (src_rowstride - width * 3 * (int) sizeof (float)) /
      (int) sizeof (float);
   float endpoint_luminances[2];
   float midpoint;
   float sums[2][3];
   int endpoint, component;
   float luminance;
   float temp[3];
   const float *p = src;
   int left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   /* Accumulate the colour of every texel into the group it belongs to */
   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         p += 3;
      }

      p += row_padding;
   }

   if (left_endpoint_count == 0 ||
       left_endpoint_count == width * height) {
      /* One group is empty: use the average of the whole block for both
       * endpoints rather than dividing by zero */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / left_endpoint_count;
         endpoints[1][i] = sums[1][i] / (width * height - left_endpoint_count);
      }
   }

   /* Clamp the endpoints to the range of a half float */
   for (endpoint = 0; endpoint < 2; endpoint++) {
      for (component = 0; component < 3; component++) {
         endpoints[endpoint][component] =
            clamp_value(endpoints[endpoint][component], is_signed);
      }
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], sizeof temp);
      memcpy(endpoints[0], endpoints[1], sizeof temp);
      memcpy(endpoints[1], temp, sizeof temp);
   }
}
1596 write_rgb_indices_float(struct bit_writer
*writer
,
1597 int src_width
, int src_height
,
1598 const float *src
, int src_rowstride
,
1599 float endpoints
[][3])
1602 float endpoint_luminances
[2];
1607 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
1608 endpoint_luminances
[endpoint
] =
1609 endpoints
[endpoint
][0] +
1610 endpoints
[endpoint
][1] +
1611 endpoints
[endpoint
][2];
1614 /* If the endpoints have the same luminance then we'll just use index 0 for
1615 * all of the texels */
1616 if (endpoint_luminances
[0] == endpoint_luminances
[1]) {
1617 write_bits(writer
, BLOCK_SIZE
* BLOCK_SIZE
* 4 - 1, 0);
1621 for (y
= 0; y
< src_height
; y
++) {
1622 for (x
= 0; x
< src_width
; x
++) {
1623 luminance
= src
[0] + src
[1] + src
[2];
1625 index
= ((luminance
- endpoint_luminances
[0]) * 15 /
1626 (endpoint_luminances
[1] - endpoint_luminances
[0]));
1629 else if (index
> 15)
1632 assert(x
!= 0 || y
!= 0 || index
< 8);
1634 write_bits(writer
, (x
== 0 && y
== 0) ? 3 : 4, index
);
1639 /* Pad the indices out to the block size */
1640 if (src_width
< BLOCK_SIZE
)
1641 write_bits(writer
, 4 * (BLOCK_SIZE
- src_width
), 0);
1643 src
+= (src_rowstride
- src_width
* 3 * sizeof (float)) / sizeof (float);
1646 /* Pad the indices out to the block size */
1647 if (src_height
< BLOCK_SIZE
)
1648 write_bits(writer
, 4 * BLOCK_SIZE
* (BLOCK_SIZE
- src_height
), 0);
/*
 * Convert an endpoint component to the 10-bit value stored in the block.
 *
 * The value is first converted to a half float with _mesa_float_to_half()
 * and its bit pattern is then scaled down to 10 bits. In the unsigned case
 * anything less than or equal to zero is stored as 0. In the signed case the
 * magnitude is scaled separately and a negative result is stored as its
 * two's complement truncated to 10 bits.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   int bits;
   int scaled;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      bits = _mesa_float_to_half(value);

      return (64 * bits / 31) >> 6;
   }

   bits = _mesa_float_to_half(value);

   /* A positive value is scaled as it is */
   if ((bits & 0x8000) == 0)
      return (32 * bits / 31) >> 6;

   /* Scale the magnitude without the sign bit and then negate the result */
   scaled = (32 * (bits & 0x7fff) / 31) >> 6;

   return -scaled & ((1 << 10) - 1);
}
1682 compress_rgb_float_block(int src_width
, int src_height
,
1683 const float *src
, int src_rowstride
,
1687 float average_luminance
;
1688 float endpoints
[2][3];
1689 struct bit_writer writer
;
1690 int component
, endpoint
;
1694 get_average_luminance_float(src_width
, src_height
, src
, src_rowstride
);
1695 get_endpoints_float(src_width
, src_height
, src
, src_rowstride
,
1696 average_luminance
, endpoints
, is_signed
);
1702 write_bits(&writer
, 5, 3); /* mode 3 */
1704 /* Write the endpoints */
1705 for (endpoint
= 0; endpoint
< 2; endpoint
++) {
1706 for (component
= 0; component
< 3; component
++) {
1708 get_endpoint_value(endpoints
[endpoint
][component
], is_signed
);
1709 write_bits(&writer
, 10, endpoint_value
);
1713 write_rgb_indices_float(&writer
,
1714 src_width
, src_height
,
1720 compress_rgb_float(int width
, int height
,
1721 const float *src
, int src_rowstride
,
1722 uint8_t *dst
, int dst_rowstride
,
1728 if (dst_rowstride
>= width
* 4)
1729 dst_row_diff
= dst_rowstride
- ((width
+ 3) & ~3) * 4;
1733 for (y
= 0; y
< height
; y
+= BLOCK_SIZE
) {
1734 for (x
= 0; x
< width
; x
+= BLOCK_SIZE
) {
1735 compress_rgb_float_block(MIN2(width
- x
, BLOCK_SIZE
),
1736 MIN2(height
- y
, BLOCK_SIZE
),
1738 y
* src_rowstride
/ sizeof (float),
1744 dst
+= dst_row_diff
;