3c4ea2c01316c071ef7f840912a03504917474a1
[mesa.git] / src / mesa / main / texcompress_bptc_tmp.h
1 /*
2 * Copyright (C) 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /*
25 * Included by texcompress_bptc and gallium to define BPTC decoding routines.
26 */
27
28 #include "util/format_srgb.h"
29 #include "util/half_float.h"
30 #include "macros.h"
31
/* Edge length, in texels, of one compressed block (blocks are 4x4). */
#define BLOCK_SIZE 4
/* Number of entries in each partition / anchor-index table. */
#define N_PARTITIONS 64
/* Size in bytes of one compressed block in the source data. */
#define BLOCK_BYTES 16
35
/*
 * Bit layout of one of the UNORM block modes. The field order matters:
 * the mode table initialises this struct positionally.
 */
struct bptc_unorm_mode {
   /* Number of subsets the block is split into (1, 2 or 3). */
   int n_subsets;
   /* Width of the partition number field; 0 when there is none. */
   int n_partition_bits;
   /* True when a 2-bit rotation field follows the partition number. */
   bool has_rotation_bits;
   /* True when a 1-bit index selection field is present. */
   bool has_index_selection_bit;
   /* Stored bits per colour component of each endpoint. */
   int n_color_bits;
   /* Stored bits per alpha endpoint; 0 means alpha is fixed at 255. */
   int n_alpha_bits;
   /* True when every endpoint carries its own p-bit. */
   bool has_endpoint_pbits;
   /* True when the two endpoints of a subset share a single p-bit. */
   bool has_shared_pbits;
   /* Width of each primary index. */
   int n_index_bits;
   /* Width of each secondary index; 0 when there is no second index set. */
   int n_secondary_index_bits;
};
48
/*
 * One contiguous run of bits in a float block and where it lands in the
 * decoded endpoints. Initialised positionally by the float mode table.
 */
struct bptc_float_bitfield {
   /* Destination endpoint index; -1 marks the end of a bitfield list. */
   int8_t endpoint;
   /* Destination colour component (0, 1 or 2). */
   uint8_t component;
   /* Bit position inside the endpoint value where the field is placed. */
   uint8_t offset;
   /* Number of bits occupied in the block. */
   uint8_t n_bits;
   /* True when the bits are stored in reverse order in the block. */
   bool reverse;
};
56
57 struct bptc_float_mode {
58 bool reserved;
59 bool transformed_endpoints;
60 int n_partition_bits;
61 int n_endpoint_bits;
62 int n_index_bits;
63 int n_delta_bits[3];
64 struct bptc_float_bitfield bitfields[24];
65 };
66
/*
 * State for emitting a bit stream one byte at a time.
 * NOTE(review): not referenced by the decoding routines visible in this
 * part of the file; presumably used by encoder code further on - confirm.
 */
struct bit_writer {
   /* Byte currently being assembled. */
   uint8_t buf;
   /* Bit position within buf. */
   int pos;
   /* Output pointer. */
   uint8_t *dst;
};
72
73 static const struct bptc_unorm_mode
74 bptc_unorm_modes[] = {
75 /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
76 /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
77 /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
78 /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
79 /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
80 /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
81 /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
82 /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
83 };
84
85 static const struct bptc_float_mode
86 bptc_float_modes[] = {
87 /* 00 */
88 { false, true, 5, 10, 3, { 5, 5, 5 },
89 { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
90 { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
91 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
92 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
93 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
94 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
95 { 3, 2, 3, 1, false },
96 { -1 } }
97 },
98 /* 01 */
99 { false, true, 5, 7, 3, { 6, 6, 6 },
100 { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
101 { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
102 { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
103 { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
104 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
105 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
106 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
107 { 2, 0, 0, 6, false },
108 { 3, 0, 0, 6, false },
109 { -1 } }
110 },
111 /* 00010 */
112 { false, true, 5, 11, 3, { 5, 4, 4 },
113 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
114 { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
115 { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
116 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
117 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
118 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
119 { -1 } }
120 },
121 /* 00011 */
122 { false, false, 0, 10, 4, { 10, 10, 10 },
123 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
124 { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
125 { -1 } }
126 },
127 /* 00110 */
128 { false, true, 5, 11, 3, { 4, 5, 4 },
129 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
130 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
131 { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
132 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
133 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
134 { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
135 { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
136 { -1 } }
137 },
138 /* 00111 */
139 { false, true, 0, 11, 4, { 9, 9, 9 },
140 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
141 { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
142 { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
143 { -1 } }
144 },
145 /* 01010 */
146 { false, true, 5, 11, 3, { 4, 4, 5 },
147 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
148 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
149 { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
150 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
151 { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
152 { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
153 { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
154 { -1 } }
155 },
156 /* 01011 */
157 { false, true, 0, 12, 4, { 8, 8, 8 },
158 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
159 { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
160 { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
161 { -1 } }
162 },
163 /* 01110 */
164 { false, true, 5, 9, 3, { 5, 5, 5 },
165 { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
166 { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
167 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
168 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
169 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
170 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
171 { 3, 2, 3, 1, false },
172 { -1 } }
173 },
174 /* 01111 */
175 { false, true, 0, 16, 4, { 4, 4, 4 },
176 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
177 { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
178 { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
179 { -1 } }
180 },
181 /* 10010 */
182 { false, true, 5, 8, 3, { 6, 5, 5 },
183 { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
184 { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
185 { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
186 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
187 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
188 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
189 { 3, 0, 0, 6, false },
190 { -1 } }
191 },
192 /* 10011 */
193 { true /* reserved */ },
194 /* 10110 */
195 { false, true, 5, 8, 3, { 5, 6, 5 },
196 { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
197 { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
198 { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
199 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
200 { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
201 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
202 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
203 { -1 } }
204 },
205 /* 10111 */
206 { true /* reserved */ },
207 /* 11010 */
208 { false, true, 5, 8, 3, { 5, 5, 6 },
209 { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
210 { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
211 { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
212 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
213 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
214 { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
215 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
216 { -1 } }
217 },
218 /* 11011 */
219 { true /* reserved */ },
220 /* 11110 */
221 { false, false, 5, 6, 3, { 6, 6, 6 },
222 { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
223 { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
224 { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
225 { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
226 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
227 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
228 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
229 { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
230 { -1 } }
231 },
232 /* 11111 */
233 { true /* reserved */ },
234 };
235
236 /* This partition table is used when the mode has two subsets. Each
237 * partition is represented by a 32-bit value which gives 2 bits per texel
238 * within the block. The value of the two bits represents which subset to use
239 * (0 or 1).
240 */
241 static const uint32_t
242 partition_table1[N_PARTITIONS] = {
243 0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
244 0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
245 0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
246 0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
247 0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
248 0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
249 0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
250 0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
251 0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
252 0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
253 0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
254 0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
255 0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
256 0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
257 0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
258 0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
259 };
260
261 /* This partition table is used when the mode has three subsets. In this case
262 * the values can be 0, 1 or 2.
263 */
264 static const uint32_t
265 partition_table2[N_PARTITIONS] = {
266 0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
267 0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
268 0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
269 0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
270 0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
271 0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
272 0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
273 0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
274 0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
275 0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
276 0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
277 0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
278 0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
279 0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
280 0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
281 0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
282 };
283
284 static const uint8_t
285 anchor_indices[][N_PARTITIONS] = {
286 /* Anchor index values for the second subset of two-subset partitioning */
287 {
288 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
289 0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
290 0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
291 0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
292 },
293
294 /* Anchor index values for the second subset of three-subset partitioning */
295 {
296 0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
297 0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
298 0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
299 0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
300 },
301
302 /* Anchor index values for the third subset of three-subset
303 * partitioning
304 */
305 {
306 0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
307 0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
308 0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
309 0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
310 }
311 };
312
/*
 * Reads an n_bits wide little-endian bit field that starts `offset` bits
 * into `block` (bit 0 is the least-significant bit of block[0]) and returns
 * it as a non-negative integer.
 *
 * A request for zero (or fewer) bits returns 0 without touching `block`, so
 * callers may pass the width of an absent field even when `offset` points
 * just past the end of the data.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int byte_index = offset / 8;
   int bit_index = offset % 8;
   int result = 0;
   int bit = 0;

   while (n_bits > 0) {
      /* Take whatever is left of the current byte, capped to the request */
      const int available = 8 - bit_index;
      const int take = n_bits < available ? n_bits : available;

      result |= ((block[byte_index] >> bit_index) &
                 ((1 << take) - 1)) << bit;

      n_bits -= take;
      bit += take;

      /* Every byte after the first is consumed from its lowest bit */
      byte_index++;
      bit_index = 0;
   }

   return result;
}
339
/*
 * Widens an n_bits wide value to a full byte: the value is moved to the top
 * of the byte and its own most-significant bits are replicated into the
 * low bits that would otherwise be empty.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   const int left_shift = 8 - n_bits;
   const int replicate_shift = 2 * n_bits - 8;

   return (byte << left_shift) | (byte >> replicate_shift);
}
349
350 static int
351 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
352 const uint8_t *block,
353 int bit_offset,
354 uint8_t endpoints[][4])
355 {
356 int component;
357 int subset;
358 int endpoint;
359 int pbit;
360 int n_components;
361
362 /* Extract each color component */
363 for (component = 0; component < 3; component++) {
364 for (subset = 0; subset < mode->n_subsets; subset++) {
365 for (endpoint = 0; endpoint < 2; endpoint++) {
366 endpoints[subset * 2 + endpoint][component] =
367 extract_bits(block, bit_offset, mode->n_color_bits);
368 bit_offset += mode->n_color_bits;
369 }
370 }
371 }
372
373 /* Extract the alpha values */
374 if (mode->n_alpha_bits > 0) {
375 for (subset = 0; subset < mode->n_subsets; subset++) {
376 for (endpoint = 0; endpoint < 2; endpoint++) {
377 endpoints[subset * 2 + endpoint][3] =
378 extract_bits(block, bit_offset, mode->n_alpha_bits);
379 bit_offset += mode->n_alpha_bits;
380 }
381 }
382
383 n_components = 4;
384 } else {
385 for (subset = 0; subset < mode->n_subsets; subset++)
386 for (endpoint = 0; endpoint < 2; endpoint++)
387 endpoints[subset * 2 + endpoint][3] = 255;
388
389 n_components = 3;
390 }
391
392 /* Add in the p-bits */
393 if (mode->has_endpoint_pbits) {
394 for (subset = 0; subset < mode->n_subsets; subset++) {
395 for (endpoint = 0; endpoint < 2; endpoint++) {
396 pbit = extract_bits(block, bit_offset, 1);
397 bit_offset += 1;
398
399 for (component = 0; component < n_components; component++) {
400 endpoints[subset * 2 + endpoint][component] <<= 1;
401 endpoints[subset * 2 + endpoint][component] |= pbit;
402 }
403 }
404 }
405 } else if (mode->has_shared_pbits) {
406 for (subset = 0; subset < mode->n_subsets; subset++) {
407 pbit = extract_bits(block, bit_offset, 1);
408 bit_offset += 1;
409
410 for (endpoint = 0; endpoint < 2; endpoint++) {
411 for (component = 0; component < n_components; component++) {
412 endpoints[subset * 2 + endpoint][component] <<= 1;
413 endpoints[subset * 2 + endpoint][component] |= pbit;
414 }
415 }
416 }
417 }
418
419 /* Expand the n-bit values to a byte */
420 for (subset = 0; subset < mode->n_subsets; subset++) {
421 for (endpoint = 0; endpoint < 2; endpoint++) {
422 for (component = 0; component < 3; component++) {
423 endpoints[subset * 2 + endpoint][component] =
424 expand_component(endpoints[subset * 2 + endpoint][component],
425 mode->n_color_bits +
426 mode->has_endpoint_pbits +
427 mode->has_shared_pbits);
428 }
429
430 if (mode->n_alpha_bits > 0) {
431 endpoints[subset * 2 + endpoint][3] =
432 expand_component(endpoints[subset * 2 + endpoint][3],
433 mode->n_alpha_bits +
434 mode->has_endpoint_pbits +
435 mode->has_shared_pbits);
436 }
437 }
438 }
439
440 return bit_offset;
441 }
442
443 static bool
444 is_anchor(int n_subsets,
445 int partition_num,
446 int texel)
447 {
448 if (texel == 0)
449 return true;
450
451 switch (n_subsets) {
452 case 1:
453 return false;
454 case 2:
455 return anchor_indices[0][partition_num] == texel;
456 case 3:
457 return (anchor_indices[1][partition_num] == texel ||
458 anchor_indices[2][partition_num] == texel);
459 default:
460 assert(false);
461 return false;
462 }
463 }
464
465 static int
466 count_anchors_before_texel(int n_subsets,
467 int partition_num,
468 int texel)
469 {
470 int count = 1;
471
472 if (texel == 0)
473 return 0;
474
475 switch (n_subsets) {
476 case 1:
477 break;
478 case 2:
479 if (texel > anchor_indices[0][partition_num])
480 count++;
481 break;
482 case 3:
483 if (texel > anchor_indices[1][partition_num])
484 count++;
485 if (texel > anchor_indices[2][partition_num])
486 count++;
487 break;
488 default:
489 assert(false);
490 return 0;
491 }
492
493 return count;
494 }
495
/*
 * Blends two endpoint values using the weight selected by `index`.
 *
 * index_bits picks the weight table (2, 3 or 4 bit indices); weights are in
 * 64ths, so index 0 returns a and the highest index returns b. The result
 * is rounded to nearest by adding 32 before the shift.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t four_bit[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed by index width; widths 0 and 1 do not exist */
   static const uint8_t *tables[] = {
      NULL, NULL, two_bit, three_bit, four_bit
   };
   const int weight_b = tables[index_bits][index];
   const int weight_a = 64 - weight_b;

   return (weight_a * a + weight_b * b + 32) >> 6;
}
514
/*
 * Undoes the channel rotation of a decoded RGBA texel: rotation 0 leaves
 * it alone, rotation n (1..3) swaps alpha with colour channel n - 1.
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   if (rotation != 0) {
      uint8_t *channel = &result[rotation - 1];
      const uint8_t alpha = result[3];

      result[3] = *channel;
      *channel = alpha;
   }
}
530
531 static void
532 fetch_rgba_unorm_from_block(const uint8_t *block,
533 uint8_t *result,
534 int texel)
535 {
536 int mode_num = ffs(block[0]);
537 const struct bptc_unorm_mode *mode;
538 int bit_offset, secondary_bit_offset;
539 int partition_num;
540 int subset_num;
541 int rotation;
542 int index_selection;
543 int index_bits;
544 int indices[2];
545 int index;
546 int anchors_before_texel;
547 bool anchor;
548 uint8_t endpoints[3 * 2][4];
549 uint32_t subsets;
550 int component;
551
552 if (mode_num == 0) {
553 /* According to the spec this mode is reserved and shouldn't be used. */
554 memset(result, 0, 3);
555 result[3] = 0xff;
556 return;
557 }
558
559 mode = bptc_unorm_modes + mode_num - 1;
560 bit_offset = mode_num;
561
562 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
563 bit_offset += mode->n_partition_bits;
564
565 switch (mode->n_subsets) {
566 case 1:
567 subsets = 0;
568 break;
569 case 2:
570 subsets = partition_table1[partition_num];
571 break;
572 case 3:
573 subsets = partition_table2[partition_num];
574 break;
575 default:
576 assert(false);
577 return;
578 }
579
580 if (mode->has_rotation_bits) {
581 rotation = extract_bits(block, bit_offset, 2);
582 bit_offset += 2;
583 } else {
584 rotation = 0;
585 }
586
587 if (mode->has_index_selection_bit) {
588 index_selection = extract_bits(block, bit_offset, 1);
589 bit_offset++;
590 } else {
591 index_selection = 0;
592 }
593
594 bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
595
596 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
597 partition_num, texel);
598
599 /* Calculate the offset to the secondary index */
600 secondary_bit_offset = (bit_offset +
601 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
602 mode->n_subsets +
603 mode->n_secondary_index_bits * texel -
604 anchors_before_texel);
605
606 /* Calculate the offset to the primary index for this texel */
607 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
608
609 subset_num = (subsets >> (texel * 2)) & 3;
610
611 anchor = is_anchor(mode->n_subsets, partition_num, texel);
612
613 index_bits = mode->n_index_bits;
614 if (anchor)
615 index_bits--;
616 indices[0] = extract_bits(block, bit_offset, index_bits);
617
618 if (mode->n_secondary_index_bits) {
619 index_bits = mode->n_secondary_index_bits;
620 if (anchor)
621 index_bits--;
622 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
623 }
624
625 index = indices[index_selection];
626 index_bits = (index_selection ?
627 mode->n_secondary_index_bits :
628 mode->n_index_bits);
629
630 for (component = 0; component < 3; component++)
631 result[component] = interpolate(endpoints[subset_num * 2][component],
632 endpoints[subset_num * 2 + 1][component],
633 index,
634 index_bits);
635
636 /* Alpha uses the opposite index from the color components */
637 if (mode->n_secondary_index_bits && !index_selection) {
638 index = indices[1];
639 index_bits = mode->n_secondary_index_bits;
640 } else {
641 index = indices[0];
642 index_bits = mode->n_index_bits;
643 }
644
645 result[3] = interpolate(endpoints[subset_num * 2][3],
646 endpoints[subset_num * 2 + 1][3],
647 index,
648 index_bits);
649
650 apply_rotation(rotation, result);
651 }
652
653 #ifdef BPTC_BLOCK_DECODE
654 static void
655 decompress_rgba_unorm_block(int src_width, int src_height,
656 const uint8_t *block,
657 uint8_t *dst_row, int dst_rowstride)
658 {
659 int mode_num = ffs(block[0]);
660 const struct bptc_unorm_mode *mode;
661 int bit_offset, secondary_bit_offset;
662 int partition_num;
663 int subset_num;
664 int rotation;
665 int index_selection;
666 int index_bits;
667 int indices[2];
668 int index;
669 int anchors_before_texel;
670 bool anchor;
671 uint8_t endpoints[3 * 2][4];
672 uint32_t subsets;
673 int component;
674 unsigned x, y;
675
676 if (mode_num == 0) {
677 /* According to the spec this mode is reserved and shouldn't be used. */
678 for(y = 0; y < src_height; y += 1) {
679 uint8_t *result = dst_row;
680 memset(result, 0, 4 * src_width);
681 for(x = 0; x < src_width; x += 1) {
682 result[3] = 0xff;
683 result += 4;
684 }
685 dst_row += dst_rowstride;
686 }
687 return;
688 }
689
690 mode = bptc_unorm_modes + mode_num - 1;
691 bit_offset = mode_num;
692
693 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
694 bit_offset += mode->n_partition_bits;
695
696 switch (mode->n_subsets) {
697 case 1:
698 subsets = 0;
699 break;
700 case 2:
701 subsets = partition_table1[partition_num];
702 break;
703 case 3:
704 subsets = partition_table2[partition_num];
705 break;
706 default:
707 assert(false);
708 return;
709 }
710
711 if (mode->has_rotation_bits) {
712 rotation = extract_bits(block, bit_offset, 2);
713 bit_offset += 2;
714 } else {
715 rotation = 0;
716 }
717
718 if (mode->has_index_selection_bit) {
719 index_selection = extract_bits(block, bit_offset, 1);
720 bit_offset++;
721 } else {
722 index_selection = 0;
723 }
724
725 bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
726
727 for(y = 0; y < src_height; y += 1) {
728 uint8_t *result = dst_row;
729 for(x = 0; x < src_width; x += 1) {
730 int texel;
731 texel = x + y * 4;
732
733 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
734 partition_num,
735 texel);
736
737 /* Calculate the offset to the secondary index */
738 secondary_bit_offset = (bit_offset +
739 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
740 mode->n_subsets +
741 mode->n_secondary_index_bits * texel -
742 anchors_before_texel);
743
744 /* Calculate the offset to the primary index for this texel */
745 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
746
747 subset_num = (subsets >> (texel * 2)) & 3;
748
749 anchor = is_anchor(mode->n_subsets, partition_num, texel);
750
751 index_bits = mode->n_index_bits;
752 if (anchor)
753 index_bits--;
754 indices[0] = extract_bits(block, bit_offset, index_bits);
755
756 if (mode->n_secondary_index_bits) {
757 index_bits = mode->n_secondary_index_bits;
758 if (anchor)
759 index_bits--;
760 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
761 }
762
763 index = indices[index_selection];
764 index_bits = (index_selection ?
765 mode->n_secondary_index_bits :
766 mode->n_index_bits);
767
768 for (component = 0; component < 3; component++)
769 result[component] = interpolate(endpoints[subset_num * 2][component],
770 endpoints[subset_num * 2 + 1][component],
771 index,
772 index_bits);
773
774 /* Alpha uses the opposite index from the color components */
775 if (mode->n_secondary_index_bits && !index_selection) {
776 index = indices[1];
777 index_bits = mode->n_secondary_index_bits;
778 } else {
779 index = indices[0];
780 index_bits = mode->n_index_bits;
781 }
782
783 result[3] = interpolate(endpoints[subset_num * 2][3],
784 endpoints[subset_num * 2 + 1][3],
785 index,
786 index_bits);
787
788 apply_rotation(rotation, result);
789 result += 4;
790 }
791 dst_row += dst_rowstride;
792 }
793 }
794
795 static void
796 decompress_rgba_unorm(int width, int height,
797 const uint8_t *src, int src_rowstride,
798 uint8_t *dst, int dst_rowstride)
799 {
800 int src_row_diff;
801 int y, x;
802
803 if (src_rowstride >= width * 4)
804 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
805 else
806 src_row_diff = 0;
807
808 for (y = 0; y < height; y += BLOCK_SIZE) {
809 for (x = 0; x < width; x += BLOCK_SIZE) {
810 decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
811 MIN2(height - y, BLOCK_SIZE),
812 src,
813 dst + x * 4 + y * dst_rowstride,
814 dst_rowstride);
815 src += BLOCK_BYTES;
816 }
817 src += src_row_diff;
818 }
819 }
820 #endif // BPTC_BLOCK_DECODE
821
/*
 * Interprets the low n_bits of `value` as a two's complement number and
 * widens it to 32 bits: when bit n_bits - 1 is set, every bit from n_bits
 * upwards is set as well; otherwise `value` is returned unchanged.
 *
 * n_bits must be between 1 and 31. The mask is built with unsigned
 * arithmetic because left-shifting a negative signed value is undefined.
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   const uint32_t sign_bit = (uint32_t) 1 << (n_bits - 1);

   if ((uint32_t) value & sign_bit) {
      const uint32_t high_bits = ~(uint32_t) 0 << n_bits;

      value = (int32_t) ((uint32_t) value | high_bits);
   }

   return value;
}
832
/*
 * Expands a signed endpoint of n_endpoint_bits precision to the signed
 * 16-bit working range. Endpoints of 16 or more bits and the value 0 pass
 * through unchanged; the largest magnitude maps to +/-0x7fff.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   int magnitude;
   int unquantized;

   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   /* Work on the magnitude and put the sign back at the end */
   magnitude = value < 0 ? -value : value;

   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1)
      unquantized = 0x7fff;
   else
      unquantized = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return value < 0 ? -unquantized : unquantized;
}
861
/*
 * Expands an unsigned endpoint of n_endpoint_bits precision to the 16-bit
 * working range. Endpoints of 15 or more bits and the value 0 pass through
 * unchanged; the all-ones value maps to 0xffff.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   const int all_ones = (1 << n_endpoint_bits) - 1;

   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   if (value == all_ones)
      return 0xffff;

   return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
}
876
877 static int
878 extract_float_endpoints(const struct bptc_float_mode *mode,
879 const uint8_t *block,
880 int bit_offset,
881 int32_t endpoints[][3],
882 bool is_signed)
883 {
884 const struct bptc_float_bitfield *bitfield;
885 int endpoint, component;
886 int n_endpoints;
887 int value;
888 int i;
889
890 if (mode->n_partition_bits)
891 n_endpoints = 4;
892 else
893 n_endpoints = 2;
894
895 memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
896
897 for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
898 value = extract_bits(block, bit_offset, bitfield->n_bits);
899 bit_offset += bitfield->n_bits;
900
901 if (bitfield->reverse) {
902 for (i = 0; i < bitfield->n_bits; i++) {
903 if (value & (1 << i))
904 endpoints[bitfield->endpoint][bitfield->component] |=
905 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
906 }
907 } else {
908 endpoints[bitfield->endpoint][bitfield->component] |=
909 value << bitfield->offset;
910 }
911 }
912
913 if (mode->transformed_endpoints) {
914 /* The endpoints are specified as signed offsets from e0 */
915 for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
916 for (component = 0; component < 3; component++) {
917 value = sign_extend(endpoints[endpoint][component],
918 mode->n_delta_bits[component]);
919 endpoints[endpoint][component] =
920 ((endpoints[0][component] + value) &
921 ((1 << mode->n_endpoint_bits) - 1));
922 }
923 }
924 }
925
926 if (is_signed) {
927 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
928 for (component = 0; component < 3; component++) {
929 value = sign_extend(endpoints[endpoint][component],
930 mode->n_endpoint_bits);
931 endpoints[endpoint][component] =
932 signed_unquantize(value, mode->n_endpoint_bits);
933 }
934 }
935 } else {
936 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
937 for (component = 0; component < 3; component++) {
938 endpoints[endpoint][component] =
939 unsigned_unquantize(endpoints[endpoint][component],
940 mode->n_endpoint_bits);
941 }
942 }
943 }
944
945 return bit_offset;
946 }
947
/*
 * Last step of unsigned decoding: scales an interpolated value by 31/64
 * to produce the bit pattern that is then converted from half float.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
953
/*
 * Last step of signed decoding: scales the magnitude of an interpolated
 * value by 31/32 and, for negative inputs, sets bit 15 (the half-float
 * sign bit) instead of returning a negative number.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   if (value >= 0)
      return value * 31 / 32;

   return (-value * 31 / 32) | 0x8000;
}
962
963 static void
964 fetch_rgb_float_from_block(const uint8_t *block,
965 float *result,
966 int texel,
967 bool is_signed)
968 {
969 int mode_num;
970 const struct bptc_float_mode *mode;
971 int bit_offset;
972 int partition_num;
973 int subset_num;
974 int index_bits;
975 int index;
976 int anchors_before_texel;
977 int32_t endpoints[2 * 2][3];
978 uint32_t subsets;
979 int n_subsets;
980 int component;
981 int32_t value;
982
983 if (block[0] & 0x2) {
984 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
985 bit_offset = 5;
986 } else {
987 mode_num = block[0] & 3;
988 bit_offset = 2;
989 }
990
991 mode = bptc_float_modes + mode_num;
992
993 if (mode->reserved) {
994 memset(result, 0, sizeof result[0] * 3);
995 result[3] = 1.0f;
996 return;
997 }
998
999 bit_offset = extract_float_endpoints(mode, block, bit_offset,
1000 endpoints, is_signed);
1001
1002 if (mode->n_partition_bits) {
1003 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1004 bit_offset += mode->n_partition_bits;
1005
1006 subsets = partition_table1[partition_num];
1007 n_subsets = 2;
1008 } else {
1009 partition_num = 0;
1010 subsets = 0;
1011 n_subsets = 1;
1012 }
1013
1014 anchors_before_texel =
1015 count_anchors_before_texel(n_subsets, partition_num, texel);
1016
1017 /* Calculate the offset to the primary index for this texel */
1018 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1019
1020 subset_num = (subsets >> (texel * 2)) & 3;
1021
1022 index_bits = mode->n_index_bits;
1023 if (is_anchor(n_subsets, partition_num, texel))
1024 index_bits--;
1025 index = extract_bits(block, bit_offset, index_bits);
1026
1027 for (component = 0; component < 3; component++) {
1028 value = interpolate(endpoints[subset_num * 2][component],
1029 endpoints[subset_num * 2 + 1][component],
1030 index,
1031 mode->n_index_bits);
1032
1033 if (is_signed)
1034 value = finish_signed_unquantize(value);
1035 else
1036 value = finish_unsigned_unquantize(value);
1037
1038 result[component] = _mesa_half_to_float(value);
1039 }
1040
1041 result[3] = 1.0f;
1042 }
1043
1044 #ifdef BPTC_BLOCK_DECODE
1045 static void
1046 decompress_rgb_float_block(unsigned src_width, unsigned src_height,
1047 const uint8_t *block,
1048 float *dst_row, unsigned dst_rowstride,
1049 bool is_signed)
1050 {
1051 int mode_num;
1052 const struct bptc_float_mode *mode;
1053 int bit_offset;
1054 int partition_num;
1055 int subset_num;
1056 int index_bits;
1057 int index;
1058 int anchors_before_texel;
1059 int32_t endpoints[2 * 2][3];
1060 uint32_t subsets;
1061 int n_subsets;
1062 int component;
1063 int32_t value;
1064 unsigned x, y;
1065
1066 if (block[0] & 0x2) {
1067 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1068 bit_offset = 5;
1069 } else {
1070 mode_num = block[0] & 3;
1071 bit_offset = 2;
1072 }
1073
1074 mode = bptc_float_modes + mode_num;
1075
1076 if (mode->reserved) {
1077 for(y = 0; y < src_height; y += 1) {
1078 float *result = dst_row;
1079 memset(result, 0, sizeof result[0] * 4 * src_width);
1080 for(x = 0; x < src_width; x += 1) {
1081 result[3] = 1.0f;
1082 result += 4;
1083 }
1084 dst_row += dst_rowstride / sizeof dst_row[0];
1085 }
1086 return;
1087 }
1088
1089 bit_offset = extract_float_endpoints(mode, block, bit_offset,
1090 endpoints, is_signed);
1091
1092 if (mode->n_partition_bits) {
1093 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1094 bit_offset += mode->n_partition_bits;
1095
1096 subsets = partition_table1[partition_num];
1097 n_subsets = 2;
1098 } else {
1099 partition_num = 0;
1100 subsets = 0;
1101 n_subsets = 1;
1102 }
1103
1104 for(y = 0; y < src_height; y += 1) {
1105 float *result = dst_row;
1106 for(x = 0; x < src_width; x += 1) {
1107 int texel;
1108
1109 texel = x + y * 4;
1110
1111 anchors_before_texel =
1112 count_anchors_before_texel(n_subsets, partition_num, texel);
1113
1114 /* Calculate the offset to the primary index for this texel */
1115 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1116
1117 subset_num = (subsets >> (texel * 2)) & 3;
1118
1119 index_bits = mode->n_index_bits;
1120 if (is_anchor(n_subsets, partition_num, texel))
1121 index_bits--;
1122 index = extract_bits(block, bit_offset, index_bits);
1123
1124 for (component = 0; component < 3; component++) {
1125 value = interpolate(endpoints[subset_num * 2][component],
1126 endpoints[subset_num * 2 + 1][component],
1127 index,
1128 mode->n_index_bits);
1129
1130 if (is_signed)
1131 value = finish_signed_unquantize(value);
1132 else
1133 value = finish_unsigned_unquantize(value);
1134
1135 result[component] = _mesa_half_to_float(value);
1136 }
1137
1138 result[3] = 1.0f;
1139 result += 4;
1140 }
1141 dst_row += dst_rowstride / sizeof dst_row[0];
1142 }
1143 }
1144
1145 static void
1146 decompress_rgb_float(int width, int height,
1147 const uint8_t *src, int src_rowstride,
1148 float *dst, int dst_rowstride, bool is_signed)
1149 {
1150 int src_row_diff;
1151 int y, x;
1152
1153 if (src_rowstride >= width * 4)
1154 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1155 else
1156 src_row_diff = 0;
1157
1158 for (y = 0; y < height; y += BLOCK_SIZE) {
1159 for (x = 0; x < width; x += BLOCK_SIZE) {
1160 decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1161 MIN2(height - y, BLOCK_SIZE),
1162 src,
1163 (dst + x * 4 +
1164 (y * dst_rowstride / sizeof dst[0])),
1165 dst_rowstride, is_signed);
1166 src += BLOCK_BYTES;
1167 }
1168 src += src_row_diff;
1169 }
1170 }
1171 #endif // BPTC_BLOCK_DECODE
1172
1173 static void
1174 write_bits(struct bit_writer *writer, int n_bits, int value)
1175 {
1176 do {
1177 if (n_bits + writer->pos >= 8) {
1178 *(writer->dst++) = writer->buf | (value << writer->pos);
1179 writer->buf = 0;
1180 value >>= (8 - writer->pos);
1181 n_bits -= (8 - writer->pos);
1182 writer->pos = 0;
1183 } else {
1184 writer->buf |= value << writer->pos;
1185 writer->pos += n_bits;
1186 break;
1187 }
1188 } while (n_bits > 0);
1189 }
1190
/**
 * Compute the average luminance and the average alpha of a rectangle of
 * 4-byte RGBA texels.
 *
 * Luminance is taken to be the plain sum of the first three components of
 * a texel. \p src_rowstride is the distance between rows in bytes. Both
 * averages use integer division by width * height.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   int rgb_total = 0;
   int alpha_total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         rgb_total += texel[0] + texel[1] + texel[2];
         alpha_total += texel[3];
      }
   }

   *average_luminance = rgb_total / n_texels;
   *average_alpha = alpha_total / n_texels;
}
1211
/**
 * Pick two RGBA endpoints for a rectangle of 4-byte RGBA texels.
 *
 * The texels are split into two groups for colour (luminance below
 * \p average_luminance or not) and, independently, into two groups for
 * alpha (alpha below \p average_alpha or not). Each endpoint is the
 * average of its group. If one of the groups is empty both endpoints are
 * set to the average over all texels.
 *
 * \param width, height      size of the rectangle in texels
 * \param src                first texel of the rectangle
 * \param src_rowstride      distance between rows in bytes
 * \param average_luminance  threshold for the colour split; luminance is
 *                           the sum of the three colour components
 * \param average_alpha      threshold for the alpha split
 * \param endpoints          receives endpoints[0] and endpoints[1]
 *
 * The colour and the alpha endpoints are finally swapped, independently,
 * where needed so that the first texel lies on the side of endpoint 0.
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* The alpha split has to look at the alpha component (p[3]).
          * Testing the blue component here would group the alpha values
          * by an unrelated channel. */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      /* One group is empty: use the overall average for both endpoints */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1308
1309 static void
1310 write_rgb_indices_unorm(struct bit_writer *writer,
1311 int src_width, int src_height,
1312 const uint8_t *src, int src_rowstride,
1313 uint8_t endpoints[][4])
1314 {
1315 int luminance;
1316 int endpoint_luminances[2];
1317 int endpoint;
1318 int index;
1319 int y, x;
1320
1321 for (endpoint = 0; endpoint < 2; endpoint++) {
1322 endpoint_luminances[endpoint] =
1323 endpoints[endpoint][0] +
1324 endpoints[endpoint][1] +
1325 endpoints[endpoint][2];
1326 }
1327
1328 /* If the endpoints have the same luminance then we'll just use index 0 for
1329 * all of the texels */
1330 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1331 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1332 return;
1333 }
1334
1335 for (y = 0; y < src_height; y++) {
1336 for (x = 0; x < src_width; x++) {
1337 luminance = src[0] + src[1] + src[2];
1338
1339 index = ((luminance - endpoint_luminances[0]) * 3 /
1340 (endpoint_luminances[1] - endpoint_luminances[0]));
1341 if (index < 0)
1342 index = 0;
1343 else if (index > 3)
1344 index = 3;
1345
1346 assert(x != 0 || y != 0 || index < 2);
1347
1348 write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1349
1350 src += 4;
1351 }
1352
1353 /* Pad the indices out to the block size */
1354 if (src_width < BLOCK_SIZE)
1355 write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1356
1357 src += src_rowstride - src_width * 4;
1358 }
1359
1360 /* Pad the indices out to the block size */
1361 if (src_height < BLOCK_SIZE)
1362 write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1363 }
1364
1365 static void
1366 write_alpha_indices_unorm(struct bit_writer *writer,
1367 int src_width, int src_height,
1368 const uint8_t *src, int src_rowstride,
1369 uint8_t endpoints[][4])
1370 {
1371 int index;
1372 int y, x;
1373
1374 /* If the endpoints have the same alpha then we'll just use index 0 for
1375 * all of the texels */
1376 if (endpoints[0][3] == endpoints[1][3]) {
1377 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1378 return;
1379 }
1380
1381 for (y = 0; y < src_height; y++) {
1382 for (x = 0; x < src_width; x++) {
1383 index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1384 ((int) endpoints[1][3] - endpoints[0][3]));
1385 if (index < 0)
1386 index = 0;
1387 else if (index > 7)
1388 index = 7;
1389
1390 assert(x != 0 || y != 0 || index < 4);
1391
1392 /* The first index has one less bit */
1393 write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1394
1395 src += 4;
1396 }
1397
1398 /* Pad the indices out to the block size */
1399 if (src_width < BLOCK_SIZE)
1400 write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1401
1402 src += src_rowstride - src_width * 4;
1403 }
1404
1405 /* Pad the indices out to the block size */
1406 if (src_height < BLOCK_SIZE)
1407 write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1408 }
1409
1410 static void
1411 compress_rgba_unorm_block(int src_width, int src_height,
1412 const uint8_t *src, int src_rowstride,
1413 uint8_t *dst)
1414 {
1415 int average_luminance, average_alpha;
1416 uint8_t endpoints[2][4];
1417 struct bit_writer writer;
1418 int component, endpoint;
1419
1420 get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1421 &average_luminance, &average_alpha);
1422 get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1423 average_luminance, average_alpha,
1424 endpoints);
1425
1426 writer.dst = dst;
1427 writer.pos = 0;
1428 writer.buf = 0;
1429
1430 write_bits(&writer, 5, 0x10); /* mode 4 */
1431 write_bits(&writer, 2, 0); /* rotation 0 */
1432 write_bits(&writer, 1, 0); /* index selection bit */
1433
1434 /* Write the color endpoints */
1435 for (component = 0; component < 3; component++)
1436 for (endpoint = 0; endpoint < 2; endpoint++)
1437 write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1438
1439 /* Write the alpha endpoints */
1440 for (endpoint = 0; endpoint < 2; endpoint++)
1441 write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1442
1443 write_rgb_indices_unorm(&writer,
1444 src_width, src_height,
1445 src, src_rowstride,
1446 endpoints);
1447 write_alpha_indices_unorm(&writer,
1448 src_width, src_height,
1449 src, src_rowstride,
1450 endpoints);
1451 }
1452
1453 static void
1454 compress_rgba_unorm(int width, int height,
1455 const uint8_t *src, int src_rowstride,
1456 uint8_t *dst, int dst_rowstride)
1457 {
1458 int dst_row_diff;
1459 int y, x;
1460
1461 if (dst_rowstride >= width * 4)
1462 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1463 else
1464 dst_row_diff = 0;
1465
1466 for (y = 0; y < height; y += BLOCK_SIZE) {
1467 for (x = 0; x < width; x += BLOCK_SIZE) {
1468 compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1469 MIN2(height - y, BLOCK_SIZE),
1470 src + x * 4 + y * src_rowstride,
1471 src_rowstride,
1472 dst);
1473 dst += BLOCK_BYTES;
1474 }
1475 dst += dst_row_diff;
1476 }
1477 }
1478
/**
 * Return the average luminance of a rectangle of RGB float texels.
 *
 * Texels are three floats each and luminance is the plain sum of the three
 * components. \p src_rowstride is the distance between rows in bytes.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Step over whatever lies between the end of this row and the start
       * of the next one */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1496
/**
 * Clamp \p value to [-65504, 65504] when \p is_signed is set and to
 * [0, 65504] otherwise. Values inside the range are returned unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   return (value < lower) ? lower : value;
}
1515
/**
 * Pick two RGB endpoints for a rectangle of RGB float texels.
 *
 * The texels are split into those whose luminance (sum of the three
 * components) is below \p average_luminance and the rest; each endpoint is
 * the average of one group. If either group is empty both endpoints are
 * the average over all texels. The endpoints are then passed through
 * clamp_value() and swapped where needed so that the first texel lies on
 * the side of endpoint 0. \p src_rowstride is given in bytes.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float totals[2][3];
   float side_luminance[2];
   float midpoint;
   float swap[3];
   int n_dark = 0;
   int side, c;
   int row, col;

   memset(totals, 0, sizeof totals);

   /* Accumulate the components of each group */
   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         const float luminance = texel[0] + texel[1] + texel[2];

         if (luminance < average_luminance) {
            side = 0;
            n_dark++;
         } else {
            side = 1;
         }

         for (c = 0; c < 3; c++)
            totals[side][c] += texel[c];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   /* Turn the totals into averages */
   for (c = 0; c < 3; c++) {
      if (n_dark == 0 || n_dark == n_texels) {
         endpoints[0][c] = endpoints[1][c] =
            (totals[0][c] + totals[1][c]) / (width * height);
      } else {
         endpoints[0][c] = totals[0][c] / n_dark;
         endpoints[1][c] = totals[1][c] / (width * height - n_dark);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities */
   for (side = 0; side < 2; side++)
      for (c = 0; c < 3; c++)
         endpoints[side][c] = clamp_value(endpoints[side][c], is_signed);

   /* The first index is stored without its most-significant bit, so the
    * first texel has to be nearer to endpoint 0; swap if it is not */
   for (side = 0; side < 2; side++) {
      side_luminance[side] =
         endpoints[side][0] +
         endpoints[side][1] +
         endpoints[side][2];
   }
   midpoint = (side_luminance[0] + side_luminance[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (side_luminance[0] <= midpoint)) {
      memcpy(swap, endpoints[0], sizeof swap);
      memcpy(endpoints[0], endpoints[1], sizeof swap);
      memcpy(endpoints[1], swap, sizeof swap);
   }
}
1591
1592 static void
1593 write_rgb_indices_float(struct bit_writer *writer,
1594 int src_width, int src_height,
1595 const float *src, int src_rowstride,
1596 float endpoints[][3])
1597 {
1598 float luminance;
1599 float endpoint_luminances[2];
1600 int endpoint;
1601 int index;
1602 int y, x;
1603
1604 for (endpoint = 0; endpoint < 2; endpoint++) {
1605 endpoint_luminances[endpoint] =
1606 endpoints[endpoint][0] +
1607 endpoints[endpoint][1] +
1608 endpoints[endpoint][2];
1609 }
1610
1611 /* If the endpoints have the same luminance then we'll just use index 0 for
1612 * all of the texels */
1613 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1614 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1615 return;
1616 }
1617
1618 for (y = 0; y < src_height; y++) {
1619 for (x = 0; x < src_width; x++) {
1620 luminance = src[0] + src[1] + src[2];
1621
1622 index = ((luminance - endpoint_luminances[0]) * 15 /
1623 (endpoint_luminances[1] - endpoint_luminances[0]));
1624 if (index < 0)
1625 index = 0;
1626 else if (index > 15)
1627 index = 15;
1628
1629 assert(x != 0 || y != 0 || index < 8);
1630
1631 write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1632
1633 src += 3;
1634 }
1635
1636 /* Pad the indices out to the block size */
1637 if (src_width < BLOCK_SIZE)
1638 write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1639
1640 src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1641 }
1642
1643 /* Pad the indices out to the block size */
1644 if (src_height < BLOCK_SIZE)
1645 write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1646 }
1647
/**
 * Convert a float endpoint component to the 10-bit value stored in the
 * block.
 *
 * Unsigned: non-positive values give 0; otherwise the half-float bit
 * pattern is scaled by 64 / 31 and shifted right by six bits.
 *
 * Signed: the sign bit is removed from the half-float bit pattern, the
 * remainder is scaled by 32 / 31 and shifted right by six bits, and for a
 * negative input the result is negated and masked to ten bits.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   bool negative;
   int half;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   /* Work on the magnitude and put the sign back at the end */
   negative = (half & 0x8000) != 0;
   if (negative)
      half &= 0x7fff;

   half = (32 * half / 31) >> 6;

   return negative ? (-half & ((1 << 10) - 1)) : half;
}
1677
1678 static void
1679 compress_rgb_float_block(int src_width, int src_height,
1680 const float *src, int src_rowstride,
1681 uint8_t *dst,
1682 bool is_signed)
1683 {
1684 float average_luminance;
1685 float endpoints[2][3];
1686 struct bit_writer writer;
1687 int component, endpoint;
1688 int endpoint_value;
1689
1690 average_luminance =
1691 get_average_luminance_float(src_width, src_height, src, src_rowstride);
1692 get_endpoints_float(src_width, src_height, src, src_rowstride,
1693 average_luminance, endpoints, is_signed);
1694
1695 writer.dst = dst;
1696 writer.pos = 0;
1697 writer.buf = 0;
1698
1699 write_bits(&writer, 5, 3); /* mode 3 */
1700
1701 /* Write the endpoints */
1702 for (endpoint = 0; endpoint < 2; endpoint++) {
1703 for (component = 0; component < 3; component++) {
1704 endpoint_value =
1705 get_endpoint_value(endpoints[endpoint][component], is_signed);
1706 write_bits(&writer, 10, endpoint_value);
1707 }
1708 }
1709
1710 write_rgb_indices_float(&writer,
1711 src_width, src_height,
1712 src, src_rowstride,
1713 endpoints);
1714 }
1715
1716 static void
1717 compress_rgb_float(int width, int height,
1718 const float *src, int src_rowstride,
1719 uint8_t *dst, int dst_rowstride,
1720 bool is_signed)
1721 {
1722 int dst_row_diff;
1723 int y, x;
1724
1725 if (dst_rowstride >= width * 4)
1726 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1727 else
1728 dst_row_diff = 0;
1729
1730 for (y = 0; y < height; y += BLOCK_SIZE) {
1731 for (x = 0; x < width; x += BLOCK_SIZE) {
1732 compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1733 MIN2(height - y, BLOCK_SIZE),
1734 src + x * 3 +
1735 y * src_rowstride / sizeof (float),
1736 src_rowstride,
1737 dst,
1738 is_signed);
1739 dst += BLOCK_BYTES;
1740 }
1741 dst += dst_row_diff;
1742 }
1743 }