mesa: Add missing include guards
[mesa.git] / src / mesa / main / texcompress_bptc_tmp.h
1 /*
2 * Copyright (C) 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /*
25 * Included by texcompress_bptc and gallium to define BPTC decoding routines.
26 */
27
28 #ifndef TEXCOMPRESS_BPTC_TMP_H
29 #define TEXCOMPRESS_BPTC_TMP_H
30
31 #include "util/format_srgb.h"
32 #include "util/half_float.h"
33 #include "macros.h"
34
35 #define BLOCK_SIZE 4
36 #define N_PARTITIONS 64
37 #define BLOCK_BYTES 16
38
/* Bit layout of one BPTC UNORM block mode.  The field order matters: the
 * bptc_unorm_modes table initialises these positionally.
 */
struct bptc_unorm_mode {
   /* Number of endpoint pairs stored in the block (1, 2 or 3). */
   int n_subsets;
   /* Width of the partition-number field; 0 when there is none. */
   int n_partition_bits;
   /* True when a 2-bit rotation field follows the partition bits. */
   bool has_rotation_bits;
   /* True when a 1-bit index-selection field is present. */
   bool has_index_selection_bit;
   /* Stored bits per colour component of each endpoint (before p-bits). */
   int n_color_bits;
   /* Stored bits per alpha endpoint; 0 means alpha is forced to 255. */
   int n_alpha_bits;
   /* One extra low-order bit ("p-bit") is stored per endpoint. */
   bool has_endpoint_pbits;
   /* One p-bit is stored per subset and shared by both of its endpoints. */
   bool has_shared_pbits;
   /* Bits per texel in the primary index list. */
   int n_index_bits;
   /* Bits per texel in the secondary index list; 0 when there is none. */
   int n_secondary_index_bits;
};
51
/* One run of consecutive block bits that contributes to a float endpoint.
 * Initialised positionally by the bptc_float_modes table.
 */
struct bptc_float_bitfield {
   /* Destination endpoint index; -1 marks the end of a bitfield list. */
   int8_t endpoint;
   /* Destination colour component (0..2). */
   uint8_t component;
   /* Bit position within the endpoint value where the run is placed. */
   uint8_t offset;
   /* Number of bits in the run. */
   uint8_t n_bits;
   /* True when the run is stored in reversed bit order. */
   bool reverse;
};
59
60 struct bptc_float_mode {
61 bool reserved;
62 bool transformed_endpoints;
63 int n_partition_bits;
64 int n_endpoint_bits;
65 int n_index_bits;
66 int n_delta_bits[3];
67 struct bptc_float_bitfield bitfields[24];
68 };
69
/* NOTE(review): not referenced in the visible part of this file; presumably
 * the state of a bit-stream writer used by an encoder elsewhere - confirm.
 */
struct bit_writer {
   uint8_t buf;   /* presumably the partially filled byte - confirm */
   int pos;       /* presumably the bit position within buf - confirm */
   uint8_t *dst;  /* presumably the output cursor - confirm */
};
75
76 static const struct bptc_unorm_mode
77 bptc_unorm_modes[] = {
78 /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
79 /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
80 /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
81 /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
82 /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
83 /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
84 /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
85 /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
86 };
87
88 static const struct bptc_float_mode
89 bptc_float_modes[] = {
90 /* 00 */
91 { false, true, 5, 10, 3, { 5, 5, 5 },
92 { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
93 { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
94 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
95 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
96 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
97 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
98 { 3, 2, 3, 1, false },
99 { -1 } }
100 },
101 /* 01 */
102 { false, true, 5, 7, 3, { 6, 6, 6 },
103 { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
104 { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
105 { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
106 { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
107 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
108 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
109 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
110 { 2, 0, 0, 6, false },
111 { 3, 0, 0, 6, false },
112 { -1 } }
113 },
114 /* 00010 */
115 { false, true, 5, 11, 3, { 5, 4, 4 },
116 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
117 { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
118 { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
119 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
120 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
121 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
122 { -1 } }
123 },
124 /* 00011 */
125 { false, false, 0, 10, 4, { 10, 10, 10 },
126 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
127 { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
128 { -1 } }
129 },
130 /* 00110 */
131 { false, true, 5, 11, 3, { 4, 5, 4 },
132 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
133 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
134 { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
135 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
136 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
137 { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
138 { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
139 { -1 } }
140 },
141 /* 00111 */
142 { false, true, 0, 11, 4, { 9, 9, 9 },
143 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
144 { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
145 { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
146 { -1 } }
147 },
148 /* 01010 */
149 { false, true, 5, 11, 3, { 4, 4, 5 },
150 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
151 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
152 { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
153 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
154 { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
155 { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
156 { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
157 { -1 } }
158 },
159 /* 01011 */
160 { false, true, 0, 12, 4, { 8, 8, 8 },
161 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
162 { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
163 { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
164 { -1 } }
165 },
166 /* 01110 */
167 { false, true, 5, 9, 3, { 5, 5, 5 },
168 { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
169 { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
170 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
171 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
172 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
173 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
174 { 3, 2, 3, 1, false },
175 { -1 } }
176 },
177 /* 01111 */
178 { false, true, 0, 16, 4, { 4, 4, 4 },
179 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
180 { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
181 { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
182 { -1 } }
183 },
184 /* 10010 */
185 { false, true, 5, 8, 3, { 6, 5, 5 },
186 { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
187 { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
188 { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
189 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
190 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
191 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
192 { 3, 0, 0, 6, false },
193 { -1 } }
194 },
195 /* 10011 */
196 { true /* reserved */ },
197 /* 10110 */
198 { false, true, 5, 8, 3, { 5, 6, 5 },
199 { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
200 { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
201 { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
202 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
203 { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
204 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
205 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
206 { -1 } }
207 },
208 /* 10111 */
209 { true /* reserved */ },
210 /* 11010 */
211 { false, true, 5, 8, 3, { 5, 5, 6 },
212 { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
213 { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
214 { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
215 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
216 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
217 { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
218 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
219 { -1 } }
220 },
221 /* 11011 */
222 { true /* reserved */ },
223 /* 11110 */
224 { false, false, 5, 6, 3, { 6, 6, 6 },
225 { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
226 { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
227 { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
228 { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
229 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
230 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
231 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
232 { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
233 { -1 } }
234 },
235 /* 11111 */
236 { true /* reserved */ },
237 };
238
239 /* This partition table is used when the mode has two subsets. Each
240 * partition is represented by a 32-bit value which gives 2 bits per texel
241 * within the block. The value of the two bits represents which subset to use
242 * (0 or 1).
243 */
244 static const uint32_t
245 partition_table1[N_PARTITIONS] = {
246 0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
247 0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
248 0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
249 0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
250 0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
251 0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
252 0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
253 0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
254 0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
255 0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
256 0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
257 0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
258 0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
259 0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
260 0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
261 0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
262 };
263
264 /* This partition table is used when the mode has three subsets. In this case
265 * the values can be 0, 1 or 2.
266 */
267 static const uint32_t
268 partition_table2[N_PARTITIONS] = {
269 0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
270 0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
271 0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
272 0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
273 0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
274 0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
275 0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
276 0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
277 0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
278 0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
279 0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
280 0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
281 0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
282 0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
283 0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
284 0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
285 };
286
287 static const uint8_t
288 anchor_indices[][N_PARTITIONS] = {
289 /* Anchor index values for the second subset of two-subset partitioning */
290 {
291 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
292 0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
293 0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
294 0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
295 },
296
297 /* Anchor index values for the second subset of three-subset partitioning */
298 {
299 0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
300 0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
301 0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
302 0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
303 },
304
305 /* Anchor index values for the third subset of three-subset
306 * partitioning
307 */
308 {
309 0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
310 0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
311 0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
312 0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
313 }
314 };
315
/* Reads an n_bits wide little-endian bit field that starts `offset` bits into
 * `block` (bit 0 is the least-significant bit of block[0]) and returns it as
 * an integer.  The field may straddle byte boundaries.  The byte containing
 * `offset` is always read, even when n_bits is 0.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int src_byte = offset / 8;
   int src_bit = offset % 8;
   int dst_bit = 0;
   int value = 0;

   do {
      /* Take whatever is left in the current byte, capped at what we need. */
      const int available = 8 - src_bit;
      const int chunk = n_bits < available ? n_bits : available;
      const int mask = (1 << chunk) - 1;

      value |= ((block[src_byte] >> src_bit) & mask) << dst_bit;

      n_bits -= chunk;
      dst_bit += chunk;

      /* Subsequent chunks start at the bottom of the next byte. */
      src_byte++;
      src_bit = 0;
   } while (n_bits > 0);

   return value;
}
342
/* Widens an n_bits wide value to a full byte: the value is moved to the top
 * of the byte and its most-significant bits are repeated in the low bits
 * that would otherwise be left empty.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   const int moved_to_top = byte << (8 - n_bits);
   const int replicated_msbs = byte >> (2 * n_bits - 8);

   /* Anything shifted above bit 7 is discarded by the uint8_t return. */
   return moved_to_top | replicated_msbs;
}
352
353 static int
354 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
355 const uint8_t *block,
356 int bit_offset,
357 uint8_t endpoints[][4])
358 {
359 int component;
360 int subset;
361 int endpoint;
362 int pbit;
363 int n_components;
364
365 /* Extract each color component */
366 for (component = 0; component < 3; component++) {
367 for (subset = 0; subset < mode->n_subsets; subset++) {
368 for (endpoint = 0; endpoint < 2; endpoint++) {
369 endpoints[subset * 2 + endpoint][component] =
370 extract_bits(block, bit_offset, mode->n_color_bits);
371 bit_offset += mode->n_color_bits;
372 }
373 }
374 }
375
376 /* Extract the alpha values */
377 if (mode->n_alpha_bits > 0) {
378 for (subset = 0; subset < mode->n_subsets; subset++) {
379 for (endpoint = 0; endpoint < 2; endpoint++) {
380 endpoints[subset * 2 + endpoint][3] =
381 extract_bits(block, bit_offset, mode->n_alpha_bits);
382 bit_offset += mode->n_alpha_bits;
383 }
384 }
385
386 n_components = 4;
387 } else {
388 for (subset = 0; subset < mode->n_subsets; subset++)
389 for (endpoint = 0; endpoint < 2; endpoint++)
390 endpoints[subset * 2 + endpoint][3] = 255;
391
392 n_components = 3;
393 }
394
395 /* Add in the p-bits */
396 if (mode->has_endpoint_pbits) {
397 for (subset = 0; subset < mode->n_subsets; subset++) {
398 for (endpoint = 0; endpoint < 2; endpoint++) {
399 pbit = extract_bits(block, bit_offset, 1);
400 bit_offset += 1;
401
402 for (component = 0; component < n_components; component++) {
403 endpoints[subset * 2 + endpoint][component] <<= 1;
404 endpoints[subset * 2 + endpoint][component] |= pbit;
405 }
406 }
407 }
408 } else if (mode->has_shared_pbits) {
409 for (subset = 0; subset < mode->n_subsets; subset++) {
410 pbit = extract_bits(block, bit_offset, 1);
411 bit_offset += 1;
412
413 for (endpoint = 0; endpoint < 2; endpoint++) {
414 for (component = 0; component < n_components; component++) {
415 endpoints[subset * 2 + endpoint][component] <<= 1;
416 endpoints[subset * 2 + endpoint][component] |= pbit;
417 }
418 }
419 }
420 }
421
422 /* Expand the n-bit values to a byte */
423 for (subset = 0; subset < mode->n_subsets; subset++) {
424 for (endpoint = 0; endpoint < 2; endpoint++) {
425 for (component = 0; component < 3; component++) {
426 endpoints[subset * 2 + endpoint][component] =
427 expand_component(endpoints[subset * 2 + endpoint][component],
428 mode->n_color_bits +
429 mode->has_endpoint_pbits +
430 mode->has_shared_pbits);
431 }
432
433 if (mode->n_alpha_bits > 0) {
434 endpoints[subset * 2 + endpoint][3] =
435 expand_component(endpoints[subset * 2 + endpoint][3],
436 mode->n_alpha_bits +
437 mode->has_endpoint_pbits +
438 mode->has_shared_pbits);
439 }
440 }
441 }
442
443 return bit_offset;
444 }
445
446 static bool
447 is_anchor(int n_subsets,
448 int partition_num,
449 int texel)
450 {
451 if (texel == 0)
452 return true;
453
454 switch (n_subsets) {
455 case 1:
456 return false;
457 case 2:
458 return anchor_indices[0][partition_num] == texel;
459 case 3:
460 return (anchor_indices[1][partition_num] == texel ||
461 anchor_indices[2][partition_num] == texel);
462 default:
463 assert(false);
464 return false;
465 }
466 }
467
468 static int
469 count_anchors_before_texel(int n_subsets,
470 int partition_num,
471 int texel)
472 {
473 int count = 1;
474
475 if (texel == 0)
476 return 0;
477
478 switch (n_subsets) {
479 case 1:
480 break;
481 case 2:
482 if (texel > anchor_indices[0][partition_num])
483 count++;
484 break;
485 case 3:
486 if (texel > anchor_indices[1][partition_num])
487 count++;
488 if (texel > anchor_indices[2][partition_num])
489 count++;
490 break;
491 default:
492 assert(false);
493 return 0;
494 }
495
496 return count;
497 }
498
/* Blends endpoints a and b using the fixed BPTC weight for `index`.
 *
 * index_bits selects the weight table and must be 2, 3 or 4; index must be
 * below (1 << index_bits).  Weights are in 1/64ths, and the result is rounded
 * to nearest by adding 32 before the shift.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit_weights[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit_weights[] =
      { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t four_bit_weights[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed directly by index_bits; there are no 0- or 1-bit tables. */
   static const uint8_t *tables[] = {
      NULL, NULL, two_bit_weights, three_bit_weights, four_bit_weights
   };
   const int w_b = tables[index_bits][index];
   const int w_a = 64 - w_b;

   return (w_a * a + w_b * b + 32) >> 6;
}
517
/* Applies the block's channel rotation to a decoded RGBA texel in place:
 * rotation 0 leaves it alone, rotation r (1..3) exchanges component r - 1
 * with the alpha component.
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   if (rotation != 0) {
      uint8_t *swapped = &result[rotation - 1];
      const uint8_t alpha = result[3];

      result[3] = *swapped;
      *swapped = alpha;
   }
}
533
534 static void
535 fetch_rgba_unorm_from_block(const uint8_t *block,
536 uint8_t *result,
537 int texel)
538 {
539 int mode_num = ffs(block[0]);
540 const struct bptc_unorm_mode *mode;
541 int bit_offset, secondary_bit_offset;
542 int partition_num;
543 int subset_num;
544 int rotation;
545 int index_selection;
546 int index_bits;
547 int indices[2];
548 int index;
549 int anchors_before_texel;
550 bool anchor;
551 uint8_t endpoints[3 * 2][4];
552 uint32_t subsets;
553 int component;
554
555 if (mode_num == 0) {
556 /* According to the spec this mode is reserved and shouldn't be used. */
557 memset(result, 0, 3);
558 result[3] = 0xff;
559 return;
560 }
561
562 mode = bptc_unorm_modes + mode_num - 1;
563 bit_offset = mode_num;
564
565 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
566 bit_offset += mode->n_partition_bits;
567
568 switch (mode->n_subsets) {
569 case 1:
570 subsets = 0;
571 break;
572 case 2:
573 subsets = partition_table1[partition_num];
574 break;
575 case 3:
576 subsets = partition_table2[partition_num];
577 break;
578 default:
579 assert(false);
580 return;
581 }
582
583 if (mode->has_rotation_bits) {
584 rotation = extract_bits(block, bit_offset, 2);
585 bit_offset += 2;
586 } else {
587 rotation = 0;
588 }
589
590 if (mode->has_index_selection_bit) {
591 index_selection = extract_bits(block, bit_offset, 1);
592 bit_offset++;
593 } else {
594 index_selection = 0;
595 }
596
597 bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
598
599 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
600 partition_num, texel);
601
602 /* Calculate the offset to the secondary index */
603 secondary_bit_offset = (bit_offset +
604 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
605 mode->n_subsets +
606 mode->n_secondary_index_bits * texel -
607 anchors_before_texel);
608
609 /* Calculate the offset to the primary index for this texel */
610 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
611
612 subset_num = (subsets >> (texel * 2)) & 3;
613
614 anchor = is_anchor(mode->n_subsets, partition_num, texel);
615
616 index_bits = mode->n_index_bits;
617 if (anchor)
618 index_bits--;
619 indices[0] = extract_bits(block, bit_offset, index_bits);
620
621 if (mode->n_secondary_index_bits) {
622 index_bits = mode->n_secondary_index_bits;
623 if (anchor)
624 index_bits--;
625 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
626 }
627
628 index = indices[index_selection];
629 index_bits = (index_selection ?
630 mode->n_secondary_index_bits :
631 mode->n_index_bits);
632
633 for (component = 0; component < 3; component++)
634 result[component] = interpolate(endpoints[subset_num * 2][component],
635 endpoints[subset_num * 2 + 1][component],
636 index,
637 index_bits);
638
639 /* Alpha uses the opposite index from the color components */
640 if (mode->n_secondary_index_bits && !index_selection) {
641 index = indices[1];
642 index_bits = mode->n_secondary_index_bits;
643 } else {
644 index = indices[0];
645 index_bits = mode->n_index_bits;
646 }
647
648 result[3] = interpolate(endpoints[subset_num * 2][3],
649 endpoints[subset_num * 2 + 1][3],
650 index,
651 index_bits);
652
653 apply_rotation(rotation, result);
654 }
655
656 #ifdef BPTC_BLOCK_DECODE
657 static void
658 decompress_rgba_unorm_block(int src_width, int src_height,
659 const uint8_t *block,
660 uint8_t *dst_row, int dst_rowstride)
661 {
662 int mode_num = ffs(block[0]);
663 const struct bptc_unorm_mode *mode;
664 int bit_offset, secondary_bit_offset;
665 int partition_num;
666 int subset_num;
667 int rotation;
668 int index_selection;
669 int index_bits;
670 int indices[2];
671 int index;
672 int anchors_before_texel;
673 bool anchor;
674 uint8_t endpoints[3 * 2][4];
675 uint32_t subsets;
676 int component;
677 unsigned x, y;
678
679 if (mode_num == 0) {
680 /* According to the spec this mode is reserved and shouldn't be used. */
681 for(y = 0; y < src_height; y += 1) {
682 uint8_t *result = dst_row;
683 memset(result, 0, 4 * src_width);
684 for(x = 0; x < src_width; x += 1) {
685 result[3] = 0xff;
686 result += 4;
687 }
688 dst_row += dst_rowstride;
689 }
690 return;
691 }
692
693 mode = bptc_unorm_modes + mode_num - 1;
694 bit_offset = mode_num;
695
696 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
697 bit_offset += mode->n_partition_bits;
698
699 switch (mode->n_subsets) {
700 case 1:
701 subsets = 0;
702 break;
703 case 2:
704 subsets = partition_table1[partition_num];
705 break;
706 case 3:
707 subsets = partition_table2[partition_num];
708 break;
709 default:
710 assert(false);
711 return;
712 }
713
714 if (mode->has_rotation_bits) {
715 rotation = extract_bits(block, bit_offset, 2);
716 bit_offset += 2;
717 } else {
718 rotation = 0;
719 }
720
721 if (mode->has_index_selection_bit) {
722 index_selection = extract_bits(block, bit_offset, 1);
723 bit_offset++;
724 } else {
725 index_selection = 0;
726 }
727
728 bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
729
730 for(y = 0; y < src_height; y += 1) {
731 uint8_t *result = dst_row;
732 for(x = 0; x < src_width; x += 1) {
733 int texel;
734 texel = x + y * 4;
735
736 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
737 partition_num,
738 texel);
739
740 /* Calculate the offset to the secondary index */
741 secondary_bit_offset = (bit_offset +
742 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
743 mode->n_subsets +
744 mode->n_secondary_index_bits * texel -
745 anchors_before_texel);
746
747 /* Calculate the offset to the primary index for this texel */
748 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
749
750 subset_num = (subsets >> (texel * 2)) & 3;
751
752 anchor = is_anchor(mode->n_subsets, partition_num, texel);
753
754 index_bits = mode->n_index_bits;
755 if (anchor)
756 index_bits--;
757 indices[0] = extract_bits(block, bit_offset, index_bits);
758
759 if (mode->n_secondary_index_bits) {
760 index_bits = mode->n_secondary_index_bits;
761 if (anchor)
762 index_bits--;
763 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
764 }
765
766 index = indices[index_selection];
767 index_bits = (index_selection ?
768 mode->n_secondary_index_bits :
769 mode->n_index_bits);
770
771 for (component = 0; component < 3; component++)
772 result[component] = interpolate(endpoints[subset_num * 2][component],
773 endpoints[subset_num * 2 + 1][component],
774 index,
775 index_bits);
776
777 /* Alpha uses the opposite index from the color components */
778 if (mode->n_secondary_index_bits && !index_selection) {
779 index = indices[1];
780 index_bits = mode->n_secondary_index_bits;
781 } else {
782 index = indices[0];
783 index_bits = mode->n_index_bits;
784 }
785
786 result[3] = interpolate(endpoints[subset_num * 2][3],
787 endpoints[subset_num * 2 + 1][3],
788 index,
789 index_bits);
790
791 apply_rotation(rotation, result);
792 result += 4;
793 }
794 dst_row += dst_rowstride;
795 }
796 }
797
798 static void
799 decompress_rgba_unorm(int width, int height,
800 const uint8_t *src, int src_rowstride,
801 uint8_t *dst, int dst_rowstride)
802 {
803 int src_row_diff;
804 int y, x;
805
806 if (src_rowstride >= width * 4)
807 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
808 else
809 src_row_diff = 0;
810
811 for (y = 0; y < height; y += BLOCK_SIZE) {
812 for (x = 0; x < width; x += BLOCK_SIZE) {
813 decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
814 MIN2(height - y, BLOCK_SIZE),
815 src,
816 dst + x * 4 + y * dst_rowstride,
817 dst_rowstride);
818 src += BLOCK_BYTES;
819 }
820 src += src_row_diff;
821 }
822 }
823 #endif // BPTC_BLOCK_DECODE
824
/* Interprets the low n_bits of `value` as a two's-complement number and
 * returns it widened to 32 bits: when bit (n_bits - 1) is set, every bit
 * from n_bits upwards is set as well; otherwise value is returned unchanged.
 *
 * n_bits must be in the range 1..31.
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   const uint32_t sign_bit = (uint32_t) 1 << (n_bits - 1);

   if ((uint32_t) value & sign_bit) {
      /* Build the mask in unsigned arithmetic: left-shifting the negative
       * value ~(int32_t) 0 is undefined behaviour in C.
       */
      const uint32_t high_bits = ~(uint32_t) 0 << n_bits;

      value = (int32_t) ((uint32_t) value | high_bits);
   }

   return value;
}
835
/* Unquantises a signed endpoint of n_endpoint_bits precision.  Values with
 * 16 or more bits, and zero, pass through unchanged; the largest magnitudes
 * saturate to 0x7fff; everything else is scaled with rounding.  The sign of
 * the input is preserved.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   const bool negative = value < 0;
   int magnitude;

   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   /* Work on the magnitude and restore the sign at the end. */
   magnitude = negative ? -value : value;

   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1)
      magnitude = 0x7fff;
   else
      magnitude = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return negative ? -magnitude : magnitude;
}
864
/* Unquantises an unsigned endpoint of n_endpoint_bits precision.  Values
 * with 15 or more bits, and zero, pass through unchanged; the all-ones value
 * maps to 0xffff; everything else is scaled with rounding.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   const int all_ones = (1 << n_endpoint_bits) - 1;

   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   if (value == all_ones)
      return 0xffff;

   return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
}
879
880 static int
881 extract_float_endpoints(const struct bptc_float_mode *mode,
882 const uint8_t *block,
883 int bit_offset,
884 int32_t endpoints[][3],
885 bool is_signed)
886 {
887 const struct bptc_float_bitfield *bitfield;
888 int endpoint, component;
889 int n_endpoints;
890 int value;
891 int i;
892
893 if (mode->n_partition_bits)
894 n_endpoints = 4;
895 else
896 n_endpoints = 2;
897
898 memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
899
900 for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
901 value = extract_bits(block, bit_offset, bitfield->n_bits);
902 bit_offset += bitfield->n_bits;
903
904 if (bitfield->reverse) {
905 for (i = 0; i < bitfield->n_bits; i++) {
906 if (value & (1 << i))
907 endpoints[bitfield->endpoint][bitfield->component] |=
908 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
909 }
910 } else {
911 endpoints[bitfield->endpoint][bitfield->component] |=
912 value << bitfield->offset;
913 }
914 }
915
916 if (mode->transformed_endpoints) {
917 /* The endpoints are specified as signed offsets from e0 */
918 for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
919 for (component = 0; component < 3; component++) {
920 value = sign_extend(endpoints[endpoint][component],
921 mode->n_delta_bits[component]);
922 endpoints[endpoint][component] =
923 ((endpoints[0][component] + value) &
924 ((1 << mode->n_endpoint_bits) - 1));
925 }
926 }
927 }
928
929 if (is_signed) {
930 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
931 for (component = 0; component < 3; component++) {
932 value = sign_extend(endpoints[endpoint][component],
933 mode->n_endpoint_bits);
934 endpoints[endpoint][component] =
935 signed_unquantize(value, mode->n_endpoint_bits);
936 }
937 }
938 } else {
939 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
940 for (component = 0; component < 3; component++) {
941 endpoints[endpoint][component] =
942 unsigned_unquantize(endpoints[endpoint][component],
943 mode->n_endpoint_bits);
944 }
945 }
946 }
947
948 return bit_offset;
949 }
950
/* Final scaling step for an interpolated unsigned value: multiplies by 31/64
 * (integer division, truncating).  The result is then treated by the callers
 * as the bit pattern of a half float.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
956
/* Final scaling step for an interpolated signed value: the magnitude is
 * multiplied by 31/32 (truncating) and, for negative inputs, bit 15 is set
 * as the sign bit of the resulting half-float bit pattern.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   if (value >= 0)
      return value * 31 / 32;

   return (-value * 31 / 32) | 0x8000;
}
965
966 static void
967 fetch_rgb_float_from_block(const uint8_t *block,
968 float *result,
969 int texel,
970 bool is_signed)
971 {
972 int mode_num;
973 const struct bptc_float_mode *mode;
974 int bit_offset;
975 int partition_num;
976 int subset_num;
977 int index_bits;
978 int index;
979 int anchors_before_texel;
980 int32_t endpoints[2 * 2][3];
981 uint32_t subsets;
982 int n_subsets;
983 int component;
984 int32_t value;
985
986 if (block[0] & 0x2) {
987 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
988 bit_offset = 5;
989 } else {
990 mode_num = block[0] & 3;
991 bit_offset = 2;
992 }
993
994 mode = bptc_float_modes + mode_num;
995
996 if (mode->reserved) {
997 memset(result, 0, sizeof result[0] * 3);
998 result[3] = 1.0f;
999 return;
1000 }
1001
1002 bit_offset = extract_float_endpoints(mode, block, bit_offset,
1003 endpoints, is_signed);
1004
1005 if (mode->n_partition_bits) {
1006 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1007 bit_offset += mode->n_partition_bits;
1008
1009 subsets = partition_table1[partition_num];
1010 n_subsets = 2;
1011 } else {
1012 partition_num = 0;
1013 subsets = 0;
1014 n_subsets = 1;
1015 }
1016
1017 anchors_before_texel =
1018 count_anchors_before_texel(n_subsets, partition_num, texel);
1019
1020 /* Calculate the offset to the primary index for this texel */
1021 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1022
1023 subset_num = (subsets >> (texel * 2)) & 3;
1024
1025 index_bits = mode->n_index_bits;
1026 if (is_anchor(n_subsets, partition_num, texel))
1027 index_bits--;
1028 index = extract_bits(block, bit_offset, index_bits);
1029
1030 for (component = 0; component < 3; component++) {
1031 value = interpolate(endpoints[subset_num * 2][component],
1032 endpoints[subset_num * 2 + 1][component],
1033 index,
1034 mode->n_index_bits);
1035
1036 if (is_signed)
1037 value = finish_signed_unquantize(value);
1038 else
1039 value = finish_unsigned_unquantize(value);
1040
1041 result[component] = _mesa_half_to_float(value);
1042 }
1043
1044 result[3] = 1.0f;
1045 }
1046
1047 #ifdef BPTC_BLOCK_DECODE
1048 static void
1049 decompress_rgb_float_block(unsigned src_width, unsigned src_height,
1050 const uint8_t *block,
1051 float *dst_row, unsigned dst_rowstride,
1052 bool is_signed)
1053 {
1054 int mode_num;
1055 const struct bptc_float_mode *mode;
1056 int bit_offset;
1057 int partition_num;
1058 int subset_num;
1059 int index_bits;
1060 int index;
1061 int anchors_before_texel;
1062 int32_t endpoints[2 * 2][3];
1063 uint32_t subsets;
1064 int n_subsets;
1065 int component;
1066 int32_t value;
1067 unsigned x, y;
1068
1069 if (block[0] & 0x2) {
1070 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1071 bit_offset = 5;
1072 } else {
1073 mode_num = block[0] & 3;
1074 bit_offset = 2;
1075 }
1076
1077 mode = bptc_float_modes + mode_num;
1078
1079 if (mode->reserved) {
1080 for(y = 0; y < src_height; y += 1) {
1081 float *result = dst_row;
1082 memset(result, 0, sizeof result[0] * 4 * src_width);
1083 for(x = 0; x < src_width; x += 1) {
1084 result[3] = 1.0f;
1085 result += 4;
1086 }
1087 dst_row += dst_rowstride / sizeof dst_row[0];
1088 }
1089 return;
1090 }
1091
1092 bit_offset = extract_float_endpoints(mode, block, bit_offset,
1093 endpoints, is_signed);
1094
1095 if (mode->n_partition_bits) {
1096 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1097 bit_offset += mode->n_partition_bits;
1098
1099 subsets = partition_table1[partition_num];
1100 n_subsets = 2;
1101 } else {
1102 partition_num = 0;
1103 subsets = 0;
1104 n_subsets = 1;
1105 }
1106
1107 for(y = 0; y < src_height; y += 1) {
1108 float *result = dst_row;
1109 for(x = 0; x < src_width; x += 1) {
1110 int texel;
1111
1112 texel = x + y * 4;
1113
1114 anchors_before_texel =
1115 count_anchors_before_texel(n_subsets, partition_num, texel);
1116
1117 /* Calculate the offset to the primary index for this texel */
1118 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1119
1120 subset_num = (subsets >> (texel * 2)) & 3;
1121
1122 index_bits = mode->n_index_bits;
1123 if (is_anchor(n_subsets, partition_num, texel))
1124 index_bits--;
1125 index = extract_bits(block, bit_offset, index_bits);
1126
1127 for (component = 0; component < 3; component++) {
1128 value = interpolate(endpoints[subset_num * 2][component],
1129 endpoints[subset_num * 2 + 1][component],
1130 index,
1131 mode->n_index_bits);
1132
1133 if (is_signed)
1134 value = finish_signed_unquantize(value);
1135 else
1136 value = finish_unsigned_unquantize(value);
1137
1138 result[component] = _mesa_half_to_float(value);
1139 }
1140
1141 result[3] = 1.0f;
1142 result += 4;
1143 }
1144 dst_row += dst_rowstride / sizeof dst_row[0];
1145 }
1146 }
1147
1148 static void
1149 decompress_rgb_float(int width, int height,
1150 const uint8_t *src, int src_rowstride,
1151 float *dst, int dst_rowstride, bool is_signed)
1152 {
1153 int src_row_diff;
1154 int y, x;
1155
1156 if (src_rowstride >= width * 4)
1157 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1158 else
1159 src_row_diff = 0;
1160
1161 for (y = 0; y < height; y += BLOCK_SIZE) {
1162 for (x = 0; x < width; x += BLOCK_SIZE) {
1163 decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1164 MIN2(height - y, BLOCK_SIZE),
1165 src,
1166 (dst + x * 4 +
1167 (y * dst_rowstride / sizeof dst[0])),
1168 dst_rowstride, is_signed);
1169 src += BLOCK_BYTES;
1170 }
1171 src += src_row_diff;
1172 }
1173 }
1174 #endif // BPTC_BLOCK_DECODE
1175
1176 static void
1177 write_bits(struct bit_writer *writer, int n_bits, int value)
1178 {
1179 do {
1180 if (n_bits + writer->pos >= 8) {
1181 *(writer->dst++) = writer->buf | (value << writer->pos);
1182 writer->buf = 0;
1183 value >>= (8 - writer->pos);
1184 n_bits -= (8 - writer->pos);
1185 writer->pos = 0;
1186 } else {
1187 writer->buf |= value << writer->pos;
1188 writer->pos += n_bits;
1189 break;
1190 }
1191 } while (n_bits > 0);
1192 }
1193
/*
 * Compute the mean "luminance" (plain sum of the first three channels) and
 * the mean of the fourth channel over a width x height region of 4-byte
 * texels. src_rowstride is the distance between rows in bytes. Both
 * averages use integer division.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const uint8_t *row = src;
   int rgb_total = 0;
   int a_total = 0;
   int row_idx, col;

   for (row_idx = 0; row_idx < height; row_idx++, row += src_rowstride) {
      const uint8_t *texel = row;

      for (col = 0; col < width; col++, texel += 4) {
         rgb_total += texel[0] + texel[1] + texel[2];
         a_total += texel[3];
      }
   }

   *average_luminance = rgb_total / (width * height);
   *average_alpha = a_total / (width * height);
}
1214
/*
 * Choose two RGBA endpoints for a width x height region of 4-byte texels.
 *
 * The colour and alpha endpoints are chosen independently. For colour, the
 * texels are split by whether their luminance (sum of the first three
 * channels) is below average_luminance, and each endpoint is the per-channel
 * mean of its group. For alpha, the texels are split by whether their alpha
 * is below average_alpha. If either split leaves one group empty, both
 * endpoints are set to the mean over all texels.
 *
 * Finally the endpoints are swapped where needed so that the first texel
 * lies on the same side of the midpoint as endpoint 0.
 *
 * src_rowstride is the distance between rows in bytes. The result is
 * written to endpoints[0] and endpoints[1].
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Partition on the alpha channel itself (p[3]); the colour
          * channels play no part in choosing the alpha endpoints. */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      /* One group is empty: use the overall mean for both endpoints */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      /* Only the three colour channels are swapped here */
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1311
1312 static void
1313 write_rgb_indices_unorm(struct bit_writer *writer,
1314 int src_width, int src_height,
1315 const uint8_t *src, int src_rowstride,
1316 uint8_t endpoints[][4])
1317 {
1318 int luminance;
1319 int endpoint_luminances[2];
1320 int endpoint;
1321 int index;
1322 int y, x;
1323
1324 for (endpoint = 0; endpoint < 2; endpoint++) {
1325 endpoint_luminances[endpoint] =
1326 endpoints[endpoint][0] +
1327 endpoints[endpoint][1] +
1328 endpoints[endpoint][2];
1329 }
1330
1331 /* If the endpoints have the same luminance then we'll just use index 0 for
1332 * all of the texels */
1333 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1334 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1335 return;
1336 }
1337
1338 for (y = 0; y < src_height; y++) {
1339 for (x = 0; x < src_width; x++) {
1340 luminance = src[0] + src[1] + src[2];
1341
1342 index = ((luminance - endpoint_luminances[0]) * 3 /
1343 (endpoint_luminances[1] - endpoint_luminances[0]));
1344 if (index < 0)
1345 index = 0;
1346 else if (index > 3)
1347 index = 3;
1348
1349 assert(x != 0 || y != 0 || index < 2);
1350
1351 write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1352
1353 src += 4;
1354 }
1355
1356 /* Pad the indices out to the block size */
1357 if (src_width < BLOCK_SIZE)
1358 write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1359
1360 src += src_rowstride - src_width * 4;
1361 }
1362
1363 /* Pad the indices out to the block size */
1364 if (src_height < BLOCK_SIZE)
1365 write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1366 }
1367
1368 static void
1369 write_alpha_indices_unorm(struct bit_writer *writer,
1370 int src_width, int src_height,
1371 const uint8_t *src, int src_rowstride,
1372 uint8_t endpoints[][4])
1373 {
1374 int index;
1375 int y, x;
1376
1377 /* If the endpoints have the same alpha then we'll just use index 0 for
1378 * all of the texels */
1379 if (endpoints[0][3] == endpoints[1][3]) {
1380 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1381 return;
1382 }
1383
1384 for (y = 0; y < src_height; y++) {
1385 for (x = 0; x < src_width; x++) {
1386 index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1387 ((int) endpoints[1][3] - endpoints[0][3]));
1388 if (index < 0)
1389 index = 0;
1390 else if (index > 7)
1391 index = 7;
1392
1393 assert(x != 0 || y != 0 || index < 4);
1394
1395 /* The first index has one less bit */
1396 write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1397
1398 src += 4;
1399 }
1400
1401 /* Pad the indices out to the block size */
1402 if (src_width < BLOCK_SIZE)
1403 write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1404
1405 src += src_rowstride - src_width * 4;
1406 }
1407
1408 /* Pad the indices out to the block size */
1409 if (src_height < BLOCK_SIZE)
1410 write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1411 }
1412
1413 static void
1414 compress_rgba_unorm_block(int src_width, int src_height,
1415 const uint8_t *src, int src_rowstride,
1416 uint8_t *dst)
1417 {
1418 int average_luminance, average_alpha;
1419 uint8_t endpoints[2][4];
1420 struct bit_writer writer;
1421 int component, endpoint;
1422
1423 get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1424 &average_luminance, &average_alpha);
1425 get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1426 average_luminance, average_alpha,
1427 endpoints);
1428
1429 writer.dst = dst;
1430 writer.pos = 0;
1431 writer.buf = 0;
1432
1433 write_bits(&writer, 5, 0x10); /* mode 4 */
1434 write_bits(&writer, 2, 0); /* rotation 0 */
1435 write_bits(&writer, 1, 0); /* index selection bit */
1436
1437 /* Write the color endpoints */
1438 for (component = 0; component < 3; component++)
1439 for (endpoint = 0; endpoint < 2; endpoint++)
1440 write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1441
1442 /* Write the alpha endpoints */
1443 for (endpoint = 0; endpoint < 2; endpoint++)
1444 write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1445
1446 write_rgb_indices_unorm(&writer,
1447 src_width, src_height,
1448 src, src_rowstride,
1449 endpoints);
1450 write_alpha_indices_unorm(&writer,
1451 src_width, src_height,
1452 src, src_rowstride,
1453 endpoints);
1454 }
1455
1456 static void
1457 compress_rgba_unorm(int width, int height,
1458 const uint8_t *src, int src_rowstride,
1459 uint8_t *dst, int dst_rowstride)
1460 {
1461 int dst_row_diff;
1462 int y, x;
1463
1464 if (dst_rowstride >= width * 4)
1465 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1466 else
1467 dst_row_diff = 0;
1468
1469 for (y = 0; y < height; y += BLOCK_SIZE) {
1470 for (x = 0; x < width; x += BLOCK_SIZE) {
1471 compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1472 MIN2(height - y, BLOCK_SIZE),
1473 src + x * 4 + y * src_rowstride,
1474 src_rowstride,
1475 dst);
1476 dst += BLOCK_BYTES;
1477 }
1478 dst += dst_row_diff;
1479 }
1480 }
1481
/*
 * Return the mean "luminance" (plain sum of the three channels) over a
 * width x height region of 3-float texels. src_rowstride is the distance
 * between rows in bytes.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Skip whatever lies between the end of this row and the next */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1499
/*
 * Clamp value to [-65504, 65504] when is_signed is set, or to [0, 65504]
 * otherwise. Values for which neither comparison holds (including NaN) are
 * returned unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1518
/*
 * Choose two RGB endpoints for a width x height region of 3-float texels.
 *
 * The texels are split by whether their luminance (sum of the three
 * channels) is below average_luminance; each endpoint is the per-channel
 * mean of its group. If one group is empty, both endpoints are set to the
 * mean over all texels. The endpoints are then passed through clamp_value()
 * and finally swapped where needed so that the first texel lies on the same
 * side of the luminance midpoint as endpoint 0.
 *
 * src_rowstride is the distance between rows in bytes.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float totals[2][3];
   float group_luminance[2];
   float swap_tmp[3];
   float mid;
   int n_below = 0;
   int row, col, c, e;

   memset(totals, 0, sizeof totals);

   /* Accumulate per-group channel sums */
   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++) {
         const float texel_luminance = texel[0] + texel[1] + texel[2];
         int group;

         if (texel_luminance < average_luminance) {
            group = 0;
            n_below++;
         } else {
            group = 1;
         }

         for (c = 0; c < 3; c++)
            totals[group][c] += texel[c];

         texel += 3;
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   /* Turn the sums into means */
   if (n_below == 0 || n_below == n_texels) {
      for (c = 0; c < 3; c++)
         endpoints[0][c] = endpoints[1][c] =
            (totals[0][c] + totals[1][c]) / (width * height);
   } else {
      for (c = 0; c < 3; c++) {
         endpoints[0][c] = totals[0][c] / n_below;
         endpoints[1][c] = totals[1][c] / (width * height - n_below);
      }
   }

   /* Restrict the endpoints to the half-float range, which also removes
    * infinities, then sum the clamped channels of each endpoint */
   for (e = 0; e < 2; e++) {
      for (c = 0; c < 3; c++)
         endpoints[e][c] = clamp_value(endpoints[e][c], is_signed);

      group_luminance[e] =
         endpoints[e][0] + endpoints[e][1] + endpoints[e][2];
   }

   /* The most-significant bit of the first index must be zero, which may
    * require exchanging the two endpoints */
   mid = (group_luminance[0] + group_luminance[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= mid) != (group_luminance[0] <= mid)) {
      memcpy(swap_tmp, endpoints[0], sizeof swap_tmp);
      memcpy(endpoints[0], endpoints[1], sizeof swap_tmp);
      memcpy(endpoints[1], swap_tmp, sizeof swap_tmp);
   }
}
1594
1595 static void
1596 write_rgb_indices_float(struct bit_writer *writer,
1597 int src_width, int src_height,
1598 const float *src, int src_rowstride,
1599 float endpoints[][3])
1600 {
1601 float luminance;
1602 float endpoint_luminances[2];
1603 int endpoint;
1604 int index;
1605 int y, x;
1606
1607 for (endpoint = 0; endpoint < 2; endpoint++) {
1608 endpoint_luminances[endpoint] =
1609 endpoints[endpoint][0] +
1610 endpoints[endpoint][1] +
1611 endpoints[endpoint][2];
1612 }
1613
1614 /* If the endpoints have the same luminance then we'll just use index 0 for
1615 * all of the texels */
1616 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1617 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1618 return;
1619 }
1620
1621 for (y = 0; y < src_height; y++) {
1622 for (x = 0; x < src_width; x++) {
1623 luminance = src[0] + src[1] + src[2];
1624
1625 index = ((luminance - endpoint_luminances[0]) * 15 /
1626 (endpoint_luminances[1] - endpoint_luminances[0]));
1627 if (index < 0)
1628 index = 0;
1629 else if (index > 15)
1630 index = 15;
1631
1632 assert(x != 0 || y != 0 || index < 8);
1633
1634 write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1635
1636 src += 3;
1637 }
1638
1639 /* Pad the indices out to the block size */
1640 if (src_width < BLOCK_SIZE)
1641 write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1642
1643 src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1644 }
1645
1646 /* Pad the indices out to the block size */
1647 if (src_height < BLOCK_SIZE)
1648 write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1649 }
1650
/*
 * Convert a float endpoint component into the integer value written to the
 * block.
 *
 * The value is converted with _mesa_float_to_half() and the resulting bits
 * are rescaled. In the signed case the sign bit is removed before scaling
 * and a negative result is then stored as the negated magnitude masked to
 * 10 bits. In the unsigned case values <= 0 map to 0.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   int half_bits;
   int scaled;
   bool negative;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half_bits = _mesa_float_to_half(value);

      return (64 * half_bits / 31) >> 6;
   }

   half_bits = _mesa_float_to_half(value);

   /* Split off the sign and scale the magnitude on its own */
   negative = (half_bits & 0x8000) != 0;
   if (negative)
      half_bits &= 0x7fff;

   scaled = (32 * half_bits / 31) >> 6;

   return negative ? (-scaled & ((1 << 10) - 1)) : scaled;
}
1680
1681 static void
1682 compress_rgb_float_block(int src_width, int src_height,
1683 const float *src, int src_rowstride,
1684 uint8_t *dst,
1685 bool is_signed)
1686 {
1687 float average_luminance;
1688 float endpoints[2][3];
1689 struct bit_writer writer;
1690 int component, endpoint;
1691 int endpoint_value;
1692
1693 average_luminance =
1694 get_average_luminance_float(src_width, src_height, src, src_rowstride);
1695 get_endpoints_float(src_width, src_height, src, src_rowstride,
1696 average_luminance, endpoints, is_signed);
1697
1698 writer.dst = dst;
1699 writer.pos = 0;
1700 writer.buf = 0;
1701
1702 write_bits(&writer, 5, 3); /* mode 3 */
1703
1704 /* Write the endpoints */
1705 for (endpoint = 0; endpoint < 2; endpoint++) {
1706 for (component = 0; component < 3; component++) {
1707 endpoint_value =
1708 get_endpoint_value(endpoints[endpoint][component], is_signed);
1709 write_bits(&writer, 10, endpoint_value);
1710 }
1711 }
1712
1713 write_rgb_indices_float(&writer,
1714 src_width, src_height,
1715 src, src_rowstride,
1716 endpoints);
1717 }
1718
1719 static void
1720 compress_rgb_float(int width, int height,
1721 const float *src, int src_rowstride,
1722 uint8_t *dst, int dst_rowstride,
1723 bool is_signed)
1724 {
1725 int dst_row_diff;
1726 int y, x;
1727
1728 if (dst_rowstride >= width * 4)
1729 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1730 else
1731 dst_row_diff = 0;
1732
1733 for (y = 0; y < height; y += BLOCK_SIZE) {
1734 for (x = 0; x < width; x += BLOCK_SIZE) {
1735 compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1736 MIN2(height - y, BLOCK_SIZE),
1737 src + x * 3 +
1738 y * src_rowstride / sizeof (float),
1739 src_rowstride,
1740 dst,
1741 is_signed);
1742 dst += BLOCK_BYTES;
1743 }
1744 dst += dst_row_diff;
1745 }
1746 }
1747
1748 #endif