mesa/version: only enable GL4.1 with correct limits.
[mesa.git] / src / mesa / main / texcompress_bptc_tmp.h
1 /*
2 * Copyright (C) 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /*
25 * Included by texcompress_bptc and gallium to define BPTC decoding routines.
26 */
27
28 #ifndef TEXCOMPRESS_BPTC_TMP_H
29 #define TEXCOMPRESS_BPTC_TMP_H
30
31 #include "util/format_srgb.h"
32 #include "util/half_float.h"
33 #include "macros.h"
34
35 #define BLOCK_SIZE 4
36 #define N_PARTITIONS 64
37 #define BLOCK_BYTES 16
38
/* Bit layout of one BPTC UNORM block mode.
 *
 * The member order must not change: bptc_unorm_modes[] initializes its
 * entries positionally.
 */
struct bptc_unorm_mode {
   /* Number of subsets; each subset has its own pair of endpoints. */
   int n_subsets;
   /* Width in bits of the partition number field (0 if absent). */
   int n_partition_bits;
   /* True if a 2-bit rotation field is present. */
   bool has_rotation_bits;
   /* True if a 1-bit index selection field is present. */
   bool has_index_selection_bit;
   /* Bits stored for each of the R, G and B endpoint values. */
   int n_color_bits;
   /* Bits stored for each alpha endpoint value; 0 means the block has no
    * alpha and the decoder uses 255.
    */
   int n_alpha_bits;
   /* True if every endpoint carries its own extra low-order bit. */
   bool has_endpoint_pbits;
   /* True if both endpoints of a subset share one extra low-order bit. */
   bool has_shared_pbits;
   /* Width in bits of a primary per-texel index. */
   int n_index_bits;
   /* Width in bits of a secondary per-texel index (0 if absent). */
   int n_secondary_index_bits;
};
51
/* One run of consecutive bits in a BPTC float block and where those bits
 * land in the decoded endpoint values.
 *
 * The member order must not change: bptc_float_modes[] initializes its
 * entries positionally.
 */
struct bptc_float_bitfield {
   /* Destination endpoint number; -1 terminates a list of bitfields. */
   int8_t endpoint;
   /* Destination colour component (0, 1 or 2). */
   uint8_t component;
   /* Position of the run's lowest bit within the destination value. */
   uint8_t offset;
   /* Number of bits in the run. */
   uint8_t n_bits;
   /* True if the bits of the run are stored in reversed order. */
   bool reverse;
};
59
60 struct bptc_float_mode {
61 bool reserved;
62 bool transformed_endpoints;
63 int n_partition_bits;
64 int n_endpoint_bits;
65 int n_index_bits;
66 int n_delta_bits[3];
67 struct bptc_float_bitfield bitfields[24];
68 };
69
/* State for emitting a stream of bits into a byte buffer.
 *
 * NOTE(review): nothing in the decoding routines below uses this type;
 * the member descriptions are inferred from their names and should be
 * confirmed against the code that writes blocks.
 */
struct bit_writer {
   /* Presumably the partially filled byte that has not been stored yet. */
   uint8_t buf;
   /* Presumably the number of bits already placed in buf. */
   int pos;
   /* Presumably where the next completed byte will be stored. */
   uint8_t *dst;
};
75
76 static const struct bptc_unorm_mode
77 bptc_unorm_modes[] = {
78 /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
79 /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
80 /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
81 /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
82 /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
83 /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
84 /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
85 /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
86 };
87
88 static const struct bptc_float_mode
89 bptc_float_modes[] = {
90 /* 00 */
91 { false, true, 5, 10, 3, { 5, 5, 5 },
92 { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
93 { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
94 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
95 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
96 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
97 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
98 { 3, 2, 3, 1, false },
99 { -1 } }
100 },
101 /* 01 */
102 { false, true, 5, 7, 3, { 6, 6, 6 },
103 { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
104 { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
105 { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
106 { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
107 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
108 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
109 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
110 { 2, 0, 0, 6, false },
111 { 3, 0, 0, 6, false },
112 { -1 } }
113 },
114 /* 00010 */
115 { false, true, 5, 11, 3, { 5, 4, 4 },
116 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
117 { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
118 { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
119 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
120 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
121 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
122 { -1 } }
123 },
124 /* 00011 */
125 { false, false, 0, 10, 4, { 10, 10, 10 },
126 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
127 { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
128 { -1 } }
129 },
130 /* 00110 */
131 { false, true, 5, 11, 3, { 4, 5, 4 },
132 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
133 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
134 { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
135 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
136 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
137 { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
138 { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
139 { -1 } }
140 },
141 /* 00111 */
142 { false, true, 0, 11, 4, { 9, 9, 9 },
143 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
144 { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
145 { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
146 { -1 } }
147 },
148 /* 01010 */
149 { false, true, 5, 11, 3, { 4, 4, 5 },
150 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
151 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
152 { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
153 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
154 { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
155 { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
156 { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
157 { -1 } }
158 },
159 /* 01011 */
160 { false, true, 0, 12, 4, { 8, 8, 8 },
161 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
162 { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
163 { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
164 { -1 } }
165 },
166 /* 01110 */
167 { false, true, 5, 9, 3, { 5, 5, 5 },
168 { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
169 { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
170 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
171 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
172 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
173 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
174 { 3, 2, 3, 1, false },
175 { -1 } }
176 },
177 /* 01111 */
178 { false, true, 0, 16, 4, { 4, 4, 4 },
179 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
180 { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
181 { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
182 { -1 } }
183 },
184 /* 10010 */
185 { false, true, 5, 8, 3, { 6, 5, 5 },
186 { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
187 { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
188 { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
189 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
190 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
191 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
192 { 3, 0, 0, 6, false },
193 { -1 } }
194 },
195 /* 10011 */
196 { true /* reserved */ },
197 /* 10110 */
198 { false, true, 5, 8, 3, { 5, 6, 5 },
199 { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
200 { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
201 { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
202 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
203 { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
204 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
205 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
206 { -1 } }
207 },
208 /* 10111 */
209 { true /* reserved */ },
210 /* 11010 */
211 { false, true, 5, 8, 3, { 5, 5, 6 },
212 { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
213 { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
214 { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
215 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
216 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
217 { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
218 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
219 { -1 } }
220 },
221 /* 11011 */
222 { true /* reserved */ },
223 /* 11110 */
224 { false, false, 5, 6, 3, { 6, 6, 6 },
225 { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
226 { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
227 { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
228 { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
229 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
230 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
231 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
232 { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
233 { -1 } }
234 },
235 /* 11111 */
236 { true /* reserved */ },
237 };
238
239 /* This partition table is used when the mode has two subsets. Each
240 * partition is represented by a 32-bit value which gives 2 bits per texel
241 * within the block. The value of the two bits represents which subset to use
242 * (0 or 1).
243 */
244 static const uint32_t
245 partition_table1[N_PARTITIONS] = {
246 0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
247 0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
248 0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
249 0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
250 0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
251 0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
252 0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
253 0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
254 0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
255 0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
256 0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
257 0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
258 0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
259 0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
260 0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
261 0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
262 };
263
264 /* This partition table is used when the mode has three subsets. In this case
265 * the values can be 0, 1 or 2.
266 */
267 static const uint32_t
268 partition_table2[N_PARTITIONS] = {
269 0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
270 0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
271 0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
272 0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
273 0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
274 0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
275 0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
276 0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
277 0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
278 0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
279 0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
280 0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
281 0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
282 0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
283 0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
284 0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
285 };
286
287 static const uint8_t
288 anchor_indices[][N_PARTITIONS] = {
289 /* Anchor index values for the second subset of two-subset partitioning */
290 {
291 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
292 0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
293 0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
294 0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
295 },
296
297 /* Anchor index values for the second subset of three-subset partitioning */
298 {
299 0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
300 0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
301 0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
302 0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
303 },
304
305 /* Anchor index values for the third subset of three-subset
306 * partitioning
307 */
308 {
309 0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
310 0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
311 0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
312 0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
313 }
314 };
315
/* Reads an n_bits wide little-endian bit field that starts `offset` bits
 * into `block`.
 *
 * Bit 0 of the field is bit (offset % 8) of byte (offset / 8); the field
 * may continue into the following bytes.  Returns the field value in the
 * low n_bits of the result.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int src_byte = offset / 8;
   int src_shift = offset % 8;
   int dst_shift = 0;
   int value = 0;

   for (;;) {
      /* Take whatever is left of the current byte, but no more than is
       * still wanted.
       */
      const int chunk = MIN2(n_bits, 8 - src_shift);
      const int chunk_mask = (1 << chunk) - 1;

      value |= ((block[src_byte] >> src_shift) & chunk_mask) << dst_shift;

      n_bits -= chunk;
      if (n_bits <= 0)
         break;

      /* Every byte after the first is consumed from its lowest bit. */
      dst_shift += chunk;
      src_byte++;
      src_shift = 0;
   }

   return value;
}
342
/* Widens an n_bits wide value to 8 bits.
 *
 * The value is moved to the top of the byte and its most-significant bits
 * are repeated in the low bits that would otherwise be left empty.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   const int n_missing = 8 - n_bits;
   const int high_part = byte << n_missing;
   /* Keep only the top n_missing bits of the value as filler. */
   const int low_part = byte >> (n_bits - n_missing);

   return high_part | low_part;
}
352
353 static int
354 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
355 const uint8_t *block,
356 int bit_offset,
357 uint8_t endpoints[][4])
358 {
359 int component;
360 int subset;
361 int endpoint;
362 int pbit;
363 int n_components;
364
365 /* Extract each color component */
366 for (component = 0; component < 3; component++) {
367 for (subset = 0; subset < mode->n_subsets; subset++) {
368 for (endpoint = 0; endpoint < 2; endpoint++) {
369 endpoints[subset * 2 + endpoint][component] =
370 extract_bits(block, bit_offset, mode->n_color_bits);
371 bit_offset += mode->n_color_bits;
372 }
373 }
374 }
375
376 /* Extract the alpha values */
377 if (mode->n_alpha_bits > 0) {
378 for (subset = 0; subset < mode->n_subsets; subset++) {
379 for (endpoint = 0; endpoint < 2; endpoint++) {
380 endpoints[subset * 2 + endpoint][3] =
381 extract_bits(block, bit_offset, mode->n_alpha_bits);
382 bit_offset += mode->n_alpha_bits;
383 }
384 }
385
386 n_components = 4;
387 } else {
388 for (subset = 0; subset < mode->n_subsets; subset++)
389 for (endpoint = 0; endpoint < 2; endpoint++)
390 endpoints[subset * 2 + endpoint][3] = 255;
391
392 n_components = 3;
393 }
394
395 /* Add in the p-bits */
396 if (mode->has_endpoint_pbits) {
397 for (subset = 0; subset < mode->n_subsets; subset++) {
398 for (endpoint = 0; endpoint < 2; endpoint++) {
399 pbit = extract_bits(block, bit_offset, 1);
400 bit_offset += 1;
401
402 for (component = 0; component < n_components; component++) {
403 endpoints[subset * 2 + endpoint][component] <<= 1;
404 endpoints[subset * 2 + endpoint][component] |= pbit;
405 }
406 }
407 }
408 } else if (mode->has_shared_pbits) {
409 for (subset = 0; subset < mode->n_subsets; subset++) {
410 pbit = extract_bits(block, bit_offset, 1);
411 bit_offset += 1;
412
413 for (endpoint = 0; endpoint < 2; endpoint++) {
414 for (component = 0; component < n_components; component++) {
415 endpoints[subset * 2 + endpoint][component] <<= 1;
416 endpoints[subset * 2 + endpoint][component] |= pbit;
417 }
418 }
419 }
420 }
421
422 /* Expand the n-bit values to a byte */
423 for (subset = 0; subset < mode->n_subsets; subset++) {
424 for (endpoint = 0; endpoint < 2; endpoint++) {
425 for (component = 0; component < 3; component++) {
426 endpoints[subset * 2 + endpoint][component] =
427 expand_component(endpoints[subset * 2 + endpoint][component],
428 mode->n_color_bits +
429 mode->has_endpoint_pbits +
430 mode->has_shared_pbits);
431 }
432
433 if (mode->n_alpha_bits > 0) {
434 endpoints[subset * 2 + endpoint][3] =
435 expand_component(endpoints[subset * 2 + endpoint][3],
436 mode->n_alpha_bits +
437 mode->has_endpoint_pbits +
438 mode->has_shared_pbits);
439 }
440 }
441 }
442
443 return bit_offset;
444 }
445
446 static bool
447 is_anchor(int n_subsets,
448 int partition_num,
449 int texel)
450 {
451 if (texel == 0)
452 return true;
453
454 switch (n_subsets) {
455 case 1:
456 return false;
457 case 2:
458 return anchor_indices[0][partition_num] == texel;
459 case 3:
460 return (anchor_indices[1][partition_num] == texel ||
461 anchor_indices[2][partition_num] == texel);
462 default:
463 assert(false);
464 return false;
465 }
466 }
467
468 static int
469 count_anchors_before_texel(int n_subsets,
470 int partition_num,
471 int texel)
472 {
473 int count = 1;
474
475 if (texel == 0)
476 return 0;
477
478 switch (n_subsets) {
479 case 1:
480 break;
481 case 2:
482 if (texel > anchor_indices[0][partition_num])
483 count++;
484 break;
485 case 3:
486 if (texel > anchor_indices[1][partition_num])
487 count++;
488 if (texel > anchor_indices[2][partition_num])
489 count++;
490 break;
491 default:
492 assert(false);
493 return 0;
494 }
495
496 return count;
497 }
498
/* Blends two endpoint values according to a texel index.
 *
 * a, b:       the two endpoint values
 * index:      the texel's index, in the range [0, 1 << index_bits)
 * index_bits: width of the index; only 2, 3 and 4 have a weight table
 *
 * The weight of b is looked up in a table of 6-bit fixed point values
 * (64 means "all b"), and the blend is rounded to nearest.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit_weights[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit_weights[] = {
      0, 9, 18, 27, 37, 46, 55, 64
   };
   static const uint8_t four_bit_weights[] = {
      0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64
   };
   /* Indexed by index_bits; widths 0 and 1 are never used. */
   static const uint8_t *weight_tables[] = {
      NULL, NULL, two_bit_weights, three_bit_weights, four_bit_weights
   };
   const int weight_b = weight_tables[index_bits][index];
   const int weight_a = 64 - weight_b;

   return (weight_a * a + weight_b * b + 32) >> 6;
}
517
/* Undoes the component rotation of a decoded RGBA texel in place.
 *
 * rotation: 0 leaves the texel alone; 1, 2 and 3 exchange the alpha
 *           value with component 0, 1 and 2 respectively
 * result:   the four components of the texel
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   uint8_t *partner;
   uint8_t alpha;

   if (rotation == 0)
      return;

   partner = &result[rotation - 1];

   alpha = result[3];
   result[3] = *partner;
   *partner = alpha;
}
533
534 static void
535 fetch_rgba_unorm_from_block(const uint8_t *block,
536 uint8_t *result,
537 int texel)
538 {
539 int mode_num = ffs(block[0]);
540 const struct bptc_unorm_mode *mode;
541 int bit_offset, secondary_bit_offset;
542 int partition_num;
543 int subset_num;
544 int rotation;
545 int index_selection;
546 int index_bits;
547 int indices[2];
548 int index;
549 int anchors_before_texel;
550 bool anchor;
551 uint8_t endpoints[3 * 2][4];
552 uint32_t subsets;
553 int component;
554
555 if (mode_num == 0) {
556 /* According to the spec this mode is reserved and shouldn't be used. */
557 memset(result, 0, 3);
558 result[3] = 0xff;
559 return;
560 }
561
562 mode = bptc_unorm_modes + mode_num - 1;
563 bit_offset = mode_num;
564
565 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
566 bit_offset += mode->n_partition_bits;
567
568 switch (mode->n_subsets) {
569 case 1:
570 subsets = 0;
571 break;
572 case 2:
573 subsets = partition_table1[partition_num];
574 break;
575 case 3:
576 subsets = partition_table2[partition_num];
577 break;
578 default:
579 assert(false);
580 return;
581 }
582
583 if (mode->has_rotation_bits) {
584 rotation = extract_bits(block, bit_offset, 2);
585 bit_offset += 2;
586 } else {
587 rotation = 0;
588 }
589
590 if (mode->has_index_selection_bit) {
591 index_selection = extract_bits(block, bit_offset, 1);
592 bit_offset++;
593 } else {
594 index_selection = 0;
595 }
596
597 bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
598
599 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
600 partition_num, texel);
601
602 /* Calculate the offset to the secondary index */
603 secondary_bit_offset = (bit_offset +
604 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
605 mode->n_subsets +
606 mode->n_secondary_index_bits * texel -
607 anchors_before_texel);
608
609 /* Calculate the offset to the primary index for this texel */
610 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
611
612 subset_num = (subsets >> (texel * 2)) & 3;
613
614 anchor = is_anchor(mode->n_subsets, partition_num, texel);
615
616 index_bits = mode->n_index_bits;
617 if (anchor)
618 index_bits--;
619 indices[0] = extract_bits(block, bit_offset, index_bits);
620
621 if (mode->n_secondary_index_bits) {
622 index_bits = mode->n_secondary_index_bits;
623 if (anchor)
624 index_bits--;
625 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
626 }
627
628 index = indices[index_selection];
629 index_bits = (index_selection ?
630 mode->n_secondary_index_bits :
631 mode->n_index_bits);
632
633 for (component = 0; component < 3; component++)
634 result[component] = interpolate(endpoints[subset_num * 2][component],
635 endpoints[subset_num * 2 + 1][component],
636 index,
637 index_bits);
638
639 /* Alpha uses the opposite index from the color components */
640 if (mode->n_secondary_index_bits && !index_selection) {
641 index = indices[1];
642 index_bits = mode->n_secondary_index_bits;
643 } else {
644 index = indices[0];
645 index_bits = mode->n_index_bits;
646 }
647
648 result[3] = interpolate(endpoints[subset_num * 2][3],
649 endpoints[subset_num * 2 + 1][3],
650 index,
651 index_bits);
652
653 apply_rotation(rotation, result);
654 }
655
656 #ifdef BPTC_BLOCK_DECODE
657 static void
658 decompress_rgba_unorm_block(int src_width, int src_height,
659 const uint8_t *block,
660 uint8_t *dst_row, int dst_rowstride)
661 {
662 int mode_num = ffs(block[0]);
663 const struct bptc_unorm_mode *mode;
664 int bit_offset, secondary_bit_offset;
665 int partition_num;
666 int subset_num;
667 int rotation;
668 int index_selection;
669 int index_bits;
670 int indices[2];
671 int index;
672 int anchors_before_texel;
673 bool anchor;
674 uint8_t endpoints[3 * 2][4];
675 uint32_t subsets;
676 int component;
677 unsigned x, y;
678
679 if (mode_num == 0) {
680 /* According to the spec this mode is reserved and shouldn't be used. */
681 for(y = 0; y < src_height; y += 1) {
682 uint8_t *result = dst_row;
683 memset(result, 0, 4 * src_width);
684 for(x = 0; x < src_width; x += 1) {
685 result[3] = 0xff;
686 result += 4;
687 }
688 dst_row += dst_rowstride;
689 }
690 return;
691 }
692
693 mode = bptc_unorm_modes + mode_num - 1;
694 bit_offset = mode_num;
695
696 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
697 bit_offset += mode->n_partition_bits;
698
699 switch (mode->n_subsets) {
700 case 1:
701 subsets = 0;
702 break;
703 case 2:
704 subsets = partition_table1[partition_num];
705 break;
706 case 3:
707 subsets = partition_table2[partition_num];
708 break;
709 default:
710 assert(false);
711 return;
712 }
713
714 if (mode->has_rotation_bits) {
715 rotation = extract_bits(block, bit_offset, 2);
716 bit_offset += 2;
717 } else {
718 rotation = 0;
719 }
720
721 if (mode->has_index_selection_bit) {
722 index_selection = extract_bits(block, bit_offset, 1);
723 bit_offset++;
724 } else {
725 index_selection = 0;
726 }
727
728 bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
729
730 for(y = 0; y < src_height; y += 1) {
731 uint8_t *result = dst_row;
732 for(x = 0; x < src_width; x += 1) {
733 int texel;
734 texel = x + y * 4;
735
736 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
737 partition_num,
738 texel);
739
740 /* Calculate the offset to the secondary index */
741 secondary_bit_offset = (bit_offset +
742 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
743 mode->n_subsets +
744 mode->n_secondary_index_bits * texel -
745 anchors_before_texel);
746
747 /* Calculate the offset to the primary index for this texel */
748 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
749
750 subset_num = (subsets >> (texel * 2)) & 3;
751
752 anchor = is_anchor(mode->n_subsets, partition_num, texel);
753
754 index_bits = mode->n_index_bits;
755 if (anchor)
756 index_bits--;
757 indices[0] = extract_bits(block, bit_offset, index_bits);
758
759 if (mode->n_secondary_index_bits) {
760 index_bits = mode->n_secondary_index_bits;
761 if (anchor)
762 index_bits--;
763 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
764 }
765
766 index = indices[index_selection];
767 index_bits = (index_selection ?
768 mode->n_secondary_index_bits :
769 mode->n_index_bits);
770
771 for (component = 0; component < 3; component++)
772 result[component] = interpolate(endpoints[subset_num * 2][component],
773 endpoints[subset_num * 2 + 1][component],
774 index,
775 index_bits);
776
777 /* Alpha uses the opposite index from the color components */
778 if (mode->n_secondary_index_bits && !index_selection) {
779 index = indices[1];
780 index_bits = mode->n_secondary_index_bits;
781 } else {
782 index = indices[0];
783 index_bits = mode->n_index_bits;
784 }
785
786 result[3] = interpolate(endpoints[subset_num * 2][3],
787 endpoints[subset_num * 2 + 1][3],
788 index,
789 index_bits);
790
791 apply_rotation(rotation, result);
792 result += 4;
793 }
794 dst_row += dst_rowstride;
795 }
796 }
797
798 static void
799 decompress_rgba_unorm(int width, int height,
800 const uint8_t *src, int src_rowstride,
801 uint8_t *dst, int dst_rowstride)
802 {
803 int src_row_diff;
804 int y, x;
805
806 if (src_rowstride >= width * 4)
807 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
808 else
809 src_row_diff = 0;
810
811 for (y = 0; y < height; y += BLOCK_SIZE) {
812 for (x = 0; x < width; x += BLOCK_SIZE) {
813 decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
814 MIN2(height - y, BLOCK_SIZE),
815 src,
816 dst + x * 4 + y * dst_rowstride,
817 dst_rowstride);
818 src += BLOCK_BYTES;
819 }
820 src += src_row_diff;
821 }
822 }
823 #endif // BPTC_BLOCK_DECODE
824
/* Interprets the low n_bits of `value` as a two's complement number and
 * returns it as a 32-bit signed integer.  Bits above the low n_bits are
 * ignored.  n_bits must be between 1 and 31.
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   assert(n_bits > 0 && n_bits < 32);

   const uint32_t sign_bit = (uint32_t)1 << (n_bits - 1);
   const uint32_t low_bits = (uint32_t)value & ((sign_bit << 1) - 1);

   /* Flipping the sign bit and then subtracting it again leaves
    * non-negative numbers unchanged and turns the others into their
    * negative counterpart.
    */
   return (int32_t)((low_bits ^ sign_bit) - sign_bit);
}
834
835 static int
836 signed_unquantize(int value, int n_endpoint_bits)
837 {
838 bool sign;
839
840 if (n_endpoint_bits >= 16)
841 return value;
842
843 if (value == 0)
844 return 0;
845
846 sign = false;
847
848 if (value < 0) {
849 sign = true;
850 value = -value;
851 }
852
853 if (value >= (1 << (n_endpoint_bits - 1)) - 1)
854 value = 0x7fff;
855 else
856 value = ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
857
858 if (sign)
859 value = -value;
860
861 return value;
862 }
863
/* Scales an unsigned endpoint value of n_endpoint_bits precision up to
 * the range 0..0xffff.
 *
 * Values that already have 15 or more bits, and zero, are returned as
 * they are.  The largest representable value maps exactly to 0xffff.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   int max_value;

   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   max_value = (1 << n_endpoint_bits) - 1;
   if (value == max_value)
      return 0xffff;

   return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
}
878
879 static int
880 extract_float_endpoints(const struct bptc_float_mode *mode,
881 const uint8_t *block,
882 int bit_offset,
883 int32_t endpoints[][3],
884 bool is_signed)
885 {
886 const struct bptc_float_bitfield *bitfield;
887 int endpoint, component;
888 int n_endpoints;
889 int value;
890 int i;
891
892 if (mode->n_partition_bits)
893 n_endpoints = 4;
894 else
895 n_endpoints = 2;
896
897 memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
898
899 for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
900 value = extract_bits(block, bit_offset, bitfield->n_bits);
901 bit_offset += bitfield->n_bits;
902
903 if (bitfield->reverse) {
904 for (i = 0; i < bitfield->n_bits; i++) {
905 if (value & (1 << i))
906 endpoints[bitfield->endpoint][bitfield->component] |=
907 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
908 }
909 } else {
910 endpoints[bitfield->endpoint][bitfield->component] |=
911 value << bitfield->offset;
912 }
913 }
914
915 if (mode->transformed_endpoints) {
916 /* The endpoints are specified as signed offsets from e0 */
917 for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
918 for (component = 0; component < 3; component++) {
919 value = sign_extend(endpoints[endpoint][component],
920 mode->n_delta_bits[component]);
921 endpoints[endpoint][component] =
922 ((endpoints[0][component] + value) &
923 ((1 << mode->n_endpoint_bits) - 1));
924 }
925 }
926 }
927
928 if (is_signed) {
929 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
930 for (component = 0; component < 3; component++) {
931 value = sign_extend(endpoints[endpoint][component],
932 mode->n_endpoint_bits);
933 endpoints[endpoint][component] =
934 signed_unquantize(value, mode->n_endpoint_bits);
935 }
936 }
937 } else {
938 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
939 for (component = 0; component < 3; component++) {
940 endpoints[endpoint][component] =
941 unsigned_unquantize(endpoints[endpoint][component],
942 mode->n_endpoint_bits);
943 }
944 }
945 }
946
947 return bit_offset;
948 }
949
/* Final scaling step for an interpolated unsigned value: multiplies it
 * by 31/64, truncating.  The caller treats the result as the bits of a
 * half float.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
955
/* Final scaling step for an interpolated signed value: multiplies the
 * magnitude by 31/32, truncating, and sets bit 15 for negative inputs.
 * The caller treats the result as the bits of a half float.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   const bool negative = value < 0;
   const int32_t magnitude = negative ? -value : value;
   const int32_t scaled = magnitude * 31 / 32;

   return negative ? (scaled | 0x8000) : scaled;
}
964
965 static void
966 fetch_rgb_float_from_block(const uint8_t *block,
967 float *result,
968 int texel,
969 bool is_signed)
970 {
971 int mode_num;
972 const struct bptc_float_mode *mode;
973 int bit_offset;
974 int partition_num;
975 int subset_num;
976 int index_bits;
977 int index;
978 int anchors_before_texel;
979 int32_t endpoints[2 * 2][3];
980 uint32_t subsets;
981 int n_subsets;
982 int component;
983 int32_t value;
984
985 if (block[0] & 0x2) {
986 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
987 bit_offset = 5;
988 } else {
989 mode_num = block[0] & 3;
990 bit_offset = 2;
991 }
992
993 mode = bptc_float_modes + mode_num;
994
995 if (mode->reserved) {
996 memset(result, 0, sizeof result[0] * 3);
997 result[3] = 1.0f;
998 return;
999 }
1000
1001 bit_offset = extract_float_endpoints(mode, block, bit_offset,
1002 endpoints, is_signed);
1003
1004 if (mode->n_partition_bits) {
1005 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1006 bit_offset += mode->n_partition_bits;
1007
1008 subsets = partition_table1[partition_num];
1009 n_subsets = 2;
1010 } else {
1011 partition_num = 0;
1012 subsets = 0;
1013 n_subsets = 1;
1014 }
1015
1016 anchors_before_texel =
1017 count_anchors_before_texel(n_subsets, partition_num, texel);
1018
1019 /* Calculate the offset to the primary index for this texel */
1020 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1021
1022 subset_num = (subsets >> (texel * 2)) & 3;
1023
1024 index_bits = mode->n_index_bits;
1025 if (is_anchor(n_subsets, partition_num, texel))
1026 index_bits--;
1027 index = extract_bits(block, bit_offset, index_bits);
1028
1029 for (component = 0; component < 3; component++) {
1030 value = interpolate(endpoints[subset_num * 2][component],
1031 endpoints[subset_num * 2 + 1][component],
1032 index,
1033 mode->n_index_bits);
1034
1035 if (is_signed)
1036 value = finish_signed_unquantize(value);
1037 else
1038 value = finish_unsigned_unquantize(value);
1039
1040 result[component] = _mesa_half_to_float(value);
1041 }
1042
1043 result[3] = 1.0f;
1044 }
1045
1046 #ifdef BPTC_BLOCK_DECODE
1047 static void
1048 decompress_rgb_float_block(unsigned src_width, unsigned src_height,
1049 const uint8_t *block,
1050 float *dst_row, unsigned dst_rowstride,
1051 bool is_signed)
1052 {
1053 int mode_num;
1054 const struct bptc_float_mode *mode;
1055 int bit_offset;
1056 int partition_num;
1057 int subset_num;
1058 int index_bits;
1059 int index;
1060 int anchors_before_texel;
1061 int32_t endpoints[2 * 2][3];
1062 uint32_t subsets;
1063 int n_subsets;
1064 int component;
1065 int32_t value;
1066 unsigned x, y;
1067
1068 if (block[0] & 0x2) {
1069 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1070 bit_offset = 5;
1071 } else {
1072 mode_num = block[0] & 3;
1073 bit_offset = 2;
1074 }
1075
1076 mode = bptc_float_modes + mode_num;
1077
1078 if (mode->reserved) {
1079 for(y = 0; y < src_height; y += 1) {
1080 float *result = dst_row;
1081 memset(result, 0, sizeof result[0] * 4 * src_width);
1082 for(x = 0; x < src_width; x += 1) {
1083 result[3] = 1.0f;
1084 result += 4;
1085 }
1086 dst_row += dst_rowstride / sizeof dst_row[0];
1087 }
1088 return;
1089 }
1090
1091 bit_offset = extract_float_endpoints(mode, block, bit_offset,
1092 endpoints, is_signed);
1093
1094 if (mode->n_partition_bits) {
1095 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
1096 bit_offset += mode->n_partition_bits;
1097
1098 subsets = partition_table1[partition_num];
1099 n_subsets = 2;
1100 } else {
1101 partition_num = 0;
1102 subsets = 0;
1103 n_subsets = 1;
1104 }
1105
1106 for(y = 0; y < src_height; y += 1) {
1107 float *result = dst_row;
1108 for(x = 0; x < src_width; x += 1) {
1109 int texel;
1110
1111 texel = x + y * 4;
1112
1113 anchors_before_texel =
1114 count_anchors_before_texel(n_subsets, partition_num, texel);
1115
1116 /* Calculate the offset to the primary index for this texel */
1117 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1118
1119 subset_num = (subsets >> (texel * 2)) & 3;
1120
1121 index_bits = mode->n_index_bits;
1122 if (is_anchor(n_subsets, partition_num, texel))
1123 index_bits--;
1124 index = extract_bits(block, bit_offset, index_bits);
1125
1126 for (component = 0; component < 3; component++) {
1127 value = interpolate(endpoints[subset_num * 2][component],
1128 endpoints[subset_num * 2 + 1][component],
1129 index,
1130 mode->n_index_bits);
1131
1132 if (is_signed)
1133 value = finish_signed_unquantize(value);
1134 else
1135 value = finish_unsigned_unquantize(value);
1136
1137 result[component] = _mesa_half_to_float(value);
1138 }
1139
1140 result[3] = 1.0f;
1141 result += 4;
1142 }
1143 dst_row += dst_rowstride / sizeof dst_row[0];
1144 }
1145 }
1146
1147 static void
1148 decompress_rgb_float(int width, int height,
1149 const uint8_t *src, int src_rowstride,
1150 float *dst, int dst_rowstride, bool is_signed)
1151 {
1152 int src_row_diff;
1153 int y, x;
1154
1155 if (src_rowstride >= width * 4)
1156 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1157 else
1158 src_row_diff = 0;
1159
1160 for (y = 0; y < height; y += BLOCK_SIZE) {
1161 for (x = 0; x < width; x += BLOCK_SIZE) {
1162 decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1163 MIN2(height - y, BLOCK_SIZE),
1164 src,
1165 (dst + x * 4 +
1166 (y * dst_rowstride / sizeof dst[0])),
1167 dst_rowstride, is_signed);
1168 src += BLOCK_BYTES;
1169 }
1170 src += src_row_diff;
1171 }
1172 }
1173 #endif // BPTC_BLOCK_DECODE
1174
1175 static void
1176 write_bits(struct bit_writer *writer, int n_bits, int value)
1177 {
1178 do {
1179 if (n_bits + writer->pos >= 8) {
1180 *(writer->dst++) = writer->buf | (value << writer->pos);
1181 writer->buf = 0;
1182 value >>= (8 - writer->pos);
1183 n_bits -= (8 - writer->pos);
1184 writer->pos = 0;
1185 } else {
1186 writer->buf |= value << writer->pos;
1187 writer->pos += n_bits;
1188 break;
1189 }
1190 } while (n_bits > 0);
1191 }
1192
/*
 * Compute the mean luminance and the mean alpha of a region of 8-bit RGBA
 * texels.
 *
 * The luminance of a texel is the plain sum of its first three components.
 * src_rowstride is the distance between two rows in bytes. Both results are
 * truncated integer averages over width * height texels.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   const uint8_t *row = src;
   int total_luminance = 0;
   int total_alpha = 0;
   int row_num, col;

   for (row_num = 0; row_num < height; row_num++, row += src_rowstride) {
      const uint8_t *texel = row;

      for (col = 0; col < width; col++, texel += 4) {
         total_luminance += texel[0] + texel[1] + texel[2];
         total_alpha += texel[3];
      }
   }

   *average_luminance = total_luminance / n_texels;
   *average_alpha = total_alpha / n_texels;
}
1213
/*
 * Pick the two RGBA endpoints used to encode a region of 8-bit RGBA texels.
 *
 * The colour endpoints are found by splitting the texels into two groups,
 * those whose luminance (sum of the three colour components) is below
 * average_luminance and the rest, and taking the mean colour of each group.
 * The alpha endpoints are chosen independently in the same way by comparing
 * the alpha component of each texel with average_alpha. When one of the
 * groups is empty both endpoints are set to the mean of the whole region.
 *
 * src_rowstride is the distance between two rows in bytes. The result is
 * stored in endpoints[0] and endpoints[1].
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   const int n_texels = width * height;
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* The alpha grouping has to look at the alpha component (p[3]).
          * Testing the blue component here would group the alpha values
          * by an unrelated channel. */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == n_texels) {
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / n_texels;
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (n_texels - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == n_texels) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / n_texels;
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (n_texels - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   /* The first texel must lie on the same side of the midpoint as
    * endpoint 0 */
   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1310
1311 static void
1312 write_rgb_indices_unorm(struct bit_writer *writer,
1313 int src_width, int src_height,
1314 const uint8_t *src, int src_rowstride,
1315 uint8_t endpoints[][4])
1316 {
1317 int luminance;
1318 int endpoint_luminances[2];
1319 int endpoint;
1320 int index;
1321 int y, x;
1322
1323 for (endpoint = 0; endpoint < 2; endpoint++) {
1324 endpoint_luminances[endpoint] =
1325 endpoints[endpoint][0] +
1326 endpoints[endpoint][1] +
1327 endpoints[endpoint][2];
1328 }
1329
1330 /* If the endpoints have the same luminance then we'll just use index 0 for
1331 * all of the texels */
1332 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1333 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1334 return;
1335 }
1336
1337 for (y = 0; y < src_height; y++) {
1338 for (x = 0; x < src_width; x++) {
1339 luminance = src[0] + src[1] + src[2];
1340
1341 index = ((luminance - endpoint_luminances[0]) * 3 /
1342 (endpoint_luminances[1] - endpoint_luminances[0]));
1343 if (index < 0)
1344 index = 0;
1345 else if (index > 3)
1346 index = 3;
1347
1348 assert(x != 0 || y != 0 || index < 2);
1349
1350 write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1351
1352 src += 4;
1353 }
1354
1355 /* Pad the indices out to the block size */
1356 if (src_width < BLOCK_SIZE)
1357 write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1358
1359 src += src_rowstride - src_width * 4;
1360 }
1361
1362 /* Pad the indices out to the block size */
1363 if (src_height < BLOCK_SIZE)
1364 write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1365 }
1366
1367 static void
1368 write_alpha_indices_unorm(struct bit_writer *writer,
1369 int src_width, int src_height,
1370 const uint8_t *src, int src_rowstride,
1371 uint8_t endpoints[][4])
1372 {
1373 int index;
1374 int y, x;
1375
1376 /* If the endpoints have the same alpha then we'll just use index 0 for
1377 * all of the texels */
1378 if (endpoints[0][3] == endpoints[1][3]) {
1379 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1380 return;
1381 }
1382
1383 for (y = 0; y < src_height; y++) {
1384 for (x = 0; x < src_width; x++) {
1385 index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1386 ((int) endpoints[1][3] - endpoints[0][3]));
1387 if (index < 0)
1388 index = 0;
1389 else if (index > 7)
1390 index = 7;
1391
1392 assert(x != 0 || y != 0 || index < 4);
1393
1394 /* The first index has one less bit */
1395 write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1396
1397 src += 4;
1398 }
1399
1400 /* Pad the indices out to the block size */
1401 if (src_width < BLOCK_SIZE)
1402 write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1403
1404 src += src_rowstride - src_width * 4;
1405 }
1406
1407 /* Pad the indices out to the block size */
1408 if (src_height < BLOCK_SIZE)
1409 write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1410 }
1411
1412 static void
1413 compress_rgba_unorm_block(int src_width, int src_height,
1414 const uint8_t *src, int src_rowstride,
1415 uint8_t *dst)
1416 {
1417 int average_luminance, average_alpha;
1418 uint8_t endpoints[2][4];
1419 struct bit_writer writer;
1420 int component, endpoint;
1421
1422 get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1423 &average_luminance, &average_alpha);
1424 get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1425 average_luminance, average_alpha,
1426 endpoints);
1427
1428 writer.dst = dst;
1429 writer.pos = 0;
1430 writer.buf = 0;
1431
1432 write_bits(&writer, 5, 0x10); /* mode 4 */
1433 write_bits(&writer, 2, 0); /* rotation 0 */
1434 write_bits(&writer, 1, 0); /* index selection bit */
1435
1436 /* Write the color endpoints */
1437 for (component = 0; component < 3; component++)
1438 for (endpoint = 0; endpoint < 2; endpoint++)
1439 write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1440
1441 /* Write the alpha endpoints */
1442 for (endpoint = 0; endpoint < 2; endpoint++)
1443 write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1444
1445 write_rgb_indices_unorm(&writer,
1446 src_width, src_height,
1447 src, src_rowstride,
1448 endpoints);
1449 write_alpha_indices_unorm(&writer,
1450 src_width, src_height,
1451 src, src_rowstride,
1452 endpoints);
1453 }
1454
1455 static void
1456 compress_rgba_unorm(int width, int height,
1457 const uint8_t *src, int src_rowstride,
1458 uint8_t *dst, int dst_rowstride)
1459 {
1460 int dst_row_diff;
1461 int y, x;
1462
1463 if (dst_rowstride >= width * 4)
1464 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1465 else
1466 dst_row_diff = 0;
1467
1468 for (y = 0; y < height; y += BLOCK_SIZE) {
1469 for (x = 0; x < width; x += BLOCK_SIZE) {
1470 compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1471 MIN2(height - y, BLOCK_SIZE),
1472 src + x * 4 + y * src_rowstride,
1473 src_rowstride,
1474 dst);
1475 dst += BLOCK_BYTES;
1476 }
1477 dst += dst_row_diff;
1478 }
1479 }
1480
/*
 * Return the mean luminance of a region of RGB float texels.
 *
 * The luminance of a texel is the plain sum of its three components.
 * src_rowstride is the distance between two rows in bytes.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Skip whatever lies between the end of this row and the next one */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1498
/*
 * Limit value to the range that is used for the endpoints: at most 65504,
 * and at least -65504 for signed data or 0 for unsigned data.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1517
/*
 * Pick the two RGB endpoints used to encode a region of RGB float texels.
 *
 * The texels are split into those whose luminance (sum of the three
 * components) is below average_luminance and the rest. Each endpoint is the
 * mean colour of one of the groups, or the mean of the whole region when
 * one group is empty. The endpoints are then passed through clamp_value()
 * and finally swapped if needed so that the first texel of the region lies
 * on the side of endpoints[0].
 *
 * src_rowstride is the distance between two rows in bytes.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float group_sums[2][3];
   float group_luminances[2];
   float middle;
   float spare[3];
   int below_average_count = 0;
   int row, col, which, channel;

   memset(group_sums, 0, sizeof group_sums);

   /* Accumulate the colour of every texel into one of the two groups */
   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         const float luminance = texel[0] + texel[1] + texel[2];
         float *sum = group_sums[1];

         if (luminance < average_luminance) {
            sum = group_sums[0];
            below_average_count++;
         }

         for (channel = 0; channel < 3; channel++)
            sum[channel] += texel[channel];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   for (channel = 0; channel < 3; channel++) {
      if (below_average_count == 0 || below_average_count == n_texels) {
         /* Only one group is populated */
         endpoints[0][channel] = endpoints[1][channel] =
            (group_sums[0][channel] + group_sums[1][channel]) / n_texels;
      } else {
         endpoints[0][channel] =
            group_sums[0][channel] / below_average_count;
         endpoints[1][channel] =
            group_sums[1][channel] / (n_texels - below_average_count);
      }
   }

   /* Bring the endpoints into the range that can be encoded */
   for (which = 0; which < 2; which++) {
      for (channel = 0; channel < 3; channel++) {
         endpoints[which][channel] =
            clamp_value(endpoints[which][channel], is_signed);
      }
   }

   /* The most-significant bit of the first index is not stored, so the
    * first texel has to end up nearer to endpoint 0 */
   for (which = 0; which < 2; which++) {
      group_luminances[which] =
         endpoints[which][0] +
         endpoints[which][1] +
         endpoints[which][2];
   }
   middle = (group_luminances[0] + group_luminances[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= middle) !=
       (group_luminances[0] <= middle)) {
      memcpy(spare, endpoints[0], sizeof spare);
      memcpy(endpoints[0], endpoints[1], sizeof spare);
      memcpy(endpoints[1], spare, sizeof spare);
   }
}
1593
1594 static void
1595 write_rgb_indices_float(struct bit_writer *writer,
1596 int src_width, int src_height,
1597 const float *src, int src_rowstride,
1598 float endpoints[][3])
1599 {
1600 float luminance;
1601 float endpoint_luminances[2];
1602 int endpoint;
1603 int index;
1604 int y, x;
1605
1606 for (endpoint = 0; endpoint < 2; endpoint++) {
1607 endpoint_luminances[endpoint] =
1608 endpoints[endpoint][0] +
1609 endpoints[endpoint][1] +
1610 endpoints[endpoint][2];
1611 }
1612
1613 /* If the endpoints have the same luminance then we'll just use index 0 for
1614 * all of the texels */
1615 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1616 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1617 return;
1618 }
1619
1620 for (y = 0; y < src_height; y++) {
1621 for (x = 0; x < src_width; x++) {
1622 luminance = src[0] + src[1] + src[2];
1623
1624 index = ((luminance - endpoint_luminances[0]) * 15 /
1625 (endpoint_luminances[1] - endpoint_luminances[0]));
1626 if (index < 0)
1627 index = 0;
1628 else if (index > 15)
1629 index = 15;
1630
1631 assert(x != 0 || y != 0 || index < 8);
1632
1633 write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1634
1635 src += 3;
1636 }
1637
1638 /* Pad the indices out to the block size */
1639 if (src_width < BLOCK_SIZE)
1640 write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1641
1642 src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1643 }
1644
1645 /* Pad the indices out to the block size */
1646 if (src_height < BLOCK_SIZE)
1647 write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1648 }
1649
/*
 * Convert a float endpoint component into the 10-bit value stored in the
 * block.
 *
 * The value is converted to a half float whose bits are then scaled down.
 * For signed data the sign bit is stripped before scaling and a negative
 * result is returned negated and masked to 10 bits. For unsigned data
 * anything that is not above zero becomes 0.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   int half_bits;
   int magnitude;
   bool negative;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half_bits = _mesa_float_to_half(value);

      return (64 * half_bits / 31) >> 6;
   }

   half_bits = _mesa_float_to_half(value);

   /* Separate the sign from the magnitude */
   negative = (half_bits & 0x8000) != 0;
   if (negative)
      half_bits &= 0x7fff;

   magnitude = (32 * half_bits / 31) >> 6;

   if (negative)
      return -magnitude & ((1 << 10) - 1);

   return magnitude;
}
1679
1680 static void
1681 compress_rgb_float_block(int src_width, int src_height,
1682 const float *src, int src_rowstride,
1683 uint8_t *dst,
1684 bool is_signed)
1685 {
1686 float average_luminance;
1687 float endpoints[2][3];
1688 struct bit_writer writer;
1689 int component, endpoint;
1690 int endpoint_value;
1691
1692 average_luminance =
1693 get_average_luminance_float(src_width, src_height, src, src_rowstride);
1694 get_endpoints_float(src_width, src_height, src, src_rowstride,
1695 average_luminance, endpoints, is_signed);
1696
1697 writer.dst = dst;
1698 writer.pos = 0;
1699 writer.buf = 0;
1700
1701 write_bits(&writer, 5, 3); /* mode 3 */
1702
1703 /* Write the endpoints */
1704 for (endpoint = 0; endpoint < 2; endpoint++) {
1705 for (component = 0; component < 3; component++) {
1706 endpoint_value =
1707 get_endpoint_value(endpoints[endpoint][component], is_signed);
1708 write_bits(&writer, 10, endpoint_value);
1709 }
1710 }
1711
1712 write_rgb_indices_float(&writer,
1713 src_width, src_height,
1714 src, src_rowstride,
1715 endpoints);
1716 }
1717
1718 static void
1719 compress_rgb_float(int width, int height,
1720 const float *src, int src_rowstride,
1721 uint8_t *dst, int dst_rowstride,
1722 bool is_signed)
1723 {
1724 int dst_row_diff;
1725 int y, x;
1726
1727 if (dst_rowstride >= width * 4)
1728 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1729 else
1730 dst_row_diff = 0;
1731
1732 for (y = 0; y < height; y += BLOCK_SIZE) {
1733 for (x = 0; x < width; x += BLOCK_SIZE) {
1734 compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1735 MIN2(height - y, BLOCK_SIZE),
1736 src + x * 3 +
1737 y * src_rowstride / sizeof (float),
1738 src_rowstride,
1739 dst,
1740 is_signed);
1741 dst += BLOCK_BYTES;
1742 }
1743 dst += dst_row_diff;
1744 }
1745 }
1746
1747 #endif