26e59158007c966b0c35f3dde11e7c9c1c095a28
[mesa.git] / src / mesa / main / texcompress_bptc.c
1 /*
2 * Copyright (C) 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file texcompress_bptc.c
26 * GL_ARB_texture_compression_bptc support.
27 */
28
29 #include <stdbool.h>
30 #include "texcompress.h"
31 #include "texcompress_bptc.h"
32 #include "util/format_srgb.h"
33 #include "util/half_float.h"
34 #include "texstore.h"
35 #include "macros.h"
36 #include "image.h"
37
38 #define BLOCK_SIZE 4
39 #define N_PARTITIONS 64
40 #define BLOCK_BYTES 16
41
/* Describes the bit layout of one of the unorm (RGBA8) block modes. */
struct bptc_unorm_mode {
   /* Number of subsets in the block; each subset has two endpoints */
   int n_subsets;
   /* Width of the partition number field that follows the mode bits */
   int n_partition_bits;
   /* Whether a 2-bit rotation field is present */
   bool has_rotation_bits;
   /* Whether a 1-bit index selection field is present */
   bool has_index_selection_bit;
   /* Bits stored per colour component of each endpoint */
   int n_color_bits;
   /* Bits stored per alpha endpoint; when 0 the alpha endpoints are 255 */
   int n_alpha_bits;
   /* Whether one p-bit is stored for every endpoint */
   bool has_endpoint_pbits;
   /* Whether one p-bit is stored per subset and shared by both endpoints */
   bool has_shared_pbits;
   /* Bits per texel of the primary index */
   int n_index_bits;
   /* Bits per texel of the secondary index, or 0 if there is none */
   int n_secondary_index_bits;
};
54
/* Describes one run of bits in a float block and where it lands in the
 * endpoint values.
 */
struct bptc_float_bitfield {
   /* Index of the endpoint receiving the bits; -1 terminates a list */
   int8_t endpoint;
   /* Index of the colour component within that endpoint */
   uint8_t component;
   /* Bit position within the component where the field is placed */
   uint8_t offset;
   /* Number of bits read from the block for this field */
   uint8_t n_bits;
   /* If true the bits are stored in reverse order in the block */
   bool reverse;
};
62
/* Describes the layout of one of the float block modes. */
struct bptc_float_mode {
   /* Reserved modes decode to zero RGB with an alpha of 1.0 */
   bool reserved;
   /* If true, endpoints after the first are signed deltas from endpoint 0 */
   bool transformed_endpoints;
   /* Width of the partition number; non-zero means two subsets (four
    * endpoints), zero means a single subset (two endpoints)
    */
   int n_partition_bits;
   /* Precision of the endpoint values before unquantizing */
   int n_endpoint_bits;
   /* Bits per texel index */
   int n_index_bits;
   /* Per-component width used when sign-extending the delta endpoints */
   int n_delta_bits[3];
   /* Fields in the order they appear in the block, terminated by an entry
    * whose endpoint is -1
    */
   struct bptc_float_bitfield bitfields[24];
};
72
/* State for writing a stream of bits, least-significant bit first, into a
 * byte buffer.
 */
struct bit_writer {
   /* Bits accumulated so far for the byte that has not been written yet */
   uint8_t buf;
   /* Number of bits currently held in buf */
   int pos;
   /* Where the next completed byte will be stored */
   uint8_t *dst;
};
78
/* Layout of each unorm mode, indexed by mode number. The columns follow the
 * member order of struct bptc_unorm_mode: n_subsets, n_partition_bits,
 * has_rotation_bits, has_index_selection_bit, n_color_bits, n_alpha_bits,
 * has_endpoint_pbits, has_shared_pbits, n_index_bits,
 * n_secondary_index_bits.
 */
static const struct bptc_unorm_mode
bptc_unorm_modes[] = {
   /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
   /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
   /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
   /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
   /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
   /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
   /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
};
90
/* Layout of each float mode. The comment before each entry gives the mode
 * bits as they appear in the block. Each entry lists, in member order:
 * reserved, transformed_endpoints, n_partition_bits, n_endpoint_bits,
 * n_index_bits, n_delta_bits and then the bitfields in the order they are
 * read from the block. Each bitfield is { endpoint, component, offset,
 * n_bits, reverse } and the list ends with an endpoint of -1.
 */
static const struct bptc_float_mode
bptc_float_modes[] = {
   /* 00 */
   { false, true, 5, 10, 3, { 5, 5, 5 },
     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01 */
   { false, true, 5, 7, 3, { 6, 6, 6 },
     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 00010 */
   { false, true, 5, 11, 3, { 5, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00011 */
   { false, false, 0, 10, 4, { 10, 10, 10 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
       { -1 } }
   },
   /* 00110 */
   { false, true, 5, 11, 3, { 4, 5, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00111 */
   { false, true, 0, 11, 4, { 9, 9, 9 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
       { -1 } }
   },
   /* 01010 */
   { false, true, 5, 11, 3, { 4, 4, 5 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01011 */
   { false, true, 0, 12, 4, { 8, 8, 8 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
       { -1 } }
   },
   /* 01110 */
   { false, true, 5, 9, 3, { 5, 5, 5 },
     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01111 */
   { false, true, 0, 16, 4, { 4, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
       { -1 } }
   },
   /* 10010 */
   { false, true, 5, 8, 3, { 6, 5, 5 },
     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 10011 */
   { true /* reserved */ },
   /* 10110 */
   { false, true, 5, 8, 3, { 5, 6, 5 },
     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 10111 */
   { true /* reserved */ },
   /* 11010 */
   { false, true, 5, 8, 3, { 5, 5, 6 },
     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 11011 */
   { true /* reserved */ },
   /* 11110 */
   { false, false, 5, 6, 3, { 6, 6, 6 },
     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 11111 */
   { true /* reserved */ },
};
241
/* This partition table is used when the mode has two subsets. Each
 * partition is represented by a 32-bit value which gives 2 bits per texel
 * within the block. The value of the two bits represents which subset to use
 * (0 or 1).
 *
 * The table is indexed by the partition number and the subset for texel t
 * is read as (value >> (t * 2)) & 3.
 */
static const uint32_t
partition_table1[N_PARTITIONS] = {
   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
};
266
/* This partition table is used when the mode has three subsets. In this case
 * the values can be 0, 1 or 2.
 *
 * As with the two-subset table, each entry holds 2 bits per texel and is
 * indexed by the partition number.
 */
static const uint32_t
partition_table2[N_PARTITIONS] = {
   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
};
289
/* Texel numbers of the anchor texels other than texel 0, indexed first by
 * the kind of subset and then by the partition number. Texel 0 is always
 * treated as an anchor and so is not listed here. An anchor texel's index
 * is stored with one bit fewer than the other texels.
 */
static const uint8_t
anchor_indices[][N_PARTITIONS] = {
   /* Anchor index values for the second subset of two-subset partitioning */
   {
      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
   },

   /* Anchor index values for the second subset of three-subset partitioning */
   {
      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
   },

   /* Anchor index values for the third subset of three-subset
    * partitioning
    */
   {
      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
   }
};
318
/* Reads n_bits bits from block starting at bit position offset and returns
 * them as an integer. Bits are numbered from the least-significant bit of
 * the first byte and the field may straddle byte boundaries.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   const uint8_t *src = block + offset / 8;
   int shift_in = offset % 8;
   int shift_out = 0;
   int value = 0;
   /* The first chunk can only use what is left of the starting byte */
   int chunk = (n_bits < 8 - shift_in) ? n_bits : 8 - shift_in;

   for (;;) {
      value |= ((*src >> shift_in) & ((1 << chunk) - 1)) << shift_out;

      n_bits -= chunk;
      if (n_bits <= 0)
         break;

      /* Every subsequent chunk starts at bit 0 of the next byte */
      shift_out += chunk;
      src++;
      shift_in = 0;
      chunk = (n_bits < 8) ? n_bits : 8;
   }

   return value;
}
345
/* Widens an n_bits-wide value to a full byte. The value is moved to the top
 * of the byte and its most-significant bits are replicated into the
 * least-significant bits that would otherwise be left empty.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   const int top = byte << (8 - n_bits);
   const int replicated = byte >> (2 * n_bits - 8);

   return top | replicated;
}
355
356 static int
357 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
358 const uint8_t *block,
359 int bit_offset,
360 uint8_t endpoints[][4])
361 {
362 int component;
363 int subset;
364 int endpoint;
365 int pbit;
366 int n_components;
367
368 /* Extract each color component */
369 for (component = 0; component < 3; component++) {
370 for (subset = 0; subset < mode->n_subsets; subset++) {
371 for (endpoint = 0; endpoint < 2; endpoint++) {
372 endpoints[subset * 2 + endpoint][component] =
373 extract_bits(block, bit_offset, mode->n_color_bits);
374 bit_offset += mode->n_color_bits;
375 }
376 }
377 }
378
379 /* Extract the alpha values */
380 if (mode->n_alpha_bits > 0) {
381 for (subset = 0; subset < mode->n_subsets; subset++) {
382 for (endpoint = 0; endpoint < 2; endpoint++) {
383 endpoints[subset * 2 + endpoint][3] =
384 extract_bits(block, bit_offset, mode->n_alpha_bits);
385 bit_offset += mode->n_alpha_bits;
386 }
387 }
388
389 n_components = 4;
390 } else {
391 for (subset = 0; subset < mode->n_subsets; subset++)
392 for (endpoint = 0; endpoint < 2; endpoint++)
393 endpoints[subset * 2 + endpoint][3] = 255;
394
395 n_components = 3;
396 }
397
398 /* Add in the p-bits */
399 if (mode->has_endpoint_pbits) {
400 for (subset = 0; subset < mode->n_subsets; subset++) {
401 for (endpoint = 0; endpoint < 2; endpoint++) {
402 pbit = extract_bits(block, bit_offset, 1);
403 bit_offset += 1;
404
405 for (component = 0; component < n_components; component++) {
406 endpoints[subset * 2 + endpoint][component] <<= 1;
407 endpoints[subset * 2 + endpoint][component] |= pbit;
408 }
409 }
410 }
411 } else if (mode->has_shared_pbits) {
412 for (subset = 0; subset < mode->n_subsets; subset++) {
413 pbit = extract_bits(block, bit_offset, 1);
414 bit_offset += 1;
415
416 for (endpoint = 0; endpoint < 2; endpoint++) {
417 for (component = 0; component < n_components; component++) {
418 endpoints[subset * 2 + endpoint][component] <<= 1;
419 endpoints[subset * 2 + endpoint][component] |= pbit;
420 }
421 }
422 }
423 }
424
425 /* Expand the n-bit values to a byte */
426 for (subset = 0; subset < mode->n_subsets; subset++) {
427 for (endpoint = 0; endpoint < 2; endpoint++) {
428 for (component = 0; component < 3; component++) {
429 endpoints[subset * 2 + endpoint][component] =
430 expand_component(endpoints[subset * 2 + endpoint][component],
431 mode->n_color_bits +
432 mode->has_endpoint_pbits +
433 mode->has_shared_pbits);
434 }
435
436 if (mode->n_alpha_bits > 0) {
437 endpoints[subset * 2 + endpoint][3] =
438 expand_component(endpoints[subset * 2 + endpoint][3],
439 mode->n_alpha_bits +
440 mode->has_endpoint_pbits +
441 mode->has_shared_pbits);
442 }
443 }
444 }
445
446 return bit_offset;
447 }
448
449 static bool
450 is_anchor(int n_subsets,
451 int partition_num,
452 int texel)
453 {
454 if (texel == 0)
455 return true;
456
457 switch (n_subsets) {
458 case 1:
459 return false;
460 case 2:
461 return anchor_indices[0][partition_num] == texel;
462 case 3:
463 return (anchor_indices[1][partition_num] == texel ||
464 anchor_indices[2][partition_num] == texel);
465 default:
466 assert(false);
467 return false;
468 }
469 }
470
471 static int
472 count_anchors_before_texel(int n_subsets,
473 int partition_num,
474 int texel)
475 {
476 int count = 1;
477
478 if (texel == 0)
479 return 0;
480
481 switch (n_subsets) {
482 case 1:
483 break;
484 case 2:
485 if (texel > anchor_indices[0][partition_num])
486 count++;
487 break;
488 case 3:
489 if (texel > anchor_indices[1][partition_num])
490 count++;
491 if (texel > anchor_indices[2][partition_num])
492 count++;
493 break;
494 default:
495 assert(false);
496 return 0;
497 }
498
499 return count;
500 }
501
/* Blends between the endpoint values a and b using the weight selected by
 * index from the table for the given index width (2, 3 or 4 bits). The
 * weights are out of 64 and the result is rounded to nearest.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit_weights[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit_weights[] =
      { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t four_bit_weights[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed by the number of index bits; there are no 0 or 1-bit tables */
   static const uint8_t *tables[] = {
      NULL, NULL, two_bit_weights, three_bit_weights, four_bit_weights
   };
   const int weight_b = tables[index_bits][index];
   const int weight_a = 64 - weight_b;

   return (weight_a * a + weight_b * b + 32) >> 6;
}
520
/* Swaps the alpha channel of result with one of the colour channels. A
 * rotation of 0 leaves the texel alone; 1, 2 and 3 swap alpha with
 * result[0], result[1] and result[2] respectively.
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   if (rotation != 0) {
      uint8_t *channel = result + (rotation - 1);
      const uint8_t saved = *channel;

      *channel = result[3];
      result[3] = saved;
   }
}
536
537 static void
538 fetch_rgba_unorm_from_block(const uint8_t *block,
539 uint8_t *result,
540 int texel)
541 {
542 int mode_num = ffs(block[0]);
543 const struct bptc_unorm_mode *mode;
544 int bit_offset, secondary_bit_offset;
545 int partition_num;
546 int subset_num;
547 int rotation;
548 int index_selection;
549 int index_bits;
550 int indices[2];
551 int index;
552 int anchors_before_texel;
553 bool anchor;
554 uint8_t endpoints[3 * 2][4];
555 uint32_t subsets;
556 int component;
557
558 if (mode_num == 0) {
559 /* According to the spec this mode is reserved and shouldn't be used. */
560 memset(result, 0, 3);
561 result[3] = 0xff;
562 return;
563 }
564
565 mode = bptc_unorm_modes + mode_num - 1;
566 bit_offset = mode_num;
567
568 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
569 bit_offset += mode->n_partition_bits;
570
571 switch (mode->n_subsets) {
572 case 1:
573 subsets = 0;
574 break;
575 case 2:
576 subsets = partition_table1[partition_num];
577 break;
578 case 3:
579 subsets = partition_table2[partition_num];
580 break;
581 default:
582 assert(false);
583 return;
584 }
585
586 if (mode->has_rotation_bits) {
587 rotation = extract_bits(block, bit_offset, 2);
588 bit_offset += 2;
589 } else {
590 rotation = 0;
591 }
592
593 if (mode->has_index_selection_bit) {
594 index_selection = extract_bits(block, bit_offset, 1);
595 bit_offset++;
596 } else {
597 index_selection = 0;
598 }
599
600 bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
601
602 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
603 partition_num, texel);
604
605 /* Calculate the offset to the secondary index */
606 secondary_bit_offset = (bit_offset +
607 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
608 mode->n_subsets +
609 mode->n_secondary_index_bits * texel -
610 anchors_before_texel);
611
612 /* Calculate the offset to the primary index for this texel */
613 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
614
615 subset_num = (subsets >> (texel * 2)) & 3;
616
617 anchor = is_anchor(mode->n_subsets, partition_num, texel);
618
619 index_bits = mode->n_index_bits;
620 if (anchor)
621 index_bits--;
622 indices[0] = extract_bits(block, bit_offset, index_bits);
623
624 if (mode->n_secondary_index_bits) {
625 index_bits = mode->n_secondary_index_bits;
626 if (anchor)
627 index_bits--;
628 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
629 }
630
631 index = indices[index_selection];
632 index_bits = (index_selection ?
633 mode->n_secondary_index_bits :
634 mode->n_index_bits);
635
636 for (component = 0; component < 3; component++)
637 result[component] = interpolate(endpoints[subset_num * 2][component],
638 endpoints[subset_num * 2 + 1][component],
639 index,
640 index_bits);
641
642 /* Alpha uses the opposite index from the color components */
643 if (mode->n_secondary_index_bits && !index_selection) {
644 index = indices[1];
645 index_bits = mode->n_secondary_index_bits;
646 } else {
647 index = indices[0];
648 index_bits = mode->n_index_bits;
649 }
650
651 result[3] = interpolate(endpoints[subset_num * 2][3],
652 endpoints[subset_num * 2 + 1][3],
653 index,
654 index_bits);
655
656 apply_rotation(rotation, result);
657 }
658
659 static void
660 fetch_bptc_rgba_unorm_bytes(const GLubyte *map,
661 GLint rowStride, GLint i, GLint j,
662 GLubyte *texel)
663 {
664 const GLubyte *block;
665
666 block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
667
668 fetch_rgba_unorm_from_block(block, texel, (i % 4) + (j % 4) * 4);
669 }
670
671 static void
672 fetch_bptc_rgba_unorm(const GLubyte *map,
673 GLint rowStride, GLint i, GLint j,
674 GLfloat *texel)
675 {
676 GLubyte texel_bytes[4];
677
678 fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
679
680 texel[RCOMP] = UBYTE_TO_FLOAT(texel_bytes[0]);
681 texel[GCOMP] = UBYTE_TO_FLOAT(texel_bytes[1]);
682 texel[BCOMP] = UBYTE_TO_FLOAT(texel_bytes[2]);
683 texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
684 }
685
686 static void
687 fetch_bptc_srgb_alpha_unorm(const GLubyte *map,
688 GLint rowStride, GLint i, GLint j,
689 GLfloat *texel)
690 {
691 GLubyte texel_bytes[4];
692
693 fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
694
695 texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[0]);
696 texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[1]);
697 texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[2]);
698 texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
699 }
700
/* Sign-extends the n_bits-wide two's complement number held in the low bits
 * of value to a full int32_t. If bit (n_bits - 1) is set then all of the
 * bits above it are set as well, otherwise value is returned unchanged.
 *
 * n_bits must be in the range 1..31.
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   const uint32_t sign_bit = (uint32_t) 1 << (n_bits - 1);

   if ((uint32_t) value & sign_bit) {
      /* Build the mask of upper bits with unsigned arithmetic because
       * left-shifting a negative signed value is undefined behaviour.
       */
      const uint32_t upper_bits = ~(uint32_t) 0 << n_bits;

      value = (int32_t) ((uint32_t) value | upper_bits);
   }

   return value;
}
711
/* Unquantizes a signed endpoint value that was stored with n_endpoint_bits
 * of precision. Values stored with 16 or more bits are returned as they
 * are. Otherwise the magnitude is scaled up, saturating at 0x7fff, and the
 * sign is reapplied.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   const bool negative = value < 0;
   int magnitude;
   int scaled;

   /* Full-precision values and zero need no scaling */
   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   magnitude = negative ? -value : value;

   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1)
      scaled = 0x7fff;
   else
      scaled = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return negative ? -scaled : scaled;
}
740
/* Unquantizes an unsigned endpoint value that was stored with
 * n_endpoint_bits of precision. Values stored with 15 or more bits are
 * returned as they are, zero stays zero and the largest representable value
 * maps to 0xffff.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   int max_value;

   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   max_value = (1 << n_endpoint_bits) - 1;
   if (value == max_value)
      return 0xffff;

   return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
}
755
756 static int
757 extract_float_endpoints(const struct bptc_float_mode *mode,
758 const uint8_t *block,
759 int bit_offset,
760 int32_t endpoints[][3],
761 bool is_signed)
762 {
763 const struct bptc_float_bitfield *bitfield;
764 int endpoint, component;
765 int n_endpoints;
766 int value;
767 int i;
768
769 if (mode->n_partition_bits)
770 n_endpoints = 4;
771 else
772 n_endpoints = 2;
773
774 memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
775
776 for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
777 value = extract_bits(block, bit_offset, bitfield->n_bits);
778 bit_offset += bitfield->n_bits;
779
780 if (bitfield->reverse) {
781 for (i = 0; i < bitfield->n_bits; i++) {
782 if (value & (1 << i))
783 endpoints[bitfield->endpoint][bitfield->component] |=
784 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
785 }
786 } else {
787 endpoints[bitfield->endpoint][bitfield->component] |=
788 value << bitfield->offset;
789 }
790 }
791
792 if (mode->transformed_endpoints) {
793 /* The endpoints are specified as signed offsets from e0 */
794 for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
795 for (component = 0; component < 3; component++) {
796 value = sign_extend(endpoints[endpoint][component],
797 mode->n_delta_bits[component]);
798 endpoints[endpoint][component] =
799 ((endpoints[0][component] + value) &
800 ((1 << mode->n_endpoint_bits) - 1));
801 }
802 }
803 }
804
805 if (is_signed) {
806 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
807 for (component = 0; component < 3; component++) {
808 value = sign_extend(endpoints[endpoint][component],
809 mode->n_endpoint_bits);
810 endpoints[endpoint][component] =
811 signed_unquantize(value, mode->n_endpoint_bits);
812 }
813 }
814 } else {
815 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
816 for (component = 0; component < 3; component++) {
817 endpoints[endpoint][component] =
818 unsigned_unquantize(endpoints[endpoint][component],
819 mode->n_endpoint_bits);
820 }
821 }
822 }
823
824 return bit_offset;
825 }
826
/* Final scaling step for an interpolated unsigned value: multiplies by
 * 31/64 using truncating integer arithmetic.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
832
/* Final scaling step for an interpolated signed value: scales the magnitude
 * by 31/32 and, for negative inputs, sets bit 15 of the result as the sign.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   if (value >= 0)
      return value * 31 / 32;

   return (-value * 31 / 32) | 0x8000;
}
841
842 static void
843 fetch_rgb_float_from_block(const uint8_t *block,
844 float *result,
845 int texel,
846 bool is_signed)
847 {
848 int mode_num;
849 const struct bptc_float_mode *mode;
850 int bit_offset;
851 int partition_num;
852 int subset_num;
853 int index_bits;
854 int index;
855 int anchors_before_texel;
856 int32_t endpoints[2 * 2][3];
857 uint32_t subsets;
858 int n_subsets;
859 int component;
860 int32_t value;
861
862 if (block[0] & 0x2) {
863 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
864 bit_offset = 5;
865 } else {
866 mode_num = block[0] & 3;
867 bit_offset = 2;
868 }
869
870 mode = bptc_float_modes + mode_num;
871
872 if (mode->reserved) {
873 memset(result, 0, sizeof result[0] * 3);
874 result[3] = 1.0f;
875 return;
876 }
877
878 bit_offset = extract_float_endpoints(mode, block, bit_offset,
879 endpoints, is_signed);
880
881 if (mode->n_partition_bits) {
882 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
883 bit_offset += mode->n_partition_bits;
884
885 subsets = partition_table1[partition_num];
886 n_subsets = 2;
887 } else {
888 partition_num = 0;
889 subsets = 0;
890 n_subsets = 1;
891 }
892
893 anchors_before_texel =
894 count_anchors_before_texel(n_subsets, partition_num, texel);
895
896 /* Calculate the offset to the primary index for this texel */
897 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
898
899 subset_num = (subsets >> (texel * 2)) & 3;
900
901 index_bits = mode->n_index_bits;
902 if (is_anchor(n_subsets, partition_num, texel))
903 index_bits--;
904 index = extract_bits(block, bit_offset, index_bits);
905
906 for (component = 0; component < 3; component++) {
907 value = interpolate(endpoints[subset_num * 2][component],
908 endpoints[subset_num * 2 + 1][component],
909 index,
910 mode->n_index_bits);
911
912 if (is_signed)
913 value = finish_signed_unquantize(value);
914 else
915 value = finish_unsigned_unquantize(value);
916
917 result[component] = _mesa_half_to_float(value);
918 }
919
920 result[3] = 1.0f;
921 }
922
923 static void
924 fetch_bptc_rgb_float(const GLubyte *map,
925 GLint rowStride, GLint i, GLint j,
926 GLfloat *texel,
927 bool is_signed)
928 {
929 const GLubyte *block;
930
931 block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
932
933 fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed);
934 }
935
936 static void
937 fetch_bptc_rgb_signed_float(const GLubyte *map,
938 GLint rowStride, GLint i, GLint j,
939 GLfloat *texel)
940 {
941 fetch_bptc_rgb_float(map, rowStride, i, j, texel, true);
942 }
943
944 static void
945 fetch_bptc_rgb_unsigned_float(const GLubyte *map,
946 GLint rowStride, GLint i, GLint j,
947 GLfloat *texel)
948 {
949 fetch_bptc_rgb_float(map, rowStride, i, j, texel, false);
950 }
951
952 compressed_fetch_func
953 _mesa_get_bptc_fetch_func(mesa_format format)
954 {
955 switch (format) {
956 case MESA_FORMAT_BPTC_RGBA_UNORM:
957 return fetch_bptc_rgba_unorm;
958 case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM:
959 return fetch_bptc_srgb_alpha_unorm;
960 case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT:
961 return fetch_bptc_rgb_signed_float;
962 case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT:
963 return fetch_bptc_rgb_unsigned_float;
964 default:
965 return NULL;
966 }
967 }
968
969 static void
970 write_bits(struct bit_writer *writer, int n_bits, int value)
971 {
972 do {
973 if (n_bits + writer->pos >= 8) {
974 *(writer->dst++) = writer->buf | (value << writer->pos);
975 writer->buf = 0;
976 value >>= (8 - writer->pos);
977 n_bits -= (8 - writer->pos);
978 writer->pos = 0;
979 } else {
980 writer->buf |= value << writer->pos;
981 writer->pos += n_bits;
982 break;
983 }
984 } while (n_bits > 0);
985 }
986
/* Computes the average luminance and the average alpha of a width x height
 * region of RGBA8 texels. The luminance of a texel is taken to be the plain
 * sum of its red, green and blue bytes. src_rowstride is the distance in
 * bytes between the starts of consecutive rows.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   int luminance_total = 0;
   int alpha_total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         luminance_total += texel[0] + texel[1] + texel[2];
         alpha_total += texel[3];
      }
   }

   *average_luminance = luminance_total / n_texels;
   *average_alpha = alpha_total / n_texels;
}
1007
/* Chooses a pair of RGBA endpoints for a width x height region of RGBA8
 * texels.
 *
 * The texels are split into two groups by comparing their luminance (the sum
 * of the red, green and blue bytes) with average_luminance, and the colour
 * endpoints are the average colour of each group. The alpha endpoints are
 * chosen the same way, but independently, by comparing each texel's alpha
 * with average_alpha. If all of the texels fall into the same group then
 * both endpoints are set to the overall average.
 *
 * src_rowstride is the distance in bytes between the starts of consecutive
 * rows. The results are stored in endpoints[0] and endpoints[1].
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   const int n_texels = width * height;
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* The alpha grouping must look at the alpha channel (p[3]), not
          * the blue channel, otherwise the alpha endpoints are averaged
          * over the wrong sets of texels.
          */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      /* Skip any padding at the end of the row */
      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == n_texels) {
      /* Everything landed in one group so use the overall average */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / n_texels;
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (n_texels - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == n_texels) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / n_texels;
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (n_texels - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   /* The first texel must be on the same side of the midpoint as
    * endpoint 0
    */
   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1104
1105 static void
1106 write_rgb_indices_unorm(struct bit_writer *writer,
1107 int src_width, int src_height,
1108 const uint8_t *src, int src_rowstride,
1109 uint8_t endpoints[][4])
1110 {
1111 int luminance;
1112 int endpoint_luminances[2];
1113 int endpoint;
1114 int index;
1115 int y, x;
1116
1117 for (endpoint = 0; endpoint < 2; endpoint++) {
1118 endpoint_luminances[endpoint] =
1119 endpoints[endpoint][0] +
1120 endpoints[endpoint][1] +
1121 endpoints[endpoint][2];
1122 }
1123
1124 /* If the endpoints have the same luminance then we'll just use index 0 for
1125 * all of the texels */
1126 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1127 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1128 return;
1129 }
1130
1131 for (y = 0; y < src_height; y++) {
1132 for (x = 0; x < src_width; x++) {
1133 luminance = src[0] + src[1] + src[2];
1134
1135 index = ((luminance - endpoint_luminances[0]) * 3 /
1136 (endpoint_luminances[1] - endpoint_luminances[0]));
1137 if (index < 0)
1138 index = 0;
1139 else if (index > 3)
1140 index = 3;
1141
1142 assert(x != 0 || y != 0 || index < 2);
1143
1144 write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1145
1146 src += 4;
1147 }
1148
1149 /* Pad the indices out to the block size */
1150 if (src_width < BLOCK_SIZE)
1151 write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1152
1153 src += src_rowstride - src_width * 4;
1154 }
1155
1156 /* Pad the indices out to the block size */
1157 if (src_height < BLOCK_SIZE)
1158 write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1159 }
1160
1161 static void
1162 write_alpha_indices_unorm(struct bit_writer *writer,
1163 int src_width, int src_height,
1164 const uint8_t *src, int src_rowstride,
1165 uint8_t endpoints[][4])
1166 {
1167 int index;
1168 int y, x;
1169
1170 /* If the endpoints have the same alpha then we'll just use index 0 for
1171 * all of the texels */
1172 if (endpoints[0][3] == endpoints[1][3]) {
1173 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1174 return;
1175 }
1176
1177 for (y = 0; y < src_height; y++) {
1178 for (x = 0; x < src_width; x++) {
1179 index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1180 ((int) endpoints[1][3] - endpoints[0][3]));
1181 if (index < 0)
1182 index = 0;
1183 else if (index > 7)
1184 index = 7;
1185
1186 assert(x != 0 || y != 0 || index < 4);
1187
1188 /* The first index has one less bit */
1189 write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1190
1191 src += 4;
1192 }
1193
1194 /* Pad the indices out to the block size */
1195 if (src_width < BLOCK_SIZE)
1196 write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1197
1198 src += src_rowstride - src_width * 4;
1199 }
1200
1201 /* Pad the indices out to the block size */
1202 if (src_height < BLOCK_SIZE)
1203 write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1204 }
1205
1206 static void
1207 compress_rgba_unorm_block(int src_width, int src_height,
1208 const uint8_t *src, int src_rowstride,
1209 uint8_t *dst)
1210 {
1211 int average_luminance, average_alpha;
1212 uint8_t endpoints[2][4];
1213 struct bit_writer writer;
1214 int component, endpoint;
1215
1216 get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1217 &average_luminance, &average_alpha);
1218 get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1219 average_luminance, average_alpha,
1220 endpoints);
1221
1222 writer.dst = dst;
1223 writer.pos = 0;
1224 writer.buf = 0;
1225
1226 write_bits(&writer, 5, 0x10); /* mode 4 */
1227 write_bits(&writer, 2, 0); /* rotation 0 */
1228 write_bits(&writer, 1, 0); /* index selection bit */
1229
1230 /* Write the color endpoints */
1231 for (component = 0; component < 3; component++)
1232 for (endpoint = 0; endpoint < 2; endpoint++)
1233 write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1234
1235 /* Write the alpha endpoints */
1236 for (endpoint = 0; endpoint < 2; endpoint++)
1237 write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1238
1239 write_rgb_indices_unorm(&writer,
1240 src_width, src_height,
1241 src, src_rowstride,
1242 endpoints);
1243 write_alpha_indices_unorm(&writer,
1244 src_width, src_height,
1245 src, src_rowstride,
1246 endpoints);
1247 }
1248
1249 static void
1250 compress_rgba_unorm(int width, int height,
1251 const uint8_t *src, int src_rowstride,
1252 uint8_t *dst, int dst_rowstride)
1253 {
1254 int dst_row_diff;
1255 int y, x;
1256
1257 if (dst_rowstride >= width * 4)
1258 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1259 else
1260 dst_row_diff = 0;
1261
1262 for (y = 0; y < height; y += BLOCK_SIZE) {
1263 for (x = 0; x < width; x += BLOCK_SIZE) {
1264 compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1265 MIN2(height - y, BLOCK_SIZE),
1266 src + x * 4 + y * src_rowstride,
1267 src_rowstride,
1268 dst);
1269 dst += BLOCK_BYTES;
1270 }
1271 dst += dst_row_diff;
1272 }
1273 }
1274
1275 GLboolean
1276 _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
1277 {
1278 const GLubyte *pixels;
1279 const GLubyte *tempImage = NULL;
1280 int rowstride;
1281
1282 if (srcFormat != GL_RGBA ||
1283 srcType != GL_UNSIGNED_BYTE ||
1284 ctx->_ImageTransferState ||
1285 srcPacking->SwapBytes) {
1286 /* convert image to RGBA/ubyte */
1287 GLubyte *tempImageSlices[1];
1288 int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
1289 tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
1290 if (!tempImage)
1291 return GL_FALSE; /* out of memory */
1292 tempImageSlices[0] = (GLubyte *) tempImage;
1293 _mesa_texstore(ctx, dims,
1294 baseInternalFormat,
1295 _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
1296 : MESA_FORMAT_A8B8G8R8_UNORM,
1297 rgbaRowStride, tempImageSlices,
1298 srcWidth, srcHeight, srcDepth,
1299 srcFormat, srcType, srcAddr,
1300 srcPacking);
1301
1302 pixels = tempImage;
1303 rowstride = srcWidth * 4;
1304 } else {
1305 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
1306 srcFormat, srcType, 0, 0);
1307 rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
1308 srcFormat, srcType);
1309 }
1310
1311 compress_rgba_unorm(srcWidth, srcHeight,
1312 pixels, rowstride,
1313 dstSlices[0], dstRowStride);
1314
1315 free((void *) tempImage);
1316
1317 return GL_TRUE;
1318 }
1319
/**
 * Return the mean of R + G + B over a region of 3-float-per-texel data.
 *
 * \param width, height   size of the region in texels; the product is used
 *                        as a divisor and so must be non-zero
 * \param src             first texel of the region
 * \param src_rowstride   distance in bytes between the starts of rows
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Skip whatever lies between the end of this row and the next. */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1337
/**
 * Clamp \p value to [-65504, 65504] when \p is_signed is true, or to
 * [0, 65504] otherwise.  Values already inside the range (and values for
 * which neither comparison holds) are returned unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1356
/**
 * Choose two RGB endpoints for a region of float RGB texels.
 *
 * Texels whose R + G + B is below \p average_luminance form one group, the
 * rest form the other; each endpoint is the mean colour of one group.  When
 * one group is empty both endpoints are the mean of the whole region.  The
 * endpoints are then passed through clamp_value() and finally ordered so
 * that the first texel lies on endpoint 0's side of the midpoint, keeping
 * the most-significant bit of the first index zero.
 *
 * \param width, height       size of the region in texels (product non-zero)
 * \param src                 first texel, 3 floats per texel
 * \param src_rowstride       distance in bytes between the starts of rows
 * \param average_luminance   split value for R + G + B
 * \param endpoints           receives endpoints[0] and endpoints[1] as R, G, B
 * \param is_signed           forwarded to clamp_value()
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float group_sums[2][3];
   float group_luminance[2];
   float middle;
   float saved[3];
   int n_below = 0;
   int row, col, c, e;

   memset(group_sums, 0, sizeof group_sums);

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         const float texel_luminance = texel[0] + texel[1] + texel[2];
         int group = 1;

         if (texel_luminance < average_luminance) {
            group = 0;
            n_below++;
         }

         for (c = 0; c < 3; c++)
            group_sums[group][c] += texel[c];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   if (n_below == 0 || n_below == n_texels) {
      /* One group is empty: use the overall mean for both endpoints. */
      for (c = 0; c < 3; c++)
         endpoints[0][c] = endpoints[1][c] =
            (group_sums[0][c] + group_sums[1][c]) / n_texels;
   } else {
      for (c = 0; c < 3; c++) {
         endpoints[0][c] = group_sums[0][c] / n_below;
         endpoints[1][c] = group_sums[1][c] / (n_texels - n_below);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities */
   for (e = 0; e < 2; e++) {
      for (c = 0; c < 3; c++)
         endpoints[e][c] = clamp_value(endpoints[e][c], is_signed);
   }

   /* Order the endpoints so the first texel is nearer endpoint 0. */
   for (e = 0; e < 2; e++) {
      group_luminance[e] =
         endpoints[e][0] + endpoints[e][1] + endpoints[e][2];
   }
   middle = (group_luminance[0] + group_luminance[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= middle) !=
       (group_luminance[0] <= middle)) {
      memcpy(saved, endpoints[0], sizeof saved);
      memcpy(endpoints[0], endpoints[1], sizeof saved);
      memcpy(endpoints[1], saved, sizeof saved);
   }
}
1432
1433 static void
1434 write_rgb_indices_float(struct bit_writer *writer,
1435 int src_width, int src_height,
1436 const float *src, int src_rowstride,
1437 float endpoints[][3])
1438 {
1439 float luminance;
1440 float endpoint_luminances[2];
1441 int endpoint;
1442 int index;
1443 int y, x;
1444
1445 for (endpoint = 0; endpoint < 2; endpoint++) {
1446 endpoint_luminances[endpoint] =
1447 endpoints[endpoint][0] +
1448 endpoints[endpoint][1] +
1449 endpoints[endpoint][2];
1450 }
1451
1452 /* If the endpoints have the same luminance then we'll just use index 0 for
1453 * all of the texels */
1454 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1455 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1456 return;
1457 }
1458
1459 for (y = 0; y < src_height; y++) {
1460 for (x = 0; x < src_width; x++) {
1461 luminance = src[0] + src[1] + src[2];
1462
1463 index = ((luminance - endpoint_luminances[0]) * 15 /
1464 (endpoint_luminances[1] - endpoint_luminances[0]));
1465 if (index < 0)
1466 index = 0;
1467 else if (index > 15)
1468 index = 15;
1469
1470 assert(x != 0 || y != 0 || index < 8);
1471
1472 write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1473
1474 src += 3;
1475 }
1476
1477 /* Pad the indices out to the block size */
1478 if (src_width < BLOCK_SIZE)
1479 write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1480
1481 src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1482 }
1483
1484 /* Pad the indices out to the block size */
1485 if (src_height < BLOCK_SIZE)
1486 write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1487 }
1488
/**
 * Convert a float endpoint component into the 10-bit value stored in the
 * block.
 *
 * The value is converted with _mesa_float_to_half() and the resulting bit
 * pattern is rescaled.  In the unsigned case non-positive inputs give 0.
 * In the signed case the half's sign bit is stripped before rescaling and
 * a negative result is returned as a 10-bit two's complement value.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   bool negative;
   int half;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   negative = (half & 0x8000) != 0;
   if (negative)
      half &= 0x7fff;

   half = (32 * half / 31) >> 6;

   if (negative)
      return -half & ((1 << 10) - 1);

   return half;
}
1518
1519 static void
1520 compress_rgb_float_block(int src_width, int src_height,
1521 const float *src, int src_rowstride,
1522 uint8_t *dst,
1523 bool is_signed)
1524 {
1525 float average_luminance;
1526 float endpoints[2][3];
1527 struct bit_writer writer;
1528 int component, endpoint;
1529 int endpoint_value;
1530
1531 average_luminance =
1532 get_average_luminance_float(src_width, src_height, src, src_rowstride);
1533 get_endpoints_float(src_width, src_height, src, src_rowstride,
1534 average_luminance, endpoints, is_signed);
1535
1536 writer.dst = dst;
1537 writer.pos = 0;
1538 writer.buf = 0;
1539
1540 write_bits(&writer, 5, 3); /* mode 3 */
1541
1542 /* Write the endpoints */
1543 for (endpoint = 0; endpoint < 2; endpoint++) {
1544 for (component = 0; component < 3; component++) {
1545 endpoint_value =
1546 get_endpoint_value(endpoints[endpoint][component], is_signed);
1547 write_bits(&writer, 10, endpoint_value);
1548 }
1549 }
1550
1551 write_rgb_indices_float(&writer,
1552 src_width, src_height,
1553 src, src_rowstride,
1554 endpoints);
1555 }
1556
1557 static void
1558 compress_rgb_float(int width, int height,
1559 const float *src, int src_rowstride,
1560 uint8_t *dst, int dst_rowstride,
1561 bool is_signed)
1562 {
1563 int dst_row_diff;
1564 int y, x;
1565
1566 if (dst_rowstride >= width * 4)
1567 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1568 else
1569 dst_row_diff = 0;
1570
1571 for (y = 0; y < height; y += BLOCK_SIZE) {
1572 for (x = 0; x < width; x += BLOCK_SIZE) {
1573 compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1574 MIN2(height - y, BLOCK_SIZE),
1575 src + x * 3 +
1576 y * src_rowstride / sizeof (float),
1577 src_rowstride,
1578 dst,
1579 is_signed);
1580 dst += BLOCK_BYTES;
1581 }
1582 dst += dst_row_diff;
1583 }
1584 }
1585
1586 static GLboolean
1587 texstore_bptc_rgb_float(TEXSTORE_PARAMS,
1588 bool is_signed)
1589 {
1590 const float *pixels;
1591 const float *tempImage = NULL;
1592 int rowstride;
1593
1594 if (srcFormat != GL_RGB ||
1595 srcType != GL_FLOAT ||
1596 ctx->_ImageTransferState ||
1597 srcPacking->SwapBytes) {
1598 /* convert image to RGB/float */
1599 GLfloat *tempImageSlices[1];
1600 int rgbRowStride = 3 * srcWidth * sizeof(GLfloat);
1601 tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLfloat));
1602 if (!tempImage)
1603 return GL_FALSE; /* out of memory */
1604 tempImageSlices[0] = (GLfloat *) tempImage;
1605 _mesa_texstore(ctx, dims,
1606 baseInternalFormat,
1607 MESA_FORMAT_RGB_FLOAT32,
1608 rgbRowStride, (GLubyte **)tempImageSlices,
1609 srcWidth, srcHeight, srcDepth,
1610 srcFormat, srcType, srcAddr,
1611 srcPacking);
1612
1613 pixels = tempImage;
1614 rowstride = srcWidth * sizeof(float) * 3;
1615 } else {
1616 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
1617 srcFormat, srcType, 0, 0);
1618 rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
1619 srcFormat, srcType);
1620 }
1621
1622 compress_rgb_float(srcWidth, srcHeight,
1623 pixels, rowstride,
1624 dstSlices[0], dstRowStride,
1625 is_signed);
1626
1627 free((void *) tempImage);
1628
1629 return GL_TRUE;
1630 }
1631
1632 GLboolean
1633 _mesa_texstore_bptc_rgb_signed_float(TEXSTORE_PARAMS)
1634 {
1635 assert(dstFormat == MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT);
1636
1637 return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
1638 dstFormat, dstRowStride, dstSlices,
1639 srcWidth, srcHeight, srcDepth,
1640 srcFormat, srcType,
1641 srcAddr, srcPacking,
1642 true /* signed */);
1643 }
1644
1645 GLboolean
1646 _mesa_texstore_bptc_rgb_unsigned_float(TEXSTORE_PARAMS)
1647 {
1648 assert(dstFormat == MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT);
1649
1650 return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
1651 dstFormat, dstRowStride, dstSlices,
1652 srcWidth, srcHeight, srcDepth,
1653 srcFormat, srcType,
1654 srcAddr, srcPacking,
1655 false /* unsigned */);
1656 }