mesa: add new _mesa_Get[Compressed]TextureSubImage() functions
[mesa.git] / src / mesa / main / texcompress_bptc.c
1 /*
2 * Copyright (C) 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file texcompress_bptc.c
26 * GL_ARB_texture_compression_bptc support.
27 */
28
29 #include <stdbool.h>
30 #include "texcompress.h"
31 #include "texcompress_bptc.h"
32 #include "util/format_srgb.h"
33 #include "texstore.h"
34 #include "macros.h"
35 #include "image.h"
36
/* Width and height, in texels, of one compressed block. */
#define BLOCK_SIZE 4
/* Number of entries in each partition table and anchor index table. */
#define N_PARTITIONS 64
/* NOTE(review): presumably the size in bytes of one compressed block; it is
 * not referenced in the part of the file visible here - confirm.
 */
#define BLOCK_BYTES 16
40
/* Describes the bit layout of one UNORM block mode. The entries of
 * bptc_unorm_modes are initialised positionally, so the order of the fields
 * must not change.
 */
struct bptc_unorm_mode {
   /* Number of subsets; each subset has two endpoints. */
   int n_subsets;
   /* Width of the partition number field (0 if there is none). */
   int n_partition_bits;
   /* Whether a 2-bit rotation field follows the partition number. */
   bool has_rotation_bits;
   /* Whether a 1-bit index selection field follows the rotation field. */
   bool has_index_selection_bit;
   /* Stored bits per color component of each endpoint. */
   int n_color_bits;
   /* Stored bits for the alpha of each endpoint; 0 means alpha is 255. */
   int n_alpha_bits;
   /* Whether every endpoint has its own p-bit. */
   bool has_endpoint_pbits;
   /* Whether each subset has one p-bit shared by both of its endpoints. */
   bool has_shared_pbits;
   /* Bits per texel in the primary index list. */
   int n_index_bits;
   /* Bits per texel in the secondary index list; 0 if there is none. */
   int n_secondary_index_bits;
};
53
/* One run of consecutive block bits that contributes to a float endpoint
 * component. Initialised positionally by bptc_float_modes.
 */
struct bptc_float_bitfield {
   /* Destination endpoint index; -1 terminates a bitfield list. */
   int8_t endpoint;
   /* Destination component within the endpoint. */
   uint8_t component;
   /* Bit position within the component where the run is placed. */
   uint8_t offset;
   /* Number of bits read from the block for this run. */
   uint8_t n_bits;
   /* When true the bits of the run are stored in reversed order. */
   bool reverse;
};
61
/* Describes the bit layout of one float block mode. Initialised positionally
 * by bptc_float_modes.
 */
struct bptc_float_mode {
   /* Reserved modes decode to (0, 0, 0, 1). */
   bool reserved;
   /* When true, every endpoint after the first is stored as a signed delta
    * from the first endpoint.
    */
   bool transformed_endpoints;
   /* Width of the partition number field; non-zero selects two subsets. */
   int n_partition_bits;
   /* Precision in bits of the endpoint components. */
   int n_endpoint_bits;
   /* Bits per texel in the index list. */
   int n_index_bits;
   /* Width of the stored delta for each of the three components. */
   int n_delta_bits[3];
   /* Bitfields in block order, terminated by an entry with endpoint == -1. */
   struct bptc_float_bitfield bitfields[24];
};
71
/* State for writing a bitstream one field at a time (see write_bits). */
struct bit_writer {
   /* Bits accumulated for the byte that has not been stored yet. */
   uint8_t buf;
   /* Number of bits currently held in buf. */
   int pos;
   /* Where the next completed byte will be stored. */
   uint8_t *dst;
};
77
/* Layout of the UNORM modes, indexed by mode number. Columns follow the
 * field order of struct bptc_unorm_mode:
 *   n_subsets, n_partition_bits, has_rotation_bits, has_index_selection_bit,
 *   n_color_bits, n_alpha_bits, has_endpoint_pbits, has_shared_pbits,
 *   n_index_bits, n_secondary_index_bits
 */
static const struct bptc_unorm_mode
bptc_unorm_modes[] = {
   /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
   /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
   /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
   /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
   /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
   /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
   /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
};
89
/* Layout of the float modes. The comment before each entry is the bit
 * pattern of the mode field; the array index is the mode_num value computed
 * in fetch_rgb_float_from_block. Each entry lists
 *   reserved, transformed_endpoints, n_partition_bits, n_endpoint_bits,
 *   n_index_bits, { n_delta_bits }, { bitfields }
 * and every bitfield is
 *   { endpoint, component, offset, n_bits, reverse }
 * in the order the fields appear in the block, ending with { -1 }.
 */
static const struct bptc_float_mode
bptc_float_modes[] = {
   /* 00 */
   { false, true, 5, 10, 3, { 5, 5, 5 },
     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01 */
   { false, true, 5, 7, 3, { 6, 6, 6 },
     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 00010 */
   { false, true, 5, 11, 3, { 5, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00011 */
   { false, false, 0, 10, 4, { 10, 10, 10 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
       { -1 } }
   },
   /* 00110 */
   { false, true, 5, 11, 3, { 4, 5, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00111 */
   { false, true, 0, 11, 4, { 9, 9, 9 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
       { -1 } }
   },
   /* 01010 */
   { false, true, 5, 11, 3, { 4, 4, 5 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01011 */
   { false, true, 0, 12, 4, { 8, 8, 8 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
       { -1 } }
   },
   /* 01110 */
   { false, true, 5, 9, 3, { 5, 5, 5 },
     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01111 */
   { false, true, 0, 16, 4, { 4, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
       { -1 } }
   },
   /* 10010 */
   { false, true, 5, 8, 3, { 6, 5, 5 },
     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 10011 */
   { true /* reserved */ },
   /* 10110 */
   { false, true, 5, 8, 3, { 5, 6, 5 },
     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 10111 */
   { true /* reserved */ },
   /* 11010 */
   { false, true, 5, 8, 3, { 5, 5, 6 },
     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 11011 */
   { true /* reserved */ },
   /* 11110 */
   { false, false, 5, 6, 3, { 6, 6, 6 },
     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 11111 */
   { true /* reserved */ },
};
240
/* This partition table is used when the mode has two subsets. Each
 * partition is represented by a 32-bit value which gives 2 bits per texel
 * within the block. The value of the two bits represents which subset to use
 * (0 or 1).
 *
 * The table is indexed by partition number; the subset of texel t is
 * (value >> (2 * t)) & 3.
 */
static const uint32_t
partition_table1[N_PARTITIONS] = {
   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
};
265
/* This partition table is used when the mode has three subsets. In this case
 * the values can be 0, 1 or 2.
 *
 * The table is indexed by partition number and packed with 2 bits per texel;
 * the subset of texel t is (value >> (2 * t)) & 3.
 */
static const uint32_t
partition_table2[N_PARTITIONS] = {
   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
};
288
/* Texel numbers of the anchor texels, indexed by partition number. Texel 0
 * is always treated as an anchor by is_anchor() and is therefore not listed.
 * Row 0 is used for two-subset modes, rows 1 and 2 for three-subset modes.
 */
static const uint8_t
anchor_indices[][N_PARTITIONS] = {
   /* Anchor index values for the second subset of two-subset partitioning */
   {
      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
   },

   /* Anchor index values for the second subset of three-subset partitioning */
   {
      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
   },

   /* Anchor index values for the third subset of three-subset
    * partitioning
    */
   {
      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
   }
};
317
/**
 * Reads a bitfield out of a block.
 *
 * Bit 0 of the stream is the least-significant bit of block[0]. A field may
 * straddle any number of byte boundaries.
 *
 * \param block   first byte of the bitstream
 * \param offset  position of the field's first bit
 * \param n_bits  width of the field
 * \return the field, with its first bit in bit 0 of the result
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   const uint8_t *src = block + offset / 8;
   int shift = offset % 8;
   /* The first chunk is limited by what is left in the starting byte. */
   int chunk = (n_bits < 8 - shift) ? n_bits : 8 - shift;
   int out_pos = 0;
   int value = 0;

   for (;;) {
      value |= ((*src >> shift) & ((1 << chunk) - 1)) << out_pos;

      n_bits -= chunk;
      if (n_bits <= 0)
         break;

      /* Every following chunk starts at bit 0 of the next byte. */
      out_pos += chunk;
      src++;
      shift = 0;
      chunk = (n_bits < 8) ? n_bits : 8;
   }

   return value;
}
344
/**
 * Widens an n_bits wide value to a full byte.
 *
 * The value is moved to the top of the byte and its most-significant bits
 * are repeated in the least-significant bits that would otherwise be empty.
 *
 * \param byte    value occupying the low n_bits bits
 * \param n_bits  width of the value; the right shift below needs
 *                2 * n_bits >= 8
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   const int upper = byte << (8 - n_bits);
   const int replicated = byte >> (2 * n_bits - 8);

   return upper | replicated;
}
354
355 static int
356 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
357 const uint8_t *block,
358 int bit_offset,
359 uint8_t endpoints[][4])
360 {
361 int component;
362 int subset;
363 int endpoint;
364 int pbit;
365 int n_components;
366
367 /* Extract each color component */
368 for (component = 0; component < 3; component++) {
369 for (subset = 0; subset < mode->n_subsets; subset++) {
370 for (endpoint = 0; endpoint < 2; endpoint++) {
371 endpoints[subset * 2 + endpoint][component] =
372 extract_bits(block, bit_offset, mode->n_color_bits);
373 bit_offset += mode->n_color_bits;
374 }
375 }
376 }
377
378 /* Extract the alpha values */
379 if (mode->n_alpha_bits > 0) {
380 for (subset = 0; subset < mode->n_subsets; subset++) {
381 for (endpoint = 0; endpoint < 2; endpoint++) {
382 endpoints[subset * 2 + endpoint][3] =
383 extract_bits(block, bit_offset, mode->n_alpha_bits);
384 bit_offset += mode->n_alpha_bits;
385 }
386 }
387
388 n_components = 4;
389 } else {
390 for (subset = 0; subset < mode->n_subsets; subset++)
391 for (endpoint = 0; endpoint < 2; endpoint++)
392 endpoints[subset * 2 + endpoint][3] = 255;
393
394 n_components = 3;
395 }
396
397 /* Add in the p-bits */
398 if (mode->has_endpoint_pbits) {
399 for (subset = 0; subset < mode->n_subsets; subset++) {
400 for (endpoint = 0; endpoint < 2; endpoint++) {
401 pbit = extract_bits(block, bit_offset, 1);
402 bit_offset += 1;
403
404 for (component = 0; component < n_components; component++) {
405 endpoints[subset * 2 + endpoint][component] <<= 1;
406 endpoints[subset * 2 + endpoint][component] |= pbit;
407 }
408 }
409 }
410 } else if (mode->has_shared_pbits) {
411 for (subset = 0; subset < mode->n_subsets; subset++) {
412 pbit = extract_bits(block, bit_offset, 1);
413 bit_offset += 1;
414
415 for (endpoint = 0; endpoint < 2; endpoint++) {
416 for (component = 0; component < n_components; component++) {
417 endpoints[subset * 2 + endpoint][component] <<= 1;
418 endpoints[subset * 2 + endpoint][component] |= pbit;
419 }
420 }
421 }
422 }
423
424 /* Expand the n-bit values to a byte */
425 for (subset = 0; subset < mode->n_subsets; subset++) {
426 for (endpoint = 0; endpoint < 2; endpoint++) {
427 for (component = 0; component < 3; component++) {
428 endpoints[subset * 2 + endpoint][component] =
429 expand_component(endpoints[subset * 2 + endpoint][component],
430 mode->n_color_bits +
431 mode->has_endpoint_pbits +
432 mode->has_shared_pbits);
433 }
434
435 if (mode->n_alpha_bits > 0) {
436 endpoints[subset * 2 + endpoint][3] =
437 expand_component(endpoints[subset * 2 + endpoint][3],
438 mode->n_alpha_bits +
439 mode->has_endpoint_pbits +
440 mode->has_shared_pbits);
441 }
442 }
443 }
444
445 return bit_offset;
446 }
447
448 static bool
449 is_anchor(int n_subsets,
450 int partition_num,
451 int texel)
452 {
453 if (texel == 0)
454 return true;
455
456 switch (n_subsets) {
457 case 1:
458 return false;
459 case 2:
460 return anchor_indices[0][partition_num] == texel;
461 case 3:
462 return (anchor_indices[1][partition_num] == texel ||
463 anchor_indices[2][partition_num] == texel);
464 default:
465 assert(false);
466 return false;
467 }
468 }
469
470 static int
471 count_anchors_before_texel(int n_subsets,
472 int partition_num,
473 int texel)
474 {
475 int count = 1;
476
477 if (texel == 0)
478 return 0;
479
480 switch (n_subsets) {
481 case 1:
482 break;
483 case 2:
484 if (texel > anchor_indices[0][partition_num])
485 count++;
486 break;
487 case 3:
488 if (texel > anchor_indices[1][partition_num])
489 count++;
490 if (texel > anchor_indices[2][partition_num])
491 count++;
492 break;
493 default:
494 assert(false);
495 return 0;
496 }
497
498 return count;
499 }
500
/**
 * Blends two endpoint values using a texel index.
 *
 * The index selects a weight out of 64 from the table that belongs to the
 * index width; the result is the weighted mean of \p a and \p b with 32
 * added before the final shift.
 *
 * \param a, b        endpoint values
 * \param index       texel index, less than 1 << index_bits
 * \param index_bits  index width; only 2, 3 and 4 have a weight table
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit_weights[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit_weights[] =
      { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t four_bit_weights[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed by the index width. */
   static const uint8_t *weight_tables[] = {
      NULL, NULL, two_bit_weights, three_bit_weights, four_bit_weights
   };
   const int weight_b = weight_tables[index_bits][index];
   const int weight_a = 64 - weight_b;

   return (weight_a * a + weight_b * b + 32) >> 6;
}
519
/**
 * Applies the rotation field of a block to a decoded texel.
 *
 * A rotation of 0 leaves the texel alone; 1, 2 and 3 swap the alpha
 * component with component 0, 1 and 2 respectively.
 *
 * \param rotation  value of the 2-bit rotation field
 * \param result    four components, modified in place
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   uint8_t *swapped;
   uint8_t saved;

   if (rotation == 0)
      return;

   swapped = &result[rotation - 1];

   saved = *swapped;
   *swapped = result[3];
   result[3] = saved;
}
535
536 static void
537 fetch_rgba_unorm_from_block(const uint8_t *block,
538 uint8_t *result,
539 int texel)
540 {
541 int mode_num = ffs(block[0]);
542 const struct bptc_unorm_mode *mode;
543 int bit_offset, secondary_bit_offset;
544 int partition_num;
545 int subset_num;
546 int rotation;
547 int index_selection;
548 int index_bits;
549 int indices[2];
550 int index;
551 int anchors_before_texel;
552 bool anchor;
553 uint8_t endpoints[3 * 2][4];
554 uint32_t subsets;
555 int component;
556
557 if (mode_num == 0) {
558 /* According to the spec this mode is reserved and shouldn't be used. */
559 memset(result, 0, 3);
560 result[3] = 0xff;
561 return;
562 }
563
564 mode = bptc_unorm_modes + mode_num - 1;
565 bit_offset = mode_num;
566
567 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
568 bit_offset += mode->n_partition_bits;
569
570 switch (mode->n_subsets) {
571 case 1:
572 subsets = 0;
573 break;
574 case 2:
575 subsets = partition_table1[partition_num];
576 break;
577 case 3:
578 subsets = partition_table2[partition_num];
579 break;
580 default:
581 assert(false);
582 return;
583 }
584
585 if (mode->has_rotation_bits) {
586 rotation = extract_bits(block, bit_offset, 2);
587 bit_offset += 2;
588 } else {
589 rotation = 0;
590 }
591
592 if (mode->has_index_selection_bit) {
593 index_selection = extract_bits(block, bit_offset, 1);
594 bit_offset++;
595 } else {
596 index_selection = 0;
597 }
598
599 bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
600
601 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
602 partition_num, texel);
603
604 /* Calculate the offset to the secondary index */
605 secondary_bit_offset = (bit_offset +
606 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
607 mode->n_subsets +
608 mode->n_secondary_index_bits * texel -
609 anchors_before_texel);
610
611 /* Calculate the offset to the primary index for this texel */
612 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
613
614 subset_num = (subsets >> (texel * 2)) & 3;
615
616 anchor = is_anchor(mode->n_subsets, partition_num, texel);
617
618 index_bits = mode->n_index_bits;
619 if (anchor)
620 index_bits--;
621 indices[0] = extract_bits(block, bit_offset, index_bits);
622
623 if (mode->n_secondary_index_bits) {
624 index_bits = mode->n_secondary_index_bits;
625 if (anchor)
626 index_bits--;
627 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
628 }
629
630 index = indices[index_selection];
631 index_bits = (index_selection ?
632 mode->n_secondary_index_bits :
633 mode->n_index_bits);
634
635 for (component = 0; component < 3; component++)
636 result[component] = interpolate(endpoints[subset_num * 2][component],
637 endpoints[subset_num * 2 + 1][component],
638 index,
639 index_bits);
640
641 /* Alpha uses the opposite index from the color components */
642 if (mode->n_secondary_index_bits && !index_selection) {
643 index = indices[1];
644 index_bits = mode->n_secondary_index_bits;
645 } else {
646 index = indices[0];
647 index_bits = mode->n_index_bits;
648 }
649
650 result[3] = interpolate(endpoints[subset_num * 2][3],
651 endpoints[subset_num * 2 + 1][3],
652 index,
653 index_bits);
654
655 apply_rotation(rotation, result);
656 }
657
658 static void
659 fetch_bptc_rgba_unorm_bytes(const GLubyte *map,
660 GLint rowStride, GLint i, GLint j,
661 GLubyte *texel)
662 {
663 const GLubyte *block;
664
665 block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
666
667 fetch_rgba_unorm_from_block(block, texel, (i % 4) + (j % 4) * 4);
668 }
669
670 static void
671 fetch_bptc_rgba_unorm(const GLubyte *map,
672 GLint rowStride, GLint i, GLint j,
673 GLfloat *texel)
674 {
675 GLubyte texel_bytes[4];
676
677 fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
678
679 texel[RCOMP] = UBYTE_TO_FLOAT(texel_bytes[0]);
680 texel[GCOMP] = UBYTE_TO_FLOAT(texel_bytes[1]);
681 texel[BCOMP] = UBYTE_TO_FLOAT(texel_bytes[2]);
682 texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
683 }
684
685 static void
686 fetch_bptc_srgb_alpha_unorm(const GLubyte *map,
687 GLint rowStride, GLint i, GLint j,
688 GLfloat *texel)
689 {
690 GLubyte texel_bytes[4];
691
692 fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
693
694 texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[0]);
695 texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[1]);
696 texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[2]);
697 texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
698 }
699
/**
 * Sign-extends an n_bits wide two's complement value to 32 bits.
 *
 * \param value   the value; bit n_bits - 1 is taken as its sign bit
 * \param n_bits  width of the value, from 1 to 31
 * \return \p value with all bits from n_bits upwards set when the sign bit
 *         is set, otherwise \p value unchanged
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   const uint32_t sign_bit = (uint32_t) 1 << (n_bits - 1);

   if ((uint32_t) value & sign_bit) {
      /* Build the mask with unsigned arithmetic: left-shifting the negative
       * value ~(int32_t) 0 is undefined behaviour.
       */
      const uint32_t high_bits = ~(uint32_t) 0 << n_bits;

      value = (int32_t) ((uint32_t) value | high_bits);
   }

   return value;
}
710
/**
 * Unquantizes a signed endpoint component.
 *
 * Values of 16 or more bits and the value 0 are returned unchanged. The
 * magnitude of any other value is either saturated to 0x7fff or scaled up
 * from n_endpoint_bits - 1 bits to 15 bits, and the sign is then restored.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   const bool negative = value < 0;
   int magnitude;

   if (n_endpoint_bits >= 16)
      return value;

   if (value == 0)
      return 0;

   magnitude = negative ? -value : value;

   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1)
      magnitude = 0x7fff;
   else
      magnitude = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return negative ? -magnitude : magnitude;
}
739
/**
 * Unquantizes an unsigned endpoint component.
 *
 * Values of 15 or more bits and the value 0 are returned unchanged, the
 * largest n_endpoint_bits wide value maps to 0xffff and everything else is
 * scaled up to 16 bits.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   const int max_value = (1 << n_endpoint_bits) - 1;

   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   if (value == max_value)
      return 0xffff;

   return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
}
754
755 static int
756 extract_float_endpoints(const struct bptc_float_mode *mode,
757 const uint8_t *block,
758 int bit_offset,
759 int32_t endpoints[][3],
760 bool is_signed)
761 {
762 const struct bptc_float_bitfield *bitfield;
763 int endpoint, component;
764 int n_endpoints;
765 int value;
766 int i;
767
768 if (mode->n_partition_bits)
769 n_endpoints = 4;
770 else
771 n_endpoints = 2;
772
773 memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
774
775 for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
776 value = extract_bits(block, bit_offset, bitfield->n_bits);
777 bit_offset += bitfield->n_bits;
778
779 if (bitfield->reverse) {
780 for (i = 0; i < bitfield->n_bits; i++) {
781 if (value & (1 << i))
782 endpoints[bitfield->endpoint][bitfield->component] |=
783 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
784 }
785 } else {
786 endpoints[bitfield->endpoint][bitfield->component] |=
787 value << bitfield->offset;
788 }
789 }
790
791 if (mode->transformed_endpoints) {
792 /* The endpoints are specified as signed offsets from e0 */
793 for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
794 for (component = 0; component < 3; component++) {
795 value = sign_extend(endpoints[endpoint][component],
796 mode->n_delta_bits[component]);
797 endpoints[endpoint][component] =
798 ((endpoints[0][component] + value) &
799 ((1 << mode->n_endpoint_bits) - 1));
800 }
801 }
802 }
803
804 if (is_signed) {
805 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
806 for (component = 0; component < 3; component++) {
807 value = sign_extend(endpoints[endpoint][component],
808 mode->n_endpoint_bits);
809 endpoints[endpoint][component] =
810 signed_unquantize(value, mode->n_endpoint_bits);
811 }
812 }
813 } else {
814 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
815 for (component = 0; component < 3; component++) {
816 endpoints[endpoint][component] =
817 unsigned_unquantize(endpoints[endpoint][component],
818 mode->n_endpoint_bits);
819 }
820 }
821 }
822
823 return bit_offset;
824 }
825
/**
 * Last step of the unsigned unquantization, applied to an interpolated
 * value: scales it by 31 / 64.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
831
/**
 * Last step of the signed unquantization, applied to an interpolated value:
 * scales the magnitude by 31 / 32 and, for negative values, sets bit 15
 * (0x8000) of the result.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   if (value >= 0)
      return value * 31 / 32;

   return (-value * 31 / 32) | 0x8000;
}
840
841 static void
842 fetch_rgb_float_from_block(const uint8_t *block,
843 float *result,
844 int texel,
845 bool is_signed)
846 {
847 int mode_num;
848 const struct bptc_float_mode *mode;
849 int bit_offset;
850 int partition_num;
851 int subset_num;
852 int index_bits;
853 int index;
854 int anchors_before_texel;
855 int32_t endpoints[2 * 2][3];
856 uint32_t subsets;
857 int n_subsets;
858 int component;
859 int32_t value;
860
861 if (block[0] & 0x2) {
862 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
863 bit_offset = 5;
864 } else {
865 mode_num = block[0] & 3;
866 bit_offset = 2;
867 }
868
869 mode = bptc_float_modes + mode_num;
870
871 if (mode->reserved) {
872 memset(result, 0, sizeof result[0] * 3);
873 result[3] = 1.0f;
874 return;
875 }
876
877 bit_offset = extract_float_endpoints(mode, block, bit_offset,
878 endpoints, is_signed);
879
880 if (mode->n_partition_bits) {
881 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
882 bit_offset += mode->n_partition_bits;
883
884 subsets = partition_table1[partition_num];
885 n_subsets = 2;
886 } else {
887 partition_num = 0;
888 subsets = 0;
889 n_subsets = 1;
890 }
891
892 anchors_before_texel =
893 count_anchors_before_texel(n_subsets, partition_num, texel);
894
895 /* Calculate the offset to the primary index for this texel */
896 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
897
898 subset_num = (subsets >> (texel * 2)) & 3;
899
900 index_bits = mode->n_index_bits;
901 if (is_anchor(n_subsets, partition_num, texel))
902 index_bits--;
903 index = extract_bits(block, bit_offset, index_bits);
904
905 for (component = 0; component < 3; component++) {
906 value = interpolate(endpoints[subset_num * 2][component],
907 endpoints[subset_num * 2 + 1][component],
908 index,
909 mode->n_index_bits);
910
911 if (is_signed)
912 value = finish_signed_unquantize(value);
913 else
914 value = finish_unsigned_unquantize(value);
915
916 result[component] = _mesa_half_to_float(value);
917 }
918
919 result[3] = 1.0f;
920 }
921
922 static void
923 fetch_bptc_rgb_float(const GLubyte *map,
924 GLint rowStride, GLint i, GLint j,
925 GLfloat *texel,
926 bool is_signed)
927 {
928 const GLubyte *block;
929
930 block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
931
932 fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed);
933 }
934
935 static void
936 fetch_bptc_rgb_signed_float(const GLubyte *map,
937 GLint rowStride, GLint i, GLint j,
938 GLfloat *texel)
939 {
940 fetch_bptc_rgb_float(map, rowStride, i, j, texel, true);
941 }
942
943 static void
944 fetch_bptc_rgb_unsigned_float(const GLubyte *map,
945 GLint rowStride, GLint i, GLint j,
946 GLfloat *texel)
947 {
948 fetch_bptc_rgb_float(map, rowStride, i, j, texel, false);
949 }
950
951 compressed_fetch_func
952 _mesa_get_bptc_fetch_func(mesa_format format)
953 {
954 switch (format) {
955 case MESA_FORMAT_BPTC_RGBA_UNORM:
956 return fetch_bptc_rgba_unorm;
957 case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM:
958 return fetch_bptc_srgb_alpha_unorm;
959 case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT:
960 return fetch_bptc_rgb_signed_float;
961 case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT:
962 return fetch_bptc_rgb_unsigned_float;
963 default:
964 return NULL;
965 }
966 }
967
968 static void
969 write_bits(struct bit_writer *writer, int n_bits, int value)
970 {
971 do {
972 if (n_bits + writer->pos >= 8) {
973 *(writer->dst++) = writer->buf | (value << writer->pos);
974 writer->buf = 0;
975 value >>= (8 - writer->pos);
976 n_bits -= (8 - writer->pos);
977 writer->pos = 0;
978 } else {
979 writer->buf |= value << writer->pos;
980 writer->pos += n_bits;
981 break;
982 }
983 } while (n_bits > 0);
984 }
985
/**
 * Computes the mean luminance and the mean alpha of a rectangle of 4-byte
 * texels. The luminance of a texel is the plain sum of its first three
 * components.
 *
 * \param width, height      size of the rectangle in texels; both must be
 *                           non-zero because the sums are divided by
 *                           width * height
 * \param src                first texel of the rectangle
 * \param src_rowstride      distance in bytes between two rows
 * \param average_luminance  receives the mean luminance
 * \param average_alpha      receives the mean of the fourth component
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   int total_luminance = 0;
   int total_alpha = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         total_luminance += texel[0] + texel[1] + texel[2];
         total_alpha += texel[3];
      }
   }

   *average_luminance = total_luminance / n_texels;
   *average_alpha = total_alpha / n_texels;
}
1006
/**
 * Picks two RGBA endpoints for a rectangle of 4-byte texels.
 *
 * The texels are split into a "left" and a "right" group, once by comparing
 * their luminance (sum of the first three components) with
 * \p average_luminance and, independently, once by comparing their alpha
 * with \p average_alpha. Each endpoint is the mean of its group; when one
 * group is empty both endpoints are the mean of all texels. Color and alpha
 * are handled separately throughout.
 *
 * \param width, height      size of the rectangle in texels; must be
 *                           non-zero
 * \param src                first texel of the rectangle
 * \param src_rowstride      distance in bytes between two rows
 * \param average_luminance  threshold for the color split
 * \param average_alpha      threshold for the alpha split
 * \param endpoints          receives the two endpoints
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   const int n_texels = width * height;
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* The alpha split must look at the alpha component (p[3]); it used
          * to compare the blue component against the average alpha.
          */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == n_texels) {
      /* One group is empty: use the overall mean for both endpoints. */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / n_texels;
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (n_texels - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == n_texels) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / n_texels;
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (n_texels - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   /* The first texel has to be on the same side of the midpoint as
    * endpoint 0.
    */
   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1103
1104 static void
1105 write_rgb_indices_unorm(struct bit_writer *writer,
1106 int src_width, int src_height,
1107 const uint8_t *src, int src_rowstride,
1108 uint8_t endpoints[][4])
1109 {
1110 int luminance;
1111 int endpoint_luminances[2];
1112 int endpoint;
1113 int index;
1114 int y, x;
1115
1116 for (endpoint = 0; endpoint < 2; endpoint++) {
1117 endpoint_luminances[endpoint] =
1118 endpoints[endpoint][0] +
1119 endpoints[endpoint][1] +
1120 endpoints[endpoint][2];
1121 }
1122
1123 /* If the endpoints have the same luminance then we'll just use index 0 for
1124 * all of the texels */
1125 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1126 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1127 return;
1128 }
1129
1130 for (y = 0; y < src_height; y++) {
1131 for (x = 0; x < src_width; x++) {
1132 luminance = src[0] + src[1] + src[2];
1133
1134 index = ((luminance - endpoint_luminances[0]) * 3 /
1135 (endpoint_luminances[1] - endpoint_luminances[0]));
1136 if (index < 0)
1137 index = 0;
1138 else if (index > 3)
1139 index = 3;
1140
1141 assert(x != 0 || y != 0 || index < 2);
1142
1143 write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1144
1145 src += 4;
1146 }
1147
1148 /* Pad the indices out to the block size */
1149 if (src_width < BLOCK_SIZE)
1150 write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1151
1152 src += src_rowstride - src_width * 4;
1153 }
1154
1155 /* Pad the indices out to the block size */
1156 if (src_height < BLOCK_SIZE)
1157 write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1158 }
1159
1160 static void
1161 write_alpha_indices_unorm(struct bit_writer *writer,
1162 int src_width, int src_height,
1163 const uint8_t *src, int src_rowstride,
1164 uint8_t endpoints[][4])
1165 {
1166 int index;
1167 int y, x;
1168
1169 /* If the endpoints have the same alpha then we'll just use index 0 for
1170 * all of the texels */
1171 if (endpoints[0][3] == endpoints[1][3]) {
1172 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1173 return;
1174 }
1175
1176 for (y = 0; y < src_height; y++) {
1177 for (x = 0; x < src_width; x++) {
1178 index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1179 ((int) endpoints[1][3] - endpoints[0][3]));
1180 if (index < 0)
1181 index = 0;
1182 else if (index > 7)
1183 index = 7;
1184
1185 assert(x != 0 || y != 0 || index < 4);
1186
1187 /* The first index has one less bit */
1188 write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1189
1190 src += 4;
1191 }
1192
1193 /* Pad the indices out to the block size */
1194 if (src_width < BLOCK_SIZE)
1195 write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1196
1197 src += src_rowstride - src_width * 4;
1198 }
1199
1200 /* Pad the indices out to the block size */
1201 if (src_height < BLOCK_SIZE)
1202 write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1203 }
1204
1205 static void
1206 compress_rgba_unorm_block(int src_width, int src_height,
1207 const uint8_t *src, int src_rowstride,
1208 uint8_t *dst)
1209 {
1210 int average_luminance, average_alpha;
1211 uint8_t endpoints[2][4];
1212 struct bit_writer writer;
1213 int component, endpoint;
1214
1215 get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1216 &average_luminance, &average_alpha);
1217 get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1218 average_luminance, average_alpha,
1219 endpoints);
1220
1221 writer.dst = dst;
1222 writer.pos = 0;
1223 writer.buf = 0;
1224
1225 write_bits(&writer, 5, 0x10); /* mode 4 */
1226 write_bits(&writer, 2, 0); /* rotation 0 */
1227 write_bits(&writer, 1, 0); /* index selection bit */
1228
1229 /* Write the color endpoints */
1230 for (component = 0; component < 3; component++)
1231 for (endpoint = 0; endpoint < 2; endpoint++)
1232 write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1233
1234 /* Write the alpha endpoints */
1235 for (endpoint = 0; endpoint < 2; endpoint++)
1236 write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1237
1238 write_rgb_indices_unorm(&writer,
1239 src_width, src_height,
1240 src, src_rowstride,
1241 endpoints);
1242 write_alpha_indices_unorm(&writer,
1243 src_width, src_height,
1244 src, src_rowstride,
1245 endpoints);
1246 }
1247
1248 static void
1249 compress_rgba_unorm(int width, int height,
1250 const uint8_t *src, int src_rowstride,
1251 uint8_t *dst, int dst_rowstride)
1252 {
1253 int dst_row_diff;
1254 int y, x;
1255
1256 if (dst_rowstride >= width * 4)
1257 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1258 else
1259 dst_row_diff = 0;
1260
1261 for (y = 0; y < height; y += BLOCK_SIZE) {
1262 for (x = 0; x < width; x += BLOCK_SIZE) {
1263 compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1264 MIN2(height - y, BLOCK_SIZE),
1265 src + x * 4 + y * src_rowstride,
1266 src_rowstride,
1267 dst);
1268 dst += BLOCK_BYTES;
1269 }
1270 dst += dst_row_diff;
1271 }
1272 }
1273
1274 GLboolean
1275 _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
1276 {
1277 const GLubyte *pixels;
1278 const GLubyte *tempImage = NULL;
1279 int rowstride;
1280
1281 if (srcFormat != GL_RGBA ||
1282 srcType != GL_UNSIGNED_BYTE ||
1283 ctx->_ImageTransferState ||
1284 srcPacking->SwapBytes) {
1285 /* convert image to RGBA/ubyte */
1286 GLubyte *tempImageSlices[1];
1287 int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
1288 tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
1289 if (!tempImage)
1290 return GL_FALSE; /* out of memory */
1291 tempImageSlices[0] = (GLubyte *) tempImage;
1292 _mesa_texstore(ctx, dims,
1293 baseInternalFormat,
1294 MESA_FORMAT_R8G8B8A8_UNORM,
1295 rgbaRowStride, tempImageSlices,
1296 srcWidth, srcHeight, srcDepth,
1297 srcFormat, srcType, srcAddr,
1298 srcPacking);
1299
1300 pixels = tempImage;
1301 rowstride = srcWidth * 4;
1302 } else {
1303 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
1304 srcFormat, srcType, 0, 0);
1305 rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
1306 srcFormat, srcType);
1307 }
1308
1309 compress_rgba_unorm(srcWidth, srcHeight,
1310 pixels, rowstride,
1311 dstSlices[0], dstRowStride);
1312
1313 free((void *) tempImage);
1314
1315 return GL_TRUE;
1316 }
1317
/**
 * Return the mean "luminance" of a block of RGB float texels.
 *
 * Luminance here is simply R + G + B (not divided by three).
 *
 * \param width, height  size of the block in texels
 * \param src            first texel of the block, 3 floats per texel
 * \param src_rowstride  distance in bytes between rows of \p src
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Skip whatever lies between the end of this block row and the start
       * of the next one; the stride is in bytes, the pointer in floats */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1335
/**
 * Clamp \p value to [-65504, 65504] when \p is_signed is set and to
 * [0, 65504] otherwise.  Values already inside the range are returned
 * unchanged.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1354
/**
 * Choose two RGB endpoints for a block of RGB float texels.
 *
 * Texels are split into those whose luminance (R + G + B) is below the
 * block average and the rest; endpoint 0 is the mean of the first group and
 * endpoint 1 the mean of the second.  If either group is empty both
 * endpoints are the mean of the whole block.  The endpoints are then
 * clamped with clamp_value() and, if necessary, swapped so that the first
 * texel lies on endpoint 0's side of the midpoint, which keeps the
 * most-significant bit of the first index zero.
 *
 * \param width, height      size of the block in texels
 * \param src                first texel of the block, 3 floats per texel
 * \param src_rowstride      distance in bytes between rows of \p src
 * \param average_luminance  mean of R + G + B over the block
 * \param endpoints          receives endpoints[0] and endpoints[1] as RGB
 * \param is_signed          selects the clamping range, see clamp_value()
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float totals[2][3];
   float ep_luminance[2];
   float mid;
   int n_below = 0;
   int row, col, side, c;

   memset(totals, 0, sizeof totals);

   /* Accumulate each texel into the group on its side of the average */
   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         const float lum = texel[0] + texel[1] + texel[2];

         if (lum < average_luminance) {
            side = 0;
            n_below++;
         } else {
            side = 1;
         }

         for (c = 0; c < 3; c++)
            totals[side][c] += texel[c];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   for (c = 0; c < 3; c++) {
      if (n_below == 0 || n_below == n_texels) {
         /* One group is empty: use the block mean for both endpoints */
         endpoints[0][c] = endpoints[1][c] =
            (totals[0][c] + totals[1][c]) / n_texels;
      } else {
         endpoints[0][c] = totals[0][c] / n_below;
         endpoints[1][c] = totals[1][c] / (n_texels - n_below);
      }
   }

   /* Clamp each endpoint to the range of a half float (stripping out
    * infinities) and work out its luminance */
   for (side = 0; side < 2; side++) {
      for (c = 0; c < 3; c++)
         endpoints[side][c] = clamp_value(endpoints[side][c], is_signed);

      ep_luminance[side] =
         endpoints[side][0] + endpoints[side][1] + endpoints[side][2];
   }

   mid = (ep_luminance[0] + ep_luminance[1]) / 2.0f;

   /* Swap so that the first texel and endpoint 0 are on the same side of
    * the midpoint */
   if ((src[0] + src[1] + src[2] <= mid) != (ep_luminance[0] <= mid)) {
      for (c = 0; c < 3; c++) {
         const float saved = endpoints[0][c];

         endpoints[0][c] = endpoints[1][c];
         endpoints[1][c] = saved;
      }
   }
}
1430
1431 static void
1432 write_rgb_indices_float(struct bit_writer *writer,
1433 int src_width, int src_height,
1434 const float *src, int src_rowstride,
1435 float endpoints[][3])
1436 {
1437 float luminance;
1438 float endpoint_luminances[2];
1439 int endpoint;
1440 int index;
1441 int y, x;
1442
1443 for (endpoint = 0; endpoint < 2; endpoint++) {
1444 endpoint_luminances[endpoint] =
1445 endpoints[endpoint][0] +
1446 endpoints[endpoint][1] +
1447 endpoints[endpoint][2];
1448 }
1449
1450 /* If the endpoints have the same luminance then we'll just use index 0 for
1451 * all of the texels */
1452 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1453 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1454 return;
1455 }
1456
1457 for (y = 0; y < src_height; y++) {
1458 for (x = 0; x < src_width; x++) {
1459 luminance = src[0] + src[1] + src[2];
1460
1461 index = ((luminance - endpoint_luminances[0]) * 15 /
1462 (endpoint_luminances[1] - endpoint_luminances[0]));
1463 if (index < 0)
1464 index = 0;
1465 else if (index > 15)
1466 index = 15;
1467
1468 assert(x != 0 || y != 0 || index < 8);
1469
1470 write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1471
1472 src += 3;
1473 }
1474
1475 /* Pad the indices out to the block size */
1476 if (src_width < BLOCK_SIZE)
1477 write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1478
1479 src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1480 }
1481
1482 /* Pad the indices out to the block size */
1483 if (src_height < BLOCK_SIZE)
1484 write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1485 }
1486
/**
 * Convert a float endpoint component into the integer written to the block.
 *
 * The value is converted to a half float and its bit pattern is rescaled
 * (by 64/31 for unsigned, 32/31 on the magnitude for signed) before the low
 * six bits are dropped.  For signed formats a negative value is returned as
 * a 10-bit two's complement number.  Unsigned values at or below zero give 0.
 *
 * NOTE(review): the scale factors presumably invert the decoder's
 * unquantization step; confirm against the decode path.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   bool negative = false;
   int half;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   /* Work on the magnitude and remember the sign for later */
   if (half & 0x8000) {
      half &= 0x7fff;
      negative = true;
   }

   half = (32 * half / 31) >> 6;

   return negative ? (-half & ((1 << 10) - 1)) : half;
}
1516
1517 static void
1518 compress_rgb_float_block(int src_width, int src_height,
1519 const float *src, int src_rowstride,
1520 uint8_t *dst,
1521 bool is_signed)
1522 {
1523 float average_luminance;
1524 float endpoints[2][3];
1525 struct bit_writer writer;
1526 int component, endpoint;
1527 int endpoint_value;
1528
1529 average_luminance =
1530 get_average_luminance_float(src_width, src_height, src, src_rowstride);
1531 get_endpoints_float(src_width, src_height, src, src_rowstride,
1532 average_luminance, endpoints, is_signed);
1533
1534 writer.dst = dst;
1535 writer.pos = 0;
1536 writer.buf = 0;
1537
1538 write_bits(&writer, 5, 3); /* mode 3 */
1539
1540 /* Write the endpoints */
1541 for (endpoint = 0; endpoint < 2; endpoint++) {
1542 for (component = 0; component < 3; component++) {
1543 endpoint_value =
1544 get_endpoint_value(endpoints[endpoint][component], is_signed);
1545 write_bits(&writer, 10, endpoint_value);
1546 }
1547 }
1548
1549 write_rgb_indices_float(&writer,
1550 src_width, src_height,
1551 src, src_rowstride,
1552 endpoints);
1553 }
1554
1555 static void
1556 compress_rgb_float(int width, int height,
1557 const float *src, int src_rowstride,
1558 uint8_t *dst, int dst_rowstride,
1559 bool is_signed)
1560 {
1561 int dst_row_diff;
1562 int y, x;
1563
1564 if (dst_rowstride >= width * 4)
1565 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1566 else
1567 dst_row_diff = 0;
1568
1569 for (y = 0; y < height; y += BLOCK_SIZE) {
1570 for (x = 0; x < width; x += BLOCK_SIZE) {
1571 compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1572 MIN2(height - y, BLOCK_SIZE),
1573 src + x * 3 +
1574 y * src_rowstride / sizeof (float),
1575 src_rowstride,
1576 dst,
1577 is_signed);
1578 dst += BLOCK_BYTES;
1579 }
1580 dst += dst_row_diff;
1581 }
1582 }
1583
1584 static GLboolean
1585 texstore_bptc_rgb_float(TEXSTORE_PARAMS,
1586 bool is_signed)
1587 {
1588 const float *pixels;
1589 const float *tempImage = NULL;
1590 int rowstride;
1591
1592 if (srcFormat != GL_RGB ||
1593 srcType != GL_FLOAT ||
1594 ctx->_ImageTransferState ||
1595 srcPacking->SwapBytes) {
1596 /* convert image to RGB/float */
1597 GLfloat *tempImageSlices[1];
1598 int rgbRowStride = 3 * srcWidth * sizeof(GLfloat);
1599 tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLfloat));
1600 if (!tempImage)
1601 return GL_FALSE; /* out of memory */
1602 tempImageSlices[0] = (GLfloat *) tempImage;
1603 _mesa_texstore(ctx, dims,
1604 baseInternalFormat,
1605 MESA_FORMAT_RGB_FLOAT32,
1606 rgbRowStride, (GLubyte **)tempImageSlices,
1607 srcWidth, srcHeight, srcDepth,
1608 srcFormat, srcType, srcAddr,
1609 srcPacking);
1610
1611 pixels = tempImage;
1612 rowstride = srcWidth * sizeof(float) * 3;
1613 } else {
1614 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
1615 srcFormat, srcType, 0, 0);
1616 rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
1617 srcFormat, srcType);
1618 }
1619
1620 compress_rgb_float(srcWidth, srcHeight,
1621 pixels, rowstride,
1622 dstSlices[0], dstRowStride,
1623 is_signed);
1624
1625 free((void *) tempImage);
1626
1627 return GL_TRUE;
1628 }
1629
1630 GLboolean
1631 _mesa_texstore_bptc_rgb_signed_float(TEXSTORE_PARAMS)
1632 {
1633 assert(dstFormat == MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT);
1634
1635 return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
1636 dstFormat, dstRowStride, dstSlices,
1637 srcWidth, srcHeight, srcDepth,
1638 srcFormat, srcType,
1639 srcAddr, srcPacking,
1640 true /* signed */);
1641 }
1642
1643 GLboolean
1644 _mesa_texstore_bptc_rgb_unsigned_float(TEXSTORE_PARAMS)
1645 {
1646 assert(dstFormat == MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT);
1647
1648 return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
1649 dstFormat, dstRowStride, dstSlices,
1650 srcWidth, srcHeight, srcDepth,
1651 srcFormat, srcType,
1652 srcAddr, srcPacking,
1653 false /* unsigned */);
1654 }