2 * Copyright 2015 Philip Taylor <philip@zaynar.co.uk>
3 * Copyright 2018 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
26 * \file texcompress_astc.c
28 * Decompression code for GL_KHR_texture_compression_astc_ldr, which is just
31 * The ASTC 2D LDR decoder (without the sRGB part) was copied from the OASTC
32 * library written by Philip Taylor. I added sRGB support and adjusted it for
36 #include "texcompress_astc.h"
38 #include "util/half_float.h"
40 #include <cstdlib> // for abort() on windows
// File-local debug switches, both off by default.
// VERBOSE_DECODE guards printf / printf_bits tracing in the decoding routines.
// VERBOSE_WRITE presumably guards the trace printf calls in the bit-vector
// writer (the guarding statements are not visible in this excerpt) - confirm.
42 static bool VERBOSE_DECODE
= false;
43 static bool VERBOSE_WRITE
= false;
// Converts a 16-bit value v to an 8-bit result by chaining two helpers:
// _mesa_uint16_div_64k_to_half(v) first, then _mesa_half_to_unorm8() on that
// intermediate value.
46 uint16_div_64k_to_half_to_unorm8(uint16_t v
)
48 return _mesa_half_to_unorm8(_mesa_uint16_div_64k_to_half(v
));
// Error codes returned by the decoding routines as decode_error::type.
// The enclosing declaration and the ok enumerator (compared against by the
// callers) are outside the visible lines of this excerpt.
// Returned by decode_void_extent.
56 unsupported_hdr_void_extent
,
// The two reserved block-mode encodings rejected by decode_block_mode.
57 reserved_block_mode_1
,
58 reserved_block_mode_2
,
// Returned by Block::decode when dual_plane is set and num_parts > 3.
59 dual_plane_and_too_many_partitions
,
// Returned by decode_void_extent when min >= max for s or t.
60 invalid_range_in_void_extent
,
// Returned by Block::decode when the weight grid is larger than the block.
61 weight_grid_exceeds_block_size
,
62 invalid_colour_endpoints_size
,
// Returned by Block::decode when num_cem_values > 18.
63 invalid_colour_endpoints_count
,
75 /* Based on the Color Unquantization Parameters table,
76 * plus the bit-only representations, sorted by increasing size
78 static cem_range cem_ranges
[] = {
// CAT_BITS_n(a, b, ...): concatenate n values (used with single bits below)
// into one integer, first argument in the most significant position.
98 #define CAT_BITS_2(a, b) ( ((a) << 1) | (b) )
99 #define CAT_BITS_3(a, b, c) ( ((a) << 2) | ((b) << 1) | (c) )
100 #define CAT_BITS_4(a, b, c, d) ( ((a) << 3) | ((b) << 2) | ((c) << 1) | (d) )
101 #define CAT_BITS_5(a, b, c, d, e) ( ((a) << 4) | ((b) << 3) | ((c) << 2) | ((d) << 1) | (e) )
/**
 * Unpack 5n+8 bits from 'in' into 5 output values.
 * If n <= 4 then T should be uint32_t, else it must be uint64_t.
 *
 * The packed block interleaves five n-bit fields m0..m4 with eight bits
 * T0..T7 that jointly encode five trits t0..t4 (each 0..2). Every output
 * value is (trit << n) | m.
 *
 * \param n    number of plain bits per value (0..6)
 * \param in   the packed block, least significant bit first
 * \param out  receives exactly 5 values
 */
template <typename T>
static void unpack_trit_block(int n, T in, uint8_t *out)
{
   /* n > 6 would overflow uint8_t outputs; n < 0 would make the shifts
    * below undefined.
    */
   assert(n >= 0 && n <= 6);

   /* Positions of the T bits and m fields inside the packed block. */
   const uint8_t T0 = (in >> (n)) & 0x1;
   const uint8_t T1 = (in >> (n + 1)) & 0x1;
   const uint8_t T2 = (in >> (2 * n + 2)) & 0x1;
   const uint8_t T3 = (in >> (2 * n + 3)) & 0x1;
   const uint8_t T4 = (in >> (3 * n + 4)) & 0x1;
   const uint8_t T5 = (in >> (4 * n + 5)) & 0x1;
   const uint8_t T6 = (in >> (4 * n + 6)) & 0x1;
   const uint8_t T7 = (in >> (5 * n + 7)) & 0x1;
   const uint8_t mmask = (1 << n) - 1;
   const uint8_t m0 = (in >> (0)) & mmask;
   const uint8_t m1 = (in >> (n + 2)) & mmask;
   const uint8_t m2 = (in >> (2 * n + 4)) & mmask;
   const uint8_t m3 = (in >> (3 * n + 5)) & mmask;
   const uint8_t m4 = (in >> (4 * n + 7)) & mmask;

   /* First stage: derive t4, t3 and the 5-bit intermediate C. */
   uint8_t C;
   uint8_t t4, t3, t2, t1, t0;
   if (((T4 << 2) | (T3 << 1) | T2) == 0x7) {
      C = (T7 << 4) | (T6 << 3) | (T5 << 2) | (T1 << 1) | T0;
      t4 = t3 = 2;
   } else {
      C = (T4 << 4) | (T3 << 3) | (T2 << 2) | (T1 << 1) | T0;
      if (((T6 << 1) | T5) == 0x3) {
         t4 = 2;
         t3 = T7;
      } else {
         t4 = T7;
         t3 = (T6 << 1) | T5;
      }
   }

   /* Second stage: derive t2, t1, t0 from C. */
   if ((C & 0x3) == 0x3) {
      t2 = 2;
      t1 = (C >> 4) & 0x1;
      const uint8_t C3 = (C >> 3) & 0x1;
      const uint8_t C2 = (C >> 2) & 0x1;
      /* C2 AND NOT C3, written on single bits to avoid relying on the
       * sign-extended complement of a promoted int.
       */
      t0 = (C3 << 1) | (C2 & (C3 ^ 0x1));
   } else if (((C >> 2) & 0x3) == 0x3) {
      t2 = 2;
      t1 = 2;
      t0 = C & 0x3;
   } else {
      t2 = (C >> 4) & 0x1;
      t1 = (C >> 2) & 0x3;
      const uint8_t C1 = (C >> 1) & 0x1;
      const uint8_t C0 = (C >> 0) & 0x1;
      t0 = (C1 << 1) | (C0 & (C1 ^ 0x1));
   }

   out[0] = (t0 << n) | m0;
   out[1] = (t1 << n) | m1;
   out[2] = (t2 << n) | m2;
   out[3] = (t3 << n) | m3;
   out[4] = (t4 << n) | m4;
}
/**
 * Unpack 3n+7 bits from 'in' into 3 output values.
 *
 * The packed block interleaves three n-bit fields m0..m2 with seven bits
 * Q0..Q6 that jointly encode three quints q0..q2 (each 0..4). Every output
 * value is (quint << n) | m.
 *
 * \param n    number of plain bits per value (0..5)
 * \param in   the packed block, least significant bit first
 * \param out  receives exactly 3 values
 */
static void unpack_quint_block(int n, uint32_t in, uint8_t *out)
{
   /* n > 5 would overflow uint8_t outputs; n < 0 would make the shifts
    * below undefined.
    */
   assert(n >= 0 && n <= 5);

   /* Positions of the Q bits and m fields inside the packed block. */
   const uint8_t Q0 = (in >> (n)) & 0x1;
   const uint8_t Q1 = (in >> (n + 1)) & 0x1;
   const uint8_t Q2 = (in >> (n + 2)) & 0x1;
   const uint8_t Q3 = (in >> (2 * n + 3)) & 0x1;
   const uint8_t Q4 = (in >> (2 * n + 4)) & 0x1;
   const uint8_t Q5 = (in >> (3 * n + 5)) & 0x1;
   const uint8_t Q6 = (in >> (3 * n + 6)) & 0x1;
   const uint8_t mmask = (1 << n) - 1;
   const uint8_t m0 = (in >> (0)) & mmask;
   const uint8_t m1 = (in >> (n + 3)) & mmask;
   const uint8_t m2 = (in >> (2 * n + 5)) & mmask;

   const uint8_t notQ0 = Q0 ^ 0x1;

   uint8_t q2, q1, q0;
   if (((Q6 << 3) | (Q5 << 2) | (Q2 << 1) | Q1) == 0x3) {
      /* Q[2:1] == 11 and Q[6:5] == 00: both low quints are 4. */
      q2 = (Q0 << 2) | ((Q4 & notQ0) << 1) | (Q3 & notQ0);
      q1 = 4;
      q0 = 4;
   } else {
      uint8_t C;
      if (((Q2 << 1) | Q1) == 0x3) {
         q2 = 4;
         C = (Q4 << 4) | (Q3 << 3) | ((Q6 ^ 0x1) << 2) | ((Q5 ^ 0x1) << 1) | Q0;
      } else {
         q2 = (Q6 << 1) | Q5;
         C = (Q4 << 4) | (Q3 << 3) | (Q2 << 2) | (Q1 << 1) | Q0;
      }

      if ((C & 0x7) == 0x5) {
         q1 = 4;
         q0 = (C >> 3) & 0x3;
      } else {
         q1 = (C >> 3) & 0x3;
         q0 = C & 0x7;
      }
   }

   out[0] = (q0 << n) | m0;
   out[1] = (q1 << n) | m1;
   out[2] = (q2 << n) | m2;
}
// Constructor from four int components. Each component is asserted to lie
// in 0..255; the statements that store them are not visible in this excerpt.
221 uint8x4_t(int a
, int b
, int c
, int d
)
223 assert(0 <= a
&& a
<= 255);
224 assert(0 <= b
&& b
<= 255);
225 assert(0 <= c
&& c
<= 255);
226 assert(0 <= d
&& d
<= 255);
// Alternative factory: instead of asserting, clamps every component into
// 0..255 with MAX2 / MIN2 and stores it into r.v[0..3].
233 static uint8x4_t
clamped(int a
, int b
, int c
, int d
)
236 r
.v
[0] = MAX2(0, MIN2(255, a
));
237 r
.v
[1] = MAX2(0, MIN2(255, b
));
238 r
.v
[2] = MAX2(0, MIN2(255, c
));
239 r
.v
[3] = MAX2(0, MIN2(255, d
));
// Returns the colour ((r+b)>>1, (g+b)>>1, b, a): red and green are averaged
// with blue, blue and alpha pass through. Uses the asserting uint8x4_t
// constructor, so the results must already be within 0..255.
244 static uint8x4_t
blue_contract(int r
, int g
, int b
, int a
)
246 return uint8x4_t((r
+b
) >> 1, (g
+b
) >> 1, b
, a
);
// Same computation as blue_contract, but the result is built with
// uint8x4_t::clamped so out-of-range components are clamped to 0..255.
249 static uint8x4_t
blue_contract_clamped(int r
, int g
, int b
, int a
)
251 return uint8x4_t::clamped((r
+b
) >> 1, (g
+b
) >> 1, b
, a
);
/**
 * ASTC bit_transfer_signed procedure.
 *
 * Moves the top bit (0x80) of 'a' into the top bit of 'b' (after shifting
 * 'b' right by one), then turns the remaining bits of 'a' into a signed
 * 6-bit offset in the range -32..31.
 *
 * \param a  in: 8-bit value; out: signed offset
 * \param b  in: 8-bit value; out: (b >> 1) with bit 7 taken from 'a'
 */
static void bit_transfer_signed(int &a, int &b)
{
   b >>= 1;
   b |= a & 0x80;
   a >>= 1;
   a &= 0x3f;
   /* Sign-extend from bit 5. */
   if (a & 0x20)
      a -= 0x40;
}
/**
 * Integer hash used by the ASTC partition-selection function.
 */
static uint32_t hash52(uint32_t p)
{
   p ^= p >> 15;
   p -= p << 17;
   p += p << 7;
   p += p << 4;
   p ^= p >> 5;
   p += p << 16;
   p ^= p >> 7;
   p ^= p >> 3;
   p ^= p << 6;
   p ^= p >> 17;
   return p;
}

/**
 * ASTC partition-selection function: returns the partition index of the
 * texel at (x, y, z).
 *
 * \param seed            partition pattern index from the block
 * \param x, y, z         texel coordinates inside the block
 * \param partitioncount  number of partitions in the block
 * \param small_block     non-zero for small blocks; coordinates are doubled
 * \return partition number in 0..partitioncount-1
 */
static int select_partition(int seed, int x, int y, int z, int partitioncount,
                            int small_block)
{
   if (small_block) {
      x <<= 1;
      y <<= 1;
      z <<= 1;
   }

   seed += (partitioncount - 1) * 1024;
   uint32_t rnum = hash52(seed);

   /* Twelve 4-bit seeds taken from different positions of the hash. */
   uint8_t seed1 = rnum & 0xF;
   uint8_t seed2 = (rnum >> 4) & 0xF;
   uint8_t seed3 = (rnum >> 8) & 0xF;
   uint8_t seed4 = (rnum >> 12) & 0xF;
   uint8_t seed5 = (rnum >> 16) & 0xF;
   uint8_t seed6 = (rnum >> 20) & 0xF;
   uint8_t seed7 = (rnum >> 24) & 0xF;
   uint8_t seed8 = (rnum >> 28) & 0xF;
   uint8_t seed9 = (rnum >> 18) & 0xF;
   uint8_t seed10 = (rnum >> 22) & 0xF;
   uint8_t seed11 = (rnum >> 26) & 0xF;
   uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;

   /* Square each seed (max 15 * 15 = 225 still fits in uint8_t). */
   seed1 *= seed1;
   seed2 *= seed2;
   seed3 *= seed3;
   seed4 *= seed4;
   seed5 *= seed5;
   seed6 *= seed6;
   seed7 *= seed7;
   seed8 *= seed8;
   seed9 *= seed9;
   seed10 *= seed10;
   seed11 *= seed11;
   seed12 *= seed12;

   int sh1, sh2, sh3;
   if (seed & 1) {
      sh1 = (seed & 2 ? 4 : 5);
      sh2 = (partitioncount == 3 ? 6 : 5);
   } else {
      sh1 = (partitioncount == 3 ? 6 : 5);
      sh2 = (seed & 2 ? 4 : 5);
   }
   sh3 = (seed & 0x10) ? sh1 : sh2;

   seed1 >>= sh1;
   seed2 >>= sh2;
   seed3 >>= sh1;
   seed4 >>= sh2;
   seed5 >>= sh1;
   seed6 >>= sh2;
   seed7 >>= sh1;
   seed8 >>= sh2;
   seed9 >>= sh3;
   seed10 >>= sh3;
   seed11 >>= sh3;
   seed12 >>= sh3;

   int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
   int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
   int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
   int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);

   a &= 0x3F;
   b &= 0x3F;
   c &= 0x3F;
   d &= 0x3F;

   /* Unused partitions can never win the comparison below. */
   if (partitioncount < 4)
      d = 0;
   if (partitioncount < 3)
      c = 0;

   if (a >= b && a >= c && a >= d)
      return 0;
   else if (b >= c && b >= d)
      return 1;
   else if (c >= d)
      return 2;
   else
      return 3;
}
/**
 * Read-only view of one 128-bit block as four 32-bit words, with bit 0 being
 * the least significant bit of data[0].
 */
struct InputBitVector
{
   uint32_t data[4];

   /**
    * Debug helper: print the 128-bit vector as text, most significant bit
    * first, showing bits [offset, offset+count) as 0/1 and every other
    * position as '.', followed by a printf-style message.
    */
   void printf_bits(int offset, int count, const char *fmt = "", ...)
   {
      char out[129];
      memset(out, '.', 128);
      out[128] = '\0';
      int idx = offset;
      for (int i = 0; i < count; ++i) {
         out[127 - idx] = ((data[idx >> 5] >> (idx & 31)) & 1) ? '1' : '0';
         ++idx;
      }
      printf("%s ", out);
      va_list ap;
      va_start(ap, fmt);
      vprintf(fmt, ap);
      va_end(ap);
      printf("\n");
   }

   /**
    * Return 'count' bits (0..31) starting at bit 'offset', right-aligned.
    *
    * Each word contributes through at most one of its shift statements; the
    * range checks keep every shift amount below 32.
    */
   uint32_t get_bits(int offset, int count)
   {
      assert(count >= 0 && count < 32);

      uint32_t out = 0;
      if (offset < 32)
         out |= data[0] >> offset;

      if (0 < offset && offset <= 32)
         out |= data[1] << (32 - offset);
      if (32 < offset && offset < 64)
         out |= data[1] >> (offset - 32);

      if (32 < offset && offset <= 64)
         out |= data[2] << (64 - offset);
      if (64 < offset && offset < 96)
         out |= data[2] >> (offset - 64);

      if (64 < offset && offset <= 96)
         out |= data[3] << (96 - offset);
      if (96 < offset && offset < 128)
         out |= data[3] >> (offset - 96);

      /* Build the mask in unsigned arithmetic: with a signed 1 the
       * expression overflows int when count == 31.
       */
      out &= (UINT32_C(1) << count) - 1;
      return out;
   }

   /**
    * Return 'count' bits (0..63) starting at bit 'offset', right-aligned.
    */
   uint64_t get_bits64(int offset, int count)
   {
      assert(count >= 0 && count < 64);

      uint64_t out = 0;
      if (offset < 32)
         out |= data[0] >> offset;

      if (offset <= 32)
         out |= (uint64_t)data[1] << (32 - offset);
      if (32 < offset && offset < 64)
         out |= data[1] >> (offset - 32);

      if (0 < offset && offset <= 64)
         out |= (uint64_t)data[2] << (64 - offset);
      if (64 < offset && offset < 96)
         out |= data[2] >> (offset - 64);

      if (32 < offset && offset <= 96)
         out |= (uint64_t)data[3] << (96 - offset);
      if (96 < offset && offset < 128)
         out |= data[3] >> (offset - 96);

      out &= ((uint64_t)1 << count) - 1;
      return out;
   }

   /**
    * Return the 'count' bits that end just below bit 'offset' (that is,
    * bits [offset-count, offset)) in reversed bit order.
    */
   uint32_t get_bits_rev(int offset, int count)
   {
      assert(offset >= count);
      uint32_t tmp = get_bits(offset - count, count);
      uint32_t out = 0;
      for (int i = 0; i < count; ++i)
         out |= ((tmp >> i) & 1) << (count - 1 - i);
      return out;
   }
};
// Bit-vector writer: values are ORed into data[] at a running bit position
// named offset, and every append asserts that offset + size stays within
// 128 bits. Several statements of this struct are not visible in this
// excerpt, so the notes below cover only what the visible lines show.
451 struct OutputBitVector
459 memset(data
, 0, sizeof(data
));
// Append the low size bits of a 32-bit value. The assert requires that
// value has no bits set at or above position size.
462 void append(uint32_t value
, int size
)
465 printf("append offset=%d size=%d values=0x%x\n", offset
, size
, value
);
467 assert(offset
+ size
<= 128);
471 assert((value
>> size
) == 0);
// c is the number of bits that still fit in the current 32-bit word.
474 int c
= MIN2(size
, 32 - (offset
& 31));
475 data
[offset
>> 5] |= (value
<< (offset
& 31));
// 64-bit variant of append, with the same assertions and word arithmetic.
482 void append64(uint64_t value
, int size
)
485 printf("append offset=%d size=%d values=0x%llx\n", offset
, size
, (unsigned long long)value
);
487 assert(offset
+ size
<= 128);
491 assert((value
>> size
) == 0);
494 int c
= MIN2(size
, 32 - (offset
& 31));
495 data
[offset
>> 5] |= (value
<< (offset
& 31));
// Append bits taken from another vector v: whole 32-bit words first, then a
// masked remainder of size bits.
502 void append(OutputBitVector
&v
, int size
)
505 printf("append vector offset=%d size=%d\n", offset
, size
);
507 assert(offset
+ size
<= 128);
510 append(v
.data
[i
++], 32);
514 append(v
.data
[i
] & ((1 << size
) - 1), size
);
// Copy the first size bits of v into the top end of this vector in reverse
// order: bit i of v is ORed into bit 127 - i. The running offset is not used.
517 void append_end(OutputBitVector
&v
, int size
)
519 for (int i
= 0; i
< size
; ++i
)
520 data
[(127 - i
) >> 5] |= ((v
.data
[i
>> 5] >> (i
& 31)) & 1) << ((127 - i
) & 31);
523 /* Insert the given number of '1' bits. (We could use 0s instead, but 1s are
524 * more likely to flush out bugs where we accidentally read undefined bits.)
529 printf("skip offset=%d size=%d\n", offset
, size
);
531 assert(offset
+ size
<= 128);
533 append(0xffffffff, 32);
537 append(0xffffffff >> (32 - size
), size
);
// Constructor: stores the block dimensions and the srgb / output_unorm8
// flags in the members of the same names.
545 Decoder(int block_w
, int block_h
, int block_d
, bool srgb
, bool output_unorm8
)
546 : block_w(block_w
), block_h(block_h
), block_d(block_d
), srgb(srgb
),
547 output_unorm8(output_unorm8
) {}
// Decodes one block read from in and writes the texels to output; returns a
// decode_error::type. Defined later in the file.
549 decode_error::type
decode(const uint8_t *in
, uint16_t *output
) const;
// Block dimensions in texels.
551 int block_w
, block_h
, block_d
;
// output_unorm8 selects the 8-bit output path instead of FP16; other code in
// this file asserts that srgb is only used together with output_unorm8.
552 bool srgb
, output_unorm8
;
// Members of the per-block decoding state (the struct header and several
// fields are not visible in this excerpt). Fields are grouped by the member
// function that fills them in, as the existing comments state.
558 bool bogus_colour_endpoints
;
563 int colour_component_selector
;
// Weight grid dimensions; compared against the block size in decode().
565 int wt_w
, wt_h
, wt_d
;
// Void-extent fields, filled in by decode_void_extent().
571 int void_extent_min_s
;
572 int void_extent_max_s
;
573 int void_extent_min_t
;
574 int void_extent_max_t
;
575 uint16_t void_extent_colour_r
;
576 uint16_t void_extent_colour_g
;
577 uint16_t void_extent_colour_b
;
578 uint16_t void_extent_colour_a
;
// Set by decode_cem(): number of extra CEM bits stored below the weights,
// and the bit offset where colour endpoint data starts (29 or 17).
581 int num_extra_cem_bits
;
582 int colour_endpoint_data_offset
;
589 /* Calculated by unpack_weights(): */
590 uint8_t weights_quant
[64 + 4]; /* max 64 values, plus padding for overflows in trit parsing */
592 /* Calculated by unquantise_weights(): */
593 uint8_t weights
[64 + 18]; /* max 64 values, plus padding for the infill interpolation */
595 /* Calculated by unpack_colour_endpoints(): */
596 uint8_t colour_endpoints_quant
[18 + 4]; /* max 18 values, plus padding for overflows in trit parsing */
598 /* Calculated by unquantise_colour_endpoints(): */
599 uint8_t colour_endpoints
[18];
601 /* Calculated by calculate_from_weights(): */
609 /* Calculated by calculate_remaining_bits(): */
612 /* Calculated by calculate_colour_endpoints_size(): */
613 int colour_endpoint_bits
;
619 /* Calculated by compute_infill_weights(); */
620 uint8_t infill_weights
[2][216]; /* large enough for 6x6x6 */
622 /* Calculated by decode_colour_endpoints(); */
// Indexed as [endpoint 0 or 1][partition].
623 uint8x4_t endpoints_decoded
[2][4];
// Helper steps used by decode().
625 void calculate_from_weights();
626 void calculate_remaining_bits();
627 decode_error::type
calculate_colour_endpoints_size();
629 void unquantise_weights();
630 void unquantise_colour_endpoints();
// Entry point: parse one block for the given decoder configuration.
632 decode_error::type
decode(const Decoder
&decoder
, InputBitVector in
);
634 decode_error::type
decode_block_mode(InputBitVector in
);
635 decode_error::type
decode_void_extent(InputBitVector in
);
636 void decode_cem(InputBitVector in
);
637 void unpack_colour_endpoints(InputBitVector in
);
638 void decode_colour_endpoints();
639 void unpack_weights(InputBitVector in
);
640 void compute_infill_weights(int block_w
, int block_h
, int block_d
);
// Writes the decoded texels of this block to output.
642 void write_decoded(const Decoder
&decoder
, uint16_t *output
);
// Decodes one block: copies 16 bytes from in into an InputBitVector, runs
// Block::decode on it and, when that returns decode_error::ok, writes the
// texels with write_decoded. Otherwise every texel of the block (4 uint16_t
// components each) is filled with an error colour.
646 decode_error::type
Decoder::decode(const uint8_t *in
, uint16_t *output
) const
649 InputBitVector in_vec
;
650 memcpy(&in_vec
.data
, in
, 16);
651 decode_error::type err
= blk
.decode(*this, in_vec
);
652 if (err
== decode_error::ok
) {
653 blk
.write_decoded(*this, output
);
655 /* Fill output with the error colour */
656 for (int i
= 0; i
< block_w
* block_h
* block_d
; ++i
) {
// First variant: 8-bit values (presumably the output_unorm8 path; the
// condition is not visible in this excerpt).
658 output
[i
*4+0] = 0xff;
660 output
[i
*4+2] = 0xff;
661 output
[i
*4+3] = 0xff;
// Second variant: the same colour written with the FP16 constants.
663 assert(!srgb
); /* srgb must use unorm8 */
665 output
[i
*4+0] = FP16_ONE
;
666 output
[i
*4+1] = FP16_ZERO
;
667 output
[i
*4+2] = FP16_ONE
;
668 output
[i
*4+3] = FP16_ONE
;
// Parses a void-extent block. All fields are read from fixed bit positions:
// one flag at bit 9, four 13-bit extent coordinates at bits 12, 25, 38 and
// 51, and four 16-bit colour components at bits 64, 80, 96 and 112.
// Returns decode_error::ok, or an error for the HDR variant or an illegal
// coordinate range.
676 decode_error::type
Block::decode_void_extent(InputBitVector block
)
680 is_void_extent
= true;
681 void_extent_d
= block
.get_bits(9, 1);
682 void_extent_min_s
= block
.get_bits(12, 13);
683 void_extent_max_s
= block
.get_bits(25, 13);
684 void_extent_min_t
= block
.get_bits(38, 13);
685 void_extent_max_t
= block
.get_bits(51, 13);
686 void_extent_colour_r
= block
.get_bits(64, 16);
687 void_extent_colour_g
= block
.get_bits(80, 16);
688 void_extent_colour_b
= block
.get_bits(96, 16);
689 void_extent_colour_a
= block
.get_bits(112, 16);
691 /* TODO: maybe we should do something useful with the extent coordinates? */
694 return decode_error::unsupported_hdr_void_extent
;
// All four coordinates being 0x1fff (all 13 bits set) is tested as a special
// case before the range check below.
697 if (void_extent_min_s
== 0x1fff && void_extent_max_s
== 0x1fff
698 && void_extent_min_t
== 0x1fff && void_extent_max_t
== 0x1fff) {
704 /* Check for illegal encoding */
705 if (void_extent_min_s
>= void_extent_max_s
|| void_extent_min_t
>= void_extent_max_t
) {
706 return decode_error::invalid_range_in_void_extent
;
710 return decode_error::ok
;
// Decodes the 11-bit block mode at the start of the block: the dual_plane
// and high_prec flags, the weight range and the weight grid size. The
// patterns passed to printf_bits name the bit layout handled by each branch.
// Returns decode_error::ok, a reserved_block_mode error, or the result of
// decode_void_extent for a void-extent block.
713 decode_error::type
Block::decode_block_mode(InputBitVector in
)
715 dual_plane
= in
.get_bits(10, 1);
716 high_prec
= in
.get_bits(9, 1);
// Layouts whose two lowest bits are not both zero.
718 if (in
.get_bits(0, 2) != 0x0) {
// Weight range: bits 0..1 form the upper part, bit 4 the lowest bit.
719 wt_range
= (in
.get_bits(0, 2) << 1) | in
.get_bits(4, 1);
720 int a
= in
.get_bits(5, 2);
721 int b
= in
.get_bits(7, 2);
722 switch (in
.get_bits(2, 2)) {
725 in
.printf_bits(0, 11, "DHBBAAR00RR");
731 in
.printf_bits(0, 11, "DHBBAAR01RR");
737 in
.printf_bits(0, 11, "DHBBAAR10RR");
742 if ((b
& 0x2) == 0) {
744 in
.printf_bits(0, 11, "DH0BAAR11RR");
749 in
.printf_bits(0, 11, "DH1BAAR11RR");
750 wt_w
= (b
& 0x1) + 2;
// Layouts whose two lowest bits are zero: void extent and reserved
// encodings are filtered out first.
756 if (in
.get_bits(6, 3) == 0x7) {
757 if (in
.get_bits(0, 9) == 0x1fc) {
759 in
.printf_bits(0, 11, "xx111111100 (void extent)");
760 return decode_void_extent(in
);
763 in
.printf_bits(0, 11, "xx111xxxx00");
764 return decode_error::reserved_block_mode_1
;
767 if (in
.get_bits(0, 4) == 0x0) {
769 in
.printf_bits(0, 11, "xxxxxxx0000");
770 return decode_error::reserved_block_mode_2
;
// Bits 1..3 are read together; bit 1 is zero on this path (bits 0..1 are
// both zero here), so ORing in bit 4 places it in the lowest position.
773 wt_range
= in
.get_bits(1, 3) | in
.get_bits(4, 1);
774 int a
= in
.get_bits(5, 2);
777 switch (in
.get_bits(7, 2)) {
780 in
.printf_bits(0, 11, "DH00AARRR00");
786 in
.printf_bits(0, 11, "DH01AARRR00");
791 if (in
.get_bits(5, 1) == 0) {
793 in
.printf_bits(0, 11, "DH1100RRR00");
798 in
.printf_bits(0, 11, "DH1101RRR00");
// In this layout the positions of the D and H flags (bits 9..10) hold B.
805 in
.printf_bits(0, 11, "BB10AARRR00");
806 b
= in
.get_bits(9, 2);
814 return decode_error::ok
;
// Decodes the colour endpoint mode (CEM) of every partition into cems[] and
// sets cem_base_class, is_multi_cem, partition_index, extra_cem_bits,
// num_extra_cem_bits and colour_endpoint_data_offset.
// The first part handles blocks with a 10-bit partition index and a 6-bit
// CEM field; the last part handles blocks with a 4-bit CEM at bit 13 and no
// partition index. The condition selecting between them is not visible in
// this excerpt (presumably num_parts > 1 - confirm).
817 void Block::decode_cem(InputBitVector in
)
819 cems
[0] = cems
[1] = cems
[2] = cems
[3] = -1;
821 num_extra_cem_bits
= 0;
826 partition_index
= in
.get_bits(13, 10);
828 in
.printf_bits(13, 10, "partition ID (%d)", partition_index
);
830 uint32_t cem
= in
.get_bits(23, 6);
// Low two bits zero: one CEM shared by all partitions.
832 if ((cem
& 0x3) == 0x0) {
834 cem_base_class
= cem
>> 2;
835 is_multi_cem
= false;
837 for (int i
= 0; i
< num_parts
; ++i
)
841 in
.printf_bits(23, 6, "CEM (single, %d)", cem
);
// Otherwise each partition has its own CEM: a class bit c (added to the
// base class) and two mode bits m.
844 cem_base_class
= (cem
& 0x3) - 1;
848 in
.printf_bits(23, 6, "CEM (multi, base class %d)", cem_base_class
);
// The extra mode bits are stored immediately below the weight data, which
// occupies the top weight_bits bits of the block.
850 int offset
= 128 - weight_bits
;
852 if (num_parts
== 2) {
853 if (VERBOSE_DECODE
) {
854 in
.printf_bits(25, 4, "M0M0 C1 C0");
855 in
.printf_bits(offset
- 2, 2, "M1M1");
858 uint32_t c0
= in
.get_bits(25, 1);
859 uint32_t c1
= in
.get_bits(26, 1);
861 extra_cem_bits
= c0
+ c1
;
863 num_extra_cem_bits
= 2;
865 uint32_t m0
= in
.get_bits(27, 2);
866 uint32_t m1
= in
.get_bits(offset
- 2, 2);
868 cems
[0] = ((cem_base_class
+ c0
) << 2) | m0
;
869 cems
[1] = ((cem_base_class
+ c1
) << 2) | m1
;
871 } else if (num_parts
== 3) {
872 if (VERBOSE_DECODE
) {
873 in
.printf_bits(25, 4, "M0 C2 C1 C0");
874 in
.printf_bits(offset
- 5, 5, "M2M2 M1M1 M0");
877 uint32_t c0
= in
.get_bits(25, 1);
878 uint32_t c1
= in
.get_bits(26, 1);
879 uint32_t c2
= in
.get_bits(27, 1);
881 extra_cem_bits
= c0
+ c1
+ c2
;
883 num_extra_cem_bits
= 5;
// m0 is split: its low bit is at bit 28, its high bit is the lowest of the
// five extra bits (128 - weight_bits - 5 equals offset - 5).
885 uint32_t m0
= in
.get_bits(28, 1) | (in
.get_bits(128 - weight_bits
- 5, 1) << 1);
886 uint32_t m1
= in
.get_bits(offset
- 4, 2);
887 uint32_t m2
= in
.get_bits(offset
- 2, 2);
889 cems
[0] = ((cem_base_class
+ c0
) << 2) | m0
;
890 cems
[1] = ((cem_base_class
+ c1
) << 2) | m1
;
891 cems
[2] = ((cem_base_class
+ c2
) << 2) | m2
;
893 } else if (num_parts
== 4) {
894 if (VERBOSE_DECODE
) {
895 in
.printf_bits(25, 4, "C3 C2 C1 C0");
896 in
.printf_bits(offset
- 8, 8, "M3M3 M2M2 M1M1 M0M0");
899 uint32_t c0
= in
.get_bits(25, 1);
900 uint32_t c1
= in
.get_bits(26, 1);
901 uint32_t c2
= in
.get_bits(27, 1);
902 uint32_t c3
= in
.get_bits(28, 1);
904 extra_cem_bits
= c0
+ c1
+ c2
+ c3
;
906 num_extra_cem_bits
= 8;
908 uint32_t m0
= in
.get_bits(offset
- 8, 2);
909 uint32_t m1
= in
.get_bits(offset
- 6, 2);
910 uint32_t m2
= in
.get_bits(offset
- 4, 2);
911 uint32_t m3
= in
.get_bits(offset
- 2, 2);
913 cems
[0] = ((cem_base_class
+ c0
) << 2) | m0
;
914 cems
[1] = ((cem_base_class
+ c1
) << 2) | m1
;
915 cems
[2] = ((cem_base_class
+ c2
) << 2) | m2
;
916 cems
[3] = ((cem_base_class
+ c3
) << 2) | m3
;
922 colour_endpoint_data_offset
= 29;
// Block without a partition index: 4-bit CEM at bit 13, endpoint data
// starts at bit 17.
925 uint32_t cem
= in
.get_bits(13, 4);
927 cem_base_class
= cem
>> 2;
928 is_multi_cem
= false;
932 partition_index
= -1;
935 in
.printf_bits(13, 4, "CEM = %d (class %d)", cem
, cem_base_class
);
937 colour_endpoint_data_offset
= 17;
// Unpacks num_cem_values quantised colour endpoint values into
// colour_endpoints_quant[], reading colour_endpoint_bits bits starting at
// colour_endpoint_data_offset. Three encodings are handled: trit blocks
// (5 values per 8 + 5 * ce_bits bits), quint blocks (3 values per
// 7 + 3 * ce_bits bits) and plain ce_bits-wide values.
941 void Block::unpack_colour_endpoints(InputBitVector in
)
// Trit-encoded values (the opening condition is not visible in this excerpt;
// presumably ce_trits - confirm).
944 int offset
= colour_endpoint_data_offset
;
945 int bits_left
= colour_endpoint_bits
;
946 for (int i
= 0; i
< num_cem_values
; i
+= 5) {
// The final group may be shorter than a full trit block.
947 int bits_to_read
= MIN2(bits_left
, 8 + ce_bits
* 5);
948 /* If ce_trits then ce_bits <= 6, so bits_to_read <= 38 and we have to use uint64_t */
949 uint64_t raw
= in
.get_bits64(offset
, bits_to_read
);
// Always writes 5 entries, so up to 4 entries beyond num_cem_values may be
// written; the array declaration reserves padding for this.
950 unpack_trit_block(ce_bits
, raw
, &colour_endpoints_quant
[i
]);
953 in
.printf_bits(offset
, bits_to_read
,
954 "trits [%d,%d,%d,%d,%d]",
955 colour_endpoints_quant
[i
+0], colour_endpoints_quant
[i
+1],
956 colour_endpoints_quant
[i
+2], colour_endpoints_quant
[i
+3],
957 colour_endpoints_quant
[i
+4]);
959 offset
+= 8 + ce_bits
* 5;
960 bits_left
-= 8 + ce_bits
* 5;
962 } else if (ce_quints
) {
963 int offset
= colour_endpoint_data_offset
;
964 int bits_left
= colour_endpoint_bits
;
965 for (int i
= 0; i
< num_cem_values
; i
+= 3) {
966 int bits_to_read
= MIN2(bits_left
, 7 + ce_bits
* 3);
967 /* If ce_quints then ce_bits <= 5, so bits_to_read <= 22 and we can use uint32_t */
968 uint32_t raw
= in
.get_bits(offset
, bits_to_read
);
969 unpack_quint_block(ce_bits
, raw
, &colour_endpoints_quant
[i
]);
972 in
.printf_bits(offset
, bits_to_read
,
974 colour_endpoints_quant
[i
], colour_endpoints_quant
[i
+1], colour_endpoints_quant
[i
+2]);
976 offset
+= 7 + ce_bits
* 3;
977 bits_left
-= 7 + ce_bits
* 3;
// Plain binary values: one ce_bits-wide field per value.
// NOTE(review): the modulo in the assert below divides by ce_bits, so this
// path assumes ce_bits is non-zero - confirm.
980 assert((colour_endpoint_bits
% ce_bits
) == 0);
981 int offset
= colour_endpoint_data_offset
;
982 for (int i
= 0; i
< num_cem_values
; i
++) {
983 colour_endpoints_quant
[i
] = in
.get_bits(offset
, ce_bits
);
986 in
.printf_bits(offset
, ce_bits
, "bits [%d]", colour_endpoints_quant
[i
]);
// Converts the unquantised values in colour_endpoints[] into a pair of RGBA
// endpoint colours e0 / e1 per partition and stores them in
// endpoints_decoded[0][part] and endpoints_decoded[1][part].
// Each partition consumes ((cems[part] >> 2) + 1) * 2 consecutive values.
// The case labels selecting between the variants below are not visible in
// this excerpt; each group of assignments is one endpoint mode.
993 void Block::decode_colour_endpoints()
995 int cem_values_idx
= 0;
996 for (int part
= 0; part
< num_parts
; ++part
) {
// v points at the first value belonging to this partition.
997 uint8_t *v
= &colour_endpoints
[cem_values_idx
];
1006 cem_values_idx
+= ((cems
[part
] >> 2) + 1) * 2;
// Luminance endpoints taken directly from v0 and v1, opaque alpha.
1014 e0
= uint8x4_t(v0
, v0
, v0
, 0xff);
1015 e1
= uint8x4_t(v1
, v1
, v1
, 0xff);
// Luminance as a base value L0 plus a 6-bit delta.
1018 L0
= (v0
>> 2) | (v1
& 0xc0);
1019 L1
= L0
+ (v1
& 0x3f);
1022 e0
= uint8x4_t(L0
, L0
, L0
, 0xff);
1023 e1
= uint8x4_t(L1
, L1
, L1
, 0xff);
// Luminance plus alpha, direct.
1026 e0
= uint8x4_t(v0
, v0
, v0
, v2
);
1027 e1
= uint8x4_t(v1
, v1
, v1
, v3
);
// Luminance plus alpha, base and signed offset; the sum is clamped.
1030 bit_transfer_signed(v1
, v0
);
1031 bit_transfer_signed(v3
, v2
);
1032 e0
= uint8x4_t(v0
, v0
, v0
, v2
);
1033 e1
= uint8x4_t::clamped(v0
+v1
, v0
+v1
, v0
+v1
, v2
+v3
);
// RGB with a common scale factor v3 applied to produce e0.
1036 e0
= uint8x4_t(v0
*v3
>> 8, v1
*v3
>> 8, v2
*v3
>> 8, 0xff);
1037 e1
= uint8x4_t(v0
, v1
, v2
, 0xff);
// RGB direct; the second pair swaps the endpoints and applies blue_contract.
1043 e0
= uint8x4_t(v0
, v2
, v4
, 0xff);
1044 e1
= uint8x4_t(v1
, v3
, v5
, 0xff);
1046 e0
= blue_contract(v1
, v3
, v5
, 0xff);
1047 e1
= blue_contract(v0
, v2
, v4
, 0xff);
// RGB base and signed offset; the sign of the summed offsets selects between
// the plain and the blue-contracted variant.
1051 bit_transfer_signed(v1
, v0
);
1052 bit_transfer_signed(v3
, v2
);
1053 bit_transfer_signed(v5
, v4
);
1054 if (v1
+ v3
+ v5
>= 0) {
1055 e0
= uint8x4_t(v0
, v2
, v4
, 0xff);
1056 e1
= uint8x4_t::clamped(v0
+v1
, v2
+v3
, v4
+v5
, 0xff);
1058 e0
= blue_contract_clamped(v0
+v1
, v2
+v3
, v4
+v5
, 0xff);
1059 e1
= blue_contract(v0
, v2
, v4
, 0xff);
// RGB with scale factor v3, plus separate alpha values v4 and v5.
1063 e0
= uint8x4_t(v0
*v3
>> 8, v1
*v3
>> 8, v2
*v3
>> 8, v4
);
1064 e1
= uint8x4_t(v0
, v1
, v2
, v5
);
// RGBA direct; the second pair swaps the endpoints and applies blue_contract.
1070 e0
= uint8x4_t(v0
, v2
, v4
, v6
);
1071 e1
= uint8x4_t(v1
, v3
, v5
, v7
);
1073 e0
= blue_contract(v1
, v3
, v5
, v7
);
1074 e1
= blue_contract(v0
, v2
, v4
, v6
);
// RGBA base and signed offset. Only the RGB offsets take part in the sign
// test; the alpha offset v7 does not.
1078 bit_transfer_signed(v1
, v0
);
1079 bit_transfer_signed(v3
, v2
);
1080 bit_transfer_signed(v5
, v4
);
1081 bit_transfer_signed(v7
, v6
);
1082 if (v1
+ v3
+ v5
>= 0) {
1083 e0
= uint8x4_t(v0
, v2
, v4
, v6
);
1084 e1
= uint8x4_t::clamped(v0
+v1
, v2
+v3
, v4
+v5
, v6
+v7
);
1086 e0
= blue_contract_clamped(v0
+v1
, v2
+v3
, v4
+v5
, v6
+v7
);
1087 e1
= blue_contract(v0
, v2
, v4
, v6
);
1091 /* HDR endpoints not supported; return error colour */
1092 e0
= uint8x4_t(255, 0, 255, 255);
1093 e1
= uint8x4_t(255, 0, 255, 255);
1097 endpoints_decoded
[0][part
] = e0
;
1098 endpoints_decoded
[1][part
] = e1
;
1100 if (VERBOSE_DECODE
) {
1101 printf("cems[%d]=%d v=[", part
, cems
[part
]);
// NOTE(review): this debug loop prints (cems[part] >> 2) + 1 values, while
// the partition consumes twice that many (see the cem_values_idx update
// above) - confirm whether only half of the values are meant to be shown.
1102 for (int i
= 0; i
< (cems
[part
] >> 2) + 1; ++i
) {
1105 printf("%3d", v
[i
]);
1107 printf("] e0=[%3d,%4d,%4d,%4d] e1=[%3d,%4d,%4d,%4d]\n",
1108 e0
.v
[0], e0
.v
[1], e0
.v
[2], e0
.v
[3],
1109 e1
.v
[0], e1
.v
[1], e1
.v
[2], e1
.v
[3]);
// Unpacks num_weights quantised weights into weights_quant[]. Weights are
// read with get_bits_rev, which returns the bits below the given offset in
// reversed order, and offset decreases after every group. Three encodings
// are handled: trit blocks, quint blocks and plain wt_bits-wide values.
// The initial value of offset is not visible in this excerpt.
1114 void Block::unpack_weights(InputBitVector in
)
// Trit-encoded weights (the opening condition is not visible here;
// presumably wt_trits - confirm).
1118 int bits_left
= weight_bits
;
1119 for (int i
= 0; i
< num_weights
; i
+= 5) {
// The final group may be shorter than a full trit block.
1120 int bits_to_read
= MIN2(bits_left
, 8 + 5*wt_bits
);
1121 /* If wt_trits then wt_bits <= 3, so bits_to_read <= 23 and we can use uint32_t */
1122 uint32_t raw
= in
.get_bits_rev(offset
, bits_to_read
);
// Always writes 5 entries; the array declaration reserves padding for this.
1123 unpack_trit_block(wt_bits
, raw
, &weights_quant
[i
]);
1126 in
.printf_bits(offset
- bits_to_read
, bits_to_read
, "weight trits [%d,%d,%d,%d,%d]",
1127 weights_quant
[i
+0], weights_quant
[i
+1],
1128 weights_quant
[i
+2], weights_quant
[i
+3],
1129 weights_quant
[i
+4]);
1131 offset
-= 8 + wt_bits
* 5;
1132 bits_left
-= 8 + wt_bits
* 5;
1135 } else if (wt_quints
) {
1138 int bits_left
= weight_bits
;
1139 for (int i
= 0; i
< num_weights
; i
+= 3) {
1140 int bits_to_read
= MIN2(bits_left
, 7 + 3*wt_bits
);
1141 /* If wt_quints then wt_bits <= 2, so bits_to_read <= 13 and we can use uint32_t */
1142 uint32_t raw
= in
.get_bits_rev(offset
, bits_to_read
);
1143 unpack_quint_block(wt_bits
, raw
, &weights_quant
[i
]);
1146 in
.printf_bits(offset
- bits_to_read
, bits_to_read
, "weight quints [%d,%d,%d]",
1147 weights_quant
[i
], weights_quant
[i
+1], weights_quant
[i
+2]);
1149 offset
-= 7 + wt_bits
* 3;
1150 bits_left
-= 7 + wt_bits
* 3;
// Plain binary weights: one wt_bits-wide field per weight.
// NOTE(review): the modulo in the assert below divides by wt_bits, so this
// path assumes wt_bits is non-zero - confirm.
1155 assert((weight_bits
% wt_bits
) == 0);
1156 for (int i
= 0; i
< num_weights
; ++i
) {
1157 weights_quant
[i
] = in
.get_bits_rev(offset
, wt_bits
);
1160 in
.printf_bits(offset
- wt_bits
, wt_bits
, "weight bits [%d]", weights_quant
[i
]);
// Expands every quantised weight in weights_quant[] and stores the result in
// weights[]. The whole weights[] array is zeroed first, so entries beyond
// num_weights read as 0. Separate formulas are used for trit, quint and
// plain-bit encodings; several of their statements are not visible in this
// excerpt.
1167 void Block::unquantise_weights()
1169 assert(num_weights
<= (int)ARRAY_SIZE(weights_quant
));
1170 assert(num_weights
<= (int)ARRAY_SIZE(weights
));
1172 memset(weights
, 0, sizeof(weights
));
1174 for (int i
= 0; i
< num_weights
; ++i
) {
1176 uint8_t v
= weights_quant
[i
];
// Trit-encoded weights: A replicates the lowest bit of v, B is built from
// the remaining low bits, and the result combines D * C + B with A.
1185 A
= (v
& 0x1) ? 0x7F : 0x00;
1193 B
= (v
& 0x2) ? 0x45 : 0x00;
1198 B
= ((v
& 0x6) >> 1) | ((v
& 0x6) << 4);
1205 uint16_t T
= D
* C
+ B
;
1207 T
= (A
& 0x20) | (T
>> 2);
// Quint-encoded weights: same scheme with different constants.
1214 } else if (wt_quints
) {
1220 A
= (v
& 0x1) ? 0x7F : 0x00;
1228 B
= (v
& 0x2) ? 0x42 : 0x00;
1235 uint16_t T
= D
* C
+ B
;
1237 T
= (A
& 0x20) | (T
>> 2);
// Plain-bit weights: the bit pattern of v is replicated to fill 6 bits; the
// case label is the number of bits (presumably wt_bits - confirm).
1248 case 1: w
= v
? 0x3F : 0x00; break;
1249 case 2: w
= v
| (v
<< 2) | (v
<< 4); break;
1250 case 3: w
= v
| (v
<< 3); break;
1251 case 4: w
= (v
>> 2) | (v
<< 2); break;
1252 case 5: w
= (v
>> 4) | (v
<< 1); break;
1253 default: unreachable("");
// Computes one interpolated weight per texel of a block_w x block_h x
// block_d block from the (smaller) weight grid in weights[], writing to
// infill_weights[plane][s + t*block_w + r*block_w*block_h].
// Each texel weight is a blend of four neighbouring grid weights
// p00, p01, p10, p11 with factors w00, w01, w10, w11 in sixteenths.
1263 void Block::compute_infill_weights(int block_w
, int block_h
, int block_d
)
// Per-axis scale factors; an axis of size 1 or less gets a factor of 0,
// which also avoids dividing by zero.
1265 int Ds
= block_w
<= 1 ? 0 : (1024 + block_w
/ 2) / (block_w
- 1);
1266 int Dt
= block_h
<= 1 ? 0 : (1024 + block_h
/ 2) / (block_h
- 1);
1267 int Dr
= block_d
<= 1 ? 0 : (1024 + block_d
/ 2) / (block_d
- 1);
1268 for (int r
= 0; r
< block_d
; ++r
) {
1269 for (int t
= 0; t
< block_h
; ++t
) {
1270 for (int s
= 0; s
< block_w
; ++s
) {
// Scale the texel coordinates (cs, ct, cr) to the weight grid.
1274 int gs
= (cs
* (wt_w
- 1) + 32) >> 6;
1275 int gt
= (ct
* (wt_h
- 1) + 32) >> 6;
1276 int gr
= (cr
* (wt_d
- 1) + 32) >> 6;
1277 assert(gs
>= 0 && gs
<= 176);
1278 assert(gt
>= 0 && gt
<= 176);
1279 assert(gr
>= 0 && gr
<= 176);
// Blend factors derived from the fractional parts fs and ft.
1291 int w11
= (fs
* ft
+ 8) >> 4;
1294 int w00
= 16 - fs
- ft
+ w11
;
// First variant: weights[] holds two interleaved values per grid point
// (index * 2 and index * 2 + 1), producing i0 and i1 for the two planes.
// Presumably the dual-plane case; the condition is not visible - confirm.
1297 int p00
, p01
, p10
, p11
, i0
, i1
;
1298 int v0
= js
+ jt
* wt_w
;
1299 p00
= weights
[(v0
) * 2];
1300 p01
= weights
[(v0
+ 1) * 2];
1301 p10
= weights
[(v0
+ wt_w
) * 2];
1302 p11
= weights
[(v0
+ wt_w
+ 1) * 2];
1303 i0
= (p00
*w00
+ p01
*w01
+ p10
*w10
+ p11
*w11
+ 8) >> 4;
1304 p00
= weights
[(v0
) * 2 + 1];
1305 p01
= weights
[(v0
+ 1) * 2 + 1];
1306 p10
= weights
[(v0
+ wt_w
) * 2 + 1];
1307 p11
= weights
[(v0
+ wt_w
+ 1) * 2 + 1];
// NOTE(review): this bounds assert runs after the weights[] reads it covers.
1308 assert((v0
+ wt_w
+ 1) * 2 + 1 < (int)ARRAY_SIZE(weights
));
1309 i1
= (p00
*w00
+ p01
*w01
+ p10
*w10
+ p11
*w11
+ 8) >> 4;
1310 assert(0 <= i0
&& i0
<= 64);
1311 infill_weights
[0][s
+ t
*block_w
+ r
*block_w
*block_h
] = i0
;
1312 infill_weights
[1][s
+ t
*block_w
+ r
*block_w
*block_h
] = i1
;
// Second variant: one value per grid point, only infill_weights[0] is set.
1314 int p00
, p01
, p10
, p11
, i
;
1315 int v0
= js
+ jt
* wt_w
;
1317 p01
= weights
[v0
+ 1];
1318 p10
= weights
[v0
+ wt_w
];
1319 p11
= weights
[v0
+ wt_w
+ 1];
// NOTE(review): as above, the assert follows the reads it covers.
1320 assert(v0
+ wt_w
+ 1 < (int)ARRAY_SIZE(weights
));
1321 i
= (p00
*w00
+ p01
*w01
+ p10
*w10
+ p11
*w11
+ 8) >> 4;
1322 assert(0 <= i
&& i
<= 64);
1323 infill_weights
[0][s
+ t
*block_w
+ r
*block_w
*block_h
] = i
;
// Expands every quantised colour value in colour_endpoints_quant[] to 8 bits
// and stores it in colour_endpoints[]. Separate formulas are used for trit,
// quint and plain-bit encodings; several of their statements (including the
// C and D terms) are not visible in this excerpt.
1330 void Block::unquantise_colour_endpoints()
1332 assert(num_cem_values
<= (int)ARRAY_SIZE(colour_endpoints_quant
));
1333 assert(num_cem_values
<= (int)ARRAY_SIZE(colour_endpoints
));
1335 for (int i
= 0; i
< num_cem_values
; ++i
) {
1336 uint8_t v
= colour_endpoints_quant
[i
];
// Trit-encoded values: A replicates the lowest bit of v across 9 bits, B is
// built from the remaining low bits, and the result combines D * C + B with
// the top bit taken from A.
1339 uint16_t A
, B
, C
, D
;
1341 A
= (v
& 0x1) ? 0x1FF : 0x000;
1349 B
= (v
& 0x2) ? 0x116 : 0x000;
1354 t
= ((v
>> 1) & 0x3);
1355 B
= t
| (t
<< 2) | (t
<< 7);
1360 t
= ((v
>> 1) & 0x7);
1366 t
= ((v
>> 1) & 0xF);
1367 B
= (t
>> 2) | (t
<< 5);
1372 B
= ((v
& 0x3E) << 3) | ((v
>> 5) & 0x1);
1379 uint16_t T
= D
* C
+ B
;
1381 T
= (A
& 0x80) | (T
>> 2);
1383 colour_endpoints
[i
] = T
;
// Quint-encoded values: same scheme with different constants.
1384 } else if (ce_quints
) {
1385 uint16_t A
, B
, C
, D
;
1387 A
= (v
& 0x1) ? 0x1FF : 0x000;
1395 B
= (v
& 0x2) ? 0x10C : 0x000;
1400 t
= ((v
>> 1) & 0x3);
1401 B
= (t
>> 1) | (t
<< 1) | (t
<< 7);
1406 t
= ((v
>> 1) & 0x7);
1407 B
= (t
>> 1) | (t
<< 6);
1412 t
= ((v
>> 1) & 0xF);
1413 B
= (t
>> 4) | (t
<< 5);
1420 uint16_t T
= D
* C
+ B
;
1422 T
= (A
& 0x80) | (T
>> 2);
1424 colour_endpoints
[i
] = T
;
// Plain-bit values: the bit pattern of v is replicated to fill 8 bits; the
// case label is the number of bits (presumably ce_bits - confirm).
1427 case 1: v
= v
? 0xFF : 0x00; break;
1428 case 2: v
= (v
<< 6) | (v
<< 4) | (v
<< 2) | v
; break;
1429 case 3: v
= (v
<< 5) | (v
<< 2) | (v
>> 1); break;
1430 case 4: v
= (v
<< 4) | v
; break;
1431 case 5: v
= (v
<< 3) | (v
>> 2); break;
1432 case 6: v
= (v
<< 2) | (v
>> 4); break;
1433 case 7: v
= (v
<< 1) | (v
>> 6); break;
1435 default: unreachable("");
1437 colour_endpoints
[i
] = v
;
// Parses one 128-bit block into this Block. Visible steps, in order: block
// mode, weight-grid bookkeeping, partition count, CEM, colour endpoint
// unpacking / unquantisation / decoding, colour component selector, weight
// validation, weight unquantisation and infill. Returns decode_error::ok on
// success or the first error found. With VERBOSE_DECODE set, intermediate
// results are printed along the way.
1442 decode_error::type
Block::decode(const Decoder
&decoder
, InputBitVector in
)
1444 decode_error::type err
;
1447 bogus_colour_endpoints
= false;
1448 bogus_weights
= false;
1449 is_void_extent
= false;
1454 /* TODO: test for all the illegal encodings */
1457 in
.printf_bits(0, 128);
1459 err
= decode_block_mode(in
);
1460 if (err
!= decode_error::ok
)
// Early success return (presumably for void-extent blocks, which need no
// further parsing; the condition is not visible in this excerpt).
1464 return decode_error::ok
;
1468 calculate_from_weights();
1471 printf("weights_grid=%dx%dx%d dual_plane=%d num_weights=%d high_prec=%d r=%d range=0..%d (%dt %dq %db) weight_bits=%d\n",
1472 wt_w
, wt_h
, wt_d
, dual_plane
, num_weights
, high_prec
, wt_range
, wt_max
, wt_trits
, wt_quints
, wt_bits
, weight_bits
);
// The weight grid may not be larger than the block in any dimension.
1474 if (wt_w
> decoder
.block_w
|| wt_h
> decoder
.block_h
|| wt_d
> decoder
.block_d
)
1475 return decode_error::weight_grid_exceeds_block_size
;
// Two bits at position 11 hold the partition count minus one.
1477 num_parts
= in
.get_bits(11, 2) + 1;
1480 in
.printf_bits(11, 2, "partitions = %d", num_parts
);
1482 if (dual_plane
&& num_parts
> 3)
1483 return decode_error::dual_plane_and_too_many_partitions
;
1488 printf("cem=[%d,%d,%d,%d] base_cem_class=%d\n", cems
[0], cems
[1], cems
[2], cems
[3], cem_base_class
);
// Every partition contributes cem_base_class + 1 value pairs, plus one more
// pair per extra CEM class bit that is set.
1490 int num_cem_pairs
= (cem_base_class
+ 1) * num_parts
+ extra_cem_bits
;
1491 num_cem_values
= num_cem_pairs
* 2;
1493 calculate_remaining_bits();
1494 err
= calculate_colour_endpoints_size();
1495 if (err
!= decode_error::ok
)
1499 in
.printf_bits(colour_endpoint_data_offset
, colour_endpoint_bits
,
1500 "endpoint data (%d bits, %d vals, %dt %dq %db)",
1501 colour_endpoint_bits
, num_cem_values
, ce_trits
, ce_quints
, ce_bits
);
// NOTE(review): the endpoints are unpacked here, before the
// num_cem_values > 18 check further down, while colour_endpoints_quant is
// declared with 18 + 4 entries. Confirm that
// calculate_colour_endpoints_size() already rejects every block with more
// than 18 values; otherwise this call can write past the array.
1503 unpack_colour_endpoints(in
);
1505 if (VERBOSE_DECODE
) {
1506 printf("cem values raw =[");
1507 for (int i
= 0; i
< num_cem_values
; i
++) {
1510 printf("%3d", colour_endpoints_quant
[i
]);
1515 if (num_cem_values
> 18)
1516 return decode_error::invalid_colour_endpoints_count
;
1518 unquantise_colour_endpoints();
1520 if (VERBOSE_DECODE
) {
1521 printf("cem values norm=[");
1522 for (int i
= 0; i
< num_cem_values
; i
++) {
1525 printf("%3d", colour_endpoints
[i
]);
1530 decode_colour_endpoints();
// The 2-bit colour component selector sits directly below the extra CEM
// bits, which in turn sit directly below the weight data. The second
// assignment sets it to 0 (presumably when there is no second plane; the
// condition is not visible in this excerpt).
1533 int ccs_offset
= 128 - weight_bits
- num_extra_cem_bits
- 2;
1534 colour_component_selector
= in
.get_bits(ccs_offset
, 2);
1537 in
.printf_bits(ccs_offset
, 2, "colour component selector = %d", colour_component_selector
);
1539 colour_component_selector
= 0;
1544 in
.printf_bits(128 - weight_bits
, weight_bits
, "weights (%d bits)", weight_bits
);
// Limits on the weight data: at most 64 weights, 24 to 96 bits in total.
1546 if (num_weights
> 64)
1547 return decode_error::invalid_num_weights
;
1549 if (weight_bits
< 24 || weight_bits
> 96)
1550 return decode_error::invalid_weight_bits
;
1554 unquantise_weights();
1556 if (VERBOSE_DECODE
) {
1557 printf("weights=[");
1558 for (int i
= 0; i
< num_weights
; ++i
) {
1561 printf("%d", weights
[i
]);
1565 for (int plane
= 0; plane
<= dual_plane
; ++plane
) {
1566 printf("weights (plane %d):\n", plane
);
1570 for (int r
= 0; r
< wt_d
; ++r
) {
1571 for (int t
= 0; t
< wt_h
; ++t
) {
1572 for (int s
= 0; s
< wt_w
; ++s
) {
// With two planes the weights are interleaved, hence the stride of
// 1 + dual_plane and the added plane index.
1573 printf("%3d", weights
[i
++ * (1 + dual_plane
) + plane
]);
1583 compute_infill_weights(decoder
.block_w
, decoder
.block_h
, decoder
.block_d
);
1585 if (VERBOSE_DECODE
) {
1586 for (int plane
= 0; plane
<= dual_plane
; ++plane
) {
1587 printf("infilled weights (plane %d):\n", plane
);
1591 for (int r
= 0; r
< decoder
.block_d
; ++r
) {
1592 for (int t
= 0; t
< decoder
.block_h
; ++t
) {
1593 for (int s
= 0; s
< decoder
.block_w
; ++s
) {
1594 printf("%3d", infill_weights
[plane
][i
++]);
1598 if (r
< decoder
.block_d
- 1)
1606 return decode_error::ok
;
/*
 * Write the decoded texels of this block to `output`.
 *
 * Each texel occupies four consecutive uint16_t slots (idx*4+0 .. idx*4+3),
 * one per colour component.  Depending on decoder.output_unorm8 a slot holds
 * either an 8-bit UNORM value or an FP16 bit pattern.
 *
 * \param decoder  supplies block_w/block_h/block_d and the srgb and
 *                 output_unorm8 flags read below
 * \param output   destination array, written at indices idx*4 + component
 *
 * NOTE(review): several lines of this function (the sRGB and dual-plane
 * guards, the declarations of idx, partition, w and c, and the closing
 * braces) are not visible in this view; the notes below only describe the
 * statements that are.
 */
1609 void Block::write_decoded(const Decoder
&decoder
, uint16_t *output
)
1611 /* sRGB can only be stored as unorm8. */
1612 assert(!decoder
.srgb
|| decoder
.output_unorm8
);
/* Void-extent block: every texel receives the same void_extent_colour_*. */
1614 if (is_void_extent
) {
1615 for (int idx
= 0; idx
< decoder
.block_w
*decoder
.block_h
*decoder
.block_d
; ++idx
) {
1616 if (decoder
.output_unorm8
) {
/* Variant 1 for R, G, B: keep the upper 8 bits of the 16-bit colour.
 * NOTE(review): the condition choosing between this variant and the
 * half-float conversion below is not visible here. */
1618 output
[idx
*4+0] = void_extent_colour_r
>> 8;
1619 output
[idx
*4+1] = void_extent_colour_g
>> 8;
1620 output
[idx
*4+2] = void_extent_colour_b
>> 8;
/* Variant 2 for R, G, B: convert via half float to unorm8. */
1622 output
[idx
*4+0] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_r
);
1623 output
[idx
*4+1] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_g
);
1624 output
[idx
*4+2] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_b
);
/* Alpha is only ever written through the half-float conversion. */
1626 output
[idx
*4+3] = uint16_div_64k_to_half_to_unorm8(void_extent_colour_a
);
1628 /* Store the color as FP16. */
1629 output
[idx
*4+0] = _mesa_uint16_div_64k_to_half(void_extent_colour_r
);
1630 output
[idx
*4+1] = _mesa_uint16_div_64k_to_half(void_extent_colour_g
);
1631 output
[idx
*4+2] = _mesa_uint16_div_64k_to_half(void_extent_colour_b
);
1632 output
[idx
*4+3] = _mesa_uint16_div_64k_to_half(void_extent_colour_a
);
/* Non-zero when the block holds fewer than 31 texels; forwarded to
 * select_partition(). */
1638 int small_block
= (decoder
.block_w
* decoder
.block_h
* decoder
.block_d
) < 31;
/* Visit every texel of the block: z outermost, x innermost. */
1641 for (int z
= 0; z
< decoder
.block_d
; ++z
) {
1642 for (int y
= 0; y
< decoder
.block_h
; ++y
) {
1643 for (int x
= 0; x
< decoder
.block_w
; ++x
) {
/* With more than one partition, look up which one this texel belongs to. */
1646 if (num_parts
> 1) {
1647 partition
= select_partition(partition_index
, x
, y
, z
, num_parts
, small_block
);
1648 assert(partition
< num_parts
);
/* The two endpoint colours of the texel's partition, 8 bits per component. */
1655 uint8x4_t e0
= endpoints_decoded
[0][partition
];
1656 uint8x4_t e1
= endpoints_decoded
[1][partition
];
1657 uint16_t c0
[4], c1
[4];
1659 /* Expand to 16 bits. */
/* Expansion variant 1: endpoint byte in the high half, 0x80 in the low half.
 * NOTE(review): the guard selecting this variant is not visible here. */
1661 c0
[0] = (uint16_t)((e0
.v
[0] << 8) | 0x80);
1662 c0
[1] = (uint16_t)((e0
.v
[1] << 8) | 0x80);
1663 c0
[2] = (uint16_t)((e0
.v
[2] << 8) | 0x80);
1664 c0
[3] = (uint16_t)((e0
.v
[3] << 8) | 0x80);
1666 c1
[0] = (uint16_t)((e1
.v
[0] << 8) | 0x80);
1667 c1
[1] = (uint16_t)((e1
.v
[1] << 8) | 0x80);
1668 c1
[2] = (uint16_t)((e1
.v
[2] << 8) | 0x80);
1669 c1
[3] = (uint16_t)((e1
.v
[3] << 8) | 0x80);
/* Expansion variant 2: the endpoint byte is replicated into both halves. */
1671 c0
[0] = (uint16_t)((e0
.v
[0] << 8) | e0
.v
[0]);
1672 c0
[1] = (uint16_t)((e0
.v
[1] << 8) | e0
.v
[1]);
1673 c0
[2] = (uint16_t)((e0
.v
[2] << 8) | e0
.v
[2]);
1674 c0
[3] = (uint16_t)((e0
.v
[3] << 8) | e0
.v
[3]);
1676 c1
[0] = (uint16_t)((e1
.v
[0] << 8) | e1
.v
[0]);
1677 c1
[1] = (uint16_t)((e1
.v
[1] << 8) | e1
.v
[1]);
1678 c1
[2] = (uint16_t)((e1
.v
[2] << 8) | e1
.v
[2]);
1679 c1
[3] = (uint16_t)((e1
.v
[3] << 8) | e1
.v
[3]);
/* Two-weight case: all components use the weight from infill_weights[0],
 * except the one named by colour_component_selector, which uses the weight
 * from infill_weights[1].
 * NOTE(review): presumably guarded by dual_plane -- the guard is not
 * visible here. */
1684 int w0
= infill_weights
[0][idx
];
1685 int w1
= infill_weights
[1][idx
];
1686 w
[0] = w
[1] = w
[2] = w
[3] = w0
;
1687 w
[colour_component_selector
] = w1
;
/* Single-weight case: all four components share one weight. */
1689 int w0
= infill_weights
[0][idx
];
1690 w
[0] = w
[1] = w
[2] = w
[3] = w0
;
1693 /* Interpolate to produce UNORM16, applying weights. */
/* Per component: (c0 * (64 - w) + c1 * w + 32) >> 6, i.e. a blend with
 * weights that sum to 64; the +32 rounds before the divide by 64. */
1695 (uint16_t)((c0
[0] * (64 - w
[0]) + c1
[0] * w
[0] + 32) >> 6),
1696 (uint16_t)((c0
[1] * (64 - w
[1]) + c1
[1] * w
[1] + 32) >> 6),
1697 (uint16_t)((c0
[2] * (64 - w
[2]) + c1
[2] * w
[2] + 32) >> 6),
1698 (uint16_t)((c0
[3] * (64 - w
[3]) + c1
[3] * w
[3] + 32) >> 6),
1701 if (decoder
.output_unorm8
) {
/* Variant 1 for R, G, B: keep the upper 8 bits of the interpolated value.
 * NOTE(review): the guard selecting this variant is not visible here. */
1703 output
[idx
*4+0] = c
[0] >> 8;
1704 output
[idx
*4+1] = c
[1] >> 8;
1705 output
[idx
*4+2] = c
[2] >> 8;
/* Variant 2 for R, G, B: 65535 is special-cased to 0xff; every other value
 * goes through the half-float conversion. */
1707 output
[idx
*4+0] = c
[0] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c
[0]);
1708 output
[idx
*4+1] = c
[1] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c
[1]);
1709 output
[idx
*4+2] = c
[2] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c
[2]);
/* Alpha always uses the 65535 special case plus half-float conversion. */
1711 output
[idx
*4+3] = c
[3] == 65535 ? 0xff : uint16_div_64k_to_half_to_unorm8(c
[3]);
1713 /* Store the color as FP16. */
/* 65535 is special-cased to FP16_ONE; other values are converted with
 * _mesa_uint16_div_64k_to_half(). */
1714 output
[idx
*4+0] = c
[0] == 65535 ? FP16_ONE
: _mesa_uint16_div_64k_to_half(c
[0]);
1715 output
[idx
*4+1] = c
[1] == 65535 ? FP16_ONE
: _mesa_uint16_div_64k_to_half(c
[1]);
1716 output
[idx
*4+2] = c
[2] == 65535 ? FP16_ONE
: _mesa_uint16_div_64k_to_half(c
[2]);
1717 output
[idx
*4+3] = c
[3] == 65535 ? FP16_ONE
: _mesa_uint16_div_64k_to_half(c
[3]);
/*
 * Derive the weight encoding parameters from the block mode fields.
 *
 * Sets wt_max together with the encoding counts wt_trits, wt_quints and
 * wt_bits, computes num_weights from the weight grid dimensions, and
 * evaluates the number of bits the encoded weights occupy.
 *
 * NOTE(review): parts of this function are not visible in this view,
 * including whatever selects between the two case tables below and the
 * left-hand side of the final sum.
 */
1726 void Block::calculate_from_weights()
1731 switch (high_prec
) {
/* First table: wt_max from 1 to 7. */
1734 case 0x2: wt_max
= 1; wt_bits
= 1; break;
1735 case 0x3: wt_max
= 2; wt_trits
= 1; break;
1736 case 0x4: wt_max
= 3; wt_bits
= 2; break;
1737 case 0x5: wt_max
= 4; wt_quints
= 1; break;
1738 case 0x6: wt_max
= 5; wt_trits
= 1; wt_bits
= 1; break;
1739 case 0x7: wt_max
= 7; wt_bits
= 3; break;
/* Second table, same case labels: wt_max from 9 to 31. */
1745 case 0x2: wt_max
= 9; wt_quints
= 1; wt_bits
= 1; break;
1746 case 0x3: wt_max
= 11; wt_trits
= 1; wt_bits
= 2; break;
1747 case 0x4: wt_max
= 15; wt_bits
= 4; break;
1748 case 0x5: wt_max
= 19; wt_quints
= 1; wt_bits
= 2; break;
1749 case 0x6: wt_max
= 23; wt_trits
= 1; wt_bits
= 3; break;
1750 case 0x7: wt_max
= 31; wt_bits
= 5; break;
/* At least one of the three encoding counts must have been set. */
1756 assert(wt_trits
|| wt_quints
|| wt_bits
);
/* One weight per point of the wt_w x wt_h x wt_d weight grid. */
1758 num_weights
= wt_w
* wt_h
* wt_d
;
/* Encoded size in bits: ceil(8n/5) when wt_trits is 1, ceil(7n/3) when
 * wt_quints is 1 (each term is 0 when its count is 0), plus wt_bits plain
 * bits per weight.
 * NOTE(review): presumably assigned to weight_bits -- the assignment target
 * is not visible here. */
1764 (num_weights
* 8 * wt_trits
+ 4) / 5
1765 + (num_weights
* 7 * wt_quints
+ 2) / 3
1766 + num_weights
* wt_bits
;
/*
 * Compute remaining_bits: what is left of the 128-bit block once the
 * configuration bits and the weight bits have been subtracted.
 *
 * NOTE(review): only one of the config_bits assignments is visible in this
 * view; the other branches are not shown.
 */
1769 void Block::calculate_remaining_bits()
1772 if (num_parts
> 1) {
/* Visible multi-partition case: 25 bits plus 3 per partition. */
1776 config_bits
= 25 + 3 * num_parts
;
1784 remaining_bits
= 128 - config_bits
- weight_bits
;
/*
 * Choose the colour endpoint encoding that fits in remaining_bits.
 *
 * On success stores the selected entry of cem_ranges in ce_max, ce_trits,
 * ce_quints and ce_bits, stores its encoded size in colour_endpoint_bits,
 * and returns decode_error::ok.  Otherwise returns
 * decode_error::invalid_colour_endpoints_size.
 */
1787 decode_error::type
Block::calculate_colour_endpoints_size()
1789 /* Specified as illegal */
/* Fewer than ceil(13 * num_cem_values / 5) bits available: zero the
 * endpoint encoding fields and fail. */
1790 if (remaining_bits
< (13 * num_cem_values
+ 4) / 5) {
1791 colour_endpoint_bits
= ce_max
= ce_trits
= ce_quints
= ce_bits
= 0;
1792 return decode_error::invalid_colour_endpoints_size
;
1795 /* Find the largest cem_ranges that fits within remaining_bits */
/* Walk cem_ranges from its last entry down to its first. */
1796 for (int i
= ARRAY_SIZE(cem_ranges
)-1; i
>= 0; --i
) {
/* Encoded size of num_cem_values values in range i: ceil(8n/5) scaled by
 * the .t count, ceil(7n/3) scaled by the .q count, plus .b plain bits per
 * value. */
1798 cem_bits
= (num_cem_values
* 8 * cem_ranges
[i
].t
+ 4) / 5
1799 + (num_cem_values
* 7 * cem_ranges
[i
].q
+ 2) / 3
1800 + num_cem_values
* cem_ranges
[i
].b
;
/* The first entry that fits is taken and the search ends. */
1802 if (cem_bits
<= remaining_bits
)
1804 colour_endpoint_bits
= cem_bits
;
1805 ce_max
= cem_ranges
[i
].max
;
1806 ce_trits
= cem_ranges
[i
].t
;
1807 ce_quints
= cem_ranges
[i
].q
;
1808 ce_bits
= cem_ranges
[i
].b
;
1809 return decode_error::ok
;
/* Reached only when no cem_ranges entry fits. */
1814 return decode_error::invalid_colour_endpoints_size
;
1818 * Decode ASTC 2D LDR texture data.
1820 * \param src_width in pixels
1821 * \param src_height in pixels
1822 * \param dst_stride in bytes
/*
 * Walks the image one compressed block at a time, decodes each block into a
 * temporary buffer, and addresses the destination texels it covers.
 *
 * NOTE(review): the return type, two of the parameters, the statements that
 * copy each texel from src to dst, and the closing braces are not visible in
 * this view.
 */
1825 _mesa_unpack_astc_2d_ldr(uint8_t *dst_row
,
1826 unsigned dst_stride
,
1827 const uint8_t *src_row
,
1828 unsigned src_stride
,
1830 unsigned src_height
,
1833 assert(_mesa_is_format_astc_2d(format
));
1834 bool srgb
= _mesa_is_format_srgb(format
);
/* Block footprint in texels for this format. */
1836 unsigned blk_w
, blk_h
;
1837 _mesa_get_format_block_size(format
, &blk_w
, &blk_h
);
/* Byte step between consecutive compressed blocks within a source row. */
1839 const unsigned block_size
= 16;
/* Number of blocks per axis, rounded up so partial edge blocks count. */
1840 unsigned x_blocks
= (src_width
+ blk_w
- 1) / blk_w
;
1841 unsigned y_blocks
= (src_height
+ blk_h
- 1) / blk_h
;
/* Block depth is fixed to 1 for 2D data.
 * NOTE(review): the trailing `true` presumably requests unorm8 output --
 * confirm against the Decoder constructor. */
1843 Decoder
dec(blk_w
, blk_h
, 1, srgb
, true);
1845 for (unsigned y
= 0; y
< y_blocks
; ++y
) {
1846 for (unsigned x
= 0; x
< x_blocks
; ++x
) {
1847 /* Same size as the largest block. */
1848 uint16_t block_out
[12 * 12 * 4];
/* Decode block x of the current block row into block_out. */
1850 dec
.decode(src_row
+ x
* block_size
, block_out
);
1852 /* This can be smaller with NPOT dimensions. */
1853 unsigned dst_blk_w
= MIN2(blk_w
, src_width
- x
*blk_w
);
1854 unsigned dst_blk_h
= MIN2(blk_h
, src_height
- y
*blk_h
);
/* For each texel of the block that lies inside the image: dst is addressed
 * with 4 bytes per texel, src with 4 uint16_t slots per texel. */
1856 for (unsigned sub_y
= 0; sub_y
< dst_blk_h
; ++sub_y
) {
1857 for (unsigned sub_x
= 0; sub_x
< dst_blk_w
; ++sub_x
) {
1858 uint8_t *dst
= dst_row
+ sub_y
* dst_stride
+
1859 (x
* blk_w
+ sub_x
) * 4;
1860 const uint16_t *src
= &block_out
[(sub_y
* blk_w
+ sub_x
) * 4];
/* Advance to the next row of blocks: one source stride, and blk_h
 * destination rows. */
1869 src_row
+= src_stride
;
1870 dst_row
+= dst_stride
* blk_h
;