/*
 * Copyright (c) 2012-2013 Luc Verhaegen <libv@skynet.be>
 * Copyright (c) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
26 #include "pan_swizzle.h"
27 #include "pan_allocate.h"
/* Space a group of 4-bits out. For instance, 0x7 -- that is, 0b111 -- would
 * become 0b10101: bit k of the input moves to bit 2k of the result, so the
 * odd bit positions are left clear for a second spaced value to be OR'd in
 * after a shift by one.
 *
 * Only the low four bits of i are used; higher bits are ignored. */

static inline int
space_bits_4(int i)
{
        return ((i & 0x8) << 3) |
               ((i & 0x4) << 2) |
               ((i & 0x2) << 1) |
               ((i & 0x1) << 0);
}
/* Generate lookup table for the space filler curve. Note this is a 1:1
 * mapping, just with bits twiddled around. */

/* space_filler[y][x] is the offset, in pixels, of pixel (x, y) inside a 16x16
 * tile. Filled in by panfrost_generate_space_filler_indices(). */
uint32_t space_filler[16][16];

/* space_filler_packed4[y][q] packs the four entries space_filler[y][4q] ..
 * space_filler[y][4q + 3] into one word, one per byte, with entry 4q in the
 * lowest byte. Filled in together with space_filler. */
uint32_t space_filler_packed4[16][4];
48 panfrost_generate_space_filler_indices()
50 for (int y
= 0; y
< 16; ++y
) {
51 for (int x
= 0; x
< 16; ++x
) {
53 space_bits_4(y
^ x
) | (space_bits_4(y
) << 1);
56 for (int q
= 0; q
< 4; ++q
) {
57 space_filler_packed4
[y
][q
] =
58 (space_filler
[y
][(q
* 4) + 0] << 0) |
59 (space_filler
[y
][(q
* 4) + 1] << 8) |
60 (space_filler
[y
][(q
* 4) + 2] << 16) |
61 (space_filler
[y
][(q
* 4) + 3] << 24);
67 swizzle_bpp1_align16(int width
, int height
, int source_stride
, int block_pitch
,
68 const uint8_t *pixels
,
71 for (int y
= 0; y
< height
; ++y
) {
73 int block_y
= y
& ~(0x0f);
75 uint8_t *block_start_s
= ldest
+ (block_y
* block_pitch
);
76 const uint8_t *source_start
= pixels
+ (y
* source_stride
);
77 const uint8_t *source_end
= source_start
+ width
;
79 /* Operate on blocks of 16 pixels to minimise bookkeeping */
81 for (; source_start
< source_end
; block_start_s
+= 16 * 16, source_start
+= 16) {
82 const uint32_t *src_32
= (const uint32_t *) source_start
;
84 for (int q
= 0; q
< 4; ++q
) {
85 uint32_t src
= src_32
[q
];
86 uint32_t spaced
= space_filler_packed4
[rem_y
][q
];
87 uint16_t *bs
= (uint16_t *) block_start_s
;
89 int spacedA
= (spaced
>> 0) & 0xFF;
90 int spacedB
= (spaced
>> 16) & 0xFF;
92 bs
[spacedA
>> 1] = (src
>> 0) & 0xFFFF;
93 bs
[spacedB
>> 1] = (src
>> 16) & 0xFFFF;
101 int block_y
= y
& ~(0x0f);
102 int rem_y
= y
& 0x0f;
103 uint8_t *block_start_s
= ldest
+ (block_y
* block_pitch
);
104 const uint8_t *source_start
= pixels
+ (y
* source_stride
);
105 const uint8_t *source_end
= source_start
+ width
;
107 /* Operate on blocks of 16 pixels to minimise bookkeeping */
109 for (; source_start
< source_end
; block_start_s
+= 16 * 16, source_start
+= 16) {
110 const uint32_t *src_32
= (const uint32_t *) source_start
;
112 for (int q
= 0; q
< 4; ++q
) {
113 uint32_t src
= src_32
[q
];
114 uint32_t spaced
= space_filler_packed4
[rem_y
][q
];
116 block_start_s
[(spaced
>> 0) & 0xFF] = (src
>> 0) & 0xFF;
117 block_start_s
[(spaced
>> 8) & 0xFF] = (src
>> 8) & 0xFF;
119 block_start_s
[(spaced
>> 16) & 0xFF] = (src
>> 16) & 0xFF;
120 block_start_s
[(spaced
>> 24) & 0xFF] = (src
>> 24) & 0xFF;
129 swizzle_bpp4_align16(int width
, int height
, int source_stride
, int block_pitch
,
130 const uint32_t *pixels
,
133 for (int y
= 0; y
< height
; ++y
) {
134 int block_y
= y
& ~(0x0f);
135 int rem_y
= y
& 0x0f;
136 uint32_t *block_start_s
= ldest
+ (block_y
* block_pitch
);
137 const uint32_t *source_start
= pixels
+ (y
* source_stride
);
138 const uint32_t *source_end
= source_start
+ width
;
140 /* Operate on blocks of 16 pixels to minimise bookkeeping */
142 for (; source_start
< source_end
; block_start_s
+= 16 * 16, source_start
+= 16) {
143 for (int j
= 0; j
< 16; ++j
)
144 block_start_s
[space_filler
[rem_y
][j
]] = source_start
[j
];
150 panfrost_texture_swizzle(unsigned off_x
,
152 int width
, int height
, int bytes_per_pixel
, int source_stride
, int dest_width
,
153 const uint8_t *pixels
,
156 /* Calculate maximum size, overestimating a bit */
157 int block_pitch
= ALIGN(dest_width
, 16) >> 4;
159 /* Use fast path if available */
160 if (!(off_x
|| off_y
)) {
161 if (bytes_per_pixel
== 4 /* && (ALIGN(width, 16) == width) */) {
162 swizzle_bpp4_align16(width
, height
, source_stride
>> 2, (block_pitch
* 256 >> 4), (const uint32_t *) pixels
, (uint32_t *) ldest
);
164 } else if (bytes_per_pixel
== 1 /* && (ALIGN(width, 16) == width) */) {
165 swizzle_bpp1_align16(width
, height
, source_stride
, (block_pitch
* 256 >> 4), pixels
, (uint8_t *) ldest
);
170 /* Otherwise, default back on generic path */
172 for (int y
= 0; y
< height
; ++y
) {
173 int block_y
= (y
+ off_y
) >> 4;
174 int rem_y
= (y
+ off_y
) & 0x0F;
175 int block_start_s
= block_y
* block_pitch
* 256;
176 int source_start
= y
* source_stride
;
178 for (int x
= 0; x
< width
; ++x
) {
179 int block_x_s
= ((x
+ off_x
) >> 4) * 256;
180 int rem_x
= (x
+ off_x
) & 0x0F;
182 int index
= space_filler
[rem_y
][rem_x
];
183 const uint8_t *source
= &pixels
[source_start
+ bytes_per_pixel
* x
];
184 uint8_t *dest
= ldest
+ bytes_per_pixel
* (block_start_s
+ block_x_s
+ index
);
186 for (int b
= 0; b
< bytes_per_pixel
; ++b
)