panfrost: Break out fragment to SFBD/MFBD files
[mesa.git] / src / gallium / drivers / panfrost / pan_swizzle.c
1 /*
2 * Copyright (c) 2012-2013 Luc Verhaegen <libv@skynet.be>
3 * Copyright (c) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sub license,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
14 * of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <stdio.h>
26 #include "pan_swizzle.h"
27 #include "pan_allocate.h"
28
/* Spread the low four bits of the input apart by inserting a zero bit above
 * each of them: input bit k ends up at result bit 2k. For instance, 0x7 --
 * that is, 0b111 -- becomes 0b10101. Input bits above the low nibble are
 * ignored. */

static inline int
space_bits_4(int i)
{
        int spread = 0;

        /* Shifting the isolated bit k left by k moves it to position 2k */
        for (int bit = 0; bit < 4; ++bit)
                spread |= (i & (1 << bit)) << bit;

        return spread;
}
40
/* Lookup tables for the space filler curve. Note this is a 1:1 mapping, just
 * with bits twiddled around.
 *
 * space_filler[y][x] holds the index assigned to row y, column x of a 16x16
 * group: the even bits come from (y ^ x) and the odd bits from y, so every
 * entry fits in one byte.
 *
 * space_filler_packed4[y][q] packs the four consecutive entries
 * space_filler[y][4q .. 4q + 3] into one word, one byte per entry, with the
 * lowest column in the least significant byte. */

uint32_t space_filler[16][16];
uint32_t space_filler_packed4[16][4];

/* Populate both lookup tables. The swizzle routines in this file read these
 * tables, so they only produce a meaningful layout once this has run. */

void
panfrost_generate_space_filler_indices(void)
{
        for (int y = 0; y < 16; ++y) {
                for (int x = 0; x < 16; ++x) {
                        space_filler[y][x] =
                                space_bits_4(y ^ x) | (space_bits_4(y) << 1);
                }

                /* Pack the row just computed, four entries per word */
                for (int q = 0; q < 4; ++q) {
                        space_filler_packed4[y][q] =
                                (space_filler[y][(q * 4) + 0] << 0) |
                                (space_filler[y][(q * 4) + 1] << 8) |
                                (space_filler[y][(q * 4) + 2] << 16) |
                                (space_filler[y][(q * 4) + 3] << 24);
                }
        }
}
65
66 static void
67 swizzle_bpp1_align16(int width, int height, int source_stride, int block_pitch,
68 const uint8_t *pixels,
69 uint8_t *ldest)
70 {
71 for (int y = 0; y < height; ++y) {
72 {
73 int block_y = y & ~(0x0f);
74 int rem_y = y & 0x0f;
75 uint8_t *block_start_s = ldest + (block_y * block_pitch);
76 const uint8_t *source_start = pixels + (y * source_stride);
77 const uint8_t *source_end = source_start + width;
78
79 /* Operate on blocks of 16 pixels to minimise bookkeeping */
80
81 for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) {
82 const uint32_t *src_32 = (const uint32_t *) source_start;
83
84 for (int q = 0; q < 4; ++q) {
85 uint32_t src = src_32[q];
86 uint32_t spaced = space_filler_packed4[rem_y][q];
87 uint16_t *bs = (uint16_t *) block_start_s;
88
89 int spacedA = (spaced >> 0) & 0xFF;
90 int spacedB = (spaced >> 16) & 0xFF;
91
92 bs[spacedA >> 1] = (src >> 0) & 0xFFFF;
93 bs[spacedB >> 1] = (src >> 16) & 0xFFFF;
94 }
95 }
96 }
97
98 ++y;
99
100 {
101 int block_y = y & ~(0x0f);
102 int rem_y = y & 0x0f;
103 uint8_t *block_start_s = ldest + (block_y * block_pitch);
104 const uint8_t *source_start = pixels + (y * source_stride);
105 const uint8_t *source_end = source_start + width;
106
107 /* Operate on blocks of 16 pixels to minimise bookkeeping */
108
109 for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) {
110 const uint32_t *src_32 = (const uint32_t *) source_start;
111
112 for (int q = 0; q < 4; ++q) {
113 uint32_t src = src_32[q];
114 uint32_t spaced = space_filler_packed4[rem_y][q];
115
116 block_start_s[(spaced >> 0) & 0xFF] = (src >> 0) & 0xFF;
117 block_start_s[(spaced >> 8) & 0xFF] = (src >> 8) & 0xFF;
118
119 block_start_s[(spaced >> 16) & 0xFF] = (src >> 16) & 0xFF;
120 block_start_s[(spaced >> 24) & 0xFF] = (src >> 24) & 0xFF;
121 }
122 }
123 }
124
125 }
126 }
127
128 static void
129 swizzle_bpp4_align16(int width, int height, int source_stride, int block_pitch,
130 const uint32_t *pixels,
131 uint32_t *ldest)
132 {
133 for (int y = 0; y < height; ++y) {
134 int block_y = y & ~(0x0f);
135 int rem_y = y & 0x0f;
136 uint32_t *block_start_s = ldest + (block_y * block_pitch);
137 const uint32_t *source_start = pixels + (y * source_stride);
138 const uint32_t *source_end = source_start + width;
139
140 /* Operate on blocks of 16 pixels to minimise bookkeeping */
141
142 for (; source_start < source_end; block_start_s += 16 * 16, source_start += 16) {
143 for (int j = 0; j < 16; ++j)
144 block_start_s[space_filler[rem_y][j]] = source_start[j];
145 }
146 }
147 }
148
149 void
150 panfrost_texture_swizzle(int width, int height, int bytes_per_pixel, int source_stride,
151 const uint8_t *pixels,
152 uint8_t *ldest)
153 {
154 /* Calculate maximum size, overestimating a bit */
155 int block_pitch = ALIGN(width, 16) >> 4;
156
157 /* Use fast path if available */
158 if (bytes_per_pixel == 4 /* && (ALIGN(width, 16) == width) */) {
159 swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest);
160 return;
161 } else if (bytes_per_pixel == 1 /* && (ALIGN(width, 16) == width) */) {
162 swizzle_bpp1_align16(width, height, source_stride, (block_pitch * 256 >> 4), pixels, (uint8_t *) ldest);
163 return;
164 }
165
166 /* Otherwise, default back on generic path */
167
168 for (int y = 0; y < height; ++y) {
169 int block_y = y >> 4;
170 int rem_y = y & 0x0F;
171 int block_start_s = block_y * block_pitch * 256;
172 int source_start = y * source_stride;
173
174 for (int x = 0; x < width; ++x) {
175 int block_x_s = (x >> 4) * 256;
176 int rem_x = x & 0x0F;
177
178 int index = space_filler[rem_y][rem_x];
179 const uint8_t *source = &pixels[source_start + bytes_per_pixel * x];
180 uint8_t *dest = ldest + bytes_per_pixel * (block_start_s + block_x_s + index);
181
182 for (int b = 0; b < bytes_per_pixel; ++b)
183 dest[b] = source[b];
184 }
185 }
186 }
187
188
/* Upper bound, in bytes, on the storage needed for a swizzled image of the
 * given dimensions. The result deliberately overestimates a bit: the width is
 * rounded up to whole 16-pixel groups and one more row of groups than
 * (height / 16) is always counted. */

unsigned
panfrost_swizzled_size(int width, int height, int bytes_per_pixel)
{
        int groups_across = ALIGN(width, 16) >> 4;
        int groups_down = (height >> 4) + 1;

        /* Each group holds 256 pixels */
        return bytes_per_pixel * 256 * groups_down * groups_across;
}