4466b6f0ca49618bf64cee28b9a78e41ba5ebc44
[mesa.git] / src / panfrost / lib / pan_texture.c
1 /*
2 * Copyright (C) 2008 VMware, Inc.
3 * Copyright (C) 2014 Broadcom
4 * Copyright (C) 2018-2019 Alyssa Rosenzweig
5 * Copyright (C) 2019-2020 Collabora, Ltd.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * SOFTWARE.
25 *
26 */
27
28 #include "util/macros.h"
29 #include "util/u_math.h"
30 #include "pan_texture.h"
31
32 /* Generates a texture descriptor. Ideally, descriptors are immutable after the
33 * texture is created, so we can keep these hanging around in GPU memory in a
34 * dedicated BO and not have to worry. In practice there are some minor gotchas
35 * with this (the driver sometimes will change the format of a texture on the
36 * fly for compression) but it's fast enough to just regenerate the descriptor
37 * in those cases, rather than monkeypatching at drawtime.
38 *
39 * A texture descriptor consists of a 32-byte mali_texture_descriptor structure
40 * followed by a variable number of pointers. Due to this variance and
41 * potentially large size, we actually upload directly rather than returning
42 * the descriptor. Whether the user does a copy themselves or not is irrelevant
43 * to us here.
44 */
45
46 /* List of supported modifiers, in descending order of preference. AFBC is
47 * faster than u-interleaved tiling which is faster than linear. Within AFBC,
48 * enabling the YUV-like transform is typically a win where possible. */
49
50 uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = {
51 DRM_FORMAT_MOD_ARM_AFBC(
52 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
53 AFBC_FORMAT_MOD_SPARSE |
54 AFBC_FORMAT_MOD_YTR),
55
56 DRM_FORMAT_MOD_ARM_AFBC(
57 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
58 AFBC_FORMAT_MOD_SPARSE),
59
60 DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
61 DRM_FORMAT_MOD_LINEAR
62 };
63
64 /* Map modifiers to mali_texture_layout for packing in a texture descriptor */
65
66 static enum mali_texture_layout
67 panfrost_modifier_to_layout(uint64_t modifier)
68 {
69 if (drm_is_afbc(modifier))
70 return MALI_TEXTURE_AFBC;
71 else if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
72 return MALI_TEXTURE_TILED;
73 else if (modifier == DRM_FORMAT_MOD_LINEAR)
74 return MALI_TEXTURE_LINEAR;
75 else
76 unreachable("Invalid modifer");
77 }
78
79 /* Check if we need to set a custom stride by computing the "expected"
80 * stride and comparing it to what the user actually wants. Only applies
81 * to linear textures, since tiled/compressed textures have strict
82 * alignment requirements for their strides as it is */
83
84 static bool
85 panfrost_needs_explicit_stride(
86 struct panfrost_slice *slices,
87 uint16_t width,
88 unsigned first_level, unsigned last_level,
89 unsigned bytes_per_pixel)
90 {
91 for (unsigned l = first_level; l <= last_level; ++l) {
92 unsigned actual = slices[l].stride;
93 unsigned expected = u_minify(width, l) * bytes_per_pixel;
94
95 if (actual != expected)
96 return true;
97 }
98
99 return false;
100 }
101
/* Adaptive Scalable Texture Compression (ASTC) corresponds to just a few
 * texture types in the hardware, but can be parametrized with various block
 * widths and heights, the so-called "stretch factor". These parameters are
 * stuffed into the bottom bits of the payload pointers. This function
 * computes the stuffing constant for a single block dimension. The constant
 * for a given dimension is 3 bits wide, and two are stored side-by-side for
 * each active dimension.
 *
 * dim must be in [4, 12]. The result is the dimension minus 4, with 12
 * clamped down so that it encodes the same as 11.
 */

static unsigned
panfrost_astc_stretch(unsigned dim)
{
        assert(dim >= 4 && dim <= 12);

        unsigned clamped = MIN2(dim, 11);

        return clamped - 4;
}
117
118 /* Texture addresses are tagged with information about compressed formats.
119 * AFBC uses a bit for whether the colorspace transform is enabled (RGB and
120 * RGBA only).
121 * For ASTC, this is a "stretch factor" encoding the block size. */
122
123 static unsigned
124 panfrost_compression_tag(
125 const struct util_format_description *desc,
126 enum mali_format format, uint64_t modifier)
127 {
128 if (drm_is_afbc(modifier))
129 return (modifier & AFBC_FORMAT_MOD_YTR) ? 1 : 0;
130 else if (format == MALI_ASTC_2D_LDR || format == MALI_ASTC_2D_HDR)
131 return (panfrost_astc_stretch(desc->block.height) << 3) |
132 panfrost_astc_stretch(desc->block.width);
133 else
134 return 0;
135 }
136
137
/* Cubemaps store their 6 faces as "layers" in between each actual layer, so
 * an incoming layer index is really (layer * 6 + face). Split the layer range
 * into a face range and a true layer range:
 *
 *   *first_face / *last_face   receive the layer indices modulo 6
 *   *first_layer / *last_layer are divided by 6 in place
 *
 * The four pointers are expected to refer to distinct objects.
 *
 * TODO: the logic is wrong in the asserted-out cases (a range spanning
 * several layers without covering whole cubes) ... can they happen, perhaps
 * from cubemap arrays? */

static void
panfrost_adjust_cube_dimensions(
                unsigned *first_face, unsigned *last_face,
                unsigned *first_layer, unsigned *last_layer)
{
        const unsigned lo = *first_layer;
        const unsigned hi = *last_layer;

        *first_face = lo % 6;
        *last_face = hi % 6;

        *first_layer = lo / 6;
        *last_layer = hi / 6;

        /* Either we stay within one layer, or we cover complete cubes */
        assert((*first_layer == *last_layer) || (*first_face == 0 && *last_face == 5));
}
154
/* Count the payload entries that follow a texture descriptor: one per
 * (level, layer, face, sample) combination in the given inclusive ranges,
 * doubled when a manual stride accompanies every pointer. For cubes the
 * layer range is first split into faces and true layers. An nr_samples of 0
 * counts as a single sample. */

static unsigned
panfrost_texture_num_elements(
                unsigned first_level, unsigned last_level,
                unsigned first_layer, unsigned last_layer,
                unsigned nr_samples,
                bool is_cube, bool manual_stride)
{
        unsigned first_face = 0, last_face = 0;

        if (is_cube) {
                panfrost_adjust_cube_dimensions(&first_face, &last_face,
                                &first_layer, &last_layer);
        }

        /* All ranges are inclusive, hence the +1 on each extent */
        unsigned count = 1 + last_level - first_level;
        count *= 1 + last_layer - first_layer;
        count *= 1 + last_face - first_face;
        count *= MAX2(nr_samples, 1);

        return manual_stride ? count * 2 : count;
}
181
182 /* Conservative estimate of the size of the texture payload a priori.
183 * Average case, size equal to the actual size. Worst case, off by 2x (if
184 * a manual stride is not needed on a linear texture). Returned value
185 * must be greater than or equal to the actual size, so it's safe to use
186 * as an allocation amount */
187
188 unsigned
189 panfrost_estimate_texture_payload_size(
190 unsigned first_level, unsigned last_level,
191 unsigned first_layer, unsigned last_layer,
192 unsigned nr_samples,
193 enum mali_texture_type type, uint64_t modifier)
194 {
195 /* Assume worst case */
196 unsigned manual_stride = (modifier == DRM_FORMAT_MOD_LINEAR);
197
198 unsigned elements = panfrost_texture_num_elements(
199 first_level, last_level,
200 first_layer, last_layer,
201 nr_samples,
202 type == MALI_TEX_CUBE, manual_stride);
203
204 return sizeof(mali_ptr) * elements;
205 }
206
207 /* Bifrost requires a tile stride for tiled textures. This stride is computed
208 * as (16 * bpp * width) assuming there is at least one tile (width >= 16).
209 * Otherwise if height <= 16, the blob puts zero. Interactions with AFBC are
210 * currently unknown.
211 */
212
213 static unsigned
214 panfrost_nonlinear_stride(uint64_t modifier,
215 unsigned bytes_per_pixel,
216 unsigned width,
217 unsigned height)
218 {
219 if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) {
220 return (height <= 16) ? 0 : (16 * bytes_per_pixel * ALIGN_POT(width, 16));
221 } else {
222 unreachable("TODO: AFBC on Bifrost");
223 }
224 }
225
226 static void
227 panfrost_emit_texture_payload(
228 mali_ptr *payload,
229 const struct util_format_description *desc,
230 enum mali_format mali_format,
231 enum mali_texture_type type,
232 uint64_t modifier,
233 unsigned width, unsigned height,
234 unsigned first_level, unsigned last_level,
235 unsigned first_layer, unsigned last_layer,
236 unsigned nr_samples,
237 unsigned cube_stride,
238 bool manual_stride,
239 mali_ptr base,
240 struct panfrost_slice *slices)
241 {
242 base |= panfrost_compression_tag(desc, mali_format, modifier);
243
244 /* Inject the addresses in, interleaving array indices, mip levels,
245 * cube faces, and strides in that order */
246
247 unsigned first_face = 0, last_face = 0, face_mult = 1;
248
249 if (type == MALI_TEX_CUBE) {
250 face_mult = 6;
251 panfrost_adjust_cube_dimensions(&first_face, &last_face, &first_layer, &last_layer);
252 }
253
254 nr_samples = MAX2(nr_samples, 1);
255
256 unsigned idx = 0;
257
258 for (unsigned w = first_layer; w <= last_layer; ++w) {
259 for (unsigned l = first_level; l <= last_level; ++l) {
260 for (unsigned f = first_face; f <= last_face; ++f) {
261 for (unsigned s = 0; s < nr_samples; ++s) {
262 payload[idx++] = base + panfrost_texture_offset(
263 slices, type == MALI_TEX_3D,
264 cube_stride, l, w * face_mult + f, s);
265
266 if (manual_stride) {
267 payload[idx++] = (modifier == DRM_FORMAT_MOD_LINEAR) ?
268 slices[l].stride :
269 panfrost_nonlinear_stride(modifier,
270 MAX2(desc->block.bits / 8, 1),
271 u_minify(width, l),
272 u_minify(height, l));
273 }
274 }
275 }
276 }
277 }
278 }
279
/* Fixed swizzles, packed as four 3-bit channel selectors at bit offsets
 * 0, 3, 6 and 9. The whole expansion is parenthesized so the macros stay
 * correct when used next to operators that bind tighter than '|'. */

/* (red, 0, 0, 1) */
#define MALI_SWIZZLE_R001 \
        ((MALI_CHANNEL_RED << 0) | \
         (MALI_CHANNEL_ZERO << 3) | \
         (MALI_CHANNEL_ZERO << 6) | \
         (MALI_CHANNEL_ONE << 9))

/* (alpha, 0, 0, 1) */
#define MALI_SWIZZLE_A001 \
        ((MALI_CHANNEL_ALPHA << 0) | \
         (MALI_CHANNEL_ZERO << 3) | \
         (MALI_CHANNEL_ZERO << 6) | \
         (MALI_CHANNEL_ONE << 9))
291
292
293 void
294 panfrost_new_texture(
295 void *out,
296 uint16_t width, uint16_t height,
297 uint16_t depth, uint16_t array_size,
298 enum pipe_format format,
299 enum mali_texture_type type,
300 uint64_t modifier,
301 unsigned first_level, unsigned last_level,
302 unsigned first_layer, unsigned last_layer,
303 unsigned nr_samples,
304 unsigned cube_stride,
305 unsigned swizzle,
306 mali_ptr base,
307 struct panfrost_slice *slices)
308 {
309 const struct util_format_description *desc =
310 util_format_description(format);
311
312 unsigned bytes_per_pixel = util_format_get_blocksize(format);
313
314 enum mali_format mali_format = panfrost_pipe_format_table[desc->format].hw;
315 assert(mali_format);
316
317 bool manual_stride = (modifier == DRM_FORMAT_MOD_LINEAR)
318 && panfrost_needs_explicit_stride(slices, width,
319 first_level, last_level, bytes_per_pixel);
320
321 struct mali_texture_descriptor descriptor = {
322 .width = MALI_POSITIVE(u_minify(width, first_level)),
323 .height = MALI_POSITIVE(u_minify(height, first_level)),
324 .depth = MALI_POSITIVE(u_minify(depth, first_level)),
325 .array_size = MALI_POSITIVE(array_size),
326 .format = {
327 .swizzle = (format == PIPE_FORMAT_X24S8_UINT) ?
328 MALI_SWIZZLE_A001 :
329 (format == PIPE_FORMAT_S8_UINT) ?
330 MALI_SWIZZLE_R001 :
331 panfrost_translate_swizzle_4(desc->swizzle),
332 .format = mali_format,
333 .srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB),
334 .type = type,
335 .layout = panfrost_modifier_to_layout(modifier),
336 .manual_stride = manual_stride,
337 .unknown2 = 1,
338 },
339 .levels = last_level - first_level,
340 .swizzle = swizzle
341 };
342
343 memcpy(out, &descriptor, sizeof(descriptor));
344
345 mali_ptr *payload = (mali_ptr *) (out + sizeof(struct mali_texture_descriptor));
346 panfrost_emit_texture_payload(
347 payload,
348 desc,
349 mali_format,
350 type,
351 modifier,
352 width, height,
353 first_level, last_level,
354 first_layer, last_layer,
355 nr_samples,
356 cube_stride,
357 manual_stride,
358 base,
359 slices);
360 }
361
362 void
363 panfrost_new_texture_bifrost(
364 struct bifrost_texture_descriptor *descriptor,
365 uint16_t width, uint16_t height,
366 uint16_t depth, uint16_t array_size,
367 enum pipe_format format,
368 enum mali_texture_type type,
369 uint64_t modifier,
370 unsigned first_level, unsigned last_level,
371 unsigned first_layer, unsigned last_layer,
372 unsigned nr_samples,
373 unsigned cube_stride,
374 unsigned swizzle,
375 mali_ptr base,
376 struct panfrost_slice *slices,
377 struct panfrost_bo *payload)
378 {
379 const struct util_format_description *desc =
380 util_format_description(format);
381
382 enum mali_format mali_format = panfrost_pipe_format_table[desc->format].hw;
383 assert(mali_format);
384
385 panfrost_emit_texture_payload(
386 (mali_ptr *) payload->cpu,
387 desc,
388 mali_format,
389 type,
390 modifier,
391 width, height,
392 first_level, last_level,
393 first_layer, last_layer,
394 nr_samples,
395 cube_stride,
396 true, /* Stride explicit on Bifrost */
397 base,
398 slices);
399
400 descriptor->format_unk = 0x2;
401 descriptor->type = type;
402 descriptor->format = mali_format;
403 descriptor->srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
404 descriptor->format_unk3 = 0x0;
405 descriptor->width = MALI_POSITIVE(u_minify(width, first_level));
406 descriptor->height = MALI_POSITIVE(u_minify(height, first_level));
407 descriptor->swizzle = swizzle;
408 descriptor->layout = panfrost_modifier_to_layout(modifier),
409 descriptor->levels = last_level - first_level;
410 descriptor->unk1 = 0x0;
411 descriptor->levels_unk = 0;
412 descriptor->level_2 = last_level - first_level;
413 descriptor->payload = payload->gpu;
414 descriptor->array_size = MALI_POSITIVE(array_size);
415 descriptor->unk4 = 0x0;
416 descriptor->depth = MALI_POSITIVE(u_minify(depth, first_level));
417 descriptor->unk5 = 0x0;
418 }
419
420 /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
421 * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
422 * This feature is also known as "transaction elimination". */
423
424 #define CHECKSUM_TILE_WIDTH 16
425 #define CHECKSUM_TILE_HEIGHT 16
426 #define CHECKSUM_BYTES_PER_TILE 8
427
428 unsigned
429 panfrost_compute_checksum_size(
430 struct panfrost_slice *slice,
431 unsigned width,
432 unsigned height)
433 {
434 unsigned aligned_width = ALIGN_POT(width, CHECKSUM_TILE_WIDTH);
435 unsigned aligned_height = ALIGN_POT(height, CHECKSUM_TILE_HEIGHT);
436
437 unsigned tile_count_x = aligned_width / CHECKSUM_TILE_WIDTH;
438 unsigned tile_count_y = aligned_height / CHECKSUM_TILE_HEIGHT;
439
440 slice->checksum_stride = tile_count_x * CHECKSUM_BYTES_PER_TILE;
441
442 return slice->checksum_stride * tile_count_y;
443 }
444
445 unsigned
446 panfrost_get_layer_stride(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level)
447 {
448 return is_3d ? slices[level].size0 : cube_stride;
449 }
450
451 /* Computes the offset into a texture at a particular level/face. Add to
452 * the base address of a texture to get the address to that level/face */
453
454 unsigned
455 panfrost_texture_offset(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level, unsigned face, unsigned sample)
456 {
457 unsigned layer_stride = panfrost_get_layer_stride(slices, is_3d, cube_stride, level);
458 return slices[level].offset + (face * layer_stride) + (sample * slices[level].size0);
459 }