pan/decode: Drop legacy 32-bit job support
[mesa.git] / src / panfrost / lib / pan_texture.c
1 /*
2 * Copyright (C) 2008 VMware, Inc.
3 * Copyright (C) 2014 Broadcom
4 * Copyright (C) 2018-2019 Alyssa Rosenzweig
5 * Copyright (C) 2019-2020 Collabora, Ltd.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * SOFTWARE.
25 *
26 */
27
28 #include "util/macros.h"
29 #include "util/u_math.h"
30 #include "pan_texture.h"
31
32 /* Generates a texture descriptor. Ideally, descriptors are immutable after the
33 * texture is created, so we can keep these hanging around in GPU memory in a
34 * dedicated BO and not have to worry. In practice there are some minor gotchas
35 * with this (the driver sometimes will change the format of a texture on the
36 * fly for compression) but it's fast enough to just regenerate the descriptor
37 * in those cases, rather than monkeypatching at drawtime. A texture descriptor
38 * consists of a 32-byte header followed by pointers.
39 */
40
41 /* List of supported modifiers, in descending order of preference. AFBC is
42 * faster than u-interleaved tiling which is faster than linear. Within AFBC,
43 * enabling the YUV-like transform is typically a win where possible. */
44
45 uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = {
46 DRM_FORMAT_MOD_ARM_AFBC(
47 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
48 AFBC_FORMAT_MOD_SPARSE |
49 AFBC_FORMAT_MOD_YTR),
50
51 DRM_FORMAT_MOD_ARM_AFBC(
52 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
53 AFBC_FORMAT_MOD_SPARSE),
54
55 DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
56 DRM_FORMAT_MOD_LINEAR
57 };
58
59 /* Map modifiers to mali_texture_layout for packing in a texture descriptor */
60
61 static enum mali_texture_layout
62 panfrost_modifier_to_layout(uint64_t modifier)
63 {
64 if (drm_is_afbc(modifier))
65 return MALI_TEXTURE_LAYOUT_AFBC;
66 else if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
67 return MALI_TEXTURE_LAYOUT_TILED;
68 else if (modifier == DRM_FORMAT_MOD_LINEAR)
69 return MALI_TEXTURE_LAYOUT_LINEAR;
70 else
71 unreachable("Invalid modifer");
72 }
73
74 /* Check if we need to set a custom stride by computing the "expected"
75 * stride and comparing it to what the user actually wants. Only applies
76 * to linear textures, since tiled/compressed textures have strict
77 * alignment requirements for their strides as it is */
78
79 static bool
80 panfrost_needs_explicit_stride(
81 struct panfrost_slice *slices,
82 uint16_t width,
83 unsigned first_level, unsigned last_level,
84 unsigned bytes_per_pixel)
85 {
86 for (unsigned l = first_level; l <= last_level; ++l) {
87 unsigned actual = slices[l].stride;
88 unsigned expected = u_minify(width, l) * bytes_per_pixel;
89
90 if (actual != expected)
91 return true;
92 }
93
94 return false;
95 }
96
/* A Scalable Texture Compression (ASTC) corresponds to just a few texture
 * types in the hardware, but in fact can be parametrized to have various
 * widths and heights for the so-called "stretch factor". It turns out these
 * parameters are stuffed in the bottom bits of the payload pointers. This
 * function computes these magic stuffing constants based on the ASTC format
 * in use. The constant in a given dimension is 3 bits, and two are stored
 * side-by-side for each active dimension.
 *
 * dim must lie in [4, 12]. The result is dim - 4, except that 12 is clamped
 * to 11 first, so both 11 and 12 encode as 7.
 */

static unsigned
panfrost_astc_stretch(unsigned dim)
{
        assert(dim >= 4 && dim <= 12);

        /* Clamp so the encoded value never exceeds 7 (3 bits) */
        unsigned clamped = MIN2(dim, 11);

        return clamped - 4;
}
112
113 /* Texture addresses are tagged with information about compressed formats.
114 * AFBC uses a bit for whether the colorspace transform is enabled (RGB and
115 * RGBA only).
116 * For ASTC, this is a "stretch factor" encoding the block size. */
117
118 static unsigned
119 panfrost_compression_tag(
120 const struct util_format_description *desc,
121 enum mali_format format, uint64_t modifier)
122 {
123 if (drm_is_afbc(modifier))
124 return (modifier & AFBC_FORMAT_MOD_YTR) ? 1 : 0;
125 else if (format == MALI_ASTC_2D_LDR || format == MALI_ASTC_2D_HDR)
126 return (panfrost_astc_stretch(desc->block.height) << 3) |
127 panfrost_astc_stretch(desc->block.width);
128 else
129 return 0;
130 }
131
132
/* Cubemaps have 6 faces as "layers" in between each actual layer, so the
 * incoming layer range really counts faces. Split it up: the face indices are
 * the layer indices modulo 6, and the layer indices are divided by 6 in
 * place. TODO: logic wrong in the asserted out cases ... can they happen,
 * perhaps from cubemap arrays? */

static void
panfrost_adjust_cube_dimensions(
                unsigned *first_face, unsigned *last_face,
                unsigned *first_layer, unsigned *last_layer)
{
        unsigned face_lo = *first_layer % 6;
        unsigned face_hi = *last_layer % 6;
        unsigned layer_lo = *first_layer / 6;
        unsigned layer_hi = *last_layer / 6;

        *first_face = face_lo;
        *last_face = face_hi;
        *first_layer = layer_lo;
        *last_layer = layer_hi;

        /* Either a single cube is addressed, or the range covers whole cubes
         * (face 0 through face 5) */
        assert((layer_lo == layer_hi) || (face_lo == 0 && face_hi == 5));
}
149
/* Following the texture descriptor is a number of pointers. How many? One per
 * (level, layer, face, sample) combination in the requested ranges, where a
 * sample count of zero is treated as one. For cubes, the layer range is first
 * split into faces and layers. When strides are emitted manually, each pointer
 * is accompanied by a stride word, doubling the count. */

static unsigned
panfrost_texture_num_elements(
                unsigned first_level, unsigned last_level,
                unsigned first_layer, unsigned last_layer,
                unsigned nr_samples,
                bool is_cube, bool manual_stride)
{
        unsigned first_face = 0, last_face = 0;

        if (is_cube) {
                panfrost_adjust_cube_dimensions(&first_face, &last_face,
                                &first_layer, &last_layer);
        }

        /* Multiply up the inclusive ranges one dimension at a time */
        unsigned count = MAX2(nr_samples, 1);
        count *= 1 + last_level - first_level;
        count *= 1 + last_layer - first_layer;
        count *= 1 + last_face - first_face;

        return manual_stride ? (count * 2) : count;
}
176
177 /* Conservative estimate of the size of the texture payload a priori.
178 * Average case, size equal to the actual size. Worst case, off by 2x (if
179 * a manual stride is not needed on a linear texture). Returned value
180 * must be greater than or equal to the actual size, so it's safe to use
181 * as an allocation amount */
182
183 unsigned
184 panfrost_estimate_texture_payload_size(
185 unsigned first_level, unsigned last_level,
186 unsigned first_layer, unsigned last_layer,
187 unsigned nr_samples,
188 enum mali_texture_dimension dim, uint64_t modifier)
189 {
190 /* Assume worst case */
191 unsigned manual_stride = (modifier == DRM_FORMAT_MOD_LINEAR);
192
193 unsigned elements = panfrost_texture_num_elements(
194 first_level, last_level,
195 first_layer, last_layer,
196 nr_samples,
197 dim == MALI_TEXTURE_DIMENSION_CUBE, manual_stride);
198
199 return sizeof(mali_ptr) * elements;
200 }
201
202 /* Bifrost requires a tile stride for tiled textures. This stride is computed
203 * as (16 * bpp * width) assuming there is at least one tile (width >= 16).
204 * Otherwise if height <= 16, the blob puts zero. Interactions with AFBC are
205 * currently unknown.
206 */
207
208 static unsigned
209 panfrost_nonlinear_stride(uint64_t modifier,
210 unsigned bytes_per_pixel,
211 unsigned width,
212 unsigned height)
213 {
214 if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED) {
215 return (height <= 16) ? 0 : (16 * bytes_per_pixel * ALIGN_POT(width, 16));
216 } else {
217 unreachable("TODO: AFBC on Bifrost");
218 }
219 }
220
221 static void
222 panfrost_emit_texture_payload(
223 mali_ptr *payload,
224 const struct util_format_description *desc,
225 enum mali_format mali_format,
226 enum mali_texture_dimension dim,
227 uint64_t modifier,
228 unsigned width, unsigned height,
229 unsigned first_level, unsigned last_level,
230 unsigned first_layer, unsigned last_layer,
231 unsigned nr_samples,
232 unsigned cube_stride,
233 bool manual_stride,
234 mali_ptr base,
235 struct panfrost_slice *slices)
236 {
237 base |= panfrost_compression_tag(desc, mali_format, modifier);
238
239 /* Inject the addresses in, interleaving array indices, mip levels,
240 * cube faces, and strides in that order */
241
242 unsigned first_face = 0, last_face = 0, face_mult = 1;
243
244 if (dim == MALI_TEXTURE_DIMENSION_CUBE) {
245 face_mult = 6;
246 panfrost_adjust_cube_dimensions(&first_face, &last_face, &first_layer, &last_layer);
247 }
248
249 nr_samples = MAX2(nr_samples, 1);
250
251 unsigned idx = 0;
252
253 for (unsigned w = first_layer; w <= last_layer; ++w) {
254 for (unsigned l = first_level; l <= last_level; ++l) {
255 for (unsigned f = first_face; f <= last_face; ++f) {
256 for (unsigned s = 0; s < nr_samples; ++s) {
257 payload[idx++] = base + panfrost_texture_offset(
258 slices, dim == MALI_TEXTURE_DIMENSION_3D,
259 cube_stride, l, w * face_mult + f, s);
260
261 if (manual_stride) {
262 payload[idx++] = (modifier == DRM_FORMAT_MOD_LINEAR) ?
263 slices[l].stride :
264 panfrost_nonlinear_stride(modifier,
265 MAX2(desc->block.bits / 8, 1),
266 u_minify(width, l),
267 u_minify(height, l));
268 }
269 }
270 }
271 }
272 }
273 }
274
/* Swizzle selecting R for the first component, followed by the constants
 * 0, 0 and 1. Channel selectors are placed at 3-bit intervals. The whole
 * replacement list is parenthesized so the macro acts as a single value when
 * combined with higher-precedence operators such as & or >>. */
#define MALI_SWIZZLE_R001 \
        ((MALI_CHANNEL_R << 0) | \
         (MALI_CHANNEL_0 << 3) | \
         (MALI_CHANNEL_0 << 6) | \
         (MALI_CHANNEL_1 << 9))
280
/* Swizzle selecting A for the first component, followed by the constants
 * 0, 0 and 1. Channel selectors are placed at 3-bit intervals. The whole
 * replacement list is parenthesized so the macro acts as a single value when
 * combined with higher-precedence operators such as & or >>. */
#define MALI_SWIZZLE_A001 \
        ((MALI_CHANNEL_A << 0) | \
         (MALI_CHANNEL_0 << 3) | \
         (MALI_CHANNEL_0 << 6) | \
         (MALI_CHANNEL_1 << 9))
286
287
288 void
289 panfrost_new_texture(
290 void *out,
291 uint16_t width, uint16_t height,
292 uint16_t depth, uint16_t array_size,
293 enum pipe_format format,
294 enum mali_texture_dimension dim,
295 uint64_t modifier,
296 unsigned first_level, unsigned last_level,
297 unsigned first_layer, unsigned last_layer,
298 unsigned nr_samples,
299 unsigned cube_stride,
300 unsigned swizzle,
301 mali_ptr base,
302 struct panfrost_slice *slices)
303 {
304 const struct util_format_description *desc =
305 util_format_description(format);
306
307 unsigned bytes_per_pixel = util_format_get_blocksize(format);
308
309 enum mali_format mali_format = panfrost_pipe_format_table[desc->format].hw;
310 assert(mali_format);
311
312 bool manual_stride = (modifier == DRM_FORMAT_MOD_LINEAR)
313 && panfrost_needs_explicit_stride(slices, width,
314 first_level, last_level, bytes_per_pixel);
315
316 unsigned format_swizzle = (format == PIPE_FORMAT_X24S8_UINT) ?
317 MALI_SWIZZLE_A001 :
318 (format == PIPE_FORMAT_S8_UINT) ?
319 MALI_SWIZZLE_R001 :
320 panfrost_translate_swizzle_4(desc->swizzle);
321
322 bool srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
323
324 pan_pack(out, MIDGARD_TEXTURE, cfg) {
325 cfg.width = u_minify(width, first_level);
326 cfg.height = u_minify(height, first_level);
327 cfg.depth = u_minify(depth, first_level);
328 cfg.array_size = array_size;
329 cfg.format = format_swizzle | (mali_format << 12) | (srgb << 20);
330 cfg.dimension = dim;
331 cfg.texel_ordering = panfrost_modifier_to_layout(modifier);
332 cfg.manual_stride = manual_stride;
333 cfg.levels = last_level - first_level;
334 cfg.swizzle = swizzle;
335 };
336
337 panfrost_emit_texture_payload(
338 (mali_ptr *) (out + MALI_MIDGARD_TEXTURE_LENGTH),
339 desc,
340 mali_format,
341 dim,
342 modifier,
343 width, height,
344 first_level, last_level,
345 first_layer, last_layer,
346 nr_samples,
347 cube_stride,
348 manual_stride,
349 base,
350 slices);
351 }
352
353 void
354 panfrost_new_texture_bifrost(
355 struct mali_bifrost_texture_packed *out,
356 uint16_t width, uint16_t height,
357 uint16_t depth, uint16_t array_size,
358 enum pipe_format format,
359 enum mali_texture_dimension dim,
360 uint64_t modifier,
361 unsigned first_level, unsigned last_level,
362 unsigned first_layer, unsigned last_layer,
363 unsigned nr_samples,
364 unsigned cube_stride,
365 unsigned swizzle,
366 mali_ptr base,
367 struct panfrost_slice *slices,
368 struct panfrost_bo *payload)
369 {
370 const struct util_format_description *desc =
371 util_format_description(format);
372
373 enum mali_format mali_format = panfrost_pipe_format_table[desc->format].hw;
374 assert(mali_format);
375
376 panfrost_emit_texture_payload(
377 (mali_ptr *) payload->cpu,
378 desc,
379 mali_format,
380 dim,
381 modifier,
382 width, height,
383 first_level, last_level,
384 first_layer, last_layer,
385 nr_samples,
386 cube_stride,
387 true, /* Stride explicit on Bifrost */
388 base,
389 slices);
390
391 bool srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
392
393 pan_pack(out, BIFROST_TEXTURE, cfg) {
394 cfg.dimension = dim;
395 cfg.format = (mali_format << 12) | (srgb << 20);
396 cfg.width = u_minify(width, first_level);
397 cfg.height = u_minify(height, first_level);
398 cfg.swizzle = swizzle;
399 cfg.texel_ordering = panfrost_modifier_to_layout(modifier);
400 cfg.levels = last_level - first_level;
401 cfg.surfaces = payload->gpu;
402
403 /* Use the sampler descriptor for LOD clamping */
404 cfg.minimum_lod = 0;
405 cfg.maximum_lod = last_level - first_level;
406 }
407 }
408
409 /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
410 * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
411 * This feature is also known as "transaction elimination". */
412
413 #define CHECKSUM_TILE_WIDTH 16
414 #define CHECKSUM_TILE_HEIGHT 16
415 #define CHECKSUM_BYTES_PER_TILE 8
416
417 unsigned
418 panfrost_compute_checksum_size(
419 struct panfrost_slice *slice,
420 unsigned width,
421 unsigned height)
422 {
423 unsigned aligned_width = ALIGN_POT(width, CHECKSUM_TILE_WIDTH);
424 unsigned aligned_height = ALIGN_POT(height, CHECKSUM_TILE_HEIGHT);
425
426 unsigned tile_count_x = aligned_width / CHECKSUM_TILE_WIDTH;
427 unsigned tile_count_y = aligned_height / CHECKSUM_TILE_HEIGHT;
428
429 slice->checksum_stride = tile_count_x * CHECKSUM_BYTES_PER_TILE;
430
431 return slice->checksum_stride * tile_count_y;
432 }
433
434 unsigned
435 panfrost_get_layer_stride(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level)
436 {
437 return is_3d ? slices[level].size0 : cube_stride;
438 }
439
440 /* Computes the offset into a texture at a particular level/face. Add to
441 * the base address of a texture to get the address to that level/face */
442
443 unsigned
444 panfrost_texture_offset(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level, unsigned face, unsigned sample)
445 {
446 unsigned layer_stride = panfrost_get_layer_stride(slices, is_3d, cube_stride, level);
447 return slices[level].offset + (face * layer_stride) + (sample * slices[level].size0);
448 }