/*
 * Copyright (C) 2008 VMware, Inc.
 * Copyright (C) 2014 Broadcom
 * Copyright (C) 2018-2019 Alyssa Rosenzweig
 * Copyright (C) 2019-2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#include "util/macros.h"
#include "util/u_math.h"
#include "pan_texture.h"

/* Generates a texture descriptor. Ideally, descriptors are immutable after the
 * texture is created, so we can keep these hanging around in GPU memory in a
 * dedicated BO and not have to worry. In practice there are some minor gotchas
 * with this (the driver sometimes will change the format of a texture on the
 * fly for compression) but it's fast enough to just regenerate the descriptor
 * in those cases, rather than monkeypatching at drawtime. A texture descriptor
 * consists of a 32-byte header followed by pointers.
 */

41 /* List of supported modifiers, in descending order of preference. AFBC is
42 * faster than u-interleaved tiling which is faster than linear. Within AFBC,
43 * enabling the YUV-like transform is typically a win where possible. */
45 uint64_t pan_best_modifiers
[PAN_MODIFIER_COUNT
] = {
46 DRM_FORMAT_MOD_ARM_AFBC(
47 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16
|
48 AFBC_FORMAT_MOD_SPARSE
|
51 DRM_FORMAT_MOD_ARM_AFBC(
52 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16
|
53 AFBC_FORMAT_MOD_SPARSE
),
55 DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED
,
59 /* Map modifiers to mali_texture_layout for packing in a texture descriptor */
61 static enum mali_texture_layout
62 panfrost_modifier_to_layout(uint64_t modifier
)
64 if (drm_is_afbc(modifier
))
65 return MALI_TEXTURE_LAYOUT_AFBC
;
66 else if (modifier
== DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED
)
67 return MALI_TEXTURE_LAYOUT_TILED
;
68 else if (modifier
== DRM_FORMAT_MOD_LINEAR
)
69 return MALI_TEXTURE_LAYOUT_LINEAR
;
71 unreachable("Invalid modifer");
74 /* Check if we need to set a custom stride by computing the "expected"
75 * stride and comparing it to what the user actually wants. Only applies
76 * to linear textures, since tiled/compressed textures have strict
77 * alignment requirements for their strides as it is */
80 panfrost_needs_explicit_stride(
81 struct panfrost_slice
*slices
,
83 unsigned first_level
, unsigned last_level
,
84 unsigned bytes_per_pixel
)
86 for (unsigned l
= first_level
; l
<= last_level
; ++l
) {
87 unsigned actual
= slices
[l
].stride
;
88 unsigned expected
= u_minify(width
, l
) * bytes_per_pixel
;
90 if (actual
!= expected
)
/* A Scalable Texture Compression (ASTC) corresponds to just a few texture type
 * in the hardware, but in fact can be parametrized to have various widths and
 * heights for the so-called "stretch factor". It turns out these parameters
 * are stuffed in the bottom bits of the payload pointers. This function
 * computes these magic stuffing constants based on the ASTC format in use. The
 * constant in a given dimension is 3-bits, and two are stored side-by-side for
 * each active dimension.
 *
 * dim must be in [4, 12]; the result is dim - 4 with 12 clamped to 11, so it
 * always fits in the 3-bit field.
 */

static unsigned
panfrost_astc_stretch(unsigned dim)
{
        assert(dim >= 4 && dim <= 12);
        return MIN2(dim, 11) - 4;
}

113 /* Texture addresses are tagged with information about compressed formats.
114 * AFBC uses a bit for whether the colorspace transform is enabled (RGB and
116 * For ASTC, this is a "stretch factor" encoding the block size. */
119 panfrost_compression_tag(
120 const struct util_format_description
*desc
,
121 enum mali_format format
, uint64_t modifier
)
123 if (drm_is_afbc(modifier
))
124 return (modifier
& AFBC_FORMAT_MOD_YTR
) ? 1 : 0;
125 else if (format
== MALI_ASTC_2D_LDR
|| format
== MALI_ASTC_2D_HDR
)
126 return (panfrost_astc_stretch(desc
->block
.height
) << 3) |
127 panfrost_astc_stretch(desc
->block
.width
);
/* Cubemaps have 6 faces as "layers" in between each actual layer. We
 * need to fix this up. TODO: logic wrong in the asserted out cases ...
 * can they happen, perhaps from cubemap arrays?
 *
 * On entry *first_layer / *last_layer count faces (six per cube). On exit
 * they hold the cube index and *first_face / *last_face hold the face index
 * within that cube.
 *
 * NOTE(review): the two `/= 6` statements were restored; the assertion below
 * only makes sense once the layers have been converted to cube indices. */

static void
panfrost_adjust_cube_dimensions(
                unsigned *first_face, unsigned *last_face,
                unsigned *first_layer, unsigned *last_layer)
{
        /* Position within a group of six faces */
        *first_face = *first_layer % 6;
        *last_face = *last_layer % 6;

        /* Index of the group itself */
        *first_layer /= 6;
        *last_layer /= 6;

        /* Either a single cube, or a range made of whole cubes */
        assert((*first_layer == *last_layer) || (*first_face == 0 && *last_face == 5));
}

/* Following the texture descriptor is a number of pointers. How many?
 *
 * One element per (level, layer, face, sample) combination, doubled when a
 * stride word accompanies every pointer.
 *
 * NOTE(review): the `nr_samples` parameter, the `is_cube` guard, the
 * manual-stride doubling and the return were restored from the call in
 * panfrost_estimate_texture_payload_size and its "off by 2x" comment. */

static unsigned
panfrost_texture_num_elements(
                unsigned first_level, unsigned last_level,
                unsigned first_layer, unsigned last_layer,
                unsigned nr_samples,
                bool is_cube, bool manual_stride)
{
        unsigned first_face = 0, last_face = 0;

        if (is_cube) {
                panfrost_adjust_cube_dimensions(&first_face, &last_face,
                                &first_layer, &last_layer);
        }

        unsigned levels = 1 + last_level - first_level;
        unsigned layers = 1 + last_layer - first_layer;
        unsigned faces = 1 + last_face - first_face;
        unsigned num_elements = levels * layers * faces * MAX2(nr_samples, 1);

        /* Each pointer is followed by its stride */
        if (manual_stride)
                num_elements *= 2;

        return num_elements;
}

177 /* Conservative estimate of the size of the texture payload a priori.
178 * Average case, size equal to the actual size. Worst case, off by 2x (if
179 * a manual stride is not needed on a linear texture). Returned value
180 * must be greater than or equal to the actual size, so it's safe to use
181 * as an allocation amount */
184 panfrost_estimate_texture_payload_size(
185 unsigned first_level
, unsigned last_level
,
186 unsigned first_layer
, unsigned last_layer
,
188 enum mali_texture_dimension dim
, uint64_t modifier
)
190 /* Assume worst case */
191 unsigned manual_stride
= (modifier
== DRM_FORMAT_MOD_LINEAR
);
193 unsigned elements
= panfrost_texture_num_elements(
194 first_level
, last_level
,
195 first_layer
, last_layer
,
197 dim
== MALI_TEXTURE_DIMENSION_CUBE
, manual_stride
);
199 return sizeof(mali_ptr
) * elements
;
202 /* Bifrost requires a tile stride for tiled textures. This stride is computed
203 * as (16 * bpp * width) assuming there is at least one tile (width >= 16).
204 * Otherwise if height <= 16, the blob puts zero. Interactions with AFBC are
209 panfrost_nonlinear_stride(uint64_t modifier
,
210 unsigned bytes_per_pixel
,
214 if (modifier
== DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED
) {
215 return (height
<= 16) ? 0 : (16 * bytes_per_pixel
* ALIGN_POT(width
, 16));
217 unreachable("TODO: AFBC on Bifrost");
222 panfrost_emit_texture_payload(
224 const struct util_format_description
*desc
,
225 enum mali_format mali_format
,
226 enum mali_texture_dimension dim
,
228 unsigned width
, unsigned height
,
229 unsigned first_level
, unsigned last_level
,
230 unsigned first_layer
, unsigned last_layer
,
232 unsigned cube_stride
,
235 struct panfrost_slice
*slices
)
237 base
|= panfrost_compression_tag(desc
, mali_format
, modifier
);
239 /* Inject the addresses in, interleaving array indices, mip levels,
240 * cube faces, and strides in that order */
242 unsigned first_face
= 0, last_face
= 0, face_mult
= 1;
244 if (dim
== MALI_TEXTURE_DIMENSION_CUBE
) {
246 panfrost_adjust_cube_dimensions(&first_face
, &last_face
, &first_layer
, &last_layer
);
249 nr_samples
= MAX2(nr_samples
, 1);
253 for (unsigned w
= first_layer
; w
<= last_layer
; ++w
) {
254 for (unsigned l
= first_level
; l
<= last_level
; ++l
) {
255 for (unsigned f
= first_face
; f
<= last_face
; ++f
) {
256 for (unsigned s
= 0; s
< nr_samples
; ++s
) {
257 payload
[idx
++] = base
+ panfrost_texture_offset(
258 slices
, dim
== MALI_TEXTURE_DIMENSION_3D
,
259 cube_stride
, l
, w
* face_mult
+ f
, s
);
262 payload
[idx
++] = (modifier
== DRM_FORMAT_MOD_LINEAR
) ?
264 panfrost_nonlinear_stride(modifier
,
265 MAX2(desc
->block
.bits
/ 8, 1),
267 u_minify(height
, l
));
/* Fixed swizzles used by panfrost_new_texture for the stencil formats: the
 * first component comes from R (resp. A), the next two read as zero and the
 * last reads as one. Each component selector occupies 3 bits. The expansions
 * are fully parenthesized so they can be combined with any operator. */

#define MALI_SWIZZLE_R001 \
        ((MALI_CHANNEL_R << 0) | \
         (MALI_CHANNEL_0 << 3) | \
         (MALI_CHANNEL_0 << 6) | \
         (MALI_CHANNEL_1 << 9))

#define MALI_SWIZZLE_A001 \
        ((MALI_CHANNEL_A << 0) | \
         (MALI_CHANNEL_0 << 3) | \
         (MALI_CHANNEL_0 << 6) | \
         (MALI_CHANNEL_1 << 9))

289 panfrost_new_texture(
291 uint16_t width
, uint16_t height
,
292 uint16_t depth
, uint16_t array_size
,
293 enum pipe_format format
,
294 enum mali_texture_dimension dim
,
296 unsigned first_level
, unsigned last_level
,
297 unsigned first_layer
, unsigned last_layer
,
299 unsigned cube_stride
,
302 struct panfrost_slice
*slices
)
304 const struct util_format_description
*desc
=
305 util_format_description(format
);
307 unsigned bytes_per_pixel
= util_format_get_blocksize(format
);
309 enum mali_format mali_format
= panfrost_pipe_format_table
[desc
->format
].hw
;
312 bool manual_stride
= (modifier
== DRM_FORMAT_MOD_LINEAR
)
313 && panfrost_needs_explicit_stride(slices
, width
,
314 first_level
, last_level
, bytes_per_pixel
);
316 unsigned format_swizzle
= (format
== PIPE_FORMAT_X24S8_UINT
) ?
318 (format
== PIPE_FORMAT_S8_UINT
) ?
320 panfrost_translate_swizzle_4(desc
->swizzle
);
322 bool srgb
= (desc
->colorspace
== UTIL_FORMAT_COLORSPACE_SRGB
);
324 pan_pack(out
, MIDGARD_TEXTURE
, cfg
) {
325 cfg
.width
= u_minify(width
, first_level
);
326 cfg
.height
= u_minify(height
, first_level
);
327 cfg
.depth
= u_minify(depth
, first_level
);
328 cfg
.array_size
= array_size
;
329 cfg
.format
= format_swizzle
| (mali_format
<< 12) | (srgb
<< 20);
331 cfg
.texel_ordering
= panfrost_modifier_to_layout(modifier
);
332 cfg
.manual_stride
= manual_stride
;
333 cfg
.levels
= last_level
- first_level
;
334 cfg
.swizzle
= swizzle
;
337 panfrost_emit_texture_payload(
338 (mali_ptr
*) (out
+ MALI_MIDGARD_TEXTURE_LENGTH
),
344 first_level
, last_level
,
345 first_layer
, last_layer
,
354 panfrost_new_texture_bifrost(
355 struct mali_bifrost_texture_packed
*out
,
356 uint16_t width
, uint16_t height
,
357 uint16_t depth
, uint16_t array_size
,
358 enum pipe_format format
,
359 enum mali_texture_dimension dim
,
361 unsigned first_level
, unsigned last_level
,
362 unsigned first_layer
, unsigned last_layer
,
364 unsigned cube_stride
,
367 struct panfrost_slice
*slices
,
368 struct panfrost_bo
*payload
)
370 const struct util_format_description
*desc
=
371 util_format_description(format
);
373 enum mali_format mali_format
= panfrost_pipe_format_table
[desc
->format
].hw
;
376 panfrost_emit_texture_payload(
377 (mali_ptr
*) payload
->cpu
,
383 first_level
, last_level
,
384 first_layer
, last_layer
,
387 true, /* Stride explicit on Bifrost */
391 bool srgb
= (desc
->colorspace
== UTIL_FORMAT_COLORSPACE_SRGB
);
393 pan_pack(out
, BIFROST_TEXTURE
, cfg
) {
395 cfg
.format
= (mali_format
<< 12) | (srgb
<< 20);
396 cfg
.width
= u_minify(width
, first_level
);
397 cfg
.height
= u_minify(height
, first_level
);
398 cfg
.swizzle
= swizzle
;
399 cfg
.texel_ordering
= panfrost_modifier_to_layout(modifier
);
400 cfg
.levels
= last_level
- first_level
;
401 cfg
.surfaces
= payload
->gpu
;
403 /* Use the sampler descriptor for LOD clamping */
405 cfg
.maximum_lod
= last_level
- first_level
;
409 /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
410 * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
411 * This feature is also known as "transaction elimination". */
413 #define CHECKSUM_TILE_WIDTH 16
414 #define CHECKSUM_TILE_HEIGHT 16
415 #define CHECKSUM_BYTES_PER_TILE 8
418 panfrost_compute_checksum_size(
419 struct panfrost_slice
*slice
,
423 unsigned aligned_width
= ALIGN_POT(width
, CHECKSUM_TILE_WIDTH
);
424 unsigned aligned_height
= ALIGN_POT(height
, CHECKSUM_TILE_HEIGHT
);
426 unsigned tile_count_x
= aligned_width
/ CHECKSUM_TILE_WIDTH
;
427 unsigned tile_count_y
= aligned_height
/ CHECKSUM_TILE_HEIGHT
;
429 slice
->checksum_stride
= tile_count_x
* CHECKSUM_BYTES_PER_TILE
;
431 return slice
->checksum_stride
* tile_count_y
;
435 panfrost_get_layer_stride(struct panfrost_slice
*slices
, bool is_3d
, unsigned cube_stride
, unsigned level
)
437 return is_3d
? slices
[level
].size0
: cube_stride
;
440 /* Computes the offset into a texture at a particular level/face. Add to
441 * the base address of a texture to get the address to that level/face */
444 panfrost_texture_offset(struct panfrost_slice
*slices
, bool is_3d
, unsigned cube_stride
, unsigned level
, unsigned face
, unsigned sample
)
446 unsigned layer_stride
= panfrost_get_layer_stride(slices
, is_3d
, cube_stride
, level
);
447 return slices
[level
].offset
+ (face
* layer_stride
) + (sample
* slices
[level
].size0
);