2 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018-2019 Google, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * Rob Clark <robclark@freedesktop.org>
30 #include "freedreno_layout.h"
/*
 * Alignment constants used by fdl6_layout() for the UBWC metadata plane.
 *
 * The stray numeric prefixes that preceded each directive have been removed:
 * a preprocessing directive is only recognised when '#' is the first
 * non-whitespace token on its line.
 */

/* Alignment, counted in UBWC blocks, of the metadata width/pitch. */
#define RGB_TILE_WIDTH_ALIGNMENT 64

/* Default alignment, counted in UBWC blocks, of the metadata height;
 * fdl6_layout() uses 64 instead when more than one mip level is requested.
 */
#define RGB_TILE_HEIGHT_ALIGNMENT 16

/* Alignment applied to the size of each UBWC metadata slice. */
#define UBWC_PLANE_SIZE_ALIGNMENT 4096
37 is_r8g8(struct fdl_layout
*layout
)
39 return layout
->cpp
== 2 &&
40 util_format_get_nr_components(layout
->format
) == 2;
/*
 * fdl6_get_ubwc_blockwidth: store a block width and height for `layout`
 * through the `blockwidth` and `blockheight` out-pointers.
 *
 * The general path looks up the `blocksize` table using fdl_cpp_shift(layout)
 * as the index; the per-row comments label the rows cpp = 1, 2, 4, ... 64.
 * The cpp = 64 row is { 0, 0 } and marked TODO; fdl6_layout() clears
 * layout->ubwc when the returned block width is 0.
 * Layouts matching is_r8g8() take a separate branch.
 *
 * NOTE(review): this listing looks extraction-damaged. The leading numbers
 * appear to be line numbers from the original file fused into the text, and the
 * return type, the braces, the declaration of `blocksize` and the body of
 * the r8g8 branch are not present here. Restore them from the upstream file
 * before building; the r8g8 block dimensions cannot be determined from what
 * is visible.
 */
44 fdl6_get_ubwc_blockwidth(struct fdl_layout
*layout
,
45 uint32_t *blockwidth
, uint32_t *blockheight
)
/* Rows of the `blocksize` table: { width, height } per cpp. */
51 { 16, 4 }, /* cpp = 1 */
52 { 16, 4 }, /* cpp = 2 */
53 { 16, 4 }, /* cpp = 4 */
54 { 8, 4, }, /* cpp = 8 */
55 { 4, 4, }, /* cpp = 16 */
56 { 4, 2 }, /* cpp = 32 */
57 { 0, 0 }, /* cpp = 64 (TODO) */
60 /* special case for r8g8: */
61 if (is_r8g8(layout
)) {
/* Table index; the assert guards the lookups below against an
 * out-of-range shift value.
 */
67 uint32_t cpp
= fdl_cpp_shift(layout
);
68 assert(cpp
< ARRAY_SIZE(blocksize
));
69 *blockwidth
= blocksize
[cpp
].width
;
70 *blockheight
= blocksize
[cpp
].height
;
/*
 * fdl6_tile_alignment: set the pitchalign and base_align fields of `layout`
 * for a tiled layout (fdl6_layout() calls this when layout->tile_mode is
 * non-zero).
 *
 * pitchalign starts as fdl_cpp_shift(layout) and is then overridden to 1 for
 * is_r8g8() layouts or cpp == 1, and to 2 for any other cpp == 2 layout.
 * base_align is assigned 64, 128 (the visible condition is cpp == 2) or 256.
 *
 * NOTE(review): this listing looks extraction-damaged. The leading numbers
 * appear to be line numbers from the original file fused into the text. No write to
 * `*heightalign` is visible even though fdl6_layout() later reads the value
 * it passes in here, and the conditions guarding the 64 and 256 base_align
 * assignments, the braces and the return type are missing. The base_align
 * comment below also has no visible terminator. Restore the missing lines
 * from the upstream file before building.
 */
74 fdl6_tile_alignment(struct fdl_layout
*layout
, uint32_t *heightalign
)
/* Default: pitch alignment shift equals the cpp shift. */
76 layout
->pitchalign
= fdl_cpp_shift(layout
);
/* Overrides of the default pitch alignment shift for small cpp values. */
79 if (is_r8g8(layout
) || layout
->cpp
== 1) {
80 layout
->pitchalign
= 1;
82 } else if (layout
->cpp
== 2) {
83 layout
->pitchalign
= 2;
86 /* note: this base_align is *probably* not always right,
87 * it doesn't really get tested. for example with UBWC we might
88 * want 4k alignment, since we align UBWC levels to 4k
91 layout
->base_align
= 64;
92 else if (layout
->cpp
== 2)
93 layout
->base_align
= 128;
95 layout
->base_align
= 256;
98 /* NOTE: good way to test this is: (for example)
99 * piglit/bin/texelFetch fs sampler3D 100x100x8
// fdl6_layout: fill in `layout` for an image described by the arguments
// (format, sample count, level-0 dimensions, mip level count, array size,
// 3D flag) and an optional explicit plane offset/pitch.
//
// Visible effects on `layout`:
//  - width0/height0/depth0, format and nr_samples are copied from the
//    arguments;
//  - cpp is util_format_get_blocksize(format) multiplied by nr_samples, and
//    cpp_shift is ffs(cpp) - 1;
//  - layer_first is !is_3d;
//  - ubwc is cleared when depth0 > 1 or the UBWC block width is 0;
//  - for each mip level, slices[level] receives offset, pitch and size0, and
//    ubwc_slices[level] receives size0, pitch and offset;
//  - size, layer_size and ubwc_layer_size accumulate the totals, and the
//    explicit offset is added to size at the end.
//
// `plane_layout` is read for an explicit offset and pitch; nr_samples must
// be greater than zero (asserted).
//
// NOTE(review): this listing looks extraction-damaged. The leading numbers
// appear to be line numbers from the original file fused into the text, and many
// structural lines are missing: the return type and return statements,
// braces, several `if`/`else` guards (for example around the UBWC-only and
// plane_layout-only statements), the declaration of `height`, and the
// terminators of several comments. Restore them from the upstream file
// before building; the conditions under which individual statements run
// cannot be determined from what is visible here.
102 fdl6_layout(struct fdl_layout
*layout
,
103 enum pipe_format format
, uint32_t nr_samples
,
104 uint32_t width0
, uint32_t height0
, uint32_t depth0
,
105 uint32_t mip_levels
, uint32_t array_size
, bool is_3d
,
106 struct fdl_slice
*plane_layout
)
108 uint32_t offset
, pitch0
;
109 uint32_t pitchalign
, heightalign
;
110 uint32_t ubwc_blockwidth
, ubwc_blockheight
;
112 assert(nr_samples
> 0);
113 layout
->width0
= width0
;
114 layout
->height0
= height0
;
115 layout
->depth0
= depth0
;
117 layout
->cpp
= util_format_get_blocksize(format
);
118 layout
->cpp
*= nr_samples
;
119 layout
->cpp_shift
= ffs(layout
->cpp
) - 1;
121 layout
->format
= format
;
122 layout
->nr_samples
= nr_samples
;
123 layout
->layer_first
= !is_3d
;
125 fdl6_get_ubwc_blockwidth(layout
, &ubwc_blockwidth
, &ubwc_blockheight
);
127 if (depth0
> 1 || ubwc_blockwidth
== 0)
128 layout
->ubwc
= false;
130 /* in layer_first layout, the level (slice) contains just one
131 * layer (since in fact the layer contains the slices)
133 uint32_t layers_in_level
= layout
->layer_first
? 1 : array_size
;
135 /* note: for tiled+noubwc layouts, we can use a lower pitchalign
136 * which will affect the linear levels only, (the hardware will still
137 * expect the tiled alignment on the tiled levels)
139 if (layout
->tile_mode
) {
140 fdl6_tile_alignment(layout
, &heightalign
);
142 layout
->base_align
= 64;
143 layout
->pitchalign
= 0;
144 /* align pitch to at least 16 pixels:
145 * both turnip and gallium assume there is enough alignment for 16x4
146 * aligned gmem store. turnip can use CP_BLIT to work without this
147 * extra alignment, but gallium driver doesn't implement it yet
150 layout
->pitchalign
= fdl_cpp_shift(layout
) - 2;
152 /* when possible, use a bit more alignment than necessary
153 * presumably this is better for performance?
156 layout
->pitchalign
= fdl_cpp_shift(layout
);
158 /* not used, avoid "may be used uninitialized" warning */
/* layout->pitchalign holds a shift; the local pitchalign is 64 shifted
 * left by that amount.
 */
162 pitchalign
= 64 << layout
->pitchalign
;
165 offset
= plane_layout
->offset
;
166 pitch0
= plane_layout
->pitch
;
167 if (align(pitch0
, pitchalign
) != pitch0
)
/* Computed level-0 pitch: width in format blocks times cpp, aligned to
 * pitchalign with util_align_npot().
 */
170 uint32_t nblocksx
= util_format_get_nblocksx(format
, width0
);
172 pitch0
= util_align_npot(nblocksx
* layout
->cpp
, pitchalign
);
175 uint32_t ubwc_width0
= width0
;
176 uint32_t ubwc_height0
= height0
;
177 uint32_t ubwc_tile_height_alignment
= RGB_TILE_HEIGHT_ALIGNMENT
;
178 if (mip_levels
> 1) {
179 /* With mipmapping enabled, UBWC layout is power-of-two sized,
180 * specified in log2 width/height in the descriptors. The height
181 * alignment is 64 for mipmapping, but for buffer sharing (always
182 * single level) other participants expect 16.
184 ubwc_width0
= util_next_power_of_two(width0
);
185 ubwc_height0
= util_next_power_of_two(height0
);
186 ubwc_tile_height_alignment
= 64;
188 ubwc_width0
= align(DIV_ROUND_UP(ubwc_width0
, ubwc_blockwidth
),
189 RGB_TILE_WIDTH_ALIGNMENT
);
190 ubwc_height0
= align(DIV_ROUND_UP(ubwc_height0
, ubwc_blockheight
),
191 ubwc_tile_height_alignment
);
193 for (uint32_t level
= 0; level
< mip_levels
; level
++) {
194 uint32_t depth
= u_minify(depth0
, level
);
195 struct fdl_slice
*slice
= &layout
->slices
[level
];
196 struct fdl_slice
*ubwc_slice
= &layout
->ubwc_slices
[level
];
197 uint32_t tile_mode
= fdl_tile_mode(layout
, level
);
200 /* tiled levels of 3D textures are rounded up to PoT dimensions: */
201 if (is_3d
&& tile_mode
) {
202 height
= u_minify(util_next_power_of_two(height0
), level
);
204 height
= u_minify(height0
, level
);
/* Row count in format blocks, then padded to heightalign. */
207 uint32_t nblocksy
= util_format_get_nblocksy(format
, height
);
209 nblocksy
= align(nblocksy
, heightalign
);
211 /* The blits used for mem<->gmem work at a granularity of
212 * 16x4, which can cause faults due to over-fetch on the
213 * last level. The simple solution is to over-allocate a
214 * bit the last level to ensure any over-fetch is harmless.
215 * The pitch is already sufficiently aligned, but height
216 * may not be. note this only matters if last level is linear
218 if (level
== mip_levels
- 1)
219 height
= align(nblocksy
, 4);
221 slice
->offset
= offset
+ layout
->size
;
222 slice
->pitch
= align(u_minify(pitch0
, level
), pitchalign
);
224 /* 1d array and 2d array textures must all have the same layer size
225 * for each miplevel on a6xx. 3d textures can have different layer
226 * sizes for high levels, but the hw auto-sizer is buggy (or at least
227 * different than what this code does), so as soon as the layer size
228 * range gets into range, we stop reducing it.
231 if (level
< 1 || layout
->slices
[level
- 1].size0
> 0xf000) {
232 slice
->size0
= align(nblocksy
* slice
->pitch
, 4096);
234 slice
->size0
= layout
->slices
[level
- 1].size0
;
237 slice
->size0
= nblocksy
* slice
->pitch
;
240 layout
->size
+= slice
->size0
* depth
* layers_in_level
;
243 /* with UBWC every level is aligned to 4K */
244 layout
->size
= align(layout
->size
, 4096);
/* Metadata pitch/height for this level: minify the level-0 values, then
 * re-apply the same width/height alignments.
 */
246 uint32_t meta_pitch
= align(u_minify(ubwc_width0
, level
),
247 RGB_TILE_WIDTH_ALIGNMENT
);
248 uint32_t meta_height
= align(u_minify(ubwc_height0
, level
),
249 ubwc_tile_height_alignment
);
251 ubwc_slice
->size0
= align(meta_pitch
* meta_height
, UBWC_PLANE_SIZE_ALIGNMENT
);
252 ubwc_slice
->pitch
= meta_pitch
;
253 ubwc_slice
->offset
= offset
+ layout
->ubwc_layer_size
;
254 layout
->ubwc_layer_size
+= ubwc_slice
->size0
;
258 if (layout
->layer_first
) {
259 layout
->layer_size
= align(layout
->size
, 4096);
260 layout
->size
= layout
->layer_size
* array_size
;
263 /* Place the UBWC slices before the uncompressed slices, because the
264 * kernel expects UBWC to be at the start of the buffer. In the HW, we
265 * get to program the UBWC and non-UBWC offset/strides
269 for (uint32_t level
= 0; level
< mip_levels
; level
++)
270 layout
->slices
[level
].offset
+= layout
->ubwc_layer_size
* array_size
;
271 layout
->size
+= layout
->ubwc_layer_size
* array_size
;
274 /* include explicit offset in size */
275 layout
->size
+= offset
;