freedreno/a6xx: FETCHSIZE is PITCHALIGN
[mesa.git] / src / freedreno / fdl / fd6_layout.c
1 /*
2 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018-2019 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Rob Clark <robclark@freedesktop.org>
26 */
27
28 #include <stdio.h>
29
30 #include "freedreno_layout.h"
31
/* Alignment requirements, indexed by cpp (including msaa 2x and 4x).
 *
 * Entries that leave the UBWC block dimensions at zero have no UBWC
 * block size recorded; fdl6_layout() turns UBWC off for those.  Index 0
 * is not a real cpp, it holds the special-case entry used for r8g8.
 *
 * TODO:
 *   - cpp=1 UBWC needs testing at larger texture sizes
 *   - missing UBWC blockwidth/blockheight for npot+64 cpp
 *   - missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
 */
static const struct tile_alignment {
	unsigned basealign;
	unsigned pitchalign;
	unsigned heightalign;
	/* UBWC block width/height.  Used in size alignment, and calculating a
	 * descriptor's FLAG_BUFFER_LOG2W/H for mipmapping.
	 */
	uint8_t ubwc_blockwidth;
	uint8_t ubwc_blockheight;
} tile_alignment[] = {
	/* special cases for r8g8: */
	[0] = {
		.basealign = 256, .pitchalign = 64, .heightalign = 32,
		.ubwc_blockwidth = 16, .ubwc_blockheight = 8,
	},

	[1] = {
		.basealign = 64, .pitchalign = 128, .heightalign = 32,
		.ubwc_blockwidth = 16, .ubwc_blockheight = 4,
	},
	[2] = {
		.basealign = 128, .pitchalign = 128, .heightalign = 16,
		.ubwc_blockwidth = 16, .ubwc_blockheight = 4,
	},
	[3] = {
		.basealign = 256, .pitchalign = 64, .heightalign = 32,
	},
	[4] = {
		.basealign = 256, .pitchalign = 64, .heightalign = 16,
		.ubwc_blockwidth = 16, .ubwc_blockheight = 4,
	},
	[6] = {
		.basealign = 256, .pitchalign = 64, .heightalign = 16,
	},
	[8] = {
		.basealign = 256, .pitchalign = 64, .heightalign = 16,
		.ubwc_blockwidth = 8, .ubwc_blockheight = 4,
	},
	[12] = {
		.basealign = 256, .pitchalign = 64, .heightalign = 16,
	},
	[16] = {
		.basealign = 256, .pitchalign = 64, .heightalign = 16,
		.ubwc_blockwidth = 4, .ubwc_blockheight = 4,
	},
	[24] = {
		.basealign = 256, .pitchalign = 64, .heightalign = 16,
	},
	[32] = {
		.basealign = 256, .pitchalign = 64, .heightalign = 16,
		.ubwc_blockwidth = 4, .ubwc_blockheight = 2,
	},
	[48] = {
		.basealign = 256, .pitchalign = 64, .heightalign = 16,
	},
	[64] = {
		.basealign = 256, .pitchalign = 64, .heightalign = 16,
	},
};
64
/* Alignment applied to the UBWC metadata width (in UBWC blocks). */
#define RGB_TILE_WIDTH_ALIGNMENT 64
/* Alignment applied to the UBWC metadata height for single-level
 * layouts; fdl6_layout() switches to a larger value when mipmapping.
 */
#define RGB_TILE_HEIGHT_ALIGNMENT 16
/* Each UBWC metadata slice size is rounded up to this many bytes. */
#define UBWC_PLANE_SIZE_ALIGNMENT 4096
68
69 static const struct tile_alignment *
70 fdl6_tile_alignment(struct fdl_layout *layout)
71 {
72 debug_assert(layout->cpp < ARRAY_SIZE(tile_alignment));
73
74 if ((layout->cpp == 2) && (util_format_get_nr_components(layout->format) == 2))
75 return &tile_alignment[0];
76 else
77 return &tile_alignment[layout->cpp];
78 }
79
80 static int
81 fdl6_pitchalign(struct fdl_layout *layout, int level)
82 {
83 uint32_t pitchalign = 64;
84 if (fdl_tile_mode(layout, level))
85 pitchalign = fdl6_tile_alignment(layout)->pitchalign;
86
87 return pitchalign;
88 }
89
90 /* NOTE: good way to test this is: (for example)
91 * piglit/bin/texelFetch fs sampler3D 100x100x8
92 */
93 bool
94 fdl6_layout(struct fdl_layout *layout,
95 enum pipe_format format, uint32_t nr_samples,
96 uint32_t width0, uint32_t height0, uint32_t depth0,
97 uint32_t mip_levels, uint32_t array_size, bool is_3d,
98 struct fdl_slice *plane_layout)
99 {
100 uint32_t offset;
101 uint32_t pitch0;
102
103 assert(nr_samples > 0);
104 layout->width0 = width0;
105 layout->height0 = height0;
106 layout->depth0 = depth0;
107
108 layout->cpp = util_format_get_blocksize(format);
109 layout->cpp *= nr_samples;
110 layout->cpp_shift = ffs(layout->cpp) - 1;
111
112 layout->format = format;
113 layout->nr_samples = nr_samples;
114 layout->layer_first = !is_3d;
115
116 if (depth0 > 1)
117 layout->ubwc = false;
118 if (tile_alignment[layout->cpp].ubwc_blockwidth == 0)
119 layout->ubwc = false;
120
121 const struct tile_alignment *ta = fdl6_tile_alignment(layout);
122
123 /* in layer_first layout, the level (slice) contains just one
124 * layer (since in fact the layer contains the slices)
125 */
126 uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
127
128 debug_assert(ta->pitchalign);
129
130 if (layout->tile_mode) {
131 layout->base_align = ta->basealign;
132 } else {
133 layout->base_align = 64;
134 }
135
136 if (plane_layout) {
137 offset = plane_layout->offset;
138 pitch0 = plane_layout->pitch;
139 if (align(pitch0, fdl6_pitchalign(layout, 0) * layout->cpp) != pitch0)
140 return false;
141 pitch0 /= layout->cpp; /* explicit pitch is in bytes */
142 if (pitch0 < width0 && height0 > 1)
143 return false;
144 } else {
145 offset = 0;
146 pitch0 = util_align_npot(width0, fdl6_pitchalign(layout, 0));
147 }
148
149 uint32_t ubwc_width0 = width0;
150 uint32_t ubwc_height0 = height0;
151 uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT;
152 if (mip_levels > 1) {
153 /* With mipmapping enabled, UBWC layout is power-of-two sized,
154 * specified in log2 width/height in the descriptors. The height
155 * alignment is 64 for mipmapping, but for buffer sharing (always
156 * single level) other participants expect 16.
157 */
158 ubwc_width0 = util_next_power_of_two(width0);
159 ubwc_height0 = util_next_power_of_two(height0);
160 ubwc_tile_height_alignment = 64;
161 }
162 ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ta->ubwc_blockwidth),
163 RGB_TILE_WIDTH_ALIGNMENT);
164 ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0,
165 ta->ubwc_blockheight),
166 ubwc_tile_height_alignment);
167
168 layout->pitchalign =
169 util_logbase2_ceil(fdl6_pitchalign(layout, mip_levels - 1) * layout->cpp >> 6);
170
171 for (uint32_t level = 0; level < mip_levels; level++) {
172 uint32_t depth = u_minify(depth0, level);
173 struct fdl_slice *slice = &layout->slices[level];
174 struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
175 uint32_t tile_mode = fdl_tile_mode(layout, level);
176 uint32_t height;
177
178 /* tiled levels of 3D textures are rounded up to PoT dimensions: */
179 if (is_3d && tile_mode) {
180 height = u_minify(util_next_power_of_two(height0), level);
181 } else {
182 height = u_minify(height0, level);
183 }
184
185 uint32_t nblocksy = util_format_get_nblocksy(format, height);
186 if (tile_mode)
187 nblocksy = align(nblocksy, ta->heightalign);
188
189 /* The blits used for mem<->gmem work at a granularity of
190 * 16x4, which can cause faults due to over-fetch on the
191 * last level. The simple solution is to over-allocate a
192 * bit the last level to ensure any over-fetch is harmless.
193 * The pitch is already sufficiently aligned, but height
194 * may not be. note this only matters if last level is linear
195 */
196 if (level == mip_levels - 1)
197 height = align(nblocksy, 4);
198
199 uint32_t nblocksx =
200 util_align_npot(util_format_get_nblocksx(format, u_minify(pitch0, level)),
201 fdl6_pitchalign(layout, level));
202
203 slice->offset = offset + layout->size;
204 uint32_t blocks = nblocksx * nblocksy;
205
206 slice->pitch = nblocksx * layout->cpp;
207
208 /* 1d array and 2d array textures must all have the same layer size
209 * for each miplevel on a6xx. 3d textures can have different layer
210 * sizes for high levels, but the hw auto-sizer is buggy (or at least
211 * different than what this code does), so as soon as the layer size
212 * range gets into range, we stop reducing it.
213 */
214 if (is_3d) {
215 if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {
216 slice->size0 = align(blocks * layout->cpp, 4096);
217 } else {
218 slice->size0 = layout->slices[level - 1].size0;
219 }
220 } else {
221 slice->size0 = blocks * layout->cpp;
222 }
223
224 layout->size += slice->size0 * depth * layers_in_level;
225
226 if (layout->ubwc) {
227 /* with UBWC every level is aligned to 4K */
228 layout->size = align(layout->size, 4096);
229
230 uint32_t meta_pitch = align(u_minify(ubwc_width0, level),
231 RGB_TILE_WIDTH_ALIGNMENT);
232 uint32_t meta_height = align(u_minify(ubwc_height0, level),
233 ubwc_tile_height_alignment);
234
235 ubwc_slice->size0 = align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
236 ubwc_slice->pitch = meta_pitch;
237 ubwc_slice->offset = offset + layout->ubwc_layer_size;
238 layout->ubwc_layer_size += ubwc_slice->size0;
239 }
240 }
241
242 if (layout->layer_first) {
243 layout->layer_size = align(layout->size, 4096);
244 layout->size = layout->layer_size * array_size;
245 }
246
247 /* Place the UBWC slices before the uncompressed slices, because the
248 * kernel expects UBWC to be at the start of the buffer. In the HW, we
249 * get to program the UBWC and non-UBWC offset/strides
250 * independently.
251 */
252 if (layout->ubwc) {
253 for (uint32_t level = 0; level < mip_levels; level++)
254 layout->slices[level].offset += layout->ubwc_layer_size * array_size;
255 layout->size += layout->ubwc_layer_size * array_size;
256 }
257
258 /* include explicit offset in size */
259 layout->size += offset;
260
261 return true;
262 }
263
264 void
265 fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
266 uint32_t *blockwidth, uint32_t *blockheight)
267 {
268 const struct tile_alignment *ta = fdl6_tile_alignment(layout);
269 *blockwidth = ta->ubwc_blockwidth;
270 *blockheight = ta->ubwc_blockheight;
271 }