47a6c852da7bbd297618842ea1a1444ae699016e
[mesa.git] / src / freedreno / fdl / fd6_layout.c
1 /*
2 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018-2019 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Rob Clark <robclark@freedesktop.org>
26 */
27
28 #include <stdio.h>
29
30 #include "freedreno_layout.h"
31
32 #define RGB_TILE_WIDTH_ALIGNMENT 64
33 #define RGB_TILE_HEIGHT_ALIGNMENT 16
34 #define UBWC_PLANE_SIZE_ALIGNMENT 4096
35
36 static bool
37 is_r8g8(struct fdl_layout *layout)
38 {
39 return layout->cpp == 2 &&
40 util_format_get_nr_components(layout->format) == 2;
41 }
42
/* Look up the UBWC block dimensions for a layout.
 *
 * layout:       layout whose cpp / format select the block size
 * blockwidth:   out, block width
 * blockheight:  out, block height
 *
 * A result of 0x0 means there is no known block size for that cpp (the
 * cpp = 64 entry is still a TODO).
 */
void
fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
		uint32_t *blockwidth, uint32_t *blockheight)
{
	/* indexed by fdl_cpp_shift(), i.e. log2(cpp) */
	static const struct {
		uint8_t width;
		uint8_t height;
	} ubwc_block[] = {
		[0] = { 16, 4 },	/* cpp = 1 */
		[1] = { 16, 4 },	/* cpp = 2 */
		[2] = { 16, 4 },	/* cpp = 4 */
		[3] = {  8, 4 },	/* cpp = 8 */
		[4] = {  4, 4 },	/* cpp = 16 */
		[5] = {  4, 2 },	/* cpp = 32 */
		[6] = {  0, 0 },	/* cpp = 64 (TODO) */
	};

	if (!is_r8g8(layout)) {
		uint32_t shift = fdl_cpp_shift(layout);

		assert(shift < ARRAY_SIZE(ubwc_block));
		*blockwidth = ubwc_block[shift].width;
		*blockheight = ubwc_block[shift].height;
		return;
	}

	/* r8g8 does not follow the generic cpp = 2 entry: */
	*blockwidth = 16;
	*blockheight = 8;
}
72
73 static void
74 fdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)
75 {
76 layout->pitchalign = fdl_cpp_shift(layout);
77 *heightalign = 16;
78
79 if (is_r8g8(layout) || layout->cpp == 1) {
80 layout->pitchalign = 1;
81 *heightalign = 32;
82 } else if (layout->cpp == 2) {
83 layout->pitchalign = 2;
84 }
85
86 /* note: this base_align is *probably* not always right,
87 * it doesn't really get tested. for example with UBWC we might
88 * want 4k alignment, since we align UBWC levels to 4k
89 */
90 if (layout->cpp == 1)
91 layout->base_align = 64;
92 else if (layout->cpp == 2)
93 layout->base_align = 128;
94 else
95 layout->base_align = 256;
96 }
97
98 /* NOTE: good way to test this is: (for example)
99 * piglit/bin/texelFetch fs sampler3D 100x100x8
100 */
101 bool
102 fdl6_layout(struct fdl_layout *layout,
103 enum pipe_format format, uint32_t nr_samples,
104 uint32_t width0, uint32_t height0, uint32_t depth0,
105 uint32_t mip_levels, uint32_t array_size, bool is_3d,
106 struct fdl_slice *plane_layout)
107 {
108 uint32_t offset, pitch0;
109 uint32_t pitchalign, heightalign;
110 uint32_t ubwc_blockwidth, ubwc_blockheight;
111
112 assert(nr_samples > 0);
113 layout->width0 = width0;
114 layout->height0 = height0;
115 layout->depth0 = depth0;
116
117 layout->cpp = util_format_get_blocksize(format);
118 layout->cpp *= nr_samples;
119 layout->cpp_shift = ffs(layout->cpp) - 1;
120
121 layout->format = format;
122 layout->nr_samples = nr_samples;
123 layout->layer_first = !is_3d;
124
125 fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);
126
127 if (depth0 > 1 || ubwc_blockwidth == 0)
128 layout->ubwc = false;
129
130 /* in layer_first layout, the level (slice) contains just one
131 * layer (since in fact the layer contains the slices)
132 */
133 uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
134
135 /* note: for tiled+noubwc layouts, we can use a lower pitchalign
136 * which will affect the linear levels only, (the hardware will still
137 * expect the tiled alignment on the tiled levels)
138 */
139 if (layout->tile_mode) {
140 fdl6_tile_alignment(layout, &heightalign);
141 } else {
142 layout->base_align = 64;
143 layout->pitchalign = 0;
144 /* align pitch to at least 16 pixels:
145 * both turnip and galium assume there is enough alignment for 16x4
146 * aligned gmem store. turnip can use CP_BLIT to work without this
147 * extra alignment, but gallium driver doesn't implement it yet
148 */
149 if (layout->cpp > 4)
150 layout->pitchalign = fdl_cpp_shift(layout) - 2;
151
152 /* when possible, use a bit more alignment than necessary
153 * presumably this is better for performance?
154 */
155 if (!plane_layout)
156 layout->pitchalign = fdl_cpp_shift(layout);
157
158 /* not used, avoid "may be used uninitialized" warning */
159 heightalign = 1;
160 }
161
162 pitchalign = 64 << layout->pitchalign;
163
164 if (plane_layout) {
165 offset = plane_layout->offset;
166 pitch0 = plane_layout->pitch;
167 if (align(pitch0, pitchalign) != pitch0)
168 return false;
169 } else {
170 uint32_t nblocksx = util_format_get_nblocksx(format, width0);
171 offset = 0;
172 pitch0 = util_align_npot(nblocksx * layout->cpp, pitchalign);
173 }
174
175 uint32_t ubwc_width0 = width0;
176 uint32_t ubwc_height0 = height0;
177 uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT;
178 if (mip_levels > 1) {
179 /* With mipmapping enabled, UBWC layout is power-of-two sized,
180 * specified in log2 width/height in the descriptors. The height
181 * alignment is 64 for mipmapping, but for buffer sharing (always
182 * single level) other participants expect 16.
183 */
184 ubwc_width0 = util_next_power_of_two(width0);
185 ubwc_height0 = util_next_power_of_two(height0);
186 ubwc_tile_height_alignment = 64;
187 }
188 ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),
189 RGB_TILE_WIDTH_ALIGNMENT);
190 ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),
191 ubwc_tile_height_alignment);
192
193 for (uint32_t level = 0; level < mip_levels; level++) {
194 uint32_t depth = u_minify(depth0, level);
195 struct fdl_slice *slice = &layout->slices[level];
196 struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
197 uint32_t tile_mode = fdl_tile_mode(layout, level);
198 uint32_t height;
199
200 /* tiled levels of 3D textures are rounded up to PoT dimensions: */
201 if (is_3d && tile_mode) {
202 height = u_minify(util_next_power_of_two(height0), level);
203 } else {
204 height = u_minify(height0, level);
205 }
206
207 uint32_t nblocksy = util_format_get_nblocksy(format, height);
208 if (tile_mode)
209 nblocksy = align(nblocksy, heightalign);
210
211 /* The blits used for mem<->gmem work at a granularity of
212 * 16x4, which can cause faults due to over-fetch on the
213 * last level. The simple solution is to over-allocate a
214 * bit the last level to ensure any over-fetch is harmless.
215 * The pitch is already sufficiently aligned, but height
216 * may not be. note this only matters if last level is linear
217 */
218 if (level == mip_levels - 1)
219 height = align(nblocksy, 4);
220
221 slice->offset = offset + layout->size;
222 slice->pitch = align(u_minify(pitch0, level), pitchalign);
223
224 /* 1d array and 2d array textures must all have the same layer size
225 * for each miplevel on a6xx. 3d textures can have different layer
226 * sizes for high levels, but the hw auto-sizer is buggy (or at least
227 * different than what this code does), so as soon as the layer size
228 * range gets into range, we stop reducing it.
229 */
230 if (is_3d) {
231 if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {
232 slice->size0 = align(nblocksy * slice->pitch, 4096);
233 } else {
234 slice->size0 = layout->slices[level - 1].size0;
235 }
236 } else {
237 slice->size0 = nblocksy * slice->pitch;
238 }
239
240 layout->size += slice->size0 * depth * layers_in_level;
241
242 if (layout->ubwc) {
243 /* with UBWC every level is aligned to 4K */
244 layout->size = align(layout->size, 4096);
245
246 uint32_t meta_pitch = align(u_minify(ubwc_width0, level),
247 RGB_TILE_WIDTH_ALIGNMENT);
248 uint32_t meta_height = align(u_minify(ubwc_height0, level),
249 ubwc_tile_height_alignment);
250
251 ubwc_slice->size0 = align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
252 ubwc_slice->pitch = meta_pitch;
253 ubwc_slice->offset = offset + layout->ubwc_layer_size;
254 layout->ubwc_layer_size += ubwc_slice->size0;
255 }
256 }
257
258 if (layout->layer_first) {
259 layout->layer_size = align(layout->size, 4096);
260 layout->size = layout->layer_size * array_size;
261 }
262
263 /* Place the UBWC slices before the uncompressed slices, because the
264 * kernel expects UBWC to be at the start of the buffer. In the HW, we
265 * get to program the UBWC and non-UBWC offset/strides
266 * independently.
267 */
268 if (layout->ubwc) {
269 for (uint32_t level = 0; level < mip_levels; level++)
270 layout->slices[level].offset += layout->ubwc_layer_size * array_size;
271 layout->size += layout->ubwc_layer_size * array_size;
272 }
273
274 /* include explicit offset in size */
275 layout->size += offset;
276
277 return true;
278 }