4a35dd6db2fc649379263676f504fa5a76840ffc
[mesa.git] / src / gallium / drivers / freedreno / a6xx / fd6_resource.c
1 /*
2 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Rob Clark <robclark@freedesktop.org>
26 */
27
28 #include "fd6_resource.h"
29 #include "fd6_format.h"
30
31 #include "a6xx.xml.h"
32
/*
 * Pitch/height alignment used for tiled levels, indexed by cpp (the
 * entries cover the cpp values that result from 2x and 4x msaa as well).
 * Indices without an explicit initializer are zero-filled.
 */
static const struct {
	unsigned pitchalign;
	unsigned heightalign;
} tile_alignment[] = {
	/* slot 0 is not a real cpp, it holds the special case for r16: */
	[0]  = { 128, 16 },

	/* single-byte formats: */
	[1]  = { 128, 32 },

	/* two and three bytes per pixel: */
	[2]  = {  64, 32 },
	[3]  = {  64, 32 },

	/* everything from four bytes per pixel upwards: */
	[4]  = {  64, 16 },
	[6]  = {  64, 16 },
	[8]  = {  64, 16 },
	[12] = {  64, 16 },
	[16] = {  64, 16 },
	[24] = {  64, 16 },
	[32] = {  64, 16 },
	[48] = {  64, 16 },
	[64] = {  64, 16 },
};
54
55 /* NOTE: good way to test this is: (for example)
56 * piglit/bin/texelFetch fs sampler3D 100x100x8
57 */
58 static uint32_t
59 setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
60 {
61 struct pipe_resource *prsc = &rsc->base;
62 struct fd_screen *screen = fd_screen(prsc->screen);
63 enum util_format_layout layout = util_format_description(format)->layout;
64 uint32_t pitchalign = screen->gmem_alignw;
65 uint32_t level, size = 0;
66 uint32_t depth = prsc->depth0;
67 /* linear dimensions: */
68 uint32_t lwidth = prsc->width0;
69 uint32_t lheight = prsc->height0;
70 /* tile_mode dimensions: */
71 uint32_t twidth = util_next_power_of_two(lwidth);
72 uint32_t theight = util_next_power_of_two(lheight);
73 /* in layer_first layout, the level (slice) contains just one
74 * layer (since in fact the layer contains the slices)
75 */
76 uint32_t layers_in_level = rsc->layout.layer_first ? 1 : prsc->array_size;
77 int ta = rsc->layout.cpp;
78
79 /* The z16/r16 formats seem to not play by the normal tiling rules: */
80 if ((rsc->layout.cpp == 2) && (util_format_get_nr_components(format) == 1))
81 ta = 0;
82
83 debug_assert(ta < ARRAY_SIZE(tile_alignment));
84 debug_assert(tile_alignment[ta].pitchalign);
85
86 for (level = 0; level <= prsc->last_level; level++) {
87 struct fdl_slice *slice = fd_resource_slice(rsc, level);
88 uint32_t tile_mode = fd_resource_tile_mode(prsc, level);
89 uint32_t width, height;
90
91 /* tiled levels of 3D textures are rounded up to PoT dimensions: */
92 if ((prsc->target == PIPE_TEXTURE_3D) && tile_mode) {
93 width = twidth;
94 height = theight;
95 } else {
96 width = lwidth;
97 height = lheight;
98 }
99 uint32_t aligned_height = height;
100 uint32_t blocks;
101
102 if (tile_mode) {
103 pitchalign = tile_alignment[ta].pitchalign;
104 aligned_height = align(aligned_height,
105 tile_alignment[ta].heightalign);
106 } else {
107 pitchalign = 64;
108 }
109
110 /* The blits used for mem<->gmem work at a granularity of
111 * 32x32, which can cause faults due to over-fetch on the
112 * last level. The simple solution is to over-allocate a
113 * bit the last level to ensure any over-fetch is harmless.
114 * The pitch is already sufficiently aligned, but height
115 * may not be:
116 */
117 if ((level == prsc->last_level) && (prsc->target != PIPE_BUFFER))
118 aligned_height = align(aligned_height, 32);
119
120 if (layout == UTIL_FORMAT_LAYOUT_ASTC)
121 slice->pitch =
122 util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
123 else
124 slice->pitch = align(width, pitchalign);
125
126 slice->offset = size;
127 blocks = util_format_get_nblocks(format, slice->pitch, aligned_height);
128
129 /* 1d array and 2d array textures must all have the same layer size
130 * for each miplevel on a6xx. 3d textures can have different layer
131 * sizes for high levels, but the hw auto-sizer is buggy (or at least
132 * different than what this code does), so as soon as the layer size
133 * range gets into range, we stop reducing it.
134 */
135 if (prsc->target == PIPE_TEXTURE_3D) {
136 if (level < 1 || fd_resource_slice(rsc, level - 1)->size0 > 0xf000) {
137 slice->size0 = align(blocks * rsc->layout.cpp, alignment);
138 } else {
139 slice->size0 = fd_resource_slice(rsc, level - 1)->size0;
140 }
141 } else {
142 slice->size0 = align(blocks * rsc->layout.cpp, alignment);
143 }
144
145 size += slice->size0 * depth * layers_in_level;
146
147 #if 0
148 fprintf(stderr, "%s: %ux%ux%u@%u:\t%2u: stride=%4u, size=%6u,%7u, aligned_height=%3u, blocks=%u, offset=0x%x tiling=%d\n",
149 util_format_name(prsc->format),
150 width, height, depth, rsc->layout.cpp,
151 level, slice->pitch * rsc->layout.cpp,
152 slice->size0, size, aligned_height, blocks,
153 slice->offset, fd_resource_tile_mode(prsc, level));
154 #endif
155
156 depth = u_minify(depth, 1);
157 lwidth = u_minify(lwidth, 1);
158 lheight = u_minify(lheight, 1);
159 twidth = u_minify(twidth, 1);
160 theight = u_minify(theight, 1);
161 }
162
163 return size;
164 }
165
166 /* A subset of the valid tiled formats can be compressed. We do
167 * already require tiled in order to be compressed, but just because
168 * it can be tiled doesn't mean it can be compressed.
169 */
170 static bool
171 ok_ubwc_format(enum pipe_format pfmt)
172 {
173 /* NOTE: both x24s8 and z24s8 map to RB6_X8Z24_UNORM, but UBWC
174 * does not seem to work properly when sampling x24s8.. possibly
175 * because we sample it as TFMT6_8_8_8_8_UINT.
176 *
177 * This could possibly be a hw limitation, or maybe something
178 * else wrong somewhere (although z24s8 blits and sampling with
179 * UBWC seem fine). Recheck on a later revision of a6xx
180 */
181 if (pfmt == PIPE_FORMAT_X24S8_UINT)
182 return false;
183
184 switch (fd6_pipe2color(pfmt)) {
185 case RB6_R10G10B10A2_UINT:
186 case RB6_R10G10B10A2_UNORM:
187 case RB6_R11G11B10_FLOAT:
188 case RB6_R16_FLOAT:
189 case RB6_R16G16B16A16_FLOAT:
190 case RB6_R16G16B16A16_SINT:
191 case RB6_R16G16B16A16_UINT:
192 case RB6_R16G16_FLOAT:
193 case RB6_R16G16_SINT:
194 case RB6_R16G16_UINT:
195 case RB6_R16_SINT:
196 case RB6_R16_UINT:
197 case RB6_R32G32B32A32_SINT:
198 case RB6_R32G32B32A32_UINT:
199 case RB6_R32G32_SINT:
200 case RB6_R32G32_UINT:
201 case RB6_R5G6B5_UNORM:
202 case RB6_R8G8B8A8_SINT:
203 case RB6_R8G8B8A8_UINT:
204 case RB6_R8G8B8A8_UNORM:
205 case RB6_R8G8B8_UNORM:
206 case RB6_R8G8_SINT:
207 case RB6_R8G8_UINT:
208 case RB6_R8G8_UNORM:
209 case RB6_Z24_UNORM_S8_UINT:
210 case RB6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
211 return true;
212 default:
213 return false;
214 }
215 }
216
217 uint32_t
218 fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc)
219 {
220 #define RBG_TILE_WIDTH_ALIGNMENT 64
221 #define RGB_TILE_HEIGHT_ALIGNMENT 16
222 #define UBWC_PLANE_SIZE_ALIGNMENT 4096
223
224 struct pipe_resource *prsc = &rsc->base;
225 uint32_t width = prsc->width0;
226 uint32_t height = prsc->height0;
227
228 if (!ok_ubwc_format(prsc->format))
229 return 0;
230
231 /* limit things to simple single level 2d for now: */
232 if ((prsc->depth0 != 1) || (prsc->array_size != 1) || (prsc->last_level != 0))
233 return 0;
234
235 uint32_t block_width, block_height;
236 switch (rsc->layout.cpp) {
237 case 2:
238 case 4:
239 block_width = 16;
240 block_height = 4;
241 break;
242 case 8:
243 block_width = 8;
244 block_height = 4;
245 break;
246 case 16:
247 block_width = 4;
248 block_height = 4;
249 break;
250 default:
251 return 0;
252 }
253
254 uint32_t meta_stride =
255 ALIGN_POT(DIV_ROUND_UP(width, block_width), RBG_TILE_WIDTH_ALIGNMENT);
256 uint32_t meta_height =
257 ALIGN_POT(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT);
258 uint32_t meta_size =
259 ALIGN_POT(meta_stride * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
260
261 /* UBWC goes first, then color data.. this constraint is mainly only
262 * because it is what the kernel expects for scanout. For non-2D we
263 * could just use a separate UBWC buffer..
264 */
265 for (int level = 0; level <= prsc->last_level; level++) {
266 struct fdl_slice *slice = fd_resource_slice(rsc, level);
267 slice->offset += meta_size;
268 }
269
270 rsc->layout.ubwc_slices[0].offset = 0;
271 rsc->layout.ubwc_slices[0].pitch = meta_stride;
272 rsc->layout.ubwc_size = meta_size >> 2; /* in dwords??? */
273 rsc->layout.tile_mode = TILE6_3;
274
275 return meta_size;
276 }
277
278 /**
279 * Ensure the rsc is in an ok state to be used with the specified format.
280 * This handles the case of UBWC buffers used with non-UBWC compatible
281 * formats, by triggering an uncompress.
282 */
283 void
284 fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc,
285 enum pipe_format format)
286 {
287 if (!rsc->layout.ubwc_size)
288 return;
289
290 if (ok_ubwc_format(format))
291 return;
292
293 fd_resource_uncompress(ctx, rsc);
294 }
295
296 static void
297 setup_lrz(struct fd_resource *rsc)
298 {
299 struct fd_screen *screen = fd_screen(rsc->base.screen);
300 const uint32_t flags = DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
301 DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
302 unsigned width0 = rsc->base.width0;
303 unsigned height0 = rsc->base.height0;
304
305 /* LRZ buffer is super-sampled: */
306 switch (rsc->base.nr_samples) {
307 case 4:
308 width0 *= 2;
309 /* fallthru */
310 case 2:
311 height0 *= 2;
312 }
313
314 unsigned lrz_pitch = align(DIV_ROUND_UP(width0, 8), 32);
315 unsigned lrz_height = align(DIV_ROUND_UP(height0, 8), 16);
316
317 unsigned size = lrz_pitch * lrz_height * 2;
318
319 rsc->lrz_height = lrz_height;
320 rsc->lrz_width = lrz_pitch;
321 rsc->lrz_pitch = lrz_pitch;
322 rsc->lrz = fd_bo_new(screen->dev, size, flags, "lrz");
323 }
324
325 uint32_t
326 fd6_setup_slices(struct fd_resource *rsc)
327 {
328 uint32_t alignment;
329
330 if (!(fd_mesa_debug & FD_DBG_NOLRZ) && has_depth(rsc->base.format))
331 setup_lrz(rsc);
332
333 switch (rsc->base.target) {
334 case PIPE_TEXTURE_3D:
335 rsc->layout.layer_first = false;
336 alignment = 4096;
337 break;
338 default:
339 rsc->layout.layer_first = true;
340 alignment = 1;
341 break;
342 }
343
344 return setup_slices(rsc, alignment, rsc->base.format);
345 }