freedreno/ir3: debug cleanup
[mesa.git] / src / gallium / drivers / freedreno / a6xx / fd6_resource.c
1 /*
2 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Rob Clark <robclark@freedesktop.org>
26 */
27
28 #include "fd6_resource.h"
29 #include "fd6_format.h"
30
31 #include "a6xx.xml.h"
32
/* Pitch/height alignment for tiled levels.  Indexed by cpp (including
 * msaa 2x and 4x); slot 0 is not a real cpp, it is the entry used for
 * the r16 special case.  Indices that are not listed are zero-filled.
 */
static const struct {
	unsigned pitchalign;
	unsigned heightalign;
} tile_alignment[] = {
	/* special cases for r16: */
	[0]  = { 128, 16 },

	/* small cpp: taller height alignment */
	[1]  = { 128, 32 },
	[2]  = {  64, 32 },
	[3]  = {  64, 32 },

	/* everything from cpp 4 upwards shares one alignment */
	[4]  = {  64, 16 },
	[6]  = {  64, 16 },
	[8]  = {  64, 16 },
	[12] = {  64, 16 },
	[16] = {  64, 16 },
	[24] = {  64, 16 },
	[32] = {  64, 16 },
	[48] = {  64, 16 },
	[64] = {  64, 16 },
};
54
55 /* NOTE: good way to test this is: (for example)
56 * piglit/bin/texelFetch fs sampler3D 100x100x8
57 */
58 static uint32_t
59 setup_slices(struct fd_resource *rsc, uint32_t alignment, enum pipe_format format)
60 {
61 struct pipe_resource *prsc = &rsc->base;
62 struct fd_screen *screen = fd_screen(prsc->screen);
63 enum util_format_layout layout = util_format_description(format)->layout;
64 uint32_t pitchalign = screen->gmem_alignw;
65 uint32_t level, size = 0;
66 uint32_t depth = prsc->depth0;
67 /* linear dimensions: */
68 uint32_t lwidth = prsc->width0;
69 uint32_t lheight = prsc->height0;
70 /* tile_mode dimensions: */
71 uint32_t twidth = util_next_power_of_two(lwidth);
72 uint32_t theight = util_next_power_of_two(lheight);
73 /* in layer_first layout, the level (slice) contains just one
74 * layer (since in fact the layer contains the slices)
75 */
76 uint32_t layers_in_level = rsc->layer_first ? 1 : prsc->array_size;
77 int ta = rsc->cpp;
78
79 /* The z16/r16 formats seem to not play by the normal tiling rules: */
80 if ((rsc->cpp == 2) && (util_format_get_nr_components(format) == 1))
81 ta = 0;
82
83 debug_assert(ta < ARRAY_SIZE(tile_alignment));
84 debug_assert(tile_alignment[ta].pitchalign);
85
86 for (level = 0; level <= prsc->last_level; level++) {
87 struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
88 bool linear_level = fd_resource_level_linear(prsc, level);
89 uint32_t width, height;
90
91 /* tiled levels of 3D textures are rounded up to PoT dimensions: */
92 if ((prsc->target == PIPE_TEXTURE_3D) && rsc->tile_mode && !linear_level) {
93 width = twidth;
94 height = theight;
95 } else {
96 width = lwidth;
97 height = lheight;
98 }
99 uint32_t aligned_height = height;
100 uint32_t blocks;
101
102 if (rsc->tile_mode && !linear_level) {
103 pitchalign = tile_alignment[ta].pitchalign;
104 aligned_height = align(aligned_height,
105 tile_alignment[ta].heightalign);
106 } else {
107 pitchalign = 64;
108 }
109
110 /* The blits used for mem<->gmem work at a granularity of
111 * 32x32, which can cause faults due to over-fetch on the
112 * last level. The simple solution is to over-allocate a
113 * bit the last level to ensure any over-fetch is harmless.
114 * The pitch is already sufficiently aligned, but height
115 * may not be:
116 */
117 if ((level == prsc->last_level) && (prsc->target != PIPE_BUFFER))
118 aligned_height = align(aligned_height, 32);
119
120 if (layout == UTIL_FORMAT_LAYOUT_ASTC)
121 slice->pitch =
122 util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
123 else
124 slice->pitch = align(width, pitchalign);
125
126 slice->offset = size;
127 blocks = util_format_get_nblocks(format, slice->pitch, aligned_height);
128
129 /* 1d array and 2d array textures must all have the same layer size
130 * for each miplevel on a6xx. 3d textures can have different layer
131 * sizes for high levels, but the hw auto-sizer is buggy (or at least
132 * different than what this code does), so as soon as the layer size
133 * range gets into range, we stop reducing it.
134 */
135 if (prsc->target == PIPE_TEXTURE_3D) {
136 if (level < 1 || (rsc->slices[level - 1].size0 > 0xf000)) {
137 slice->size0 = align(blocks * rsc->cpp, alignment);
138 } else {
139 slice->size0 = rsc->slices[level - 1].size0;
140 }
141 } else {
142 slice->size0 = align(blocks * rsc->cpp, alignment);
143 }
144
145 size += slice->size0 * depth * layers_in_level;
146
147 #if 0
148 debug_printf("%s: %ux%ux%u@%u:\t%2u: stride=%4u, size=%6u,%7u, aligned_height=%3u, blocks=%u, offset=0x%x\n",
149 util_format_name(prsc->format),
150 width, height, depth, rsc->cpp,
151 level, slice->pitch * rsc->cpp,
152 slice->size0, size, aligned_height, blocks,
153 slice->offset);
154 #endif
155
156 depth = u_minify(depth, 1);
157 lwidth = u_minify(lwidth, 1);
158 lheight = u_minify(lheight, 1);
159 twidth = u_minify(twidth, 1);
160 theight = u_minify(theight, 1);
161 }
162
163 return size;
164 }
165
166 /* A subset of the valid tiled formats can be compressed. We do
167 * already require tiled in order to be compressed, but just because
168 * it can be tiled doesn't mean it can be compressed.
169 */
170 static bool
171 ok_ubwc_format(enum pipe_format pfmt)
172 {
173 /* NOTE: both x24s8 and z24s8 map to RB6_X8Z24_UNORM, but UBWC
174 * does not seem to work properly when sampling x24s8.. possibly
175 * because we sample it as TFMT6_8_8_8_8_UINT.
176 *
177 * This could possibly be a hw limitation, or maybe something
178 * else wrong somewhere (although z24s8 blits and sampling with
179 * UBWC seem fine). Recheck on a later revision of a6xx
180 */
181 if (pfmt == PIPE_FORMAT_X24S8_UINT)
182 return false;
183
184 switch (fd6_pipe2color(pfmt)) {
185 case RB6_R10G10B10A2_UINT:
186 case RB6_R10G10B10A2_UNORM:
187 case RB6_R11G11B10_FLOAT:
188 case RB6_R16_FLOAT:
189 case RB6_R16G16B16A16_FLOAT:
190 case RB6_R16G16B16A16_SINT:
191 case RB6_R16G16B16A16_UINT:
192 case RB6_R16G16_FLOAT:
193 case RB6_R16G16_SINT:
194 case RB6_R16G16_UINT:
195 case RB6_R16_SINT:
196 case RB6_R16_UINT:
197 case RB6_R32G32B32A32_SINT:
198 case RB6_R32G32B32A32_UINT:
199 case RB6_R32G32_SINT:
200 case RB6_R32G32_UINT:
201 case RB6_R5G6B5_UNORM:
202 case RB6_R8G8B8A8_SINT:
203 case RB6_R8G8B8A8_UINT:
204 case RB6_R8G8B8A8_UNORM:
205 case RB6_R8G8B8_UNORM:
206 case RB6_R8G8_SINT:
207 case RB6_R8G8_UINT:
208 case RB6_R8G8_UNORM:
209 case RB6_X8Z24_UNORM:
210 case RB6_Z24_UNORM_S8_UINT:
211 return true;
212 default:
213 return false;
214 }
215 }
216
217 uint32_t
218 fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc)
219 {
220 #define RBG_TILE_WIDTH_ALIGNMENT 64
221 #define RGB_TILE_HEIGHT_ALIGNMENT 16
222 #define UBWC_PLANE_SIZE_ALIGNMENT 4096
223
224 struct pipe_resource *prsc = &rsc->base;
225 uint32_t width = prsc->width0;
226 uint32_t height = prsc->height0;
227
228 if (!ok_ubwc_format(prsc->format))
229 return 0;
230
231 /* limit things to simple single level 2d for now: */
232 if ((prsc->depth0 != 1) || (prsc->array_size != 1) || (prsc->last_level != 0))
233 return 0;
234
235 uint32_t block_width, block_height;
236 switch (rsc->cpp) {
237 case 2:
238 case 4:
239 block_width = 16;
240 block_height = 4;
241 break;
242 case 8:
243 block_width = 8;
244 block_height = 4;
245 break;
246 case 16:
247 block_width = 4;
248 block_height = 4;
249 break;
250 default:
251 return 0;
252 }
253
254 uint32_t meta_stride =
255 ALIGN_POT(DIV_ROUND_UP(width, block_width), RBG_TILE_WIDTH_ALIGNMENT);
256 uint32_t meta_height =
257 ALIGN_POT(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT);
258 uint32_t meta_size =
259 ALIGN_POT(meta_stride * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
260
261 /* UBWC goes first, then color data.. this constraint is mainly only
262 * because it is what the kernel expects for scanout. For non-2D we
263 * could just use a separate UBWC buffer..
264 */
265 rsc->ubwc_offset = 0;
266 rsc->offset = meta_size;
267 rsc->ubwc_pitch = meta_stride;
268 rsc->ubwc_size = meta_size >> 2; /* in dwords??? */
269 rsc->tile_mode = TILE6_3;
270
271 return meta_size;
272 }
273
274 /**
275 * Ensure the rsc is in an ok state to be used with the specified format.
276 * This handles the case of UBWC buffers used with non-UBWC compatible
277 * formats, by triggering an uncompress.
278 */
279 void
280 fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc,
281 enum pipe_format format)
282 {
283 if (!rsc->ubwc_size)
284 return;
285
286 if (ok_ubwc_format(format))
287 return;
288
289 fd_resource_uncompress(ctx, rsc);
290 }
291
292 uint32_t
293 fd6_setup_slices(struct fd_resource *rsc)
294 {
295 uint32_t alignment;
296
297 switch (rsc->base.target) {
298 case PIPE_TEXTURE_3D:
299 rsc->layer_first = false;
300 alignment = 4096;
301 break;
302 default:
303 rsc->layer_first = true;
304 alignment = 1;
305 break;
306 }
307
308 return setup_slices(rsc, alignment, rsc->base.format);
309 }