panfrost: Fix Z24 vs Z32 mixup
[mesa.git] / src / gallium / drivers / panfrost / pan_mfbd.c
1 /*
2 * Copyright 2018-2019 Alyssa Rosenzweig
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 */
24
25 #include "pan_bo.h"
26 #include "pan_context.h"
27 #include "pan_cmdstream.h"
28 #include "pan_util.h"
29 #include "panfrost-quirks.h"
30
31 static struct mali_rt_format
32 panfrost_mfbd_format(struct pipe_surface *surf)
33 {
34 struct panfrost_device *dev = pan_device(surf->context->screen);
35 bool is_bifrost = dev->quirks & IS_BIFROST;
36
37 /* Explode details on the format */
38
39 const struct util_format_description *desc =
40 util_format_description(surf->format);
41
42 /* The swizzle for rendering is inverted from texturing */
43
44 unsigned char swizzle[4];
45 panfrost_invert_swizzle(desc->swizzle, swizzle);
46
47 /* Fill in accordingly, defaulting to 8-bit UNORM */
48
49 struct mali_rt_format fmt = {
50 .unk1 = 0x4000000,
51 .unk2 = 0x1,
52 .nr_channels = MALI_POSITIVE(desc->nr_channels),
53 .unk3 = 0x4,
54 .unk4 = is_bifrost,
55 .flags = 0x8,
56 .swizzle = panfrost_translate_swizzle_4(swizzle),
57 .no_preload = true
58 };
59
60 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
61 fmt.flags |= MALI_MFBD_FORMAT_SRGB;
62
63 /* sRGB handled as a dedicated flag */
64 enum pipe_format linearized = util_format_linear(surf->format);
65
66 /* If RGB, we're good to go */
67 if (util_format_is_unorm8(desc))
68 return fmt;
69
70 /* Set flags for alternative formats */
71
72 switch (linearized) {
73 case PIPE_FORMAT_B5G6R5_UNORM:
74 fmt.unk1 = 0x14000000;
75 fmt.nr_channels = MALI_POSITIVE(2);
76 fmt.unk3 |= 0x1;
77 break;
78
79 case PIPE_FORMAT_A4B4G4R4_UNORM:
80 case PIPE_FORMAT_B4G4R4A4_UNORM:
81 fmt.unk1 = 0x10000000;
82 fmt.unk3 = 0x5;
83 fmt.nr_channels = MALI_POSITIVE(1);
84 break;
85
86 case PIPE_FORMAT_R10G10B10A2_UNORM:
87 case PIPE_FORMAT_B10G10R10A2_UNORM:
88 case PIPE_FORMAT_R10G10B10X2_UNORM:
89 case PIPE_FORMAT_B10G10R10X2_UNORM:
90 fmt.unk1 = 0x08000000;
91 fmt.unk3 = 0x6;
92 fmt.nr_channels = MALI_POSITIVE(1);
93 break;
94
95 case PIPE_FORMAT_B5G5R5A1_UNORM:
96 case PIPE_FORMAT_B5G5R5X1_UNORM:
97 fmt.unk1 = 0x18000000;
98 fmt.unk3 = 0x7;
99 fmt.nr_channels = MALI_POSITIVE(2);
100 break;
101
102 /* Generic 8-bit */
103 case PIPE_FORMAT_R8_UINT:
104 case PIPE_FORMAT_R8_SINT:
105 fmt.unk1 = 0x80000000;
106 fmt.unk3 = 0x0;
107 fmt.nr_channels = MALI_POSITIVE(1);
108 break;
109
110 /* Generic 32-bit */
111 case PIPE_FORMAT_R11G11B10_FLOAT:
112 case PIPE_FORMAT_R8G8B8A8_UINT:
113 case PIPE_FORMAT_R8G8B8A8_SINT:
114 case PIPE_FORMAT_R16G16_FLOAT:
115 case PIPE_FORMAT_R16G16_UINT:
116 case PIPE_FORMAT_R16G16_SINT:
117 case PIPE_FORMAT_R32_FLOAT:
118 case PIPE_FORMAT_R32_UINT:
119 case PIPE_FORMAT_R32_SINT:
120 case PIPE_FORMAT_R10G10B10A2_UINT:
121 fmt.unk1 = 0x88000000;
122 fmt.unk3 = 0x0;
123 fmt.nr_channels = MALI_POSITIVE(4);
124 break;
125
126 /* Generic 16-bit */
127 case PIPE_FORMAT_R8G8_UINT:
128 case PIPE_FORMAT_R8G8_SINT:
129 case PIPE_FORMAT_R16_FLOAT:
130 case PIPE_FORMAT_R16_UINT:
131 case PIPE_FORMAT_R16_SINT:
132 fmt.unk1 = 0x84000000;
133 fmt.unk3 = 0x0;
134 fmt.nr_channels = MALI_POSITIVE(2);
135 break;
136
137 /* Generic 64-bit */
138 case PIPE_FORMAT_R32G32_FLOAT:
139 case PIPE_FORMAT_R32G32_SINT:
140 case PIPE_FORMAT_R32G32_UINT:
141 case PIPE_FORMAT_R16G16B16A16_FLOAT:
142 case PIPE_FORMAT_R16G16B16A16_SINT:
143 case PIPE_FORMAT_R16G16B16A16_UINT:
144 fmt.unk1 = 0x8c000000;
145 fmt.unk3 = 0x1;
146 fmt.nr_channels = MALI_POSITIVE(2);
147 break;
148
149 /* Generic 128-bit */
150 case PIPE_FORMAT_R32G32B32A32_FLOAT:
151 case PIPE_FORMAT_R32G32B32A32_SINT:
152 case PIPE_FORMAT_R32G32B32A32_UINT:
153 fmt.unk1 = 0x90000000;
154 fmt.unk3 = 0x1;
155 fmt.nr_channels = MALI_POSITIVE(4);
156 break;
157
158 default:
159 unreachable("Invalid format rendering");
160 }
161
162 return fmt;
163 }
164
165
166 static void
167 panfrost_mfbd_clear(
168 struct panfrost_batch *batch,
169 struct mali_framebuffer *fb,
170 struct mali_framebuffer_extra *fbx,
171 struct mali_render_target *rts,
172 unsigned rt_count)
173 {
174 struct panfrost_context *ctx = batch->ctx;
175 struct pipe_context *gallium = (struct pipe_context *) ctx;
176 struct panfrost_device *dev = pan_device(gallium->screen);
177
178 for (unsigned i = 0; i < rt_count; ++i) {
179 if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i)))
180 continue;
181
182 rts[i].clear_color_1 = batch->clear_color[i][0];
183 rts[i].clear_color_2 = batch->clear_color[i][1];
184 rts[i].clear_color_3 = batch->clear_color[i][2];
185 rts[i].clear_color_4 = batch->clear_color[i][3];
186 }
187
188 if (batch->clear & PIPE_CLEAR_DEPTH) {
189 fb->clear_depth = batch->clear_depth;
190 }
191
192 if (batch->clear & PIPE_CLEAR_STENCIL) {
193 fb->clear_stencil = batch->clear_stencil;
194 }
195
196 if (dev->quirks & IS_BIFROST) {
197 fbx->clear_color_1 = batch->clear_color[0][0];
198 fbx->clear_color_2 = 0xc0000000 | (fbx->clear_color_1 & 0xffff); /* WTF? */
199 }
200 }
201
202 static void
203 panfrost_mfbd_set_cbuf(
204 struct mali_render_target *rt,
205 struct pipe_surface *surf)
206 {
207 struct panfrost_resource *rsrc = pan_resource(surf->texture);
208
209 unsigned level = surf->u.tex.level;
210 unsigned first_layer = surf->u.tex.first_layer;
211 assert(surf->u.tex.last_layer == first_layer);
212 int stride = rsrc->slices[level].stride;
213
214 mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer);
215
216 rt->format = panfrost_mfbd_format(surf);
217
218 /* Now, we set the layout specific pieces */
219
220 if (rsrc->layout == MALI_TEXTURE_LINEAR) {
221 rt->format.block = MALI_BLOCK_LINEAR;
222 rt->framebuffer = base;
223 rt->framebuffer_stride = stride / 16;
224 } else if (rsrc->layout == MALI_TEXTURE_TILED) {
225 rt->format.block = MALI_BLOCK_TILED;
226 rt->framebuffer = base;
227 rt->framebuffer_stride = stride;
228 } else if (rsrc->layout == MALI_TEXTURE_AFBC) {
229 rt->format.block = MALI_BLOCK_AFBC;
230
231 unsigned header_size = rsrc->slices[level].header_size;
232
233 rt->framebuffer = base + header_size;
234 rt->afbc.metadata = base;
235 rt->afbc.stride = 0;
236 rt->afbc.unk = 0x30009;
237
238 /* TODO: The blob sets this to something nonzero, but it's not
239 * clear what/how to calculate/if it matters */
240 rt->framebuffer_stride = 0;
241 } else {
242 fprintf(stderr, "Invalid render layout (cbuf)");
243 assert(0);
244 }
245 }
246
247 static void
248 panfrost_mfbd_set_zsbuf(
249 struct mali_framebuffer *fb,
250 struct mali_framebuffer_extra *fbx,
251 struct pipe_surface *surf)
252 {
253 struct panfrost_resource *rsrc = pan_resource(surf->texture);
254
255 unsigned level = surf->u.tex.level;
256 unsigned first_layer = surf->u.tex.first_layer;
257 assert(surf->u.tex.last_layer == first_layer);
258
259 mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer);
260
261 if (rsrc->layout == MALI_TEXTURE_AFBC) {
262 /* The only Z/S format we can compress is Z24S8 or variants
263 * thereof (handled by the gallium frontend) */
264 assert(panfrost_is_z24s8_variant(surf->format));
265
266 unsigned header_size = rsrc->slices[level].header_size;
267
268 fb->mfbd_flags |= MALI_MFBD_EXTRA;
269
270 fbx->flags_hi |= MALI_EXTRA_PRESENT;
271 fbx->flags_lo |= MALI_EXTRA_ZS | 0x1; /* unknown */
272 fbx->zs_block = MALI_BLOCK_AFBC;
273
274 fbx->ds_afbc.depth_stencil = base + header_size;
275 fbx->ds_afbc.depth_stencil_afbc_metadata = base;
276 fbx->ds_afbc.depth_stencil_afbc_stride = 0;
277
278 fbx->ds_afbc.zero1 = 0x10009;
279 fbx->ds_afbc.padding = 0x1000;
280 } else if (rsrc->layout == MALI_TEXTURE_LINEAR || rsrc->layout == MALI_TEXTURE_TILED) {
281 /* TODO: Z32F(S8) support, which is always linear */
282
283 int stride = rsrc->slices[level].stride;
284
285 fb->mfbd_flags |= MALI_MFBD_EXTRA;
286 fbx->flags_hi |= MALI_EXTRA_PRESENT;
287 fbx->flags_lo |= MALI_EXTRA_ZS;
288
289 fbx->ds_linear.depth = base;
290
291 if (rsrc->layout == MALI_TEXTURE_LINEAR) {
292 fbx->zs_block = MALI_BLOCK_LINEAR;
293 fbx->ds_linear.depth_stride = stride / 16;
294 } else {
295 fbx->zs_block = MALI_BLOCK_TILED;
296 fbx->ds_linear.depth_stride = stride;
297 }
298
299 if (panfrost_is_z24s8_variant(surf->format)) {
300 fbx->flags_lo |= 0x1;
301 } else if (surf->format == PIPE_FORMAT_Z32_FLOAT) {
302 fbx->flags_lo |= 0xA;
303 fb->mfbd_flags ^= 0x100;
304 fb->mfbd_flags |= 0x200;
305 } else if (surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
306 fbx->flags_hi |= 0x400;
307 fbx->flags_lo |= 0xA;
308 fb->mfbd_flags ^= 0x100;
309 fb->mfbd_flags |= 0x201;
310
311 struct panfrost_resource *stencil = rsrc->separate_stencil;
312 struct panfrost_slice stencil_slice = stencil->slices[level];
313
314 fbx->ds_linear.stencil = panfrost_get_texture_address(stencil, level, first_layer);
315 fbx->ds_linear.stencil_stride = stencil_slice.stride;
316 }
317
318 } else {
319 assert(0);
320 }
321 }
322
323 /* Helper for sequential uploads used for MFBD */
324
325 #define UPLOAD(dest, offset, src, max) { \
326 size_t sz = sizeof(*src); \
327 memcpy(dest.cpu + offset, src, sz); \
328 assert((offset + sz) <= max); \
329 offset += sz; \
330 }
331
332 static mali_ptr
333 panfrost_mfbd_upload(struct panfrost_batch *batch,
334 struct mali_framebuffer *fb,
335 struct mali_framebuffer_extra *fbx,
336 struct mali_render_target *rts,
337 unsigned rt_count)
338 {
339 off_t offset = 0;
340
341 /* There may be extra data stuck in the middle */
342 bool has_extra = fb->mfbd_flags & MALI_MFBD_EXTRA;
343
344 /* Compute total size for transfer */
345
346 size_t total_sz =
347 sizeof(struct mali_framebuffer) +
348 (has_extra ? sizeof(struct mali_framebuffer_extra) : 0) +
349 sizeof(struct mali_render_target) * 4;
350
351 struct panfrost_transfer m_f_trans =
352 panfrost_allocate_transient(batch, total_sz);
353
354 /* Do the transfer */
355
356 UPLOAD(m_f_trans, offset, fb, total_sz);
357
358 if (has_extra)
359 UPLOAD(m_f_trans, offset, fbx, total_sz);
360
361 for (unsigned c = 0; c < 4; ++c) {
362 UPLOAD(m_f_trans, offset, &rts[c], total_sz);
363 }
364
365 /* Return pointer suitable for the fragment section */
366 unsigned tag =
367 MALI_MFBD |
368 (has_extra ? MALI_MFBD_TAG_EXTRA : 0) |
369 (MALI_POSITIVE(rt_count) << 2);
370
371 return m_f_trans.gpu | tag;
372 }
373
374 #undef UPLOAD
375
376 static struct mali_framebuffer
377 panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
378 {
379 struct panfrost_context *ctx = batch->ctx;
380 struct pipe_context *gallium = (struct pipe_context *) ctx;
381 struct panfrost_device *dev = pan_device(gallium->screen);
382
383 unsigned width = batch->key.width;
384 unsigned height = batch->key.height;
385
386 struct mali_framebuffer mfbd = {
387 .width1 = MALI_POSITIVE(width),
388 .height1 = MALI_POSITIVE(height),
389 .width2 = MALI_POSITIVE(width),
390 .height2 = MALI_POSITIVE(height),
391
392 .unk1 = 0x1080,
393
394 .rt_count_1 = MALI_POSITIVE(batch->key.nr_cbufs),
395 .rt_count_2 = 4,
396 };
397
398 if (dev->quirks & IS_BIFROST) {
399 mfbd.msaa.sample_locations = panfrost_emit_sample_locations(batch);
400 mfbd.tiler_meta = panfrost_batch_get_tiler_meta(batch, vertex_count);
401 } else {
402 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
403 struct panfrost_bo *bo = panfrost_batch_get_scratchpad(batch,
404 shift,
405 dev->thread_tls_alloc,
406 dev->core_count);
407 mfbd.shared_memory.stack_shift = shift;
408 mfbd.shared_memory.scratchpad = bo->gpu;
409 mfbd.shared_memory.shared_workgroup_count = ~0;
410
411 mfbd.tiler = panfrost_emit_midg_tiler(batch, vertex_count);
412 }
413
414 return mfbd;
415 }
416
417 void
418 panfrost_attach_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
419 {
420 struct mali_framebuffer mfbd =
421 panfrost_emit_mfbd(batch, vertex_count);
422
423 memcpy(batch->framebuffer.cpu, &mfbd, sizeof(mfbd));
424 }
425
426 /* Creates an MFBD for the FRAGMENT section of the bound framebuffer */
427
428 mali_ptr
429 panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
430 {
431 struct mali_framebuffer fb = panfrost_emit_mfbd(batch, has_draws);
432 struct mali_framebuffer_extra fbx = {0};
433 struct mali_render_target rts[4] = {0};
434
435 /* We always upload at least one dummy GL_NONE render target */
436
437 unsigned rt_descriptors = MAX2(batch->key.nr_cbufs, 1);
438
439 fb.rt_count_1 = MALI_POSITIVE(rt_descriptors);
440 fb.rt_count_2 = rt_descriptors;
441 fb.mfbd_flags = 0x100;
442
443 /* TODO: MRT clear */
444 panfrost_mfbd_clear(batch, &fb, &fbx, rts, fb.rt_count_2);
445
446
447 /* Upload either the render target or a dummy GL_NONE target */
448
449 for (int cb = 0; cb < rt_descriptors; ++cb) {
450 struct pipe_surface *surf = batch->key.cbufs[cb];
451
452 if (surf) {
453 panfrost_mfbd_set_cbuf(&rts[cb], surf);
454
455 /* What is this? Looks like some extension of the bpp
456 * field. Maybe it establishes how much internal
457 * tilebuffer space is reserved? */
458
459 unsigned bpp = util_format_get_blocksize(surf->format);
460 fb.rt_count_2 = MAX2(fb.rt_count_2, ALIGN_POT(bpp, 4) / 4);
461 } else {
462 struct mali_rt_format null_rt = {
463 .unk1 = 0x4000000,
464 .no_preload = true
465 };
466
467 rts[cb].format = null_rt;
468 rts[cb].framebuffer = 0;
469 rts[cb].framebuffer_stride = 0;
470 }
471
472 /* TODO: Break out the field */
473 rts[cb].format.unk1 |= (cb * 0x400);
474 }
475
476 if (batch->key.zsbuf) {
477 panfrost_mfbd_set_zsbuf(&fb, &fbx, batch->key.zsbuf);
478 }
479
480 /* When scanning out, the depth buffer is immediately invalidated, so
481 * we don't need to waste bandwidth writing it out. This can improve
482 * performance substantially (Z24X8_UNORM 1080p @ 60fps is 475 MB/s of
483 * memory bandwidth!).
484 *
485 * The exception is ReadPixels, but this is not supported on GLES so we
486 * can safely ignore it. */
487
488 if (panfrost_batch_is_scanout(batch))
489 batch->requirements &= ~PAN_REQ_DEPTH_WRITE;
490
491 /* Actualize the requirements */
492
493 if (batch->requirements & PAN_REQ_MSAA) {
494 rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA;
495
496 /* XXX */
497 fb.unk1 |= (1 << 4) | (1 << 1);
498 fb.rt_count_2 = 4;
499 }
500
501 if (batch->requirements & PAN_REQ_DEPTH_WRITE)
502 fb.mfbd_flags |= MALI_MFBD_DEPTH_WRITE;
503
504 /* Checksumming only works with a single render target */
505
506 if (batch->key.nr_cbufs == 1) {
507 struct pipe_surface *surf = batch->key.cbufs[0];
508 struct panfrost_resource *rsrc = pan_resource(surf->texture);
509
510 if (rsrc->checksummed) {
511 unsigned level = surf->u.tex.level;
512 struct panfrost_slice *slice = &rsrc->slices[level];
513
514 fb.mfbd_flags |= MALI_MFBD_EXTRA;
515 fbx.flags_lo |= MALI_EXTRA_PRESENT;
516 fbx.checksum_stride = slice->checksum_stride;
517 if (slice->checksum_bo)
518 fbx.checksum = slice->checksum_bo->gpu;
519 else
520 fbx.checksum = rsrc->bo->gpu + slice->checksum_offset;
521 }
522 }
523
524 return panfrost_mfbd_upload(batch, &fb, &fbx, rts, rt_descriptors);
525 }