gallium: change comments to remove 'state tracker'
[mesa.git] / src / gallium / drivers / panfrost / pan_mfbd.c
1 /*
2 * Copyright 2018-2019 Alyssa Rosenzweig
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 */
24
25 #include "pan_bo.h"
26 #include "pan_context.h"
27 #include "pan_cmdstream.h"
28 #include "pan_util.h"
29 #include "panfrost-quirks.h"
30
31 static struct mali_rt_format
32 panfrost_mfbd_format(struct pipe_surface *surf)
33 {
34 struct panfrost_device *dev = pan_device(surf->context->screen);
35 bool is_bifrost = dev->quirks & IS_BIFROST;
36
37 /* Explode details on the format */
38
39 const struct util_format_description *desc =
40 util_format_description(surf->format);
41
42 /* The swizzle for rendering is inverted from texturing */
43
44 unsigned char swizzle[4];
45 panfrost_invert_swizzle(desc->swizzle, swizzle);
46
47 /* Fill in accordingly, defaulting to 8-bit UNORM */
48
49 struct mali_rt_format fmt = {
50 .unk1 = 0x4000000,
51 .unk2 = 0x1,
52 .nr_channels = MALI_POSITIVE(desc->nr_channels),
53 .unk3 = 0x4,
54 .unk4 = is_bifrost,
55 .flags = 0x8,
56 .swizzle = panfrost_translate_swizzle_4(swizzle),
57 .no_preload = true
58 };
59
60 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
61 fmt.flags |= MALI_MFBD_FORMAT_SRGB;
62
63 /* sRGB handled as a dedicated flag */
64 enum pipe_format linearized = util_format_linear(surf->format);
65
66 /* If RGB, we're good to go */
67 if (util_format_is_unorm8(desc))
68 return fmt;
69
70 /* Set flags for alternative formats */
71
72 switch (linearized) {
73 case PIPE_FORMAT_B5G6R5_UNORM:
74 fmt.unk1 = 0x14000000;
75 fmt.nr_channels = MALI_POSITIVE(2);
76 fmt.unk3 |= 0x1;
77 break;
78
79 case PIPE_FORMAT_A4B4G4R4_UNORM:
80 case PIPE_FORMAT_B4G4R4A4_UNORM:
81 fmt.unk1 = 0x10000000;
82 fmt.unk3 = 0x5;
83 fmt.nr_channels = MALI_POSITIVE(1);
84 break;
85
86 case PIPE_FORMAT_R10G10B10A2_UNORM:
87 case PIPE_FORMAT_B10G10R10A2_UNORM:
88 case PIPE_FORMAT_R10G10B10X2_UNORM:
89 case PIPE_FORMAT_B10G10R10X2_UNORM:
90 fmt.unk1 = 0x08000000;
91 fmt.unk3 = 0x6;
92 fmt.nr_channels = MALI_POSITIVE(1);
93 break;
94
95 /* Generic 8-bit */
96 case PIPE_FORMAT_R8_UINT:
97 case PIPE_FORMAT_R8_SINT:
98 fmt.unk1 = 0x80000000;
99 fmt.unk3 = 0x0;
100 fmt.nr_channels = MALI_POSITIVE(1);
101 break;
102
103 /* Generic 32-bit */
104 case PIPE_FORMAT_R11G11B10_FLOAT:
105 case PIPE_FORMAT_R8G8B8A8_UINT:
106 case PIPE_FORMAT_R8G8B8A8_SINT:
107 case PIPE_FORMAT_R16G16_FLOAT:
108 case PIPE_FORMAT_R16G16_UINT:
109 case PIPE_FORMAT_R16G16_SINT:
110 case PIPE_FORMAT_R32_FLOAT:
111 case PIPE_FORMAT_R32_UINT:
112 case PIPE_FORMAT_R32_SINT:
113 case PIPE_FORMAT_R10G10B10A2_UINT:
114 fmt.unk1 = 0x88000000;
115 fmt.unk3 = 0x0;
116 fmt.nr_channels = MALI_POSITIVE(4);
117 break;
118
119 /* Generic 16-bit */
120 case PIPE_FORMAT_R8G8_UINT:
121 case PIPE_FORMAT_R8G8_SINT:
122 case PIPE_FORMAT_R16_FLOAT:
123 case PIPE_FORMAT_R16_UINT:
124 case PIPE_FORMAT_R16_SINT:
125 case PIPE_FORMAT_B5G5R5A1_UNORM:
126 fmt.unk1 = 0x84000000;
127 fmt.unk3 = 0x0;
128 fmt.nr_channels = MALI_POSITIVE(2);
129 break;
130
131 /* Generic 64-bit */
132 case PIPE_FORMAT_R32G32_FLOAT:
133 case PIPE_FORMAT_R32G32_SINT:
134 case PIPE_FORMAT_R32G32_UINT:
135 case PIPE_FORMAT_R16G16B16A16_FLOAT:
136 case PIPE_FORMAT_R16G16B16A16_SINT:
137 case PIPE_FORMAT_R16G16B16A16_UINT:
138 fmt.unk1 = 0x8c000000;
139 fmt.unk3 = 0x1;
140 fmt.nr_channels = MALI_POSITIVE(2);
141 break;
142
143 /* Generic 128-bit */
144 case PIPE_FORMAT_R32G32B32A32_FLOAT:
145 case PIPE_FORMAT_R32G32B32A32_SINT:
146 case PIPE_FORMAT_R32G32B32A32_UINT:
147 fmt.unk1 = 0x90000000;
148 fmt.unk3 = 0x1;
149 fmt.nr_channels = MALI_POSITIVE(4);
150 break;
151
152 default:
153 unreachable("Invalid format rendering");
154 }
155
156 return fmt;
157 }
158
159
160 static void
161 panfrost_mfbd_clear(
162 struct panfrost_batch *batch,
163 struct mali_framebuffer *fb,
164 struct mali_framebuffer_extra *fbx,
165 struct mali_render_target *rts,
166 unsigned rt_count)
167 {
168 struct panfrost_context *ctx = batch->ctx;
169 struct pipe_context *gallium = (struct pipe_context *) ctx;
170 struct panfrost_device *dev = pan_device(gallium->screen);
171
172 for (unsigned i = 0; i < rt_count; ++i) {
173 if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i)))
174 continue;
175
176 rts[i].clear_color_1 = batch->clear_color[i][0];
177 rts[i].clear_color_2 = batch->clear_color[i][1];
178 rts[i].clear_color_3 = batch->clear_color[i][2];
179 rts[i].clear_color_4 = batch->clear_color[i][3];
180 }
181
182 if (batch->clear & PIPE_CLEAR_DEPTH) {
183 fb->clear_depth = batch->clear_depth;
184 }
185
186 if (batch->clear & PIPE_CLEAR_STENCIL) {
187 fb->clear_stencil = batch->clear_stencil;
188 }
189
190 if (dev->quirks & IS_BIFROST) {
191 fbx->clear_color_1 = batch->clear_color[0][0];
192 fbx->clear_color_2 = 0xc0000000 | (fbx->clear_color_1 & 0xffff); /* WTF? */
193 }
194 }
195
196 static void
197 panfrost_mfbd_set_cbuf(
198 struct mali_render_target *rt,
199 struct pipe_surface *surf)
200 {
201 struct panfrost_resource *rsrc = pan_resource(surf->texture);
202
203 unsigned level = surf->u.tex.level;
204 unsigned first_layer = surf->u.tex.first_layer;
205 assert(surf->u.tex.last_layer == first_layer);
206 int stride = rsrc->slices[level].stride;
207
208 mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer);
209
210 rt->format = panfrost_mfbd_format(surf);
211
212 /* Now, we set the layout specific pieces */
213
214 if (rsrc->layout == MALI_TEXTURE_LINEAR) {
215 rt->format.block = MALI_BLOCK_LINEAR;
216 rt->framebuffer = base;
217 rt->framebuffer_stride = stride / 16;
218 } else if (rsrc->layout == MALI_TEXTURE_TILED) {
219 rt->format.block = MALI_BLOCK_TILED;
220 rt->framebuffer = base;
221 rt->framebuffer_stride = stride;
222 } else if (rsrc->layout == MALI_TEXTURE_AFBC) {
223 rt->format.block = MALI_BLOCK_AFBC;
224
225 unsigned header_size = rsrc->slices[level].header_size;
226
227 rt->framebuffer = base + header_size;
228 rt->afbc.metadata = base;
229 rt->afbc.stride = 0;
230 rt->afbc.unk = 0x30009;
231
232 /* TODO: The blob sets this to something nonzero, but it's not
233 * clear what/how to calculate/if it matters */
234 rt->framebuffer_stride = 0;
235 } else {
236 fprintf(stderr, "Invalid render layout (cbuf)");
237 assert(0);
238 }
239 }
240
241 static void
242 panfrost_mfbd_set_zsbuf(
243 struct mali_framebuffer *fb,
244 struct mali_framebuffer_extra *fbx,
245 struct pipe_surface *surf)
246 {
247 struct panfrost_resource *rsrc = pan_resource(surf->texture);
248
249 unsigned level = surf->u.tex.level;
250 unsigned first_layer = surf->u.tex.first_layer;
251 assert(surf->u.tex.last_layer == first_layer);
252
253 mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer);
254
255 if (rsrc->layout == MALI_TEXTURE_AFBC) {
256 /* The only Z/S format we can compress is Z24S8 or variants
257 * thereof (handled by the gallium frontend) */
258 assert(panfrost_is_z24s8_variant(surf->format));
259
260 unsigned header_size = rsrc->slices[level].header_size;
261
262 fb->mfbd_flags |= MALI_MFBD_EXTRA;
263
264 fbx->flags_hi |= MALI_EXTRA_PRESENT;
265 fbx->flags_lo |= MALI_EXTRA_ZS | 0x1; /* unknown */
266 fbx->zs_block = MALI_BLOCK_AFBC;
267
268 fbx->ds_afbc.depth_stencil = base + header_size;
269 fbx->ds_afbc.depth_stencil_afbc_metadata = base;
270 fbx->ds_afbc.depth_stencil_afbc_stride = 0;
271
272 fbx->ds_afbc.zero1 = 0x10009;
273 fbx->ds_afbc.padding = 0x1000;
274 } else if (rsrc->layout == MALI_TEXTURE_LINEAR || rsrc->layout == MALI_TEXTURE_TILED) {
275 /* TODO: Z32F(S8) support, which is always linear */
276
277 int stride = rsrc->slices[level].stride;
278
279 fb->mfbd_flags |= MALI_MFBD_EXTRA;
280 fbx->flags_hi |= MALI_EXTRA_PRESENT;
281 fbx->flags_lo |= MALI_EXTRA_ZS;
282
283 fbx->ds_linear.depth = base;
284
285 if (rsrc->layout == MALI_TEXTURE_LINEAR) {
286 fbx->zs_block = MALI_BLOCK_LINEAR;
287 fbx->ds_linear.depth_stride = stride / 16;
288 } else {
289 fbx->zs_block = MALI_BLOCK_TILED;
290 fbx->ds_linear.depth_stride = stride;
291 }
292
293 if (panfrost_is_z24s8_variant(surf->format)) {
294 fbx->flags_lo |= 0x1;
295 } else if (surf->format == PIPE_FORMAT_Z32_UNORM) {
296 /* default flags (0 in bottom place) */
297 } else if (surf->format == PIPE_FORMAT_Z32_FLOAT) {
298 fbx->flags_lo |= 0xA;
299 fb->mfbd_flags ^= 0x100;
300 fb->mfbd_flags |= 0x200;
301 } else if (surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
302 fbx->flags_hi |= 0x400;
303 fbx->flags_lo |= 0xA;
304 fb->mfbd_flags ^= 0x100;
305 fb->mfbd_flags |= 0x201;
306
307 struct panfrost_resource *stencil = rsrc->separate_stencil;
308 struct panfrost_slice stencil_slice = stencil->slices[level];
309
310 fbx->ds_linear.stencil = panfrost_get_texture_address(stencil, level, first_layer);
311 fbx->ds_linear.stencil_stride = stencil_slice.stride;
312 }
313
314 } else {
315 assert(0);
316 }
317 }
318
/* Helper for sequential uploads used for MFBD.
 *
 * Copies the object pointed to by src into dest.cpu at byte offset `offset`,
 * then advances `offset` past it. `offset` must be a modifiable lvalue and
 * `max` is the total size of the destination in bytes.
 *
 * The bounds check runs before the copy so that an overrun is reported
 * before any memory is written. The body is wrapped in do/while(0) so the
 * macro behaves as a single statement, including inside if/else. */

#define UPLOAD(dest, offset, src, max) do { \
        size_t upload_sz_ = sizeof(*(src)); \
        assert(((offset) + upload_sz_) <= (max)); \
        memcpy((dest).cpu + (offset), (src), upload_sz_); \
        (offset) += upload_sz_; \
} while (0)
327
328 static mali_ptr
329 panfrost_mfbd_upload(struct panfrost_batch *batch,
330 struct mali_framebuffer *fb,
331 struct mali_framebuffer_extra *fbx,
332 struct mali_render_target *rts,
333 unsigned rt_count)
334 {
335 off_t offset = 0;
336
337 /* There may be extra data stuck in the middle */
338 bool has_extra = fb->mfbd_flags & MALI_MFBD_EXTRA;
339
340 /* Compute total size for transfer */
341
342 size_t total_sz =
343 sizeof(struct mali_framebuffer) +
344 (has_extra ? sizeof(struct mali_framebuffer_extra) : 0) +
345 sizeof(struct mali_render_target) * 4;
346
347 struct panfrost_transfer m_f_trans =
348 panfrost_allocate_transient(batch, total_sz);
349
350 /* Do the transfer */
351
352 UPLOAD(m_f_trans, offset, fb, total_sz);
353
354 if (has_extra)
355 UPLOAD(m_f_trans, offset, fbx, total_sz);
356
357 for (unsigned c = 0; c < 4; ++c) {
358 UPLOAD(m_f_trans, offset, &rts[c], total_sz);
359 }
360
361 /* Return pointer suitable for the fragment section */
362 unsigned tag =
363 MALI_MFBD |
364 (has_extra ? MALI_MFBD_TAG_EXTRA : 0) |
365 (MALI_POSITIVE(rt_count) << 2);
366
367 return m_f_trans.gpu | tag;
368 }
369
370 #undef UPLOAD
371
372 static struct mali_framebuffer
373 panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
374 {
375 struct panfrost_context *ctx = batch->ctx;
376 struct pipe_context *gallium = (struct pipe_context *) ctx;
377 struct panfrost_device *dev = pan_device(gallium->screen);
378
379 unsigned width = batch->key.width;
380 unsigned height = batch->key.height;
381
382 struct mali_framebuffer mfbd = {
383 .width1 = MALI_POSITIVE(width),
384 .height1 = MALI_POSITIVE(height),
385 .width2 = MALI_POSITIVE(width),
386 .height2 = MALI_POSITIVE(height),
387
388 .unk1 = 0x1080,
389
390 .rt_count_1 = MALI_POSITIVE(batch->key.nr_cbufs),
391 .rt_count_2 = 4,
392 };
393
394 if (dev->quirks & IS_BIFROST) {
395 mfbd.msaa.sample_locations = panfrost_emit_sample_locations(batch);
396 mfbd.tiler_meta = panfrost_batch_get_tiler_meta(batch, vertex_count);
397 } else {
398 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
399 struct panfrost_bo *bo = panfrost_batch_get_scratchpad(batch,
400 shift,
401 dev->thread_tls_alloc,
402 dev->core_count);
403 mfbd.shared_memory.stack_shift = shift;
404 mfbd.shared_memory.scratchpad = bo->gpu;
405 mfbd.shared_memory.shared_workgroup_count = ~0;
406
407 mfbd.tiler = panfrost_emit_midg_tiler(batch, vertex_count);
408 }
409
410 return mfbd;
411 }
412
413 void
414 panfrost_attach_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
415 {
416 struct mali_framebuffer mfbd =
417 panfrost_emit_mfbd(batch, vertex_count);
418
419 memcpy(batch->framebuffer.cpu, &mfbd, sizeof(mfbd));
420 }
421
422 /* Creates an MFBD for the FRAGMENT section of the bound framebuffer */
423
424 mali_ptr
425 panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
426 {
427 struct mali_framebuffer fb = panfrost_emit_mfbd(batch, has_draws);
428 struct mali_framebuffer_extra fbx = {0};
429 struct mali_render_target rts[4] = {0};
430
431 /* We always upload at least one dummy GL_NONE render target */
432
433 unsigned rt_descriptors = MAX2(batch->key.nr_cbufs, 1);
434
435 fb.rt_count_1 = MALI_POSITIVE(rt_descriptors);
436 fb.rt_count_2 = rt_descriptors;
437 fb.mfbd_flags = 0x100;
438
439 /* TODO: MRT clear */
440 panfrost_mfbd_clear(batch, &fb, &fbx, rts, fb.rt_count_2);
441
442
443 /* Upload either the render target or a dummy GL_NONE target */
444
445 for (int cb = 0; cb < rt_descriptors; ++cb) {
446 struct pipe_surface *surf = batch->key.cbufs[cb];
447
448 if (surf) {
449 panfrost_mfbd_set_cbuf(&rts[cb], surf);
450
451 /* What is this? Looks like some extension of the bpp
452 * field. Maybe it establishes how much internal
453 * tilebuffer space is reserved? */
454
455 unsigned bpp = util_format_get_blocksize(surf->format);
456 fb.rt_count_2 = MAX2(fb.rt_count_2, ALIGN_POT(bpp, 4) / 4);
457 } else {
458 struct mali_rt_format null_rt = {
459 .unk1 = 0x4000000,
460 .no_preload = true
461 };
462
463 rts[cb].format = null_rt;
464 rts[cb].framebuffer = 0;
465 rts[cb].framebuffer_stride = 0;
466 }
467
468 /* TODO: Break out the field */
469 rts[cb].format.unk1 |= (cb * 0x400);
470 }
471
472 if (batch->key.zsbuf) {
473 panfrost_mfbd_set_zsbuf(&fb, &fbx, batch->key.zsbuf);
474 }
475
476 /* When scanning out, the depth buffer is immediately invalidated, so
477 * we don't need to waste bandwidth writing it out. This can improve
478 * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of
479 * memory bandwidth!).
480 *
481 * The exception is ReadPixels, but this is not supported on GLES so we
482 * can safely ignore it. */
483
484 if (panfrost_batch_is_scanout(batch))
485 batch->requirements &= ~PAN_REQ_DEPTH_WRITE;
486
487 /* Actualize the requirements */
488
489 if (batch->requirements & PAN_REQ_MSAA) {
490 rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA;
491
492 /* XXX */
493 fb.unk1 |= (1 << 4) | (1 << 1);
494 fb.rt_count_2 = 4;
495 }
496
497 if (batch->requirements & PAN_REQ_DEPTH_WRITE)
498 fb.mfbd_flags |= MALI_MFBD_DEPTH_WRITE;
499
500 /* Checksumming only works with a single render target */
501
502 if (batch->key.nr_cbufs == 1) {
503 struct pipe_surface *surf = batch->key.cbufs[0];
504 struct panfrost_resource *rsrc = pan_resource(surf->texture);
505
506 if (rsrc->checksummed) {
507 unsigned level = surf->u.tex.level;
508 struct panfrost_slice *slice = &rsrc->slices[level];
509
510 fb.mfbd_flags |= MALI_MFBD_EXTRA;
511 fbx.flags_lo |= MALI_EXTRA_PRESENT;
512 fbx.checksum_stride = slice->checksum_stride;
513 if (slice->checksum_bo)
514 fbx.checksum = slice->checksum_bo->gpu;
515 else
516 fbx.checksum = rsrc->bo->gpu + slice->checksum_offset;
517 }
518 }
519
520 return panfrost_mfbd_upload(batch, &fb, &fbx, rts, rt_descriptors);
521 }