panfrost: Move panfrost_emit_varying_descriptor() to pan_cmdstream.c
[mesa.git] / src / gallium / drivers / panfrost / pan_mfbd.c
1 /*
2 * Copyright 2018-2019 Alyssa Rosenzweig
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 */
24
25 #include "pan_bo.h"
26 #include "pan_context.h"
27 #include "pan_util.h"
28
29 static struct mali_rt_format
30 panfrost_mfbd_format(struct pipe_surface *surf)
31 {
32 /* Explode details on the format */
33
34 const struct util_format_description *desc =
35 util_format_description(surf->format);
36
37 /* The swizzle for rendering is inverted from texturing */
38
39 unsigned char swizzle[4];
40 panfrost_invert_swizzle(desc->swizzle, swizzle);
41
42 /* Fill in accordingly, defaulting to 8-bit UNORM */
43
44 struct mali_rt_format fmt = {
45 .unk1 = 0x4000000,
46 .unk2 = 0x1,
47 .nr_channels = MALI_POSITIVE(desc->nr_channels),
48 .unk3 = 0x4,
49 .flags = 0x8,
50 .swizzle = panfrost_translate_swizzle_4(swizzle),
51 .no_preload = true
52 };
53
54 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
55 fmt.flags |= MALI_MFBD_FORMAT_SRGB;
56
57 /* sRGB handled as a dedicated flag */
58 enum pipe_format linearized = util_format_linear(surf->format);
59
60 /* If RGB, we're good to go */
61 if (util_format_is_unorm8(desc))
62 return fmt;
63
64 /* Set flags for alternative formats */
65
66 switch (linearized) {
67 case PIPE_FORMAT_B5G6R5_UNORM:
68 fmt.unk1 = 0x14000000;
69 fmt.nr_channels = MALI_POSITIVE(2);
70 fmt.unk3 |= 0x1;
71 break;
72
73 case PIPE_FORMAT_A4B4G4R4_UNORM:
74 case PIPE_FORMAT_B4G4R4A4_UNORM:
75 fmt.unk1 = 0x10000000;
76 fmt.unk3 = 0x5;
77 fmt.nr_channels = MALI_POSITIVE(1);
78 break;
79
80 case PIPE_FORMAT_R10G10B10A2_UNORM:
81 case PIPE_FORMAT_B10G10R10A2_UNORM:
82 case PIPE_FORMAT_R10G10B10X2_UNORM:
83 case PIPE_FORMAT_B10G10R10X2_UNORM:
84 fmt.unk1 = 0x08000000;
85 fmt.unk3 = 0x6;
86 fmt.nr_channels = MALI_POSITIVE(1);
87 break;
88
89 /* Generic 8-bit */
90 case PIPE_FORMAT_R8_UINT:
91 case PIPE_FORMAT_R8_SINT:
92 fmt.unk1 = 0x80000000;
93 fmt.unk3 = 0x0;
94 fmt.nr_channels = MALI_POSITIVE(1);
95 break;
96
97 /* Generic 32-bit */
98 case PIPE_FORMAT_R11G11B10_FLOAT:
99 case PIPE_FORMAT_R8G8B8A8_UINT:
100 case PIPE_FORMAT_R8G8B8A8_SINT:
101 case PIPE_FORMAT_R16G16_FLOAT:
102 case PIPE_FORMAT_R16G16_UINT:
103 case PIPE_FORMAT_R16G16_SINT:
104 case PIPE_FORMAT_R32_FLOAT:
105 case PIPE_FORMAT_R32_UINT:
106 case PIPE_FORMAT_R32_SINT:
107 case PIPE_FORMAT_R10G10B10A2_UINT:
108 fmt.unk1 = 0x88000000;
109 fmt.unk3 = 0x0;
110 fmt.nr_channels = MALI_POSITIVE(4);
111 break;
112
113 /* Generic 16-bit */
114 case PIPE_FORMAT_R8G8_UINT:
115 case PIPE_FORMAT_R8G8_SINT:
116 case PIPE_FORMAT_R16_FLOAT:
117 case PIPE_FORMAT_R16_UINT:
118 case PIPE_FORMAT_R16_SINT:
119 case PIPE_FORMAT_B5G5R5A1_UNORM:
120 fmt.unk1 = 0x84000000;
121 fmt.unk3 = 0x0;
122 fmt.nr_channels = MALI_POSITIVE(2);
123 break;
124
125 /* Generic 64-bit */
126 case PIPE_FORMAT_R32G32_FLOAT:
127 case PIPE_FORMAT_R32G32_SINT:
128 case PIPE_FORMAT_R32G32_UINT:
129 case PIPE_FORMAT_R16G16B16A16_FLOAT:
130 case PIPE_FORMAT_R16G16B16A16_SINT:
131 case PIPE_FORMAT_R16G16B16A16_UINT:
132 fmt.unk1 = 0x8c000000;
133 fmt.unk3 = 0x1;
134 fmt.nr_channels = MALI_POSITIVE(2);
135 break;
136
137 /* Generic 128-bit */
138 case PIPE_FORMAT_R32G32B32A32_FLOAT:
139 case PIPE_FORMAT_R32G32B32A32_SINT:
140 case PIPE_FORMAT_R32G32B32A32_UINT:
141 fmt.unk1 = 0x90000000;
142 fmt.unk3 = 0x1;
143 fmt.nr_channels = MALI_POSITIVE(4);
144 break;
145
146 default:
147 unreachable("Invalid format rendering");
148 }
149
150 return fmt;
151 }
152
153
154 static void
155 panfrost_mfbd_clear(
156 struct panfrost_batch *batch,
157 struct mali_framebuffer *fb,
158 struct mali_framebuffer_extra *fbx,
159 struct mali_render_target *rts,
160 unsigned rt_count)
161 {
162 for (unsigned i = 0; i < rt_count; ++i) {
163 if (!(batch->clear & (PIPE_CLEAR_COLOR0 << i)))
164 continue;
165
166 rts[i].clear_color_1 = batch->clear_color[i][0];
167 rts[i].clear_color_2 = batch->clear_color[i][1];
168 rts[i].clear_color_3 = batch->clear_color[i][2];
169 rts[i].clear_color_4 = batch->clear_color[i][3];
170 }
171
172 if (batch->clear & PIPE_CLEAR_DEPTH) {
173 fb->clear_depth = batch->clear_depth;
174 }
175
176 if (batch->clear & PIPE_CLEAR_STENCIL) {
177 fb->clear_stencil = batch->clear_stencil;
178 }
179 }
180
181 static void
182 panfrost_mfbd_set_cbuf(
183 struct mali_render_target *rt,
184 struct pipe_surface *surf)
185 {
186 struct panfrost_resource *rsrc = pan_resource(surf->texture);
187
188 unsigned level = surf->u.tex.level;
189 unsigned first_layer = surf->u.tex.first_layer;
190 assert(surf->u.tex.last_layer == first_layer);
191 int stride = rsrc->slices[level].stride;
192
193 mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer);
194
195 rt->format = panfrost_mfbd_format(surf);
196
197 /* Now, we set the layout specific pieces */
198
199 if (rsrc->layout == MALI_TEXTURE_LINEAR) {
200 rt->format.block = MALI_BLOCK_LINEAR;
201 rt->framebuffer = base;
202 rt->framebuffer_stride = stride / 16;
203 } else if (rsrc->layout == MALI_TEXTURE_TILED) {
204 rt->format.block = MALI_BLOCK_TILED;
205 rt->framebuffer = base;
206 rt->framebuffer_stride = stride;
207 } else if (rsrc->layout == MALI_TEXTURE_AFBC) {
208 rt->format.block = MALI_BLOCK_AFBC;
209
210 unsigned header_size = rsrc->slices[level].header_size;
211
212 rt->framebuffer = base + header_size;
213 rt->afbc.metadata = base;
214 rt->afbc.stride = 0;
215 rt->afbc.unk = 0x30009;
216
217 /* TODO: The blob sets this to something nonzero, but it's not
218 * clear what/how to calculate/if it matters */
219 rt->framebuffer_stride = 0;
220 } else {
221 fprintf(stderr, "Invalid render layout (cbuf)");
222 assert(0);
223 }
224 }
225
226 static void
227 panfrost_mfbd_set_zsbuf(
228 struct mali_framebuffer *fb,
229 struct mali_framebuffer_extra *fbx,
230 struct pipe_surface *surf)
231 {
232 struct panfrost_resource *rsrc = pan_resource(surf->texture);
233
234 unsigned level = surf->u.tex.level;
235 unsigned first_layer = surf->u.tex.first_layer;
236 assert(surf->u.tex.last_layer == first_layer);
237
238 mali_ptr base = panfrost_get_texture_address(rsrc, level, first_layer);
239
240 if (rsrc->layout == MALI_TEXTURE_AFBC) {
241 /* The only Z/S format we can compress is Z24S8 or variants
242 * thereof (handled by the state tracker) */
243 assert(panfrost_is_z24s8_variant(surf->format));
244
245 unsigned header_size = rsrc->slices[level].header_size;
246
247 fb->mfbd_flags |= MALI_MFBD_EXTRA;
248
249 fbx->flags_hi |= MALI_EXTRA_PRESENT;
250 fbx->flags_lo |= MALI_EXTRA_ZS | 0x1; /* unknown */
251 fbx->zs_block = MALI_BLOCK_AFBC;
252
253 fbx->ds_afbc.depth_stencil = base + header_size;
254 fbx->ds_afbc.depth_stencil_afbc_metadata = base;
255 fbx->ds_afbc.depth_stencil_afbc_stride = 0;
256
257 fbx->ds_afbc.zero1 = 0x10009;
258 fbx->ds_afbc.padding = 0x1000;
259 } else if (rsrc->layout == MALI_TEXTURE_LINEAR || rsrc->layout == MALI_TEXTURE_TILED) {
260 /* TODO: Z32F(S8) support, which is always linear */
261
262 int stride = rsrc->slices[level].stride;
263
264 fb->mfbd_flags |= MALI_MFBD_EXTRA;
265 fbx->flags_hi |= MALI_EXTRA_PRESENT;
266 fbx->flags_lo |= MALI_EXTRA_ZS;
267
268 fbx->ds_linear.depth = base;
269
270 if (rsrc->layout == MALI_TEXTURE_LINEAR) {
271 fbx->zs_block = MALI_BLOCK_LINEAR;
272 fbx->ds_linear.depth_stride = stride / 16;
273 } else {
274 fbx->zs_block = MALI_BLOCK_TILED;
275 fbx->ds_linear.depth_stride = stride;
276 }
277
278 if (panfrost_is_z24s8_variant(surf->format)) {
279 fbx->flags_lo |= 0x1;
280 } else if (surf->format == PIPE_FORMAT_Z32_UNORM) {
281 /* default flags (0 in bottom place) */
282 } else if (surf->format == PIPE_FORMAT_Z32_FLOAT) {
283 fbx->flags_lo |= 0xA;
284 fb->mfbd_flags ^= 0x100;
285 fb->mfbd_flags |= 0x200;
286 } else if (surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
287 fbx->flags_hi |= 0x400;
288 fbx->flags_lo |= 0xA;
289 fb->mfbd_flags ^= 0x100;
290 fb->mfbd_flags |= 0x201;
291
292 struct panfrost_resource *stencil = rsrc->separate_stencil;
293 struct panfrost_slice stencil_slice = stencil->slices[level];
294
295 fbx->ds_linear.stencil = panfrost_get_texture_address(stencil, level, first_layer);
296 fbx->ds_linear.stencil_stride = stencil_slice.stride;
297 }
298
299 } else {
300 assert(0);
301 }
302 }
303
304 /* Helper for sequential uploads used for MFBD */
305
306 #define UPLOAD(dest, offset, src, max) { \
307 size_t sz = sizeof(*src); \
308 memcpy(dest.cpu + offset, src, sz); \
309 assert((offset + sz) <= max); \
310 offset += sz; \
311 }
312
313 static mali_ptr
314 panfrost_mfbd_upload(struct panfrost_batch *batch,
315 struct mali_framebuffer *fb,
316 struct mali_framebuffer_extra *fbx,
317 struct mali_render_target *rts,
318 unsigned rt_count)
319 {
320 off_t offset = 0;
321
322 /* There may be extra data stuck in the middle */
323 bool has_extra = fb->mfbd_flags & MALI_MFBD_EXTRA;
324
325 /* Compute total size for transfer */
326
327 size_t total_sz =
328 sizeof(struct mali_framebuffer) +
329 (has_extra ? sizeof(struct mali_framebuffer_extra) : 0) +
330 sizeof(struct mali_render_target) * 4;
331
332 struct panfrost_transfer m_f_trans =
333 panfrost_allocate_transient(batch, total_sz);
334
335 /* Do the transfer */
336
337 UPLOAD(m_f_trans, offset, fb, total_sz);
338
339 if (has_extra)
340 UPLOAD(m_f_trans, offset, fbx, total_sz);
341
342 for (unsigned c = 0; c < 4; ++c) {
343 UPLOAD(m_f_trans, offset, &rts[c], total_sz);
344 }
345
346 /* Return pointer suitable for the fragment section */
347 unsigned tag =
348 MALI_MFBD |
349 (has_extra ? MALI_MFBD_TAG_EXTRA : 0) |
350 (MALI_POSITIVE(rt_count) << 2);
351
352 return m_f_trans.gpu | tag;
353 }
354
355 #undef UPLOAD
356
357 static struct mali_framebuffer
358 panfrost_emit_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
359 {
360 struct panfrost_context *ctx = batch->ctx;
361 struct pipe_context *gallium = (struct pipe_context *) ctx;
362 struct panfrost_screen *screen = pan_screen(gallium->screen);
363
364 unsigned width = batch->key.width;
365 unsigned height = batch->key.height;
366
367 unsigned shift = panfrost_get_stack_shift(batch->stack_size);
368
369 struct mali_framebuffer framebuffer = {
370 .width1 = MALI_POSITIVE(width),
371 .height1 = MALI_POSITIVE(height),
372 .width2 = MALI_POSITIVE(width),
373 .height2 = MALI_POSITIVE(height),
374
375 .unk1 = 0x1080,
376
377 .rt_count_1 = MALI_POSITIVE(batch->key.nr_cbufs),
378 .rt_count_2 = 4,
379
380 .tiler = panfrost_emit_midg_tiler(batch, vertex_count),
381
382 .shared_memory = {
383 .stack_shift = shift,
384 .scratchpad = panfrost_batch_get_scratchpad(batch, shift, screen->thread_tls_alloc, screen->core_count)->gpu,
385 .shared_workgroup_count = ~0,
386 }
387 };
388
389 return framebuffer;
390 }
391
392 void
393 panfrost_attach_mfbd(struct panfrost_batch *batch, unsigned vertex_count)
394 {
395 struct mali_framebuffer mfbd =
396 panfrost_emit_mfbd(batch, vertex_count);
397
398 memcpy(batch->framebuffer.cpu, &mfbd, sizeof(mfbd));
399 }
400
401 /* Creates an MFBD for the FRAGMENT section of the bound framebuffer */
402
403 mali_ptr
404 panfrost_mfbd_fragment(struct panfrost_batch *batch, bool has_draws)
405 {
406 struct mali_framebuffer fb = panfrost_emit_mfbd(batch, has_draws);
407 struct mali_framebuffer_extra fbx = {0};
408 struct mali_render_target rts[4] = {0};
409
410 /* We always upload at least one dummy GL_NONE render target */
411
412 unsigned rt_descriptors = MAX2(batch->key.nr_cbufs, 1);
413
414 fb.rt_count_1 = MALI_POSITIVE(rt_descriptors);
415 fb.rt_count_2 = rt_descriptors;
416 fb.mfbd_flags = 0x100;
417
418 /* TODO: MRT clear */
419 panfrost_mfbd_clear(batch, &fb, &fbx, rts, fb.rt_count_2);
420
421
422 /* Upload either the render target or a dummy GL_NONE target */
423
424 for (int cb = 0; cb < rt_descriptors; ++cb) {
425 struct pipe_surface *surf = batch->key.cbufs[cb];
426
427 if (surf) {
428 panfrost_mfbd_set_cbuf(&rts[cb], surf);
429
430 /* What is this? Looks like some extension of the bpp
431 * field. Maybe it establishes how much internal
432 * tilebuffer space is reserved? */
433
434 unsigned bpp = util_format_get_blocksize(surf->format);
435 fb.rt_count_2 = MAX2(fb.rt_count_2, ALIGN_POT(bpp, 4) / 4);
436 } else {
437 struct mali_rt_format null_rt = {
438 .unk1 = 0x4000000,
439 .no_preload = true
440 };
441
442 rts[cb].format = null_rt;
443 rts[cb].framebuffer = 0;
444 rts[cb].framebuffer_stride = 0;
445 }
446
447 /* TODO: Break out the field */
448 rts[cb].format.unk1 |= (cb * 0x400);
449 }
450
451 if (batch->key.zsbuf) {
452 panfrost_mfbd_set_zsbuf(&fb, &fbx, batch->key.zsbuf);
453 }
454
455 /* When scanning out, the depth buffer is immediately invalidated, so
456 * we don't need to waste bandwidth writing it out. This can improve
457 * performance substantially (Z32_UNORM 1080p @ 60fps is 475 MB/s of
458 * memory bandwidth!).
459 *
460 * The exception is ReadPixels, but this is not supported on GLES so we
461 * can safely ignore it. */
462
463 if (panfrost_batch_is_scanout(batch))
464 batch->requirements &= ~PAN_REQ_DEPTH_WRITE;
465
466 /* Actualize the requirements */
467
468 if (batch->requirements & PAN_REQ_MSAA) {
469 rts[0].format.flags |= MALI_MFBD_FORMAT_MSAA;
470
471 /* XXX */
472 fb.unk1 |= (1 << 4) | (1 << 1);
473 fb.rt_count_2 = 4;
474 }
475
476 if (batch->requirements & PAN_REQ_DEPTH_WRITE)
477 fb.mfbd_flags |= MALI_MFBD_DEPTH_WRITE;
478
479 /* Checksumming only works with a single render target */
480
481 if (batch->key.nr_cbufs == 1) {
482 struct pipe_surface *surf = batch->key.cbufs[0];
483 struct panfrost_resource *rsrc = pan_resource(surf->texture);
484 struct panfrost_bo *bo = rsrc->bo;
485
486 if (rsrc->checksummed) {
487 unsigned level = surf->u.tex.level;
488 struct panfrost_slice *slice = &rsrc->slices[level];
489
490 fb.mfbd_flags |= MALI_MFBD_EXTRA;
491 fbx.flags_lo |= MALI_EXTRA_PRESENT;
492 fbx.checksum_stride = slice->checksum_stride;
493 fbx.checksum = bo->gpu + slice->checksum_offset;
494 }
495 }
496
497 return panfrost_mfbd_upload(batch, &fb, &fbx, rts, rt_descriptors);
498 }