lima: add lima_submit_get
[mesa.git] / src / gallium / drivers / lima / lima_submit.c
1 /*
2 * Copyright (C) 2017-2019 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include <stdlib.h>
25 #include <string.h>
26
27 #include "xf86drm.h"
28 #include "drm-uapi/lima_drm.h"
29
30 #include "util/u_math.h"
31 #include "util/ralloc.h"
32 #include "util/u_dynarray.h"
33 #include "util/os_time.h"
34 #include "util/hash_table.h"
35 #include "util/u_upload_mgr.h"
36 #include "util/u_inlines.h"
37
38 #include "lima_screen.h"
39 #include "lima_context.h"
40 #include "lima_submit.h"
41 #include "lima_bo.h"
42 #include "lima_util.h"
43 #include "lima_format.h"
44 #include "lima_resource.h"
45 #include "lima_texture.h"
46 #include "lima_fence.h"
47 #include "lima_gpu.h"
48
49 struct lima_submit {
50 int fd;
51 struct lima_context *ctx;
52
53 struct util_dynarray gem_bos[2];
54 struct util_dynarray bos[2];
55 };
56
57
/* Widen a pointer to the 64-bit integer form stored in the DRM submit
 * request. Converting through uintptr_t keeps every pointer bit even on
 * ABIs where unsigned long is narrower than a pointer. */
#define VOID2U64(x) ((uint64_t)(uintptr_t)(x))
59
60 static struct lima_submit *
61 lima_submit_create(struct lima_context *ctx)
62 {
63 struct lima_submit *s;
64
65 s = rzalloc(ctx, struct lima_submit);
66 if (!s)
67 return NULL;
68
69 s->fd = lima_screen(ctx->base.screen)->fd;
70 s->ctx = ctx;
71
72 for (int i = 0; i < 2; i++) {
73 util_dynarray_init(s->gem_bos + i, s);
74 util_dynarray_init(s->bos + i, s);
75 }
76
77 return s;
78 }
79
/* Tear-down hook for a submit. It currently releases nothing itself. */
static void
lima_submit_free(struct lima_submit *submit)
{
   (void)submit;
}
85
86 /*
87 * Note: this function can only be called in draw code path,
88 * must not exist in flush code path.
89 */
90 struct lima_submit *
91 lima_submit_get(struct lima_context *ctx)
92 {
93 return ctx->submit;
94 }
95
96 bool lima_submit_add_bo(struct lima_submit *submit, int pipe,
97 struct lima_bo *bo, uint32_t flags)
98 {
99 util_dynarray_foreach(submit->gem_bos + pipe, struct drm_lima_gem_submit_bo, gem_bo) {
100 if (bo->handle == gem_bo->handle) {
101 gem_bo->flags |= flags;
102 return true;
103 }
104 }
105
106 struct drm_lima_gem_submit_bo *submit_bo =
107 util_dynarray_grow(submit->gem_bos + pipe, struct drm_lima_gem_submit_bo, 1);
108 submit_bo->handle = bo->handle;
109 submit_bo->flags = flags;
110
111 struct lima_bo **jbo = util_dynarray_grow(submit->bos + pipe, struct lima_bo *, 1);
112 *jbo = bo;
113
114 /* prevent bo from being freed when submit start */
115 lima_bo_reference(bo);
116
117 return true;
118 }
119
120 static bool
121 lima_submit_start(struct lima_submit *submit, int pipe, void *frame, uint32_t size)
122 {
123 struct lima_context *ctx = submit->ctx;
124 struct drm_lima_gem_submit req = {
125 .ctx = ctx->id,
126 .pipe = pipe,
127 .nr_bos = submit->gem_bos[pipe].size / sizeof(struct drm_lima_gem_submit_bo),
128 .bos = VOID2U64(util_dynarray_begin(submit->gem_bos + pipe)),
129 .frame = VOID2U64(frame),
130 .frame_size = size,
131 .out_sync = ctx->out_sync[pipe],
132 };
133
134 if (ctx->in_sync_fd >= 0) {
135 int err = drmSyncobjImportSyncFile(submit->fd, ctx->in_sync[pipe],
136 ctx->in_sync_fd);
137 if (err)
138 return false;
139
140 req.in_sync[0] = ctx->in_sync[pipe];
141 close(ctx->in_sync_fd);
142 ctx->in_sync_fd = -1;
143 }
144
145 bool ret = drmIoctl(submit->fd, DRM_IOCTL_LIMA_GEM_SUBMIT, &req) == 0;
146
147 util_dynarray_foreach(submit->bos + pipe, struct lima_bo *, bo) {
148 lima_bo_unreference(*bo);
149 }
150
151 util_dynarray_clear(submit->gem_bos + pipe);
152 util_dynarray_clear(submit->bos + pipe);
153 return ret;
154 }
155
156 static bool
157 lima_submit_wait(struct lima_submit *submit, int pipe, uint64_t timeout_ns)
158 {
159 int64_t abs_timeout = os_time_get_absolute_timeout(timeout_ns);
160 if (abs_timeout == OS_TIMEOUT_INFINITE)
161 abs_timeout = INT64_MAX;
162
163 struct lima_context *ctx = submit->ctx;
164 return !drmSyncobjWait(submit->fd, ctx->out_sync + pipe, 1, abs_timeout, 0, NULL);
165 }
166
167 bool lima_submit_has_bo(struct lima_submit *submit, struct lima_bo *bo, bool all)
168 {
169 for (int i = 0; i < 2; i++) {
170 util_dynarray_foreach(submit->gem_bos + i, struct drm_lima_gem_submit_bo, gem_bo) {
171 if (bo->handle == gem_bo->handle) {
172 if (all || gem_bo->flags & LIMA_SUBMIT_BO_WRITE)
173 return true;
174 else
175 break;
176 }
177 }
178 }
179
180 return false;
181 }
182
183 void *
184 lima_submit_create_stream_bo(struct lima_submit *submit, int pipe,
185 unsigned size, uint32_t *va)
186 {
187 struct lima_context *ctx = submit->ctx;
188
189 void *cpu;
190 unsigned offset;
191 struct pipe_resource *pres = NULL;
192 u_upload_alloc(ctx->uploader, 0, size, 0x40, &offset, &pres, &cpu);
193
194 struct lima_resource *res = lima_resource(pres);
195 *va = res->bo->va + offset;
196
197 lima_submit_add_bo(submit, pipe, res->bo, LIMA_SUBMIT_BO_READ);
198
199 pipe_resource_reference(&pres, NULL);
200
201 return cpu;
202 }
203
204 static inline bool
205 lima_submit_dirty(struct lima_submit *submit)
206 {
207 struct lima_context *ctx = submit->ctx;
208
209 return !!ctx->resolve;
210 }
211
212 static inline struct lima_damage_region *
213 lima_submit_get_damage(struct lima_submit *submit)
214 {
215 struct lima_context *ctx = submit->ctx;
216
217 if (!(ctx->framebuffer.base.nr_cbufs && (ctx->resolve & PIPE_CLEAR_COLOR0)))
218 return NULL;
219
220 struct lima_surface *surf = lima_surface(ctx->framebuffer.base.cbufs[0]);
221 struct lima_resource *res = lima_resource(surf->base.texture);
222 return &res->damage;
223 }
224
225 static bool
226 lima_fb_need_reload(struct lima_submit *submit)
227 {
228 struct lima_context *ctx = submit->ctx;
229
230 /* Depth buffer is always discarded */
231 if (!(ctx->framebuffer.base.nr_cbufs && (ctx->resolve & PIPE_CLEAR_COLOR0)))
232 return false;
233
234 struct lima_surface *surf = lima_surface(ctx->framebuffer.base.cbufs[0]);
235 struct lima_resource *res = lima_resource(surf->base.texture);
236 if (res->damage.region) {
237 /* for EGL_KHR_partial_update, when EGL_EXT_buffer_age is enabled,
238 * we need to reload damage region, otherwise just want to reload
239 * the region not aligned to tile boundary */
240 //if (!res->damage.aligned)
241 // return true;
242 return true;
243 }
244 else if (surf->reload)
245 return true;
246
247 return false;
248 }
249
250 static void
251 lima_pack_reload_plbu_cmd(struct lima_submit *submit)
252 {
253 #define lima_reload_render_state_offset 0x0000
254 #define lima_reload_gl_pos_offset 0x0040
255 #define lima_reload_varying_offset 0x0080
256 #define lima_reload_tex_desc_offset 0x00c0
257 #define lima_reload_tex_array_offset 0x0100
258 #define lima_reload_buffer_size 0x0140
259
260 struct lima_context *ctx = submit->ctx;
261
262 uint32_t va;
263 void *cpu = lima_submit_create_stream_bo(
264 submit, LIMA_PIPE_PP, lima_reload_buffer_size, &va);
265
266 struct lima_screen *screen = lima_screen(ctx->base.screen);
267
268 uint32_t reload_shader_first_instr_size =
269 ((uint32_t *)(screen->pp_buffer->map + pp_reload_program_offset))[0] & 0x1f;
270 uint32_t reload_shader_va = screen->pp_buffer->va + pp_reload_program_offset;
271
272 struct lima_render_state reload_render_state = {
273 .alpha_blend = 0xf03b1ad2,
274 .depth_test = 0x0000000e,
275 .depth_range = 0xffff0000,
276 .stencil_front = 0x00000007,
277 .stencil_back = 0x00000007,
278 .multi_sample = 0x0000f007,
279 .shader_address = reload_shader_va | reload_shader_first_instr_size,
280 .varying_types = 0x00000001,
281 .textures_address = va + lima_reload_tex_array_offset,
282 .aux0 = 0x00004021,
283 .varyings_address = va + lima_reload_varying_offset,
284 };
285 memcpy(cpu + lima_reload_render_state_offset, &reload_render_state,
286 sizeof(reload_render_state));
287
288 struct lima_context_framebuffer *fb = &ctx->framebuffer;
289 lima_tex_desc *td = cpu + lima_reload_tex_desc_offset;
290 memset(td, 0, lima_min_tex_desc_size);
291 lima_texture_desc_set_res(ctx, td, fb->base.cbufs[0]->texture, 0, 0);
292 td->unnorm_coords = 1;
293 td->texture_type = LIMA_TEXTURE_TYPE_2D;
294 td->min_img_filter_nearest = 1;
295 td->mag_img_filter_nearest = 1;
296 td->wrap_s_clamp_to_edge = 1;
297 td->wrap_t_clamp_to_edge = 1;
298 td->unknown_2_2 = 0x1;
299
300 uint32_t *ta = cpu + lima_reload_tex_array_offset;
301 ta[0] = va + lima_reload_tex_desc_offset;
302
303 float reload_gl_pos[] = {
304 fb->base.width, 0, 0, 1,
305 0, 0, 0, 1,
306 0, fb->base.height, 0, 1,
307 };
308 memcpy(cpu + lima_reload_gl_pos_offset, reload_gl_pos,
309 sizeof(reload_gl_pos));
310
311 float reload_varying[] = {
312 fb->base.width, 0, 0, 0,
313 0, fb->base.height, 0, 0,
314 };
315 memcpy(cpu + lima_reload_varying_offset, reload_varying,
316 sizeof(reload_varying));
317
318 PLBU_CMD_BEGIN(&ctx->plbu_cmd_head, 20);
319
320 PLBU_CMD_VIEWPORT_LEFT(0);
321 PLBU_CMD_VIEWPORT_RIGHT(fui(fb->base.width));
322 PLBU_CMD_VIEWPORT_BOTTOM(0);
323 PLBU_CMD_VIEWPORT_TOP(fui(fb->base.height));
324
325 PLBU_CMD_RSW_VERTEX_ARRAY(
326 va + lima_reload_render_state_offset,
327 va + lima_reload_gl_pos_offset);
328
329 PLBU_CMD_UNKNOWN2();
330 PLBU_CMD_UNKNOWN1();
331
332 PLBU_CMD_INDICES(screen->pp_buffer->va + pp_shared_index_offset);
333 PLBU_CMD_INDEXED_DEST(va + lima_reload_gl_pos_offset);
334 PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3);
335
336 PLBU_CMD_END();
337 }
338
339 static void
340 lima_pack_head_plbu_cmd(struct lima_submit *submit)
341 {
342 struct lima_context *ctx = submit->ctx;
343 struct lima_context_framebuffer *fb = &ctx->framebuffer;
344
345 PLBU_CMD_BEGIN(&ctx->plbu_cmd_head, 10);
346
347 PLBU_CMD_UNKNOWN2();
348 PLBU_CMD_BLOCK_STEP(fb->shift_min, fb->shift_h, fb->shift_w);
349 PLBU_CMD_TILED_DIMENSIONS(fb->tiled_w, fb->tiled_h);
350 PLBU_CMD_BLOCK_STRIDE(fb->block_w);
351
352 PLBU_CMD_ARRAY_ADDRESS(
353 ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size,
354 fb->block_w * fb->block_h);
355
356 PLBU_CMD_END();
357
358 if (lima_fb_need_reload(submit))
359 lima_pack_reload_plbu_cmd(submit);
360 }
361
/*
 * Rotate/flip step of the Hilbert curve mapping for a quadrant of side n.
 *
 * Nothing happens when ry is non-zero. Otherwise the point is mirrored
 * inside the n x n square when rx == 1, and x and y are then exchanged.
 */
static void
hilbert_rotate(int n, int *x, int *y, int rx, int ry)
{
   if (ry != 0)
      return;

   if (rx == 1) {
      *x = n - 1 - *x;
      *y = n - 1 - *y;
   }

   /* Swap x and y */
   const int swapped = *y;
   *y = *x;
   *x = swapped;
}
377
/*
 * Convert index d along a Hilbert curve into (x, y) coordinates.
 *
 * One curve level is processed per iteration, for as long as
 * (1 << level) < n; each level consumes two bits of d.
 */
static void
hilbert_coords(int n, int d, int *x, int *y)
{
   int rest = d;

   *x = *y = 0;

   for (int level = 0; (1 << level) < n; level++) {
      const int rx = (rest / 2) & 1;
      const int ry = (rest ^ rx) & 1;

      hilbert_rotate(1 << level, x, y, rx, ry);

      *x += rx << level;
      *y += ry << level;

      rest /= 4;
   }
}
398
/*
 * Compute the start offset of each PP's stream inside one shared buffer
 * and return the total buffer size.
 *
 * Each stream start address is calculated carefully:
 *   1. overflow: stream sizes may differ because tiled_w * tiled_h need
 *      not be divisible by num_pp; the extra entries are given to the
 *      preceding streams, one each
 *   2. alignment: each stream address should be 0x20 aligned
 *
 * off[] receives num_pp entries.
 */
static int
lima_get_pp_stream_size(int num_pp, int tiled_w, int tiled_h, uint32_t *off)
{
   const int tiles = tiled_w * tiled_h;
   const int per_stream = tiles / num_pp * 16 + 16;
   int leftover = tiles % num_pp;
   int cursor = 0;

   for (int pp = 0; pp < num_pp; pp++) {
      off[pp] = cursor;

      cursor += per_stream;
      if (leftover != 0) {
         cursor += 16;
         leftover--;
      }

      cursor = align(cursor, 0x20);
   }

   return cursor;
}
425
426 static bool
427 inside_damage_region(int x, int y, struct lima_damage_region *ds)
428 {
429 if (!ds || !ds->region)
430 return true;
431
432 for (int i = 0; i < ds->num_region; i++) {
433 struct pipe_scissor_state *ss = ds->region + i;
434 if (x >= ss->minx && x < ss->maxx &&
435 y >= ss->miny && y < ss->maxy)
436 return true;
437 }
438
439 return false;
440 }
441
442 static void
443 lima_generate_pp_stream(struct lima_submit *submit, int off_x, int off_y,
444 int tiled_w, int tiled_h)
445 {
446 struct lima_context *ctx = submit->ctx;
447 struct lima_pp_stream_state *ps = &ctx->pp_stream;
448 struct lima_context_framebuffer *fb = &ctx->framebuffer;
449 struct lima_damage_region *damage = lima_submit_get_damage(submit);
450 struct lima_screen *screen = lima_screen(ctx->base.screen);
451 int i, num_pp = screen->num_pp;
452
453 /* use hilbert_coords to generates 1D to 2D relationship.
454 * 1D for pp stream index and 2D for plb block x/y on framebuffer.
455 * if multi pp, interleave the 1D index to make each pp's render target
456 * close enough which should result close workload
457 */
458 int max = MAX2(tiled_w, tiled_h);
459 int index = 0;
460 uint32_t *stream[4];
461 int si[4] = {0};
462 int dim = 0;
463 int count = 0;
464
465 /* Don't update count if we get zero rect. We'll just generate
466 * PP stream with just terminators in it.
467 */
468 if ((tiled_w * tiled_h) != 0) {
469 dim = util_logbase2_ceil(max);
470 count = 1 << (dim + dim);
471 }
472
473 for (i = 0; i < num_pp; i++)
474 stream[i] = ps->map + ps->offset[i];
475
476 for (i = 0; i < count; i++) {
477 int x, y;
478 hilbert_coords(max, i, &x, &y);
479 if (x < tiled_w && y < tiled_h) {
480 x += off_x;
481 y += off_y;
482
483 if (!inside_damage_region(x, y, damage))
484 continue;
485
486 int pp = index % num_pp;
487 int offset = ((y >> fb->shift_h) * fb->block_w +
488 (x >> fb->shift_w)) * LIMA_CTX_PLB_BLK_SIZE;
489 int plb_va = ctx->plb[ctx->plb_index]->va + offset;
490
491 stream[pp][si[pp]++] = 0;
492 stream[pp][si[pp]++] = 0xB8000000 | x | (y << 8);
493 stream[pp][si[pp]++] = 0xE0000002 | ((plb_va >> 3) & ~0xE0000003);
494 stream[pp][si[pp]++] = 0xB0000000;
495
496 index++;
497 }
498 }
499
500 for (i = 0; i < num_pp; i++) {
501 stream[i][si[i]++] = 0;
502 stream[i][si[i]++] = 0xBC000000;
503 stream[i][si[i]++] = 0;
504 stream[i][si[i]++] = 0;
505
506 lima_dump_command_stream_print(
507 stream[i], si[i] * 4, false, "pp plb stream %d at va %x\n",
508 i, ps->va + ps->offset[i]);
509 }
510 }
511
512 static void
513 lima_update_damage_pp_stream(struct lima_submit *submit)
514 {
515 struct lima_context *ctx = submit->ctx;
516 struct lima_damage_region *ds = lima_submit_get_damage(submit);
517 struct lima_context_framebuffer *fb = &ctx->framebuffer;
518 struct pipe_scissor_state bound;
519
520 if (ds && ds->region) {
521 struct pipe_scissor_state *dbound = &ds->bound;
522 bound.minx = MAX2(dbound->minx, ctx->damage_rect.minx >> 4);
523 bound.miny = MAX2(dbound->miny, ctx->damage_rect.miny >> 4);
524 bound.maxx = MIN2(dbound->maxx, (ctx->damage_rect.maxx + 0xf) >> 4);
525 bound.maxy = MIN2(dbound->maxy, (ctx->damage_rect.maxy + 0xf) >> 4);
526 } else {
527 bound.minx = ctx->damage_rect.minx >> 4;
528 bound.miny = ctx->damage_rect.miny >> 4;
529 bound.maxx = (ctx->damage_rect.maxx + 0xf) >> 4;
530 bound.maxy = (ctx->damage_rect.maxy + 0xf) >> 4;
531 }
532
533 /* Clamp to FB size */
534 bound.minx = MIN2(bound.minx, fb->tiled_w);
535 bound.miny = MIN2(bound.miny, fb->tiled_h);
536 bound.maxx = MIN2(bound.maxx, fb->tiled_w);
537 bound.maxy = MIN2(bound.maxy, fb->tiled_h);
538
539 int tiled_w = bound.maxx - bound.minx;
540 int tiled_h = bound.maxy - bound.miny;
541
542 struct lima_screen *screen = lima_screen(ctx->base.screen);
543 int size = lima_get_pp_stream_size(
544 screen->num_pp, tiled_w, tiled_h, ctx->pp_stream.offset);
545
546 ctx->pp_stream.map = lima_submit_create_stream_bo(
547 submit, LIMA_PIPE_PP, size, &ctx->pp_stream.va);
548
549 lima_generate_pp_stream(submit, bound.minx, bound.miny, tiled_w, tiled_h);
550 }
551
552 static void
553 lima_update_full_pp_stream(struct lima_submit *submit)
554 {
555 struct lima_context *ctx = submit->ctx;
556 struct lima_context_framebuffer *fb = &ctx->framebuffer;
557 struct lima_ctx_plb_pp_stream_key key = {
558 .plb_index = ctx->plb_index,
559 .tiled_w = fb->tiled_w,
560 .tiled_h = fb->tiled_h,
561 };
562
563 struct hash_entry *entry =
564 _mesa_hash_table_search(ctx->plb_pp_stream, &key);
565 struct lima_ctx_plb_pp_stream *s = entry->data;
566
567 if (s->bo) {
568 ctx->pp_stream.map = lima_bo_map(s->bo);
569 ctx->pp_stream.va = s->bo->va;
570 memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset));
571 }
572 else {
573 struct lima_screen *screen = lima_screen(ctx->base.screen);
574 int size = lima_get_pp_stream_size(
575 screen->num_pp, fb->tiled_w, fb->tiled_h, s->offset);
576 s->bo = lima_bo_create(screen, size, 0);
577
578 ctx->pp_stream.map = lima_bo_map(s->bo);
579 ctx->pp_stream.va = s->bo->va;
580 memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset));
581
582 lima_generate_pp_stream(submit, 0, 0, fb->tiled_w, fb->tiled_h);
583 }
584
585 lima_submit_add_bo(submit, LIMA_PIPE_PP, s->bo, LIMA_SUBMIT_BO_READ);
586 }
587
588 static bool
589 lima_damage_fullscreen(struct lima_submit *submit)
590 {
591 struct lima_context *ctx = submit->ctx;
592
593 return ctx->damage_rect.minx == 0 &&
594 ctx->damage_rect.miny == 0 &&
595 ctx->damage_rect.maxx == ctx->framebuffer.base.width &&
596 ctx->damage_rect.maxy == ctx->framebuffer.base.height;
597 }
598
599 static void
600 lima_update_pp_stream(struct lima_submit *submit)
601 {
602 struct lima_context *ctx = submit->ctx;
603 struct lima_damage_region *damage = lima_submit_get_damage(submit);
604 if ((damage && damage->region) || !lima_damage_fullscreen(submit))
605 lima_update_damage_pp_stream(submit);
606 else if (ctx->plb_pp_stream)
607 lima_update_full_pp_stream(submit);
608 else
609 ctx->pp_stream.map = NULL;
610 }
611
612 static void
613 lima_update_submit_bo(struct lima_submit *submit)
614 {
615 struct lima_context *ctx = submit->ctx;
616
617 lima_submit_add_bo(submit, LIMA_PIPE_GP, ctx->plb_gp_stream,
618 LIMA_SUBMIT_BO_READ);
619 lima_submit_add_bo(submit, LIMA_PIPE_GP, ctx->plb[ctx->plb_index],
620 LIMA_SUBMIT_BO_WRITE);
621 lima_submit_add_bo(submit, LIMA_PIPE_GP, ctx->gp_tile_heap[ctx->plb_index],
622 LIMA_SUBMIT_BO_WRITE);
623
624 lima_dump_command_stream_print(
625 ctx->plb_gp_stream->map + ctx->plb_index * ctx->plb_gp_size,
626 ctx->plb_gp_size, false, "gp plb stream at va %x\n",
627 ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size);
628
629 lima_submit_add_bo(submit, LIMA_PIPE_PP, ctx->plb[ctx->plb_index],
630 LIMA_SUBMIT_BO_READ);
631 lima_submit_add_bo(submit, LIMA_PIPE_PP, ctx->gp_tile_heap[ctx->plb_index],
632 LIMA_SUBMIT_BO_READ);
633
634 struct lima_screen *screen = lima_screen(ctx->base.screen);
635 lima_submit_add_bo(submit, LIMA_PIPE_PP, screen->pp_buffer, LIMA_SUBMIT_BO_READ);
636 }
637
638 static void
639 lima_finish_plbu_cmd(struct util_dynarray *plbu_cmd_array)
640 {
641 int i = 0;
642 uint32_t *plbu_cmd = util_dynarray_ensure_cap(plbu_cmd_array, plbu_cmd_array->size + 2 * 4);
643
644 plbu_cmd[i++] = 0x00000000;
645 plbu_cmd[i++] = 0x50000000; /* END */
646
647 plbu_cmd_array->size += i * 4;
648 }
649
650 static void
651 lima_pack_wb_zsbuf_reg(struct lima_submit *submit, uint32_t *wb_reg, int wb_idx)
652 {
653 struct lima_context *ctx = submit->ctx;
654 struct lima_context_framebuffer *fb = &ctx->framebuffer;
655 struct lima_resource *res = lima_resource(fb->base.zsbuf->texture);
656 int level = fb->base.zsbuf->u.tex.level;
657 uint32_t format = lima_format_get_pixel(fb->base.zsbuf->format);
658
659 struct lima_pp_wb_reg *wb = (void *)wb_reg;
660 wb[wb_idx].type = 0x01; /* 1 for depth, stencil */
661 wb[wb_idx].address = res->bo->va + res->levels[level].offset;
662 wb[wb_idx].pixel_format = format;
663 if (res->tiled) {
664 wb[wb_idx].pixel_layout = 0x2;
665 wb[wb_idx].pitch = fb->tiled_w;
666 } else {
667 wb[wb_idx].pixel_layout = 0x0;
668 wb[wb_idx].pitch = res->levels[level].stride / 8;
669 }
670 wb[wb_idx].mrt_bits = 0;
671 }
672
673 static void
674 lima_pack_wb_cbuf_reg(struct lima_submit *submit, uint32_t *wb_reg, int wb_idx)
675 {
676 struct lima_context *ctx = submit->ctx;
677 struct lima_context_framebuffer *fb = &ctx->framebuffer;
678 struct lima_resource *res = lima_resource(fb->base.cbufs[0]->texture);
679 int level = fb->base.cbufs[0]->u.tex.level;
680 unsigned layer = fb->base.cbufs[0]->u.tex.first_layer;
681 uint32_t format = lima_format_get_pixel(fb->base.cbufs[0]->format);
682 bool swap_channels = lima_format_get_swap_rb(fb->base.cbufs[0]->format);
683
684 struct lima_pp_wb_reg *wb = (void *)wb_reg;
685 wb[wb_idx].type = 0x02; /* 2 for color buffer */
686 wb[wb_idx].address = res->bo->va + res->levels[level].offset + layer * res->levels[level].layer_stride;
687 wb[wb_idx].pixel_format = format;
688 if (res->tiled) {
689 wb[wb_idx].pixel_layout = 0x2;
690 wb[wb_idx].pitch = fb->tiled_w;
691 } else {
692 wb[wb_idx].pixel_layout = 0x0;
693 wb[wb_idx].pitch = res->levels[level].stride / 8;
694 }
695 wb[wb_idx].mrt_bits = swap_channels ? 0x4 : 0x0;
696 }
697
698 static void
699 lima_pack_pp_frame_reg(struct lima_submit *submit, uint32_t *frame_reg,
700 uint32_t *wb_reg)
701 {
702 struct lima_context *ctx = submit->ctx;
703 struct lima_context_framebuffer *fb = &ctx->framebuffer;
704 struct lima_pp_frame_reg *frame = (void *)frame_reg;
705 struct lima_screen *screen = lima_screen(ctx->base.screen);
706 int wb_idx = 0;
707
708 frame->render_address = screen->pp_buffer->va + pp_frame_rsw_offset;
709 frame->flags = 0x02;
710 frame->clear_value_depth = ctx->clear.depth;
711 frame->clear_value_stencil = ctx->clear.stencil;
712 frame->clear_value_color = ctx->clear.color_8pc;
713 frame->clear_value_color_1 = ctx->clear.color_8pc;
714 frame->clear_value_color_2 = ctx->clear.color_8pc;
715 frame->clear_value_color_3 = ctx->clear.color_8pc;
716 frame->one = 1;
717
718 frame->width = fb->base.width - 1;
719 frame->height = fb->base.height - 1;
720
721 /* frame->fragment_stack_address is overwritten per-pp in the kernel
722 * by the values of pp_frame.fragment_stack_address[i] */
723
724 /* These are "stack size" and "stack offset" shifted,
725 * here they are assumed to be always the same. */
726 frame->fragment_stack_size = ctx->pp_max_stack_size << 16 | ctx->pp_max_stack_size;
727
728 /* related with MSAA and different value when r4p0/r7p0 */
729 frame->supersampled_height = fb->base.height * 2 - 1;
730 frame->scale = 0xE0C;
731
732 frame->dubya = 0x77;
733 frame->onscreen = 1;
734 frame->blocking = (fb->shift_min << 28) | (fb->shift_h << 16) | fb->shift_w;
735 frame->foureight = 0x8888;
736
737 if (fb->base.nr_cbufs && (ctx->resolve & PIPE_CLEAR_COLOR0))
738 lima_pack_wb_cbuf_reg(submit, wb_reg, wb_idx++);
739
740 if (fb->base.zsbuf &&
741 (ctx->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)))
742 lima_pack_wb_zsbuf_reg(submit, wb_reg, wb_idx++);
743 }
744
745 static void
746 lima_do_submit(struct lima_submit *submit)
747 {
748 #define pp_stack_pp_size 0x400
749
750 struct lima_context *ctx = submit->ctx;
751
752 lima_pack_head_plbu_cmd(submit);
753 lima_finish_plbu_cmd(&ctx->plbu_cmd_array);
754
755 lima_update_submit_bo(submit);
756
757 int vs_cmd_size = ctx->vs_cmd_array.size;
758 uint32_t vs_cmd_va = 0;
759
760 if (vs_cmd_size) {
761 void *vs_cmd = lima_submit_create_stream_bo(
762 submit, LIMA_PIPE_GP, vs_cmd_size, &vs_cmd_va);
763 memcpy(vs_cmd, util_dynarray_begin(&ctx->vs_cmd_array), vs_cmd_size);
764 util_dynarray_clear(&ctx->vs_cmd_array);
765
766 lima_dump_command_stream_print(
767 vs_cmd, vs_cmd_size, false, "flush vs cmd at va %x\n", vs_cmd_va);
768 lima_dump_vs_command_stream_print(vs_cmd, vs_cmd_size, vs_cmd_va);
769 }
770
771 uint32_t plbu_cmd_va;
772 int plbu_cmd_size = ctx->plbu_cmd_array.size + ctx->plbu_cmd_head.size;
773 void *plbu_cmd = lima_submit_create_stream_bo(
774 submit, LIMA_PIPE_GP, plbu_cmd_size, &plbu_cmd_va);
775 memcpy(plbu_cmd,
776 util_dynarray_begin(&ctx->plbu_cmd_head),
777 ctx->plbu_cmd_head.size);
778 memcpy(plbu_cmd + ctx->plbu_cmd_head.size,
779 util_dynarray_begin(&ctx->plbu_cmd_array),
780 ctx->plbu_cmd_array.size);
781 util_dynarray_clear(&ctx->plbu_cmd_array);
782 util_dynarray_clear(&ctx->plbu_cmd_head);
783
784 lima_dump_command_stream_print(
785 plbu_cmd, plbu_cmd_size, false, "flush plbu cmd at va %x\n", plbu_cmd_va);
786 lima_dump_plbu_command_stream_print(plbu_cmd, plbu_cmd_size, plbu_cmd_va);
787
788 struct lima_screen *screen = lima_screen(ctx->base.screen);
789 struct drm_lima_gp_frame gp_frame;
790 struct lima_gp_frame_reg *gp_frame_reg = (void *)gp_frame.frame;
791 gp_frame_reg->vs_cmd_start = vs_cmd_va;
792 gp_frame_reg->vs_cmd_end = vs_cmd_va + vs_cmd_size;
793 gp_frame_reg->plbu_cmd_start = plbu_cmd_va;
794 gp_frame_reg->plbu_cmd_end = plbu_cmd_va + plbu_cmd_size;
795 gp_frame_reg->tile_heap_start = ctx->gp_tile_heap[ctx->plb_index]->va;
796 gp_frame_reg->tile_heap_end = ctx->gp_tile_heap[ctx->plb_index]->va + ctx->gp_tile_heap_size;
797
798 lima_dump_command_stream_print(
799 &gp_frame, sizeof(gp_frame), false, "add gp frame\n");
800
801 if (!lima_submit_start(submit, LIMA_PIPE_GP, &gp_frame, sizeof(gp_frame)))
802 fprintf(stderr, "gp submit error\n");
803
804 if (lima_dump_command_stream) {
805 if (lima_submit_wait(submit, LIMA_PIPE_GP, PIPE_TIMEOUT_INFINITE)) {
806 if (ctx->gp_output) {
807 float *pos = lima_bo_map(ctx->gp_output);
808 lima_dump_command_stream_print(
809 pos, 4 * 4 * 16, true, "gl_pos dump at va %x\n",
810 ctx->gp_output->va);
811 }
812
813 uint32_t *plb = lima_bo_map(ctx->plb[ctx->plb_index]);
814 lima_dump_command_stream_print(
815 plb, LIMA_CTX_PLB_BLK_SIZE, false, "plb dump at va %x\n",
816 ctx->plb[ctx->plb_index]->va);
817 }
818 else {
819 fprintf(stderr, "gp submit wait error\n");
820 exit(1);
821 }
822 }
823
824 uint32_t pp_stack_va = 0;
825 if (ctx->pp_max_stack_size) {
826 lima_submit_create_stream_bo(
827 submit, LIMA_PIPE_PP,
828 screen->num_pp * ctx->pp_max_stack_size * pp_stack_pp_size,
829 &pp_stack_va);
830 }
831
832 lima_update_pp_stream(submit);
833
834 struct lima_pp_stream_state *ps = &ctx->pp_stream;
835 if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) {
836 struct drm_lima_m400_pp_frame pp_frame = {0};
837 lima_pack_pp_frame_reg(submit, pp_frame.frame, pp_frame.wb);
838 pp_frame.num_pp = screen->num_pp;
839
840 for (int i = 0; i < screen->num_pp; i++) {
841 pp_frame.plbu_array_address[i] = ps->va + ps->offset[i];
842 if (ctx->pp_max_stack_size)
843 pp_frame.fragment_stack_address[i] = pp_stack_va +
844 ctx->pp_max_stack_size * pp_stack_pp_size * i;
845 }
846
847 lima_dump_command_stream_print(
848 &pp_frame, sizeof(pp_frame), false, "add pp frame\n");
849
850 if (!lima_submit_start(submit, LIMA_PIPE_PP, &pp_frame, sizeof(pp_frame)))
851 fprintf(stderr, "pp submit error\n");
852 }
853 else {
854 struct drm_lima_m450_pp_frame pp_frame = {0};
855 lima_pack_pp_frame_reg(submit, pp_frame.frame, pp_frame.wb);
856 pp_frame.num_pp = screen->num_pp;
857
858 if (ctx->pp_max_stack_size)
859 for (int i = 0; i < screen->num_pp; i++)
860 pp_frame.fragment_stack_address[i] = pp_stack_va +
861 ctx->pp_max_stack_size * pp_stack_pp_size * i;
862
863 if (ps->map) {
864 for (int i = 0; i < screen->num_pp; i++)
865 pp_frame.plbu_array_address[i] = ps->va + ps->offset[i];
866 }
867 else {
868 pp_frame.use_dlbu = true;
869
870 struct lima_context_framebuffer *fb = &ctx->framebuffer;
871 pp_frame.dlbu_regs[0] = ctx->plb[ctx->plb_index]->va;
872 pp_frame.dlbu_regs[1] = ((fb->tiled_h - 1) << 16) | (fb->tiled_w - 1);
873 unsigned s = util_logbase2(LIMA_CTX_PLB_BLK_SIZE) - 7;
874 pp_frame.dlbu_regs[2] = (s << 28) | (fb->shift_h << 16) | fb->shift_w;
875 pp_frame.dlbu_regs[3] = ((fb->tiled_h - 1) << 24) | ((fb->tiled_w - 1) << 16);
876 }
877
878 lima_dump_command_stream_print(
879 &pp_frame, sizeof(pp_frame), false, "add pp frame\n");
880
881 if (!lima_submit_start(submit, LIMA_PIPE_PP, &pp_frame, sizeof(pp_frame)))
882 fprintf(stderr, "pp submit error\n");
883 }
884
885 if (lima_dump_command_stream) {
886 if (!lima_submit_wait(submit, LIMA_PIPE_PP, PIPE_TIMEOUT_INFINITE)) {
887 fprintf(stderr, "pp wait error\n");
888 exit(1);
889 }
890 }
891
892 ctx->plb_index = (ctx->plb_index + 1) % lima_ctx_num_plb;
893
894 if (ctx->framebuffer.base.nr_cbufs && (ctx->resolve & PIPE_CLEAR_COLOR0)) {
895 /* Set reload flag for next draw. It'll be unset if buffer is cleared */
896 struct lima_surface *surf = lima_surface(ctx->framebuffer.base.cbufs[0]);
897 surf->reload = true;
898 }
899
900 ctx->pp_max_stack_size = 0;
901
902 ctx->damage_rect.minx = ctx->damage_rect.miny = 0xffff;
903 ctx->damage_rect.maxx = ctx->damage_rect.maxy = 0;
904
905 ctx->resolve = 0;
906
907 lima_dump_file_next();
908 }
909
910 void
911 lima_flush(struct lima_context *ctx)
912 {
913 if (!lima_submit_dirty(ctx->submit))
914 return;
915
916 lima_do_submit(ctx->submit);
917 }
918
919 static void
920 lima_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
921 unsigned flags)
922 {
923 struct lima_context *ctx = lima_context(pctx);
924 if (lima_submit_dirty(ctx->submit))
925 lima_do_submit(ctx->submit);
926
927 if (fence) {
928 int drm_fd = lima_screen(ctx->base.screen)->fd;
929 int fd;
930
931 if (!drmSyncobjExportSyncFile(drm_fd, ctx->out_sync[LIMA_PIPE_PP], &fd))
932 *fence = lima_fence_create(fd);
933 }
934 }
935
936 bool lima_submit_init(struct lima_context *ctx)
937 {
938 int fd = lima_screen(ctx->base.screen)->fd;
939
940 ctx->submit = lima_submit_create(ctx);
941 if (!ctx->submit)
942 return false;
943
944 ctx->in_sync_fd = -1;
945
946 for (int i = 0; i < 2; i++) {
947 if (drmSyncobjCreate(fd, DRM_SYNCOBJ_CREATE_SIGNALED, ctx->in_sync + i) ||
948 drmSyncobjCreate(fd, DRM_SYNCOBJ_CREATE_SIGNALED, ctx->out_sync + i))
949 return false;
950 }
951
952 ctx->base.flush = lima_pipe_flush;
953
954 return true;
955 }
956
957 void lima_submit_fini(struct lima_context *ctx)
958 {
959 int fd = lima_screen(ctx->base.screen)->fd;
960
961 for (int i = 0; i < 2; i++) {
962 if (ctx->in_sync[i])
963 drmSyncobjDestroy(fd, ctx->in_sync[i]);
964 if (ctx->out_sync[i])
965 drmSyncobjDestroy(fd, ctx->out_sync[i]);
966 }
967
968 if (ctx->in_sync_fd >= 0)
969 close(ctx->in_sync_fd);
970
971 if (ctx->submit)
972 lima_submit_free(ctx->submit);
973 }