2 * Copyright (C) 2017-2019 Lima Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
#include <stdint.h>
#include <stdlib.h>

#include "drm-uapi/lima_drm.h"

#include "util/u_math.h"
#include "util/ralloc.h"
#include "util/u_dynarray.h"
#include "util/os_time.h"
#include "util/hash_table.h"
#include "util/u_upload_mgr.h"
#include "util/u_inlines.h"

#include "lima_screen.h"
#include "lima_context.h"
#include "lima_submit.h"
#include "lima_util.h"
#include "lima_format.h"
#include "lima_resource.h"
#include "lima_texture.h"
#include "lima_fence.h"
51 struct lima_context
*ctx
;
53 struct util_dynarray gem_bos
[2];
54 struct util_dynarray bos
[2];
/* Convert a pointer to the 64-bit integer form used by the kernel ABI.
 * Goes through uintptr_t (not unsigned long) so the pointer value is not
 * truncated on targets where long is narrower than a pointer.
 */
#define VOID2U64(x) ((uint64_t)(uintptr_t)(x))
60 static struct lima_submit
*
61 lima_submit_create(struct lima_context
*ctx
)
63 struct lima_submit
*s
;
65 s
= rzalloc(ctx
, struct lima_submit
);
69 s
->fd
= lima_screen(ctx
->base
.screen
)->fd
;
72 for (int i
= 0; i
< 2; i
++) {
73 util_dynarray_init(s
->gem_bos
+ i
, s
);
74 util_dynarray_init(s
->bos
+ i
, s
);
/* Release a submit object. Intentionally empty: nothing is released
 * explicitly here.
 */
static void
lima_submit_free(struct lima_submit *submit)
{

}
87 * Note: this function can only be called in draw code path,
88 * must not exist in flush code path.
91 lima_submit_get(struct lima_context
*ctx
)
96 bool lima_submit_add_bo(struct lima_submit
*submit
, int pipe
,
97 struct lima_bo
*bo
, uint32_t flags
)
99 util_dynarray_foreach(submit
->gem_bos
+ pipe
, struct drm_lima_gem_submit_bo
, gem_bo
) {
100 if (bo
->handle
== gem_bo
->handle
) {
101 gem_bo
->flags
|= flags
;
106 struct drm_lima_gem_submit_bo
*submit_bo
=
107 util_dynarray_grow(submit
->gem_bos
+ pipe
, struct drm_lima_gem_submit_bo
, 1);
108 submit_bo
->handle
= bo
->handle
;
109 submit_bo
->flags
= flags
;
111 struct lima_bo
**jbo
= util_dynarray_grow(submit
->bos
+ pipe
, struct lima_bo
*, 1);
114 /* prevent bo from being freed when submit start */
115 lima_bo_reference(bo
);
121 lima_submit_start(struct lima_submit
*submit
, int pipe
, void *frame
, uint32_t size
)
123 struct lima_context
*ctx
= submit
->ctx
;
124 struct drm_lima_gem_submit req
= {
127 .nr_bos
= submit
->gem_bos
[pipe
].size
/ sizeof(struct drm_lima_gem_submit_bo
),
128 .bos
= VOID2U64(util_dynarray_begin(submit
->gem_bos
+ pipe
)),
129 .frame
= VOID2U64(frame
),
131 .out_sync
= ctx
->out_sync
[pipe
],
134 if (ctx
->in_sync_fd
>= 0) {
135 int err
= drmSyncobjImportSyncFile(submit
->fd
, ctx
->in_sync
[pipe
],
140 req
.in_sync
[0] = ctx
->in_sync
[pipe
];
141 close(ctx
->in_sync_fd
);
142 ctx
->in_sync_fd
= -1;
145 bool ret
= drmIoctl(submit
->fd
, DRM_IOCTL_LIMA_GEM_SUBMIT
, &req
) == 0;
147 util_dynarray_foreach(submit
->bos
+ pipe
, struct lima_bo
*, bo
) {
148 lima_bo_unreference(*bo
);
151 util_dynarray_clear(submit
->gem_bos
+ pipe
);
152 util_dynarray_clear(submit
->bos
+ pipe
);
157 lima_submit_wait(struct lima_submit
*submit
, int pipe
, uint64_t timeout_ns
)
159 int64_t abs_timeout
= os_time_get_absolute_timeout(timeout_ns
);
160 if (abs_timeout
== OS_TIMEOUT_INFINITE
)
161 abs_timeout
= INT64_MAX
;
163 struct lima_context
*ctx
= submit
->ctx
;
164 return !drmSyncobjWait(submit
->fd
, ctx
->out_sync
+ pipe
, 1, abs_timeout
, 0, NULL
);
167 bool lima_submit_has_bo(struct lima_submit
*submit
, struct lima_bo
*bo
, bool all
)
169 for (int i
= 0; i
< 2; i
++) {
170 util_dynarray_foreach(submit
->gem_bos
+ i
, struct drm_lima_gem_submit_bo
, gem_bo
) {
171 if (bo
->handle
== gem_bo
->handle
) {
172 if (all
|| gem_bo
->flags
& LIMA_SUBMIT_BO_WRITE
)
184 lima_submit_create_stream_bo(struct lima_submit
*submit
, int pipe
,
185 unsigned size
, uint32_t *va
)
187 struct lima_context
*ctx
= submit
->ctx
;
191 struct pipe_resource
*pres
= NULL
;
192 u_upload_alloc(ctx
->uploader
, 0, size
, 0x40, &offset
, &pres
, &cpu
);
194 struct lima_resource
*res
= lima_resource(pres
);
195 *va
= res
->bo
->va
+ offset
;
197 lima_submit_add_bo(submit
, pipe
, res
->bo
, LIMA_SUBMIT_BO_READ
);
199 pipe_resource_reference(&pres
, NULL
);
205 lima_submit_dirty(struct lima_submit
*submit
)
207 struct lima_context
*ctx
= submit
->ctx
;
209 return !!ctx
->resolve
;
212 static inline struct lima_damage_region
*
213 lima_submit_get_damage(struct lima_submit
*submit
)
215 struct lima_context
*ctx
= submit
->ctx
;
217 if (!(ctx
->framebuffer
.base
.nr_cbufs
&& (ctx
->resolve
& PIPE_CLEAR_COLOR0
)))
220 struct lima_surface
*surf
= lima_surface(ctx
->framebuffer
.base
.cbufs
[0]);
221 struct lima_resource
*res
= lima_resource(surf
->base
.texture
);
226 lima_fb_need_reload(struct lima_submit
*submit
)
228 struct lima_context
*ctx
= submit
->ctx
;
230 /* Depth buffer is always discarded */
231 if (!(ctx
->framebuffer
.base
.nr_cbufs
&& (ctx
->resolve
& PIPE_CLEAR_COLOR0
)))
234 struct lima_surface
*surf
= lima_surface(ctx
->framebuffer
.base
.cbufs
[0]);
235 struct lima_resource
*res
= lima_resource(surf
->base
.texture
);
236 if (res
->damage
.region
) {
237 /* for EGL_KHR_partial_update, when EGL_EXT_buffer_age is enabled,
238 * we need to reload damage region, otherwise just want to reload
239 * the region not aligned to tile boundary */
240 //if (!res->damage.aligned)
244 else if (surf
->reload
)
251 lima_pack_reload_plbu_cmd(struct lima_submit
*submit
)
253 #define lima_reload_render_state_offset 0x0000
254 #define lima_reload_gl_pos_offset 0x0040
255 #define lima_reload_varying_offset 0x0080
256 #define lima_reload_tex_desc_offset 0x00c0
257 #define lima_reload_tex_array_offset 0x0100
258 #define lima_reload_buffer_size 0x0140
260 struct lima_context
*ctx
= submit
->ctx
;
263 void *cpu
= lima_submit_create_stream_bo(
264 submit
, LIMA_PIPE_PP
, lima_reload_buffer_size
, &va
);
266 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
268 uint32_t reload_shader_first_instr_size
=
269 ((uint32_t *)(screen
->pp_buffer
->map
+ pp_reload_program_offset
))[0] & 0x1f;
270 uint32_t reload_shader_va
= screen
->pp_buffer
->va
+ pp_reload_program_offset
;
272 struct lima_render_state reload_render_state
= {
273 .alpha_blend
= 0xf03b1ad2,
274 .depth_test
= 0x0000000e,
275 .depth_range
= 0xffff0000,
276 .stencil_front
= 0x00000007,
277 .stencil_back
= 0x00000007,
278 .multi_sample
= 0x0000f007,
279 .shader_address
= reload_shader_va
| reload_shader_first_instr_size
,
280 .varying_types
= 0x00000001,
281 .textures_address
= va
+ lima_reload_tex_array_offset
,
283 .varyings_address
= va
+ lima_reload_varying_offset
,
285 memcpy(cpu
+ lima_reload_render_state_offset
, &reload_render_state
,
286 sizeof(reload_render_state
));
288 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
289 lima_tex_desc
*td
= cpu
+ lima_reload_tex_desc_offset
;
290 memset(td
, 0, lima_min_tex_desc_size
);
291 lima_texture_desc_set_res(ctx
, td
, fb
->base
.cbufs
[0]->texture
, 0, 0);
292 td
->unnorm_coords
= 1;
293 td
->texture_type
= LIMA_TEXTURE_TYPE_2D
;
294 td
->min_img_filter_nearest
= 1;
295 td
->mag_img_filter_nearest
= 1;
296 td
->wrap_s_clamp_to_edge
= 1;
297 td
->wrap_t_clamp_to_edge
= 1;
298 td
->unknown_2_2
= 0x1;
300 uint32_t *ta
= cpu
+ lima_reload_tex_array_offset
;
301 ta
[0] = va
+ lima_reload_tex_desc_offset
;
303 float reload_gl_pos
[] = {
304 fb
->base
.width
, 0, 0, 1,
306 0, fb
->base
.height
, 0, 1,
308 memcpy(cpu
+ lima_reload_gl_pos_offset
, reload_gl_pos
,
309 sizeof(reload_gl_pos
));
311 float reload_varying
[] = {
312 fb
->base
.width
, 0, 0, 0,
313 0, fb
->base
.height
, 0, 0,
315 memcpy(cpu
+ lima_reload_varying_offset
, reload_varying
,
316 sizeof(reload_varying
));
318 PLBU_CMD_BEGIN(&ctx
->plbu_cmd_head
, 20);
320 PLBU_CMD_VIEWPORT_LEFT(0);
321 PLBU_CMD_VIEWPORT_RIGHT(fui(fb
->base
.width
));
322 PLBU_CMD_VIEWPORT_BOTTOM(0);
323 PLBU_CMD_VIEWPORT_TOP(fui(fb
->base
.height
));
325 PLBU_CMD_RSW_VERTEX_ARRAY(
326 va
+ lima_reload_render_state_offset
,
327 va
+ lima_reload_gl_pos_offset
);
332 PLBU_CMD_INDICES(screen
->pp_buffer
->va
+ pp_shared_index_offset
);
333 PLBU_CMD_INDEXED_DEST(va
+ lima_reload_gl_pos_offset
);
334 PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3);
340 lima_pack_head_plbu_cmd(struct lima_submit
*submit
)
342 struct lima_context
*ctx
= submit
->ctx
;
343 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
345 PLBU_CMD_BEGIN(&ctx
->plbu_cmd_head
, 10);
348 PLBU_CMD_BLOCK_STEP(fb
->shift_min
, fb
->shift_h
, fb
->shift_w
);
349 PLBU_CMD_TILED_DIMENSIONS(fb
->tiled_w
, fb
->tiled_h
);
350 PLBU_CMD_BLOCK_STRIDE(fb
->block_w
);
352 PLBU_CMD_ARRAY_ADDRESS(
353 ctx
->plb_gp_stream
->va
+ ctx
->plb_index
* ctx
->plb_gp_size
,
354 fb
->block_w
* fb
->block_h
);
358 if (lima_fb_need_reload(submit
))
359 lima_pack_reload_plbu_cmd(submit
);
/* Rotate/flip the quadrant of side n that contains (*x, *y), as required
 * by the Hilbert curve construction. Only acts when ry == 0: the point is
 * mirrored first when rx == 1, then x and y are swapped.
 */
static void
hilbert_rotate(int n, int *x, int *y, int rx, int ry)
{
   if (ry == 0) {
      if (rx == 1) {
         *x = n-1 - *x;
         *y = n-1 - *y;
      }

      /* Swap x and y */
      int t  = *x;
      *x = *y;
      *y = t;
   }
}

/* Convert distance d along a Hilbert curve into (x, y) coordinates.
 *
 * n is the side length of the area to cover; the loop runs over every
 * power of two below n, so a non-power-of-two n is effectively rounded
 * up. d is consumed two bits per level, lowest bits first.
 */
static void
hilbert_coords(int n, int d, int *x, int *y)
{
   int rx, ry, i, t=d;

   *x = *y = 0;

   for (i = 0; (1 << i) < n; i++) {

      rx = 1 & (t / 2);
      ry = 1 & (t ^ rx);

      hilbert_rotate(1 << i, x, y, rx, ry);

      *x += rx << i;
      *y += ry << i;

      t /= 4;
   }
}
/* Compute the start offset of each PP's stream and the total buffer size.
 *
 * num_pp:          number of PP streams (must be non-zero)
 * tiled_w/tiled_h: size of the area to cover, in tiles
 * off:             output, receives num_pp start offsets
 *
 * Each tile takes 16 bytes and each stream reserves a further 16 bytes.
 * Returns the total size in bytes, 0x20 aligned.
 */
static int
lima_get_pp_stream_size(int num_pp, int tiled_w, int tiled_h, uint32_t *off)
{
   /* carefully calculate each stream start address:
    * 1. overflow: each stream size may be different because
    *    tiled_w * tiled_h may not be divisible by num_pp;
    *    the extra size is added to the preceding streams
    * 2. alignment: each stream address should be 0x20 aligned
    */
   int delta = tiled_w * tiled_h / num_pp * 16 + 16;
   int remain = tiled_w * tiled_h % num_pp;
   int offset = 0;

   for (int i = 0; i < num_pp; i++) {
      off[i] = offset;

      offset += delta;
      if (remain) {
         offset += 16;
         remain--;
      }
      offset = align(offset, 0x20);
   }

   return offset;
}
427 inside_damage_region(int x
, int y
, struct lima_damage_region
*ds
)
429 if (!ds
|| !ds
->region
)
432 for (int i
= 0; i
< ds
->num_region
; i
++) {
433 struct pipe_scissor_state
*ss
= ds
->region
+ i
;
434 if (x
>= ss
->minx
&& x
< ss
->maxx
&&
435 y
>= ss
->miny
&& y
< ss
->maxy
)
443 lima_generate_pp_stream(struct lima_submit
*submit
, int off_x
, int off_y
,
444 int tiled_w
, int tiled_h
)
446 struct lima_context
*ctx
= submit
->ctx
;
447 struct lima_pp_stream_state
*ps
= &ctx
->pp_stream
;
448 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
449 struct lima_damage_region
*damage
= lima_submit_get_damage(submit
);
450 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
451 int i
, num_pp
= screen
->num_pp
;
453 /* use hilbert_coords to generates 1D to 2D relationship.
454 * 1D for pp stream index and 2D for plb block x/y on framebuffer.
455 * if multi pp, interleave the 1D index to make each pp's render target
456 * close enough which should result close workload
458 int max
= MAX2(tiled_w
, tiled_h
);
465 /* Don't update count if we get zero rect. We'll just generate
466 * PP stream with just terminators in it.
468 if ((tiled_w
* tiled_h
) != 0) {
469 dim
= util_logbase2_ceil(max
);
470 count
= 1 << (dim
+ dim
);
473 for (i
= 0; i
< num_pp
; i
++)
474 stream
[i
] = ps
->map
+ ps
->offset
[i
];
476 for (i
= 0; i
< count
; i
++) {
478 hilbert_coords(max
, i
, &x
, &y
);
479 if (x
< tiled_w
&& y
< tiled_h
) {
483 if (!inside_damage_region(x
, y
, damage
))
486 int pp
= index
% num_pp
;
487 int offset
= ((y
>> fb
->shift_h
) * fb
->block_w
+
488 (x
>> fb
->shift_w
)) * LIMA_CTX_PLB_BLK_SIZE
;
489 int plb_va
= ctx
->plb
[ctx
->plb_index
]->va
+ offset
;
491 stream
[pp
][si
[pp
]++] = 0;
492 stream
[pp
][si
[pp
]++] = 0xB8000000 | x
| (y
<< 8);
493 stream
[pp
][si
[pp
]++] = 0xE0000002 | ((plb_va
>> 3) & ~0xE0000003);
494 stream
[pp
][si
[pp
]++] = 0xB0000000;
500 for (i
= 0; i
< num_pp
; i
++) {
501 stream
[i
][si
[i
]++] = 0;
502 stream
[i
][si
[i
]++] = 0xBC000000;
503 stream
[i
][si
[i
]++] = 0;
504 stream
[i
][si
[i
]++] = 0;
506 lima_dump_command_stream_print(
507 stream
[i
], si
[i
] * 4, false, "pp plb stream %d at va %x\n",
508 i
, ps
->va
+ ps
->offset
[i
]);
513 lima_update_damage_pp_stream(struct lima_submit
*submit
)
515 struct lima_context
*ctx
= submit
->ctx
;
516 struct lima_damage_region
*ds
= lima_submit_get_damage(submit
);
517 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
518 struct pipe_scissor_state bound
;
520 if (ds
&& ds
->region
) {
521 struct pipe_scissor_state
*dbound
= &ds
->bound
;
522 bound
.minx
= MAX2(dbound
->minx
, ctx
->damage_rect
.minx
>> 4);
523 bound
.miny
= MAX2(dbound
->miny
, ctx
->damage_rect
.miny
>> 4);
524 bound
.maxx
= MIN2(dbound
->maxx
, (ctx
->damage_rect
.maxx
+ 0xf) >> 4);
525 bound
.maxy
= MIN2(dbound
->maxy
, (ctx
->damage_rect
.maxy
+ 0xf) >> 4);
527 bound
.minx
= ctx
->damage_rect
.minx
>> 4;
528 bound
.miny
= ctx
->damage_rect
.miny
>> 4;
529 bound
.maxx
= (ctx
->damage_rect
.maxx
+ 0xf) >> 4;
530 bound
.maxy
= (ctx
->damage_rect
.maxy
+ 0xf) >> 4;
533 /* Clamp to FB size */
534 bound
.minx
= MIN2(bound
.minx
, fb
->tiled_w
);
535 bound
.miny
= MIN2(bound
.miny
, fb
->tiled_h
);
536 bound
.maxx
= MIN2(bound
.maxx
, fb
->tiled_w
);
537 bound
.maxy
= MIN2(bound
.maxy
, fb
->tiled_h
);
539 int tiled_w
= bound
.maxx
- bound
.minx
;
540 int tiled_h
= bound
.maxy
- bound
.miny
;
542 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
543 int size
= lima_get_pp_stream_size(
544 screen
->num_pp
, tiled_w
, tiled_h
, ctx
->pp_stream
.offset
);
546 ctx
->pp_stream
.map
= lima_submit_create_stream_bo(
547 submit
, LIMA_PIPE_PP
, size
, &ctx
->pp_stream
.va
);
549 lima_generate_pp_stream(submit
, bound
.minx
, bound
.miny
, tiled_w
, tiled_h
);
553 lima_update_full_pp_stream(struct lima_submit
*submit
)
555 struct lima_context
*ctx
= submit
->ctx
;
556 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
557 struct lima_ctx_plb_pp_stream_key key
= {
558 .plb_index
= ctx
->plb_index
,
559 .tiled_w
= fb
->tiled_w
,
560 .tiled_h
= fb
->tiled_h
,
563 struct hash_entry
*entry
=
564 _mesa_hash_table_search(ctx
->plb_pp_stream
, &key
);
565 struct lima_ctx_plb_pp_stream
*s
= entry
->data
;
568 ctx
->pp_stream
.map
= lima_bo_map(s
->bo
);
569 ctx
->pp_stream
.va
= s
->bo
->va
;
570 memcpy(ctx
->pp_stream
.offset
, s
->offset
, sizeof(s
->offset
));
573 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
574 int size
= lima_get_pp_stream_size(
575 screen
->num_pp
, fb
->tiled_w
, fb
->tiled_h
, s
->offset
);
576 s
->bo
= lima_bo_create(screen
, size
, 0);
578 ctx
->pp_stream
.map
= lima_bo_map(s
->bo
);
579 ctx
->pp_stream
.va
= s
->bo
->va
;
580 memcpy(ctx
->pp_stream
.offset
, s
->offset
, sizeof(s
->offset
));
582 lima_generate_pp_stream(submit
, 0, 0, fb
->tiled_w
, fb
->tiled_h
);
585 lima_submit_add_bo(submit
, LIMA_PIPE_PP
, s
->bo
, LIMA_SUBMIT_BO_READ
);
589 lima_damage_fullscreen(struct lima_submit
*submit
)
591 struct lima_context
*ctx
= submit
->ctx
;
593 return ctx
->damage_rect
.minx
== 0 &&
594 ctx
->damage_rect
.miny
== 0 &&
595 ctx
->damage_rect
.maxx
== ctx
->framebuffer
.base
.width
&&
596 ctx
->damage_rect
.maxy
== ctx
->framebuffer
.base
.height
;
600 lima_update_pp_stream(struct lima_submit
*submit
)
602 struct lima_context
*ctx
= submit
->ctx
;
603 struct lima_damage_region
*damage
= lima_submit_get_damage(submit
);
604 if ((damage
&& damage
->region
) || !lima_damage_fullscreen(submit
))
605 lima_update_damage_pp_stream(submit
);
606 else if (ctx
->plb_pp_stream
)
607 lima_update_full_pp_stream(submit
);
609 ctx
->pp_stream
.map
= NULL
;
613 lima_update_submit_bo(struct lima_submit
*submit
)
615 struct lima_context
*ctx
= submit
->ctx
;
617 lima_submit_add_bo(submit
, LIMA_PIPE_GP
, ctx
->plb_gp_stream
,
618 LIMA_SUBMIT_BO_READ
);
619 lima_submit_add_bo(submit
, LIMA_PIPE_GP
, ctx
->plb
[ctx
->plb_index
],
620 LIMA_SUBMIT_BO_WRITE
);
621 lima_submit_add_bo(submit
, LIMA_PIPE_GP
, ctx
->gp_tile_heap
[ctx
->plb_index
],
622 LIMA_SUBMIT_BO_WRITE
);
624 lima_dump_command_stream_print(
625 ctx
->plb_gp_stream
->map
+ ctx
->plb_index
* ctx
->plb_gp_size
,
626 ctx
->plb_gp_size
, false, "gp plb stream at va %x\n",
627 ctx
->plb_gp_stream
->va
+ ctx
->plb_index
* ctx
->plb_gp_size
);
629 lima_submit_add_bo(submit
, LIMA_PIPE_PP
, ctx
->plb
[ctx
->plb_index
],
630 LIMA_SUBMIT_BO_READ
);
631 lima_submit_add_bo(submit
, LIMA_PIPE_PP
, ctx
->gp_tile_heap
[ctx
->plb_index
],
632 LIMA_SUBMIT_BO_READ
);
634 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
635 lima_submit_add_bo(submit
, LIMA_PIPE_PP
, screen
->pp_buffer
, LIMA_SUBMIT_BO_READ
);
639 lima_finish_plbu_cmd(struct util_dynarray
*plbu_cmd_array
)
642 uint32_t *plbu_cmd
= util_dynarray_ensure_cap(plbu_cmd_array
, plbu_cmd_array
->size
+ 2 * 4);
644 plbu_cmd
[i
++] = 0x00000000;
645 plbu_cmd
[i
++] = 0x50000000; /* END */
647 plbu_cmd_array
->size
+= i
* 4;
651 lima_pack_wb_zsbuf_reg(struct lima_submit
*submit
, uint32_t *wb_reg
, int wb_idx
)
653 struct lima_context
*ctx
= submit
->ctx
;
654 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
655 struct lima_resource
*res
= lima_resource(fb
->base
.zsbuf
->texture
);
656 int level
= fb
->base
.zsbuf
->u
.tex
.level
;
657 uint32_t format
= lima_format_get_pixel(fb
->base
.zsbuf
->format
);
659 struct lima_pp_wb_reg
*wb
= (void *)wb_reg
;
660 wb
[wb_idx
].type
= 0x01; /* 1 for depth, stencil */
661 wb
[wb_idx
].address
= res
->bo
->va
+ res
->levels
[level
].offset
;
662 wb
[wb_idx
].pixel_format
= format
;
664 wb
[wb_idx
].pixel_layout
= 0x2;
665 wb
[wb_idx
].pitch
= fb
->tiled_w
;
667 wb
[wb_idx
].pixel_layout
= 0x0;
668 wb
[wb_idx
].pitch
= res
->levels
[level
].stride
/ 8;
670 wb
[wb_idx
].mrt_bits
= 0;
674 lima_pack_wb_cbuf_reg(struct lima_submit
*submit
, uint32_t *wb_reg
, int wb_idx
)
676 struct lima_context
*ctx
= submit
->ctx
;
677 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
678 struct lima_resource
*res
= lima_resource(fb
->base
.cbufs
[0]->texture
);
679 int level
= fb
->base
.cbufs
[0]->u
.tex
.level
;
680 unsigned layer
= fb
->base
.cbufs
[0]->u
.tex
.first_layer
;
681 uint32_t format
= lima_format_get_pixel(fb
->base
.cbufs
[0]->format
);
682 bool swap_channels
= lima_format_get_swap_rb(fb
->base
.cbufs
[0]->format
);
684 struct lima_pp_wb_reg
*wb
= (void *)wb_reg
;
685 wb
[wb_idx
].type
= 0x02; /* 2 for color buffer */
686 wb
[wb_idx
].address
= res
->bo
->va
+ res
->levels
[level
].offset
+ layer
* res
->levels
[level
].layer_stride
;
687 wb
[wb_idx
].pixel_format
= format
;
689 wb
[wb_idx
].pixel_layout
= 0x2;
690 wb
[wb_idx
].pitch
= fb
->tiled_w
;
692 wb
[wb_idx
].pixel_layout
= 0x0;
693 wb
[wb_idx
].pitch
= res
->levels
[level
].stride
/ 8;
695 wb
[wb_idx
].mrt_bits
= swap_channels
? 0x4 : 0x0;
699 lima_pack_pp_frame_reg(struct lima_submit
*submit
, uint32_t *frame_reg
,
702 struct lima_context
*ctx
= submit
->ctx
;
703 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
704 struct lima_pp_frame_reg
*frame
= (void *)frame_reg
;
705 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
708 frame
->render_address
= screen
->pp_buffer
->va
+ pp_frame_rsw_offset
;
710 frame
->clear_value_depth
= ctx
->clear
.depth
;
711 frame
->clear_value_stencil
= ctx
->clear
.stencil
;
712 frame
->clear_value_color
= ctx
->clear
.color_8pc
;
713 frame
->clear_value_color_1
= ctx
->clear
.color_8pc
;
714 frame
->clear_value_color_2
= ctx
->clear
.color_8pc
;
715 frame
->clear_value_color_3
= ctx
->clear
.color_8pc
;
718 frame
->width
= fb
->base
.width
- 1;
719 frame
->height
= fb
->base
.height
- 1;
721 /* frame->fragment_stack_address is overwritten per-pp in the kernel
722 * by the values of pp_frame.fragment_stack_address[i] */
724 /* These are "stack size" and "stack offset" shifted,
725 * here they are assumed to be always the same. */
726 frame
->fragment_stack_size
= ctx
->pp_max_stack_size
<< 16 | ctx
->pp_max_stack_size
;
728 /* related with MSAA and different value when r4p0/r7p0 */
729 frame
->supersampled_height
= fb
->base
.height
* 2 - 1;
730 frame
->scale
= 0xE0C;
734 frame
->blocking
= (fb
->shift_min
<< 28) | (fb
->shift_h
<< 16) | fb
->shift_w
;
735 frame
->foureight
= 0x8888;
737 if (fb
->base
.nr_cbufs
&& (ctx
->resolve
& PIPE_CLEAR_COLOR0
))
738 lima_pack_wb_cbuf_reg(submit
, wb_reg
, wb_idx
++);
740 if (fb
->base
.zsbuf
&&
741 (ctx
->resolve
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
)))
742 lima_pack_wb_zsbuf_reg(submit
, wb_reg
, wb_idx
++);
746 lima_do_submit(struct lima_submit
*submit
)
748 #define pp_stack_pp_size 0x400
750 struct lima_context
*ctx
= submit
->ctx
;
752 lima_pack_head_plbu_cmd(submit
);
753 lima_finish_plbu_cmd(&ctx
->plbu_cmd_array
);
755 lima_update_submit_bo(submit
);
757 int vs_cmd_size
= ctx
->vs_cmd_array
.size
;
758 uint32_t vs_cmd_va
= 0;
761 void *vs_cmd
= lima_submit_create_stream_bo(
762 submit
, LIMA_PIPE_GP
, vs_cmd_size
, &vs_cmd_va
);
763 memcpy(vs_cmd
, util_dynarray_begin(&ctx
->vs_cmd_array
), vs_cmd_size
);
764 util_dynarray_clear(&ctx
->vs_cmd_array
);
766 lima_dump_command_stream_print(
767 vs_cmd
, vs_cmd_size
, false, "flush vs cmd at va %x\n", vs_cmd_va
);
768 lima_dump_vs_command_stream_print(vs_cmd
, vs_cmd_size
, vs_cmd_va
);
771 uint32_t plbu_cmd_va
;
772 int plbu_cmd_size
= ctx
->plbu_cmd_array
.size
+ ctx
->plbu_cmd_head
.size
;
773 void *plbu_cmd
= lima_submit_create_stream_bo(
774 submit
, LIMA_PIPE_GP
, plbu_cmd_size
, &plbu_cmd_va
);
776 util_dynarray_begin(&ctx
->plbu_cmd_head
),
777 ctx
->plbu_cmd_head
.size
);
778 memcpy(plbu_cmd
+ ctx
->plbu_cmd_head
.size
,
779 util_dynarray_begin(&ctx
->plbu_cmd_array
),
780 ctx
->plbu_cmd_array
.size
);
781 util_dynarray_clear(&ctx
->plbu_cmd_array
);
782 util_dynarray_clear(&ctx
->plbu_cmd_head
);
784 lima_dump_command_stream_print(
785 plbu_cmd
, plbu_cmd_size
, false, "flush plbu cmd at va %x\n", plbu_cmd_va
);
786 lima_dump_plbu_command_stream_print(plbu_cmd
, plbu_cmd_size
, plbu_cmd_va
);
788 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
789 struct drm_lima_gp_frame gp_frame
;
790 struct lima_gp_frame_reg
*gp_frame_reg
= (void *)gp_frame
.frame
;
791 gp_frame_reg
->vs_cmd_start
= vs_cmd_va
;
792 gp_frame_reg
->vs_cmd_end
= vs_cmd_va
+ vs_cmd_size
;
793 gp_frame_reg
->plbu_cmd_start
= plbu_cmd_va
;
794 gp_frame_reg
->plbu_cmd_end
= plbu_cmd_va
+ plbu_cmd_size
;
795 gp_frame_reg
->tile_heap_start
= ctx
->gp_tile_heap
[ctx
->plb_index
]->va
;
796 gp_frame_reg
->tile_heap_end
= ctx
->gp_tile_heap
[ctx
->plb_index
]->va
+ ctx
->gp_tile_heap_size
;
798 lima_dump_command_stream_print(
799 &gp_frame
, sizeof(gp_frame
), false, "add gp frame\n");
801 if (!lima_submit_start(submit
, LIMA_PIPE_GP
, &gp_frame
, sizeof(gp_frame
)))
802 fprintf(stderr
, "gp submit error\n");
804 if (lima_dump_command_stream
) {
805 if (lima_submit_wait(submit
, LIMA_PIPE_GP
, PIPE_TIMEOUT_INFINITE
)) {
806 if (ctx
->gp_output
) {
807 float *pos
= lima_bo_map(ctx
->gp_output
);
808 lima_dump_command_stream_print(
809 pos
, 4 * 4 * 16, true, "gl_pos dump at va %x\n",
813 uint32_t *plb
= lima_bo_map(ctx
->plb
[ctx
->plb_index
]);
814 lima_dump_command_stream_print(
815 plb
, LIMA_CTX_PLB_BLK_SIZE
, false, "plb dump at va %x\n",
816 ctx
->plb
[ctx
->plb_index
]->va
);
819 fprintf(stderr
, "gp submit wait error\n");
824 uint32_t pp_stack_va
= 0;
825 if (ctx
->pp_max_stack_size
) {
826 lima_submit_create_stream_bo(
827 submit
, LIMA_PIPE_PP
,
828 screen
->num_pp
* ctx
->pp_max_stack_size
* pp_stack_pp_size
,
832 lima_update_pp_stream(submit
);
834 struct lima_pp_stream_state
*ps
= &ctx
->pp_stream
;
835 if (screen
->gpu_type
== DRM_LIMA_PARAM_GPU_ID_MALI400
) {
836 struct drm_lima_m400_pp_frame pp_frame
= {0};
837 lima_pack_pp_frame_reg(submit
, pp_frame
.frame
, pp_frame
.wb
);
838 pp_frame
.num_pp
= screen
->num_pp
;
840 for (int i
= 0; i
< screen
->num_pp
; i
++) {
841 pp_frame
.plbu_array_address
[i
] = ps
->va
+ ps
->offset
[i
];
842 if (ctx
->pp_max_stack_size
)
843 pp_frame
.fragment_stack_address
[i
] = pp_stack_va
+
844 ctx
->pp_max_stack_size
* pp_stack_pp_size
* i
;
847 lima_dump_command_stream_print(
848 &pp_frame
, sizeof(pp_frame
), false, "add pp frame\n");
850 if (!lima_submit_start(submit
, LIMA_PIPE_PP
, &pp_frame
, sizeof(pp_frame
)))
851 fprintf(stderr
, "pp submit error\n");
854 struct drm_lima_m450_pp_frame pp_frame
= {0};
855 lima_pack_pp_frame_reg(submit
, pp_frame
.frame
, pp_frame
.wb
);
856 pp_frame
.num_pp
= screen
->num_pp
;
858 if (ctx
->pp_max_stack_size
)
859 for (int i
= 0; i
< screen
->num_pp
; i
++)
860 pp_frame
.fragment_stack_address
[i
] = pp_stack_va
+
861 ctx
->pp_max_stack_size
* pp_stack_pp_size
* i
;
864 for (int i
= 0; i
< screen
->num_pp
; i
++)
865 pp_frame
.plbu_array_address
[i
] = ps
->va
+ ps
->offset
[i
];
868 pp_frame
.use_dlbu
= true;
870 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
871 pp_frame
.dlbu_regs
[0] = ctx
->plb
[ctx
->plb_index
]->va
;
872 pp_frame
.dlbu_regs
[1] = ((fb
->tiled_h
- 1) << 16) | (fb
->tiled_w
- 1);
873 unsigned s
= util_logbase2(LIMA_CTX_PLB_BLK_SIZE
) - 7;
874 pp_frame
.dlbu_regs
[2] = (s
<< 28) | (fb
->shift_h
<< 16) | fb
->shift_w
;
875 pp_frame
.dlbu_regs
[3] = ((fb
->tiled_h
- 1) << 24) | ((fb
->tiled_w
- 1) << 16);
878 lima_dump_command_stream_print(
879 &pp_frame
, sizeof(pp_frame
), false, "add pp frame\n");
881 if (!lima_submit_start(submit
, LIMA_PIPE_PP
, &pp_frame
, sizeof(pp_frame
)))
882 fprintf(stderr
, "pp submit error\n");
885 if (lima_dump_command_stream
) {
886 if (!lima_submit_wait(submit
, LIMA_PIPE_PP
, PIPE_TIMEOUT_INFINITE
)) {
887 fprintf(stderr
, "pp wait error\n");
892 ctx
->plb_index
= (ctx
->plb_index
+ 1) % lima_ctx_num_plb
;
894 if (ctx
->framebuffer
.base
.nr_cbufs
&& (ctx
->resolve
& PIPE_CLEAR_COLOR0
)) {
895 /* Set reload flag for next draw. It'll be unset if buffer is cleared */
896 struct lima_surface
*surf
= lima_surface(ctx
->framebuffer
.base
.cbufs
[0]);
900 ctx
->pp_max_stack_size
= 0;
902 ctx
->damage_rect
.minx
= ctx
->damage_rect
.miny
= 0xffff;
903 ctx
->damage_rect
.maxx
= ctx
->damage_rect
.maxy
= 0;
907 lima_dump_file_next();
911 lima_flush(struct lima_context
*ctx
)
913 if (!lima_submit_dirty(ctx
->submit
))
916 lima_do_submit(ctx
->submit
);
920 lima_pipe_flush(struct pipe_context
*pctx
, struct pipe_fence_handle
**fence
,
923 struct lima_context
*ctx
= lima_context(pctx
);
924 if (lima_submit_dirty(ctx
->submit
))
925 lima_do_submit(ctx
->submit
);
928 int drm_fd
= lima_screen(ctx
->base
.screen
)->fd
;
931 if (!drmSyncobjExportSyncFile(drm_fd
, ctx
->out_sync
[LIMA_PIPE_PP
], &fd
))
932 *fence
= lima_fence_create(fd
);
936 bool lima_submit_init(struct lima_context
*ctx
)
938 int fd
= lima_screen(ctx
->base
.screen
)->fd
;
940 ctx
->submit
= lima_submit_create(ctx
);
944 ctx
->in_sync_fd
= -1;
946 for (int i
= 0; i
< 2; i
++) {
947 if (drmSyncobjCreate(fd
, DRM_SYNCOBJ_CREATE_SIGNALED
, ctx
->in_sync
+ i
) ||
948 drmSyncobjCreate(fd
, DRM_SYNCOBJ_CREATE_SIGNALED
, ctx
->out_sync
+ i
))
952 ctx
->base
.flush
= lima_pipe_flush
;
957 void lima_submit_fini(struct lima_context
*ctx
)
959 int fd
= lima_screen(ctx
->base
.screen
)->fd
;
961 for (int i
= 0; i
< 2; i
++) {
963 drmSyncobjDestroy(fd
, ctx
->in_sync
[i
]);
964 if (ctx
->out_sync
[i
])
965 drmSyncobjDestroy(fd
, ctx
->out_sync
[i
]);
968 if (ctx
->in_sync_fd
>= 0)
969 close(ctx
->in_sync_fd
);
972 lima_submit_free(ctx
->submit
);