2 * Copyright (C) 2017-2019 Lima Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
28 #include "drm-uapi/lima_drm.h"
30 #include "util/u_math.h"
31 #include "util/ralloc.h"
32 #include "util/os_time.h"
33 #include "util/hash_table.h"
34 #include "util/u_upload_mgr.h"
35 #include "util/u_inlines.h"
37 #include "lima_screen.h"
38 #include "lima_context.h"
39 #include "lima_submit.h"
41 #include "lima_util.h"
42 #include "lima_format.h"
43 #include "lima_resource.h"
44 #include "lima_texture.h"
45 #include "lima_fence.h"
/* Convert a CPU pointer into the 64-bit integer form stored in DRM ioctl
 * structures.  uintptr_t is the integer type defined to hold a pointer
 * value, so the conversion does not depend on the width of `unsigned long`. */
#define VOID2U64(x) ((uint64_t)(uintptr_t)(x))
/*
 * Create a submit object for the context's current framebuffer.
 *
 * The submit is allocated zeroed with rzalloc() using ctx as the ralloc
 * parent.  The visible code then:
 *   - copies the DRM fd from the screen into s->fd,
 *   - initialises both entries of the gem_bos[] / bos[] dynarrays
 *     (presumably one per pipe, GP and PP -- TODO confirm),
 *   - initialises the vs_cmd_array, plbu_cmd_array and plbu_cmd_head
 *     dynarrays, all with the submit itself as memory context,
 *   - takes references on the framebuffer's cbufs[0] and zsbuf and stores
 *     them in s->key.
 */
50 static struct lima_submit
*
51 lima_submit_create(struct lima_context
*ctx
)
53 struct lima_submit
*s
;
55 s
= rzalloc(ctx
, struct lima_submit
);
59 s
->fd
= lima_screen(ctx
->base
.screen
)->fd
;
62 for (int i
= 0; i
< 2; i
++) {
63 util_dynarray_init(s
->gem_bos
+ i
, s
);
64 util_dynarray_init(s
->bos
+ i
, s
);
67 util_dynarray_init(&s
->vs_cmd_array
, s
);
68 util_dynarray_init(&s
->plbu_cmd_array
, s
);
69 util_dynarray_init(&s
->plbu_cmd_head
, s
);
71 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
72 pipe_surface_reference(&s
->key
.cbuf
, fb
->base
.cbufs
[0]);
73 pipe_surface_reference(&s
->key
.zsbuf
, fb
->base
.zsbuf
);
/*
 * Tear down a submit.
 *
 * Removes the submit from ctx->submits (keyed by &submit->key).  If the
 * submit resolves the color buffer (PIPE_CLEAR_COLOR0) or the depth/stencil
 * buffer (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL), the matching texture is
 * also removed from ctx->write_submits.  Finally the surface references
 * held in the key are dropped.
 */
79 lima_submit_free(struct lima_submit
*submit
)
81 struct lima_context
*ctx
= submit
->ctx
;
83 _mesa_hash_table_remove_key(ctx
->submits
, &submit
->key
);
85 if (submit
->key
.cbuf
&& (submit
->resolve
& PIPE_CLEAR_COLOR0
))
86 _mesa_hash_table_remove_key(ctx
->write_submits
, submit
->key
.cbuf
->texture
);
87 if (submit
->key
.zsbuf
&& (submit
->resolve
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
)))
88 _mesa_hash_table_remove_key(ctx
->write_submits
, submit
->key
.zsbuf
->texture
);
90 pipe_surface_reference(&submit
->key
.cbuf
, NULL
);
91 pipe_surface_reference(&submit
->key
.zsbuf
, NULL
);
93 /* TODO: do we need a cache for submit? */
/*
 * Find or create the submit that belongs to the current framebuffer.
 *
 * A lookup key is built from the framebuffer's cbufs[0] and zsbuf and
 * searched in ctx->submits.  The visible code also creates a new submit
 * with lima_submit_create() and inserts it into ctx->submits under the
 * submit's own key; presumably that path is only taken when the search
 * misses -- TODO confirm, the branch is not visible here.
 */
97 static struct lima_submit
*
98 _lima_submit_get(struct lima_context
*ctx
)
100 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
101 struct lima_submit_key local_key
= {
102 .cbuf
= fb
->base
.cbufs
[0],
103 .zsbuf
= fb
->base
.zsbuf
,
106 struct hash_entry
*entry
= _mesa_hash_table_search(ctx
->submits
, &local_key
);
110 struct lima_submit
*submit
= lima_submit_create(ctx
);
114 _mesa_hash_table_insert(ctx
->submits
, &submit
->key
, submit
);
120 * Note: this function can only be called in draw code path,
121 * must not exist in flush code path.
/*
 * Public accessor for the context's current submit: the visible statement
 * stores the result of _lima_submit_get() in ctx->submit.
 */
124 lima_submit_get(struct lima_context
*ctx
)
129 ctx
->submit
= _lima_submit_get(ctx
);
/*
 * Register a buffer object with the submit for the given pipe.
 *
 * submit: submit to add the BO to
 * pipe:   index into submit->gem_bos[] / submit->bos[]
 * bo:     buffer object to add
 * flags:  access flags stored in the kernel submit entry
 *         (callers in this file pass LIMA_SUBMIT_BO_READ / _WRITE)
 *
 * If an entry with the same GEM handle already exists in gem_bos[pipe],
 * the new flags are OR-ed into it.  Otherwise a drm_lima_gem_submit_bo
 * entry and a slot in bos[pipe] are appended and a reference is taken on
 * the BO; lima_submit_start() drops the references held in bos[pipe].
 */
133 bool lima_submit_add_bo(struct lima_submit
*submit
, int pipe
,
134 struct lima_bo
*bo
, uint32_t flags
)
136 util_dynarray_foreach(submit
->gem_bos
+ pipe
, struct drm_lima_gem_submit_bo
, gem_bo
) {
137 if (bo
->handle
== gem_bo
->handle
) {
138 gem_bo
->flags
|= flags
;
143 struct drm_lima_gem_submit_bo
*submit_bo
=
144 util_dynarray_grow(submit
->gem_bos
+ pipe
, struct drm_lima_gem_submit_bo
, 1);
145 submit_bo
->handle
= bo
->handle
;
146 submit_bo
->flags
= flags
;
148 struct lima_bo
**jbo
= util_dynarray_grow(submit
->bos
+ pipe
, struct lima_bo
*, 1);
151 /* prevent bo from being freed when submit start */
152 lima_bo_reference(bo
);
/*
 * Hand one pipe's work to the kernel.
 *
 * submit: submit whose BO list for `pipe` is sent
 * pipe:   pipe index (selects gem_bos[], bos[], in_sync[], out_sync[])
 * frame:  pointer to the frame descriptor passed through to the kernel
 * size:   size of the frame descriptor
 *
 * The request carries the number of BO entries (array byte size divided
 * by the entry size), the address of the BO array, the frame pointer and
 * the context's out_sync object for the pipe.  If the context holds a
 * pending in-fence fd (in_sync_fd >= 0) it is imported into in_sync[pipe],
 * attached to the request, and the fd is closed and reset to -1.
 *
 * `ret` is true when DRM_IOCTL_LIMA_GEM_SUBMIT returns 0.  After the ioctl
 * the BO references stored in bos[pipe] are dropped.
 */
158 lima_submit_start(struct lima_submit
*submit
, int pipe
, void *frame
, uint32_t size
)
160 struct lima_context
*ctx
= submit
->ctx
;
161 struct drm_lima_gem_submit req
= {
164 .nr_bos
= submit
->gem_bos
[pipe
].size
/ sizeof(struct drm_lima_gem_submit_bo
),
165 .bos
= VOID2U64(util_dynarray_begin(submit
->gem_bos
+ pipe
)),
166 .frame
= VOID2U64(frame
),
168 .out_sync
= ctx
->out_sync
[pipe
],
171 if (ctx
->in_sync_fd
>= 0) {
172 int err
= drmSyncobjImportSyncFile(submit
->fd
, ctx
->in_sync
[pipe
],
177 req
.in_sync
[0] = ctx
->in_sync
[pipe
];
178 close(ctx
->in_sync_fd
);
179 ctx
->in_sync_fd
= -1;
182 bool ret
= drmIoctl(submit
->fd
, DRM_IOCTL_LIMA_GEM_SUBMIT
, &req
) == 0;
184 util_dynarray_foreach(submit
->bos
+ pipe
, struct lima_bo
*, bo
) {
185 lima_bo_unreference(*bo
);
192 lima_submit_wait(struct lima_submit
*submit
, int pipe
, uint64_t timeout_ns
)
194 int64_t abs_timeout
= os_time_get_absolute_timeout(timeout_ns
);
195 if (abs_timeout
== OS_TIMEOUT_INFINITE
)
196 abs_timeout
= INT64_MAX
;
198 struct lima_context
*ctx
= submit
->ctx
;
199 return !drmSyncobjWait(submit
->fd
, ctx
->out_sync
+ pipe
, 1, abs_timeout
, 0, NULL
);
/*
 * Check whether the submit references a buffer object.
 *
 * Both gem_bos[] lists are scanned for an entry with the same GEM handle.
 * When a matching entry is found it qualifies if `all` is set, or if the
 * entry carries LIMA_SUBMIT_BO_WRITE.  Note that lima_flush_submit_accessing_bo()
 * passes its `write` argument as `all`.
 */
203 lima_submit_has_bo(struct lima_submit
*submit
, struct lima_bo
*bo
, bool all
)
205 for (int i
= 0; i
< 2; i
++) {
206 util_dynarray_foreach(submit
->gem_bos
+ i
, struct drm_lima_gem_submit_bo
, gem_bo
) {
207 if (bo
->handle
== gem_bo
->handle
) {
208 if (all
|| gem_bo
->flags
& LIMA_SUBMIT_BO_WRITE
)
/*
 * Sub-allocate a chunk of stream memory for this submit.
 *
 * submit: submit that will reference the backing BO
 * pipe:   pipe whose BO list receives the backing BO
 * size:   number of bytes requested from ctx->uploader (0x40 alignment)
 * va:     out: GPU address of the chunk (BO va + offset inside the BO)
 *
 * The backing BO is added to the submit with LIMA_SUBMIT_BO_READ and the
 * local pipe_resource reference obtained from u_upload_alloc() is dropped
 * again.  Callers in this file use the return value as the CPU mapping of
 * the chunk.
 */
220 lima_submit_create_stream_bo(struct lima_submit
*submit
, int pipe
,
221 unsigned size
, uint32_t *va
)
223 struct lima_context
*ctx
= submit
->ctx
;
227 struct pipe_resource
*pres
= NULL
;
228 u_upload_alloc(ctx
->uploader
, 0, size
, 0x40, &offset
, &pres
, &cpu
);
230 struct lima_resource
*res
= lima_resource(pres
);
231 *va
= res
->bo
->va
+ offset
;
233 lima_submit_add_bo(submit
, pipe
, res
->bo
, LIMA_SUBMIT_BO_READ
);
235 pipe_resource_reference(&pres
, NULL
);
/*
 * Look up the damage region that applies to this submit.
 *
 * The guard tests that the framebuffer has a color buffer and that the
 * submit resolves it (PIPE_CLEAR_COLOR0).  The resource behind cbufs[0] is
 * then fetched; callers treat the result as a possibly-NULL pointer to a
 * lima_damage_region.
 */
240 static inline struct lima_damage_region
*
241 lima_submit_get_damage(struct lima_submit
*submit
)
243 struct lima_context
*ctx
= submit
->ctx
;
245 if (!(ctx
->framebuffer
.base
.nr_cbufs
&& (submit
->resolve
& PIPE_CLEAR_COLOR0
)))
248 struct lima_surface
*surf
= lima_surface(ctx
->framebuffer
.base
.cbufs
[0]);
249 struct lima_resource
*res
= lima_resource(surf
->base
.texture
);
/*
 * Decide whether the color buffer content has to be reloaded before
 * drawing (used as a condition by lima_pack_head_plbu_cmd()).
 *
 * The guard tests that the framebuffer has a color buffer and that the
 * submit resolves it.  After that the decision depends on whether the
 * color resource has a damage region, and otherwise on surf->reload.
 */
254 lima_fb_need_reload(struct lima_submit
*submit
)
256 struct lima_context
*ctx
= submit
->ctx
;
258 /* Depth buffer is always discarded */
259 if (!(ctx
->framebuffer
.base
.nr_cbufs
&& (submit
->resolve
& PIPE_CLEAR_COLOR0
)))
262 struct lima_surface
*surf
= lima_surface(ctx
->framebuffer
.base
.cbufs
[0]);
263 struct lima_resource
*res
= lima_resource(surf
->base
.texture
);
264 if (res
->damage
.region
) {
265 /* for EGL_KHR_partial_update, when EGL_EXT_buffer_age is enabled,
266 * we need to reload damage region, otherwise just want to reload
267 * the region not aligned to tile boundary */
268 //if (!res->damage.aligned)
272 else if (surf
->reload
)
/*
 * Append a "reload" draw to submit->plbu_cmd_head.
 *
 * A stream buffer of lima_reload_buffer_size bytes is allocated for the PP
 * pipe and filled at the lima_reload_*_offset offsets with:
 *   - a render state whose shader address points at the reload program in
 *     screen->pp_buffer (the low 5 bits carry the first instruction size
 *     read from the program's first word),
 *   - a texture descriptor for the texture behind framebuffer cbufs[0]
 *     (2D, unnormalised coordinates, nearest filtering, clamp-to-edge),
 *   - a one-entry texture array pointing at that descriptor,
 *   - gl_Position and varying data derived from the framebuffer width and
 *     height.
 * The PLBU commands then set the viewport to the framebuffer size and
 * issue an indexed draw of 3 indices using the shared index buffer in
 * screen->pp_buffer.
 */
279 lima_pack_reload_plbu_cmd(struct lima_submit
*submit
)
281 #define lima_reload_render_state_offset 0x0000
282 #define lima_reload_gl_pos_offset 0x0040
283 #define lima_reload_varying_offset 0x0080
284 #define lima_reload_tex_desc_offset 0x00c0
285 #define lima_reload_tex_array_offset 0x0100
286 #define lima_reload_buffer_size 0x0140
288 struct lima_context
*ctx
= submit
->ctx
;
291 void *cpu
= lima_submit_create_stream_bo(
292 submit
, LIMA_PIPE_PP
, lima_reload_buffer_size
, &va
);
294 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
296 uint32_t reload_shader_first_instr_size
=
297 ((uint32_t *)(screen
->pp_buffer
->map
+ pp_reload_program_offset
))[0] & 0x1f;
298 uint32_t reload_shader_va
= screen
->pp_buffer
->va
+ pp_reload_program_offset
;
300 struct lima_render_state reload_render_state
= {
301 .alpha_blend
= 0xf03b1ad2,
302 .depth_test
= 0x0000000e,
303 .depth_range
= 0xffff0000,
304 .stencil_front
= 0x00000007,
305 .stencil_back
= 0x00000007,
306 .multi_sample
= 0x0000f007,
307 .shader_address
= reload_shader_va
| reload_shader_first_instr_size
,
308 .varying_types
= 0x00000001,
309 .textures_address
= va
+ lima_reload_tex_array_offset
,
311 .varyings_address
= va
+ lima_reload_varying_offset
,
313 memcpy(cpu
+ lima_reload_render_state_offset
, &reload_render_state
,
314 sizeof(reload_render_state
));
316 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
317 lima_tex_desc
*td
= cpu
+ lima_reload_tex_desc_offset
;
318 memset(td
, 0, lima_min_tex_desc_size
);
319 lima_texture_desc_set_res(ctx
, td
, fb
->base
.cbufs
[0]->texture
, 0, 0);
320 td
->unnorm_coords
= 1;
321 td
->texture_type
= LIMA_TEXTURE_TYPE_2D
;
322 td
->min_img_filter_nearest
= 1;
323 td
->mag_img_filter_nearest
= 1;
324 td
->wrap_s_clamp_to_edge
= 1;
325 td
->wrap_t_clamp_to_edge
= 1;
326 td
->unknown_2_2
= 0x1;
328 uint32_t *ta
= cpu
+ lima_reload_tex_array_offset
;
329 ta
[0] = va
+ lima_reload_tex_desc_offset
;
331 float reload_gl_pos
[] = {
332 fb
->base
.width
, 0, 0, 1,
334 0, fb
->base
.height
, 0, 1,
336 memcpy(cpu
+ lima_reload_gl_pos_offset
, reload_gl_pos
,
337 sizeof(reload_gl_pos
));
339 float reload_varying
[] = {
340 fb
->base
.width
, 0, 0, 0,
341 0, fb
->base
.height
, 0, 0,
343 memcpy(cpu
+ lima_reload_varying_offset
, reload_varying
,
344 sizeof(reload_varying
));
346 PLBU_CMD_BEGIN(&submit
->plbu_cmd_head
, 20);
348 PLBU_CMD_VIEWPORT_LEFT(0);
349 PLBU_CMD_VIEWPORT_RIGHT(fui(fb
->base
.width
));
350 PLBU_CMD_VIEWPORT_BOTTOM(0);
351 PLBU_CMD_VIEWPORT_TOP(fui(fb
->base
.height
));
353 PLBU_CMD_RSW_VERTEX_ARRAY(
354 va
+ lima_reload_render_state_offset
,
355 va
+ lima_reload_gl_pos_offset
);
360 PLBU_CMD_INDICES(screen
->pp_buffer
->va
+ pp_shared_index_offset
);
361 PLBU_CMD_INDEXED_DEST(va
+ lima_reload_gl_pos_offset
);
362 PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3);
/*
 * Write the PLBU commands that go in front of the draw commands into
 * submit->plbu_cmd_head: block step, tiled dimensions, block stride and
 * the PLB array address (the current PLB's slice of ctx->plb_gp_stream,
 * with block_w * block_h entries).  When lima_fb_need_reload() is true the
 * reload draw is appended as well.
 */
368 lima_pack_head_plbu_cmd(struct lima_submit
*submit
)
370 struct lima_context
*ctx
= submit
->ctx
;
371 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
373 PLBU_CMD_BEGIN(&submit
->plbu_cmd_head
, 10);
376 PLBU_CMD_BLOCK_STEP(fb
->shift_min
, fb
->shift_h
, fb
->shift_w
);
377 PLBU_CMD_TILED_DIMENSIONS(fb
->tiled_w
, fb
->tiled_h
);
378 PLBU_CMD_BLOCK_STRIDE(fb
->block_w
);
380 PLBU_CMD_ARRAY_ADDRESS(
381 ctx
->plb_gp_stream
->va
+ ctx
->plb_index
* ctx
->plb_gp_size
,
382 fb
->block_w
* fb
->block_h
);
386 if (lima_fb_need_reload(submit
))
387 lima_pack_reload_plbu_cmd(submit
);
/*
 * Helper of hilbert_coords(), which calls it once per level with the
 * current square size (1 << i), the running coordinates and the two
 * quadrant values rx / ry.
 * NOTE(review): the body is not visible in this excerpt, so the exact
 * transformation applied to *x / *y cannot be documented from here.
 */
391 hilbert_rotate(int n
, int *x
, int *y
, int rx
, int ry
)
/*
 * Convert a 1D index `d` into 2D coordinates (*x, *y); the caller uses it
 * to map a PP stream index to a PLB block position.
 *
 * The loop runs once per power of two below `n` and calls
 * hilbert_rotate() with the square size of that level.
 * NOTE(review): the per-level computation of rx / ry and the coordinate
 * updates are not visible in this excerpt.
 */
407 hilbert_coords(int n
, int d
, int *x
, int *y
)
413 for (i
= 0; (1 << i
) < n
; i
++) {
418 hilbert_rotate(1 << i
, x
, y
, rx
, ry
);
/*
 * Lay out the per-PP streams inside one buffer.
 *
 * num_pp:  number of pixel processors (must be non-zero, it is a divisor)
 * tiled_w: width of the area in tiles
 * tiled_h: height of the area in tiles
 * off:     out: start offset of each PP's stream
 *
 * `delta` is the base size of one stream: 16 bytes per tile for an even
 * share of the tiles plus 16 more bytes; `remain` is the number of tiles
 * left over by the integer division.  Offsets are aligned to 0x20.
 * Callers use the return value as the total buffer size.
 */
428 lima_get_pp_stream_size(int num_pp
, int tiled_w
, int tiled_h
, uint32_t *off
)
430 /* carefully calculate each stream start address:
431 * 1. overflow: each stream size may be different due to
432 * fb->tiled_w * fb->tiled_h can't be divided by num_pp,
433 * extra size should be added to the preceding stream
434 * 2. alignment: each stream address should be 0x20 aligned
436 int delta
= tiled_w
* tiled_h
/ num_pp
* 16 + 16;
437 int remain
= tiled_w
* tiled_h
% num_pp
;
440 for (int i
= 0; i
< num_pp
; i
++) {
448 offset
= align(offset
, 0x20);
/*
 * Test a tile position against a damage region list.
 *
 * x, y: position to test
 * ds:   damage region, may be NULL or have no region array (both cases
 *       are handled by the first check)
 *
 * Each of the ds->num_region rectangles is tested with a half-open
 * range: minx <= x < maxx and miny <= y < maxy.
 */
455 inside_damage_region(int x
, int y
, struct lima_damage_region
*ds
)
457 if (!ds
|| !ds
->region
)
460 for (int i
= 0; i
< ds
->num_region
; i
++) {
461 struct pipe_scissor_state
*ss
= ds
->region
+ i
;
462 if (x
>= ss
->minx
&& x
< ss
->maxx
&&
463 y
>= ss
->miny
&& y
< ss
->maxy
)
/*
 * Fill the per-PP tile streams in ctx->pp_stream.
 *
 * submit:          submit the stream belongs to
 * off_x, off_y:    tile offset of the area (the statements using them are
 *                  not visible in this excerpt)
 * tiled_w/tiled_h: size of the area in tiles; a zero-sized area yields
 *                  streams that contain only the terminator
 *
 * Tile positions are enumerated with hilbert_coords() over a square of
 * 1 << (2 * ceil(log2(max(tiled_w, tiled_h)))) indices; positions outside
 * the area or outside the damage region are skipped.  Accepted tiles are
 * handed out round-robin (index % num_pp), each as four 32-bit words:
 * 0, 0xB8000000 | x | (y << 8), 0xE0000002 | (PLB block address >> 3,
 * masked), 0xB0000000.  Every stream ends with the four words
 * 0, 0xBC000000, 0, 0 and is then passed to the command stream dumper.
 */
471 lima_generate_pp_stream(struct lima_submit
*submit
, int off_x
, int off_y
,
472 int tiled_w
, int tiled_h
)
474 struct lima_context
*ctx
= submit
->ctx
;
475 struct lima_pp_stream_state
*ps
= &ctx
->pp_stream
;
476 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
477 struct lima_damage_region
*damage
= lima_submit_get_damage(submit
);
478 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
479 int i
, num_pp
= screen
->num_pp
;
481 /* use hilbert_coords to generate 1D to 2D relationship.
482 * 1D for pp stream index and 2D for plb block x/y on framebuffer.
483 * if multi pp, interleave the 1D index to make each pp's render target
484 * close enough which should result close workload
486 int max
= MAX2(tiled_w
, tiled_h
);
493 /* Don't update count if we get zero rect. We'll just generate
494 * PP stream with just terminators in it.
496 if ((tiled_w
* tiled_h
) != 0) {
497 dim
= util_logbase2_ceil(max
);
498 count
= 1 << (dim
+ dim
);
501 for (i
= 0; i
< num_pp
; i
++)
502 stream
[i
] = ps
->map
+ ps
->offset
[i
];
504 for (i
= 0; i
< count
; i
++) {
506 hilbert_coords(max
, i
, &x
, &y
);
507 if (x
< tiled_w
&& y
< tiled_h
) {
511 if (!inside_damage_region(x
, y
, damage
))
514 int pp
= index
% num_pp
;
515 int offset
= ((y
>> fb
->shift_h
) * fb
->block_w
+
516 (x
>> fb
->shift_w
)) * LIMA_CTX_PLB_BLK_SIZE
;
517 int plb_va
= ctx
->plb
[ctx
->plb_index
]->va
+ offset
;
519 stream
[pp
][si
[pp
]++] = 0;
520 stream
[pp
][si
[pp
]++] = 0xB8000000 | x
| (y
<< 8);
521 stream
[pp
][si
[pp
]++] = 0xE0000002 | ((plb_va
>> 3) & ~0xE0000003);
522 stream
[pp
][si
[pp
]++] = 0xB0000000;
528 for (i
= 0; i
< num_pp
; i
++) {
529 stream
[i
][si
[i
]++] = 0;
530 stream
[i
][si
[i
]++] = 0xBC000000;
531 stream
[i
][si
[i
]++] = 0;
532 stream
[i
][si
[i
]++] = 0;
534 lima_dump_command_stream_print(
535 stream
[i
], si
[i
] * 4, false, "pp plb stream %d at va %x\n",
536 i
, ps
->va
+ ps
->offset
[i
]);
541 lima_update_damage_pp_stream(struct lima_submit
*submit
)
543 struct lima_context
*ctx
= submit
->ctx
;
544 struct lima_damage_region
*ds
= lima_submit_get_damage(submit
);
545 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
546 struct pipe_scissor_state bound
;
548 if (ds
&& ds
->region
) {
549 struct pipe_scissor_state
*dbound
= &ds
->bound
;
550 bound
.minx
= MAX2(dbound
->minx
, ctx
->damage_rect
.minx
>> 4);
551 bound
.miny
= MAX2(dbound
->miny
, ctx
->damage_rect
.miny
>> 4);
552 bound
.maxx
= MIN2(dbound
->maxx
, (ctx
->damage_rect
.maxx
+ 0xf) >> 4);
553 bound
.maxy
= MIN2(dbound
->maxy
, (ctx
->damage_rect
.maxy
+ 0xf) >> 4);
555 bound
.minx
= ctx
->damage_rect
.minx
>> 4;
556 bound
.miny
= ctx
->damage_rect
.miny
>> 4;
557 bound
.maxx
= (ctx
->damage_rect
.maxx
+ 0xf) >> 4;
558 bound
.maxy
= (ctx
->damage_rect
.maxy
+ 0xf) >> 4;
561 /* Clamp to FB size */
562 bound
.minx
= MIN2(bound
.minx
, fb
->tiled_w
);
563 bound
.miny
= MIN2(bound
.miny
, fb
->tiled_h
);
564 bound
.maxx
= MIN2(bound
.maxx
, fb
->tiled_w
);
565 bound
.maxy
= MIN2(bound
.maxy
, fb
->tiled_h
);
567 int tiled_w
= bound
.maxx
- bound
.minx
;
568 int tiled_h
= bound
.maxy
- bound
.miny
;
570 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
571 int size
= lima_get_pp_stream_size(
572 screen
->num_pp
, tiled_w
, tiled_h
, ctx
->pp_stream
.offset
);
574 ctx
->pp_stream
.map
= lima_submit_create_stream_bo(
575 submit
, LIMA_PIPE_PP
, size
, &ctx
->pp_stream
.va
);
577 lima_generate_pp_stream(submit
, bound
.minx
, bound
.miny
, tiled_w
, tiled_h
);
/*
 * Select the PP stream that covers the whole framebuffer.
 *
 * The stream is looked up in ctx->plb_pp_stream by (plb_index, tiled_w,
 * tiled_h); the entry is dereferenced without a NULL check, so the caller
 * must make sure an entry for the key exists.
 *
 * Two paths are visible: one maps the entry's existing BO and copies its
 * offsets into ctx->pp_stream; the other computes the stream layout,
 * creates the BO, maps it and generates the stream for the full
 * framebuffer.  Presumably the choice depends on whether the entry already
 * has a BO -- TODO confirm, the condition is not visible here.
 * In the end the BO is added to the submit for the PP pipe as read-only.
 */
581 lima_update_full_pp_stream(struct lima_submit
*submit
)
583 struct lima_context
*ctx
= submit
->ctx
;
584 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
585 struct lima_ctx_plb_pp_stream_key key
= {
586 .plb_index
= ctx
->plb_index
,
587 .tiled_w
= fb
->tiled_w
,
588 .tiled_h
= fb
->tiled_h
,
591 struct hash_entry
*entry
=
592 _mesa_hash_table_search(ctx
->plb_pp_stream
, &key
);
593 struct lima_ctx_plb_pp_stream
*s
= entry
->data
;
596 ctx
->pp_stream
.map
= lima_bo_map(s
->bo
);
597 ctx
->pp_stream
.va
= s
->bo
->va
;
598 memcpy(ctx
->pp_stream
.offset
, s
->offset
, sizeof(s
->offset
));
601 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
602 int size
= lima_get_pp_stream_size(
603 screen
->num_pp
, fb
->tiled_w
, fb
->tiled_h
, s
->offset
);
604 s
->bo
= lima_bo_create(screen
, size
, 0);
606 ctx
->pp_stream
.map
= lima_bo_map(s
->bo
);
607 ctx
->pp_stream
.va
= s
->bo
->va
;
608 memcpy(ctx
->pp_stream
.offset
, s
->offset
, sizeof(s
->offset
));
610 lima_generate_pp_stream(submit
, 0, 0, fb
->tiled_w
, fb
->tiled_h
);
613 lima_submit_add_bo(submit
, LIMA_PIPE_PP
, s
->bo
, LIMA_SUBMIT_BO_READ
);
617 lima_damage_fullscreen(struct lima_submit
*submit
)
619 struct lima_context
*ctx
= submit
->ctx
;
621 return ctx
->damage_rect
.minx
== 0 &&
622 ctx
->damage_rect
.miny
== 0 &&
623 ctx
->damage_rect
.maxx
== ctx
->framebuffer
.base
.width
&&
624 ctx
->damage_rect
.maxy
== ctx
->framebuffer
.base
.height
;
628 lima_update_pp_stream(struct lima_submit
*submit
)
630 struct lima_context
*ctx
= submit
->ctx
;
631 struct lima_damage_region
*damage
= lima_submit_get_damage(submit
);
632 if ((damage
&& damage
->region
) || !lima_damage_fullscreen(submit
))
633 lima_update_damage_pp_stream(submit
);
634 else if (ctx
->plb_pp_stream
)
635 lima_update_full_pp_stream(submit
);
637 ctx
->pp_stream
.map
= NULL
;
641 lima_update_submit_bo(struct lima_submit
*submit
)
643 struct lima_context
*ctx
= submit
->ctx
;
645 lima_submit_add_bo(submit
, LIMA_PIPE_GP
, ctx
->plb_gp_stream
,
646 LIMA_SUBMIT_BO_READ
);
647 lima_submit_add_bo(submit
, LIMA_PIPE_GP
, ctx
->plb
[ctx
->plb_index
],
648 LIMA_SUBMIT_BO_WRITE
);
649 lima_submit_add_bo(submit
, LIMA_PIPE_GP
, ctx
->gp_tile_heap
[ctx
->plb_index
],
650 LIMA_SUBMIT_BO_WRITE
);
652 lima_dump_command_stream_print(
653 ctx
->plb_gp_stream
->map
+ ctx
->plb_index
* ctx
->plb_gp_size
,
654 ctx
->plb_gp_size
, false, "gp plb stream at va %x\n",
655 ctx
->plb_gp_stream
->va
+ ctx
->plb_index
* ctx
->plb_gp_size
);
657 lima_submit_add_bo(submit
, LIMA_PIPE_PP
, ctx
->plb
[ctx
->plb_index
],
658 LIMA_SUBMIT_BO_READ
);
659 lima_submit_add_bo(submit
, LIMA_PIPE_PP
, ctx
->gp_tile_heap
[ctx
->plb_index
],
660 LIMA_SUBMIT_BO_READ
);
662 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
663 lima_submit_add_bo(submit
, LIMA_PIPE_PP
, screen
->pp_buffer
, LIMA_SUBMIT_BO_READ
);
667 lima_finish_plbu_cmd(struct util_dynarray
*plbu_cmd_array
)
670 uint32_t *plbu_cmd
= util_dynarray_ensure_cap(plbu_cmd_array
, plbu_cmd_array
->size
+ 2 * 4);
672 plbu_cmd
[i
++] = 0x00000000;
673 plbu_cmd
[i
++] = 0x50000000; /* END */
675 plbu_cmd_array
->size
+= i
* 4;
/*
 * Fill write-back register set wb_idx for the depth/stencil buffer.
 *
 * submit: submit being packed
 * wb_reg: write-back register block, viewed as an array of lima_pp_wb_reg
 * wb_idx: index of the register set to fill
 *
 * The target address is the BO address plus the offset of the surface's
 * mip level.  Two layout variants are visible: pixel_layout 0x2 with the
 * pitch taken from fb->tiled_w, and pixel_layout 0x0 with the pitch taken
 * from the level stride divided by 8.  The condition selecting between
 * them is not visible in this excerpt (presumably tiled vs. linear
 * resource -- TODO confirm).
 */
679 lima_pack_wb_zsbuf_reg(struct lima_submit
*submit
, uint32_t *wb_reg
, int wb_idx
)
681 struct lima_context
*ctx
= submit
->ctx
;
682 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
683 struct lima_resource
*res
= lima_resource(fb
->base
.zsbuf
->texture
);
684 int level
= fb
->base
.zsbuf
->u
.tex
.level
;
685 uint32_t format
= lima_format_get_pixel(fb
->base
.zsbuf
->format
);
687 struct lima_pp_wb_reg
*wb
= (void *)wb_reg
;
688 wb
[wb_idx
].type
= 0x01; /* 1 for depth, stencil */
689 wb
[wb_idx
].address
= res
->bo
->va
+ res
->levels
[level
].offset
;
690 wb
[wb_idx
].pixel_format
= format
;
692 wb
[wb_idx
].pixel_layout
= 0x2;
693 wb
[wb_idx
].pitch
= fb
->tiled_w
;
695 wb
[wb_idx
].pixel_layout
= 0x0;
696 wb
[wb_idx
].pitch
= res
->levels
[level
].stride
/ 8;
698 wb
[wb_idx
].mrt_bits
= 0;
/*
 * Fill write-back register set wb_idx for color buffer 0.
 *
 * submit: submit being packed
 * wb_reg: write-back register block, viewed as an array of lima_pp_wb_reg
 * wb_idx: index of the register set to fill
 *
 * The target address is the BO address plus the mip level offset plus
 * first_layer * layer_stride.  mrt_bits is 0x4 when the format needs its
 * red and blue channels swapped, 0x0 otherwise.  As in the depth/stencil
 * variant, two layout variants are visible (pixel_layout 0x2 with pitch
 * fb->tiled_w, pixel_layout 0x0 with pitch stride / 8); the selecting
 * condition is not visible in this excerpt.
 */
702 lima_pack_wb_cbuf_reg(struct lima_submit
*submit
, uint32_t *wb_reg
, int wb_idx
)
704 struct lima_context
*ctx
= submit
->ctx
;
705 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
706 struct lima_resource
*res
= lima_resource(fb
->base
.cbufs
[0]->texture
);
707 int level
= fb
->base
.cbufs
[0]->u
.tex
.level
;
708 unsigned layer
= fb
->base
.cbufs
[0]->u
.tex
.first_layer
;
709 uint32_t format
= lima_format_get_pixel(fb
->base
.cbufs
[0]->format
);
710 bool swap_channels
= lima_format_get_swap_rb(fb
->base
.cbufs
[0]->format
);
712 struct lima_pp_wb_reg
*wb
= (void *)wb_reg
;
713 wb
[wb_idx
].type
= 0x02; /* 2 for color buffer */
714 wb
[wb_idx
].address
= res
->bo
->va
+ res
->levels
[level
].offset
+ layer
* res
->levels
[level
].layer_stride
;
715 wb
[wb_idx
].pixel_format
= format
;
717 wb
[wb_idx
].pixel_layout
= 0x2;
718 wb
[wb_idx
].pitch
= fb
->tiled_w
;
720 wb
[wb_idx
].pixel_layout
= 0x0;
721 wb
[wb_idx
].pitch
= res
->levels
[level
].stride
/ 8;
723 wb
[wb_idx
].mrt_bits
= swap_channels
? 0x4 : 0x0;
/*
 * Fill the PP frame registers and the write-back registers for a submit.
 *
 * frame_reg is viewed as a struct lima_pp_frame_reg.  The visible
 * assignments set: the render address inside screen->pp_buffer, the clear
 * values for depth, stencil and the four color slots (all from
 * ctx->clear), width / height as size minus one, the fragment stack size
 * (ctx->pp_max_stack_size in both 16-bit halves), the supersampled
 * height, the scale and blocking values.
 *
 * Write-back register sets are filled in order: first the color buffer if
 * the framebuffer has one and the submit resolves it, then the
 * depth/stencil buffer under the equivalent condition.
 * NOTE(review): the remainder of the parameter list and the declaration
 * of wb_idx are not visible in this excerpt.
 */
727 lima_pack_pp_frame_reg(struct lima_submit
*submit
, uint32_t *frame_reg
,
730 struct lima_context
*ctx
= submit
->ctx
;
731 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
732 struct lima_pp_frame_reg
*frame
= (void *)frame_reg
;
733 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
736 frame
->render_address
= screen
->pp_buffer
->va
+ pp_frame_rsw_offset
;
738 frame
->clear_value_depth
= ctx
->clear
.depth
;
739 frame
->clear_value_stencil
= ctx
->clear
.stencil
;
740 frame
->clear_value_color
= ctx
->clear
.color_8pc
;
741 frame
->clear_value_color_1
= ctx
->clear
.color_8pc
;
742 frame
->clear_value_color_2
= ctx
->clear
.color_8pc
;
743 frame
->clear_value_color_3
= ctx
->clear
.color_8pc
;
746 frame
->width
= fb
->base
.width
- 1;
747 frame
->height
= fb
->base
.height
- 1;
749 /* frame->fragment_stack_address is overwritten per-pp in the kernel
750 * by the values of pp_frame.fragment_stack_address[i] */
752 /* These are "stack size" and "stack offset" shifted,
753 * here they are assumed to be always the same. */
754 frame
->fragment_stack_size
= ctx
->pp_max_stack_size
<< 16 | ctx
->pp_max_stack_size
;
756 /* related with MSAA and different value when r4p0/r7p0 */
757 frame
->supersampled_height
= fb
->base
.height
* 2 - 1;
758 frame
->scale
= 0xE0C;
762 frame
->blocking
= (fb
->shift_min
<< 28) | (fb
->shift_h
<< 16) | fb
->shift_w
;
763 frame
->foureight
= 0x8888;
765 if (fb
->base
.nr_cbufs
&& (submit
->resolve
& PIPE_CLEAR_COLOR0
))
766 lima_pack_wb_cbuf_reg(submit
, wb_reg
, wb_idx
++);
768 if (fb
->base
.zsbuf
&&
769 (submit
->resolve
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
)))
770 lima_pack_wb_zsbuf_reg(submit
, wb_reg
, wb_idx
++);
/*
 * Flush one submit to the hardware and free it.
 *
 * Visible steps, in order:
 *  1. Pack the PLBU head commands, terminate the PLBU command array and
 *     add the per-context buffers to the BO lists.
 *  2. Copy the VS command array into a GP stream buffer and dump it.
 *  3. Copy the PLBU head followed by the PLBU command array into a GP
 *     stream buffer and dump it.
 *  4. Fill the GP frame registers (VS / PLBU command ranges and the tile
 *     heap range of the current PLB index) and start the GP job; a
 *     failure is reported on stderr.
 *  5. When command stream dumping is enabled, wait for the GP job and
 *     dump gl_pos output and the PLB.
 *  6. Allocate the PP fragment stack when ctx->pp_max_stack_size is set
 *     (num_pp * pp_max_stack_size * pp_stack_pp_size bytes) and update
 *     the PP stream.
 *  7. Build and start the PP job.  On Mali400 each PP gets its own stream
 *     address and stack address; otherwise a drm_lima_m450_pp_frame is
 *     used, which in the visible code either carries per-PP stream
 *     addresses or enables the DLBU and programs its four registers.
 *  8. When dumping is enabled, wait for the PP job.
 *  9. Advance ctx->plb_index (modulo lima_ctx_num_plb), reset
 *     pp_max_stack_size and the damage rectangle (min 0xffff, max 0),
 *     move to the next dump file and free the submit.
 */
774 lima_do_submit(struct lima_submit
*submit
)
776 #define pp_stack_pp_size 0x400
778 struct lima_context
*ctx
= submit
->ctx
;
780 lima_pack_head_plbu_cmd(submit
);
781 lima_finish_plbu_cmd(&submit
->plbu_cmd_array
);
783 lima_update_submit_bo(submit
);
785 int vs_cmd_size
= submit
->vs_cmd_array
.size
;
786 uint32_t vs_cmd_va
= 0;
789 void *vs_cmd
= lima_submit_create_stream_bo(
790 submit
, LIMA_PIPE_GP
, vs_cmd_size
, &vs_cmd_va
);
791 memcpy(vs_cmd
, util_dynarray_begin(&submit
->vs_cmd_array
), vs_cmd_size
);
793 lima_dump_command_stream_print(
794 vs_cmd
, vs_cmd_size
, false, "flush vs cmd at va %x\n", vs_cmd_va
);
795 lima_dump_vs_command_stream_print(vs_cmd
, vs_cmd_size
, vs_cmd_va
);
798 uint32_t plbu_cmd_va
;
799 int plbu_cmd_size
= submit
->plbu_cmd_array
.size
+ submit
->plbu_cmd_head
.size
;
800 void *plbu_cmd
= lima_submit_create_stream_bo(
801 submit
, LIMA_PIPE_GP
, plbu_cmd_size
, &plbu_cmd_va
);
803 util_dynarray_begin(&submit
->plbu_cmd_head
),
804 submit
->plbu_cmd_head
.size
);
805 memcpy(plbu_cmd
+ submit
->plbu_cmd_head
.size
,
806 util_dynarray_begin(&submit
->plbu_cmd_array
),
807 submit
->plbu_cmd_array
.size
);
809 lima_dump_command_stream_print(
810 plbu_cmd
, plbu_cmd_size
, false, "flush plbu cmd at va %x\n", plbu_cmd_va
);
811 lima_dump_plbu_command_stream_print(plbu_cmd
, plbu_cmd_size
, plbu_cmd_va
);
813 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
814 struct drm_lima_gp_frame gp_frame
;
815 struct lima_gp_frame_reg
*gp_frame_reg
= (void *)gp_frame
.frame
;
816 gp_frame_reg
->vs_cmd_start
= vs_cmd_va
;
817 gp_frame_reg
->vs_cmd_end
= vs_cmd_va
+ vs_cmd_size
;
818 gp_frame_reg
->plbu_cmd_start
= plbu_cmd_va
;
819 gp_frame_reg
->plbu_cmd_end
= plbu_cmd_va
+ plbu_cmd_size
;
820 gp_frame_reg
->tile_heap_start
= ctx
->gp_tile_heap
[ctx
->plb_index
]->va
;
821 gp_frame_reg
->tile_heap_end
= ctx
->gp_tile_heap
[ctx
->plb_index
]->va
+ ctx
->gp_tile_heap_size
;
823 lima_dump_command_stream_print(
824 &gp_frame
, sizeof(gp_frame
), false, "add gp frame\n");
826 if (!lima_submit_start(submit
, LIMA_PIPE_GP
, &gp_frame
, sizeof(gp_frame
)))
827 fprintf(stderr
, "gp submit error\n");
829 if (lima_dump_command_stream
) {
830 if (lima_submit_wait(submit
, LIMA_PIPE_GP
, PIPE_TIMEOUT_INFINITE
)) {
831 if (ctx
->gp_output
) {
832 float *pos
= lima_bo_map(ctx
->gp_output
);
833 lima_dump_command_stream_print(
834 pos
, 4 * 4 * 16, true, "gl_pos dump at va %x\n",
838 uint32_t *plb
= lima_bo_map(ctx
->plb
[ctx
->plb_index
]);
839 lima_dump_command_stream_print(
840 plb
, LIMA_CTX_PLB_BLK_SIZE
, false, "plb dump at va %x\n",
841 ctx
->plb
[ctx
->plb_index
]->va
);
844 fprintf(stderr
, "gp submit wait error\n");
849 uint32_t pp_stack_va
= 0;
850 if (ctx
->pp_max_stack_size
) {
851 lima_submit_create_stream_bo(
852 submit
, LIMA_PIPE_PP
,
853 screen
->num_pp
* ctx
->pp_max_stack_size
* pp_stack_pp_size
,
857 lima_update_pp_stream(submit
);
859 struct lima_pp_stream_state
*ps
= &ctx
->pp_stream
;
860 if (screen
->gpu_type
== DRM_LIMA_PARAM_GPU_ID_MALI400
) {
861 struct drm_lima_m400_pp_frame pp_frame
= {0};
862 lima_pack_pp_frame_reg(submit
, pp_frame
.frame
, pp_frame
.wb
);
863 pp_frame
.num_pp
= screen
->num_pp
;
865 for (int i
= 0; i
< screen
->num_pp
; i
++) {
866 pp_frame
.plbu_array_address
[i
] = ps
->va
+ ps
->offset
[i
];
867 if (ctx
->pp_max_stack_size
)
868 pp_frame
.fragment_stack_address
[i
] = pp_stack_va
+
869 ctx
->pp_max_stack_size
* pp_stack_pp_size
* i
;
872 lima_dump_command_stream_print(
873 &pp_frame
, sizeof(pp_frame
), false, "add pp frame\n");
875 if (!lima_submit_start(submit
, LIMA_PIPE_PP
, &pp_frame
, sizeof(pp_frame
)))
876 fprintf(stderr
, "pp submit error\n");
879 struct drm_lima_m450_pp_frame pp_frame
= {0};
880 lima_pack_pp_frame_reg(submit
, pp_frame
.frame
, pp_frame
.wb
);
881 pp_frame
.num_pp
= screen
->num_pp
;
883 if (ctx
->pp_max_stack_size
)
884 for (int i
= 0; i
< screen
->num_pp
; i
++)
885 pp_frame
.fragment_stack_address
[i
] = pp_stack_va
+
886 ctx
->pp_max_stack_size
* pp_stack_pp_size
* i
;
889 for (int i
= 0; i
< screen
->num_pp
; i
++)
890 pp_frame
.plbu_array_address
[i
] = ps
->va
+ ps
->offset
[i
];
893 pp_frame
.use_dlbu
= true;
895 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
896 pp_frame
.dlbu_regs
[0] = ctx
->plb
[ctx
->plb_index
]->va
;
897 pp_frame
.dlbu_regs
[1] = ((fb
->tiled_h
- 1) << 16) | (fb
->tiled_w
- 1);
898 unsigned s
= util_logbase2(LIMA_CTX_PLB_BLK_SIZE
) - 7;
899 pp_frame
.dlbu_regs
[2] = (s
<< 28) | (fb
->shift_h
<< 16) | fb
->shift_w
;
900 pp_frame
.dlbu_regs
[3] = ((fb
->tiled_h
- 1) << 24) | ((fb
->tiled_w
- 1) << 16);
903 lima_dump_command_stream_print(
904 &pp_frame
, sizeof(pp_frame
), false, "add pp frame\n");
906 if (!lima_submit_start(submit
, LIMA_PIPE_PP
, &pp_frame
, sizeof(pp_frame
)))
907 fprintf(stderr
, "pp submit error\n");
910 if (lima_dump_command_stream
) {
911 if (!lima_submit_wait(submit
, LIMA_PIPE_PP
, PIPE_TIMEOUT_INFINITE
)) {
912 fprintf(stderr
, "pp wait error\n");
917 ctx
->plb_index
= (ctx
->plb_index
+ 1) % lima_ctx_num_plb
;
919 if (ctx
->framebuffer
.base
.nr_cbufs
&& (submit
->resolve
& PIPE_CLEAR_COLOR0
)) {
920 /* Set reload flag for next draw. It'll be unset if buffer is cleared */
921 struct lima_surface
*surf
= lima_surface(ctx
->framebuffer
.base
.cbufs
[0]);
925 ctx
->pp_max_stack_size
= 0;
927 ctx
->damage_rect
.minx
= ctx
->damage_rect
.miny
= 0xffff;
928 ctx
->damage_rect
.maxx
= ctx
->damage_rect
.maxy
= 0;
930 lima_dump_file_next();
932 if (ctx
->submit
== submit
)
935 lima_submit_free(submit
);
939 lima_flush(struct lima_context
*ctx
)
941 hash_table_foreach(ctx
->submits
, entry
) {
942 struct lima_submit
*submit
= entry
->data
;
943 lima_do_submit(submit
);
948 lima_flush_submit_accessing_bo(
949 struct lima_context
*ctx
, struct lima_bo
*bo
, bool write
)
951 hash_table_foreach(ctx
->submits
, entry
) {
952 struct lima_submit
*submit
= entry
->data
;
953 if (lima_submit_has_bo(submit
, bo
, write
))
954 lima_do_submit(submit
);
959 * This lets the current submit flush a previous submit which writes to the resource
960 * it wants to read. Typical usage is flushing the FBO which is used as the current task's texture.
/*
 * Look up the submit registered in ctx->write_submits for the resource
 * `prsc` and submit it, unless it is the context's current submit
 * (ctx->submit), which is left pending.
 */
963 lima_flush_previous_submit_writing_resource(
964 struct lima_context
*ctx
, struct pipe_resource
*prsc
)
966 struct hash_entry
*entry
= _mesa_hash_table_search(ctx
->write_submits
, prsc
);
969 struct lima_submit
*submit
= entry
->data
;
971 /* do not flush current submit */
972 if (submit
!= ctx
->submit
)
973 lima_do_submit(submit
);
/*
 * pipe_context::flush hook (installed by lima_submit_init()).
 *
 * In the visible code the PP pipe's out_sync object is exported as a sync
 * file; when the export succeeds (returns 0) a lima fence is created from
 * the resulting fd and stored in *fence.
 * NOTE(review): the rest of the parameter list and the statements before
 * the export are not visible in this excerpt.
 */
978 lima_pipe_flush(struct pipe_context
*pctx
, struct pipe_fence_handle
**fence
,
981 struct lima_context
*ctx
= lima_context(pctx
);
986 int drm_fd
= lima_screen(ctx
->base
.screen
)->fd
;
989 if (!drmSyncobjExportSyncFile(drm_fd
, ctx
->out_sync
[LIMA_PIPE_PP
], &fd
))
990 *fence
= lima_fence_create(fd
);
995 lima_submit_compare(const void *s1
, const void *s2
)
997 return memcmp(s1
, s2
, sizeof(struct lima_submit_key
)) == 0;
1001 lima_submit_hash(const void *key
)
1003 return _mesa_hash_data(key
, sizeof(struct lima_submit_key
));
/*
 * Set up the submit machinery of a context.
 *
 * Creates the ctx->submits table (keyed by struct lima_submit_key through
 * lima_submit_hash / lima_submit_compare) and the ctx->write_submits table
 * (keyed by pointer), both with ctx as memory context.  Marks that no
 * in-fence fd is pending (in_sync_fd = -1), creates one in_sync and one
 * out_sync syncobj per pipe in the signaled state, and installs
 * lima_pipe_flush as the context's flush hook.
 */
1006 bool lima_submit_init(struct lima_context
*ctx
)
1008 int fd
= lima_screen(ctx
->base
.screen
)->fd
;
1010 ctx
->submits
= _mesa_hash_table_create(ctx
, lima_submit_hash
, lima_submit_compare
);
1014 ctx
->write_submits
= _mesa_hash_table_create(
1015 ctx
, _mesa_hash_pointer
, _mesa_key_pointer_equal
);
1016 if (!ctx
->write_submits
)
1019 ctx
->in_sync_fd
= -1;
1021 for (int i
= 0; i
< 2; i
++) {
1022 if (drmSyncobjCreate(fd
, DRM_SYNCOBJ_CREATE_SIGNALED
, ctx
->in_sync
+ i
) ||
1023 drmSyncobjCreate(fd
, DRM_SYNCOBJ_CREATE_SIGNALED
, ctx
->out_sync
+ i
))
1027 ctx
->base
.flush
= lima_pipe_flush
;
/*
 * Release what lima_submit_init() created: every non-zero in_sync /
 * out_sync syncobj is destroyed, and a still-pending in-fence fd
 * (in_sync_fd >= 0) is closed.
 */
1032 void lima_submit_fini(struct lima_context
*ctx
)
1034 int fd
= lima_screen(ctx
->base
.screen
)->fd
;
1038 for (int i
= 0; i
< 2; i
++) {
1039 if (ctx
->in_sync
[i
])
1040 drmSyncobjDestroy(fd
, ctx
->in_sync
[i
]);
1041 if (ctx
->out_sync
[i
])
1042 drmSyncobjDestroy(fd
, ctx
->out_sync
[i
]);
1045 if (ctx
->in_sync_fd
>= 0)
1046 close(ctx
->in_sync_fd
);