2 * Copyright (C) 2017-2019 Lima Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
28 #include "drm-uapi/lima_drm.h"
30 #include "util/u_math.h"
31 #include "util/ralloc.h"
32 #include "util/os_time.h"
33 #include "util/hash_table.h"
34 #include "util/u_upload_mgr.h"
35 #include "util/u_inlines.h"
37 #include "lima_screen.h"
38 #include "lima_context.h"
41 #include "lima_util.h"
42 #include "lima_format.h"
43 #include "lima_resource.h"
44 #include "lima_texture.h"
45 #include "lima_fence.h"
/* Convert a pointer to the 64-bit integer form stored in the pointer-carrying
 * fields of the submit request built in this file (.bos and .frame).
 * The intermediate cast is uintptr_t rather than unsigned long so the full
 * pointer value survives on ABIs where unsigned long is narrower than a
 * pointer. */
#define VOID2U64(x) ((uint64_t)(uintptr_t)(x))
/* Fill job->fb from the context's current framebuffer state.
 *
 * Copies the framebuffer width and height, converts them to 16-pixel units
 * (align to 16, then shift right by 4) and then, while width * height is
 * larger than screen->plb_max_blk, halves one dimension rounding up (width
 * when width >= height). Finally shift_min is set to the smallest of
 * shift_w, shift_h and 2.
 *
 * NOTE(review): the statements that store the tiled/block sizes and update
 * shift_w/shift_h are not visible in this listing; the height halving is
 * presumably the else branch of the width test. Confirm against the full
 * source. */
51 lima_get_fb_info(struct lima_job
*job
)
53 struct lima_context
*ctx
= job
->ctx
;
54 struct lima_job_fb_info
*fb
= &job
->fb
;
56 fb
->width
= ctx
->framebuffer
.base
.width
;
57 fb
->height
= ctx
->framebuffer
.base
.height
;
59 int width
= align(fb
->width
, 16) >> 4;
60 int height
= align(fb
->height
, 16) >> 4;
62 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
70 int limit
= screen
->plb_max_blk
;
71 while ((width
* height
) > limit
) {
72 if (width
>= height
) {
73 width
= (width
+ 1) >> 1;
76 height
= (height
+ 1) >> 1;
84 fb
->shift_min
= MIN3(fb
->shift_w
, fb
->shift_h
, 2);
/* Allocate and initialise a new job for the context's current framebuffer.
 *
 * The job is zero-allocated with rzalloc() under ctx. Visible setup:
 *  - s->fd is taken from the screen;
 *  - damage_rect starts inverted (min = 0xffff, max = 0);
 *  - the two gem_bos/bos dynarrays and the vs/plbu command dynarrays are
 *    initialised with the job itself as their memory context;
 *  - references are taken on the framebuffer's cbufs[0] and zsbuf and stored
 *    in the job key;
 *  - a dump object is created with lima_dump_create().
 *
 * Returns a struct lima_job pointer.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing. */
87 static struct lima_job
*
88 lima_job_create(struct lima_context
*ctx
)
92 s
= rzalloc(ctx
, struct lima_job
);
96 s
->fd
= lima_screen(ctx
->base
.screen
)->fd
;
99 s
->damage_rect
.minx
= s
->damage_rect
.miny
= 0xffff;
100 s
->damage_rect
.maxx
= s
->damage_rect
.maxy
= 0;
102 for (int i
= 0; i
< 2; i
++) {
103 util_dynarray_init(s
->gem_bos
+ i
, s
);
104 util_dynarray_init(s
->bos
+ i
, s
);
107 util_dynarray_init(&s
->vs_cmd_array
, s
);
108 util_dynarray_init(&s
->plbu_cmd_array
, s
);
109 util_dynarray_init(&s
->plbu_cmd_head
, s
);
111 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
112 pipe_surface_reference(&s
->key
.cbuf
, fb
->base
.cbufs
[0]);
113 pipe_surface_reference(&s
->key
.zsbuf
, fb
->base
.zsbuf
);
117 s
->dump
= lima_dump_create();
/* Tear down a job and unregister it from the context.
 *
 * Removes the job from ctx->jobs by its key. If the job has a colour buffer
 * and job->resolve contains PIPE_CLEAR_COLOR0, the colour texture is removed
 * from ctx->write_jobs; the same is done for the depth/stencil texture when
 * resolve contains PIPE_CLEAR_DEPTH or PIPE_CLEAR_STENCIL. The write_jobs
 * removals happen before the surface references are dropped, since they
 * read key.cbuf / key.zsbuf. Finally the dump object is freed.
 *
 * NOTE(review): the release of the job memory itself is not visible in this
 * listing. */
123 lima_job_free(struct lima_job
*job
)
125 struct lima_context
*ctx
= job
->ctx
;
127 _mesa_hash_table_remove_key(ctx
->jobs
, &job
->key
);
129 if (job
->key
.cbuf
&& (job
->resolve
& PIPE_CLEAR_COLOR0
))
130 _mesa_hash_table_remove_key(ctx
->write_jobs
, job
->key
.cbuf
->texture
);
131 if (job
->key
.zsbuf
&& (job
->resolve
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
)))
132 _mesa_hash_table_remove_key(ctx
->write_jobs
, job
->key
.zsbuf
->texture
);
134 pipe_surface_reference(&job
->key
.cbuf
, NULL
);
135 pipe_surface_reference(&job
->key
.zsbuf
, NULL
);
137 lima_dump_free(job
->dump
);
140 /* TODO: do we need a cache for job? */
/* Look up, or create, the job for the context's current framebuffer.
 *
 * A local key is built from the framebuffer's cbufs[0] and zsbuf and looked
 * up in ctx->jobs. A new job is created with lima_job_create() and inserted
 * into ctx->jobs under the key stored inside the job (&job->key), not the
 * stack-local key.
 *
 * NOTE(review): the branch taken when the search finds an entry and the
 * return statements are not visible in this listing; presumably an existing
 * job is returned without creating a new one. */
144 static struct lima_job
*
145 _lima_job_get(struct lima_context
*ctx
)
147 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
148 struct lima_job_key local_key
= {
149 .cbuf
= fb
->base
.cbufs
[0],
150 .zsbuf
= fb
->base
.zsbuf
,
153 struct hash_entry
*entry
= _mesa_hash_table_search(ctx
->jobs
, &local_key
);
157 struct lima_job
*job
= lima_job_create(ctx
);
161 _mesa_hash_table_insert(ctx
->jobs
, &job
->key
, job
);
167 * Note: this function may only be called in the draw code path;
168 * it must not be used in the flush code path.
/* Public accessor for the context's current job.
 *
 * The visible statement stores the result of _lima_job_get(ctx) in ctx->job.
 * NOTE(review): any early return for an already-set ctx->job and the final
 * return are not visible in this listing. */
171 lima_job_get(struct lima_context
*ctx
)
176 ctx
->job
= _lima_job_get(ctx
);
/* Register a buffer object with a job for one pipe.
 *
 * job   - job to add the BO to
 * pipe  - index into the job's per-pipe gem_bos/bos arrays
 * bo    - buffer object to register
 * flags - submit flags stored in the drm_lima_gem_submit_bo entry
 *
 * If gem_bos[pipe] already holds an entry with the same handle, the flags
 * are OR-ed into that entry. Otherwise a new submit entry (handle + flags)
 * is appended to gem_bos[pipe], a slot is grown in bos[pipe], and a
 * reference is taken on the BO so that it stays alive until the job starts.
 *
 * NOTE(review): the return statements and the store into the new bos slot
 * are not visible in this listing. */
180 bool lima_job_add_bo(struct lima_job
*job
, int pipe
,
181 struct lima_bo
*bo
, uint32_t flags
)
183 util_dynarray_foreach(job
->gem_bos
+ pipe
, struct drm_lima_gem_submit_bo
, gem_bo
) {
184 if (bo
->handle
== gem_bo
->handle
) {
185 gem_bo
->flags
|= flags
;
190 struct drm_lima_gem_submit_bo
*job_bo
=
191 util_dynarray_grow(job
->gem_bos
+ pipe
, struct drm_lima_gem_submit_bo
, 1);
192 job_bo
->handle
= bo
->handle
;
193 job_bo
->flags
= flags
;
195 struct lima_bo
**jbo
= util_dynarray_grow(job
->bos
+ pipe
, struct lima_bo
*, 1);
198 /* prevent bo from being freed when job start */
199 lima_bo_reference(bo
);
/* Submit one pipe's work for a job to the kernel.
 *
 * job   - job being submitted
 * pipe  - pipe index; selects gem_bos/bos and the in/out sync objects
 * frame - pointer to the frame descriptor, passed through as a 64-bit value
 * size  - size of the frame descriptor (its use is not visible here)
 *
 * Builds a drm_lima_gem_submit request: nr_bos is the byte size of
 * gem_bos[pipe] divided by the entry size, bos points at the array start and
 * out_sync is ctx->out_sync[pipe]. When ctx->in_sync_fd is valid (>= 0) it
 * is imported into ctx->in_sync[pipe], that sync object is set as
 * req.in_sync[0], and the fd is closed and reset to -1. The request is sent
 * with DRM_IOCTL_LIMA_GEM_SUBMIT; ret is true when the ioctl returns 0.
 * Afterwards one reference is dropped on every BO in bos[pipe].
 *
 * NOTE(review): the handling of the import error and the return statement
 * are not visible in this listing. */
205 lima_job_start(struct lima_job
*job
, int pipe
, void *frame
, uint32_t size
)
207 struct lima_context
*ctx
= job
->ctx
;
208 struct drm_lima_gem_submit req
= {
211 .nr_bos
= job
->gem_bos
[pipe
].size
/ sizeof(struct drm_lima_gem_submit_bo
),
212 .bos
= VOID2U64(util_dynarray_begin(job
->gem_bos
+ pipe
)),
213 .frame
= VOID2U64(frame
),
215 .out_sync
= ctx
->out_sync
[pipe
],
218 if (ctx
->in_sync_fd
>= 0) {
219 int err
= drmSyncobjImportSyncFile(job
->fd
, ctx
->in_sync
[pipe
],
224 req
.in_sync
[0] = ctx
->in_sync
[pipe
];
225 close(ctx
->in_sync_fd
);
226 ctx
->in_sync_fd
= -1;
229 bool ret
= drmIoctl(job
->fd
, DRM_IOCTL_LIMA_GEM_SUBMIT
, &req
) == 0;
231 util_dynarray_foreach(job
->bos
+ pipe
, struct lima_bo
*, bo
) {
232 lima_bo_unreference(*bo
);
/* Wait for a pipe's out-sync object to signal.
 *
 * job        - job whose fd and context are used
 * pipe       - index into ctx->out_sync
 * timeout_ns - relative timeout, converted to an absolute timeout with
 *              os_time_get_absolute_timeout()
 *
 * OS_TIMEOUT_INFINITE is mapped to INT64_MAX before calling
 * drmSyncobjWait(). Returns true when drmSyncobjWait() returns 0. */
239 lima_job_wait(struct lima_job
*job
, int pipe
, uint64_t timeout_ns
)
241 int64_t abs_timeout
= os_time_get_absolute_timeout(timeout_ns
);
242 if (abs_timeout
== OS_TIMEOUT_INFINITE
)
243 abs_timeout
= INT64_MAX
;
245 struct lima_context
*ctx
= job
->ctx
;
246 return !drmSyncobjWait(job
->fd
, ctx
->out_sync
+ pipe
, 1, abs_timeout
, 0, NULL
);
/* Test whether a job references a buffer object.
 *
 * job - job to inspect
 * bo  - buffer object to look for (matched by handle)
 * all - when true any reference counts; when false only entries whose flags
 *       contain LIMA_SUBMIT_BO_WRITE count
 *
 * Both per-pipe gem_bos arrays are scanned.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably true on the first qualifying match and false otherwise. */
250 lima_job_has_bo(struct lima_job
*job
, struct lima_bo
*bo
, bool all
)
252 for (int i
= 0; i
< 2; i
++) {
253 util_dynarray_foreach(job
->gem_bos
+ i
, struct drm_lima_gem_submit_bo
, gem_bo
) {
254 if (bo
->handle
== gem_bo
->handle
) {
255 if (all
|| gem_bo
->flags
& LIMA_SUBMIT_BO_WRITE
)
/* Allocate a transient stream buffer for a job from the context uploader.
 *
 * job  - job that will reference the backing BO
 * pipe - pipe the BO is registered for
 * size - number of bytes to allocate
 * va   - out: GPU address of the allocation (BO va + offset in the BO)
 *
 * The allocation is made with u_upload_alloc() using an alignment of 0x40.
 * The backing BO is added to the job with LIMA_SUBMIT_BO_READ and the local
 * pipe_resource reference is then dropped.
 *
 * NOTE(review): the return statement is not visible in this listing; callers
 * in this file use the result as a CPU pointer to the allocation. */
267 lima_job_create_stream_bo(struct lima_job
*job
, int pipe
,
268 unsigned size
, uint32_t *va
)
270 struct lima_context
*ctx
= job
->ctx
;
274 struct pipe_resource
*pres
= NULL
;
275 u_upload_alloc(ctx
->uploader
, 0, size
, 0x40, &offset
, &pres
, &cpu
);
277 struct lima_resource
*res
= lima_resource(pres
);
278 *va
= res
->bo
->va
+ offset
;
280 lima_job_add_bo(job
, pipe
, res
->bo
, LIMA_SUBMIT_BO_READ
);
282 pipe_resource_reference(&pres
, NULL
);
/* Find the damage-region state that belongs to a job's colour buffer.
 *
 * The lookup only proceeds when the job has a colour buffer and
 * job->resolve contains PIPE_CLEAR_COLOR0; in that case the lima_resource
 * behind the colour surface's texture is fetched.
 *
 * NOTE(review): both return statements are not visible in this listing;
 * presumably NULL in the guarded case and the resource's damage state
 * otherwise. Callers in this file check the result against NULL. */
287 static inline struct lima_damage_region
*
288 lima_job_get_damage(struct lima_job
*job
)
290 if (!(job
->key
.cbuf
&& (job
->resolve
& PIPE_CLEAR_COLOR0
)))
293 struct lima_surface
*surf
= lima_surface(job
->key
.cbuf
);
294 struct lima_resource
*res
= lima_resource(surf
->base
.texture
);
/* Decide whether the colour buffer has to be reloaded before drawing.
 *
 * Only the colour buffer is considered (see the depth-buffer comment in the
 * body), and only when the job has one and job->resolve contains
 * PIPE_CLEAR_COLOR0. Two cases are then distinguished: the resource has a
 * damage region, or the surface's reload flag is set.
 *
 * NOTE(review): the values returned in each case are not visible in this
 * listing. */
299 lima_fb_need_reload(struct lima_job
*job
)
301 /* Depth buffer is always discarded */
302 if (!(job
->key
.cbuf
&& (job
->resolve
& PIPE_CLEAR_COLOR0
)))
305 struct lima_surface
*surf
= lima_surface(job
->key
.cbuf
);
306 struct lima_resource
*res
= lima_resource(surf
->base
.texture
);
307 if (res
->damage
.region
) {
308 /* for EGL_KHR_partial_update, when EGL_EXT_buffer_age is enabled,
309 * we need to reload damage region, otherwise just want to reload
310 * the region not aligned to tile boundary */
311 //if (!res->damage.aligned)
315 else if (surf
->reload
)
/* Emit the draw that reloads the colour buffer contents into the tile
 * buffer, using the reload program stored in screen->pp_buffer.
 *
 * A 0x140-byte stream buffer is allocated for the PP pipe and laid out as:
 *   0x000 render state, 0x040 gl_pos data, 0x080 varyings,
 *   0x0c0 texture descriptor, 0x100 texture descriptor array.
 * The render state points at the reload shader (address OR-ed with the low
 * 5 bits of the program's first word) and at the texture array and varyings
 * inside the same buffer. The texture descriptor describes the job's colour
 * buffer texture as a 2D texture with unnormalised coordinates, nearest
 * min/mag filtering and clamp-to-edge wrapping. The PLBU commands appended
 * to job->plbu_cmd_head set a viewport of 0..fb->width by 0..fb->height,
 * bind the render state and vertex array, and issue an indexed draw using
 * the shared index buffer in screen->pp_buffer.
 *
 * NOTE(review): the contents of reload_gl_pos / reload_varying and some
 * PLBU commands are not visible in this listing. */
322 lima_pack_reload_plbu_cmd(struct lima_job
*job
)
324 #define lima_reload_render_state_offset 0x0000
325 #define lima_reload_gl_pos_offset 0x0040
326 #define lima_reload_varying_offset 0x0080
327 #define lima_reload_tex_desc_offset 0x00c0
328 #define lima_reload_tex_array_offset 0x0100
329 #define lima_reload_buffer_size 0x0140
331 struct lima_context
*ctx
= job
->ctx
;
334 void *cpu
= lima_job_create_stream_bo(
335 job
, LIMA_PIPE_PP
, lima_reload_buffer_size
, &va
);
337 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
339 uint32_t reload_shader_first_instr_size
=
340 ((uint32_t *)(screen
->pp_buffer
->map
+ pp_reload_program_offset
))[0] & 0x1f;
341 uint32_t reload_shader_va
= screen
->pp_buffer
->va
+ pp_reload_program_offset
;
343 struct lima_render_state reload_render_state
= {
344 .alpha_blend
= 0xf03b1ad2,
345 .depth_test
= 0x0000000e,
346 .depth_range
= 0xffff0000,
347 .stencil_front
= 0x00000007,
348 .stencil_back
= 0x00000007,
349 .multi_sample
= 0x0000f007,
350 .shader_address
= reload_shader_va
| reload_shader_first_instr_size
,
351 .varying_types
= 0x00000001,
352 .textures_address
= va
+ lima_reload_tex_array_offset
,
354 .varyings_address
= va
+ lima_reload_varying_offset
,
356 memcpy(cpu
+ lima_reload_render_state_offset
, &reload_render_state
,
357 sizeof(reload_render_state
));
359 lima_tex_desc
*td
= cpu
+ lima_reload_tex_desc_offset
;
360 memset(td
, 0, lima_min_tex_desc_size
);
361 lima_texture_desc_set_res(ctx
, td
, job
->key
.cbuf
->texture
, 0, 0);
362 td
->unnorm_coords
= 1;
363 td
->texture_type
= LIMA_TEXTURE_TYPE_2D
;
364 td
->min_img_filter_nearest
= 1;
365 td
->mag_img_filter_nearest
= 1;
366 td
->wrap_s_clamp_to_edge
= 1;
367 td
->wrap_t_clamp_to_edge
= 1;
368 td
->unknown_2_2
= 0x1;
370 uint32_t *ta
= cpu
+ lima_reload_tex_array_offset
;
371 ta
[0] = va
+ lima_reload_tex_desc_offset
;
373 struct lima_job_fb_info
*fb
= &job
->fb
;
374 float reload_gl_pos
[] = {
379 memcpy(cpu
+ lima_reload_gl_pos_offset
, reload_gl_pos
,
380 sizeof(reload_gl_pos
));
382 float reload_varying
[] = {
386 memcpy(cpu
+ lima_reload_varying_offset
, reload_varying
,
387 sizeof(reload_varying
));
389 PLBU_CMD_BEGIN(&job
->plbu_cmd_head
, 20);
391 PLBU_CMD_VIEWPORT_LEFT(0);
392 PLBU_CMD_VIEWPORT_RIGHT(fui(fb
->width
));
393 PLBU_CMD_VIEWPORT_BOTTOM(0);
394 PLBU_CMD_VIEWPORT_TOP(fui(fb
->height
));
396 PLBU_CMD_RSW_VERTEX_ARRAY(
397 va
+ lima_reload_render_state_offset
,
398 va
+ lima_reload_gl_pos_offset
);
403 PLBU_CMD_INDICES(screen
->pp_buffer
->va
+ pp_shared_index_offset
);
404 PLBU_CMD_INDEXED_DEST(va
+ lima_reload_gl_pos_offset
);
405 PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3);
/* Emit the PLBU commands that open a frame into job->plbu_cmd_head.
 *
 * The visible commands program the block step (shift_min, shift_h,
 * shift_w), the tiled dimensions, the block stride (block_w) and the PLB
 * array address. That address is the slice of ctx->plb_gp_stream selected
 * by ctx->plb_index, with block_w * block_h entries. If
 * lima_fb_need_reload() says so, the reload draw is appended with
 * lima_pack_reload_plbu_cmd().
 *
 * NOTE(review): further commands of the sequence are not visible in this
 * listing. */
411 lima_pack_head_plbu_cmd(struct lima_job
*job
)
413 struct lima_context
*ctx
= job
->ctx
;
414 struct lima_job_fb_info
*fb
= &job
->fb
;
416 PLBU_CMD_BEGIN(&job
->plbu_cmd_head
, 10);
419 PLBU_CMD_BLOCK_STEP(fb
->shift_min
, fb
->shift_h
, fb
->shift_w
);
420 PLBU_CMD_TILED_DIMENSIONS(fb
->tiled_w
, fb
->tiled_h
);
421 PLBU_CMD_BLOCK_STRIDE(fb
->block_w
);
423 PLBU_CMD_ARRAY_ADDRESS(
424 ctx
->plb_gp_stream
->va
+ ctx
->plb_index
* ctx
->plb_gp_size
,
425 fb
->block_w
* fb
->block_h
);
429 if (lima_fb_need_reload(job
))
430 lima_pack_reload_plbu_cmd(job
);
/* Helper used by hilbert_coords(), which calls it with n = 1 << i, the
 * coordinate pointers and the current rx/ry values.
 *
 * NOTE(review): the body is not visible in this listing. Presumably it
 * rotates or flips (*x, *y) within an n-sized quadrant of a Hilbert curve;
 * verify against the full source. */
434 hilbert_rotate(int n
, int *x
, int *y
, int rx
, int ry
)
/* Convert a 1D index d into 2D coordinates (*x, *y) for a grid of size n.
 * lima_generate_pp_stream() uses it to map a stream index to a PLB block
 * position.
 *
 * The loop runs once per power of two below n (i from 0 while (1 << i) < n)
 * and calls hilbert_rotate() for each level with size 1 << i.
 *
 * NOTE(review): the computation of rx/ry and the updates to *x, *y and d are
 * not visible in this listing. */
450 hilbert_coords(int n
, int d
, int *x
, int *y
)
456 for (i
= 0; (1 << i
) < n
; i
++) {
461 hilbert_rotate(1 << i
, x
, y
, rx
, ry
);
/* Compute the layout of the per-PP streams inside one buffer.
 *
 * num_pp  - number of pixel processors, i.e. number of streams
 * tiled_w - width of the covered area in tiles
 * tiled_h - height of the covered area in tiles
 * off     - out array for the per-stream start offsets (its writes are not
 *           visible in this listing)
 *
 * Each stream gets delta = (tiles / num_pp) * 16 + 16 bytes; remain holds
 * the tiles left over by the division. The running offset is aligned to
 * 0x20 after each stream.
 *
 * NOTE(review): the per-iteration offset bookkeeping and the return value
 * are not visible in this listing; the caller uses the result as the buffer
 * size. num_pp must be non-zero since it is used as a divisor. */
471 lima_get_pp_stream_size(int num_pp
, int tiled_w
, int tiled_h
, uint32_t *off
)
473 /* carefully calculate each stream start address:
474 * 1. overflow: each stream size may be different due to
475 * fb->tiled_w * fb->tiled_h can't be divided by num_pp,
476 * extra size should be added to the preceeding stream
477 * 2. alignment: each stream address should be 0x20 aligned
479 int delta
= tiled_w
* tiled_h
/ num_pp
* 16 + 16;
480 int remain
= tiled_w
* tiled_h
% num_pp
;
483 for (int i
= 0; i
< num_pp
; i
++) {
491 offset
= align(offset
, 0x20);
/* Write the per-PP PLB streams for a rectangle of tiles into the buffer
 * described by ctx->pp_stream (map, va and per-PP offsets).
 *
 * job            - job supplying the framebuffer info and dump object
 * off_x, off_y   - tile offset of the rectangle (their use is not visible
 *                  in this listing)
 * tiled_w/tiled_h - size of the rectangle in tiles
 *
 * Tiles are visited in hilbert_coords() order over a square whose side is
 * max(tiled_w, tiled_h) rounded up to a power of two; positions outside the
 * rectangle are skipped. Visited tiles are dealt out to the PPs round-robin
 * (index % num_pp). Each tile contributes four words: 0, 0xB8000000 | x |
 * (y << 8), a word built from the PLB block address, and 0xB0000000. Every
 * stream is then terminated with 0, 0xBC000000, 0, 0 and handed to the dump
 * helper. For an empty rectangle only the terminators are written.
 *
 * NOTE(review): the declarations of stream/si/index/count/dim and the
 * index/offset adjustments are not visible in this listing. */
498 lima_generate_pp_stream(struct lima_job
*job
, int off_x
, int off_y
,
499 int tiled_w
, int tiled_h
)
501 struct lima_context
*ctx
= job
->ctx
;
502 struct lima_pp_stream_state
*ps
= &ctx
->pp_stream
;
503 struct lima_job_fb_info
*fb
= &job
->fb
;
504 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
505 int i
, num_pp
= screen
->num_pp
;
507 /* use hilbert_coords to generates 1D to 2D relationship.
508 * 1D for pp stream index and 2D for plb block x/y on framebuffer.
509 * if multi pp, interleave the 1D index to make each pp's render target
510 * close enough which should result close workload
512 int max
= MAX2(tiled_w
, tiled_h
);
519 /* Don't update count if we get zero rect. We'll just generate
520 * PP stream with just terminators in it.
522 if ((tiled_w
* tiled_h
) != 0) {
523 dim
= util_logbase2_ceil(max
);
524 count
= 1 << (dim
+ dim
);
527 for (i
= 0; i
< num_pp
; i
++)
528 stream
[i
] = ps
->map
+ ps
->offset
[i
];
530 for (i
= 0; i
< count
; i
++) {
532 hilbert_coords(max
, i
, &x
, &y
);
533 if (x
< tiled_w
&& y
< tiled_h
) {
537 int pp
= index
% num_pp
;
538 int offset
= ((y
>> fb
->shift_h
) * fb
->block_w
+
539 (x
>> fb
->shift_w
)) * LIMA_CTX_PLB_BLK_SIZE
;
540 int plb_va
= ctx
->plb
[ctx
->plb_index
]->va
+ offset
;
542 stream
[pp
][si
[pp
]++] = 0;
543 stream
[pp
][si
[pp
]++] = 0xB8000000 | x
| (y
<< 8);
544 stream
[pp
][si
[pp
]++] = 0xE0000002 | ((plb_va
>> 3) & ~0xE0000003);
545 stream
[pp
][si
[pp
]++] = 0xB0000000;
551 for (i
= 0; i
< num_pp
; i
++) {
552 stream
[i
][si
[i
]++] = 0;
553 stream
[i
][si
[i
]++] = 0xBC000000;
554 stream
[i
][si
[i
]++] = 0;
555 stream
[i
][si
[i
]++] = 0;
557 lima_dump_command_stream_print(
558 job
->dump
, stream
[i
], si
[i
] * 4,
559 false, "pp plb stream %d at va %x\n",
560 i
, ps
->va
+ ps
->offset
[i
]);
/* Evict cached PP stream buffers, walking the LRU list from its head.
 *
 * For each list entry the cache size is first compared against
 * lima_plb_pp_stream_cache_size. An evicted entry is looked up in
 * ctx->plb_pp_stream by its key, removed from the hash table and from the
 * LRU list, its BO size is subtracted from ctx->plb_stream_cache_size and
 * its BO reference is dropped. The _safe list iterator is used because
 * entries are unlinked during the walk.
 *
 * NOTE(review): the statement taken when the cache size is within the limit
 * (presumably a break) and the freeing of the entry itself are not visible
 * in this listing. */
565 lima_free_stale_pp_stream_bo(struct lima_context
*ctx
)
567 list_for_each_entry_safe(struct lima_ctx_plb_pp_stream
, entry
,
568 &ctx
->plb_pp_stream_lru_list
, lru_list
) {
569 if (ctx
->plb_stream_cache_size
<= lima_plb_pp_stream_cache_size
)
572 struct hash_entry
*hash_entry
=
573 _mesa_hash_table_search(ctx
->plb_pp_stream
, &entry
->key
);
575 _mesa_hash_table_remove(ctx
->plb_pp_stream
, hash_entry
);
576 list_del(&entry
->lru_list
);
578 ctx
->plb_stream_cache_size
-= entry
->bo
->size
;
579 lima_bo_unreference(entry
->bo
);
/* Select, or build and cache, the PP stream that covers the part of the
 * framebuffer this job touches, and publish it in ctx->pp_stream.
 *
 * The covered rectangle is computed in tile units: job->damage_rect is
 * converted from pixels (min >> 4, max rounded up with + 0xf before >> 4).
 * When the colour buffer has a damage region, that rectangle is intersected
 * with the region's bound; otherwise it is used as is. The result is
 * clamped to fb->tiled_w / fb->tiled_h.
 *
 * A cache key is built from the PLB index, the fb shift/block values and,
 * judging by the key stored in new entries, the bound. On a hit in
 * ctx->plb_pp_stream the entry is moved to the tail of the LRU list, its BO
 * is mapped, its offsets are copied into ctx->pp_stream and the BO is added
 * to the job as a PP read BO. Otherwise stale entries are evicted, a new
 * entry is allocated under the hash table, sized with
 * lima_get_pp_stream_size(), given a new BO, filled with
 * lima_generate_pp_stream(), accounted in ctx->plb_stream_cache_size,
 * linked into the LRU list and hash table, and added to the job.
 *
 * NOTE(review): the hit/miss branch structure, the bound fields of the
 * lookup key and any return statements are not visible in this listing. */
586 lima_update_damage_pp_stream(struct lima_job
*job
)
588 struct lima_context
*ctx
= job
->ctx
;
589 struct lima_damage_region
*ds
= lima_job_get_damage(job
);
590 struct lima_job_fb_info
*fb
= &job
->fb
;
591 struct pipe_scissor_state bound
;
592 struct pipe_scissor_state
*dr
= &job
->damage_rect
;
594 if (ds
&& ds
->region
) {
595 struct pipe_scissor_state
*dbound
= &ds
->bound
;
596 bound
.minx
= MAX2(dbound
->minx
, dr
->minx
>> 4);
597 bound
.miny
= MAX2(dbound
->miny
, dr
->miny
>> 4);
598 bound
.maxx
= MIN2(dbound
->maxx
, (dr
->maxx
+ 0xf) >> 4);
599 bound
.maxy
= MIN2(dbound
->maxy
, (dr
->maxy
+ 0xf) >> 4);
601 bound
.minx
= dr
->minx
>> 4;
602 bound
.miny
= dr
->miny
>> 4;
603 bound
.maxx
= (dr
->maxx
+ 0xf) >> 4;
604 bound
.maxy
= (dr
->maxy
+ 0xf) >> 4;
607 /* Clamp to FB size */
608 bound
.minx
= MIN2(bound
.minx
, fb
->tiled_w
);
609 bound
.miny
= MIN2(bound
.miny
, fb
->tiled_h
);
610 bound
.maxx
= MIN2(bound
.maxx
, fb
->tiled_w
);
611 bound
.maxy
= MIN2(bound
.maxy
, fb
->tiled_h
);
613 struct lima_ctx_plb_pp_stream_key key
= {
614 .plb_index
= ctx
->plb_index
,
619 .shift_w
= fb
->shift_w
,
620 .shift_h
= fb
->shift_h
,
621 .block_w
= fb
->block_w
,
622 .block_h
= fb
->block_h
,
625 struct hash_entry
*entry
=
626 _mesa_hash_table_search(ctx
->plb_pp_stream
, &key
);
628 struct lima_ctx_plb_pp_stream
*s
= entry
->data
;
630 list_del(&s
->lru_list
);
631 list_addtail(&s
->lru_list
, &ctx
->plb_pp_stream_lru_list
);
633 ctx
->pp_stream
.map
= lima_bo_map(s
->bo
);
634 ctx
->pp_stream
.va
= s
->bo
->va
;
635 memcpy(ctx
->pp_stream
.offset
, s
->offset
, sizeof(s
->offset
));
637 lima_job_add_bo(job
, LIMA_PIPE_PP
, s
->bo
, LIMA_SUBMIT_BO_READ
);
642 lima_free_stale_pp_stream_bo(ctx
);
644 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
645 struct lima_ctx_plb_pp_stream
*s
=
646 rzalloc(ctx
->plb_pp_stream
, struct lima_ctx_plb_pp_stream
);
648 list_inithead(&s
->lru_list
);
649 s
->key
.plb_index
= ctx
->plb_index
;
650 s
->key
.minx
= bound
.minx
;
651 s
->key
.maxx
= bound
.maxx
;
652 s
->key
.miny
= bound
.miny
;
653 s
->key
.maxy
= bound
.maxy
;
654 s
->key
.shift_w
= fb
->shift_w
;
655 s
->key
.shift_h
= fb
->shift_h
;
656 s
->key
.block_w
= fb
->block_w
;
657 s
->key
.block_h
= fb
->block_h
;
659 int tiled_w
= bound
.maxx
- bound
.minx
;
660 int tiled_h
= bound
.maxy
- bound
.miny
;
661 int size
= lima_get_pp_stream_size(
662 screen
->num_pp
, tiled_w
, tiled_h
, s
->offset
);
664 s
->bo
= lima_bo_create(screen
, size
, 0);
666 ctx
->pp_stream
.map
= lima_bo_map(s
->bo
);
667 ctx
->pp_stream
.va
= s
->bo
->va
;
668 memcpy(ctx
->pp_stream
.offset
, s
->offset
, sizeof(s
->offset
));
670 lima_generate_pp_stream(job
, bound
.minx
, bound
.miny
, tiled_w
, tiled_h
);
672 ctx
->plb_stream_cache_size
+= size
;
673 list_addtail(&s
->lru_list
, &ctx
->plb_pp_stream_lru_list
);
674 _mesa_hash_table_insert(ctx
->plb_pp_stream
, &s
->key
, s
);
676 lima_job_add_bo(job
, LIMA_PIPE_PP
, s
->bo
, LIMA_SUBMIT_BO_READ
);
/* Report whether the job's damage rectangle covers the whole framebuffer.
 *
 * The visible comparisons require minx == 0, maxx == fb.width and
 * maxy == fb.height.
 * NOTE(review): one term of the && chain (presumably the miny test) is not
 * visible in this listing. */
680 lima_damage_fullscreen(struct lima_job
*job
)
682 struct pipe_scissor_state
*dr
= &job
->damage_rect
;
684 return dr
->minx
== 0 &&
686 dr
->maxx
== job
->fb
.width
&&
687 dr
->maxy
== job
->fb
.height
;
/* Choose how the PP stream for this job is provided.
 *
 * lima_update_damage_pp_stream() is used when the GPU is a Mali400, when
 * the colour buffer has a damage region, or when the damage rectangle does
 * not cover the whole framebuffer. In the remaining case ctx->pp_stream.map
 * is set to NULL (see the Mali450 comment in the body).
 *
 * NOTE(review): the else keyword separating the two cases is not visible in
 * this listing. */
691 lima_update_pp_stream(struct lima_job
*job
)
693 struct lima_context
*ctx
= job
->ctx
;
694 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
695 struct lima_damage_region
*damage
= lima_job_get_damage(job
);
696 if ((screen
->gpu_type
== DRM_LIMA_PARAM_GPU_ID_MALI400
) ||
697 (damage
&& damage
->region
) || !lima_damage_fullscreen(job
))
698 lima_update_damage_pp_stream(job
);
700 /* Mali450 doesn't need full PP stream */
701 ctx
->pp_stream
.map
= NULL
;
/* Register the context-wide buffers that every job needs.
 *
 * GP pipe: plb_gp_stream (read), the current PLB buffer (write) and the
 * current GP tile heap (write). PP pipe: the current PLB buffer (read), the
 * current GP tile heap (read) and screen->pp_buffer (read). "Current" means
 * the entry selected by ctx->plb_index. The slice of the GP PLB stream for
 * the current index is also passed to the dump helper. */
705 lima_update_job_bo(struct lima_job
*job
)
707 struct lima_context
*ctx
= job
->ctx
;
709 lima_job_add_bo(job
, LIMA_PIPE_GP
, ctx
->plb_gp_stream
,
710 LIMA_SUBMIT_BO_READ
);
711 lima_job_add_bo(job
, LIMA_PIPE_GP
, ctx
->plb
[ctx
->plb_index
],
712 LIMA_SUBMIT_BO_WRITE
);
713 lima_job_add_bo(job
, LIMA_PIPE_GP
, ctx
->gp_tile_heap
[ctx
->plb_index
],
714 LIMA_SUBMIT_BO_WRITE
);
716 lima_dump_command_stream_print(
717 job
->dump
, ctx
->plb_gp_stream
->map
+ ctx
->plb_index
* ctx
->plb_gp_size
,
718 ctx
->plb_gp_size
, false, "gp plb stream at va %x\n",
719 ctx
->plb_gp_stream
->va
+ ctx
->plb_index
* ctx
->plb_gp_size
);
721 lima_job_add_bo(job
, LIMA_PIPE_PP
, ctx
->plb
[ctx
->plb_index
],
722 LIMA_SUBMIT_BO_READ
);
723 lima_job_add_bo(job
, LIMA_PIPE_PP
, ctx
->gp_tile_heap
[ctx
->plb_index
],
724 LIMA_SUBMIT_BO_READ
);
726 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
727 lima_job_add_bo(job
, LIMA_PIPE_PP
, screen
->pp_buffer
, LIMA_SUBMIT_BO_READ
);
/* Append the two-word END command (0x00000000, 0x50000000) to a PLBU
 * command array.
 *
 * Capacity for two more 32-bit words is ensured first, and the array size
 * is advanced by the number of words written times 4.
 *
 * NOTE(review): the declaration and initial value of i are not visible in
 * this listing; presumably it starts at the current word count of the
 * array. */
731 lima_finish_plbu_cmd(struct util_dynarray
*plbu_cmd_array
)
734 uint32_t *plbu_cmd
= util_dynarray_ensure_cap(plbu_cmd_array
, plbu_cmd_array
->size
+ 2 * 4);
736 plbu_cmd
[i
++] = 0x00000000;
737 plbu_cmd
[i
++] = 0x50000000; /* END */
739 plbu_cmd_array
->size
+= i
* 4;
/* Fill write-back unit wb_idx for the job's depth/stencil buffer.
 *
 * job    - job whose key.zsbuf is written back
 * wb_reg - start of the write-back register array (viewed as
 *          struct lima_pp_wb_reg)
 * wb_idx - index of the write-back unit to program
 *
 * Sets type 0x01, the address of the surface's mip level inside the BO, and
 * the pixel format from lima_format_get_pixel(). Two layout variants are
 * visible: pixel_layout 0x2 with pitch = fb->tiled_w, and pixel_layout 0x0
 * with pitch = level stride / 8. mrt_bits is 0.
 *
 * NOTE(review): the condition choosing between the two layouts is not
 * visible in this listing (presumably tiled versus linear resource). */
743 lima_pack_wb_zsbuf_reg(struct lima_job
*job
, uint32_t *wb_reg
, int wb_idx
)
745 struct lima_job_fb_info
*fb
= &job
->fb
;
746 struct pipe_surface
*zsbuf
= job
->key
.zsbuf
;
747 struct lima_resource
*res
= lima_resource(zsbuf
->texture
);
748 int level
= zsbuf
->u
.tex
.level
;
749 uint32_t format
= lima_format_get_pixel(zsbuf
->format
);
751 struct lima_pp_wb_reg
*wb
= (void *)wb_reg
;
752 wb
[wb_idx
].type
= 0x01; /* 1 for depth, stencil */
753 wb
[wb_idx
].address
= res
->bo
->va
+ res
->levels
[level
].offset
;
754 wb
[wb_idx
].pixel_format
= format
;
756 wb
[wb_idx
].pixel_layout
= 0x2;
757 wb
[wb_idx
].pitch
= fb
->tiled_w
;
759 wb
[wb_idx
].pixel_layout
= 0x0;
760 wb
[wb_idx
].pitch
= res
->levels
[level
].stride
/ 8;
762 wb
[wb_idx
].mrt_bits
= 0;
/* Fill write-back unit wb_idx for the job's colour buffer.
 *
 * job    - job whose key.cbuf is written back
 * wb_reg - start of the write-back register array (viewed as
 *          struct lima_pp_wb_reg)
 * wb_idx - index of the write-back unit to program
 *
 * Sets type 0x02, the address of the surface's mip level plus
 * first_layer * layer_stride inside the BO, and the pixel format from
 * lima_format_get_pixel(). Two layout variants are visible: pixel_layout
 * 0x2 with pitch = fb->tiled_w, and pixel_layout 0x0 with pitch = level
 * stride / 8. mrt_bits is 0x4 when the format needs red/blue swapping
 * (lima_format_get_swap_rb), else 0.
 *
 * NOTE(review): the condition choosing between the two layouts is not
 * visible in this listing (presumably tiled versus linear resource). */
766 lima_pack_wb_cbuf_reg(struct lima_job
*job
, uint32_t *wb_reg
, int wb_idx
)
768 struct lima_job_fb_info
*fb
= &job
->fb
;
769 struct pipe_surface
*cbuf
= job
->key
.cbuf
;
770 struct lima_resource
*res
= lima_resource(cbuf
->texture
);
771 int level
= cbuf
->u
.tex
.level
;
772 unsigned layer
= cbuf
->u
.tex
.first_layer
;
773 uint32_t format
= lima_format_get_pixel(cbuf
->format
);
774 bool swap_channels
= lima_format_get_swap_rb(cbuf
->format
);
776 struct lima_pp_wb_reg
*wb
= (void *)wb_reg
;
777 wb
[wb_idx
].type
= 0x02; /* 2 for color buffer */
778 wb
[wb_idx
].address
= res
->bo
->va
+ res
->levels
[level
].offset
+ layer
* res
->levels
[level
].layer_stride
;
779 wb
[wb_idx
].pixel_format
= format
;
781 wb
[wb_idx
].pixel_layout
= 0x2;
782 wb
[wb_idx
].pitch
= fb
->tiled_w
;
784 wb
[wb_idx
].pixel_layout
= 0x0;
785 wb
[wb_idx
].pitch
= res
->levels
[level
].stride
/ 8;
787 wb
[wb_idx
].mrt_bits
= swap_channels
? 0x4 : 0x0;
/* Fill the PP frame registers and the write-back registers for a job.
 *
 * job       - job supplying framebuffer info, clear values and resolve mask
 * frame_reg - frame register block (viewed as struct lima_pp_frame_reg)
 * (a further parameter, used below as wb_reg, is declared on a line that is
 *  not visible in this listing)
 *
 * Visible frame fields: render_address points at the frame RSW inside
 * screen->pp_buffer; the depth/stencil clear values and four copies of the
 * 8-bit-per-channel clear colour are copied from job->clear; width/height
 * are stored minus one; fragment_stack_size packs pp_max_stack_size into
 * both 16-bit halves; supersampled_height is height * 2 - 1; blocking packs
 * shift_min, shift_h and shift_w.
 *
 * Write-back units are assigned in order: first the colour buffer when it
 * exists and PIPE_CLEAR_COLOR0 is in job->resolve, then the depth/stencil
 * buffer when it exists and a depth or stencil bit is in job->resolve.
 *
 * NOTE(review): the declaration and initial value of wb_idx are not visible
 * in this listing. */
791 lima_pack_pp_frame_reg(struct lima_job
*job
, uint32_t *frame_reg
,
794 struct lima_context
*ctx
= job
->ctx
;
795 struct lima_job_fb_info
*fb
= &job
->fb
;
796 struct lima_pp_frame_reg
*frame
= (void *)frame_reg
;
797 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
800 frame
->render_address
= screen
->pp_buffer
->va
+ pp_frame_rsw_offset
;
802 frame
->clear_value_depth
= job
->clear
.depth
;
803 frame
->clear_value_stencil
= job
->clear
.stencil
;
804 frame
->clear_value_color
= job
->clear
.color_8pc
;
805 frame
->clear_value_color_1
= job
->clear
.color_8pc
;
806 frame
->clear_value_color_2
= job
->clear
.color_8pc
;
807 frame
->clear_value_color_3
= job
->clear
.color_8pc
;
810 frame
->width
= fb
->width
- 1;
811 frame
->height
= fb
->height
- 1;
813 /* frame->fragment_stack_address is overwritten per-pp in the kernel
814 * by the values of pp_frame.fragment_stack_address[i] */
816 /* These are "stack size" and "stack offset" shifted,
817 * here they are assumed to be always the same. */
818 frame
->fragment_stack_size
= job
->pp_max_stack_size
<< 16 | job
->pp_max_stack_size
;
820 /* related with MSAA and different value when r4p0/r7p0 */
821 frame
->supersampled_height
= fb
->height
* 2 - 1;
822 frame
->scale
= 0xE0C;
826 frame
->blocking
= (fb
->shift_min
<< 28) | (fb
->shift_h
<< 16) | fb
->shift_w
;
827 frame
->foureight
= 0x8888;
829 if (job
->key
.cbuf
&& (job
->resolve
& PIPE_CLEAR_COLOR0
))
830 lima_pack_wb_cbuf_reg(job
, wb_reg
, wb_idx
++);
832 if (job
->key
.zsbuf
&&
833 (job
->resolve
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
)))
834 lima_pack_wb_zsbuf_reg(job
, wb_reg
, wb_idx
++);
/* Submit a job: run the GP (vertex/PLBU) stage, then the PP (fragment)
 * stage.
 *
 * Visible steps, in order:
 *  1. Pack the frame-opening PLBU commands into plbu_cmd_head, append the
 *     END command to plbu_cmd_array and register the common BOs.
 *  2. Copy the VS commands into a GP stream buffer and dump them.
 *  3. Copy plbu_cmd_head followed by plbu_cmd_array into one GP stream
 *     buffer and dump it.
 *  4. Fill the GP frame registers (VS and PLBU command ranges, tile heap
 *     range for the current plb_index) and start the GP job; a failure
 *     prints "gp job error".
 *  5. A GP wait with infinite timeout followed by dumps of gl_pos and the
 *     PLB; a failed wait prints "gp job wait error".
 *  6. When pp_max_stack_size is non-zero, allocate a stack buffer of
 *     num_pp * pp_max_stack_size * pp_stack_pp_size bytes.
 *  7. Update the PP stream and build either the Mali400 or the Mali450 PP
 *     frame. Both set num_pp and per-PP fragment stack addresses. Mali400
 *     always sets per-PP plbu_array_address; Mali450 sets either that or
 *     the DLBU registers with use_dlbu = true.
 *  8. Start the PP job ("pp job error" on failure) and wait on it
 *     ("pp wait error" on failure).
 *  9. Advance ctx->plb_index modulo lima_ctx_num_plb and, for a resolved
 *     colour buffer, fetch its surface to update the reload flag.
 *
 * NOTE(review): many guards are not visible in this listing — in particular
 * the conditions around steps 2 and 5, the Mali450 choice between PP stream
 * and DLBU, and what follows step 9 (flag update, job free). Whether the
 * waits in steps 5 and 8 are debug-only cannot be told from here. */
838 lima_do_job(struct lima_job
*job
)
840 #define pp_stack_pp_size 0x400
842 struct lima_context
*ctx
= job
->ctx
;
844 lima_pack_head_plbu_cmd(job
);
845 lima_finish_plbu_cmd(&job
->plbu_cmd_array
);
847 lima_update_job_bo(job
);
849 int vs_cmd_size
= job
->vs_cmd_array
.size
;
850 uint32_t vs_cmd_va
= 0;
853 void *vs_cmd
= lima_job_create_stream_bo(
854 job
, LIMA_PIPE_GP
, vs_cmd_size
, &vs_cmd_va
);
855 memcpy(vs_cmd
, util_dynarray_begin(&job
->vs_cmd_array
), vs_cmd_size
);
857 lima_dump_command_stream_print(
858 job
->dump
, vs_cmd
, vs_cmd_size
, false, "flush vs cmd at va %x\n", vs_cmd_va
);
859 lima_dump_vs_command_stream_print(job
->dump
, vs_cmd
, vs_cmd_size
, vs_cmd_va
);
862 uint32_t plbu_cmd_va
;
863 int plbu_cmd_size
= job
->plbu_cmd_array
.size
+ job
->plbu_cmd_head
.size
;
864 void *plbu_cmd
= lima_job_create_stream_bo(
865 job
, LIMA_PIPE_GP
, plbu_cmd_size
, &plbu_cmd_va
);
867 util_dynarray_begin(&job
->plbu_cmd_head
),
868 job
->plbu_cmd_head
.size
);
869 memcpy(plbu_cmd
+ job
->plbu_cmd_head
.size
,
870 util_dynarray_begin(&job
->plbu_cmd_array
),
871 job
->plbu_cmd_array
.size
);
873 lima_dump_command_stream_print(
874 job
->dump
, plbu_cmd
, plbu_cmd_size
, false, "flush plbu cmd at va %x\n", plbu_cmd_va
);
875 lima_dump_plbu_command_stream_print(job
->dump
, plbu_cmd
, plbu_cmd_size
, plbu_cmd_va
);
877 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
878 struct drm_lima_gp_frame gp_frame
;
879 struct lima_gp_frame_reg
*gp_frame_reg
= (void *)gp_frame
.frame
;
880 gp_frame_reg
->vs_cmd_start
= vs_cmd_va
;
881 gp_frame_reg
->vs_cmd_end
= vs_cmd_va
+ vs_cmd_size
;
882 gp_frame_reg
->plbu_cmd_start
= plbu_cmd_va
;
883 gp_frame_reg
->plbu_cmd_end
= plbu_cmd_va
+ plbu_cmd_size
;
884 gp_frame_reg
->tile_heap_start
= ctx
->gp_tile_heap
[ctx
->plb_index
]->va
;
885 gp_frame_reg
->tile_heap_end
= ctx
->gp_tile_heap
[ctx
->plb_index
]->va
+ ctx
->gp_tile_heap_size
;
887 lima_dump_command_stream_print(
888 job
->dump
, &gp_frame
, sizeof(gp_frame
), false, "add gp frame\n");
890 if (!lima_job_start(job
, LIMA_PIPE_GP
, &gp_frame
, sizeof(gp_frame
)))
891 fprintf(stderr
, "gp job error\n");
894 if (lima_job_wait(job
, LIMA_PIPE_GP
, PIPE_TIMEOUT_INFINITE
)) {
895 if (ctx
->gp_output
) {
896 float *pos
= lima_bo_map(ctx
->gp_output
);
897 lima_dump_command_stream_print(
898 job
->dump
, pos
, 4 * 4 * 16, true, "gl_pos dump at va %x\n",
902 uint32_t *plb
= lima_bo_map(ctx
->plb
[ctx
->plb_index
]);
903 lima_dump_command_stream_print(
904 job
->dump
, plb
, LIMA_CTX_PLB_BLK_SIZE
, false, "plb dump at va %x\n",
905 ctx
->plb
[ctx
->plb_index
]->va
);
908 fprintf(stderr
, "gp job wait error\n");
913 uint32_t pp_stack_va
= 0;
914 if (job
->pp_max_stack_size
) {
915 lima_job_create_stream_bo(
917 screen
->num_pp
* job
->pp_max_stack_size
* pp_stack_pp_size
,
921 lima_update_pp_stream(job
);
923 struct lima_pp_stream_state
*ps
= &ctx
->pp_stream
;
924 if (screen
->gpu_type
== DRM_LIMA_PARAM_GPU_ID_MALI400
) {
925 struct drm_lima_m400_pp_frame pp_frame
= {0};
926 lima_pack_pp_frame_reg(job
, pp_frame
.frame
, pp_frame
.wb
);
927 pp_frame
.num_pp
= screen
->num_pp
;
929 for (int i
= 0; i
< screen
->num_pp
; i
++) {
930 pp_frame
.plbu_array_address
[i
] = ps
->va
+ ps
->offset
[i
];
931 if (job
->pp_max_stack_size
)
932 pp_frame
.fragment_stack_address
[i
] = pp_stack_va
+
933 job
->pp_max_stack_size
* pp_stack_pp_size
* i
;
936 lima_dump_command_stream_print(
937 job
->dump
, &pp_frame
, sizeof(pp_frame
), false, "add pp frame\n");
939 if (!lima_job_start(job
, LIMA_PIPE_PP
, &pp_frame
, sizeof(pp_frame
)))
940 fprintf(stderr
, "pp job error\n");
943 struct drm_lima_m450_pp_frame pp_frame
= {0};
944 lima_pack_pp_frame_reg(job
, pp_frame
.frame
, pp_frame
.wb
);
945 pp_frame
.num_pp
= screen
->num_pp
;
947 if (job
->pp_max_stack_size
)
948 for (int i
= 0; i
< screen
->num_pp
; i
++)
949 pp_frame
.fragment_stack_address
[i
] = pp_stack_va
+
950 job
->pp_max_stack_size
* pp_stack_pp_size
* i
;
953 for (int i
= 0; i
< screen
->num_pp
; i
++)
954 pp_frame
.plbu_array_address
[i
] = ps
->va
+ ps
->offset
[i
];
957 pp_frame
.use_dlbu
= true;
959 struct lima_job_fb_info
*fb
= &job
->fb
;
960 pp_frame
.dlbu_regs
[0] = ctx
->plb
[ctx
->plb_index
]->va
;
961 pp_frame
.dlbu_regs
[1] = ((fb
->tiled_h
- 1) << 16) | (fb
->tiled_w
- 1);
962 unsigned s
= util_logbase2(LIMA_CTX_PLB_BLK_SIZE
) - 7;
963 pp_frame
.dlbu_regs
[2] = (s
<< 28) | (fb
->shift_h
<< 16) | fb
->shift_w
;
964 pp_frame
.dlbu_regs
[3] = ((fb
->tiled_h
- 1) << 24) | ((fb
->tiled_w
- 1) << 16);
967 lima_dump_command_stream_print(
968 job
->dump
, &pp_frame
, sizeof(pp_frame
), false, "add pp frame\n");
970 if (!lima_job_start(job
, LIMA_PIPE_PP
, &pp_frame
, sizeof(pp_frame
)))
971 fprintf(stderr
, "pp job error\n");
975 if (!lima_job_wait(job
, LIMA_PIPE_PP
, PIPE_TIMEOUT_INFINITE
)) {
976 fprintf(stderr
, "pp wait error\n");
981 ctx
->plb_index
= (ctx
->plb_index
+ 1) % lima_ctx_num_plb
;
983 if (job
->key
.cbuf
&& (job
->resolve
& PIPE_CLEAR_COLOR0
)) {
984 /* Set reload flag for next draw. It'll be unset if buffer is cleared */
985 struct lima_surface
*surf
= lima_surface(job
->key
.cbuf
);
/* Walk every job registered in ctx->jobs.
 *
 * NOTE(review): the action taken on each job is not visible in this
 * listing; presumably each job is submitted with lima_do_job(). */
996 lima_flush(struct lima_context
*ctx
)
998 hash_table_foreach(ctx
->jobs
, entry
) {
999 struct lima_job
*job
= entry
->data
;
/* Act on every job in ctx->jobs that references the given BO.
 *
 * ctx   - context whose jobs are scanned
 * bo    - buffer object to look for
 * write - passed as the `all` argument of lima_job_has_bo(): when true any
 *         job referencing the BO matches, when false only jobs that have
 *         the BO registered with the write flag match
 *
 * NOTE(review): the action taken on a matching job is not visible in this
 * listing; presumably it is submitted with lima_do_job(). */
1005 lima_flush_job_accessing_bo(
1006 struct lima_context
*ctx
, struct lima_bo
*bo
, bool write
)
1008 hash_table_foreach(ctx
->jobs
, entry
) {
1009 struct lima_job
*job
= entry
->data
;
1010 if (lima_job_has_bo(job
, bo
, write
))
1016 * This lets the current job flush a previous job which writes to the resource it wants
1017 * to read. Typical usage is flushing the FBO which is used as the current task's texture.
/* Look up the job recorded in ctx->write_jobs as writing to prsc and act on
 * it unless it is the context's current job (ctx->job).
 *
 * NOTE(review): the check for a missing hash entry and the action taken on
 * the job are not visible in this listing; presumably the job is submitted
 * with lima_do_job(). */
1020 lima_flush_previous_job_writing_resource(
1021 struct lima_context
*ctx
, struct pipe_resource
*prsc
)
1023 struct hash_entry
*entry
= _mesa_hash_table_search(ctx
->write_jobs
, prsc
);
1026 struct lima_job
*job
= entry
->data
;
1028 /* do not flush current job */
1029 if (job
!= ctx
->job
)
/* pipe_context::flush implementation (installed by lima_job_init()).
 *
 * pctx  - Gallium context, converted with lima_context()
 * fence - out: receives a fence created from the PP pipe's out-sync object
 *         when drmSyncobjExportSyncFile() succeeds (returns 0)
 * (remaining parameters are declared on a line that is not visible in this
 *  listing)
 *
 * NOTE(review): the flush of pending jobs and the guard on `fence` being
 * non-NULL are not visible in this listing. */
1035 lima_pipe_flush(struct pipe_context
*pctx
, struct pipe_fence_handle
**fence
,
1038 struct lima_context
*ctx
= lima_context(pctx
);
1043 int drm_fd
= lima_screen(ctx
->base
.screen
)->fd
;
1046 if (!drmSyncobjExportSyncFile(drm_fd
, ctx
->out_sync
[LIMA_PIPE_PP
], &fd
))
1047 *fence
= lima_fence_create(fd
);
/* Key-equality callback for the ctx->jobs hash table.
 * Two keys are equal when their struct lima_job_key bytes compare equal
 * with memcmp(); returns non-zero (true) in that case. */
1052 lima_job_compare(const void *s1
, const void *s2
)
1054 return memcmp(s1
, s2
, sizeof(struct lima_job_key
)) == 0;
/* Hash callback for the ctx->jobs hash table: hashes the raw bytes of a
 * struct lima_job_key with _mesa_hash_data(). */
1058 lima_job_hash(const void *key
)
1060 return _mesa_hash_data(key
, sizeof(struct lima_job_key
));
/* Set up the job-tracking state of a context.
 *
 * Creates ctx->jobs (keyed by struct lima_job_key through lima_job_hash /
 * lima_job_compare) and ctx->write_jobs (keyed by pointer), marks
 * in_sync_fd as unset (-1), creates an in-sync and an out-sync DRM sync
 * object for each of the two pipes in the signalled state, and installs
 * lima_pipe_flush as the context's flush hook.
 *
 * Returns bool.
 * NOTE(review): the failure returns and the final return are not visible in
 * this listing; presumably false on any creation failure, true otherwise. */
1063 bool lima_job_init(struct lima_context
*ctx
)
1065 int fd
= lima_screen(ctx
->base
.screen
)->fd
;
1067 ctx
->jobs
= _mesa_hash_table_create(ctx
, lima_job_hash
, lima_job_compare
);
1071 ctx
->write_jobs
= _mesa_hash_table_create(
1072 ctx
, _mesa_hash_pointer
, _mesa_key_pointer_equal
);
1073 if (!ctx
->write_jobs
)
1076 ctx
->in_sync_fd
= -1;
1078 for (int i
= 0; i
< 2; i
++) {
1079 if (drmSyncobjCreate(fd
, DRM_SYNCOBJ_CREATE_SIGNALED
, ctx
->in_sync
+ i
) ||
1080 drmSyncobjCreate(fd
, DRM_SYNCOBJ_CREATE_SIGNALED
, ctx
->out_sync
+ i
))
1084 ctx
->base
.flush
= lima_pipe_flush
;
/* Release the job-tracking state created by lima_job_init().
 *
 * Destroys each non-zero in-sync and out-sync DRM sync object of the two
 * pipes and closes in_sync_fd when it is valid (>= 0).
 *
 * NOTE(review): statements before the loop (the embedded numbering skips
 * lines there) are not visible in this listing, and this block runs to the
 * end of the visible chunk. */
1089 void lima_job_fini(struct lima_context
*ctx
)
1091 int fd
= lima_screen(ctx
->base
.screen
)->fd
;
1095 for (int i
= 0; i
< 2; i
++) {
1096 if (ctx
->in_sync
[i
])
1097 drmSyncobjDestroy(fd
, ctx
->in_sync
[i
]);
1098 if (ctx
->out_sync
[i
])
1099 drmSyncobjDestroy(fd
, ctx
->out_sync
[i
]);
1102 if (ctx
->in_sync_fd
>= 0)
1103 close(ctx
->in_sync_fd
);