2 * Copyright (C) 2017-2019 Lima Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
28 #include "drm-uapi/lima_drm.h"
30 #include "util/u_math.h"
31 #include "util/ralloc.h"
32 #include "util/os_time.h"
33 #include "util/hash_table.h"
34 #include "util/format/u_format.h"
35 #include "util/u_upload_mgr.h"
36 #include "util/u_inlines.h"
38 #include "lima_screen.h"
39 #include "lima_context.h"
42 #include "lima_util.h"
43 #include "lima_format.h"
44 #include "lima_resource.h"
45 #include "lima_texture.h"
46 #include "lima_fence.h"
49 #define VOID2U64(x) ((uint64_t)(unsigned long)(x))
52 lima_get_fb_info(struct lima_job
*job
)
54 struct lima_context
*ctx
= job
->ctx
;
55 struct lima_job_fb_info
*fb
= &job
->fb
;
57 fb
->width
= ctx
->framebuffer
.base
.width
;
58 fb
->height
= ctx
->framebuffer
.base
.height
;
60 int width
= align(fb
->width
, 16) >> 4;
61 int height
= align(fb
->height
, 16) >> 4;
63 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
71 int limit
= screen
->plb_max_blk
;
72 while ((width
* height
) > limit
) {
73 if (width
>= height
) {
74 width
= (width
+ 1) >> 1;
77 height
= (height
+ 1) >> 1;
85 fb
->shift_min
= MIN3(fb
->shift_w
, fb
->shift_h
, 2);
88 static struct lima_job
*
89 lima_job_create(struct lima_context
*ctx
)
93 s
= rzalloc(ctx
, struct lima_job
);
97 s
->fd
= lima_screen(ctx
->base
.screen
)->fd
;
100 s
->damage_rect
.minx
= s
->damage_rect
.miny
= 0xffff;
101 s
->damage_rect
.maxx
= s
->damage_rect
.maxy
= 0;
103 for (int i
= 0; i
< 2; i
++) {
104 util_dynarray_init(s
->gem_bos
+ i
, s
);
105 util_dynarray_init(s
->bos
+ i
, s
);
108 util_dynarray_init(&s
->vs_cmd_array
, s
);
109 util_dynarray_init(&s
->plbu_cmd_array
, s
);
110 util_dynarray_init(&s
->plbu_cmd_head
, s
);
112 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
113 pipe_surface_reference(&s
->key
.cbuf
, fb
->base
.cbufs
[0]);
114 pipe_surface_reference(&s
->key
.zsbuf
, fb
->base
.zsbuf
);
118 s
->dump
= lima_dump_create();
124 lima_job_free(struct lima_job
*job
)
126 struct lima_context
*ctx
= job
->ctx
;
128 _mesa_hash_table_remove_key(ctx
->jobs
, &job
->key
);
130 if (job
->key
.cbuf
&& (job
->resolve
& PIPE_CLEAR_COLOR0
))
131 _mesa_hash_table_remove_key(ctx
->write_jobs
, job
->key
.cbuf
->texture
);
132 if (job
->key
.zsbuf
&& (job
->resolve
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
)))
133 _mesa_hash_table_remove_key(ctx
->write_jobs
, job
->key
.zsbuf
->texture
);
135 pipe_surface_reference(&job
->key
.cbuf
, NULL
);
136 pipe_surface_reference(&job
->key
.zsbuf
, NULL
);
138 lima_dump_free(job
->dump
);
141 /* TODO: do we need a cache for job? */
145 static struct lima_job
*
146 _lima_job_get(struct lima_context
*ctx
)
148 struct lima_context_framebuffer
*fb
= &ctx
->framebuffer
;
149 struct lima_job_key local_key
= {
150 .cbuf
= fb
->base
.cbufs
[0],
151 .zsbuf
= fb
->base
.zsbuf
,
154 struct hash_entry
*entry
= _mesa_hash_table_search(ctx
->jobs
, &local_key
);
158 struct lima_job
*job
= lima_job_create(ctx
);
162 _mesa_hash_table_insert(ctx
->jobs
, &job
->key
, job
);
168 * Note: this function can only be called in draw code path,
169 * must not exist in flush code path.
172 lima_job_get(struct lima_context
*ctx
)
177 ctx
->job
= _lima_job_get(ctx
);
181 bool lima_job_add_bo(struct lima_job
*job
, int pipe
,
182 struct lima_bo
*bo
, uint32_t flags
)
184 util_dynarray_foreach(job
->gem_bos
+ pipe
, struct drm_lima_gem_submit_bo
, gem_bo
) {
185 if (bo
->handle
== gem_bo
->handle
) {
186 gem_bo
->flags
|= flags
;
191 struct drm_lima_gem_submit_bo
*job_bo
=
192 util_dynarray_grow(job
->gem_bos
+ pipe
, struct drm_lima_gem_submit_bo
, 1);
193 job_bo
->handle
= bo
->handle
;
194 job_bo
->flags
= flags
;
196 struct lima_bo
**jbo
= util_dynarray_grow(job
->bos
+ pipe
, struct lima_bo
*, 1);
199 /* prevent bo from being freed when job start */
200 lima_bo_reference(bo
);
206 lima_job_start(struct lima_job
*job
, int pipe
, void *frame
, uint32_t size
)
208 struct lima_context
*ctx
= job
->ctx
;
209 struct drm_lima_gem_submit req
= {
212 .nr_bos
= job
->gem_bos
[pipe
].size
/ sizeof(struct drm_lima_gem_submit_bo
),
213 .bos
= VOID2U64(util_dynarray_begin(job
->gem_bos
+ pipe
)),
214 .frame
= VOID2U64(frame
),
216 .out_sync
= ctx
->out_sync
[pipe
],
219 if (ctx
->in_sync_fd
>= 0) {
220 int err
= drmSyncobjImportSyncFile(job
->fd
, ctx
->in_sync
[pipe
],
225 req
.in_sync
[0] = ctx
->in_sync
[pipe
];
226 close(ctx
->in_sync_fd
);
227 ctx
->in_sync_fd
= -1;
230 bool ret
= drmIoctl(job
->fd
, DRM_IOCTL_LIMA_GEM_SUBMIT
, &req
) == 0;
232 util_dynarray_foreach(job
->bos
+ pipe
, struct lima_bo
*, bo
) {
233 lima_bo_unreference(*bo
);
240 lima_job_wait(struct lima_job
*job
, int pipe
, uint64_t timeout_ns
)
242 int64_t abs_timeout
= os_time_get_absolute_timeout(timeout_ns
);
243 if (abs_timeout
== OS_TIMEOUT_INFINITE
)
244 abs_timeout
= INT64_MAX
;
246 struct lima_context
*ctx
= job
->ctx
;
247 return !drmSyncobjWait(job
->fd
, ctx
->out_sync
+ pipe
, 1, abs_timeout
, 0, NULL
);
251 lima_job_has_bo(struct lima_job
*job
, struct lima_bo
*bo
, bool all
)
253 for (int i
= 0; i
< 2; i
++) {
254 util_dynarray_foreach(job
->gem_bos
+ i
, struct drm_lima_gem_submit_bo
, gem_bo
) {
255 if (bo
->handle
== gem_bo
->handle
) {
256 if (all
|| gem_bo
->flags
& LIMA_SUBMIT_BO_WRITE
)
268 lima_job_create_stream_bo(struct lima_job
*job
, int pipe
,
269 unsigned size
, uint32_t *va
)
271 struct lima_context
*ctx
= job
->ctx
;
275 struct pipe_resource
*pres
= NULL
;
276 u_upload_alloc(ctx
->uploader
, 0, size
, 0x40, &offset
, &pres
, &cpu
);
278 struct lima_resource
*res
= lima_resource(pres
);
279 *va
= res
->bo
->va
+ offset
;
281 lima_job_add_bo(job
, pipe
, res
->bo
, LIMA_SUBMIT_BO_READ
);
283 pipe_resource_reference(&pres
, NULL
);
288 static inline struct lima_damage_region
*
289 lima_job_get_damage(struct lima_job
*job
)
291 if (!(job
->key
.cbuf
&& (job
->resolve
& PIPE_CLEAR_COLOR0
)))
294 struct lima_surface
*surf
= lima_surface(job
->key
.cbuf
);
295 struct lima_resource
*res
= lima_resource(surf
->base
.texture
);
300 lima_fb_cbuf_needs_reload(struct lima_job
*job
)
302 if (!(job
->key
.cbuf
&& (job
->resolve
& PIPE_CLEAR_COLOR0
)))
305 struct lima_surface
*surf
= lima_surface(job
->key
.cbuf
);
306 struct lima_resource
*res
= lima_resource(surf
->base
.texture
);
307 if (res
->damage
.region
) {
308 /* for EGL_KHR_partial_update, when EGL_EXT_buffer_age is enabled,
309 * we need to reload damage region, otherwise just want to reload
310 * the region not aligned to tile boundary */
311 //if (!res->damage.aligned)
315 else if (surf
->reload
& PIPE_CLEAR_COLOR0
)
322 lima_fb_zsbuf_needs_reload(struct lima_job
*job
)
324 if (!(job
->key
.zsbuf
&& (job
->resolve
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
))))
327 struct lima_surface
*surf
= lima_surface(job
->key
.zsbuf
);
328 if (surf
->reload
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
))
/*
 * Emit PLBU commands into job->plbu_cmd_head that draw one indexed
 * primitive sampling psurf->texture, so the previous contents of the
 * surface are reloaded into the tile buffer.
 *
 * A 0x140-byte stream buffer is allocated on the PP pipe and filled with
 * the render state, vertex positions, varyings, a texture descriptor and
 * a one-entry texture array at the offsets defined below.
 *
 * NOTE(review): the reviewed extract omits several lines of this function
 * (the `va` declaration, the `.aux0` initialiser, both vertex tables and
 * the PLBU_CMD_UNKNOWN2/UNKNOWN1/END lines).  They are restored here from
 * context; confirm them against the upstream tree before relying on them.
 */
static void
lima_pack_reload_plbu_cmd(struct lima_job *job, struct pipe_surface *psurf)
{
   /* byte offsets of each sub-buffer inside the stream allocation */
   #define lima_reload_render_state_offset 0x0000
   #define lima_reload_gl_pos_offset       0x0040
   #define lima_reload_varying_offset      0x0080
   #define lima_reload_tex_desc_offset     0x00c0
   #define lima_reload_tex_array_offset    0x0100
   #define lima_reload_buffer_size         0x0140

   struct lima_context *ctx = job->ctx;
   struct lima_surface *surf = lima_surface(psurf);

   uint32_t va;
   void *cpu = lima_job_create_stream_bo(
      job, LIMA_PIPE_PP, lima_reload_buffer_size, &va);

   struct lima_screen *screen = lima_screen(ctx->base.screen);

   /* low 5 bits of the reload program's first word are OR-ed into the
    * shader address below */
   uint32_t reload_shader_first_instr_size =
      ((uint32_t *)(screen->pp_buffer->map + pp_reload_program_offset))[0] & 0x1f;
   uint32_t reload_shader_va = screen->pp_buffer->va + pp_reload_program_offset;

   struct lima_render_state reload_render_state = {
      .alpha_blend = 0xf03b1ad2,
      .depth_test = 0x0000000e,
      .depth_range = 0xffff0000,
      .stencil_front = 0x00000007,
      .stencil_back = 0x00000007,
      .multi_sample = 0x0000f007,
      .shader_address = reload_shader_va | reload_shader_first_instr_size,
      .varying_types = 0x00000001,
      .textures_address = va + lima_reload_tex_array_offset,
      .aux0 = 0x00004021, /* NOTE(review): restored value — confirm */
      .varyings_address = va + lima_reload_varying_offset,
   };

   /* depth/stencil surfaces use a modified render state */
   if (util_format_is_depth_or_stencil(psurf->format)) {
      reload_render_state.alpha_blend &= 0x0fffffff;
      reload_render_state.depth_test |= 0x400;
      if (surf->reload & PIPE_CLEAR_DEPTH)
         reload_render_state.depth_test |= 0x801;
      if (surf->reload & PIPE_CLEAR_STENCIL) {
         reload_render_state.depth_test |= 0x1000;
         reload_render_state.stencil_front = 0x0000024f;
         reload_render_state.stencil_back = 0x0000024f;
         reload_render_state.stencil_test = 0x0000ffff;
      }
   }

   memcpy(cpu + lima_reload_render_state_offset, &reload_render_state,
          sizeof(reload_render_state));

   /* texture descriptor: unnormalised coords, nearest filtering,
    * clamp-to-edge wrapping on both axes */
   lima_tex_desc *td = cpu + lima_reload_tex_desc_offset;
   memset(td, 0, lima_min_tex_desc_size);
   lima_texture_desc_set_res(ctx, td, psurf->texture, 0, 0);
   td->format = lima_format_get_texel_reload(psurf->format);
   td->unnorm_coords = 1;
   td->texture_type = LIMA_TEXTURE_TYPE_2D;
   td->min_img_filter_nearest = 1;
   td->mag_img_filter_nearest = 1;
   td->wrap_s_clamp_to_edge = 1;
   td->wrap_t_clamp_to_edge = 1;
   td->unknown_2_2 = 0x1;

   /* texture array with a single entry pointing at the descriptor */
   uint32_t *ta = cpu + lima_reload_tex_array_offset;
   ta[0] = va + lima_reload_tex_desc_offset;

   struct lima_job_fb_info *fb = &job->fb;
   /* NOTE(review): both vertex tables are restored — confirm */
   float reload_gl_pos[] = {
      fb->width, 0,          0, 1,
      0,         0,          0, 1,
      0,         fb->height, 0, 1,
   };
   memcpy(cpu + lima_reload_gl_pos_offset, reload_gl_pos,
          sizeof(reload_gl_pos));

   float reload_varying[] = {
      fb->width, 0,          0, 0,
      0,         fb->height, 0, 0,
   };
   memcpy(cpu + lima_reload_varying_offset, reload_varying,
          sizeof(reload_varying));

   PLBU_CMD_BEGIN(&job->plbu_cmd_head, 20);

   /* viewport covers the framebuffer; fui() is applied to right/top */
   PLBU_CMD_VIEWPORT_LEFT(0);
   PLBU_CMD_VIEWPORT_RIGHT(fui(fb->width));
   PLBU_CMD_VIEWPORT_BOTTOM(0);
   PLBU_CMD_VIEWPORT_TOP(fui(fb->height));

   PLBU_CMD_RSW_VERTEX_ARRAY(
      va + lima_reload_render_state_offset,
      va + lima_reload_gl_pos_offset);

   PLBU_CMD_UNKNOWN2();
   PLBU_CMD_UNKNOWN1();

   PLBU_CMD_INDICES(screen->pp_buffer->va + pp_shared_index_offset);
   PLBU_CMD_INDEXED_DEST(va + lima_reload_gl_pos_offset);
   PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3);

   PLBU_CMD_END();
}
/*
 * Emit the PLBU command prologue for a job into job->plbu_cmd_head:
 * block step, tiled dimensions, block stride and the address of this
 * frame's slice of the GP PLB stream.  Afterwards append reload commands
 * for the colour and/or depth-stencil buffer when they need reloading.
 *
 * NOTE(review): the PLBU_CMD_UNKNOWN2() and PLBU_CMD_END() lines are not
 * visible in the reviewed extract and are restored from context — confirm.
 */
static void
lima_pack_head_plbu_cmd(struct lima_job *job)
{
   struct lima_context *ctx = job->ctx;
   struct lima_job_fb_info *fb = &job->fb;

   PLBU_CMD_BEGIN(&job->plbu_cmd_head, 10);

   PLBU_CMD_UNKNOWN2();
   PLBU_CMD_BLOCK_STEP(fb->shift_min, fb->shift_h, fb->shift_w);
   PLBU_CMD_TILED_DIMENSIONS(fb->tiled_w, fb->tiled_h);
   PLBU_CMD_BLOCK_STRIDE(fb->block_w);

   /* plb_gp_stream holds one plb_gp_size slice per plb_index */
   PLBU_CMD_ARRAY_ADDRESS(
      ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size,
      fb->block_w * fb->block_h);

   PLBU_CMD_END();

   if (lima_fb_cbuf_needs_reload(job))
      lima_pack_reload_plbu_cmd(job, job->key.cbuf);

   if (lima_fb_zsbuf_needs_reload(job))
      lima_pack_reload_plbu_cmd(job, job->key.zsbuf);
}
/*
 * Rotate/flip a quadrant of side n for the Hilbert curve walk.
 * Only acts when ry == 0: mirrors both coordinates when rx == 1,
 * then swaps x and y.
 */
static void
hilbert_rotate(int n, int *x, int *y, int rx, int ry)
{
   if (ry != 0)
      return;

   if (rx == 1) {
      *x = n - 1 - *x;
      *y = n - 1 - *y;
   }

   /* exchange the two coordinates */
   int old_x = *x;
   *x = *y;
   *y = old_x;
}

/*
 * Convert distance d along a Hilbert curve into (x, y) coordinates.
 * One level is processed for every power of two strictly below n,
 * consuming two bits of d per level.
 */
static void
hilbert_coords(int n, int d, int *x, int *y)
{
   int rest = d;

   *x = 0;
   *y = 0;

   for (int level = 0; (1 << level) < n; level++, rest /= 4) {
      const int side = 1 << level;
      const int rx = (rest / 2) & 1;
      const int ry = (rest ^ rx) & 1;

      hilbert_rotate(side, x, y, rx, ry);

      *x += rx << level;
      *y += ry << level;
   }
}
/*
 * Compute the start offset of each PP stream and the total buffer size.
 *
 * off[i] receives the byte offset of stream i; the return value is the
 * offset just past the last stream.
 *
 * carefully calculate each stream start address:
 * 1. overflow: each stream size may be different due to
 *    fb->tiled_w * fb->tiled_h can't be divided by num_pp,
 *    extra size should be added to the preceding stream
 * 2. alignment: each stream address should be 0x20 aligned
 */
static int
lima_get_pp_stream_size(int num_pp, int tiled_w, int tiled_h, uint32_t *off)
{
   const int tiles = tiled_w * tiled_h;
   /* 16 bytes per tile plus one extra 16-byte entry per stream */
   const int per_stream = tiles / num_pp * 16 + 16;
   int leftover = tiles % num_pp;
   int cursor = 0;

   for (int pp = 0; pp < num_pp; pp++) {
      off[pp] = cursor;

      cursor += per_stream;
      if (leftover) {
         /* the first streams absorb the remainder, one tile each */
         cursor += 16;
         leftover--;
      }
      cursor = align(cursor, 0x20);
   }

   return cursor;
}
530 lima_generate_pp_stream(struct lima_job
*job
, int off_x
, int off_y
,
531 int tiled_w
, int tiled_h
)
533 struct lima_context
*ctx
= job
->ctx
;
534 struct lima_pp_stream_state
*ps
= &ctx
->pp_stream
;
535 struct lima_job_fb_info
*fb
= &job
->fb
;
536 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
537 int i
, num_pp
= screen
->num_pp
;
539 /* use hilbert_coords to generates 1D to 2D relationship.
540 * 1D for pp stream index and 2D for plb block x/y on framebuffer.
541 * if multi pp, interleave the 1D index to make each pp's render target
542 * close enough which should result close workload
544 int max
= MAX2(tiled_w
, tiled_h
);
551 /* Don't update count if we get zero rect. We'll just generate
552 * PP stream with just terminators in it.
554 if ((tiled_w
* tiled_h
) != 0) {
555 dim
= util_logbase2_ceil(max
);
556 count
= 1 << (dim
+ dim
);
559 for (i
= 0; i
< num_pp
; i
++)
560 stream
[i
] = ps
->map
+ ps
->offset
[i
];
562 for (i
= 0; i
< count
; i
++) {
564 hilbert_coords(max
, i
, &x
, &y
);
565 if (x
< tiled_w
&& y
< tiled_h
) {
569 int pp
= index
% num_pp
;
570 int offset
= ((y
>> fb
->shift_h
) * fb
->block_w
+
571 (x
>> fb
->shift_w
)) * LIMA_CTX_PLB_BLK_SIZE
;
572 int plb_va
= ctx
->plb
[ctx
->plb_index
]->va
+ offset
;
574 stream
[pp
][si
[pp
]++] = 0;
575 stream
[pp
][si
[pp
]++] = 0xB8000000 | x
| (y
<< 8);
576 stream
[pp
][si
[pp
]++] = 0xE0000002 | ((plb_va
>> 3) & ~0xE0000003);
577 stream
[pp
][si
[pp
]++] = 0xB0000000;
583 for (i
= 0; i
< num_pp
; i
++) {
584 stream
[i
][si
[i
]++] = 0;
585 stream
[i
][si
[i
]++] = 0xBC000000;
586 stream
[i
][si
[i
]++] = 0;
587 stream
[i
][si
[i
]++] = 0;
589 lima_dump_command_stream_print(
590 job
->dump
, stream
[i
], si
[i
] * 4,
591 false, "pp plb stream %d at va %x\n",
592 i
, ps
->va
+ ps
->offset
[i
]);
597 lima_free_stale_pp_stream_bo(struct lima_context
*ctx
)
599 list_for_each_entry_safe(struct lima_ctx_plb_pp_stream
, entry
,
600 &ctx
->plb_pp_stream_lru_list
, lru_list
) {
601 if (ctx
->plb_stream_cache_size
<= lima_plb_pp_stream_cache_size
)
604 struct hash_entry
*hash_entry
=
605 _mesa_hash_table_search(ctx
->plb_pp_stream
, &entry
->key
);
607 _mesa_hash_table_remove(ctx
->plb_pp_stream
, hash_entry
);
608 list_del(&entry
->lru_list
);
610 ctx
->plb_stream_cache_size
-= entry
->bo
->size
;
611 lima_bo_unreference(entry
->bo
);
618 lima_update_damage_pp_stream(struct lima_job
*job
)
620 struct lima_context
*ctx
= job
->ctx
;
621 struct lima_damage_region
*ds
= lima_job_get_damage(job
);
622 struct lima_job_fb_info
*fb
= &job
->fb
;
623 struct pipe_scissor_state bound
;
624 struct pipe_scissor_state
*dr
= &job
->damage_rect
;
626 if (ds
&& ds
->region
) {
627 struct pipe_scissor_state
*dbound
= &ds
->bound
;
628 bound
.minx
= MAX2(dbound
->minx
, dr
->minx
>> 4);
629 bound
.miny
= MAX2(dbound
->miny
, dr
->miny
>> 4);
630 bound
.maxx
= MIN2(dbound
->maxx
, (dr
->maxx
+ 0xf) >> 4);
631 bound
.maxy
= MIN2(dbound
->maxy
, (dr
->maxy
+ 0xf) >> 4);
633 bound
.minx
= dr
->minx
>> 4;
634 bound
.miny
= dr
->miny
>> 4;
635 bound
.maxx
= (dr
->maxx
+ 0xf) >> 4;
636 bound
.maxy
= (dr
->maxy
+ 0xf) >> 4;
639 /* Clamp to FB size */
640 bound
.minx
= MIN2(bound
.minx
, fb
->tiled_w
);
641 bound
.miny
= MIN2(bound
.miny
, fb
->tiled_h
);
642 bound
.maxx
= MIN2(bound
.maxx
, fb
->tiled_w
);
643 bound
.maxy
= MIN2(bound
.maxy
, fb
->tiled_h
);
645 struct lima_ctx_plb_pp_stream_key key
= {
646 .plb_index
= ctx
->plb_index
,
651 .shift_w
= fb
->shift_w
,
652 .shift_h
= fb
->shift_h
,
653 .block_w
= fb
->block_w
,
654 .block_h
= fb
->block_h
,
657 struct hash_entry
*entry
=
658 _mesa_hash_table_search(ctx
->plb_pp_stream
, &key
);
660 struct lima_ctx_plb_pp_stream
*s
= entry
->data
;
662 list_del(&s
->lru_list
);
663 list_addtail(&s
->lru_list
, &ctx
->plb_pp_stream_lru_list
);
665 ctx
->pp_stream
.map
= lima_bo_map(s
->bo
);
666 ctx
->pp_stream
.va
= s
->bo
->va
;
667 memcpy(ctx
->pp_stream
.offset
, s
->offset
, sizeof(s
->offset
));
669 lima_job_add_bo(job
, LIMA_PIPE_PP
, s
->bo
, LIMA_SUBMIT_BO_READ
);
674 lima_free_stale_pp_stream_bo(ctx
);
676 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
677 struct lima_ctx_plb_pp_stream
*s
=
678 rzalloc(ctx
->plb_pp_stream
, struct lima_ctx_plb_pp_stream
);
680 list_inithead(&s
->lru_list
);
681 s
->key
.plb_index
= ctx
->plb_index
;
682 s
->key
.minx
= bound
.minx
;
683 s
->key
.maxx
= bound
.maxx
;
684 s
->key
.miny
= bound
.miny
;
685 s
->key
.maxy
= bound
.maxy
;
686 s
->key
.shift_w
= fb
->shift_w
;
687 s
->key
.shift_h
= fb
->shift_h
;
688 s
->key
.block_w
= fb
->block_w
;
689 s
->key
.block_h
= fb
->block_h
;
691 int tiled_w
= bound
.maxx
- bound
.minx
;
692 int tiled_h
= bound
.maxy
- bound
.miny
;
693 int size
= lima_get_pp_stream_size(
694 screen
->num_pp
, tiled_w
, tiled_h
, s
->offset
);
696 s
->bo
= lima_bo_create(screen
, size
, 0);
698 ctx
->pp_stream
.map
= lima_bo_map(s
->bo
);
699 ctx
->pp_stream
.va
= s
->bo
->va
;
700 memcpy(ctx
->pp_stream
.offset
, s
->offset
, sizeof(s
->offset
));
702 lima_generate_pp_stream(job
, bound
.minx
, bound
.miny
, tiled_w
, tiled_h
);
704 ctx
->plb_stream_cache_size
+= size
;
705 list_addtail(&s
->lru_list
, &ctx
->plb_pp_stream_lru_list
);
706 _mesa_hash_table_insert(ctx
->plb_pp_stream
, &s
->key
, s
);
708 lima_job_add_bo(job
, LIMA_PIPE_PP
, s
->bo
, LIMA_SUBMIT_BO_READ
);
712 lima_damage_fullscreen(struct lima_job
*job
)
714 struct pipe_scissor_state
*dr
= &job
->damage_rect
;
716 return dr
->minx
== 0 &&
718 dr
->maxx
== job
->fb
.width
&&
719 dr
->maxy
== job
->fb
.height
;
723 lima_update_pp_stream(struct lima_job
*job
)
725 struct lima_context
*ctx
= job
->ctx
;
726 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
727 struct lima_damage_region
*damage
= lima_job_get_damage(job
);
728 if ((screen
->gpu_type
== DRM_LIMA_PARAM_GPU_ID_MALI400
) ||
729 (damage
&& damage
->region
) || !lima_damage_fullscreen(job
))
730 lima_update_damage_pp_stream(job
);
732 /* Mali450 doesn't need full PP stream */
733 ctx
->pp_stream
.map
= NULL
;
737 lima_update_job_bo(struct lima_job
*job
)
739 struct lima_context
*ctx
= job
->ctx
;
741 lima_job_add_bo(job
, LIMA_PIPE_GP
, ctx
->plb_gp_stream
,
742 LIMA_SUBMIT_BO_READ
);
743 lima_job_add_bo(job
, LIMA_PIPE_GP
, ctx
->plb
[ctx
->plb_index
],
744 LIMA_SUBMIT_BO_WRITE
);
745 lima_job_add_bo(job
, LIMA_PIPE_GP
, ctx
->gp_tile_heap
[ctx
->plb_index
],
746 LIMA_SUBMIT_BO_WRITE
);
748 lima_dump_command_stream_print(
749 job
->dump
, ctx
->plb_gp_stream
->map
+ ctx
->plb_index
* ctx
->plb_gp_size
,
750 ctx
->plb_gp_size
, false, "gp plb stream at va %x\n",
751 ctx
->plb_gp_stream
->va
+ ctx
->plb_index
* ctx
->plb_gp_size
);
753 lima_job_add_bo(job
, LIMA_PIPE_PP
, ctx
->plb
[ctx
->plb_index
],
754 LIMA_SUBMIT_BO_READ
);
755 lima_job_add_bo(job
, LIMA_PIPE_PP
, ctx
->gp_tile_heap
[ctx
->plb_index
],
756 LIMA_SUBMIT_BO_READ
);
758 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
759 lima_job_add_bo(job
, LIMA_PIPE_PP
, screen
->pp_buffer
, LIMA_SUBMIT_BO_READ
);
763 lima_finish_plbu_cmd(struct util_dynarray
*plbu_cmd_array
)
766 uint32_t *plbu_cmd
= util_dynarray_ensure_cap(plbu_cmd_array
, plbu_cmd_array
->size
+ 2 * 4);
768 plbu_cmd
[i
++] = 0x00000000;
769 plbu_cmd
[i
++] = 0x50000000; /* END */
771 plbu_cmd_array
->size
+= i
* 4;
775 lima_pack_wb_zsbuf_reg(struct lima_job
*job
, uint32_t *wb_reg
, int wb_idx
)
777 struct lima_job_fb_info
*fb
= &job
->fb
;
778 struct pipe_surface
*zsbuf
= job
->key
.zsbuf
;
779 struct lima_resource
*res
= lima_resource(zsbuf
->texture
);
780 int level
= zsbuf
->u
.tex
.level
;
781 uint32_t format
= lima_format_get_pixel(zsbuf
->format
);
783 struct lima_pp_wb_reg
*wb
= (void *)wb_reg
;
784 wb
[wb_idx
].type
= 0x01; /* 1 for depth, stencil */
785 wb
[wb_idx
].address
= res
->bo
->va
+ res
->levels
[level
].offset
;
786 wb
[wb_idx
].pixel_format
= format
;
788 wb
[wb_idx
].pixel_layout
= 0x2;
789 wb
[wb_idx
].pitch
= fb
->tiled_w
;
791 wb
[wb_idx
].pixel_layout
= 0x0;
792 wb
[wb_idx
].pitch
= res
->levels
[level
].stride
/ 8;
794 wb
[wb_idx
].mrt_bits
= 0;
798 lima_pack_wb_cbuf_reg(struct lima_job
*job
, uint32_t *frame_reg
,
799 uint32_t *wb_reg
, int wb_idx
)
801 struct lima_job_fb_info
*fb
= &job
->fb
;
802 struct pipe_surface
*cbuf
= job
->key
.cbuf
;
803 struct lima_resource
*res
= lima_resource(cbuf
->texture
);
804 int level
= cbuf
->u
.tex
.level
;
805 unsigned layer
= cbuf
->u
.tex
.first_layer
;
806 uint32_t format
= lima_format_get_pixel(cbuf
->format
);
807 bool swap_channels
= lima_format_get_pixel_swap_rb(cbuf
->format
);
809 struct lima_pp_frame_reg
*frame
= (void *)frame_reg
;
810 frame
->channel_layout
= lima_format_get_channel_layout(cbuf
->format
);
812 struct lima_pp_wb_reg
*wb
= (void *)wb_reg
;
813 wb
[wb_idx
].type
= 0x02; /* 2 for color buffer */
814 wb
[wb_idx
].address
= res
->bo
->va
+ res
->levels
[level
].offset
+ layer
* res
->levels
[level
].layer_stride
;
815 wb
[wb_idx
].pixel_format
= format
;
817 wb
[wb_idx
].pixel_layout
= 0x2;
818 wb
[wb_idx
].pitch
= fb
->tiled_w
;
820 wb
[wb_idx
].pixel_layout
= 0x0;
821 wb
[wb_idx
].pitch
= res
->levels
[level
].stride
/ 8;
823 wb
[wb_idx
].mrt_bits
= swap_channels
? 0x4 : 0x0;
/*
 * Fill the PP frame registers (frame_reg) and write-back registers
 * (wb_reg) for a job: render address, clear values, frame size, stack
 * size, blocking parameters, and one write-back slot per buffer the job
 * resolves (colour first, then depth/stencil).
 *
 * NOTE(review): the reviewed extract omits the third parameter line, the
 * `wb_idx` declaration and the assignments to frame->flags, frame->one,
 * frame->dubya and frame->onscreen.  They are restored here from context;
 * confirm them against the upstream tree.
 */
static void
lima_pack_pp_frame_reg(struct lima_job *job, uint32_t *frame_reg,
                       uint32_t *wb_reg)
{
   struct lima_context *ctx = job->ctx;
   struct lima_job_fb_info *fb = &job->fb;
   struct lima_pp_frame_reg *frame = (void *)frame_reg;
   struct lima_screen *screen = lima_screen(ctx->base.screen);
   int wb_idx = 0;

   frame->render_address = screen->pp_buffer->va + pp_frame_rsw_offset;
   frame->flags = 0x02;
   frame->clear_value_depth = job->clear.depth;
   frame->clear_value_stencil = job->clear.stencil;
   /* the same 8-bit-per-channel clear colour goes in all four slots */
   frame->clear_value_color = job->clear.color_8pc;
   frame->clear_value_color_1 = job->clear.color_8pc;
   frame->clear_value_color_2 = job->clear.color_8pc;
   frame->clear_value_color_3 = job->clear.color_8pc;
   frame->one = 1;

   /* the registers hold size minus one */
   frame->width = fb->width - 1;
   frame->height = fb->height - 1;

   /* frame->fragment_stack_address is overwritten per-pp in the kernel
    * by the values of pp_frame.fragment_stack_address[i] */

   /* These are "stack size" and "stack offset" shifted,
    * here they are assumed to be always the same. */
   frame->fragment_stack_size = job->pp_max_stack_size << 16 | job->pp_max_stack_size;

   /* related with MSAA and different value when r4p0/r7p0 */
   frame->supersampled_height = fb->height * 2 - 1;
   frame->scale = 0xE0C;

   frame->dubya = 0x77;
   frame->onscreen = 1;
   frame->blocking = (fb->shift_min << 28) | (fb->shift_h << 16) | fb->shift_w;

   /* Set default layout to 8888 */
   frame->channel_layout = 0x8888;

   /* wb_idx advances only for buffers that are actually written back */
   if (job->key.cbuf && (job->resolve & PIPE_CLEAR_COLOR0))
      lima_pack_wb_cbuf_reg(job, frame_reg, wb_reg, wb_idx++);

   if (job->key.zsbuf &&
       (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)))
      lima_pack_wb_zsbuf_reg(job, wb_reg, wb_idx++);
}
/*
 * Submit a job to the hardware and release it.
 *
 * Steps, in order: finish the PLBU command buffers, attach the context
 * buffers, upload the VS and PLBU command streams, submit the GP frame,
 * build the PP stream, submit the PP frame (Mali400 or Mali450 layout),
 * advance ctx->plb_index, set the surfaces' reload flags for the next
 * draw, clear ctx->job if it was this job, and free the job.
 *
 * Submission failures are only reported on stderr.
 *
 * NOTE(review): the reviewed extract omits a number of lines of this
 * function (several `if`/`else` headers, the `if (job->dump)` guards and
 * their `exit(1)` calls, some call arguments, and the final
 * `ctx->job = NULL` / `lima_job_free(job)`).  They are restored here from
 * context; confirm them against the upstream tree.
 */
static void
lima_do_job(struct lima_job *job)
{
   /* stack bytes per PP for each unit of pp_max_stack_size */
   #define pp_stack_pp_size 0x400

   struct lima_context *ctx = job->ctx;

   lima_pack_head_plbu_cmd(job);
   lima_finish_plbu_cmd(&job->plbu_cmd_array);

   lima_update_job_bo(job);

   int vs_cmd_size = job->vs_cmd_array.size;
   uint32_t vs_cmd_va = 0;

   /* the VS command stream may be empty; then its va stays 0 */
   if (vs_cmd_size) {
      void *vs_cmd = lima_job_create_stream_bo(
         job, LIMA_PIPE_GP, vs_cmd_size, &vs_cmd_va);
      memcpy(vs_cmd, util_dynarray_begin(&job->vs_cmd_array), vs_cmd_size);

      lima_dump_command_stream_print(
         job->dump, vs_cmd, vs_cmd_size, false, "flush vs cmd at va %x\n", vs_cmd_va);
      lima_dump_vs_command_stream_print(job->dump, vs_cmd, vs_cmd_size, vs_cmd_va);
   }

   /* PLBU stream = head commands followed by the draw commands */
   uint32_t plbu_cmd_va;
   int plbu_cmd_size = job->plbu_cmd_array.size + job->plbu_cmd_head.size;
   void *plbu_cmd = lima_job_create_stream_bo(
      job, LIMA_PIPE_GP, plbu_cmd_size, &plbu_cmd_va);
   memcpy(plbu_cmd,
          util_dynarray_begin(&job->plbu_cmd_head),
          job->plbu_cmd_head.size);
   memcpy(plbu_cmd + job->plbu_cmd_head.size,
          util_dynarray_begin(&job->plbu_cmd_array),
          job->plbu_cmd_array.size);

   lima_dump_command_stream_print(
      job->dump, plbu_cmd, plbu_cmd_size, false, "flush plbu cmd at va %x\n", plbu_cmd_va);
   lima_dump_plbu_command_stream_print(job->dump, plbu_cmd, plbu_cmd_size, plbu_cmd_va);

   struct lima_screen *screen = lima_screen(ctx->base.screen);
   struct drm_lima_gp_frame gp_frame;
   struct lima_gp_frame_reg *gp_frame_reg = (void *)gp_frame.frame;
   gp_frame_reg->vs_cmd_start = vs_cmd_va;
   gp_frame_reg->vs_cmd_end = vs_cmd_va + vs_cmd_size;
   gp_frame_reg->plbu_cmd_start = plbu_cmd_va;
   gp_frame_reg->plbu_cmd_end = plbu_cmd_va + plbu_cmd_size;
   gp_frame_reg->tile_heap_start = ctx->gp_tile_heap[ctx->plb_index]->va;
   gp_frame_reg->tile_heap_end = ctx->gp_tile_heap[ctx->plb_index]->va + ctx->gp_tile_heap_size;

   lima_dump_command_stream_print(
      job->dump, &gp_frame, sizeof(gp_frame), false, "add gp frame\n");

   if (!lima_job_start(job, LIMA_PIPE_GP, &gp_frame, sizeof(gp_frame)))
      fprintf(stderr, "gp job error\n");

   /* when dumping, wait for the GP so its output can be printed */
   if (job->dump) {
      if (lima_job_wait(job, LIMA_PIPE_GP, PIPE_TIMEOUT_INFINITE)) {
         if (ctx->gp_output) {
            float *pos = lima_bo_map(ctx->gp_output);
            lima_dump_command_stream_print(
               job->dump, pos, 4 * 4 * 16, true, "gl_pos dump at va %x\n",
               ctx->gp_output->va);
         }

         uint32_t *plb = lima_bo_map(ctx->plb[ctx->plb_index]);
         lima_dump_command_stream_print(
            job->dump, plb, LIMA_CTX_PLB_BLK_SIZE, false, "plb dump at va %x\n",
            ctx->plb[ctx->plb_index]->va);
      }
      else {
         fprintf(stderr, "gp job wait error\n");
         exit(1);
      }
   }

   /* one fragment stack region per PP, allocated only when needed */
   uint32_t pp_stack_va = 0;
   if (job->pp_max_stack_size) {
      lima_job_create_stream_bo(
         job, LIMA_PIPE_PP,
         screen->num_pp * job->pp_max_stack_size * pp_stack_pp_size,
         &pp_stack_va);
   }

   lima_update_pp_stream(job);

   struct lima_pp_stream_state *ps = &ctx->pp_stream;
   if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) {
      struct drm_lima_m400_pp_frame pp_frame = {0};
      lima_pack_pp_frame_reg(job, pp_frame.frame, pp_frame.wb);
      pp_frame.num_pp = screen->num_pp;

      for (int i = 0; i < screen->num_pp; i++) {
         pp_frame.plbu_array_address[i] = ps->va + ps->offset[i];
         if (job->pp_max_stack_size)
            pp_frame.fragment_stack_address[i] = pp_stack_va +
               job->pp_max_stack_size * pp_stack_pp_size * i;
      }

      lima_dump_command_stream_print(
         job->dump, &pp_frame, sizeof(pp_frame), false, "add pp frame\n");

      if (!lima_job_start(job, LIMA_PIPE_PP, &pp_frame, sizeof(pp_frame)))
         fprintf(stderr, "pp job error\n");
   }
   else {
      struct drm_lima_m450_pp_frame pp_frame = {0};
      lima_pack_pp_frame_reg(job, pp_frame.frame, pp_frame.wb);
      pp_frame.num_pp = screen->num_pp;

      if (job->pp_max_stack_size)
         for (int i = 0; i < screen->num_pp; i++)
            pp_frame.fragment_stack_address[i] = pp_stack_va +
               job->pp_max_stack_size * pp_stack_pp_size * i;

      /* a mapped PP stream means per-PP streams; otherwise use the DLBU */
      if (ps->map) {
         for (int i = 0; i < screen->num_pp; i++)
            pp_frame.plbu_array_address[i] = ps->va + ps->offset[i];
      }
      else {
         pp_frame.use_dlbu = true;

         struct lima_job_fb_info *fb = &job->fb;
         pp_frame.dlbu_regs[0] = ctx->plb[ctx->plb_index]->va;
         pp_frame.dlbu_regs[1] = ((fb->tiled_h - 1) << 16) | (fb->tiled_w - 1);
         unsigned s = util_logbase2(LIMA_CTX_PLB_BLK_SIZE) - 7;
         pp_frame.dlbu_regs[2] = (s << 28) | (fb->shift_h << 16) | fb->shift_w;
         pp_frame.dlbu_regs[3] = ((fb->tiled_h - 1) << 24) | ((fb->tiled_w - 1) << 16);
      }

      lima_dump_command_stream_print(
         job->dump, &pp_frame, sizeof(pp_frame), false, "add pp frame\n");

      if (!lima_job_start(job, LIMA_PIPE_PP, &pp_frame, sizeof(pp_frame)))
         fprintf(stderr, "pp job error\n");
   }

   if (job->dump) {
      if (!lima_job_wait(job, LIMA_PIPE_PP, PIPE_TIMEOUT_INFINITE)) {
         fprintf(stderr, "pp wait error\n");
         exit(1);
      }
   }

   /* rotate to the next PLB buffer for the following job */
   ctx->plb_index = (ctx->plb_index + 1) % lima_ctx_num_plb;

   /* Set reload flags for next draw. It'll be unset if buffer is cleared */
   if (job->key.cbuf && (job->resolve & PIPE_CLEAR_COLOR0)) {
      struct lima_surface *surf = lima_surface(job->key.cbuf);
      surf->reload = PIPE_CLEAR_COLOR0;
   }

   if (job->key.zsbuf && (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
      struct lima_surface *surf = lima_surface(job->key.zsbuf);
      surf->reload = (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL));
   }

   /* the context must not keep pointing at a job that is about to go */
   if (ctx->job == job)
      ctx->job = NULL;

   lima_job_free(job);
}
1039 lima_flush(struct lima_context
*ctx
)
1041 hash_table_foreach(ctx
->jobs
, entry
) {
1042 struct lima_job
*job
= entry
->data
;
1048 lima_flush_job_accessing_bo(
1049 struct lima_context
*ctx
, struct lima_bo
*bo
, bool write
)
1051 hash_table_foreach(ctx
->jobs
, entry
) {
1052 struct lima_job
*job
= entry
->data
;
1053 if (lima_job_has_bo(job
, bo
, write
))
1059 * This is for current job flush previous job which write to the resource it wants
1060 * to read. Tipical usage is flush the FBO which is used as current task's texture.
1063 lima_flush_previous_job_writing_resource(
1064 struct lima_context
*ctx
, struct pipe_resource
*prsc
)
1066 struct hash_entry
*entry
= _mesa_hash_table_search(ctx
->write_jobs
, prsc
);
1069 struct lima_job
*job
= entry
->data
;
1071 /* do not flush current job */
1072 if (job
!= ctx
->job
)
1078 lima_pipe_flush(struct pipe_context
*pctx
, struct pipe_fence_handle
**fence
,
1081 struct lima_context
*ctx
= lima_context(pctx
);
1086 int drm_fd
= lima_screen(ctx
->base
.screen
)->fd
;
1089 if (!drmSyncobjExportSyncFile(drm_fd
, ctx
->out_sync
[LIMA_PIPE_PP
], &fd
))
1090 *fence
= lima_fence_create(fd
);
1095 lima_job_compare(const void *s1
, const void *s2
)
1097 return memcmp(s1
, s2
, sizeof(struct lima_job_key
)) == 0;
1101 lima_job_hash(const void *key
)
1103 return _mesa_hash_data(key
, sizeof(struct lima_job_key
));
1106 bool lima_job_init(struct lima_context
*ctx
)
1108 int fd
= lima_screen(ctx
->base
.screen
)->fd
;
1110 ctx
->jobs
= _mesa_hash_table_create(ctx
, lima_job_hash
, lima_job_compare
);
1114 ctx
->write_jobs
= _mesa_hash_table_create(
1115 ctx
, _mesa_hash_pointer
, _mesa_key_pointer_equal
);
1116 if (!ctx
->write_jobs
)
1119 ctx
->in_sync_fd
= -1;
1121 for (int i
= 0; i
< 2; i
++) {
1122 if (drmSyncobjCreate(fd
, DRM_SYNCOBJ_CREATE_SIGNALED
, ctx
->in_sync
+ i
) ||
1123 drmSyncobjCreate(fd
, DRM_SYNCOBJ_CREATE_SIGNALED
, ctx
->out_sync
+ i
))
1127 ctx
->base
.flush
= lima_pipe_flush
;
1132 void lima_job_fini(struct lima_context
*ctx
)
1134 int fd
= lima_screen(ctx
->base
.screen
)->fd
;
1138 for (int i
= 0; i
< 2; i
++) {
1139 if (ctx
->in_sync
[i
])
1140 drmSyncobjDestroy(fd
, ctx
->in_sync
[i
]);
1141 if (ctx
->out_sync
[i
])
1142 drmSyncobjDestroy(fd
, ctx
->out_sync
[i
]);
1145 if (ctx
->in_sync_fd
>= 0)
1146 close(ctx
->in_sync_fd
);