/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
30 #include "util/hash_table.h"
31 #include "util/slab.h"
33 #include "drm/freedreno_ringbuffer.h"
36 /* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
37 * by avoiding the additional tracking necessary to build cmds/relocs tables
38 * (but still builds a bos table)
42 #define INIT_SIZE 0x1000
45 struct msm_submit_sp
{
46 struct fd_submit base
;
48 DECLARE_ARRAY(struct drm_msm_gem_submit_bo
, submit_bos
);
49 DECLARE_ARRAY(struct fd_bo
*, bos
);
51 /* maps fd_bo to idx in bos table: */
52 struct hash_table
*bo_table
;
54 struct slab_child_pool ring_pool
;
56 struct fd_ringbuffer
*primary
;
58 /* Allow for sub-allocation of stateobj ring buffers (ie. sharing
59 * the same underlying bo)..
61 * We also rely on previous stateobj having been fully constructed
62 * so we can reclaim extra space at it's end.
64 struct fd_ringbuffer
*suballoc_ring
;
66 FD_DEFINE_CAST(fd_submit
, msm_submit_sp
);
68 /* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
69 * and sizes. Ie. a finalized buffer can have no more commands appended to
73 struct fd_bo
*ring_bo
;
77 /* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to
78 * later copy into the submit when the stateobj rb is later referenced by
81 struct msm_reloc_bo_sp
{
86 struct msm_ringbuffer_sp
{
87 struct fd_ringbuffer base
;
89 /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
92 // TODO check disasm.. hopefully compilers CSE can realize that
93 // reloc_bos and cmds are at the same offsets and optimize some
94 // divergent cases into single case
96 /* for _FD_RINGBUFFER_OBJECT case: */
99 DECLARE_ARRAY(struct msm_reloc_bo_sp
, reloc_bos
);
101 /* for other cases: */
103 struct fd_submit
*submit
;
104 DECLARE_ARRAY(struct msm_cmd_sp
, cmds
);
108 struct fd_bo
*ring_bo
;
110 FD_DEFINE_CAST(fd_ringbuffer
, msm_ringbuffer_sp
);
/* forward declarations (used before their definitions below): */
static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *msm_ringbuffer_sp_init(
		struct msm_ringbuffer_sp *msm_ring,
		uint32_t size, enum fd_ringbuffer_flags flags);
117 /* add (if needed) bo to submit and return index: */
119 append_bo(struct msm_submit_sp
*submit
, struct fd_bo
*bo
, uint32_t flags
)
121 struct msm_bo
*msm_bo
= to_msm_bo(bo
);
124 /* NOTE: it is legal to use the same bo on different threads for
125 * different submits. But it is not legal to use the same submit
126 * from given threads.
128 idx
= READ_ONCE(msm_bo
->idx
);
130 if (unlikely((idx
>= submit
->nr_submit_bos
) ||
131 (submit
->submit_bos
[idx
].handle
!= bo
->handle
))) {
132 uint32_t hash
= _mesa_hash_pointer(bo
);
133 struct hash_entry
*entry
;
135 entry
= _mesa_hash_table_search_pre_hashed(submit
->bo_table
, hash
, bo
);
138 idx
= (uint32_t)(uintptr_t)entry
->data
;
140 idx
= APPEND(submit
, submit_bos
);
141 idx
= APPEND(submit
, bos
);
143 submit
->submit_bos
[idx
].flags
= 0;
144 submit
->submit_bos
[idx
].handle
= bo
->handle
;
145 submit
->submit_bos
[idx
].presumed
= 0;
147 submit
->bos
[idx
] = fd_bo_ref(bo
);
149 _mesa_hash_table_insert_pre_hashed(submit
->bo_table
, hash
, bo
,
150 (void *)(uintptr_t)idx
);
155 if (flags
& FD_RELOC_READ
)
156 submit
->submit_bos
[idx
].flags
|= MSM_SUBMIT_BO_READ
;
157 if (flags
& FD_RELOC_WRITE
)
158 submit
->submit_bos
[idx
].flags
|= MSM_SUBMIT_BO_WRITE
;
159 if (flags
& FD_RELOC_DUMP
)
160 submit
->submit_bos
[idx
].flags
|= MSM_SUBMIT_BO_DUMP
;
166 msm_submit_suballoc_ring_bo(struct fd_submit
*submit
,
167 struct msm_ringbuffer_sp
*msm_ring
, uint32_t size
)
169 struct msm_submit_sp
*msm_submit
= to_msm_submit_sp(submit
);
170 unsigned suballoc_offset
= 0;
171 struct fd_bo
*suballoc_bo
= NULL
;
173 if (msm_submit
->suballoc_ring
) {
174 struct msm_ringbuffer_sp
*suballoc_ring
=
175 to_msm_ringbuffer_sp(msm_submit
->suballoc_ring
);
177 suballoc_bo
= suballoc_ring
->ring_bo
;
178 suballoc_offset
= fd_ringbuffer_size(msm_submit
->suballoc_ring
) +
179 suballoc_ring
->offset
;
181 suballoc_offset
= align(suballoc_offset
, 0x10);
183 if ((size
+ suballoc_offset
) > suballoc_bo
->size
) {
189 // TODO possibly larger size for streaming bo?
190 msm_ring
->ring_bo
= fd_bo_new_ring(submit
->pipe
->dev
,
191 0x8000, DRM_FREEDRENO_GEM_GPUREADONLY
);
192 msm_ring
->offset
= 0;
194 msm_ring
->ring_bo
= fd_bo_ref(suballoc_bo
);
195 msm_ring
->offset
= suballoc_offset
;
198 struct fd_ringbuffer
*old_suballoc_ring
= msm_submit
->suballoc_ring
;
200 msm_submit
->suballoc_ring
= fd_ringbuffer_ref(&msm_ring
->base
);
202 if (old_suballoc_ring
)
203 fd_ringbuffer_del(old_suballoc_ring
);
206 static struct fd_ringbuffer
*
207 msm_submit_sp_new_ringbuffer(struct fd_submit
*submit
, uint32_t size
,
208 enum fd_ringbuffer_flags flags
)
210 struct msm_submit_sp
*msm_submit
= to_msm_submit_sp(submit
);
211 struct msm_ringbuffer_sp
*msm_ring
;
213 msm_ring
= slab_alloc(&msm_submit
->ring_pool
);
215 msm_ring
->u
.submit
= submit
;
217 /* NOTE: needs to be before _suballoc_ring_bo() since it could
218 * increment the refcnt of the current ring
220 msm_ring
->base
.refcnt
= 1;
222 if (flags
& FD_RINGBUFFER_STREAMING
) {
223 msm_submit_suballoc_ring_bo(submit
, msm_ring
, size
);
225 if (flags
& FD_RINGBUFFER_GROWABLE
)
228 msm_ring
->offset
= 0;
229 msm_ring
->ring_bo
= fd_bo_new_ring(submit
->pipe
->dev
, size
,
230 DRM_FREEDRENO_GEM_GPUREADONLY
);
233 if (!msm_ringbuffer_sp_init(msm_ring
, size
, flags
))
236 if (flags
& FD_RINGBUFFER_PRIMARY
) {
237 debug_assert(!msm_submit
->primary
);
238 msm_submit
->primary
= fd_ringbuffer_ref(&msm_ring
->base
);
241 return &msm_ring
->base
;
245 msm_submit_sp_flush(struct fd_submit
*submit
, int in_fence_fd
,
246 int *out_fence_fd
, uint32_t *out_fence
)
248 struct msm_submit_sp
*msm_submit
= to_msm_submit_sp(submit
);
249 struct msm_pipe
*msm_pipe
= to_msm_pipe(submit
->pipe
);
250 struct drm_msm_gem_submit req
= {
251 .flags
= msm_pipe
->pipe
,
252 .queueid
= msm_pipe
->queue_id
,
256 debug_assert(msm_submit
->primary
);
257 finalize_current_cmd(msm_submit
->primary
);
259 struct msm_ringbuffer_sp
*primary
= to_msm_ringbuffer_sp(msm_submit
->primary
);
260 struct drm_msm_gem_submit_cmd cmds
[primary
->u
.nr_cmds
];
262 for (unsigned i
= 0; i
< primary
->u
.nr_cmds
; i
++) {
263 cmds
[i
].type
= MSM_SUBMIT_CMD_BUF
;
264 cmds
[i
].submit_idx
= append_bo(msm_submit
,
265 primary
->u
.cmds
[i
].ring_bo
, FD_RELOC_READ
| FD_RELOC_DUMP
);
266 cmds
[i
].submit_offset
= primary
->offset
;
267 cmds
[i
].size
= primary
->u
.cmds
[i
].size
;
269 cmds
[i
].nr_relocs
= 0;
272 if (in_fence_fd
!= -1) {
273 req
.flags
|= MSM_SUBMIT_FENCE_FD_IN
| MSM_SUBMIT_NO_IMPLICIT
;
274 req
.fence_fd
= in_fence_fd
;
278 req
.flags
|= MSM_SUBMIT_FENCE_FD_OUT
;
281 /* needs to be after get_cmd() as that could create bos/cmds table: */
282 req
.bos
= VOID2U64(msm_submit
->submit_bos
),
283 req
.nr_bos
= msm_submit
->nr_submit_bos
;
284 req
.cmds
= VOID2U64(cmds
),
285 req
.nr_cmds
= primary
->u
.nr_cmds
;
287 DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req
.nr_cmds
, req
.nr_bos
);
289 ret
= drmCommandWriteRead(submit
->pipe
->dev
->fd
, DRM_MSM_GEM_SUBMIT
,
292 ERROR_MSG("submit failed: %d (%s)", ret
, strerror(errno
));
293 msm_dump_submit(&req
);
296 *out_fence
= req
.fence
;
299 *out_fence_fd
= req
.fence_fd
;
306 msm_submit_sp_destroy(struct fd_submit
*submit
)
308 struct msm_submit_sp
*msm_submit
= to_msm_submit_sp(submit
);
310 if (msm_submit
->primary
)
311 fd_ringbuffer_del(msm_submit
->primary
);
312 if (msm_submit
->suballoc_ring
)
313 fd_ringbuffer_del(msm_submit
->suballoc_ring
);
315 _mesa_hash_table_destroy(msm_submit
->bo_table
, NULL
);
317 // TODO it would be nice to have a way to debug_assert() if all
318 // rb's haven't been free'd back to the slab, because that is
319 // an indication that we are leaking bo's
320 slab_destroy_child(&msm_submit
->ring_pool
);
322 for (unsigned i
= 0; i
< msm_submit
->nr_bos
; i
++)
323 fd_bo_del(msm_submit
->bos
[i
]);
325 free(msm_submit
->submit_bos
);
326 free(msm_submit
->bos
);
330 static const struct fd_submit_funcs submit_funcs
= {
331 .new_ringbuffer
= msm_submit_sp_new_ringbuffer
,
332 .flush
= msm_submit_sp_flush
,
333 .destroy
= msm_submit_sp_destroy
,
337 msm_submit_sp_new(struct fd_pipe
*pipe
)
339 struct msm_submit_sp
*msm_submit
= calloc(1, sizeof(*msm_submit
));
340 struct fd_submit
*submit
;
342 msm_submit
->bo_table
= _mesa_hash_table_create(NULL
,
343 _mesa_hash_pointer
, _mesa_key_pointer_equal
);
345 slab_create_child(&msm_submit
->ring_pool
, &to_msm_pipe(pipe
)->ring_pool
);
347 submit
= &msm_submit
->base
;
349 submit
->funcs
= &submit_funcs
;
355 msm_pipe_sp_ringpool_init(struct msm_pipe
*msm_pipe
)
358 slab_create_parent(&msm_pipe
->ring_pool
, sizeof(struct msm_ringbuffer_sp
), 16);
362 msm_pipe_sp_ringpool_fini(struct msm_pipe
*msm_pipe
)
364 if (msm_pipe
->ring_pool
.num_elements
)
365 slab_destroy_parent(&msm_pipe
->ring_pool
);
369 finalize_current_cmd(struct fd_ringbuffer
*ring
)
371 debug_assert(!(ring
->flags
& _FD_RINGBUFFER_OBJECT
));
373 struct msm_ringbuffer_sp
*msm_ring
= to_msm_ringbuffer_sp(ring
);
374 unsigned idx
= APPEND(&msm_ring
->u
, cmds
);
376 msm_ring
->u
.cmds
[idx
].ring_bo
= fd_bo_ref(msm_ring
->ring_bo
);
377 msm_ring
->u
.cmds
[idx
].size
= offset_bytes(ring
->cur
, ring
->start
);
381 msm_ringbuffer_sp_grow(struct fd_ringbuffer
*ring
, uint32_t size
)
383 struct msm_ringbuffer_sp
*msm_ring
= to_msm_ringbuffer_sp(ring
);
384 struct fd_pipe
*pipe
= msm_ring
->u
.submit
->pipe
;
386 debug_assert(ring
->flags
& FD_RINGBUFFER_GROWABLE
);
388 finalize_current_cmd(ring
);
390 fd_bo_del(msm_ring
->ring_bo
);
391 msm_ring
->ring_bo
= fd_bo_new_ring(pipe
->dev
, size
,
392 DRM_FREEDRENO_GEM_GPUREADONLY
);
394 ring
->start
= fd_bo_map(msm_ring
->ring_bo
);
395 ring
->end
= &(ring
->start
[size
/4]);
396 ring
->cur
= ring
->start
;
401 msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer
*ring
,
402 const struct fd_reloc
*reloc
)
404 struct msm_ringbuffer_sp
*msm_ring
= to_msm_ringbuffer_sp(ring
);
405 struct fd_pipe
*pipe
;
407 if (ring
->flags
& _FD_RINGBUFFER_OBJECT
) {
408 unsigned idx
= APPEND(&msm_ring
->u
, reloc_bos
);
410 msm_ring
->u
.reloc_bos
[idx
].bo
= fd_bo_ref(reloc
->bo
);
411 msm_ring
->u
.reloc_bos
[idx
].flags
= reloc
->flags
;
413 pipe
= msm_ring
->u
.pipe
;
415 struct msm_submit_sp
*msm_submit
=
416 to_msm_submit_sp(msm_ring
->u
.submit
);
418 append_bo(msm_submit
, reloc
->bo
, reloc
->flags
);
420 pipe
= msm_ring
->u
.submit
->pipe
;
423 uint64_t iova
= fd_bo_get_iova(reloc
->bo
) + reloc
->offset
;
424 uint32_t dword
= iova
;
425 int shift
= reloc
->shift
;
432 (*ring
->cur
++) = dword
| reloc
->or;
434 if (pipe
->gpu_id
>= 500) {
443 (*ring
->cur
++) = dword
| reloc
->orhi
;
448 msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer
*ring
,
449 struct fd_ringbuffer
*target
, uint32_t cmd_idx
)
451 struct msm_ringbuffer_sp
*msm_target
= to_msm_ringbuffer_sp(target
);
455 if ((target
->flags
& FD_RINGBUFFER_GROWABLE
) &&
456 (cmd_idx
< msm_target
->u
.nr_cmds
)) {
457 bo
= msm_target
->u
.cmds
[cmd_idx
].ring_bo
;
458 size
= msm_target
->u
.cmds
[cmd_idx
].size
;
460 bo
= msm_target
->ring_bo
;
461 size
= offset_bytes(target
->cur
, target
->start
);
464 msm_ringbuffer_sp_emit_reloc(ring
, &(struct fd_reloc
){
466 .flags
= FD_RELOC_READ
| FD_RELOC_DUMP
,
467 .offset
= msm_target
->offset
,
470 if (!(target
->flags
& _FD_RINGBUFFER_OBJECT
))
473 struct msm_ringbuffer_sp
*msm_ring
= to_msm_ringbuffer_sp(ring
);
475 if (ring
->flags
& _FD_RINGBUFFER_OBJECT
) {
476 for (unsigned i
= 0; i
< msm_target
->u
.nr_reloc_bos
; i
++) {
477 unsigned idx
= APPEND(&msm_ring
->u
, reloc_bos
);
479 msm_ring
->u
.reloc_bos
[idx
].bo
=
480 fd_bo_ref(msm_target
->u
.reloc_bos
[i
].bo
);
481 msm_ring
->u
.reloc_bos
[idx
].flags
=
482 msm_target
->u
.reloc_bos
[i
].flags
;
485 // TODO it would be nice to know whether we have already
486 // seen this target before. But hopefully we hit the
487 // append_bo() fast path enough for this to not matter:
488 struct msm_submit_sp
*msm_submit
= to_msm_submit_sp(msm_ring
->u
.submit
);
490 for (unsigned i
= 0; i
< msm_target
->u
.nr_reloc_bos
; i
++) {
491 append_bo(msm_submit
, msm_target
->u
.reloc_bos
[i
].bo
,
492 msm_target
->u
.reloc_bos
[i
].flags
);
500 msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer
*ring
)
502 if (ring
->flags
& FD_RINGBUFFER_GROWABLE
)
503 return to_msm_ringbuffer_sp(ring
)->u
.nr_cmds
+ 1;
508 msm_ringbuffer_sp_destroy(struct fd_ringbuffer
*ring
)
510 struct msm_ringbuffer_sp
*msm_ring
= to_msm_ringbuffer_sp(ring
);
512 fd_bo_del(msm_ring
->ring_bo
);
514 if (ring
->flags
& _FD_RINGBUFFER_OBJECT
) {
515 for (unsigned i
= 0; i
< msm_ring
->u
.nr_reloc_bos
; i
++) {
516 fd_bo_del(msm_ring
->u
.reloc_bos
[i
].bo
);
521 struct fd_submit
*submit
= msm_ring
->u
.submit
;
523 for (unsigned i
= 0; i
< msm_ring
->u
.nr_cmds
; i
++) {
524 fd_bo_del(msm_ring
->u
.cmds
[i
].ring_bo
);
527 slab_free(&to_msm_submit_sp(submit
)->ring_pool
, msm_ring
);
531 static const struct fd_ringbuffer_funcs ring_funcs
= {
532 .grow
= msm_ringbuffer_sp_grow
,
533 .emit_reloc
= msm_ringbuffer_sp_emit_reloc
,
534 .emit_reloc_ring
= msm_ringbuffer_sp_emit_reloc_ring
,
535 .cmd_count
= msm_ringbuffer_sp_cmd_count
,
536 .destroy
= msm_ringbuffer_sp_destroy
,
539 static inline struct fd_ringbuffer
*
540 msm_ringbuffer_sp_init(struct msm_ringbuffer_sp
*msm_ring
, uint32_t size
,
541 enum fd_ringbuffer_flags flags
)
543 struct fd_ringbuffer
*ring
= &msm_ring
->base
;
545 debug_assert(msm_ring
->ring_bo
);
547 uint8_t *base
= fd_bo_map(msm_ring
->ring_bo
);
548 ring
->start
= (void *)(base
+ msm_ring
->offset
);
549 ring
->end
= &(ring
->start
[size
/4]);
550 ring
->cur
= ring
->start
;
555 ring
->funcs
= &ring_funcs
;
557 // TODO initializing these could probably be conditional on flags
558 // since unneed for FD_RINGBUFFER_STAGING case..
559 msm_ring
->u
.cmds
= NULL
;
560 msm_ring
->u
.nr_cmds
= msm_ring
->u
.max_cmds
= 0;
562 msm_ring
->u
.reloc_bos
= NULL
;
563 msm_ring
->u
.nr_reloc_bos
= msm_ring
->u
.max_reloc_bos
= 0;
568 struct fd_ringbuffer
*
569 msm_ringbuffer_sp_new_object(struct fd_pipe
*pipe
, uint32_t size
)
571 struct msm_ringbuffer_sp
*msm_ring
= malloc(sizeof(*msm_ring
));
573 msm_ring
->u
.pipe
= pipe
;
574 msm_ring
->offset
= 0;
575 msm_ring
->ring_bo
= fd_bo_new_ring(pipe
->dev
, size
,
576 DRM_FREEDRENO_GEM_GPUREADONLY
);
577 msm_ring
->base
.refcnt
= 1;
579 return msm_ringbuffer_sp_init(msm_ring
, size
, _FD_RINGBUFFER_OBJECT
);