/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>

#include "util/hash_table.h"
#include "util/slab.h"

#include "drm/freedreno_ringbuffer.h"
#include "msm_priv.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */
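
/* Rough sketch of how this is used from the driver side (error handling
 * omitted, and the exact flag combinations are up to the caller; see
 * freedreno_ringbuffer.h for the public entrypoints):
 *
 *    struct fd_submit *submit = fd_submit_new(pipe);
 *    struct fd_ringbuffer *ring =
 *       fd_submit_new_ringbuffer(submit, 0x1000,
 *             FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
 *
 *    // ... emit cmdstream packets into ring ...
 *
 *    fd_submit_flush(submit, -1, NULL, NULL);
 */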

#define INIT_SIZE 0x1000

struct msm_submit_sp {
   struct fd_submit base;

   DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
   DECLARE_ARRAY(struct fd_bo *, bos);

   /* maps fd_bo to idx in bos table: */
   struct hash_table *bo_table;

   struct slab_child_pool ring_pool;

   struct fd_ringbuffer *primary;

   /* Allow for sub-allocation of stateobj ring buffers (i.e. sharing
    * the same underlying bo).
    *
    * We also rely on the previous stateobj having been fully constructed
    * so we can reclaim extra space at its end.
    */
   struct fd_ringbuffer *suballoc_ring;
};
FD_DEFINE_CAST(fd_submit, msm_submit_sp);

/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
 * and sizes.  I.e. a finalized buffer can have no more commands appended to
 * it.
 */
struct msm_cmd_sp {
   struct fd_bo *ring_bo;
   unsigned size;
};

/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to
 * copy into the submit when the stateobj rb is later referenced by
 * a regular rb:
 */
struct msm_reloc_bo_sp {
   struct fd_bo *bo;
   unsigned flags;
};

struct msm_ringbuffer_sp {
   struct fd_ringbuffer base;

   /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
   unsigned offset;

   // TODO check disasm.. hopefully the compiler's CSE can realize that
   // reloc_bos and cmds are at the same offsets and optimize the
   // divergent cases into a single case
   union {
      /* for _FD_RINGBUFFER_OBJECT case: */
      struct {
         struct fd_pipe *pipe;
         DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos);
      };
      /* for other cases: */
      struct {
         struct fd_submit *submit;
         DECLARE_ARRAY(struct msm_cmd_sp, cmds);
      };
   } u;

   struct fd_bo *ring_bo;
};
FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer * msm_ringbuffer_sp_init(
      struct msm_ringbuffer_sp *msm_ring,
      uint32_t size, enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
static uint32_t
append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags)
{
   struct msm_bo *msm_bo = to_msm_bo(bo);
   uint32_t idx;

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   idx = READ_ONCE(msm_bo->idx);

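   /* If the cached idx is stale (i.e. from a different submit), it will
    * either be out of bounds or point at a slot whose handle does not
    * match, and we fall back to the bo_table lookup below:
    */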
   if (unlikely((idx >= submit->nr_submit_bos) ||
         (submit->submit_bos[idx].handle != bo->handle))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else {
         idx = APPEND(submit, submit_bos);
         idx = APPEND(submit, bos);

         submit->submit_bos[idx].flags = 0;
         submit->submit_bos[idx].handle = bo->handle;
         submit->submit_bos[idx].presumed = 0;

         submit->bos[idx] = fd_bo_ref(bo);

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
               (void *)(uintptr_t)idx);
      }
      msm_bo->idx = idx;
   }

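   /* Accumulate access flags, since the same bo can be referenced
    * multiple times within one submit with different flags:
    */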
   if (flags & FD_RELOC_READ)
      submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ;
   if (flags & FD_RELOC_WRITE)
      submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE;
   if (flags & FD_RELOC_DUMP)
      submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_DUMP;

   return idx;
}

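/* Assign a ring bo to a STREAMING (stateobj) ringbuffer, sub-allocating
 * from the tail of the previous stateobj's bo when there is room, and
 * falling back to a fresh bo otherwise:
 */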
static void
msm_submit_suballoc_ring_bo(struct fd_submit *submit,
      struct msm_ringbuffer_sp *msm_ring, uint32_t size)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (msm_submit->suballoc_ring) {
      struct msm_ringbuffer_sp *suballoc_ring =
            to_msm_ringbuffer_sp(msm_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) +
            suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, 0x10);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, 0x8000);
      msm_ring->offset = 0;
   } else {
      msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
      msm_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;

   msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

static struct fd_ringbuffer *
msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
      enum fd_ringbuffer_flags flags)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_ringbuffer_sp *msm_ring;

   msm_ring = slab_alloc(&msm_submit->ring_pool);

   msm_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   msm_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      msm_submit_suballoc_ring_bo(submit, msm_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = INIT_SIZE;

      msm_ring->offset = 0;
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
      return NULL;

   if (flags & FD_RINGBUFFER_PRIMARY) {
      debug_assert(!msm_submit->primary);
      msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base);
   }

   return &msm_ring->base;
}

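/* Build the flat bos/cmds tables and hand the batch to the kernel in a
 * single DRM_MSM_GEM_SUBMIT ioctl:
 */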
static int
msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
      int *out_fence_fd, uint32_t *out_fence)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);
   struct drm_msm_gem_submit req = {
         .flags = msm_pipe->pipe,
         .queueid = msm_pipe->queue_id,
   };
   int ret;

   debug_assert(msm_submit->primary);
   finalize_current_cmd(msm_submit->primary);

   struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary);
   struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds];

   for (unsigned i = 0; i < primary->u.nr_cmds; i++) {
      cmds[i].type = MSM_SUBMIT_CMD_BUF;
      cmds[i].submit_idx = append_bo(msm_submit,
            primary->u.cmds[i].ring_bo, FD_RELOC_READ | FD_RELOC_DUMP);
      cmds[i].submit_offset = primary->offset;
      cmds[i].size = primary->u.cmds[i].size;
      cmds[i].pad = 0;
      cmds[i].nr_relocs = 0;
   }

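   /* An explicit in-fence also suppresses implicit sync on the bos: */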
   if (in_fence_fd != -1) {
      req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
      req.fence_fd = in_fence_fd;
   }

   if (out_fence_fd) {
      req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
   }

   /* needs to be after the cmds loop above, since append_bo() could
    * still grow the submit_bos table:
    */
   req.bos = VOID2U64(msm_submit->submit_bos);
   req.nr_bos = msm_submit->nr_submit_bos;
   req.cmds = VOID2U64(cmds);
   req.nr_cmds = primary->u.nr_cmds;

   DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);

   ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT,
         &req, sizeof(req));
   if (ret) {
      ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
      msm_dump_submit(&req);
   } else {
      if (out_fence)
         *out_fence = req.fence;

      if (out_fence_fd)
         *out_fence_fd = req.fence_fd;
   }

   return ret;
}

static void
msm_submit_sp_destroy(struct fd_submit *submit)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   if (msm_submit->primary)
      fd_ringbuffer_del(msm_submit->primary);
   if (msm_submit->suballoc_ring)
      fd_ringbuffer_del(msm_submit->suballoc_ring);

   _mesa_hash_table_destroy(msm_submit->bo_table, NULL);

   // TODO it would be nice to have a way to debug_assert() if all
   // rb's haven't been free'd back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy_child(&msm_submit->ring_pool);

   for (unsigned i = 0; i < msm_submit->nr_bos; i++)
      fd_bo_del(msm_submit->bos[i]);

   free(msm_submit->submit_bos);
   free(msm_submit->bos);
   free(msm_submit);
}

static const struct fd_submit_funcs submit_funcs = {
      .new_ringbuffer = msm_submit_sp_new_ringbuffer,
      .flush = msm_submit_sp_flush,
      .destroy = msm_submit_sp_destroy,
};

struct fd_submit *
msm_submit_sp_new(struct fd_pipe *pipe)
{
   struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
   struct fd_submit *submit;

   msm_submit->bo_table = _mesa_hash_table_create(NULL,
         _mesa_hash_pointer, _mesa_key_pointer_equal);

   slab_create_child(&msm_submit->ring_pool, &to_msm_pipe(pipe)->ring_pool);

   submit = &msm_submit->base;
   submit->pipe = pipe;
   submit->funcs = &submit_funcs;

   return submit;
}

void
msm_pipe_sp_ringpool_init(struct msm_pipe *msm_pipe)
{
   // TODO tune size:
   slab_create_parent(&msm_pipe->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);
}

void
msm_pipe_sp_ringpool_fini(struct msm_pipe *msm_pipe)
{
   if (msm_pipe->ring_pool.num_elements)
      slab_destroy_parent(&msm_pipe->ring_pool);
}

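/* Snapshot the current write position as a finalized cmd entry, taking a
 * reference to the ring bo.  Called when a growable ring grows, and for
 * the primary ring at flush time:
 */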
static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   unsigned idx = APPEND(&msm_ring->u, cmds);

   msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo);
   msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start);
}

static void
msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_pipe *pipe = msm_ring->u.submit->pipe;

   debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

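   /* The finalized cmd entry holds its own reference to the old ring bo
    * (taken in finalize_current_cmd()), so dropping ours here does not
    * free it out from under the submit:
    */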
   fd_bo_del(msm_ring->ring_bo);
   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(msm_ring->ring_bo);
   ring->end = &(ring->start[size/4]);
   ring->cur = ring->start;
   ring->size = size;
}

static void
msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring,
      const struct fd_reloc *reloc)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_pipe *pipe;

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      unsigned idx = APPEND(&msm_ring->u, reloc_bos);

      msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo);
      msm_ring->u.reloc_bos[idx].flags = reloc->flags;

      pipe = msm_ring->u.pipe;
   } else {
      struct msm_submit_sp *msm_submit =
            to_msm_submit_sp(msm_ring->u.submit);

      append_bo(msm_submit, reloc->bo, reloc->flags);

      pipe = msm_ring->u.submit->pipe;
   }

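   /* With softpin the bo's GPU address is written directly into the
    * cmdstream, rather than emitting a reloc entry for the kernel to
    * patch.  a5xx and later (gpu_id >= 500) use 64b addresses, so the
    * high dword is emitted as well:
    */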
   uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset;
   int shift = reloc->shift;

   if (shift < 0)
      iova >>= -shift;
   else
      iova <<= shift;

   uint32_t dword = iova;

   (*ring->cur++) = dword | reloc->or;

   if (pipe->gpu_id >= 500) {
      dword = iova >> 32;
      (*ring->cur++) = dword | reloc->orhi;
   }
}

static uint32_t
msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
      struct fd_ringbuffer *target, uint32_t cmd_idx)
{
   struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
   struct fd_bo *bo;
   uint32_t size;

   if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
         (cmd_idx < msm_target->u.nr_cmds)) {
      bo = msm_target->u.cmds[cmd_idx].ring_bo;
      size = msm_target->u.cmds[cmd_idx].size;
   } else {
      bo = msm_target->ring_bo;
      size = offset_bytes(target->cur, target->start);
   }

   msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){
         .bo = bo,
         .flags = FD_RELOC_READ | FD_RELOC_DUMP,
         .offset = msm_target->offset,
   });

   if (!(target->flags & _FD_RINGBUFFER_OBJECT))
      return size;

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

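   /* Propagate the stateobj's bo references: into our own reloc_bos table
    * if we are also a stateobj, otherwise directly into the submit's bos
    * table:
    */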
   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
         unsigned idx = APPEND(&msm_ring->u, reloc_bos);

         msm_ring->u.reloc_bos[idx].bo =
               fd_bo_ref(msm_target->u.reloc_bos[i].bo);
         msm_ring->u.reloc_bos[idx].flags =
               msm_target->u.reloc_bos[i].flags;
      }
   } else {
      // TODO it would be nice to know whether we have already
      // seen this target before.  But hopefully we hit the
      // append_bo() fast path enough for this to not matter:
      struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);

      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
         append_bo(msm_submit, msm_target->u.reloc_bos[i].bo,
               msm_target->u.reloc_bos[i].flags);
      }
   }

   return size;
}

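/* For growable rings, u.nr_cmds counts only the finalized cmds; the +1
 * accounts for the still-open current cmd:
 */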
static uint32_t
msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static void
msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   fd_bo_del(msm_ring->ring_bo);

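   /* Stateobj rings are malloc'd (see msm_ringbuffer_sp_new_object()),
    * while other rings come from the submit's slab pool:
    */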
   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(msm_ring->u.reloc_bos[i].bo);
      }
      free(msm_ring->u.reloc_bos);

      free(msm_ring);
   } else {
      struct fd_submit *submit = msm_ring->u.submit;

      for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
         fd_bo_del(msm_ring->u.cmds[i].ring_bo);
      }
      free(msm_ring->u.cmds);

      slab_free(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs = {
      .grow = msm_ringbuffer_sp_grow,
      .emit_reloc = msm_ringbuffer_sp_emit_reloc,
      .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
      .cmd_count = msm_ringbuffer_sp_cmd_count,
      .destroy = msm_ringbuffer_sp_destroy,
};

static inline struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &msm_ring->base;

   debug_assert(msm_ring->ring_bo);

   uint8_t *base = fd_bo_map(msm_ring->ring_bo);
   ring->start = (void *)(base + msm_ring->offset);
   ring->end = &(ring->start[size/4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   ring->funcs = &ring_funcs;

   // TODO initializing these could probably be conditional on flags,
   // since they are unneeded for the FD_RINGBUFFER_STAGING case..
   msm_ring->u.cmds = NULL;
   msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;

   msm_ring->u.reloc_bos = NULL;
   msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;

   return ring;
}

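/* Create a long-lived stateobj ring, not tied to any submit.  The bos it
 * references are tracked in reloc_bos and copied into a submit's bos
 * table when the stateobj is later referenced by one of the submit's
 * rings:
 */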
struct fd_ringbuffer *
msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));

   msm_ring->u.pipe = pipe;
   msm_ring->offset = 0;
   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);
   msm_ring->base.refcnt = 1;

   return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
}