src/freedreno/drm/msm_ringbuffer_sp.c
/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>

#include "util/hash_table.h"
#include "util/slab.h"

#include "drm/freedreno_ringbuffer.h"
#include "msm_priv.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */

#define INIT_SIZE 0x1000

struct msm_submit_sp {
   struct fd_submit base;

   DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
   DECLARE_ARRAY(struct fd_bo *, bos);

   /* maps fd_bo to idx in bos table: */
   struct hash_table *bo_table;

   struct slab_child_pool ring_pool;

   struct fd_ringbuffer *primary;

   /* Allow for sub-allocation of stateobj ring buffers (ie. sharing
    * the same underlying bo)..
    *
    * We also rely on the previous stateobj having been fully constructed
    * so we can reclaim extra space at its end.
    */
   struct fd_ringbuffer *suballoc_ring;
};
FD_DEFINE_CAST(fd_submit, msm_submit_sp);

/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
 * and sizes.  Ie. a finalized buffer can have no more commands appended to
 * it.
 */
struct msm_cmd_sp {
   struct fd_bo *ring_bo;
   unsigned size;
};

struct msm_ringbuffer_sp {
   struct fd_ringbuffer base;

   /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
   unsigned offset;

   union {
      /* for _FD_RINGBUFFER_OBJECT case, the array of BOs referenced from
       * this one
       */
      struct {
         struct fd_pipe *pipe;
         DECLARE_ARRAY(struct fd_bo *, reloc_bos);
      };
      /* for other cases: */
      struct {
         struct fd_submit *submit;
         DECLARE_ARRAY(struct msm_cmd_sp, cmds);
      };
   } u;

   struct fd_bo *ring_bo;
};
FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer * msm_ringbuffer_sp_init(
      struct msm_ringbuffer_sp *msm_ring,
      uint32_t size, enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
static uint32_t
msm_submit_append_bo(struct msm_submit_sp *submit, struct fd_bo *bo)
{
   struct msm_bo *msm_bo = to_msm_bo(bo);
   uint32_t idx;

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   idx = READ_ONCE(msm_bo->idx);

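   /* The bo caches the idx from whichever submit used it last, so the cached
    * value may be out of range or refer to a different bo's slot in this
    * submit.  Validate it before trusting it, and fall back to the hash
    * table lookup below on a miss.
    */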
   if (unlikely((idx >= submit->nr_submit_bos) ||
         (submit->submit_bos[idx].handle != bo->handle))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else {
         idx = APPEND(submit, submit_bos);
         idx = APPEND(submit, bos);

         submit->submit_bos[idx].flags = bo->flags;
         submit->submit_bos[idx].handle = bo->handle;
         submit->submit_bos[idx].presumed = 0;

         submit->bos[idx] = fd_bo_ref(bo);

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
               (void *)(uintptr_t)idx);
      }
      msm_bo->idx = idx;
   }

   return idx;
}

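/* Sub-allocate a FD_RINGBUFFER_STREAMING ring from the tail of the current
 * suballoc bo if the request still fits, otherwise start a new suballoc bo.
 * Either way the new ring becomes the suballoc target for the next request.
 */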
static void
msm_submit_suballoc_ring_bo(struct fd_submit *submit,
      struct msm_ringbuffer_sp *msm_ring, uint32_t size)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (msm_submit->suballoc_ring) {
      struct msm_ringbuffer_sp *suballoc_ring =
            to_msm_ringbuffer_sp(msm_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) +
            suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, 0x10);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, 0x8000);
      msm_ring->offset = 0;
   } else {
      msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
      msm_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;

   msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

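/* Allocate a ringbuffer for this submit: streaming rings are sub-allocated
 * from a shared bo, growable rings start at INIT_SIZE and grow on demand,
 * and anything else gets its own bo of the requested size.
 */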
static struct fd_ringbuffer *
msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
      enum fd_ringbuffer_flags flags)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_ringbuffer_sp *msm_ring;

   msm_ring = slab_alloc(&msm_submit->ring_pool);

   msm_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   msm_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      msm_submit_suballoc_ring_bo(submit, msm_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = INIT_SIZE;

      msm_ring->offset = 0;
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
      return NULL;

   if (flags & FD_RINGBUFFER_PRIMARY) {
      debug_assert(!msm_submit->primary);
      msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base);
   }

   return &msm_ring->base;
}

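/* Flush the submit: finalize the primary ring, build a cmds table with one
 * MSM_SUBMIT_CMD_BUF entry per finalized cmdstream buffer, and hand the
 * whole thing to the kernel via DRM_MSM_GEM_SUBMIT.
 */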
static int
msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
      int *out_fence_fd, uint32_t *out_fence)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);
   struct drm_msm_gem_submit req = {
      .flags = msm_pipe->pipe,
      .queueid = msm_pipe->queue_id,
   };
   int ret;

   debug_assert(msm_submit->primary);
   finalize_current_cmd(msm_submit->primary);

   struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary);
   struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds];

   for (unsigned i = 0; i < primary->u.nr_cmds; i++) {
      cmds[i].type = MSM_SUBMIT_CMD_BUF;
      cmds[i].submit_idx = msm_submit_append_bo(msm_submit,
            primary->u.cmds[i].ring_bo);
      cmds[i].submit_offset = primary->offset;
      cmds[i].size = primary->u.cmds[i].size;
      cmds[i].pad = 0;
      cmds[i].nr_relocs = 0;
   }

   if (in_fence_fd != -1) {
      req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
      req.fence_fd = in_fence_fd;
   }

   if (out_fence_fd) {
      req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
   }

   /* needs to be after the cmds loop above, since append_bo() can grow
    * the submit_bos table:
    */
   req.bos = VOID2U64(msm_submit->submit_bos);
   req.nr_bos = msm_submit->nr_submit_bos;
   req.cmds = VOID2U64(cmds);
   req.nr_cmds = primary->u.nr_cmds;

   DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);

   ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT,
         &req, sizeof(req));
   if (ret) {
      ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
      msm_dump_submit(&req);
   } else {
      if (out_fence)
         *out_fence = req.fence;

      if (out_fence_fd)
         *out_fence_fd = req.fence_fd;
   }

   return ret;
}

static void
msm_submit_sp_destroy(struct fd_submit *submit)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   if (msm_submit->primary)
      fd_ringbuffer_del(msm_submit->primary);
   if (msm_submit->suballoc_ring)
      fd_ringbuffer_del(msm_submit->suballoc_ring);

   _mesa_hash_table_destroy(msm_submit->bo_table, NULL);

   // TODO it would be nice to have a way to debug_assert() if not all
   // rb's have been freed back to the slab, because that is an indication
   // that we are leaking bo's
   slab_destroy_child(&msm_submit->ring_pool);

   for (unsigned i = 0; i < msm_submit->nr_bos; i++)
      fd_bo_del(msm_submit->bos[i]);

   free(msm_submit->submit_bos);
   free(msm_submit->bos);
   free(msm_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = msm_submit_sp_new_ringbuffer,
   .flush = msm_submit_sp_flush,
   .destroy = msm_submit_sp_destroy,
};

struct fd_submit *
msm_submit_sp_new(struct fd_pipe *pipe)
{
   struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
   struct fd_submit *submit;

   msm_submit->bo_table = _mesa_hash_table_create(NULL,
         _mesa_hash_pointer, _mesa_key_pointer_equal);

   slab_create_child(&msm_submit->ring_pool, &to_msm_pipe(pipe)->ring_pool);

   submit = &msm_submit->base;
   submit->pipe = pipe;
   submit->funcs = &submit_funcs;

   return submit;
}

void
msm_pipe_sp_ringpool_init(struct msm_pipe *msm_pipe)
{
   // TODO tune size:
   slab_create_parent(&msm_pipe->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);
}

void
msm_pipe_sp_ringpool_fini(struct msm_pipe *msm_pipe)
{
   if (msm_pipe->ring_pool.num_elements)
      slab_destroy_parent(&msm_pipe->ring_pool);
}

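/* Record the current cmdstream buffer, and how much of it has been written,
 * in the u.cmds table.  No further commands may be appended to it after
 * this.
 */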
static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   unsigned idx = APPEND(&msm_ring->u, cmds);

   msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo);
   msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start);
}

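/* For growable rings: finalize the current bo and switch to a fresh one of
 * the requested size.  Previously written commands remain in the finalized
 * buffers tracked in u.cmds.
 */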
static void
msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_pipe *pipe = msm_ring->u.submit->pipe;

   debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(msm_ring->ring_bo);
   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(msm_ring->ring_bo);
   ring->end = &(ring->start[size/4]);
   ring->cur = ring->start;
   ring->size = size;
}

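/* With softpin there is no kernel relocation step: the bo's iova is known
 * up front, so just add the bo to the submit's (or stateobj's) bo list and
 * write the shifted address directly into the cmdstream (64b on a5xx+).
 */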
static void
msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring,
      const struct fd_reloc *reloc)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_pipe *pipe;

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      /* Avoid emitting duplicate BO references into the list.  Ringbuffer
       * objects are long-lived, so this saves ongoing work at draw time in
       * exchange for a bit at context setup/first draw.  And the number of
       * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
       * hurt much.
       */
      bool found = false;
      for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
         if (msm_ring->u.reloc_bos[i] == reloc->bo) {
            found = true;
            break;
         }
      }
      if (!found) {
         unsigned idx = APPEND(&msm_ring->u, reloc_bos);
         msm_ring->u.reloc_bos[idx] = fd_bo_ref(reloc->bo);
      }

      pipe = msm_ring->u.pipe;
   } else {
      struct msm_submit_sp *msm_submit =
            to_msm_submit_sp(msm_ring->u.submit);

      msm_submit_append_bo(msm_submit, reloc->bo);

      pipe = msm_ring->u.submit->pipe;
   }

   uint64_t iova = reloc->bo->iova + reloc->offset;
   int shift = reloc->shift;

   if (shift < 0)
      iova >>= -shift;
   else
      iova <<= shift;

   uint32_t dword = iova;

   (*ring->cur++) = dword | reloc->or;

   if (pipe->gpu_id >= 500) {
      dword = iova >> 32;
      (*ring->cur++) = dword | reloc->orhi;
   }
}

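/* Emit a reference to another ringbuffer: either one specific finalized
 * cmdstream buffer of a growable ring, or the target's current buffer.
 * For stateobj targets, also propagate the target's bo references into
 * this ring/submit.  Returns the size of the referenced cmdstream in bytes.
 */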
static uint32_t
msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
      struct fd_ringbuffer *target, uint32_t cmd_idx)
{
   struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
   struct fd_bo *bo;
   uint32_t size;

   if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
         (cmd_idx < msm_target->u.nr_cmds)) {
      bo = msm_target->u.cmds[cmd_idx].ring_bo;
      size = msm_target->u.cmds[cmd_idx].size;
   } else {
      bo = msm_target->ring_bo;
      size = offset_bytes(target->cur, target->start);
   }

   msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){
      .bo = bo,
      .offset = msm_target->offset,
   });

   if (!(target->flags & _FD_RINGBUFFER_OBJECT))
      return size;

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
         unsigned idx = APPEND(&msm_ring->u, reloc_bos);

         msm_ring->u.reloc_bos[idx] =
               fd_bo_ref(msm_target->u.reloc_bos[i]);
      }
   } else {
      // TODO it would be nice to know whether we have already
      // seen this target before.  But hopefully we hit the
      // append_bo() fast path enough for this to not matter:
      struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);

      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
         msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i]);
      }
   }

   return size;
}

static uint32_t
msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static void
msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   fd_bo_del(msm_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(msm_ring->u.reloc_bos[i]);
      }
      free(msm_ring->u.reloc_bos);

      free(msm_ring);
   } else {
      struct fd_submit *submit = msm_ring->u.submit;

      for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
         fd_bo_del(msm_ring->u.cmds[i].ring_bo);
      }
      free(msm_ring->u.cmds);

      slab_free(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .destroy = msm_ringbuffer_sp_destroy,
};

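/* Common init for all ring types: map the (possibly sub-allocated) bo and
 * point start/cur/end at the usable region.
 */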
static inline struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &msm_ring->base;

   /* We don't do any translation from internal FD_RELOC flags to MSM flags. */
   STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ);
   STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE);
   STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP);

   debug_assert(msm_ring->ring_bo);

   uint8_t *base = fd_bo_map(msm_ring->ring_bo);
   ring->start = (void *)(base + msm_ring->offset);
   ring->end = &(ring->start[size/4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   ring->funcs = &ring_funcs;

   // TODO initializing these could probably be conditional on flags
   // since unneeded for FD_RINGBUFFER_STAGING case..
   msm_ring->u.cmds = NULL;
   msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;

   msm_ring->u.reloc_bos = NULL;
   msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;

   return ring;
}

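/* Stateobj ("ringbuffer object") constructor.  Not tied to a submit, so it
 * is heap allocated rather than coming from a submit's ring_pool, and it
 * tracks referenced bos in u.reloc_bos so they can be added to each submit
 * that references it.
 */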
struct fd_ringbuffer *
msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));

   msm_ring->u.pipe = pipe;
   msm_ring->offset = 0;
   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);
   msm_ring->base.refcnt = 1;

   return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
}