freedreno/drm: fix relocs in nested stateobjs
[mesa.git] / src / freedreno / drm / msm_ringbuffer_sp.c
/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>

#include "util/hash_table.h"
#include "util/slab.h"

#include "drm/freedreno_ringbuffer.h"
#include "msm_priv.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */

#define INIT_SIZE 0x1000

static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;

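/* Softpin submit: tracks the bos table sent to the kernel, plus a hash
 * table (and per-bo seqno fast path) to de-duplicate bo entries, and the
 * primary and suballoc rings for this submit:
 */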
struct msm_submit_sp {
	struct fd_submit base;

	DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
	DECLARE_ARRAY(struct fd_bo *, bos);

	unsigned seqno;

	/* maps fd_bo to idx in bos table: */
	struct hash_table *bo_table;

	struct slab_mempool ring_pool;

	struct fd_ringbuffer *primary;

	/* Allow for sub-allocation of stateobj ring buffers (i.e. sharing
	 * the same underlying bo).
	 *
	 * We also rely on the previous stateobj having been fully constructed
	 * so we can reclaim extra space at its end.
	 */
	struct fd_ringbuffer *suballoc_ring;
};
FD_DEFINE_CAST(fd_submit, msm_submit_sp);

/* For FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
 * and sizes.  I.e. a finalized buffer can have no more commands appended to
 * it.
 */
struct msm_cmd_sp {
	struct fd_bo *ring_bo;
	unsigned size;
};

/* For _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to
 * later copy into the submit when the stateobj rb is referenced by a
 * regular rb:
 */
struct msm_reloc_bo_sp {
	struct fd_bo *bo;
	unsigned flags;
};
struct msm_ringbuffer_sp {
	struct fd_ringbuffer base;

	/* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
	unsigned offset;

	// TODO check disasm.. hopefully the compiler's CSE can realize that
	// reloc_bos and cmds are at the same offsets and optimize the
	// divergent cases into a single case
	union {
		/* for _FD_RINGBUFFER_OBJECT case: */
		struct {
			struct fd_pipe *pipe;
			DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos);
		};
		/* for other cases: */
		struct {
			struct fd_submit *submit;
			DECLARE_ARRAY(struct msm_cmd_sp, cmds);
		};
	} u;

	struct fd_bo *ring_bo;
};
FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer * msm_ringbuffer_sp_init(
		struct msm_ringbuffer_sp *msm_ring,
		uint32_t size, enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
static uint32_t
append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags)
{
	struct msm_bo *msm_bo = to_msm_bo(bo);
	uint32_t idx;
	pthread_mutex_lock(&idx_lock);
	if (likely(msm_bo->current_submit_seqno == submit->seqno)) {
		idx = msm_bo->idx;
	} else {
		uint32_t hash = _mesa_hash_pointer(bo);
		struct hash_entry *entry;

		entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
		if (entry) {
			/* found */
			idx = (uint32_t)(uintptr_t)entry->data;
		} else {
			idx = APPEND(submit, submit_bos);
			idx = APPEND(submit, bos);

			submit->submit_bos[idx].flags = 0;
			submit->submit_bos[idx].handle = bo->handle;
			submit->submit_bos[idx].presumed = 0;

			submit->bos[idx] = fd_bo_ref(bo);

			_mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
					(void *)(uintptr_t)idx);
		}
		msm_bo->current_submit_seqno = submit->seqno;
		msm_bo->idx = idx;
	}
	pthread_mutex_unlock(&idx_lock);
	if (flags & FD_RELOC_READ)
		submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ;
	if (flags & FD_RELOC_WRITE)
		submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE;
	return idx;
}

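/* Pick a ring bo for a FD_RINGBUFFER_STREAMING ringbuffer: sub-allocate out
 * of the tail of the previous streaming ring's bo when there is room,
 * otherwise start a new suballocation bo:
 */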
static void
msm_submit_suballoc_ring_bo(struct fd_submit *submit,
		struct msm_ringbuffer_sp *msm_ring, uint32_t size)
{
	struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
	unsigned suballoc_offset = 0;
	struct fd_bo *suballoc_bo = NULL;

	if (msm_submit->suballoc_ring) {
		struct msm_ringbuffer_sp *suballoc_ring =
				to_msm_ringbuffer_sp(msm_submit->suballoc_ring);

		suballoc_bo = suballoc_ring->ring_bo;
		suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) +
				suballoc_ring->offset;

		suballoc_offset = align(suballoc_offset, 0x10);

		if ((size + suballoc_offset) > suballoc_bo->size) {
			suballoc_bo = NULL;
		}
	}

	if (!suballoc_bo) {
		// TODO possibly larger size for streaming bo?
		msm_ring->ring_bo = fd_bo_new_ring(
				submit->pipe->dev, 0x8000, 0);
		msm_ring->offset = 0;
	} else {
		msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
		msm_ring->offset = suballoc_offset;
	}

	struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;

	msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);

	if (old_suballoc_ring)
		fd_ringbuffer_del(old_suballoc_ring);
}

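/* Create a new ringbuffer for the submit: STREAMING rb's are sub-allocated,
 * GROWABLE rb's start at INIT_SIZE and grow on demand, and the (single)
 * PRIMARY rb is remembered on the submit for flush time:
 */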
static struct fd_ringbuffer *
msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
		enum fd_ringbuffer_flags flags)
{
	struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
	struct msm_ringbuffer_sp *msm_ring;

	msm_ring = slab_alloc_st(&msm_submit->ring_pool);

	msm_ring->u.submit = submit;

	/* NOTE: needs to be before _suballoc_ring_bo() since it could
	 * increment the refcnt of the current ring
	 */
	msm_ring->base.refcnt = 1;

	if (flags & FD_RINGBUFFER_STREAMING) {
		msm_submit_suballoc_ring_bo(submit, msm_ring, size);
	} else {
		if (flags & FD_RINGBUFFER_GROWABLE)
			size = INIT_SIZE;

		msm_ring->offset = 0;
		msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0);
	}

	if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
		return NULL;

	if (flags & FD_RINGBUFFER_PRIMARY) {
		debug_assert(!msm_submit->primary);
		msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base);
	}

	return &msm_ring->base;
}

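/* Flush the submit to the kernel: build the cmds table from the primary
 * ring's finalized cmd buffers, then issue the DRM_MSM_GEM_SUBMIT ioctl,
 * handling in/out fence fd's:
 */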
static int
msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
		int *out_fence_fd, uint32_t *out_fence)
{
	struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
	struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);
	struct drm_msm_gem_submit req = {
			.flags = msm_pipe->pipe,
			.queueid = msm_pipe->queue_id,
	};
	int ret;

	debug_assert(msm_submit->primary);
	finalize_current_cmd(msm_submit->primary);

	struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary);
	struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds];

	for (unsigned i = 0; i < primary->u.nr_cmds; i++) {
		cmds[i].type = MSM_SUBMIT_CMD_BUF;
		cmds[i].submit_idx =
			append_bo(msm_submit, primary->u.cmds[i].ring_bo, FD_RELOC_READ);
		cmds[i].submit_offset = primary->offset;
		cmds[i].size = primary->u.cmds[i].size;
		cmds[i].pad = 0;
		cmds[i].nr_relocs = 0;
	}

	if (in_fence_fd != -1) {
		req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
		req.fence_fd = in_fence_fd;
	}

	if (out_fence_fd) {
		req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
	}

	/* needs to be after the cmds loop above, since append_bo() can grow
	 * the bos table:
	 */
	req.bos = VOID2U64(msm_submit->submit_bos);
	req.nr_bos = msm_submit->nr_submit_bos;
	req.cmds = VOID2U64(cmds);
	req.nr_cmds = primary->u.nr_cmds;

	DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);

	ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT,
			&req, sizeof(req));
	if (ret) {
		ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
		msm_dump_submit(&req);
	} else {
		if (out_fence)
			*out_fence = req.fence;

		if (out_fence_fd)
			*out_fence_fd = req.fence_fd;
	}

	return ret;
}

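/* Destroy the submit: drop the primary/suballoc ring refs, the bo table,
 * the ring slab pool, and the per-submit bo references:
 */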
static void
msm_submit_sp_destroy(struct fd_submit *submit)
{
	struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

	if (msm_submit->primary)
		fd_ringbuffer_del(msm_submit->primary);
	if (msm_submit->suballoc_ring)
		fd_ringbuffer_del(msm_submit->suballoc_ring);

	_mesa_hash_table_destroy(msm_submit->bo_table, NULL);

	// TODO it would be nice to debug_assert() that all rb's have been
	// freed back to the slab, because anything left over is an
	// indication that we are leaking bo's
	slab_destroy(&msm_submit->ring_pool);

	for (unsigned i = 0; i < msm_submit->nr_bos; i++)
		fd_bo_del(msm_submit->bos[i]);

	free(msm_submit->submit_bos);
	free(msm_submit->bos);
	free(msm_submit);
}

static const struct fd_submit_funcs submit_funcs = {
		.new_ringbuffer = msm_submit_sp_new_ringbuffer,
		.flush = msm_submit_sp_flush,
		.destroy = msm_submit_sp_destroy,
};

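/* Create a new softpin submit for the pipe, with a fresh seqno for the
 * per-bo fast path in append_bo() and an empty bo table:
 */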
struct fd_submit *
msm_submit_sp_new(struct fd_pipe *pipe)
{
	struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
	struct fd_submit *submit;
	static unsigned submit_cnt = 0;

	msm_submit->seqno = ++submit_cnt;
	msm_submit->bo_table = _mesa_hash_table_create(NULL,
			_mesa_hash_pointer, _mesa_key_pointer_equal);
	// TODO tune size:
	slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);

	submit = &msm_submit->base;
	submit->pipe = pipe;
	submit->funcs = &submit_funcs;

	return submit;
}

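/* Finalize the current cmd buffer of a (non-stateobj) ring, recording its
 * bo and the number of bytes emitted so far into the cmds table:
 */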
static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
	debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
	unsigned idx = APPEND(&msm_ring->u, cmds);

	msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo);
	msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start);
}

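/* For FD_RINGBUFFER_GROWABLE rb's: finalize the current cmd buffer and
 * switch emission to a freshly allocated ring bo of the requested size:
 */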
static void
msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
	struct fd_pipe *pipe = msm_ring->u.submit->pipe;

	debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

	finalize_current_cmd(ring);

	fd_bo_del(msm_ring->ring_bo);
	msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0);

	ring->start = fd_bo_map(msm_ring->ring_bo);
	ring->end = &(ring->start[size/4]);
	ring->cur = ring->start;
	ring->size = size;
}

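/* Emit a reloc: with softpin the iova is known up front, so just track the
 * bo (in the submit's bo table, or in the stateobj's reloc_bos for later)
 * and write the shifted address dword(s) into the cmdstream:
 */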
static void
msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring,
		const struct fd_reloc *reloc)
{
	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
	struct fd_pipe *pipe;

	if (ring->flags & _FD_RINGBUFFER_OBJECT) {
		unsigned idx = APPEND(&msm_ring->u, reloc_bos);

		msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo);
		msm_ring->u.reloc_bos[idx].flags = reloc->flags;

		pipe = msm_ring->u.pipe;
	} else {
		struct msm_submit_sp *msm_submit =
				to_msm_submit_sp(msm_ring->u.submit);

		append_bo(msm_submit, reloc->bo, reloc->flags);

		pipe = msm_ring->u.submit->pipe;
	}

	uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset;
	uint32_t dword = iova;
	int shift = reloc->shift;

	if (shift < 0)
		dword >>= -shift;
	else
		dword <<= shift;

	(*ring->cur++) = dword | reloc->or;

	if (pipe->gpu_id >= 500) {
		dword = iova >> 32;
		shift -= 32;

		if (shift < 0)
			dword >>= -shift;
		else
			dword <<= shift;

		(*ring->cur++) = dword | reloc->orhi;
	}
}

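/* Emit a reference to a target ring (i.e. an IB to a stateobj or to one of
 * a growable ring's cmd buffers), and propagate the target's tracked bo's:
 * into this ring's reloc_bos when it is itself a stateobj (the nested
 * stateobj case), otherwise directly into the submit's bo table:
 */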
static uint32_t
msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
		struct fd_ringbuffer *target, uint32_t cmd_idx)
{
	struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
	struct fd_bo *bo;
	uint32_t size;

	if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
			(cmd_idx < msm_target->u.nr_cmds)) {
		bo = msm_target->u.cmds[cmd_idx].ring_bo;
		size = msm_target->u.cmds[cmd_idx].size;
	} else {
		bo = msm_target->ring_bo;
		size = offset_bytes(target->cur, target->start);
	}

	msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){
		.bo = bo,
		.flags = FD_RELOC_READ,
		.offset = msm_target->offset,
	});

	if (!(target->flags & _FD_RINGBUFFER_OBJECT))
		return size;

	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

	if (ring->flags & _FD_RINGBUFFER_OBJECT) {
		for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
			unsigned idx = APPEND(&msm_ring->u, reloc_bos);

			msm_ring->u.reloc_bos[idx].bo =
				fd_bo_ref(msm_target->u.reloc_bos[i].bo);
			msm_ring->u.reloc_bos[idx].flags =
				msm_target->u.reloc_bos[i].flags;
		}
	} else {
		// TODO it would be nice to know whether we have already
		// seen this target before.  But hopefully we hit the
		// append_bo() fast path enough for this to not matter:
		struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);

		for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
			append_bo(msm_submit, msm_target->u.reloc_bos[i].bo,
				msm_target->u.reloc_bos[i].flags);
		}
	}

	return size;
}

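/* Number of distinct cmd buffers in the ring: growable rings have one per
 * finalized buffer plus the current one, everything else has exactly one:
 */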
static uint32_t
msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
	if (ring->flags & FD_RINGBUFFER_GROWABLE)
		return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
	return 1;
}

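/* Destroy the ring: stateobj rb's are heap allocated and also drop their
 * tracked reloc bo's; other rb's drop their cmd buffer refs and return to
 * the submit's slab pool:
 */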
static void
msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

	fd_bo_del(msm_ring->ring_bo);

	if (ring->flags & _FD_RINGBUFFER_OBJECT) {
		for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
			fd_bo_del(msm_ring->u.reloc_bos[i].bo);
		}

		free(msm_ring);
	} else {
		struct fd_submit *submit = msm_ring->u.submit;

		for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
			fd_bo_del(msm_ring->u.cmds[i].ring_bo);
		}

		slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
	}
}

static const struct fd_ringbuffer_funcs ring_funcs = {
		.grow = msm_ringbuffer_sp_grow,
		.emit_reloc = msm_ringbuffer_sp_emit_reloc,
		.emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
		.cmd_count = msm_ringbuffer_sp_cmd_count,
		.destroy = msm_ringbuffer_sp_destroy,
};

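/* Common init for both submit-owned and stateobj rb's: map the ring bo at
 * the (possibly sub-allocated) offset and reset the tracked cmds/reloc_bos
 * arrays:
 */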
static inline struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
		enum fd_ringbuffer_flags flags)
{
	struct fd_ringbuffer *ring = &msm_ring->base;

	debug_assert(msm_ring->ring_bo);

	uint8_t *base = fd_bo_map(msm_ring->ring_bo);
	ring->start = (void *)(base + msm_ring->offset);
	ring->end = &(ring->start[size/4]);
	ring->cur = ring->start;

	ring->size = size;
	ring->flags = flags;

	ring->funcs = &ring_funcs;

	// TODO initializing these could probably be conditional on flags,
	// since they are unneeded for the FD_RINGBUFFER_STREAMING case..
	msm_ring->u.cmds = NULL;
	msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;

	msm_ring->u.reloc_bos = NULL;
	msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;

	return ring;
}

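/* Create a stateobj (_FD_RINGBUFFER_OBJECT) ringbuffer, which is not tied
 * to a submit and tracks its referenced bo's in reloc_bos until it is
 * referenced from a regular ring:
 */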
struct fd_ringbuffer *
msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
	struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));

	msm_ring->u.pipe = pipe;
	msm_ring->offset = 0;
	msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0);
	msm_ring->base.refcnt = 1;

	return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
}