freedreno/drm: sync uapi and enable softpin
[mesa.git] / src / freedreno / drm / msm_ringbuffer_sp.c
/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>

#include "util/hash_table.h"
#include "util/slab.h"

#include "drm/freedreno_ringbuffer.h"
#include "msm_priv.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */


#define INIT_SIZE 0x1000

static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;


struct msm_submit_sp {
   struct fd_submit base;

   DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
   DECLARE_ARRAY(struct fd_bo *, bos);

   unsigned seqno;

   /* maps fd_bo to idx in bos table: */
   struct hash_table *bo_table;

   struct slab_mempool ring_pool;

   struct fd_ringbuffer *primary;

   /* Allow for sub-allocation of stateobj ring buffers (ie. sharing
    * the same underlying bo)..
    *
    * We also rely on the previous stateobj having been fully constructed
    * so we can reclaim extra space at its end.
    */
   struct fd_ringbuffer *suballoc_ring;
};
FD_DEFINE_CAST(fd_submit, msm_submit_sp);

/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
 * and sizes.  Ie. a finalized buffer can have no more commands appended to
 * it.
 */
struct msm_cmd_sp {
   struct fd_bo *ring_bo;
   unsigned size;
};

/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to
 * copy into the submit when the stateobj rb is later referenced by a
 * regular rb:
 */
struct msm_reloc_bo_sp {
   struct fd_bo *bo;
   unsigned flags;
};

struct msm_ringbuffer_sp {
   struct fd_ringbuffer base;

   /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
   unsigned offset;

   // TODO check disasm.. hopefully the compiler's CSE can realize that
   // reloc_bos and cmds are at the same offsets and optimize some
   // divergent cases into a single case
   union {
      /* for _FD_RINGBUFFER_OBJECT case: */
      struct {
         struct fd_pipe *pipe;
         DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos);
      };
      /* for other cases: */
      struct {
         struct fd_submit *submit;
         DECLARE_ARRAY(struct msm_cmd_sp, cmds);
      };
   } u;

   struct fd_bo *ring_bo;
};
FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer * msm_ringbuffer_sp_init(
      struct msm_ringbuffer_sp *msm_ring,
      uint32_t size, enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
static uint32_t
append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags)
{
   struct msm_bo *msm_bo = to_msm_bo(bo);
   uint32_t idx;
   pthread_mutex_lock(&idx_lock);
   if (likely(msm_bo->current_submit_seqno == submit->seqno)) {
      idx = msm_bo->idx;
   } else {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else {
         idx = APPEND(submit, submit_bos);
         idx = APPEND(submit, bos);

         submit->submit_bos[idx].flags = 0;
         submit->submit_bos[idx].handle = bo->handle;
         submit->submit_bos[idx].presumed = 0;

         submit->bos[idx] = fd_bo_ref(bo);

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
               (void *)(uintptr_t)idx);
      }
      msm_bo->current_submit_seqno = submit->seqno;
      msm_bo->idx = idx;
   }
   pthread_mutex_unlock(&idx_lock);
   if (flags & FD_RELOC_READ)
      submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ;
   if (flags & FD_RELOC_WRITE)
      submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE;
   if (flags & FD_RELOC_DUMP)
      submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_DUMP;
   return idx;
}

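/* Pick a backing bo for a sub-allocated (FD_RINGBUFFER_STREAMING) rb: if the
 * current suballoc bo still has room after the previously constructed
 * stateobj, re-use it at an aligned offset, otherwise allocate a fresh bo
 * and make it the new suballoc ring.
 */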
static void
msm_submit_suballoc_ring_bo(struct fd_submit *submit,
      struct msm_ringbuffer_sp *msm_ring, uint32_t size)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (msm_submit->suballoc_ring) {
      struct msm_ringbuffer_sp *suballoc_ring =
         to_msm_ringbuffer_sp(msm_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) +
            suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, 0x10);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      msm_ring->ring_bo = fd_bo_new_ring(
            submit->pipe->dev, 0x8000, 0);
      msm_ring->offset = 0;
   } else {
      msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
      msm_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;

   msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

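/* Allocate a ringbuffer from the submit's slab pool.  Streaming rb's are
 * sub-allocated from a shared bo, growable rb's start at INIT_SIZE, and the
 * FD_RINGBUFFER_PRIMARY rb is remembered so flush() knows what to submit.
 */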
static struct fd_ringbuffer *
msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
      enum fd_ringbuffer_flags flags)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_ringbuffer_sp *msm_ring;

   msm_ring = slab_alloc_st(&msm_submit->ring_pool);

   msm_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   msm_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      msm_submit_suballoc_ring_bo(submit, msm_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = INIT_SIZE;

      msm_ring->offset = 0;
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0);
   }

   if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
      return NULL;

   if (flags & FD_RINGBUFFER_PRIMARY) {
      debug_assert(!msm_submit->primary);
      msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base);
   }

   return &msm_ring->base;
}

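/* Flush the submit to the kernel: finalize the primary ring, build the cmds
 * table from its finalized cmd buffers (the bos table has already been
 * accumulated incrementally via append_bo()), and issue the
 * DRM_MSM_GEM_SUBMIT ioctl, optionally with in/out fence fd's.
 */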
static int
msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
      int *out_fence_fd, uint32_t *out_fence)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);
   struct drm_msm_gem_submit req = {
      .flags = msm_pipe->pipe,
      .queueid = msm_pipe->queue_id,
   };
   int ret;

   debug_assert(msm_submit->primary);
   finalize_current_cmd(msm_submit->primary);

   struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary);
   struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds];

   for (unsigned i = 0; i < primary->u.nr_cmds; i++) {
      cmds[i].type = MSM_SUBMIT_CMD_BUF;
      cmds[i].submit_idx = append_bo(msm_submit,
            primary->u.cmds[i].ring_bo, FD_RELOC_READ | FD_RELOC_DUMP);
      cmds[i].submit_offset = primary->offset;
      cmds[i].size = primary->u.cmds[i].size;
      cmds[i].pad = 0;
      cmds[i].nr_relocs = 0;
   }

   if (in_fence_fd != -1) {
      req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
      req.fence_fd = in_fence_fd;
   }

   if (out_fence_fd) {
      req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
   }

   /* needs to be after the append_bo() calls above, as they could grow the
    * bos table:
    */
   req.bos = VOID2U64(msm_submit->submit_bos),
   req.nr_bos = msm_submit->nr_submit_bos;
   req.cmds = VOID2U64(cmds),
   req.nr_cmds = primary->u.nr_cmds;

   DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);

   ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT,
         &req, sizeof(req));
   if (ret) {
      ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
      msm_dump_submit(&req);
   } else {
      if (out_fence)
         *out_fence = req.fence;

      if (out_fence_fd)
         *out_fence_fd = req.fence_fd;
   }

   return ret;
}

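/* Drop the submit's ringbuffer references, bo table, and ring slab pool, and
 * release the reference held on each bo in the bos table.
 */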
static void
msm_submit_sp_destroy(struct fd_submit *submit)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   if (msm_submit->primary)
      fd_ringbuffer_del(msm_submit->primary);
   if (msm_submit->suballoc_ring)
      fd_ringbuffer_del(msm_submit->suballoc_ring);

   _mesa_hash_table_destroy(msm_submit->bo_table, NULL);

   // TODO it would be nice to have a way to debug_assert() if all
   // rb's haven't been free'd back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy(&msm_submit->ring_pool);

   for (unsigned i = 0; i < msm_submit->nr_bos; i++)
      fd_bo_del(msm_submit->bos[i]);

   free(msm_submit->submit_bos);
   free(msm_submit->bos);
   free(msm_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = msm_submit_sp_new_ringbuffer,
   .flush = msm_submit_sp_flush,
   .destroy = msm_submit_sp_destroy,
};

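/* Create a new softpin submit, with a per-submit bo table (keyed on the
 * fd_bo pointer), a seqno used for the append_bo() fast path, and a slab
 * pool for the (typically short-lived) ringbuffer allocations.
 */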
struct fd_submit *
msm_submit_sp_new(struct fd_pipe *pipe)
{
   struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
   struct fd_submit *submit;
   static unsigned submit_cnt = 0;

   msm_submit->seqno = ++submit_cnt;
   msm_submit->bo_table = _mesa_hash_table_create(NULL,
         _mesa_hash_pointer, _mesa_key_pointer_equal);
   // TODO tune size:
   slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);

   submit = &msm_submit->base;
   submit->pipe = pipe;
   submit->funcs = &submit_funcs;

   return submit;
}


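/* Record the current ring_bo and the amount of cmdstream emitted so far as
 * a finalized cmd entry; used before growing the ring and at flush time.
 */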
static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   unsigned idx = APPEND(&msm_ring->u, cmds);

   msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo);
   msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start);
}

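/* Grow a FD_RINGBUFFER_GROWABLE ring: finalize the cmdstream in the current
 * bo and switch over to a newly allocated ring_bo.
 */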
static void
msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_pipe *pipe = msm_ring->u.submit->pipe;

   debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(msm_ring->ring_bo);
   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0);

   ring->start = fd_bo_map(msm_ring->ring_bo);
   ring->end = &(ring->start[size/4]);
   ring->cur = ring->start;
   ring->size = size;
}

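/* With softpin there is no kernel-side relocation: the bo's iova is known up
 * front, so just track the bo (in the stateobj's reloc_bos or the submit's
 * bos table) and write the shifted address directly into the cmdstream, as a
 * single dword, plus a high dword on gpu_id >= 500 (64-bit iova).
 */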
static void
msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring,
      const struct fd_reloc *reloc)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_pipe *pipe;

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      unsigned idx = APPEND(&msm_ring->u, reloc_bos);

      msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo);
      msm_ring->u.reloc_bos[idx].flags = reloc->flags;

      pipe = msm_ring->u.pipe;
   } else {
      struct msm_submit_sp *msm_submit =
            to_msm_submit_sp(msm_ring->u.submit);

      append_bo(msm_submit, reloc->bo, reloc->flags);

      pipe = msm_ring->u.submit->pipe;
   }

   uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset;
   uint32_t dword = iova;
   int shift = reloc->shift;

   if (shift < 0)
      dword >>= -shift;
   else
      dword <<= shift;

   (*ring->cur++) = dword | reloc->or;

   if (pipe->gpu_id >= 500) {
      dword = iova >> 32;
      shift -= 32;

      if (shift < 0)
         dword >>= -shift;
      else
         dword <<= shift;

      (*ring->cur++) = dword | reloc->orhi;
   }
}

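/* Emit a reference to another ringbuffer's cmdstream (ie. an IB), and
 * propagate the target's tracked bo's: into this ring's reloc_bos if we are
 * ourselves a stateobj, otherwise directly into the submit's bos table.
 * Returns the size of the referenced cmdstream.
 */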
static uint32_t
msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
      struct fd_ringbuffer *target, uint32_t cmd_idx)
{
   struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
   struct fd_bo *bo;
   uint32_t size;

   if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
         (cmd_idx < msm_target->u.nr_cmds)) {
      bo = msm_target->u.cmds[cmd_idx].ring_bo;
      size = msm_target->u.cmds[cmd_idx].size;
   } else {
      bo = msm_target->ring_bo;
      size = offset_bytes(target->cur, target->start);
   }

   msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){
      .bo = bo,
      .flags = FD_RELOC_READ | FD_RELOC_DUMP,
      .offset = msm_target->offset,
   });

   if (!(target->flags & _FD_RINGBUFFER_OBJECT))
      return size;

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
         unsigned idx = APPEND(&msm_ring->u, reloc_bos);

         msm_ring->u.reloc_bos[idx].bo =
               fd_bo_ref(msm_target->u.reloc_bos[i].bo);
         msm_ring->u.reloc_bos[idx].flags =
               msm_target->u.reloc_bos[i].flags;
      }
   } else {
      // TODO it would be nice to know whether we have already
      // seen this target before.  But hopefully we hit the
      // append_bo() fast path enough for this to not matter:
      struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);

      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
         append_bo(msm_submit, msm_target->u.reloc_bos[i].bo,
               msm_target->u.reloc_bos[i].flags);
      }
   }

   return size;
}

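/* Growable rings report their finalized cmd buffers plus the one currently
 * being built; everything else is a single cmd buffer.
 */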
static uint32_t
msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static void
msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   fd_bo_del(msm_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(msm_ring->u.reloc_bos[i].bo);
      }

      free(msm_ring);
   } else {
      struct fd_submit *submit = msm_ring->u.submit;

      for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
         fd_bo_del(msm_ring->u.cmds[i].ring_bo);
      }

      slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .destroy = msm_ringbuffer_sp_destroy,
};

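/* Common ringbuffer setup: map the (possibly sub-allocated) ring_bo and
 * initialize the start/cur/end pointers and the per-flavor tracking arrays.
 */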
static inline struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &msm_ring->base;

   debug_assert(msm_ring->ring_bo);

   uint8_t *base = fd_bo_map(msm_ring->ring_bo);
   ring->start = (void *)(base + msm_ring->offset);
   ring->end = &(ring->start[size/4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   ring->funcs = &ring_funcs;

   // TODO initializing these could probably be conditional on flags
   // since unneeded for FD_RINGBUFFER_STAGING case..
   msm_ring->u.cmds = NULL;
   msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;

   msm_ring->u.reloc_bos = NULL;
   msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;

   return ring;
}

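/* Create a stateobj (_FD_RINGBUFFER_OBJECT) ringbuffer, which is not tied to
 * a submit; the bo's it references are tracked in reloc_bos and copied into
 * a submit when the stateobj is later emitted (see emit_reloc_ring()).
 */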
struct fd_ringbuffer *
msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));

   msm_ring->u.pipe = pipe;
   msm_ring->offset = 0;
   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0);
   msm_ring->base.refcnt = 1;

   return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
}