/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>

#include "util/hash_table.h"
#include "util/slab.h"

#include "drm/freedreno_ringbuffer.h"
#include "msm_priv.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */
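
/*
 * For orientation, a rough sketch of how the core driver is expected to
 * drive this interface (hedged: the wrapper names below come from
 * freedreno_drmif.h/freedreno_ringbuffer.h, not from this file, so treat
 * this as an illustration rather than the canonical call sequence):
 *
 *    struct fd_submit *submit = fd_submit_new(pipe);
 *    struct fd_ringbuffer *ring =
 *          fd_submit_new_ringbuffer(submit, 0x1000, FD_RINGBUFFER_PRIMARY);
 *    ... emit cmdstream packets into ring ...
 *    fd_submit_flush(submit, -1, NULL, NULL);   // no in/out fence fds
 *    fd_ringbuffer_del(ring);
 *    fd_submit_del(submit);
 */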

#define INIT_SIZE 0x1000


struct msm_submit_sp {
	struct fd_submit base;

	DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
	DECLARE_ARRAY(struct fd_bo *, bos);

	/* maps fd_bo to idx in bos table: */
	struct hash_table *bo_table;

	struct slab_child_pool ring_pool;

	struct fd_ringbuffer *primary;

	/* Allow for sub-allocation of stateobj ring buffers (ie. sharing
	 * the same underlying bo)..
	 *
	 * We also rely on the previous stateobj having been fully constructed
	 * so we can reclaim extra space at its end.
	 */
	struct fd_ringbuffer *suballoc_ring;
};
FD_DEFINE_CAST(fd_submit, msm_submit_sp);

/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
 * and sizes.  Ie. a finalized buffer can have no more commands appended to
 * it.
 */
struct msm_cmd_sp {
	struct fd_bo *ring_bo;
	unsigned size;
};

/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags, so
 * they can be copied into the submit when the stateobj rb is later
 * referenced by a regular rb:
 */
struct msm_reloc_bo_sp {
	struct fd_bo *bo;
	unsigned flags;
};

struct msm_ringbuffer_sp {
	struct fd_ringbuffer base;

	/* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
	unsigned offset;

	// TODO check disasm.. hopefully the compiler's CSE can realize that
	// reloc_bos and cmds are at the same offsets, and optimize the
	// divergent cases into a single one
	union {
		/* for _FD_RINGBUFFER_OBJECT case: */
		struct {
			struct fd_pipe *pipe;
			DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos);
		};
		/* for other cases: */
		struct {
			struct fd_submit *submit;
			DECLARE_ARRAY(struct msm_cmd_sp, cmds);
		};
	} u;

	struct fd_bo *ring_bo;
};
FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer * msm_ringbuffer_sp_init(
		struct msm_ringbuffer_sp *msm_ring,
		uint32_t size, enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
static uint32_t
append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags)
{
	struct msm_bo *msm_bo = to_msm_bo(bo);
	uint32_t idx;

	/* NOTE: it is legal to use the same bo on different threads for
	 * different submits.  But it is not legal to use the same submit
	 * from different threads.
	 */
	idx = READ_ONCE(msm_bo->idx);

	/* The cached idx is only a hint; verify that it actually refers to
	 * this bo in *this* submit's table before trusting it:
	 */
	if (unlikely((idx >= submit->nr_submit_bos) ||
			(submit->submit_bos[idx].handle != bo->handle))) {
		uint32_t hash = _mesa_hash_pointer(bo);
		struct hash_entry *entry;

		entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
		if (entry) {
			/* found */
			idx = (uint32_t)(uintptr_t)entry->data;
		} else {
			idx = APPEND(submit, submit_bos);
			idx = APPEND(submit, bos);

			submit->submit_bos[idx].flags = 0;
			submit->submit_bos[idx].handle = bo->handle;
			submit->submit_bos[idx].presumed = 0;

			submit->bos[idx] = fd_bo_ref(bo);

			_mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
					(void *)(uintptr_t)idx);
		}
		msm_bo->idx = idx;
	}

	if (flags & FD_RELOC_READ)
		submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ;
	if (flags & FD_RELOC_WRITE)
		submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE;
	if (flags & FD_RELOC_DUMP)
		submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_DUMP;

	return idx;
}
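
/*
 * Note the dedup guarantee this function provides: adding the same bo
 * twice just ORs more flags into the one existing entry.  Illustrative
 * (hypothetical) usage:
 *
 *    uint32_t a = append_bo(submit, bo, FD_RELOC_READ);
 *    uint32_t b = append_bo(submit, bo, FD_RELOC_WRITE);
 *    assert(a == b);   // same slot, now marked for both read and write
 */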

static void
msm_submit_suballoc_ring_bo(struct fd_submit *submit,
		struct msm_ringbuffer_sp *msm_ring, uint32_t size)
{
	struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
	unsigned suballoc_offset = 0;
	struct fd_bo *suballoc_bo = NULL;

	if (msm_submit->suballoc_ring) {
		struct msm_ringbuffer_sp *suballoc_ring =
				to_msm_ringbuffer_sp(msm_submit->suballoc_ring);

		suballoc_bo = suballoc_ring->ring_bo;
		suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) +
				suballoc_ring->offset;

		suballoc_offset = align(suballoc_offset, 0x10);

		if ((size + suballoc_offset) > suballoc_bo->size) {
			suballoc_bo = NULL;
		}
	}

	if (!suballoc_bo) {
		// TODO possibly larger size for streaming bo?
		msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev,
				0x8000, DRM_FREEDRENO_GEM_GPUREADONLY);
		msm_ring->offset = 0;
	} else {
		msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
		msm_ring->offset = suballoc_offset;
	}

	struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;

	msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);

	if (old_suballoc_ring)
		fd_ringbuffer_del(old_suballoc_ring);
}
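
/*
 * Packing example with (hypothetical) sizes: given a fresh 0x8000 byte
 * suballoc bo, a first 0x100 byte streaming ring lands at offset 0x0, a
 * second 0x2c byte ring at align(0x0 + 0x100, 0x10) = 0x100, a third at
 * align(0x100 + 0x2c, 0x10) = 0x130, and so on until size + offset would
 * exceed 0x8000, at which point a new bo is allocated.
 */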

static struct fd_ringbuffer *
msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
		enum fd_ringbuffer_flags flags)
{
	struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
	struct msm_ringbuffer_sp *msm_ring;

	msm_ring = slab_alloc(&msm_submit->ring_pool);

	msm_ring->u.submit = submit;

	/* NOTE: needs to be before _suballoc_ring_bo() since it could
	 * increment the refcnt of the current ring
	 */
	msm_ring->base.refcnt = 1;

	if (flags & FD_RINGBUFFER_STREAMING) {
		msm_submit_suballoc_ring_bo(submit, msm_ring, size);
	} else {
		if (flags & FD_RINGBUFFER_GROWABLE)
			size = INIT_SIZE;

		msm_ring->offset = 0;
		msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size,
				DRM_FREEDRENO_GEM_GPUREADONLY);
	}

	if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
		return NULL;

	if (flags & FD_RINGBUFFER_PRIMARY) {
		debug_assert(!msm_submit->primary);
		msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base);
	}

	return &msm_ring->base;
}

static int
msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
		int *out_fence_fd, uint32_t *out_fence)
{
	struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
	struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);
	struct drm_msm_gem_submit req = {
			.flags = msm_pipe->pipe,
			.queueid = msm_pipe->queue_id,
	};
	int ret;

	debug_assert(msm_submit->primary);
	finalize_current_cmd(msm_submit->primary);

	struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary);
	struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds];

	for (unsigned i = 0; i < primary->u.nr_cmds; i++) {
		cmds[i].type = MSM_SUBMIT_CMD_BUF;
		cmds[i].submit_idx = append_bo(msm_submit,
				primary->u.cmds[i].ring_bo, FD_RELOC_READ | FD_RELOC_DUMP);
		cmds[i].submit_offset = primary->offset;
		cmds[i].size = primary->u.cmds[i].size;
		cmds[i].pad = 0;
		cmds[i].nr_relocs = 0;
	}

	if (in_fence_fd != -1) {
		req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
		req.fence_fd = in_fence_fd;
	}

	if (out_fence_fd) {
		req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
	}

	/* needs to be after the append_bo() calls above, since they can
	 * grow (and therefore reallocate) the submit_bos table:
	 */
	req.bos = VOID2U64(msm_submit->submit_bos);
	req.nr_bos = msm_submit->nr_submit_bos;
	req.cmds = VOID2U64(cmds);
	req.nr_cmds = primary->u.nr_cmds;

	DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);

	ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT,
			&req, sizeof(req));
	if (ret) {
		ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
		msm_dump_submit(&req);
	} else {
		if (out_fence)
			*out_fence = req.fence;

		if (out_fence_fd)
			*out_fence_fd = req.fence_fd;
	}

	return ret;
}
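
/*
 * Fence plumbing sketch (callers actually go through fd_submit_flush()):
 * in_fence_fd = -1 means "no input fence", while a valid sync-file fd makes
 * the kernel wait on it and disables implicit sync.  Requesting an output
 * fence fd might look like this (hypothetical caller code):
 *
 *    int out_fd = -1;
 *    uint32_t fence;
 *    fd_submit_flush(submit, -1, &out_fd, &fence);
 *    // out_fd is now a sync-file fd for this submit, fence its seqno
 */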

static void
msm_submit_sp_destroy(struct fd_submit *submit)
{
	struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

	if (msm_submit->primary)
		fd_ringbuffer_del(msm_submit->primary);
	if (msm_submit->suballoc_ring)
		fd_ringbuffer_del(msm_submit->suballoc_ring);

	_mesa_hash_table_destroy(msm_submit->bo_table, NULL);

	// TODO it would be nice to have a way to debug_assert() if all
	// rb's haven't been free'd back to the slab, because that is
	// an indication that we are leaking bo's
	slab_destroy_child(&msm_submit->ring_pool);

	for (unsigned i = 0; i < msm_submit->nr_bos; i++)
		fd_bo_del(msm_submit->bos[i]);

	free(msm_submit->submit_bos);
	free(msm_submit->bos);
	free(msm_submit);
}

static const struct fd_submit_funcs submit_funcs = {
		.new_ringbuffer = msm_submit_sp_new_ringbuffer,
		.flush = msm_submit_sp_flush,
		.destroy = msm_submit_sp_destroy,
};

struct fd_submit *
msm_submit_sp_new(struct fd_pipe *pipe)
{
	struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
	struct fd_submit *submit;

	msm_submit->bo_table = _mesa_hash_table_create(NULL,
			_mesa_hash_pointer, _mesa_key_pointer_equal);

	slab_create_child(&msm_submit->ring_pool, &to_msm_pipe(pipe)->ring_pool);

	submit = &msm_submit->base;
	submit->pipe = pipe;
	submit->funcs = &submit_funcs;

	return submit;
}

void
msm_pipe_sp_ringpool_init(struct msm_pipe *msm_pipe)
{
	// TODO tune size:
	slab_create_parent(&msm_pipe->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);
}

void
msm_pipe_sp_ringpool_fini(struct msm_pipe *msm_pipe)
{
	if (msm_pipe->ring_pool.num_elements)
		slab_destroy_parent(&msm_pipe->ring_pool);
}

static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
	debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
	unsigned idx = APPEND(&msm_ring->u, cmds);

	msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo);
	msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start);
}

static void
msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
	struct fd_pipe *pipe = msm_ring->u.submit->pipe;

	debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

	finalize_current_cmd(ring);

	fd_bo_del(msm_ring->ring_bo);
	msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size,
			DRM_FREEDRENO_GEM_GPUREADONLY);

	ring->start = fd_bo_map(msm_ring->ring_bo);
	ring->end = &(ring->start[size/4]);
	ring->cur = ring->start;
	ring->size = size;
}
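
/*
 * For example, a growable ring that has grown twice has two finalized
 * entries in u.cmds plus the current ring_bo; at flush time
 * finalize_current_cmd() appends the third entry, and each one becomes
 * its own MSM_SUBMIT_CMD_BUF in the kernel submit.
 */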

static void
msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring,
		const struct fd_reloc *reloc)
{
	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
	struct fd_pipe *pipe;

	if (ring->flags & _FD_RINGBUFFER_OBJECT) {
		unsigned idx = APPEND(&msm_ring->u, reloc_bos);

		msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo);
		msm_ring->u.reloc_bos[idx].flags = reloc->flags;

		pipe = msm_ring->u.pipe;
	} else {
		struct msm_submit_sp *msm_submit =
				to_msm_submit_sp(msm_ring->u.submit);

		append_bo(msm_submit, reloc->bo, reloc->flags);

		pipe = msm_ring->u.submit->pipe;
	}

	uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset;
	uint32_t dword = iova;
	int shift = reloc->shift;

	if (shift < 0)
		dword >>= -shift;
	else
		dword <<= shift;

	(*ring->cur++) = dword | reloc->or;

	if (pipe->gpu_id >= 500) {
		dword = iova >> 32;
		shift -= 32;

		if (shift < 0)
			dword >>= -shift;
		else
			dword <<= shift;

		(*ring->cur++) = dword | reloc->orhi;
	}
}
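
/*
 * Sketch of emitting a buffer address through this hook, via the
 * fd_ringbuffer_reloc() wrapper from freedreno_ringbuffer.h (hedged: the
 * field values here are illustrative, not taken from a real packet):
 *
 *    fd_ringbuffer_reloc(ring, &(struct fd_reloc){
 *          .bo = bo,
 *          .flags = FD_RELOC_READ | FD_RELOC_WRITE,
 *          .offset = 0,
 *    });
 *
 * This emits a single dword on pre-a5xx parts, and on a5xx+ (gpu_id >= 500,
 * with 64-bit iovas) a second dword carrying the high bits of the address.
 */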

static uint32_t
msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
		struct fd_ringbuffer *target, uint32_t cmd_idx)
{
	struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
	struct fd_bo *bo;
	uint32_t size;

	if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
			(cmd_idx < msm_target->u.nr_cmds)) {
		bo = msm_target->u.cmds[cmd_idx].ring_bo;
		size = msm_target->u.cmds[cmd_idx].size;
	} else {
		bo = msm_target->ring_bo;
		size = offset_bytes(target->cur, target->start);
	}

	msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){
		.bo = bo,
		.flags = FD_RELOC_READ | FD_RELOC_DUMP,
		.offset = msm_target->offset,
	});

	if (!(target->flags & _FD_RINGBUFFER_OBJECT))
		return size;

	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

	if (ring->flags & _FD_RINGBUFFER_OBJECT) {
		for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
			unsigned idx = APPEND(&msm_ring->u, reloc_bos);

			msm_ring->u.reloc_bos[idx].bo =
					fd_bo_ref(msm_target->u.reloc_bos[i].bo);
			msm_ring->u.reloc_bos[idx].flags =
					msm_target->u.reloc_bos[i].flags;
		}
	} else {
		// TODO it would be nice to know whether we have already
		// seen this target before.  But hopefully we hit the
		// append_bo() fast path enough for this to not matter:
		struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);

		for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
			append_bo(msm_submit, msm_target->u.reloc_bos[i].bo,
					msm_target->u.reloc_bos[i].flags);
		}
	}

	return size;
}

static uint32_t
msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
	if (ring->flags & FD_RINGBUFFER_GROWABLE)
		return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
	return 1;
}

static void
msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
	struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

	fd_bo_del(msm_ring->ring_bo);

	if (ring->flags & _FD_RINGBUFFER_OBJECT) {
		for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
			fd_bo_del(msm_ring->u.reloc_bos[i].bo);
		}

		free(msm_ring);
	} else {
		struct fd_submit *submit = msm_ring->u.submit;

		for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
			fd_bo_del(msm_ring->u.cmds[i].ring_bo);
		}

		slab_free(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
	}
}

static const struct fd_ringbuffer_funcs ring_funcs = {
		.grow = msm_ringbuffer_sp_grow,
		.emit_reloc = msm_ringbuffer_sp_emit_reloc,
		.emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
		.cmd_count = msm_ringbuffer_sp_cmd_count,
		.destroy = msm_ringbuffer_sp_destroy,
};

static inline struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
		enum fd_ringbuffer_flags flags)
{
	struct fd_ringbuffer *ring = &msm_ring->base;

	debug_assert(msm_ring->ring_bo);

	uint8_t *base = fd_bo_map(msm_ring->ring_bo);
	ring->start = (void *)(base + msm_ring->offset);
	ring->end = &(ring->start[size/4]);
	ring->cur = ring->start;

	ring->size = size;
	ring->flags = flags;

	ring->funcs = &ring_funcs;

	// TODO initializing these could probably be conditional on flags
	// since unneeded for the FD_RINGBUFFER_STREAMING case..
	msm_ring->u.cmds = NULL;
	msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;

	msm_ring->u.reloc_bos = NULL;
	msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;

	return ring;
}

struct fd_ringbuffer *
msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
	struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));

	msm_ring->u.pipe = pipe;
	msm_ring->offset = 0;
	msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size,
			DRM_FREEDRENO_GEM_GPUREADONLY);
	msm_ring->base.refcnt = 1;

	return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
}
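
/*
 * Stateobj usage sketch (hedged: wrapper names per freedreno_ringbuffer.h):
 * a long-lived stateobj is built once, outside of any submit, and later
 * stitched into a submit's ring; that path lands in
 * msm_ringbuffer_sp_emit_reloc_ring() above, which copies the stateobj's
 * tracked reloc_bos into the submit's bos table:
 *
 *    struct fd_ringbuffer *stateobj = fd_ringbuffer_new_object(pipe, 0x100);
 *    ... emit state packets + relocs into stateobj ...
 *    fd_ringbuffer_emit_reloc_ring_full(ring, stateobj, 0);
 *    fd_ringbuffer_del(stateobj);   // the submit still holds bo refs
 */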