freedreno: Tell the kernel that all BOs are for writing.
[mesa.git] src/freedreno/drm/msm_ringbuffer_sp.c
/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>

#include "util/hash_table.h"
#include "util/slab.h"

#include "drm/freedreno_ringbuffer.h"
#include "msm_priv.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */
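
/* A minimal usage sketch from the driver's perspective (hypothetical
 * driver code; the real callers live in the gallium driver):
 *
 *    struct fd_submit *submit = fd_submit_new(pipe);
 *    struct fd_ringbuffer *ring =
 *       fd_submit_new_ringbuffer(submit, 0x1000,
 *             FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
 *
 *    ... emit commands to ring ...
 *
 *    fd_submit_flush(submit, -1, NULL, NULL);
 *    fd_submit_del(submit);
 */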

#define INIT_SIZE 0x1000

struct msm_submit_sp {
   struct fd_submit base;

   DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
   DECLARE_ARRAY(struct fd_bo *, bos);

   /* maps fd_bo to idx in bos table: */
   struct hash_table *bo_table;

   struct slab_child_pool ring_pool;

   struct fd_ringbuffer *primary;

   /* Allow for sub-allocation of stateobj ring buffers (i.e. sharing
    * the same underlying bo).
    *
    * We also rely on the previous stateobj having been fully constructed
    * so we can reclaim extra space at its end.
    */
   struct fd_ringbuffer *suballoc_ring;
};
FD_DEFINE_CAST(fd_submit, msm_submit_sp);

/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
 * and sizes. I.e. a finalized buffer can have no more commands appended to
 * it.
 */
struct msm_cmd_sp {
   struct fd_bo *ring_bo;
   unsigned size;
};

struct msm_ringbuffer_sp {
   struct fd_ringbuffer base;

   /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
   unsigned offset;

   union {
      /* for _FD_RINGBUFFER_OBJECT case, the array of BOs referenced from
       * this one
       */
      struct {
         struct fd_pipe *pipe;
         DECLARE_ARRAY(struct fd_bo *, reloc_bos);
      };
      /* for other cases: */
      struct {
         struct fd_submit *submit;
         DECLARE_ARRAY(struct msm_cmd_sp, cmds);
      };
   } u;

   struct fd_bo *ring_bo;
};
FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer * msm_ringbuffer_sp_init(
      struct msm_ringbuffer_sp *msm_ring,
      uint32_t size, enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
static uint32_t
msm_submit_append_bo(struct msm_submit_sp *submit, struct fd_bo *bo)
{
   struct msm_bo *msm_bo = to_msm_bo(bo);
   uint32_t idx;

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   idx = READ_ONCE(msm_bo->idx);

   if (unlikely((idx >= submit->nr_submit_bos) ||
         (submit->submit_bos[idx].handle != bo->handle))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else {
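         /* submit_bos[] and bos[] grow in lockstep, so both APPENDs
          * return the same index:
          */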
         idx = APPEND(submit, submit_bos);
         idx = APPEND(submit, bos);

         submit->submit_bos[idx].flags = bo->flags;
         submit->submit_bos[idx].handle = bo->handle;
         submit->submit_bos[idx].presumed = 0;

         submit->bos[idx] = fd_bo_ref(bo);

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
               (void *)(uintptr_t)idx);
      }
      msm_bo->idx = idx;
   }

   return idx;
}

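/* Sub-allocate a streaming ringbuffer from the tail of the last suballoc
 * bo, falling back to a fresh bo when the requested size doesn't fit:
 */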
static void
msm_submit_suballoc_ring_bo(struct fd_submit *submit,
      struct msm_ringbuffer_sp *msm_ring, uint32_t size)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (msm_submit->suballoc_ring) {
      struct msm_ringbuffer_sp *suballoc_ring =
            to_msm_ringbuffer_sp(msm_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) +
            suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, 0x10);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, 0x8000);
      msm_ring->offset = 0;
   } else {
      msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
      msm_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;

   msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

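/* Streaming rb's are sub-allocated, growable rb's start at INIT_SIZE (and
 * get a new bo on each grow), and everything else gets a bo of the
 * requested size.  The primary rb is remembered so flush() can find the
 * cmdstream to submit:
 */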
static struct fd_ringbuffer *
msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
      enum fd_ringbuffer_flags flags)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_ringbuffer_sp *msm_ring;

   msm_ring = slab_alloc(&msm_submit->ring_pool);

   msm_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   msm_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      msm_submit_suballoc_ring_bo(submit, msm_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = INIT_SIZE;

      msm_ring->offset = 0;
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
      return NULL;

   if (flags & FD_RINGBUFFER_PRIMARY) {
      debug_assert(!msm_submit->primary);
      msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base);
   }

   return &msm_ring->base;
}

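/* Flush turns the primary ring's finalized cmd buffers into the kernel's
 * cmds table and submits everything with a single DRM_MSM_GEM_SUBMIT
 * ioctl:
 */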
static int
msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
      int *out_fence_fd, uint32_t *out_fence)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);
   struct drm_msm_gem_submit req = {
         .flags = msm_pipe->pipe,
         .queueid = msm_pipe->queue_id,
   };
   int ret;

   debug_assert(msm_submit->primary);
   finalize_current_cmd(msm_submit->primary);

   struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary);
   struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds];

   for (unsigned i = 0; i < primary->u.nr_cmds; i++) {
      cmds[i].type = MSM_SUBMIT_CMD_BUF;
      cmds[i].submit_idx = msm_submit_append_bo(msm_submit,
            primary->u.cmds[i].ring_bo);
      cmds[i].submit_offset = primary->offset;
      cmds[i].size = primary->u.cmds[i].size;
      cmds[i].pad = 0;
      cmds[i].nr_relocs = 0;
   }

   if (in_fence_fd != -1) {
      req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
      req.fence_fd = in_fence_fd;
   }

   if (out_fence_fd) {
      req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
   }

   /* needs to be after the cmds loop above, since appending bos can
    * reallocate the bos table:
    */
   req.bos = VOID2U64(msm_submit->submit_bos);
   req.nr_bos = msm_submit->nr_submit_bos;
   req.cmds = VOID2U64(cmds);
   req.nr_cmds = primary->u.nr_cmds;

   DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);

   ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT,
         &req, sizeof(req));
   if (ret) {
      ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
      msm_dump_submit(&req);
   } else {
      if (out_fence)
         *out_fence = req.fence;

      if (out_fence_fd)
         *out_fence_fd = req.fence_fd;
   }

   return ret;
}

static void
msm_submit_sp_destroy(struct fd_submit *submit)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   if (msm_submit->primary)
      fd_ringbuffer_del(msm_submit->primary);
   if (msm_submit->suballoc_ring)
      fd_ringbuffer_del(msm_submit->suballoc_ring);

   _mesa_hash_table_destroy(msm_submit->bo_table, NULL);

   // TODO it would be nice to have a way to debug_assert() if all
   // rb's haven't been freed back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy_child(&msm_submit->ring_pool);

   for (unsigned i = 0; i < msm_submit->nr_bos; i++)
      fd_bo_del(msm_submit->bos[i]);

   free(msm_submit->submit_bos);
   free(msm_submit->bos);
   free(msm_submit);
}

static const struct fd_submit_funcs submit_funcs = {
      .new_ringbuffer = msm_submit_sp_new_ringbuffer,
      .flush = msm_submit_sp_flush,
      .destroy = msm_submit_sp_destroy,
};

struct fd_submit *
msm_submit_sp_new(struct fd_pipe *pipe)
{
   struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
   struct fd_submit *submit;

   msm_submit->bo_table = _mesa_hash_table_create(NULL,
         _mesa_hash_pointer, _mesa_key_pointer_equal);

   slab_create_child(&msm_submit->ring_pool, &to_msm_pipe(pipe)->ring_pool);

   submit = &msm_submit->base;
   submit->pipe = pipe;
   submit->funcs = &submit_funcs;

   return submit;
}

void
msm_pipe_sp_ringpool_init(struct msm_pipe *msm_pipe)
{
   // TODO tune size:
   slab_create_parent(&msm_pipe->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);
}

void
msm_pipe_sp_ringpool_fini(struct msm_pipe *msm_pipe)
{
   if (msm_pipe->ring_pool.num_elements)
      slab_destroy_parent(&msm_pipe->ring_pool);
}

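/* Record the current write position of the ring as a finalized cmd
 * buffer (no further commands will be appended to it), so flush() and
 * grow() know the size to submit:
 */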
static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   unsigned idx = APPEND(&msm_ring->u, cmds);

   msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo);
   msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start);
}

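/* Growable rings don't realloc in place; the current bo is finalized
 * and a fresh bo of the requested size becomes the new write target:
 */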
static void
msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_pipe *pipe = msm_ring->u.submit->pipe;

   debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(msm_ring->ring_bo);
   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(msm_ring->ring_bo);
   ring->end = &(ring->start[size/4]);
   ring->cur = ring->start;
   ring->size = size;
}

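/* With softpin there is no reloc table; the bo's fixed GPU address is
 * written directly into the cmdstream (two dwords on a5xx+, which have
 * 64b addresses), and the bo just needs to be referenced by the submit:
 */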
static void
msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring,
      const struct fd_reloc *reloc)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_pipe *pipe;

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      unsigned idx = APPEND(&msm_ring->u, reloc_bos);

      msm_ring->u.reloc_bos[idx] = fd_bo_ref(reloc->bo);

      pipe = msm_ring->u.pipe;
   } else {
      struct msm_submit_sp *msm_submit =
            to_msm_submit_sp(msm_ring->u.submit);

      msm_submit_append_bo(msm_submit, reloc->bo);

      pipe = msm_ring->u.submit->pipe;
   }

   uint64_t iova = reloc->bo->iova + reloc->offset;
   int shift = reloc->shift;

   if (shift < 0)
      iova >>= -shift;
   else
      iova <<= shift;

   uint32_t dword = iova;

   (*ring->cur++) = dword | reloc->or;

   if (pipe->gpu_id >= 500) {
      dword = iova >> 32;
      (*ring->cur++) = dword | reloc->orhi;
   }
}

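/* Emit a reference to a target ring's cmdstream, and propagate the
 * target's bo references either into this stateobj's reloc_bos or
 * directly into the submit's bos table:
 */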
static uint32_t
msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
      struct fd_ringbuffer *target, uint32_t cmd_idx)
{
   struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
   struct fd_bo *bo;
   uint32_t size;

   if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
         (cmd_idx < msm_target->u.nr_cmds)) {
      bo = msm_target->u.cmds[cmd_idx].ring_bo;
      size = msm_target->u.cmds[cmd_idx].size;
   } else {
      bo = msm_target->ring_bo;
      size = offset_bytes(target->cur, target->start);
   }

   msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){
      .bo = bo,
      .offset = msm_target->offset,
   });

   if (!(target->flags & _FD_RINGBUFFER_OBJECT))
      return size;

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
         unsigned idx = APPEND(&msm_ring->u, reloc_bos);

         msm_ring->u.reloc_bos[idx] =
               fd_bo_ref(msm_target->u.reloc_bos[i]);
      }
   } else {
      // TODO it would be nice to know whether we have already
      // seen this target before.  But hopefully we hit the
      // append_bo() fast path enough for this to not matter:
      struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);

      for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
         msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i]);
      }
   }

   return size;
}

static uint32_t
msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static void
msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   fd_bo_del(msm_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(msm_ring->u.reloc_bos[i]);
      }
      free(msm_ring->u.reloc_bos);

      free(msm_ring);
   } else {
      struct fd_submit *submit = msm_ring->u.submit;

      for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
         fd_bo_del(msm_ring->u.cmds[i].ring_bo);
      }
      free(msm_ring->u.cmds);

      slab_free(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs = {
      .grow = msm_ringbuffer_sp_grow,
      .emit_reloc = msm_ringbuffer_sp_emit_reloc,
      .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
      .cmd_count = msm_ringbuffer_sp_cmd_count,
      .destroy = msm_ringbuffer_sp_destroy,
};

static inline struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &msm_ring->base;

   /* We don't do any translation from internal FD_RELOC flags to MSM flags. */
   STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ);
   STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE);
   STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP);

   debug_assert(msm_ring->ring_bo);

   uint8_t *base = fd_bo_map(msm_ring->ring_bo);
   ring->start = (void *)(base + msm_ring->offset);
   ring->end = &(ring->start[size/4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   ring->funcs = &ring_funcs;

   // TODO initializing these could probably be conditional on flags
   // since unneeded for the FD_RINGBUFFER_STREAMING case..
   msm_ring->u.cmds = NULL;
   msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;

   msm_ring->u.reloc_bos = NULL;
   msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;

   return ring;
}

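/* Stateobjs (_FD_RINGBUFFER_OBJECT) are not tied to a submit; they get
 * their own bo and accumulate reloc_bos so their references can later be
 * propagated to whichever submit(s) they are emitted into:
 */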
struct fd_ringbuffer *
msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));

   msm_ring->u.pipe = pipe;
   msm_ring->offset = 0;
   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);
   msm_ring->base.refcnt = 1;

   return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
}