freedreno: Initialize lower_int64_options to a proper value
[mesa.git] / src / freedreno / drm / msm_ringbuffer_sp.c
index 997ff147659f4f34bf61379804c59159aaea6ba0..869c41ddda66c8ceaaf82ad5379b543938abb10c 100644 (file)
@@ -41,8 +41,6 @@
 
 #define INIT_SIZE 0x1000
 
-static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;
-
 
 struct msm_submit_sp {
        struct fd_submit base;
@@ -50,12 +48,10 @@ struct msm_submit_sp {
        DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
        DECLARE_ARRAY(struct fd_bo *, bos);
 
-       unsigned seqno;
-
        /* maps fd_bo to idx in bos table: */
        struct hash_table *bo_table;
 
-       struct slab_mempool ring_pool;
+       struct slab_child_pool ring_pool;
 
        struct fd_ringbuffer *primary;
 
@@ -78,29 +74,19 @@ struct msm_cmd_sp {
        unsigned size;
 };
 
-/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to
- * later copy into the submit when the stateobj rb is later referenced by
- * a regular rb:
- */
-struct msm_reloc_bo_sp {
-       struct fd_bo *bo;
-       unsigned flags;
-};
-
 struct msm_ringbuffer_sp {
        struct fd_ringbuffer base;
 
        /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
        unsigned offset;
 
-// TODO check disasm.. hopefully compilers CSE can realize that
-// reloc_bos and cmds are at the same offsets and optimize some
-// divergent cases into single case
        union {
-               /* for _FD_RINGBUFFER_OBJECT case: */
+               /* for _FD_RINGBUFFER_OBJECT case, the array of BOs referenced from
+                * this one
+                */
                struct {
                        struct fd_pipe *pipe;
-                       DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos);
+                       DECLARE_ARRAY(struct fd_bo *, reloc_bos);
                };
                /* for other cases: */
                struct {
@@ -120,14 +106,19 @@ static struct fd_ringbuffer * msm_ringbuffer_sp_init(
 
 /* add (if needed) bo to submit and return index: */
 static uint32_t
-append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags)
+msm_submit_append_bo(struct msm_submit_sp *submit, struct fd_bo *bo)
 {
        struct msm_bo *msm_bo = to_msm_bo(bo);
        uint32_t idx;
-       pthread_mutex_lock(&idx_lock);
-       if (likely(msm_bo->current_submit_seqno == submit->seqno)) {
-               idx = msm_bo->idx;
-       } else {
+
+       /* NOTE: it is legal to use the same bo on different threads for
+        * different submits.  But it is not legal to use the same submit
+        * from given threads.
+        */
+       idx = READ_ONCE(msm_bo->idx);
+
+       if (unlikely((idx >= submit->nr_submit_bos) ||
+                       (submit->submit_bos[idx].handle != bo->handle))) {
                uint32_t hash = _mesa_hash_pointer(bo);
                struct hash_entry *entry;
 
@@ -139,7 +130,7 @@ append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags)
                        idx = APPEND(submit, submit_bos);
                        idx = APPEND(submit, bos);
 
-                       submit->submit_bos[idx].flags = 0;
+                       submit->submit_bos[idx].flags = bo->flags;
                        submit->submit_bos[idx].handle = bo->handle;
                        submit->submit_bos[idx].presumed = 0;
 
@@ -148,14 +139,9 @@ append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags)
                        _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                        (void *)(uintptr_t)idx);
                }
-               msm_bo->current_submit_seqno = submit->seqno;
                msm_bo->idx = idx;
        }
-       pthread_mutex_unlock(&idx_lock);
-       if (flags & FD_RELOC_READ)
-               submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ;
-       if (flags & FD_RELOC_WRITE)
-               submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE;
+
        return idx;
 }
 
@@ -184,8 +170,7 @@ msm_submit_suballoc_ring_bo(struct fd_submit *submit,
 
        if (!suballoc_bo) {
                // TODO possibly larger size for streaming bo?
-               msm_ring->ring_bo = fd_bo_new_ring(
-                               submit->pipe->dev, 0x8000, 0);
+               msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, 0x8000);
                msm_ring->offset = 0;
        } else {
                msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
@@ -207,7 +192,7 @@ msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
        struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
        struct msm_ringbuffer_sp *msm_ring;
 
-       msm_ring = slab_alloc_st(&msm_submit->ring_pool);
+       msm_ring = slab_alloc(&msm_submit->ring_pool);
 
        msm_ring->u.submit = submit;
 
@@ -223,7 +208,7 @@ msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                        size = INIT_SIZE;
 
                msm_ring->offset = 0;
-               msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size, 0);
+               msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
        }
 
        if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
@@ -257,8 +242,8 @@ msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
 
        for (unsigned i = 0; i < primary->u.nr_cmds; i++) {
                cmds[i].type = MSM_SUBMIT_CMD_BUF;
-               cmds[i].submit_idx =
-                       append_bo(msm_submit, primary->u.cmds[i].ring_bo, FD_RELOC_READ);
+               cmds[i].submit_idx = msm_submit_append_bo(msm_submit,
+                               primary->u.cmds[i].ring_bo);
                cmds[i].submit_offset = primary->offset;
                cmds[i].size = primary->u.cmds[i].size;
                cmds[i].pad = 0;
@@ -313,7 +298,7 @@ msm_submit_sp_destroy(struct fd_submit *submit)
        // TODO it would be nice to have a way to debug_assert() if all
        // rb's haven't been free'd back to the slab, because that is
        // an indication that we are leaking bo's
-       slab_destroy(&msm_submit->ring_pool);
+       slab_destroy_child(&msm_submit->ring_pool);
 
        for (unsigned i = 0; i < msm_submit->nr_bos; i++)
                fd_bo_del(msm_submit->bos[i]);
@@ -334,13 +319,11 @@ msm_submit_sp_new(struct fd_pipe *pipe)
 {
        struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
        struct fd_submit *submit;
-       static unsigned submit_cnt = 0;
 
-       msm_submit->seqno = ++submit_cnt;
        msm_submit->bo_table = _mesa_hash_table_create(NULL,
                        _mesa_hash_pointer, _mesa_key_pointer_equal);
-       // TODO tune size:
-       slab_create(&msm_submit->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);
+
+       slab_create_child(&msm_submit->ring_pool, &to_msm_pipe(pipe)->ring_pool);
 
        submit = &msm_submit->base;
        submit->pipe = pipe;
@@ -349,6 +332,19 @@ msm_submit_sp_new(struct fd_pipe *pipe)
        return submit;
 }
 
+void
+msm_pipe_sp_ringpool_init(struct msm_pipe *msm_pipe)
+{
+       // TODO tune size:
+       slab_create_parent(&msm_pipe->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);
+}
+
+void
+msm_pipe_sp_ringpool_fini(struct msm_pipe *msm_pipe)
+{
+       if (msm_pipe->ring_pool.num_elements)
+               slab_destroy_parent(&msm_pipe->ring_pool);
+}
 
 static void
 finalize_current_cmd(struct fd_ringbuffer *ring)
@@ -373,7 +369,7 @@ msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
        finalize_current_cmd(ring);
 
        fd_bo_del(msm_ring->ring_bo);
-       msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0);
+       msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);
 
        ring->start = fd_bo_map(msm_ring->ring_bo);
        ring->end = &(ring->start[size/4]);
@@ -389,41 +385,48 @@ msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring,
        struct fd_pipe *pipe;
 
        if (ring->flags & _FD_RINGBUFFER_OBJECT) {
-               unsigned idx = APPEND(&msm_ring->u, reloc_bos);
-
-               msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo);
-               msm_ring->u.reloc_bos[idx].flags = reloc->flags;
+               /* Avoid emitting duplicate BO references into the list.  Ringbuffer
+                * objects are long-lived, so this saves ongoing work at draw time in
+                * exchange for a bit at context setup/first draw.  And the number of
+                * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
+                * hurt much.
+                */
+               bool found = false;
+               for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
+                       if (msm_ring->u.reloc_bos[i] == reloc->bo) {
+                               found = true;
+                               break;
+                       }
+               }
+               if (!found) {
+                       unsigned idx = APPEND(&msm_ring->u, reloc_bos);
+                       msm_ring->u.reloc_bos[idx] = fd_bo_ref(reloc->bo);
+               }
 
                pipe = msm_ring->u.pipe;
        } else {
                struct msm_submit_sp *msm_submit =
                                to_msm_submit_sp(msm_ring->u.submit);
 
-               append_bo(msm_submit, reloc->bo, reloc->flags);
+               msm_submit_append_bo(msm_submit, reloc->bo);
 
                pipe = msm_ring->u.submit->pipe;
        }
 
-       uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset;
-       uint32_t dword = iova;
+       uint64_t iova = reloc->bo->iova + reloc->offset;
        int shift = reloc->shift;
 
        if (shift < 0)
-               dword >>= -shift;
+               iova >>= -shift;
        else
-               dword <<= shift;
+               iova <<= shift;
+
+       uint32_t dword = iova;
 
        (*ring->cur++) = dword | reloc->or;
 
        if (pipe->gpu_id >= 500) {
                dword = iova >> 32;
-               shift -= 32;
-
-               if (shift < 0)
-                       dword >>= -shift;
-               else
-                       dword <<= shift;
-
                (*ring->cur++) = dword | reloc->orhi;
        }
 }
@@ -447,21 +450,29 @@ msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
 
        msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){
                .bo     = bo,
-               .flags  = FD_RELOC_READ,
                .offset = msm_target->offset,
        });
 
-       if ((target->flags & _FD_RINGBUFFER_OBJECT) &&
-                       !(ring->flags & _FD_RINGBUFFER_OBJECT)) {
+       if (!(target->flags & _FD_RINGBUFFER_OBJECT))
+               return size;
+
+       struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
+
+       if (ring->flags & _FD_RINGBUFFER_OBJECT) {
+               for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
+                       unsigned idx = APPEND(&msm_ring->u, reloc_bos);
+
+                       msm_ring->u.reloc_bos[idx] =
+                               fd_bo_ref(msm_target->u.reloc_bos[i]);
+               }
+       } else {
                // TODO it would be nice to know whether we have already
                // seen this target before.  But hopefully we hit the
                // append_bo() fast path enough for this to not matter:
-               struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
                struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);
 
                for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
-                       append_bo(msm_submit, msm_target->u.reloc_bos[i].bo,
-                                       msm_target->u.reloc_bos[i].flags);
+                       msm_submit_append_bo(msm_submit, msm_target->u.reloc_bos[i]);
                }
        }
 
@@ -485,8 +496,9 @@ msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
 
        if (ring->flags & _FD_RINGBUFFER_OBJECT) {
                for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
-                       fd_bo_del(msm_ring->u.reloc_bos[i].bo);
+                       fd_bo_del(msm_ring->u.reloc_bos[i]);
                }
+               free(msm_ring->u.reloc_bos);
 
                free(msm_ring);
        } else {
@@ -495,8 +507,9 @@ msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
                for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
                        fd_bo_del(msm_ring->u.cmds[i].ring_bo);
                }
+               free(msm_ring->u.cmds);
 
-               slab_free_st(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
+               slab_free(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
        }
 }
 
@@ -514,6 +527,11 @@ msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
 {
        struct fd_ringbuffer *ring = &msm_ring->base;
 
+       /* We don't do any translation from internal FD_RELOC flags to MSM flags. */
+       STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ);
+       STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE);
+       STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP);
+
        debug_assert(msm_ring->ring_bo);
 
        uint8_t *base = fd_bo_map(msm_ring->ring_bo);
@@ -544,7 +562,7 @@ msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
 
        msm_ring->u.pipe = pipe;
        msm_ring->offset = 0;
-       msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size, 0);
+       msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);
        msm_ring->base.refcnt = 1;
 
        return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);