bo->size = size;
        bo->handle = handle;
        bo->iova = bo->funcs->iova(bo);
+       bo->flags = FD_RELOC_FLAGS_INIT;
 
        p_atomic_set(&bo->refcnt, 1);
        list_inithead(&bo->list);
 
                        }
                        p_atomic_set(&bo->refcnt, 1);
                        fd_device_ref(bo->dev);
+                       bo->flags = FD_RELOC_FLAGS_INIT;
                        return bo;
                }
        }
 
        uint32_t handle;
        uint32_t name;
        int32_t refcnt;
+       uint32_t flags; /* flags like FD_RELOC_DUMP to use for relocs to this BO */
        uint64_t iova;
        void *map;
        const struct fd_bo_funcs *funcs;
 
        uint32_t orhi;      /* used for a5xx+ */
 };
 
+#define FD_RELOC_FLAGS_INIT FD_RELOC_READ /* value bo->flags is initialised to.
+ * NOTE(review): bo->flags is copied as-is into submit_bos[idx].flags, which was
+ * previously built from MSM_SUBMIT_BO_* bits; this presumably relies on
+ * FD_RELOC_READ having the same value as MSM_SUBMIT_BO_READ -- confirm. */
+
 /* NOTE: relocs are 2 dwords on a5xx+ */
 
 static inline void
 OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
                 uint32_t offset, uint64_t or, int32_t shift)
 {
-       __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ);
+       __out_reloc(ring, bo, offset, or, shift, 0); /* no per-reloc flags: READ is
+        * no longer passed here; bo->flags is initialised to FD_RELOC_FLAGS_INIT
+        * (FD_RELOC_READ) instead. */
 }
 
 static inline void
 OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
                 uint32_t offset, uint64_t or, int32_t shift)
 {
-       __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_WRITE);
+       __out_reloc(ring, bo, offset, or, shift, FD_RELOC_WRITE); /* only WRITE is
+        * passed per reloc now; the READ bit was dropped from this call and
+        * bo->flags is initialised to FD_RELOC_FLAGS_INIT (FD_RELOC_READ). */
 }
 
 static inline void
 OUT_RELOCD(struct fd_ringbuffer *ring, struct fd_bo *bo,
                 uint32_t offset, uint64_t or, int32_t shift)
 {
-       __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_DUMP);
+       __out_reloc(ring, bo, offset, or, shift, FD_RELOC_DUMP); /* only DUMP is
+        * passed per reloc now; the READ bit was dropped from this call and
+        * bo->flags is initialised to FD_RELOC_FLAGS_INIT (FD_RELOC_READ). */
 }
 
 static inline void
 
                        idx = APPEND(submit, submit_bos);
                        idx = APPEND(submit, bos);
 
-                       submit->submit_bos[idx].flags = 0;
+                       submit->submit_bos[idx].flags = bo->flags;
                        submit->submit_bos[idx].handle = bo->handle;
                        submit->submit_bos[idx].presumed = 0;
 
                msm_bo->idx = idx;
        }
 
-       if (flags & FD_RELOC_READ)
-               submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ;
        if (flags & FD_RELOC_WRITE)
                submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE;
 
                struct fd_bo *bo = ring->u.reloc_bos[idx].bo;
                unsigned flags = 0;
 
-               if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_READ)
-                       flags |= FD_RELOC_READ;
                if (ring->u.reloc_bos[idx].flags & MSM_SUBMIT_BO_WRITE)
                        flags |= FD_RELOC_WRITE;
 
 
                        cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF;
                        cmds[i].submit_idx =
-                               append_bo(msm_submit, msm_ring->ring_bo, FD_RELOC_READ);
+                               append_bo(msm_submit, msm_ring->ring_bo, 0);
                        cmds[i].submit_offset = msm_ring->offset;
                        cmds[i].size = offset_bytes(ring->cur, ring->start);
                        cmds[i].pad = 0;
                                        cmds[i].type = MSM_SUBMIT_CMD_IB_TARGET_BUF;
                                }
                                cmds[i].submit_idx = append_bo(msm_submit,
-                                               msm_ring->u.cmds[j]->ring_bo, FD_RELOC_READ);
+                                               msm_ring->u.cmds[j]->ring_bo, 0);
                                cmds[i].submit_offset = msm_ring->offset;
                                cmds[i].size = msm_ring->u.cmds[j]->size;
                                cmds[i].pad = 0;
 
        msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){
                .bo     = bo,
-               .flags  = FD_RELOC_READ,
+               .flags  = 0,
                .offset = msm_target->offset,
        });
 
 
                        idx = APPEND(submit, submit_bos);
                        idx = APPEND(submit, bos);
 
-                       submit->submit_bos[idx].flags = 0;
+                       submit->submit_bos[idx].flags = bo->flags;
                        submit->submit_bos[idx].handle = bo->handle;
                        submit->submit_bos[idx].presumed = 0;
 
        for (unsigned i = 0; i < primary->u.nr_cmds; i++) {
                cmds[i].type = MSM_SUBMIT_CMD_BUF;
                cmds[i].submit_idx = msm_submit_append_bo(msm_submit,
-                               primary->u.cmds[i].ring_bo, FD_RELOC_READ | FD_RELOC_DUMP);
+                               primary->u.cmds[i].ring_bo, FD_RELOC_DUMP);
                cmds[i].submit_offset = primary->offset;
                cmds[i].size = primary->u.cmds[i].size;
                cmds[i].pad = 0;
 
        msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){
                .bo     = bo,
-               .flags  = FD_RELOC_READ | FD_RELOC_DUMP,
+               .flags  = FD_RELOC_DUMP,
                .offset = msm_target->offset,
        });
 
 
                        if (regs[i].bo) {                                                                               \
                                struct fd_reloc reloc = {                                                       \
                                        .bo = regs[i].bo,                                                               \
-                                       .flags = FD_RELOC_READ |                                                \
-                                               (regs[i].bo_write ? FD_RELOC_WRITE : 0),        \
-                                                                                                                                       \
+                                       .flags = (regs[i].bo_write ? FD_RELOC_WRITE : 0),       \
                                        .offset = regs[i].bo_offset,                                    \
                                        .or = regs[i].value,                                                    \
                                        .shift = regs[i].bo_shift,                                              \