2 * Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Rob Clark <robclark@freedesktop.org>
27 #ifndef FREEDRENO_RINGBUFFER_H_
28 #define FREEDRENO_RINGBUFFER_H_
31 #include "util/u_debug.h"
32 #include "util/u_dynarray.h"
34 #include "freedreno_drmif.h"
35 #include "adreno_common.xml.h"
36 #include "adreno_pm4.xml.h"
/* Bit flags describing how a ringbuffer is allocated and used.  The values
 * are distinct bits so that they can be OR'd together.
 */
enum fd_ringbuffer_flags {

	/* Primary ringbuffer for a submit, ie. an IB1 level rb for
	 * which kernel must setup RB->IB1 CP_INDIRECT_BRANCH
	 */
	FD_RINGBUFFER_PRIMARY = 0x1,

	/* Hint that the stateobj will be used for streaming state
	 * that is used once or a few times and then discarded.
	 *
	 * For sub-allocation, non streaming stateobj's should be
	 * sub-allocated from a page size buffer, so one long lived
	 * state obj doesn't prevent other pages from being freed.
	 * (Ie. it would be no worse than allocating a page sized
	 * bo for each small non-streaming stateobj).
	 *
	 * But streaming stateobj's could be sub-allocated from a
	 * larger buffer to reduce the alloc/del overhead.
	 */
	FD_RINGBUFFER_STREAMING = 0x2,

	/* Indicates that "growable" cmdstream can be used,
	 * consisting of multiple physical cmdstream buffers
	 */
	FD_RINGBUFFER_GROWABLE = 0x4,

	/* Internal use only: */
	_FD_RINGBUFFER_OBJECT = 0x8,
};
/* Create a new submit for the given pipe.
 *
 * A submit object manages/tracks all the state buildup for a "submit"
 * ioctl to the kernel.  Additionally, with the exception of long-lived
 * non-STREAMING stateobj rb's, rb's are allocated from the submit.
 */
struct fd_submit *fd_submit_new(struct fd_pipe *pipe);
/* Destroy a submit.
 *
 * NOTE: all ringbuffers created from the submit should be unref'd
 * before destroying the submit.
 */
void fd_submit_del(struct fd_submit *submit);
/* Allocate a new rb from the submit.
 *
 * size:  requested size of the ringbuffer
 * flags: bitmask of enum fd_ringbuffer_flags values
 */
struct fd_ringbuffer *fd_submit_new_ringbuffer(struct fd_submit *submit,
		uint32_t size, enum fd_ringbuffer_flags flags);
87 /* in_fence_fd: -1 for no in-fence, else fence fd
88 * out_fence_fd: NULL for no output-fence requested, else ptr to return out-fence
90 int fd_submit_flush(struct fd_submit
*submit
,
91 int in_fence_fd
, int *out_fence_fd
,
/* Backend dispatch table for a ringbuffer.  The inline helpers in this
 * header forward to these callbacks through ring->funcs.
 */
struct fd_ringbuffer_funcs {
	/* Provide more space; invoked by fd_ringbuffer_grow(), which
	 * asserts that it is set (unsupported on kgsl).
	 */
	void (*grow)(struct fd_ringbuffer *ring, uint32_t size);

	/* Emit a reloc into the ring; invoked by fd_ringbuffer_reloc(). */
	void (*emit_reloc)(struct fd_ringbuffer *ring,
			const struct fd_reloc *reloc);

	/* Emit a reference to cmd buffer cmd_idx of another ringbuffer;
	 * invoked by fd_ringbuffer_emit_reloc_ring_full().
	 */
	uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring,
			struct fd_ringbuffer *target, uint32_t cmd_idx);

	/* Optional: fd_ringbuffer_cmd_count() checks this for NULL. */
	uint32_t (*cmd_count)(struct fd_ringbuffer *ring);

	/* Release the ringbuffer; invoked by fd_ringbuffer_del(). */
	void (*destroy)(struct fd_ringbuffer *ring);
};
107 /* the ringbuffer object is not opaque so that OUT_RING() type stuff
108 * can be inlined. Note that users should not make assumptions about
109 * the size of this struct.
111 struct fd_ringbuffer
{
112 uint32_t *cur
, *end
, *start
;
113 const struct fd_ringbuffer_funcs
*funcs
;
115 // size or end coudl probably go away
118 enum fd_ringbuffer_flags flags
;
121 /* Allocate a new long-lived state object, not associated with
124 struct fd_ringbuffer
* fd_ringbuffer_new_object(struct fd_pipe
*pipe
,
128 fd_ringbuffer_del(struct fd_ringbuffer
*ring
)
130 if (--ring
->refcnt
> 0)
133 ring
->funcs
->destroy(ring
);
137 struct fd_ringbuffer
*
138 fd_ringbuffer_ref(struct fd_ringbuffer
*ring
)
145 fd_ringbuffer_grow(struct fd_ringbuffer
*ring
, uint32_t ndwords
)
147 assert(ring
->funcs
->grow
); /* unsupported on kgsl */
149 /* there is an upper bound on IB size, which appears to be 0x100000 */
150 if (ring
->size
< 0x100000)
153 ring
->funcs
->grow(ring
, ring
->size
);
157 fd_ringbuffer_emit(struct fd_ringbuffer
*ring
,
160 (*ring
->cur
++) = data
;
165 #define FD_RELOC_READ 0x0001
166 #define FD_RELOC_WRITE 0x0002
167 #define FD_RELOC_DUMP 0x0004
171 uint32_t orhi
; /* used for a5xx+ */
/* Default reloc flags: read and write.
 *
 * We always mark BOs for write, instead of tracking it across reloc
 * sources in userspace.  On the kernel side, this means we track a single
 * excl fence in the BO instead of a set of read fences, which is cheaper.
 * The downside is that a dmabuf-shared device won't be able to read in
 * parallel with a read-only access by freedreno, but most other drivers
 * have decided that that usecase isn't important enough to do this
 * tracking.
 */
#define FD_RELOC_FLAGS_INIT (FD_RELOC_READ | FD_RELOC_WRITE)
184 /* NOTE: relocs are 2 dwords on a5xx+ */
187 fd_ringbuffer_reloc(struct fd_ringbuffer
*ring
,
188 const struct fd_reloc
*reloc
)
190 ring
->funcs
->emit_reloc(ring
, reloc
);
193 static inline uint32_t
194 fd_ringbuffer_cmd_count(struct fd_ringbuffer
*ring
)
196 if (!ring
->funcs
->cmd_count
)
198 return ring
->funcs
->cmd_count(ring
);
201 static inline uint32_t
202 fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer
*ring
,
203 struct fd_ringbuffer
*target
, uint32_t cmd_idx
)
205 return ring
->funcs
->emit_reloc_ring(ring
, target
, cmd_idx
);
/* Distance in bytes from start to end.
 *
 * Both pointers must point into (or one past) the same object, with end
 * not before start.  The pointers are only compared, never written
 * through, so they are const-qualified; the pointer difference is
 * narrowed to 32 bits explicitly.
 */
static inline uint32_t
offset_bytes(const void *end, const void *start)
{
	return (uint32_t)((const char *)end - (const char *)start);
}
214 static inline uint32_t
215 fd_ringbuffer_size(struct fd_ringbuffer
*ring
)
217 /* only really needed for stateobj ringbuffers, and won't really
218 * do what you expect for growable rb's.. so lets just restrict
219 * this to stateobj's for now:
221 debug_assert(!(ring
->flags
& FD_RINGBUFFER_GROWABLE
));
222 return offset_bytes(ring
->cur
, ring
->start
);
228 OUT_RING(struct fd_ringbuffer
*ring
, uint32_t data
)
231 fprintf(stderr
, "ring[%p]: OUT_RING %04x: %08x", ring
,
232 (uint32_t)(ring
->cur
- ring
->start
), data
);
234 fd_ringbuffer_emit(ring
, data
);
238 * NOTE: OUT_RELOC() is 2 dwords (64b) on a5xx+
241 OUT_RELOC(struct fd_ringbuffer
*ring
, struct fd_bo
*bo
,
242 uint32_t offset
, uint64_t or, int32_t shift
)
245 fprintf(stderr
, "ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring
,
246 (uint32_t)(ring
->cur
- ring
->start
), bo
, offset
, shift
);
248 debug_assert(offset
< fd_bo_size(bo
));
249 fd_ringbuffer_reloc(ring
, &(struct fd_reloc
){
/* Emit into ring a reference to the first cmd buffer (index 0) of target.
 * The value returned by the backend is ignored.
 */
static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
	const uint32_t first_cmd = 0;

	fd_ringbuffer_emit_reloc_ring_full(ring, target, first_cmd);
}
264 static inline void BEGIN_RING(struct fd_ringbuffer
*ring
, uint32_t ndwords
)
266 if (unlikely(ring
->cur
+ ndwords
> ring
->end
))
267 fd_ringbuffer_grow(ring
, ndwords
);
271 OUT_PKT0(struct fd_ringbuffer
*ring
, uint16_t regindx
, uint16_t cnt
)
273 BEGIN_RING(ring
, cnt
+1);
274 OUT_RING(ring
, CP_TYPE0_PKT
| ((cnt
-1) << 16) | (regindx
& 0x7FFF));
278 OUT_PKT2(struct fd_ringbuffer
*ring
)
281 OUT_RING(ring
, CP_TYPE2_PKT
);
285 OUT_PKT3(struct fd_ringbuffer
*ring
, uint8_t opcode
, uint16_t cnt
)
287 BEGIN_RING(ring
, cnt
+1);
288 OUT_RING(ring
, CP_TYPE3_PKT
| ((cnt
-1) << 16) | ((opcode
& 0xFF) << 8));
/*
 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
 */

/* Return the odd-parity bit for val: 1 when val has an even number of
 * set bits (so that value plus parity bit together have an odd number),
 * 0 otherwise.
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
	/* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
	 * note that we want odd parity so 0x6996 is inverted.
	 *
	 * Fold the value down to 4 bits first: the constant is a 16-entry
	 * lookup table, and shifting by an unreduced val would be undefined
	 * for val >= the width of the type.
	 */
	val ^= val >> 16;
	val ^= val >> 8;
	val ^= val >> 4;
	val &= 0xf;
	/* unsigned constant: right-shifting a negative int is not portable */
	return (~0x6996u >> val) & 1;
}
309 OUT_PKT4(struct fd_ringbuffer
*ring
, uint16_t regindx
, uint16_t cnt
)
311 BEGIN_RING(ring
, cnt
+1);
312 OUT_RING(ring
, CP_TYPE4_PKT
| cnt
|
313 (_odd_parity_bit(cnt
) << 7) |
314 ((regindx
& 0x3ffff) << 8) |
315 ((_odd_parity_bit(regindx
) << 27)));
319 OUT_PKT7(struct fd_ringbuffer
*ring
, uint8_t opcode
, uint16_t cnt
)
321 BEGIN_RING(ring
, cnt
+1);
322 OUT_RING(ring
, CP_TYPE7_PKT
| cnt
|
323 (_odd_parity_bit(cnt
) << 15) |
324 ((opcode
& 0x7f) << 16) |
325 ((_odd_parity_bit(opcode
) << 23)));
329 OUT_WFI(struct fd_ringbuffer
*ring
)
331 OUT_PKT3(ring
, CP_WAIT_FOR_IDLE
, 1);
332 OUT_RING(ring
, 0x00000000);
336 OUT_WFI5(struct fd_ringbuffer
*ring
)
338 OUT_PKT7(ring
, CP_WAIT_FOR_IDLE
, 0);
341 #endif /* FREEDRENO_RINGBUFFER_H_ */