/*
 * Copyright © 2019 Google LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
26 #include "tu_private.h"
28 #include "adreno_pm4.xml.h"
/* Declarations of the out-of-line command-stream functions.
 *
 * NOTE(review): none of the return types are visible in this listing, and
 * the embedded numbering suggests some parameter lines are missing as well.
 * Confirm every signature against the full header before relying on it.
 */

/* Takes the stream, a device and an initial size.
 * NOTE(review): presumably sets up \a cs; a parameter between `device` and
 * `initial_size` may be missing from this listing -- confirm.
 */
31 tu_cs_init(struct tu_cs
*cs
,
32 struct tu_device
*device
,
34 uint32_t initial_size
);
/* Takes the stream and a [start, end) pair of uint32_t pointers.
 * tu_cs_draw_state() calls it with memory.map and memory.map + size.
 */
37 tu_cs_init_external(struct tu_cs
*cs
, uint32_t *start
, uint32_t *end
);
/* NOTE(review): presumably releases what tu_cs_init() set up -- confirm. */
40 tu_cs_finish(struct tu_cs
*cs
);
/* NOTE(review): presumably starts recording into \a cs -- confirm. */
43 tu_cs_begin(struct tu_cs
*cs
);
/* NOTE(review): presumably ends what tu_cs_begin() started -- confirm. */
46 tu_cs_end(struct tu_cs
*cs
);
/* Takes a parent stream, a size and the sub-stream to set up. */
49 tu_cs_begin_sub_stream(struct tu_cs
*cs
, uint32_t size
, struct tu_cs
*sub_cs
);
/* Fills in \a memory.  tu_cs_draw_state() calls this with four arguments
 * (stream, size, 1, &memory), so two parameters between `cs` and `memory`
 * are not visible in this listing.
 */
52 tu_cs_alloc(struct tu_cs
*cs
,
55 struct tu_cs_memory
*memory
);
/* The result is stored in a struct tu_cs_entry by tu_cs_end_draw_state(). */
58 tu_cs_end_sub_stream(struct tu_cs
*cs
, struct tu_cs
*sub_cs
);
60 static inline struct tu_draw_state
61 tu_cs_end_draw_state(struct tu_cs
*cs
, struct tu_cs
*sub_cs
)
63 struct tu_cs_entry entry
= tu_cs_end_sub_stream(cs
, sub_cs
);
64 return (struct tu_draw_state
) {
65 .iova
= entry
.bo
->iova
+ entry
.offset
,
66 .size
= entry
.size
/ sizeof(uint32_t),
/* Out-of-line reservation path used by tu_cs_reserve(); its result is
 * stored in a VkResult there and compared against VK_SUCCESS.
 * NOTE(review): the return type line is not visible in this listing.
 */
71 tu_cs_reserve_space(struct tu_cs
*cs
, uint32_t reserved_size
);
/* Carve storage for a draw state out of sub_cs and point cs at it.
 *
 * Allocates through tu_cs_alloc(sub_cs, size, 1, &memory), initializes cs
 * over [memory.map, memory.map + size) with tu_cs_init_external(), reserves
 * `size` in cs and returns a struct tu_draw_state.
 *
 * The results of tu_cs_alloc() and tu_cs_reserve_space() are not checked.
 *
 * NOTE(review): the fields of the returned initializer, and possibly a
 * statement between tu_cs_init_external() and tu_cs_reserve_space(), are
 * not visible in this listing -- confirm against the full header.
 */
73 static inline struct tu_draw_state
74 tu_cs_draw_state(struct tu_cs
*sub_cs
, struct tu_cs
*cs
, uint32_t size
)
76 struct tu_cs_memory memory
;
78 /* TODO: clean this up */
79 tu_cs_alloc(sub_cs
, size
, 1, &memory
);
80 tu_cs_init_external(cs
, memory
.map
, memory
.map
+ size
);
82 tu_cs_reserve_space(cs
, size
);
84 return (struct tu_draw_state
) {
/* NOTE(review): return type not visible in this listing; presumably
 * returns \a cs to its freshly-initialized state -- confirm.
 */
91 tu_cs_reset(struct tu_cs
*cs
);
/* Takes a destination stream and a `target` stream.
 * NOTE(review): return type not visible; presumably appends target's
 * entries to cs -- confirm.
 */
94 tu_cs_add_entries(struct tu_cs
*cs
, struct tu_cs
*target
);
97 * Get the size of the command packets emitted since the last call to
100 static inline uint32_t
101 tu_cs_get_size(const struct tu_cs
*cs
)
103 return cs
->cur
- cs
->start
;
/**
 * Return nonzero when tu_cs_get_size() reports that no command packet has
 * been emitted, and zero otherwise.
 */
static inline uint32_t
tu_cs_is_empty(const struct tu_cs *cs)
{
   return !tu_cs_get_size(cs);
}
117 * Discard all entries. This allows \a cs to be reused while keeping the
118 * existing BOs and command packets intact.
121 tu_cs_discard_entries(struct tu_cs
*cs
)
123 assert(cs
->mode
== TU_CS_MODE_GROW
);
128 * Get the size needed for tu_cs_emit_call.
130 static inline uint32_t
131 tu_cs_get_call_size(const struct tu_cs
*cs
)
133 assert(cs
->mode
== TU_CS_MODE_GROW
);
134 /* each CP_INDIRECT_BUFFER needs 4 dwords */
135 return cs
->entry_count
* 4;
139 * Assert that we did not exceed the reserved space.
142 tu_cs_sanity_check(const struct tu_cs
*cs
)
144 assert(cs
->start
<= cs
->cur
);
145 assert(cs
->cur
<= cs
->reserved_end
);
146 assert(cs
->reserved_end
<= cs
->end
);
150 * Emit a uint32_t value into a command stream, without boundary checking.
153 tu_cs_emit(struct tu_cs
*cs
, uint32_t value
)
155 assert(cs
->cur
< cs
->reserved_end
);
161 * Emit an array of uint32_t into a command stream, without boundary checking.
164 tu_cs_emit_array(struct tu_cs
*cs
, const uint32_t *values
, uint32_t length
)
166 assert(cs
->cur
+ length
<= cs
->reserved_end
);
167 memcpy(cs
->cur
, values
, sizeof(uint32_t) * length
);
/**
 * Compute the odd-parity bit of \a val: 1 when \a val has an even number of
 * set bits (so that value plus parity bit has an odd count), 0 otherwise.
 */
static inline unsigned
tu_odd_parity_bit(unsigned val)
{
   /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
    * note that we want odd parity so 0x6996 is inverted.
    *
    * Fold the value down to a nibble first: the 16-entry table below is only
    * valid for indices 0..15, and shifting by 32 or more would be undefined.
    */
   val ^= val >> 16;
   val ^= val >> 8;
   val ^= val >> 4;
   val &= 0xf;
   /* unsigned constant: avoids right-shifting a negative int */
   return (~0x6996u >> val) & 1;
}
185 * Get the size of the remaining space in the current BO.
187 static inline uint32_t
188 tu_cs_get_space(const struct tu_cs
*cs
)
190 return cs
->end
- cs
->cur
;
194 tu_cs_reserve(struct tu_cs
*cs
, uint32_t reserved_size
)
196 if (cs
->mode
!= TU_CS_MODE_GROW
) {
197 assert(tu_cs_get_space(cs
) >= reserved_size
);
198 assert(cs
->reserved_end
== cs
->end
);
202 if (tu_cs_get_space(cs
) >= reserved_size
&&
203 cs
->entry_count
< cs
->entry_capacity
) {
204 cs
->reserved_end
= cs
->cur
+ reserved_size
;
208 VkResult result
= tu_cs_reserve_space(cs
, reserved_size
);
209 /* TODO: set this error in tu_cs and use it */
210 assert(result
== VK_SUCCESS
);
214 * Emit a type-4 command packet header into a command stream.
217 tu_cs_emit_pkt4(struct tu_cs
*cs
, uint16_t regindx
, uint16_t cnt
)
219 tu_cs_reserve(cs
, cnt
+ 1);
220 tu_cs_emit(cs
, CP_TYPE4_PKT
| cnt
| (tu_odd_parity_bit(cnt
) << 7) |
221 ((regindx
& 0x3ffff) << 8) |
222 ((tu_odd_parity_bit(regindx
) << 27)));
226 * Emit a type-7 command packet header into a command stream.
229 tu_cs_emit_pkt7(struct tu_cs
*cs
, uint8_t opcode
, uint16_t cnt
)
231 tu_cs_reserve(cs
, cnt
+ 1);
232 tu_cs_emit(cs
, CP_TYPE7_PKT
| cnt
| (tu_odd_parity_bit(cnt
) << 15) |
233 ((opcode
& 0x7f) << 16) |
234 ((tu_odd_parity_bit(opcode
) << 23)));
238 tu_cs_emit_wfi(struct tu_cs
*cs
)
240 tu_cs_emit_pkt7(cs
, CP_WAIT_FOR_IDLE
, 0);
/**
 * Emit a 64-bit value as two uint32_t values, low half first.  Like
 * tu_cs_emit, this relies on space having been reserved already.
 */
static inline void
tu_cs_emit_qw(struct tu_cs *cs, uint64_t value)
{
   const uint32_t lo = (uint32_t) value;
   const uint32_t hi = (uint32_t) (value >> 32);

   tu_cs_emit(cs, lo);
   tu_cs_emit(cs, hi);
}
/**
 * Write a single register: a type-4 packet header for \a reg with a count of
 * one, followed by \a value.
 */
static inline void
tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value)
{
   const uint16_t reg_count = 1;

   tu_cs_emit_pkt4(cs, reg, reg_count);
   tu_cs_emit(cs, value);
}
258 * Emit a CP_INDIRECT_BUFFER command packet.
261 tu_cs_emit_ib(struct tu_cs
*cs
, const struct tu_cs_entry
*entry
)
264 assert(entry
->size
&& entry
->offset
+ entry
->size
<= entry
->bo
->size
);
265 assert(entry
->size
% sizeof(uint32_t) == 0);
266 assert(entry
->offset
% sizeof(uint32_t) == 0);
268 tu_cs_emit_pkt7(cs
, CP_INDIRECT_BUFFER
, 3);
269 tu_cs_emit_qw(cs
, entry
->bo
->iova
+ entry
->offset
);
270 tu_cs_emit(cs
, entry
->size
/ sizeof(uint32_t));
273 /* for compute which isn't using SET_DRAW_STATE */
275 tu_cs_emit_state_ib(struct tu_cs
*cs
, struct tu_draw_state state
)
278 tu_cs_emit_pkt7(cs
, CP_INDIRECT_BUFFER
, 3);
279 tu_cs_emit_qw(cs
, state
.iova
);
280 tu_cs_emit(cs
, state
.size
);
285 * Emit a CP_INDIRECT_BUFFER command packet for each entry in the target
289 tu_cs_emit_call(struct tu_cs
*cs
, const struct tu_cs
*target
)
291 assert(target
->mode
== TU_CS_MODE_GROW
);
292 for (uint32_t i
= 0; i
< target
->entry_count
; i
++)
293 tu_cs_emit_ib(cs
, target
->entries
+ i
);
296 /* Helpers for bracketing a large sequence of commands of unknown size inside
297 * a CP_COND_REG_EXEC packet.
300 tu_cond_exec_start(struct tu_cs
*cs
, uint32_t cond_flags
)
302 assert(cs
->mode
== TU_CS_MODE_GROW
);
303 assert(!cs
->cond_flags
&& cond_flags
);
305 tu_cs_emit_pkt7(cs
, CP_COND_REG_EXEC
, 2);
306 tu_cs_emit(cs
, cond_flags
);
308 cs
->cond_flags
= cond_flags
;
309 cs
->cond_dwords
= cs
->cur
;
311 /* Emit dummy DWORD field here */
312 tu_cs_emit(cs
, CP_COND_REG_EXEC_1_DWORDS(0));
/* Condition flags for tu_cond_exec_start: RENDER_MODE combined with the
 * GMEM flag.
 */
#define CP_COND_EXEC_0_RENDER_MODE_GMEM                                      \
   (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_GMEM)

/* Condition flags for tu_cond_exec_start: RENDER_MODE combined with the
 * SYSMEM flag.
 */
#define CP_COND_EXEC_0_RENDER_MODE_SYSMEM                                    \
   (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_SYSMEM)
320 tu_cond_exec_end(struct tu_cs
*cs
)
322 assert(cs
->cond_flags
);
325 /* Subtract one here to account for the DWORD field itself. */
326 *cs
->cond_dwords
= cs
->cur
- cs
->cond_dwords
- 1;
329 #define fd_reg_pair tu_reg_value
330 #define __bo_type struct tu_bo *
332 #include "a6xx.xml.h"
333 #include "a6xx-pack.xml.h"
/* Assert that two integer expressions are equal.  On mismatch, print both
 * expressions and their values in hex to stderr before asserting.
 *
 * The message is printed only when the values differ.  The body is wrapped
 * in do { } while (0) so the macro behaves as a single statement.  Both
 * arguments are evaluated more than once, so they must be free of side
 * effects.
 */
#define __assert_eq(a, b)                                                    \
   do {                                                                      \
      if ((a) != (b)) {                                                      \
         fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", a, b); \
         assert((a) == (b));                                                 \
      }                                                                      \
   } while (0)
/* Emit the dword(s) for regs[i] through the uint32_t cursor `p` that the
 * expansion site must have in scope.
 *
 * Does nothing when i is past the end of regs or regs[i].reg is 0.
 * Otherwise it checks with __assert_eq that regs[i].reg equals
 * regs[0].reg + i.  A 64-bit value v is built from the BO address
 * (bo->iova + bo_offset), shifted right by bo_shift and OR'd with value.
 * The plain path stores regs[i].value, followed by value >> 32 when
 * is_address is set.
 *
 * NOTE(review): lines appear to be missing from this listing.  Neither the
 * branch choosing between the BO path and the plain-value path, nor the
 * store of v, nor the closing braces are visible.  Confirm against the full
 * header before editing.
 */
343 #define __ONE_REG(i, regs) \
345 if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) { \
346 __assert_eq(regs[0].reg + i, regs[i].reg); \
348 uint64_t v = regs[i].bo->iova + regs[i].bo_offset; \
349 v >>= regs[i].bo_shift; \
350 v |= regs[i].value; \
355 *p++ = regs[i].value; \
356 if (regs[i].is_address) \
357 *p++ = regs[i].value >> 32; \
/* Emits a sequence of register writes in order using a pkt4.  This will check
 * (at runtime on a !NDEBUG build) that the registers were actually set up in
 * consecutive order in the code.
 *
 * Note that references to buffers aren't automatically added to the CS,
 * unlike in freedreno.  We are clever in various places to avoid duplicating
 * the reference add work.
 *
 * Also, 64-bit address registers don't have a way (currently) to set a 64-bit
 * address without having a reference to a BO, since the .dword field in the
 * register's struct is only 32-bit wide.  We should fix this in the pack
 * codegen later.
 */
375 #define tu_cs_emit_regs(cs, ...) do { \
376 const struct fd_reg_pair regs[] = { __VA_ARGS__ }; \
377 unsigned count = ARRAY_SIZE(regs); \
379 STATIC_ASSERT(count > 0); \
380 STATIC_ASSERT(count <= 16); \
382 tu_cs_emit_pkt4((cs), regs[0].reg, count); \
383 uint32_t *p = (cs)->cur; \
384 __ONE_REG( 0, regs); \
385 __ONE_REG( 1, regs); \
386 __ONE_REG( 2, regs); \
387 __ONE_REG( 3, regs); \
388 __ONE_REG( 4, regs); \
389 __ONE_REG( 5, regs); \
390 __ONE_REG( 6, regs); \
391 __ONE_REG( 7, regs); \
392 __ONE_REG( 8, regs); \
393 __ONE_REG( 9, regs); \
394 __ONE_REG(10, regs); \
395 __ONE_REG(11, regs); \
396 __ONE_REG(12, regs); \
397 __ONE_REG(13, regs); \
398 __ONE_REG(14, regs); \
399 __ONE_REG(15, regs); \