turnip: add tu_device pointer to tu_cs
[mesa.git] / src / freedreno / vulkan / tu_cs.h
1 /*
2 * Copyright © 2019 Google LLC
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23 #ifndef TU_CS_H
24 #define TU_CS_H
25
26 #include "tu_private.h"
27
28 #include "registers/adreno_pm4.xml.h"
29
30 void
31 tu_cs_init(struct tu_cs *cs,
32 struct tu_device *device,
33 enum tu_cs_mode mode,
34 uint32_t initial_size);
35
36 void
37 tu_cs_init_external(struct tu_cs *cs, uint32_t *start, uint32_t *end);
38
39 void
40 tu_cs_finish(struct tu_cs *cs);
41
42 void
43 tu_cs_begin(struct tu_cs *cs);
44
45 void
46 tu_cs_end(struct tu_cs *cs);
47
48 VkResult
49 tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs);
50
51 VkResult
52 tu_cs_alloc(struct tu_cs *cs,
53 uint32_t count,
54 uint32_t size,
55 struct ts_cs_memory *memory);
56
57 struct tu_cs_entry
58 tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs);
59
60 VkResult
61 tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size);
62
63 void
64 tu_cs_reset(struct tu_cs *cs);
65
66 VkResult
67 tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target);
68
69 /**
70 * Discard all entries. This allows \a cs to be reused while keeping the
71 * existing BOs and command packets intact.
72 */
73 static inline void
74 tu_cs_discard_entries(struct tu_cs *cs)
75 {
76 assert(cs->mode == TU_CS_MODE_GROW);
77 cs->entry_count = 0;
78 }
79
80 /**
81 * Get the size needed for tu_cs_emit_call.
82 */
83 static inline uint32_t
84 tu_cs_get_call_size(const struct tu_cs *cs)
85 {
86 assert(cs->mode == TU_CS_MODE_GROW);
87 /* each CP_INDIRECT_BUFFER needs 4 dwords */
88 return cs->entry_count * 4;
89 }
90
91 /**
92 * Assert that we did not exceed the reserved space.
93 */
94 static inline void
95 tu_cs_sanity_check(const struct tu_cs *cs)
96 {
97 assert(cs->start <= cs->cur);
98 assert(cs->cur <= cs->reserved_end);
99 assert(cs->reserved_end <= cs->end);
100 }
101
102 /**
103 * Emit a uint32_t value into a command stream, without boundary checking.
104 */
105 static inline void
106 tu_cs_emit(struct tu_cs *cs, uint32_t value)
107 {
108 assert(cs->cur < cs->reserved_end);
109 *cs->cur = value;
110 ++cs->cur;
111 }
112
113 /**
114 * Emit an array of uint32_t into a command stream, without boundary checking.
115 */
116 static inline void
117 tu_cs_emit_array(struct tu_cs *cs, const uint32_t *values, uint32_t length)
118 {
119 assert(cs->cur + length <= cs->reserved_end);
120 memcpy(cs->cur, values, sizeof(uint32_t) * length);
121 cs->cur += length;
122 }
123
/**
 * Compute the odd-parity bit of \a val.
 *
 * Returns 1 when \a val has an even number of set bits (so that value plus
 * parity bit together have an odd number), and 0 otherwise.
 */
static inline unsigned
tu_odd_parity_bit(unsigned val)
{
   /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
    *
    * Fold the word down so the parity of all bits ends up in the low nibble,
    * then look the answer up in a 16-entry bit table.  0x6996 is the
    * even-parity table; we want odd parity, so the table is inverted.  The
    * inverted table is spelled as the unsigned constant 0x9669 rather than
    * ~0x6996, because the latter is a negative int and right-shifting a
    * negative value is implementation-defined.
    */
   val ^= val >> 16;
   val ^= val >> 8;
   val ^= val >> 4;
   val &= 0xf;
   return (0x9669u >> val) & 1u;
}
136
137 /**
138 * Emit a type-4 command packet header into a command stream.
139 */
140 static inline void
141 tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt)
142 {
143 tu_cs_emit(cs, CP_TYPE4_PKT | cnt | (tu_odd_parity_bit(cnt) << 7) |
144 ((regindx & 0x3ffff) << 8) |
145 ((tu_odd_parity_bit(regindx) << 27)));
146 }
147
148 /**
149 * Emit a type-7 command packet header into a command stream.
150 */
151 static inline void
152 tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
153 {
154 tu_cs_emit(cs, CP_TYPE7_PKT | cnt | (tu_odd_parity_bit(cnt) << 15) |
155 ((opcode & 0x7f) << 16) |
156 ((tu_odd_parity_bit(opcode) << 23)));
157 }
158
159 static inline void
160 tu_cs_emit_wfi(struct tu_cs *cs)
161 {
162 tu_cs_emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
163 }
164
/**
 * Emit a 64-bit value into \a cs as two dwords, low half first.
 */
static inline void
tu_cs_emit_qw(struct tu_cs *cs, uint64_t value)
{
   const uint32_t lo = (uint32_t) value;
   const uint32_t hi = (uint32_t) (value >> 32);

   tu_cs_emit(cs, lo);
   tu_cs_emit(cs, hi);
}
171
/**
 * Write a single register: emits a type-4 packet header for \a reg with a
 * count of one, followed by \a value as its only payload dword.
 */
static inline void
tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value)
{
   const uint16_t payload_dwords = 1;

   tu_cs_emit_pkt4(cs, reg, payload_dwords);
   tu_cs_emit(cs, value);
}
178
179 /**
180 * Emit a CP_INDIRECT_BUFFER command packet.
181 */
182 static inline void
183 tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry)
184 {
185 assert(entry->bo);
186 assert(entry->size && entry->offset + entry->size <= entry->bo->size);
187 assert(entry->size % sizeof(uint32_t) == 0);
188 assert(entry->offset % sizeof(uint32_t) == 0);
189
190 tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
191 tu_cs_emit_qw(cs, entry->bo->iova + entry->offset);
192 tu_cs_emit(cs, entry->size / sizeof(uint32_t));
193 }
194
195 /**
196 * Emit a CP_INDIRECT_BUFFER command packet for each entry in the target
197 * command stream.
198 */
199 static inline void
200 tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target)
201 {
202 assert(target->mode == TU_CS_MODE_GROW);
203 for (uint32_t i = 0; i < target->entry_count; i++)
204 tu_cs_emit_ib(cs, target->entries + i);
205 }
206
207 /* Helpers for bracketing a large sequence of commands of unknown size inside
208 * a CP_COND_REG_EXEC packet.
209 */
210
/* Bookkeeping shared between tu_cond_exec_start() and tu_cond_exec_end(). */
struct tu_cond_exec_state {
   /* Location of the packet's DWORDS field; start emits a placeholder there
    * and end overwrites it with the real count.
    */
   uint32_t *dword_ptr;

   /* Upper bound on the bracketed dwords, as given to tu_cond_exec_start(). */
   uint32_t max_dwords;
};
215
216 static inline VkResult
217 tu_cond_exec_start(struct tu_device *dev, struct tu_cs *cs,
218 struct tu_cond_exec_state *state,
219 uint32_t condition, uint32_t max_dwords)
220 {
221 /* Reserve enough space so that both the condition packet and the actual
222 * condition will fit in the same IB.
223 */
224 VkResult result = tu_cs_reserve_space(cs, max_dwords + 3);
225 if (result != VK_SUCCESS)
226 return result;
227
228 state->max_dwords = max_dwords;
229 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
230 tu_cs_emit(cs, condition);
231 state->dword_ptr = cs->cur;
232 /* Emit dummy DWORD field here */
233 tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(0));
234
235 return VK_SUCCESS;
236 }
237
238 static inline void
239 tu_cond_exec_end(struct tu_cs *cs, struct tu_cond_exec_state *state)
240 {
241 /* Subtract one here to account for the DWORD field itself. */
242 uint32_t actual_dwords = cs->cur - state->dword_ptr - 1;
243 assert(actual_dwords <= state->max_dwords);
244 *state->dword_ptr = actual_dwords;
245 }
246
/* Type aliases that must be in place before the register headers below are
 * included.  tu_cs_emit_regs() declares its array as struct fd_reg_pair,
 * which this maps onto struct tu_reg_value.
 * NOTE(review): __bo_type is not referenced in this file; presumably it is
 * consumed by a6xx-pack.xml.h -- confirm.
 */
#define fd_reg_pair tu_reg_value
#define __bo_type struct tu_bo *
249
250 #include "a6xx.xml.h"
251 #include "a6xx-pack.xml.h"
252
/* Check that two integer expressions compare equal.
 *
 * On a mismatch, both expressions and their values (printed as 32-bit hex)
 * are written to stderr, then assert() fires.  With NDEBUG defined the
 * comparison and the message still happen but execution continues.
 *
 * The operands are cast to unsigned for printing so that the "%x"
 * conversions always receive a matching argument type.  Both arguments are
 * evaluated more than once, so they must be free of side effects.
 */
#define __assert_eq(a, b)                                                  \
   do {                                                                    \
      if ((a) != (b)) {                                                    \
         fprintf(stderr,                                                   \
                 "assert failed: " #a " (0x%x) != " #b " (0x%x)\n",        \
                 (unsigned) (a), (unsigned) (b));                          \
         assert((a) == (b));                                               \
      }                                                                    \
   } while (0)
260
/* Helper for tu_cs_emit_regs(): write the payload for slot i of regs[]
 * through the cursor `p`, which must be a uint32_t * in the expanding scope.
 *
 * Nothing is written when i is past the end of the array or when the slot's
 * .reg is zero.  Otherwise the slot must name the register exactly i after
 * regs[0] (checked with __assert_eq) and produces:
 *   - with a BO: two dwords holding ((iova + bo_offset) >> bo_shift) | value,
 *     low half first;
 *   - without a BO: the low dword of .value, followed by its high dword
 *     when .is_address is set.
 */
#define __ONE_REG(i, regs)                                                 \
   do {                                                                    \
      if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) {                       \
         __assert_eq(regs[0].reg + i, regs[i].reg);                        \
         if (!regs[i].bo) {                                                \
            *p++ = regs[i].value;                                          \
            if (regs[i].is_address)                                        \
               *p++ = regs[i].value >> 32;                                 \
         } else {                                                          \
            uint64_t addr = regs[i].bo->iova + regs[i].bo_offset;          \
            addr >>= regs[i].bo_shift;                                     \
            addr |= regs[i].value;                                         \
                                                                           \
            *p++ = addr;                                                   \
            *p++ = addr >> 32;                                             \
         }                                                                 \
      }                                                                    \
   } while (0)
279
/* Emits a sequence of register writes in order using a single pkt4.  The
 * variadic arguments initialize an array of struct fd_reg_pair; between 1
 * and 16 array slots are accepted, which is checked at compile time.
 *
 * The packet header takes its register index from the first slot and its
 * count from the number of array slots.  Each slot is then written by
 * __ONE_REG, which verifies that the registers were set up in consecutive
 * order (the mismatch is always reported on stderr, and aborts on a !NDEBUG
 * build) and skips slots whose .reg is zero.
 * NOTE(review): zero-.reg slots are presumably placeholders produced by the
 * pack codegen for the second half of 64-bit registers -- confirm against
 * a6xx-pack.xml.h.
 *
 * Things to be aware of:
 *  - No space is reserved or checked here, unlike tu_cs_emit() which at
 *    least asserts; the caller must have reserved enough room in cs.
 *  - References to buffers aren't automatically added to the CS, unlike in
 *    freedreno.  The macro only reads the BO's iova.
 *  - `cs` is evaluated twice (read of ->cur, then write-back), so it must
 *    not have side effects.
 */
#define tu_cs_emit_regs(cs, ...) do {                                      \
      const struct fd_reg_pair regs[] = { __VA_ARGS__ };                   \
      unsigned count = ARRAY_SIZE(regs);                                   \
                                                                           \
      /* Assert on the constant expression itself; `count` is an ordinary \
       * variable and so cannot be checked at compile time.              */\
      STATIC_ASSERT(ARRAY_SIZE(regs) > 0);                                 \
      STATIC_ASSERT(ARRAY_SIZE(regs) <= 16);                               \
                                                                           \
      uint32_t *p = (cs)->cur;                                             \
      *p++ = CP_TYPE4_PKT | count |                                        \
             (tu_odd_parity_bit(count) << 7) |                             \
             ((regs[0].reg & 0x3ffff) << 8) |                              \
             ((tu_odd_parity_bit(regs[0].reg) << 27));                     \
                                                                           \
      __ONE_REG( 0, regs);                                                 \
      __ONE_REG( 1, regs);                                                 \
      __ONE_REG( 2, regs);                                                 \
      __ONE_REG( 3, regs);                                                 \
      __ONE_REG( 4, regs);                                                 \
      __ONE_REG( 5, regs);                                                 \
      __ONE_REG( 6, regs);                                                 \
      __ONE_REG( 7, regs);                                                 \
      __ONE_REG( 8, regs);                                                 \
      __ONE_REG( 9, regs);                                                 \
      __ONE_REG(10, regs);                                                 \
      __ONE_REG(11, regs);                                                 \
      __ONE_REG(12, regs);                                                 \
      __ONE_REG(13, regs);                                                 \
      __ONE_REG(14, regs);                                                 \
      __ONE_REG(15, regs);                                                 \
      (cs)->cur = p;                                                       \
   } while (0)
324
325 #endif /* TU_CS_H */