turnip: enable 420_UNORM formats
[mesa.git] / src / freedreno / vulkan / tu_cs.h
1 /*
2 * Copyright © 2019 Google LLC
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23 #ifndef TU_CS_H
24 #define TU_CS_H
25
26 #include "tu_private.h"
27
28 #include "registers/adreno_pm4.xml.h"
29
30 void
31 tu_cs_init(struct tu_cs *cs,
32 struct tu_device *device,
33 enum tu_cs_mode mode,
34 uint32_t initial_size);
35
36 void
37 tu_cs_init_external(struct tu_cs *cs, uint32_t *start, uint32_t *end);
38
39 void
40 tu_cs_finish(struct tu_cs *cs);
41
42 void
43 tu_cs_begin(struct tu_cs *cs);
44
45 void
46 tu_cs_end(struct tu_cs *cs);
47
48 VkResult
49 tu_cs_begin_sub_stream(struct tu_cs *cs, uint32_t size, struct tu_cs *sub_cs);
50
51 VkResult
52 tu_cs_alloc(struct tu_cs *cs,
53 uint32_t count,
54 uint32_t size,
55 struct tu_cs_memory *memory);
56
57 struct tu_cs_entry
58 tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs);
59
60 VkResult
61 tu_cs_reserve_space(struct tu_cs *cs, uint32_t reserved_size);
62
63 void
64 tu_cs_reset(struct tu_cs *cs);
65
66 VkResult
67 tu_cs_add_entries(struct tu_cs *cs, struct tu_cs *target);
68
69 /**
70 * Get the size of the command packets emitted since the last call to
71 * tu_cs_add_entry.
72 */
73 static inline uint32_t
74 tu_cs_get_size(const struct tu_cs *cs)
75 {
76 return cs->cur - cs->start;
77 }
78
/**
 * Non-zero when tu_cs_get_size() reports zero, i.e. no command packet has
 * been emitted since the last call to tu_cs_add_entry.
 */
static inline uint32_t
tu_cs_is_empty(const struct tu_cs *cs)
{
   return !tu_cs_get_size(cs);
}
88
89 /**
90 * Discard all entries. This allows \a cs to be reused while keeping the
91 * existing BOs and command packets intact.
92 */
93 static inline void
94 tu_cs_discard_entries(struct tu_cs *cs)
95 {
96 assert(cs->mode == TU_CS_MODE_GROW);
97 cs->entry_count = 0;
98 }
99
100 /**
101 * Get the size needed for tu_cs_emit_call.
102 */
103 static inline uint32_t
104 tu_cs_get_call_size(const struct tu_cs *cs)
105 {
106 assert(cs->mode == TU_CS_MODE_GROW);
107 /* each CP_INDIRECT_BUFFER needs 4 dwords */
108 return cs->entry_count * 4;
109 }
110
111 /**
112 * Assert that we did not exceed the reserved space.
113 */
114 static inline void
115 tu_cs_sanity_check(const struct tu_cs *cs)
116 {
117 assert(cs->start <= cs->cur);
118 assert(cs->cur <= cs->reserved_end);
119 assert(cs->reserved_end <= cs->end);
120 }
121
122 /**
123 * Emit a uint32_t value into a command stream, without boundary checking.
124 */
125 static inline void
126 tu_cs_emit(struct tu_cs *cs, uint32_t value)
127 {
128 assert(cs->cur < cs->reserved_end);
129 *cs->cur = value;
130 ++cs->cur;
131 }
132
133 /**
134 * Emit an array of uint32_t into a command stream, without boundary checking.
135 */
136 static inline void
137 tu_cs_emit_array(struct tu_cs *cs, const uint32_t *values, uint32_t length)
138 {
139 assert(cs->cur + length <= cs->reserved_end);
140 memcpy(cs->cur, values, sizeof(uint32_t) * length);
141 cs->cur += length;
142 }
143
/**
 * Compute the odd-parity bit of \a val.
 *
 * Returns 1 when \a val has an even number of set bits (so that value plus
 * parity bit together have an odd number), and 0 otherwise.
 */
static inline unsigned
tu_odd_parity_bit(unsigned val)
{
   /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
    * note that we want odd parity so 0x6996 is inverted.
    */
   /* Fold the parity of the upper bits down into the low nibble. */
   val ^= val >> 16;
   val ^= val >> 8;
   val ^= val >> 4;
   val &= 0xf;

   /* The lookup constant is unsigned on purpose: ~0x6996 as a plain int is
    * negative, and right-shifting a negative value is implementation-defined.
    */
   return (~0x6996u >> val) & 1;
}
156
157 /**
158 * Get the size of the remaining space in the current BO.
159 */
160 static inline uint32_t
161 tu_cs_get_space(const struct tu_cs *cs)
162 {
163 return cs->end - cs->cur;
164 }
165
166 static inline void
167 tu_cs_reserve(struct tu_cs *cs, uint32_t reserved_size)
168 {
169 if (cs->mode != TU_CS_MODE_GROW) {
170 assert(tu_cs_get_space(cs) >= reserved_size);
171 assert(cs->reserved_end == cs->end);
172 return;
173 }
174
175 if (tu_cs_get_space(cs) >= reserved_size &&
176 cs->entry_count < cs->entry_capacity) {
177 cs->reserved_end = cs->cur + reserved_size;
178 return;
179 }
180
181 VkResult result = tu_cs_reserve_space(cs, reserved_size);
182 /* TODO: set this error in tu_cs and use it */
183 assert(result == VK_SUCCESS);
184 }
185
186 /**
187 * Emit a type-4 command packet header into a command stream.
188 */
189 static inline void
190 tu_cs_emit_pkt4(struct tu_cs *cs, uint16_t regindx, uint16_t cnt)
191 {
192 tu_cs_reserve(cs, cnt + 1);
193 tu_cs_emit(cs, CP_TYPE4_PKT | cnt | (tu_odd_parity_bit(cnt) << 7) |
194 ((regindx & 0x3ffff) << 8) |
195 ((tu_odd_parity_bit(regindx) << 27)));
196 }
197
198 /**
199 * Emit a type-7 command packet header into a command stream.
200 */
201 static inline void
202 tu_cs_emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt)
203 {
204 tu_cs_reserve(cs, cnt + 1);
205 tu_cs_emit(cs, CP_TYPE7_PKT | cnt | (tu_odd_parity_bit(cnt) << 15) |
206 ((opcode & 0x7f) << 16) |
207 ((tu_odd_parity_bit(opcode) << 23)));
208 }
209
210 static inline void
211 tu_cs_emit_wfi(struct tu_cs *cs)
212 {
213 tu_cs_emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0);
214 }
215
/**
 * Emit a 64-bit value as two dwords, low half first. Like tu_cs_emit(), this
 * does not reserve space itself.
 */
static inline void
tu_cs_emit_qw(struct tu_cs *cs, uint64_t value)
{
   const uint32_t lo = (uint32_t) value;
   const uint32_t hi = (uint32_t) (value >> 32);

   tu_cs_emit(cs, lo);
   tu_cs_emit(cs, hi);
}
222
/**
 * Write \a value to the single register \a reg: a type-4 header with a
 * count of one, followed by the value dword.
 */
static inline void
tu_cs_emit_write_reg(struct tu_cs *cs, uint16_t reg, uint32_t value)
{
   const uint16_t reg_count = 1;

   tu_cs_emit_pkt4(cs, reg, reg_count);
   tu_cs_emit(cs, value);
}
229
230 /**
231 * Emit a CP_INDIRECT_BUFFER command packet.
232 */
233 static inline void
234 tu_cs_emit_ib(struct tu_cs *cs, const struct tu_cs_entry *entry)
235 {
236 assert(entry->bo);
237 assert(entry->size && entry->offset + entry->size <= entry->bo->size);
238 assert(entry->size % sizeof(uint32_t) == 0);
239 assert(entry->offset % sizeof(uint32_t) == 0);
240
241 tu_cs_emit_pkt7(cs, CP_INDIRECT_BUFFER, 3);
242 tu_cs_emit_qw(cs, entry->bo->iova + entry->offset);
243 tu_cs_emit(cs, entry->size / sizeof(uint32_t));
244 }
245
246 /**
247 * Emit a CP_INDIRECT_BUFFER command packet for each entry in the target
248 * command stream.
249 */
250 static inline void
251 tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target)
252 {
253 assert(target->mode == TU_CS_MODE_GROW);
254 for (uint32_t i = 0; i < target->entry_count; i++)
255 tu_cs_emit_ib(cs, target->entries + i);
256 }
257
258 /* Helpers for bracketing a large sequence of commands of unknown size inside
259 * a CP_COND_REG_EXEC packet.
260 */
261 static inline void
262 tu_cond_exec_start(struct tu_cs *cs, uint32_t cond_flags)
263 {
264 assert(cs->mode == TU_CS_MODE_GROW);
265 assert(!cs->cond_flags && cond_flags);
266
267 tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
268 tu_cs_emit(cs, cond_flags);
269
270 cs->cond_flags = cond_flags;
271 cs->cond_dwords = cs->cur;
272
273 /* Emit dummy DWORD field here */
274 tu_cs_emit(cs, CP_COND_REG_EXEC_1_DWORDS(0));
275 }
/* Condition flag presets: RENDER_MODE combined with the GMEM or the SYSMEM
 * bit. NOTE(review): presumably meant as the cond_flags argument of
 * tu_cond_exec_start() -- confirm against callers.
 */
#define CP_COND_EXEC_0_RENDER_MODE_GMEM                                      \
   (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_GMEM)

#define CP_COND_EXEC_0_RENDER_MODE_SYSMEM                                    \
   (CP_COND_REG_EXEC_0_MODE(RENDER_MODE) | CP_COND_REG_EXEC_0_SYSMEM)
280
281 static inline void
282 tu_cond_exec_end(struct tu_cs *cs)
283 {
284 assert(cs->cond_flags);
285
286 cs->cond_flags = 0;
287 /* Subtract one here to account for the DWORD field itself. */
288 *cs->cond_dwords = cs->cur - cs->cond_dwords - 1;
289 }
290
291 #define fd_reg_pair tu_reg_value
292 #define __bo_type struct tu_bo *
293
294 #include "a6xx.xml.h"
295 #include "a6xx-pack.xml.h"
296
/* Assert that two integer expressions are equal. On a mismatch, print both
 * expressions and their values (in hex) to stderr before asserting.
 *
 * The expression text is passed through "%s" instead of being pasted into
 * the format string, so an operand that contains '%' cannot be mistaken for
 * a conversion specification. The values are cast to unsigned to match
 * "%x". Both operands are evaluated more than once, so they must be free of
 * side effects.
 */
#define __assert_eq(a, b)                                                    \
   do {                                                                      \
      if ((a) != (b)) {                                                      \
         fprintf(stderr, "assert failed: %s (0x%x) != %s (0x%x)\n",          \
                 #a, (unsigned) (a), #b, (unsigned) (b));                    \
         assert((a) == (b));                                                 \
      }                                                                      \
   } while (0)
304
/* Helper for tu_cs_emit_regs: emit the payload for regs[i] through the
 * caller's dword cursor `p`, which must be in scope at the expansion site.
 *
 * Nothing is emitted when i is past the end of the array or when the entry
 * has reg == 0. Otherwise the entry is checked to sit exactly i registers
 * after regs[0], and then:
 *  - without a BO: the value is written, followed by its upper 32 bits when
 *    the entry is flagged is_address;
 *  - with a BO: (iova + bo_offset) >> bo_shift, OR'ed with the value, is
 *    written as two dwords, low half first.
 */
#define __ONE_REG(i, regs)                                                   \
   do {                                                                      \
      if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) {                         \
         __assert_eq(regs[0].reg + i, regs[i].reg);                          \
         if (!regs[i].bo) {                                                  \
            *p++ = regs[i].value;                                            \
            if (regs[i].is_address)                                          \
               *p++ = regs[i].value >> 32;                                   \
         } else {                                                            \
            uint64_t addr = regs[i].bo->iova + regs[i].bo_offset;            \
            addr >>= regs[i].bo_shift;                                       \
            addr |= regs[i].value;                                           \
                                                                             \
            *p++ = addr;                                                     \
            *p++ = addr >> 32;                                               \
         }                                                                   \
      }                                                                      \
   } while (0)
323
/* Emits a sequence of register writes in order using a pkt4. This will check
 * (at runtime on a !NDEBUG build) that the registers were actually set up in
 * order in the code.
 *
 * Between 1 and 16 register entries may be passed. The limit is enforced on
 * ARRAY_SIZE(regs) directly: a local variable such as `count` is not an
 * integer constant expression, so a static assertion written against it is
 * not guaranteed to be evaluated at compile time. Only 16 __ONE_REG slots
 * are expanded below, so a longer list would otherwise be silently
 * truncated while the packet header still announced the full count.
 *
 * The payload is written directly through a local cursor and cs->cur is
 * updated once at the end; the space was reserved by tu_cs_emit_pkt4.
 *
 * Note that references to buffers aren't automatically added to the CS,
 * unlike in freedreno. We are clever in various places to avoid duplicating
 * the reference add work.
 *
 * Also, 64-bit address registers don't have a way (currently) to set a 64-bit
 * address without having a reference to a BO, since the .dword field in the
 * register's struct is only 32-bit wide. We should fix this in the pack
 * codegen later.
 * NOTE(review): __ONE_REG emits `value >> 32` for entries flagged
 * is_address, which suggests this limitation may be out of date -- confirm
 * against the pack codegen.
 */
#define tu_cs_emit_regs(cs, ...) do {                   \
   const struct fd_reg_pair regs[] = { __VA_ARGS__ };   \
   unsigned count = ARRAY_SIZE(regs);                   \
                                                        \
   STATIC_ASSERT(ARRAY_SIZE(regs) > 0);                 \
   STATIC_ASSERT(ARRAY_SIZE(regs) <= 16);               \
                                                        \
   tu_cs_emit_pkt4((cs), regs[0].reg, count);           \
   uint32_t *p = (cs)->cur;                             \
   __ONE_REG( 0, regs);                                 \
   __ONE_REG( 1, regs);                                 \
   __ONE_REG( 2, regs);                                 \
   __ONE_REG( 3, regs);                                 \
   __ONE_REG( 4, regs);                                 \
   __ONE_REG( 5, regs);                                 \
   __ONE_REG( 6, regs);                                 \
   __ONE_REG( 7, regs);                                 \
   __ONE_REG( 8, regs);                                 \
   __ONE_REG( 9, regs);                                 \
   __ONE_REG(10, regs);                                 \
   __ONE_REG(11, regs);                                 \
   __ONE_REG(12, regs);                                 \
   __ONE_REG(13, regs);                                 \
   __ONE_REG(14, regs);                                 \
   __ONE_REG(15, regs);                                 \
   (cs)->cur = p;                                       \
} while (0)
364
365 #endif /* TU_CS_H */