vc4: Convert the driver to emitting the shader record using pack macros.
[mesa.git] / src / gallium / drivers / vc4 / vc4_cl.h
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef VC4_CL_H
25 #define VC4_CL_H
26
27 #include <stdint.h>
28
29 #include "util/u_math.h"
30 #include "util/macros.h"
31
/* Forward declarations of driver types that this header refers to. */
struct vc4_bo;
struct vc4_job;
struct vc4_cl;

/**
 * Opaque cursor type for positions inside a command list.
 *
 * The structure is never defined; it only exists so the compiler can
 * typecheck the pointers handed to the cl_*() helpers.
 */
struct vc4_cl_out;
41
/** A (BO, offset) pair naming an address for the CL packing functions. */
struct vc4_cl_reloc {
        /* Buffer object being referenced. */
        struct vc4_bo *bo;
        /* Offset within that BO. */
        uint32_t offset;
};
47
/*
 * Hooks for the XML-generated pack functions.  They are set up ahead of the
 * v3d_packet_v21_pack.h include so that the generated code can refer to them.
 * The relocation helper is only declared here; its body needs the complete
 * struct vc4_cl and therefore lives further down in this header.
 */
static inline void
cl_pack_emit_reloc(struct vc4_cl *cl, const struct vc4_cl_reloc *reloc);

/* Type of the user-data argument handed to the pack functions. */
#define __gen_user_data struct vc4_cl
/* Type used to describe an address field. */
#define __gen_address_type struct vc4_cl_reloc
/* Extracts the numeric offset from a pointer to an address value. */
#define __gen_address_offset(reloc) ((reloc)->offset)
/* Function used to record the BO behind an address field. */
#define __gen_emit_reloc cl_pack_emit_reloc
54
55 #include "kernel/vc4_packet.h"
56 #include "broadcom/cle/v3d_packet_v21_pack.h"
57
/** Book-keeping for one command list (CL) while it is being written. */
struct vc4_cl {
        /* Start of the CL storage; cl_offset() measures from here. */
        void *base;
        /* Job passed to vc4_gem_hindex() by cl_pack_emit_reloc(). */
        struct vc4_job *job;
        /* Current write position. */
        struct vc4_cl_out *next;
        /* Where the next BO handle index will be stored. */
        struct vc4_cl_out *reloc_next;
        /* Size in bytes; cl_end() asserts that cl_offset() stays within it. */
        uint32_t size;
#ifdef DEBUG
        /* Relocations still outstanding since the last cl_start_*reloc(). */
        uint32_t reloc_count;
#endif
};
68
/* Out-of-line CL helpers; their definitions are not part of this header. */
void vc4_init_cl(struct vc4_job *job, struct vc4_cl *cl);
void vc4_reset_cl(struct vc4_cl *cl);
void vc4_dump_cl(void *cl, uint32_t size, bool is_render);

/* Yields the value the reloc helpers below store as a BO's handle index. */
uint32_t vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo);
73
74 struct PACKED unaligned_16 { uint16_t x; };
75 struct PACKED unaligned_32 { uint32_t x; };
76
77 static inline uint32_t cl_offset(struct vc4_cl *cl)
78 {
79 return (char *)cl->next - (char *)cl->base;
80 }
81
/**
 * Moves a CL cursor forward.
 *
 * @param cl  Cursor to update in place.
 * @param n   Number of bytes to step over.
 */
static inline void
cl_advance(struct vc4_cl_out **cl, uint32_t n)
{
        char *cursor = (char *)*cl;

        cursor += n;
        *cl = (struct vc4_cl_out *)cursor;
}
87
88 static inline struct vc4_cl_out *
89 cl_start(struct vc4_cl *cl)
90 {
91 return cl->next;
92 }
93
94 static inline void
95 cl_end(struct vc4_cl *cl, struct vc4_cl_out *next)
96 {
97 cl->next = next;
98 assert(cl_offset(cl) <= cl->size);
99 }
100
101
102 static inline void
103 put_unaligned_32(struct vc4_cl_out *ptr, uint32_t val)
104 {
105 struct unaligned_32 *p = (void *)ptr;
106 p->x = val;
107 }
108
109 static inline void
110 put_unaligned_16(struct vc4_cl_out *ptr, uint16_t val)
111 {
112 struct unaligned_16 *p = (void *)ptr;
113 p->x = val;
114 }
115
/** Writes one byte at the cursor and steps the cursor past it. */
static inline void
cl_u8(struct vc4_cl_out **cl, uint8_t n)
{
        uint8_t *dst = (uint8_t *)*cl;

        dst[0] = n;
        cl_advance(cl, sizeof(uint8_t));
}
122
/**
 * Writes a 16-bit value at the cursor via put_unaligned_16() and steps the
 * cursor two bytes forward.
 */
static inline void
cl_u16(struct vc4_cl_out **cl, uint16_t n)
{
        struct vc4_cl_out *dst = *cl;

        put_unaligned_16(dst, n);
        cl_advance(cl, sizeof(uint16_t));
}
129
/**
 * Writes a 32-bit value at the cursor via put_unaligned_32() and steps the
 * cursor four bytes forward.
 */
static inline void
cl_u32(struct vc4_cl_out **cl, uint32_t n)
{
        struct vc4_cl_out *dst = *cl;

        put_unaligned_32(dst, n);
        cl_advance(cl, sizeof(uint32_t));
}
136
/**
 * Writes a 32-bit value with a direct uint32_t store and steps the cursor
 * four bytes forward.  Unlike cl_u32() this does not go through
 * put_unaligned_32(), so the caller is responsible for the cursor being
 * suitably aligned for a uint32_t.
 */
static inline void
cl_aligned_u32(struct vc4_cl_out **cl, uint32_t n)
{
        uint32_t *dst = (uint32_t *)*cl;

        *dst = n;
        cl_advance(cl, sizeof(uint32_t));
}
143
/**
 * Writes a host pointer at the cursor with a direct pointer-sized store and
 * steps the cursor sizeof(void *) bytes forward.
 */
static inline void
cl_ptr(struct vc4_cl_out **cl, void *ptr)
{
        struct vc4_cl_out **slot = (struct vc4_cl_out **)*cl;

        *slot = ptr;
        cl_advance(cl, sizeof(void *));
}
150
/**
 * Writes a float as 32 bits using the unaligned-safe cl_u32().  The bits
 * are whatever fui() returns for f (fui() is not defined in this header;
 * presumably it yields the float's raw bit pattern).
 */
static inline void
cl_f(struct vc4_cl_out **cl, float f)
{
        const uint32_t bits = fui(f);

        cl_u32(cl, bits);
}
156
/**
 * Writes a float as 32 bits using cl_aligned_u32(), so the cursor must be
 * suitably aligned.  The bits are whatever fui() returns for f (fui() is
 * not defined in this header; presumably the float's raw bit pattern).
 */
static inline void
cl_aligned_f(struct vc4_cl_out **cl, float f)
{
        const uint32_t bits = fui(f);

        cl_aligned_u32(cl, bits);
}
162
163 static inline void
164 cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
165 {
166 assert(n == 1 || n == 2);
167 #ifdef DEBUG
168 assert(cl->reloc_count == 0);
169 cl->reloc_count = n;
170 #endif
171
172 cl_u8(out, VC4_PACKET_GEM_HANDLES);
173 cl->reloc_next = *out;
174 cl_u32(out, 0); /* Space where hindex will be written. */
175 cl_u32(out, 0); /* Space where hindex will be written. */
176 }
177
178 static inline struct vc4_cl_out *
179 cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
180 {
181 #ifdef DEBUG
182 assert(cl->reloc_count == 0);
183 cl->reloc_count = n;
184 #endif
185 cl->reloc_next = cl->next;
186
187 /* Reserve the space where hindex will be written. */
188 cl_advance(&cl->next, n * 4);
189
190 return cl->next;
191 }
192
193 static inline void
194 cl_reloc(struct vc4_job *job, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
195 struct vc4_bo *bo, uint32_t offset)
196 {
197 *(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
198 cl_advance(&cl->reloc_next, 4);
199
200 #ifdef DEBUG
201 cl->reloc_count--;
202 #endif
203
204 cl_u32(cl_out, offset);
205 }
206
207 static inline void
208 cl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl,
209 struct vc4_cl_out **cl_out,
210 struct vc4_bo *bo, uint32_t offset)
211 {
212 *(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
213 cl_advance(&cl->reloc_next, 4);
214
215 #ifdef DEBUG
216 cl->reloc_count--;
217 #endif
218
219 cl_aligned_u32(cl_out, offset);
220 }
221
222 /**
223 * Reference to a BO with its associated offset, used in the pack process.
224 */
225 static inline struct vc4_cl_reloc
226 cl_address(struct vc4_bo *bo, uint32_t offset)
227 {
228 struct vc4_cl_reloc reloc = {
229 .bo = bo,
230 .offset = offset,
231 };
232 return reloc;
233 }
234
/*
 * Defined out of line.  NOTE(review): presumably makes sure at least `size`
 * more bytes can be written to the CL -- confirm against the definition.
 */
void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
236
/*
 * Name builders for the V3D21_-prefixed identifiers of a packet: its header
 * initializer, its packed length, its pack function and its unpacked struct
 * tag.  `packet` is pasted in verbatim.
 */
#define cl_packet_header(packet) V3D21_ ## packet ## _header
#define cl_packet_length(packet) V3D21_ ## packet ## _length
#define cl_packet_pack(packet)   V3D21_ ## packet ## _pack
#define cl_packet_struct(packet) V3D21_ ## packet
241
/**
 * Claims `size` bytes at the cursor.
 *
 * @param cl    Cursor to claim from; advanced by `size` bytes.
 * @param size  Number of bytes to claim.
 * @return      The cursor position from before the advance.
 */
static inline void *
cl_get_emit_space(struct vc4_cl_out **cl, size_t size)
{
        struct vc4_cl_out *space = *cl;

        cl_advance(cl, size);
        return space;
}
249
/* Macro for setting up an emit of a CL struct.  A temporary unpacked struct
 * called `name` is created, initialized from the packet's header macro, and
 * you get to set its fields in the block that follows:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) {
 *         flags.flat_shade_flags = 1 << 2;
 * }
 *
 * or default values only can be emitted with just:
 *
 * cl_emit(bcl, FLAT_SHADE_FLAGS, flags);
 *
 * The trick here is that we make a for loop that will execute the body
 * (either the block or the ';' after the macro invocation) exactly once.
 * The packing itself happens in the loop's iteration expression: the struct
 * is packed at the CL's current write pointer, which is then moved forward
 * by cl_packet_length(packet) bytes.  A `break` inside the body therefore
 * skips the emit altogether.
 *
 * No space is reserved by this macro; cl_end() merely asserts that the new
 * write pointer is still within the CL's size.
 */
#define cl_emit(cl, packet, name)                                           \
        for (struct cl_packet_struct(packet) name = {                       \
                cl_packet_header(packet)                                    \
        },                                                                  \
        *_loop_terminate = &name;                                           \
        __builtin_expect(_loop_terminate != NULL, 1);                       \
        ({                                                                  \
                struct vc4_cl_out *cl_out = cl_start(cl);                   \
                cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name);       \
                VG(VALGRIND_CHECK_MEM_IS_DEFINED(cl_out,                    \
                                                 cl_packet_length(packet))); \
                cl_advance(&cl_out, cl_packet_length(packet));              \
                cl_end(cl, cl_out);                                         \
                _loop_terminate = NULL;                                     \
        }))
281
282 /**
283 * Helper function called by the XML-generated pack functions for filling in
284 * an address field in shader records.
285 *
286 * Relocations for shader recs and texturing involve the packet (or uniforms
287 * stream) being preceded by the handles to the BOs, and the offset within the
288 * BO being in the stream (the output of this function).
289 */
290 static inline void
291 cl_pack_emit_reloc(struct vc4_cl *cl, const struct vc4_cl_reloc *reloc)
292 {
293 *(uint32_t *)cl->reloc_next = vc4_gem_hindex(cl->job, reloc->bo);
294 cl_advance(&cl->reloc_next, 4);
295
296 #ifdef DEBUG
297 cl->reloc_count--;
298 #endif
299 }
300
301 #endif /* VC4_CL_H */