v3d: fix primitive queries for geometry shaders
[mesa.git] / src / gallium / drivers / v3d / v3dx_simulator.c
1 /*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
 * @file v3dx_simulator.c
26 *
 * Implements the actual HW interaction between the GL driver's VC5 simulator and the simulator.
28 *
29 * The register headers between V3D versions will have conflicting defines, so
30 * all register interactions appear in this file and are compiled per V3D version
31 * we support.
32 */
33
34 #ifdef USE_V3D_SIMULATOR
35
36 #include "v3d_screen.h"
37 #include "v3d_context.h"
38 #include "v3d_simulator_wrapper.h"
39
40 #define HW_REGISTER_RO(x) (x)
41 #define HW_REGISTER_RW(x) (x)
42 #if V3D_VERSION >= 41
43 #include "libs/core/v3d/registers/4.1.34.0/v3d.h"
44 #else
45 #include "libs/core/v3d/registers/3.3.0.0/v3d.h"
46 #endif
47
/* Register accessors.  Both expand to calls that reference a variable named
 * `v3d`, so they can only be used where such a variable is in scope.
 */
#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
50
51 static void
52 v3d_invalidate_l3(struct v3d_hw *v3d)
53 {
54 if (!v3d_hw_has_gca(v3d))
55 return;
56
57 #if V3D_VERSION < 40
58 uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);
59
60 V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
61 V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
62 #endif
63 }
64
65 /* Invalidates the L2C cache. This is a read-only cache for uniforms and instructions. */
66 static void
67 v3d_invalidate_l2c(struct v3d_hw *v3d)
68 {
69 if (V3D_VERSION >= 33)
70 return;
71
72 V3D_WRITE(V3D_CTL_0_L2CACTL,
73 V3D_CTL_0_L2CACTL_L2CCLR_SET |
74 V3D_CTL_0_L2CACTL_L2CENA_SET);
75 }
76
77 /* Invalidates texture L2 cachelines */
78 static void
79 v3d_invalidate_l2t(struct v3d_hw *v3d)
80 {
81 V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
82 V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
83 V3D_WRITE(V3D_CTL_0_L2TCACTL,
84 V3D_CTL_0_L2TCACTL_L2TFLS_SET |
85 (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
86 }
87
88 /* Flushes dirty texture cachelines from the L1 write combiner */
89 static void
90 v3d_flush_l1td(struct v3d_hw *v3d)
91 {
92 V3D_WRITE(V3D_CTL_0_L2TCACTL,
93 V3D_CTL_0_L2TCACTL_TMUWCF_SET);
94
95 assert(!(V3D_READ(V3D_CTL_0_L2TCACTL) & V3D_CTL_0_L2TCACTL_L2TFLS_SET));
96 }
97
98 /* Flushes dirty texture L2 cachelines */
99 static void
100 v3d_flush_l2t(struct v3d_hw *v3d)
101 {
102 V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
103 V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
104 V3D_WRITE(V3D_CTL_0_L2TCACTL,
105 V3D_CTL_0_L2TCACTL_L2TFLS_SET |
106 (2 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
107
108 assert(!(V3D_READ(V3D_CTL_0_L2TCACTL) & V3D_CTL_0_L2TCACTL_L2TFLS_SET));
109 }
110
111 /* Invalidates the slice caches. These are read-only caches. */
112 static void
113 v3d_invalidate_slices(struct v3d_hw *v3d)
114 {
115 V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
116 }
117
/* Invalidates every cache this file knows how to invalidate, in the order
 * L3, L2C, L2T, then the slice caches.
 */
static void
v3d_invalidate_caches(struct v3d_hw *v3d)
{
        v3d_invalidate_l3(v3d);

        v3d_invalidate_l2c(v3d);
        v3d_invalidate_l2t(v3d);

        v3d_invalidate_slices(v3d);
}
126
127 static uint32_t g_gmp_ofs;
128 static void
129 v3d_reload_gmp(struct v3d_hw *v3d)
130 {
131 /* Completely reset the GMP. */
132 V3D_WRITE(V3D_GMP_0_CFG,
133 V3D_GMP_0_CFG_PROTENABLE_SET);
134 V3D_WRITE(V3D_GMP_0_TABLE_ADDR, g_gmp_ofs);
135 V3D_WRITE(V3D_GMP_0_CLEAR_LOAD, ~0);
136 while (V3D_READ(V3D_GMP_0_STATUS) &
137 V3D_GMP_0_STATUS_CFG_BUSY_SET) {
138 ;
139 }
140 }
141
142 static UNUSED void
143 v3d_flush_caches(struct v3d_hw *v3d)
144 {
145 v3d_flush_l1td(v3d);
146 v3d_flush_l2t(v3d);
147 }
148
149 int
150 v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
151 struct drm_v3d_submit_tfu *args)
152 {
153 int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET;
154
155 V3D_WRITE(V3D_TFU_IIA, args->iia);
156 V3D_WRITE(V3D_TFU_IIS, args->iis);
157 V3D_WRITE(V3D_TFU_ICA, args->ica);
158 V3D_WRITE(V3D_TFU_IUA, args->iua);
159 V3D_WRITE(V3D_TFU_IOA, args->ioa);
160 V3D_WRITE(V3D_TFU_IOS, args->ios);
161 V3D_WRITE(V3D_TFU_COEF0, args->coef[0]);
162 V3D_WRITE(V3D_TFU_COEF1, args->coef[1]);
163 V3D_WRITE(V3D_TFU_COEF2, args->coef[2]);
164 V3D_WRITE(V3D_TFU_COEF3, args->coef[3]);
165
166 V3D_WRITE(V3D_TFU_ICFG, args->icfg);
167
168 while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) {
169 v3d_hw_tick(v3d);
170 }
171
172 return 0;
173 }
174
#if V3D_VERSION >= 41
/* Runs a compute (CSD) dispatch described by @args.
 *
 * @gmp_ofs is stored in g_gmp_ofs and the GMP is reloaded, caches are
 * invalidated, the queued CFG registers are programmed, and the simulator
 * is ticked until the status register reports neither a current nor a
 * queued dispatch.  Caches are flushed afterwards.
 *
 * Always returns 0.  Only built for V3D_VERSION >= 41.
 */
int
v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_csd *args,
                                 uint32_t gmp_ofs)
{
        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        /* CFG1..CFG6 go first; CFG0 is deliberately written last because
         * it kicks off the job.
         */
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG1, args->cfg[1]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG2, args->cfg[2]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG3, args->cfg[3]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);

        for (;;) {
                if (!(V3D_READ(V3D_CSD_0_STATUS) &
                      (V3D_CSD_0_STATUS_HAVE_CURRENT_DISPATCH_SET |
                       V3D_CSD_0_STATUS_HAVE_QUEUED_DISPATCH_SET)))
                        break;

                v3d_hw_tick(v3d);
        }

        v3d_flush_caches(v3d);

        return 0;
}
#endif
206
207 int
208 v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
209 struct drm_v3d_get_param *args)
210 {
211 static const uint32_t reg_map[] = {
212 [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
213 [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
214 [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
215 [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
216 [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
217 [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
218 [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
219 };
220
221 switch (args->param) {
222 case DRM_V3D_PARAM_SUPPORTS_TFU:
223 args->value = 1;
224 return 0;
225 case DRM_V3D_PARAM_SUPPORTS_CSD:
226 args->value = V3D_VERSION >= 41;
227 return 0;
228 case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
229 args->value = 1;
230 return 0;
231 }
232
233 if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
234 args->value = V3D_READ(reg_map[args->param]);
235 return 0;
236 }
237
238 fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n",
239 (long long)args->value);
240 abort();
241 }
242
243 static struct v3d_hw *v3d_isr_hw;
244
245 static void
246 v3d_isr(uint32_t hub_status)
247 {
248 struct v3d_hw *v3d = v3d_isr_hw;
249
250 /* Check the per-core bits */
251 if (hub_status & (1 << 0)) {
252 uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
253 V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);
254
255 if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
256 uint32_t size = 256 * 1024;
257 uint32_t offset = v3d_simulator_get_spill(size);
258
259 v3d_reload_gmp(v3d);
260
261 V3D_WRITE(V3D_PTB_0_BPOA, offset);
262 V3D_WRITE(V3D_PTB_0_BPOS, size);
263 return;
264 }
265
266 if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
267 fprintf(stderr, "GMP violation at 0x%08x\n",
268 V3D_READ(V3D_GMP_0_VIO_ADDR));
269 abort();
270 } else {
271 fprintf(stderr,
272 "Unexpected ISR with core status 0x%08x\n",
273 core_status);
274 }
275 abort();
276 }
277
278 return;
279 }
280
281 void
282 v3dX(simulator_init_regs)(struct v3d_hw *v3d)
283 {
284 #if V3D_VERSION == 33
285 /* Set OVRTMUOUT to match kernel behavior.
286 *
287 * This means that the texture sampler uniform configuration's tmu
288 * output type field is used, instead of using the hardware default
289 * behavior based on the texture type. If you want the default
290 * behavior, you can still put "2" in the indirect texture state's
291 * output_type field.
292 */
293 V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
294 #endif
295
296 uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
297 V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
298 V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
299 V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);
300
301 v3d_isr_hw = v3d;
302 v3d_hw_set_isr(v3d, v3d_isr);
303 }
304
305 void
306 v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
307 struct drm_v3d_submit_cl *submit,
308 uint32_t gmp_ofs)
309 {
310 g_gmp_ofs = gmp_ofs;
311 v3d_reload_gmp(v3d);
312
313 v3d_invalidate_caches(v3d);
314
315 if (submit->qma) {
316 V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
317 V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
318 }
319 #if V3D_VERSION >= 41
320 if (submit->qts) {
321 V3D_WRITE(V3D_CLE_0_CT0QTS,
322 V3D_CLE_0_CT0QTS_CTQTSEN_SET |
323 submit->qts);
324 }
325 #endif
326 V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
327 V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);
328
329 /* Wait for bin to complete before firing render. The kernel's
330 * scheduler implements this using the GPU scheduler blocking on the
331 * bin fence completing. (We don't use HW semaphores).
332 */
333 while (V3D_READ(V3D_CLE_0_CT0CA) !=
334 V3D_READ(V3D_CLE_0_CT0EA)) {
335 v3d_hw_tick(v3d);
336 }
337
338 v3d_invalidate_caches(v3d);
339
340 V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
341 V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);
342
343 while (V3D_READ(V3D_CLE_0_CT1CA) !=
344 V3D_READ(V3D_CLE_0_CT1EA) ||
345 V3D_READ(V3D_CLE_1_CT1CA) !=
346 V3D_READ(V3D_CLE_1_CT1EA)) {
347 v3d_hw_tick(v3d);
348 }
349 }
350
351 #endif /* USE_V3D_SIMULATOR */