/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/**
 * @file v3dx_simulator.c
 *
 * Implements the actual HW interaction between the GL driver's VC5 simulator
 * and the simulator.
 *
 * The register headers between V3D versions will have conflicting defines, so
 * all register interactions appear in this file and are compiled per V3D
 * version.
 */
#ifdef USE_V3D_SIMULATOR

#include <assert.h>
#include <stdio.h>

#include "v3d_simulator.h"
#include "v3d_simulator_wrapper.h"

#include "util/macros.h"
#include "drm-uapi/v3d_drm.h"

#define HW_REGISTER_RO(x) (x)
#define HW_REGISTER_RW(x) (x)
#if V3D_VERSION >= 41
#include "libs/core/v3d/registers/4.1.34.0/v3d.h"
#else
#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
#endif

#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
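/* Invalidates the L3 cache through the GCA, if this HW has one, by toggling
 * the flush bit in the GCA cache control register.
 */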
static void
v3d_invalidate_l3(struct v3d_hw *v3d)
{
        if (!v3d_hw_has_gca(v3d))
                return;

        uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);

        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
}
/* Invalidates the L2C cache. This is a read-only cache for uniforms and
 * instructions.
 */
static void
v3d_invalidate_l2c(struct v3d_hw *v3d)
{
        if (V3D_VERSION >= 33)
                return;

        V3D_WRITE(V3D_CTL_0_L2CACTL,
                  V3D_CTL_0_L2CACTL_L2CCLR_SET |
                  V3D_CTL_0_L2CACTL_L2CENA_SET);
}
/* Invalidates texture L2 cachelines */
static void
v3d_invalidate_l2t(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
}
/* Flushes dirty texture cachelines from the L1 write combiner */
static void
v3d_flush_l1td(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_TMUWCF_SET);

        assert(!(V3D_READ(V3D_CTL_0_L2TCACTL) & V3D_CTL_0_L2TCACTL_L2TFLS_SET));
}
/* Flushes dirty texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (2 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));

        assert(!(V3D_READ(V3D_CTL_0_L2TCACTL) & V3D_CTL_0_L2TCACTL_L2TFLS_SET));
}
/* Invalidates the slice caches. These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
}
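/* Invalidates all of the read-only caches that may hold stale data for an
 * incoming job's inputs.
 */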
static void
v3d_invalidate_caches(struct v3d_hw *v3d)
{
        v3d_invalidate_l3(v3d);
        v3d_invalidate_l2c(v3d);
        v3d_invalidate_l2t(v3d);
        v3d_invalidate_slices(v3d);
}
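/* Offset of the GMP table in simulator memory, used when reprogramming the
 * GMP below.
 */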
static uint32_t g_gmp_ofs;

static void
v3d_reload_gmp(struct v3d_hw *v3d)
{
        /* Completely reset the GMP. */
        V3D_WRITE(V3D_GMP_0_CFG,
                  V3D_GMP_0_CFG_PROTENABLE_SET);
        V3D_WRITE(V3D_GMP_0_TABLE_ADDR, g_gmp_ofs);
        V3D_WRITE(V3D_GMP_0_CLEAR_LOAD, ~0);
        while (V3D_READ(V3D_GMP_0_STATUS) &
               V3D_GMP_0_STATUS_CFG_BUSY_SET) {
                ;
        }
}
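/* Flushes caches that may hold dirty data written by a completed job (the
 * TMU write combiner and the texture L2).
 */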
static void
v3d_flush_caches(struct v3d_hw *v3d)
{
        v3d_flush_l1td(v3d);
        v3d_flush_l2t(v3d);
}
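/* Handles a TFU job submission from the simulator's submit_tfu ioctl path:
 * programs the TFU registers and busy-waits for the conversion to complete.
 */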
int
v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_tfu *args)
{
        int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET;

        V3D_WRITE(V3D_TFU_IIA, args->iia);
        V3D_WRITE(V3D_TFU_IIS, args->iis);
        V3D_WRITE(V3D_TFU_ICA, args->ica);
        V3D_WRITE(V3D_TFU_IUA, args->iua);
        V3D_WRITE(V3D_TFU_IOA, args->ioa);
        V3D_WRITE(V3D_TFU_IOS, args->ios);
        V3D_WRITE(V3D_TFU_COEF0, args->coef[0]);
        V3D_WRITE(V3D_TFU_COEF1, args->coef[1]);
        V3D_WRITE(V3D_TFU_COEF2, args->coef[2]);
        V3D_WRITE(V3D_TFU_COEF3, args->coef[3]);

        V3D_WRITE(V3D_TFU_ICFG, args->icfg);

        while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) {
                v3d_hw_tick(v3d);
        }

        return 0;
}
#if V3D_VERSION >= 41
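/* Handles a CSD (compute shader dispatch) job submission: writes the queued
 * dispatch config registers (CFG0 last, since it kicks off the job) and
 * busy-waits until no dispatch remains current or queued.
 */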
int
v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_csd *args,
                                 uint32_t gmp_ofs)
{
        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CSD_0_QUEUED_CFG1, args->cfg[1]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG2, args->cfg[2]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG3, args->cfg[3]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
        /* CFG0 kicks off the job */
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);

        while (V3D_READ(V3D_CSD_0_STATUS) &
               (V3D_CSD_0_STATUS_HAVE_CURRENT_DISPATCH_SET |
                V3D_CSD_0_STATUS_HAVE_QUEUED_DISPATCH_SET)) {
                v3d_hw_tick(v3d);
        }

        v3d_flush_caches(v3d);

        return 0;
}
#endif
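/* Answers the get_param ioctl: feature queries are handled directly, and the
 * remaining params are serviced by reading the corresponding ident/config
 * register from the simulated HW.
 */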
int
v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_get_param *args)
{
        static const uint32_t reg_map[] = {
                [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
                [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
                [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
                [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
        };

        switch (args->param) {
        case DRM_V3D_PARAM_SUPPORTS_TFU:
                args->value = 1;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CSD:
                args->value = V3D_VERSION >= 41;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
                args->value = 1;
                return 0;
        }

        if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
                args->value = V3D_READ(reg_map[args->param]);
                return 0;
        }

        fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n",
                (long long)args->param);
        abort();
}
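/* The ISR callback only receives the hub status, so the v3d_hw handle is
 * stashed in a global for it.
 */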
static struct v3d_hw *v3d_isr_hw;
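/* Simulator interrupt handler: services binner out-of-memory interrupts by
 * pointing the PTB at freshly allocated spill memory, and reports GMP
 * violations.
 */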
static void
v3d_isr(uint32_t hub_status)
{
        struct v3d_hw *v3d = v3d_isr_hw;

        /* Check the per-core bits */
        if (hub_status & (1 << 0)) {
                uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
                V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);

                if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
                        uint32_t size = 256 * 1024;
                        uint32_t offset = v3d_simulator_get_spill(size);

                        v3d_reload_gmp(v3d);

                        V3D_WRITE(V3D_PTB_0_BPOA, offset);
                        V3D_WRITE(V3D_PTB_0_BPOS, size);
                        return;
                }

                if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
                        fprintf(stderr, "GMP violation at 0x%08x\n",
                                V3D_READ(V3D_GMP_0_VIO_ADDR));
                        abort();
                } else {
                        fprintf(stderr,
                                "Unexpected ISR with core status 0x%08x\n",
                                core_status);
                }
                abort();
        }

        return;
}
void
v3dX(simulator_init_regs)(struct v3d_hw *v3d)
{
#if V3D_VERSION == 33
        /* Set OVRTMUOUT to match kernel behavior.
         *
         * This means that the texture sampler uniform configuration's tmu
         * output type field is used, instead of using the hardware default
         * behavior based on the texture type. If you want the default
         * behavior, you can still put "2" in the indirect texture state's
         * output_type field.
         */
        V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
#endif

        uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
                                    V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
        V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
        V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);

        v3d_isr_hw = v3d;
        v3d_hw_set_isr(v3d, v3d_isr);
}
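/* Handles a CL (command list) job submission: kicks the binning CL on the
 * CT0Q registers, waits for the binner to reach its end address, then kicks
 * the render CL on CT1Q and waits for it to complete as well.
 */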
int
v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_submit_cl *submit,
                                uint32_t gmp_ofs)
{
        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        if (submit->qma) {
                V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
                V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
        }
#if V3D_VERSION >= 41
        if (submit->qts) {
                V3D_WRITE(V3D_CLE_0_CT0QTS,
                          V3D_CLE_0_CT0QTS_CTQTSEN_SET |
                          submit->qts);
        }
#endif
        V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
        V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);

        /* Wait for bin to complete before firing render. The kernel's
         * scheduler implements this using the GPU scheduler blocking on the
         * bin fence completing. (We don't use HW semaphores).
         */
        while (V3D_READ(V3D_CLE_0_CT0CA) !=
               V3D_READ(V3D_CLE_0_CT0EA)) {
                v3d_hw_tick(v3d);
        }

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
        V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);

        while (V3D_READ(V3D_CLE_0_CT1CA) !=
               V3D_READ(V3D_CLE_0_CT1EA) ||
               V3D_READ(V3D_CLE_1_CT1CA) !=
               V3D_READ(V3D_CLE_1_CT1EA)) {
                v3d_hw_tick(v3d);
        }

        return 0;
}
#endif /* USE_V3D_SIMULATOR */