2 * Copyright © 2014 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @file vc4_simulator.c
27 * Implements VC4 simulation on top of a non-VC4 GEM fd.
29 * This file's goal is to emulate the VC4 ioctls' behavior in the kernel on
30 * top of the simpenrose software simulator. Generally, VC4 driver BOs have a
31 * GEM-side copy of their contents and a simulator-side memory area that the
32 * GEM contents get copied into during simulation. Once simulation is done,
33 * the simulator's data is copied back out to the GEM BOs, so that rendering
34 * appears on the screen as if actual hardware rendering had been done.
36 * One of the limitations of this code is that we shouldn't really need a
37 * GEM-side BO for non-window-system BOs. However, we do need unique BO
38 * handles for each of our GEM bos so that this file can look up its state
39 * from the handle passed in at submit ioctl time (also, a couple of places
40 * outside of this file still call ioctls directly on the fd).
42 * Another limitation is that BO import doesn't work unless the underlying
43 * window system's BO size matches what VC4 is going to use, which of course
44 * doesn't work out in practice. This means that for now, only DRI3 (VC4
45 * makes the winsys BOs) is supported, not DRI2 (window system makes the winsys
49 #ifdef USE_VC4_SIMULATOR
53 #include "util/u_memory.h"
54 #include "util/u_mm.h"
55 #include "util/ralloc.h"
57 #include "vc4_screen.h"
58 #include "vc4_cl_dump.h"
59 #include "vc4_context.h"
60 #include "kernel/vc4_drv.h"
61 #include "vc4_simulator_validate.h"
62 #include "simpenrose/simpenrose.h"
64 /** Global (across GEM fds) state for the simulator */
65 static struct vc4_simulator_state
{
70 struct mem_block
*heap
;
71 struct mem_block
*overflow
;
73 /** Mapping from GEM fd (stored as fd + 1) to struct vc4_simulator_file * */
74 struct hash_table
*fd_map
;
78 .mutex
= _MTX_INITIALIZER_NP
,
81 /** Per-GEM-fd state for the simulator. */
82 struct vc4_simulator_file
{
85 /* This is weird -- we make a "vc4_device" per file, even though on
86 * the kernel side this is a global. We do this so that kernel code
87 * calling us for BO allocation can get to our screen.
89 struct drm_device dev
;
91 /** Mapping from GEM handle to struct vc4_simulator_bo * */
92 struct hash_table
*bo_map
;
95 /** Wrapper for drm_vc4_bo tracking the simulator-specific state. */
96 struct vc4_simulator_bo
{
97 struct drm_vc4_bo base
;
98 struct vc4_simulator_file
*file
;
100 /** Area for this BO within sim_state->mem */
101 struct mem_block
*block
;
105 /* Mapping of the underlying GEM object that we copy in/out of
114 return (void *)(uintptr_t)key
;
117 static struct vc4_simulator_file
*
118 vc4_get_simulator_file_for_fd(int fd
)
120 struct hash_entry
*entry
= _mesa_hash_table_search(sim_state
.fd_map
,
122 return entry
? entry
->data
: NULL
;
125 /* A marker placed just after each BO, then checked after rendering to make
126 * sure it's still there.
128 #define BO_SENTINEL 0xfedcba98
130 #define PAGE_ALIGN2 12
133 * Allocates space in simulator memory and returns a tracking struct for it
134 * that also contains the drm_gem_cma_object struct.
136 static struct vc4_simulator_bo
*
137 vc4_create_simulator_bo(int fd
, int handle
, unsigned size
)
139 struct vc4_simulator_file
*file
= vc4_get_simulator_file_for_fd(fd
);
140 struct vc4_simulator_bo
*sim_bo
= rzalloc(file
,
141 struct vc4_simulator_bo
);
142 struct drm_vc4_bo
*bo
= &sim_bo
->base
;
143 struct drm_gem_cma_object
*obj
= &bo
->base
;
144 size
= align(size
, 4096);
147 sim_bo
->handle
= handle
;
149 /* Allocate space for the buffer in simulator memory. */
150 mtx_lock(&sim_state
.mutex
);
151 sim_bo
->block
= u_mmAllocMem(sim_state
.heap
, size
+ 4, PAGE_ALIGN2
, 0);
152 mtx_unlock(&sim_state
.mutex
);
153 assert(sim_bo
->block
);
155 obj
->base
.size
= size
;
156 obj
->base
.dev
= &file
->dev
;
157 obj
->vaddr
= sim_state
.mem
+ sim_bo
->block
->ofs
;
158 obj
->paddr
= simpenrose_hw_addr(obj
->vaddr
);
160 *(uint32_t *)(obj
->vaddr
+ size
) = BO_SENTINEL
;
162 /* A handle of 0 is used for vc4_gem.c internal allocations that
163 * don't need to go in the lookup table.
166 mtx_lock(&sim_state
.mutex
);
167 _mesa_hash_table_insert(file
->bo_map
, int_to_key(handle
), bo
);
168 mtx_unlock(&sim_state
.mutex
);
170 /* Map the GEM buffer for copy in/out to the simulator. */
171 struct drm_mode_map_dumb map
= {
174 int ret
= drmIoctl(fd
, DRM_IOCTL_MODE_MAP_DUMB
, &map
);
176 fprintf(stderr
, "Failed to get MMAP offset: %d\n",
180 sim_bo
->gem_vaddr
= mmap(NULL
, obj
->base
.size
,
181 PROT_READ
| PROT_WRITE
, MAP_SHARED
,
183 if (sim_bo
->gem_vaddr
== MAP_FAILED
) {
184 fprintf(stderr
, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
185 handle
, (long long)map
.offset
, (int)obj
->base
.size
);
194 vc4_free_simulator_bo(struct vc4_simulator_bo
*sim_bo
)
196 struct vc4_simulator_file
*sim_file
= sim_bo
->file
;
197 struct drm_vc4_bo
*bo
= &sim_bo
->base
;
198 struct drm_gem_cma_object
*obj
= &bo
->base
;
200 if (bo
->validated_shader
) {
201 free(bo
->validated_shader
->texture_samples
);
202 free(bo
->validated_shader
);
205 if (sim_bo
->gem_vaddr
)
206 munmap(sim_bo
->gem_vaddr
, obj
->base
.size
);
208 mtx_lock(&sim_state
.mutex
);
209 u_mmFreeMem(sim_bo
->block
);
210 if (sim_bo
->handle
) {
211 _mesa_hash_table_remove_key(sim_file
->bo_map
,
212 int_to_key(sim_bo
->handle
));
214 mtx_unlock(&sim_state
.mutex
);
218 static struct vc4_simulator_bo
*
219 vc4_get_simulator_bo(struct vc4_simulator_file
*file
, int gem_handle
)
221 mtx_lock(&sim_state
.mutex
);
222 struct hash_entry
*entry
=
223 _mesa_hash_table_search(file
->bo_map
, int_to_key(gem_handle
));
224 mtx_unlock(&sim_state
.mutex
);
226 return entry
? entry
->data
: NULL
;
229 struct drm_gem_cma_object
*
230 drm_gem_cma_create(struct drm_device
*dev
, size_t size
)
232 struct vc4_screen
*screen
= dev
->screen
;
233 struct vc4_simulator_bo
*sim_bo
= vc4_create_simulator_bo(screen
->fd
,
235 return &sim_bo
->base
.base
;
239 vc4_simulator_pin_bos(struct vc4_simulator_file
*file
,
240 struct vc4_exec_info
*exec
)
242 struct drm_vc4_submit_cl
*args
= exec
->args
;
243 uint32_t *bo_handles
= (uint32_t *)(uintptr_t)args
->bo_handles
;
245 exec
->bo_count
= args
->bo_handle_count
;
246 exec
->bo
= calloc(exec
->bo_count
, sizeof(void *));
247 for (int i
= 0; i
< exec
->bo_count
; i
++) {
248 struct vc4_simulator_bo
*sim_bo
=
249 vc4_get_simulator_bo(file
, bo_handles
[i
]);
250 struct drm_vc4_bo
*drm_bo
= &sim_bo
->base
;
251 struct drm_gem_cma_object
*obj
= &drm_bo
->base
;
253 memcpy(obj
->vaddr
, sim_bo
->gem_vaddr
, obj
->base
.size
);
261 vc4_simulator_unpin_bos(struct vc4_exec_info
*exec
)
263 for (int i
= 0; i
< exec
->bo_count
; i
++) {
264 struct drm_gem_cma_object
*obj
= exec
->bo
[i
];
265 struct drm_vc4_bo
*drm_bo
= to_vc4_bo(&obj
->base
);
266 struct vc4_simulator_bo
*sim_bo
=
267 (struct vc4_simulator_bo
*)drm_bo
;
269 assert(*(uint32_t *)(obj
->vaddr
+
270 obj
->base
.size
) == BO_SENTINEL
);
271 if (sim_bo
->gem_vaddr
)
272 memcpy(sim_bo
->gem_vaddr
, obj
->vaddr
, obj
->base
.size
);
281 vc4_dump_to_file(struct vc4_exec_info
*exec
)
283 static int dumpno
= 0;
284 struct drm_vc4_get_hang_state
*state
;
285 struct drm_vc4_get_hang_state_bo
*bo_state
;
286 unsigned int dump_version
= 0;
288 if (!(vc4_debug
& VC4_DEBUG_DUMP
))
291 state
= calloc(1, sizeof(*state
));
294 list_for_each_entry_safe(struct drm_vc4_bo
, bo
, &exec
->unref_list
,
299 /* Add one more for the overflow area that isn't wrapped in a BO. */
300 state
->bo_count
= exec
->bo_count
+ unref_count
+ 1;
301 bo_state
= calloc(state
->bo_count
, sizeof(*bo_state
));
303 char *filename
= NULL
;
304 asprintf(&filename
, "vc4-dri-%d.dump", dumpno
++);
305 FILE *f
= fopen(filename
, "w+");
307 fprintf(stderr
, "Couldn't open %s: %s", filename
,
312 fwrite(&dump_version
, sizeof(dump_version
), 1, f
);
314 state
->ct0ca
= exec
->ct0ca
;
315 state
->ct0ea
= exec
->ct0ea
;
316 state
->ct1ca
= exec
->ct1ca
;
317 state
->ct1ea
= exec
->ct1ea
;
318 state
->start_bin
= exec
->ct0ca
;
319 state
->start_render
= exec
->ct1ca
;
320 fwrite(state
, sizeof(*state
), 1, f
);
323 for (i
= 0; i
< exec
->bo_count
; i
++) {
324 struct drm_gem_cma_object
*cma_bo
= exec
->bo
[i
];
325 bo_state
[i
].handle
= i
; /* Not used by the parser. */
326 bo_state
[i
].paddr
= cma_bo
->paddr
;
327 bo_state
[i
].size
= cma_bo
->base
.size
;
330 list_for_each_entry_safe(struct drm_vc4_bo
, bo
, &exec
->unref_list
,
332 struct drm_gem_cma_object
*cma_bo
= &bo
->base
;
333 bo_state
[i
].handle
= 0;
334 bo_state
[i
].paddr
= cma_bo
->paddr
;
335 bo_state
[i
].size
= cma_bo
->base
.size
;
339 /* Add the static overflow memory area. */
340 bo_state
[i
].handle
= exec
->bo_count
;
341 bo_state
[i
].paddr
= sim_state
.overflow
->ofs
;
342 bo_state
[i
].size
= sim_state
.overflow
->size
;
345 fwrite(bo_state
, sizeof(*bo_state
), state
->bo_count
, f
);
347 for (int i
= 0; i
< exec
->bo_count
; i
++) {
348 struct drm_gem_cma_object
*cma_bo
= exec
->bo
[i
];
349 fwrite(cma_bo
->vaddr
, cma_bo
->base
.size
, 1, f
);
352 list_for_each_entry_safe(struct drm_vc4_bo
, bo
, &exec
->unref_list
,
354 struct drm_gem_cma_object
*cma_bo
= &bo
->base
;
355 fwrite(cma_bo
->vaddr
, cma_bo
->base
.size
, 1, f
);
358 void *overflow
= calloc(1, sim_state
.overflow
->size
);
359 fwrite(overflow
, 1, sim_state
.overflow
->size
, f
);
368 vc4_simulator_submit_cl_ioctl(int fd
, struct drm_vc4_submit_cl
*args
)
370 struct vc4_simulator_file
*file
= vc4_get_simulator_file_for_fd(fd
);
371 struct vc4_exec_info exec
;
372 struct drm_device
*dev
= &file
->dev
;
375 memset(&exec
, 0, sizeof(exec
));
376 list_inithead(&exec
.unref_list
);
380 ret
= vc4_simulator_pin_bos(file
, &exec
);
384 ret
= vc4_cl_validate(dev
, &exec
);
388 if (vc4_debug
& VC4_DEBUG_CL
) {
389 fprintf(stderr
, "RCL:\n");
390 vc4_dump_cl(sim_state
.mem
+ exec
.ct1ca
,
391 exec
.ct1ea
- exec
.ct1ca
, true);
394 vc4_dump_to_file(&exec
);
396 if (exec
.ct0ca
!= exec
.ct0ea
) {
397 int bfc
= simpenrose_do_binning(exec
.ct0ca
, exec
.ct0ea
);
399 fprintf(stderr
, "Binning returned %d flushes, should be 1.\n",
401 fprintf(stderr
, "Relocated binning command list:\n");
402 vc4_dump_cl(sim_state
.mem
+ exec
.ct0ca
,
403 exec
.ct0ea
- exec
.ct0ca
, false);
407 int rfc
= simpenrose_do_rendering(exec
.ct1ca
, exec
.ct1ea
);
409 fprintf(stderr
, "Rendering returned %d frames, should be 1.\n",
411 fprintf(stderr
, "Relocated render command list:\n");
412 vc4_dump_cl(sim_state
.mem
+ exec
.ct1ca
,
413 exec
.ct1ea
- exec
.ct1ca
, true);
417 ret
= vc4_simulator_unpin_bos(&exec
);
421 list_for_each_entry_safe(struct drm_vc4_bo
, bo
, &exec
.unref_list
,
423 struct vc4_simulator_bo
*sim_bo
= (struct vc4_simulator_bo
*)bo
;
424 struct drm_gem_cma_object
*obj
= &sim_bo
->base
.base
;
425 list_del(&bo
->unref_head
);
426 assert(*(uint32_t *)(obj
->vaddr
+ obj
->base
.size
) ==
428 vc4_free_simulator_bo(sim_bo
);
/**
 * Performs simulator-side setup for a BO that was opened from a handle.
 *
 * This could instead happen at DRM_IOCTL_GEM_OPEN or
 * DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE time, but drmPrimeFDToHandle() is still
 * in use, so this helper is called afterward to create the simulator
 * tracking for the handle.
 */
void vc4_simulator_open_from_handle(int fd, int handle, uint32_t size)
{
        /* The returned tracking struct is not needed by the caller. */
        (void)vc4_create_simulator_bo(fd, handle, size);
}
447 * Simulated ioctl(fd, DRM_VC4_CREATE_BO) implementation.
449 * Making a VC4 BO is just a matter of making a corresponding BO on the host.
452 vc4_simulator_create_bo_ioctl(int fd
, struct drm_vc4_create_bo
*args
)
455 struct drm_mode_create_dumb create
= {
458 .height
= (args
->size
+ 127) / 128,
461 ret
= drmIoctl(fd
, DRM_IOCTL_MODE_CREATE_DUMB
, &create
);
462 assert(create
.size
>= args
->size
);
464 args
->handle
= create
.handle
;
466 vc4_create_simulator_bo(fd
, create
.handle
, args
->size
);
472 * Simulated ioctl(fd, DRM_VC4_CREATE_SHADER_BO) implementation.
474 * In simulation we defer shader validation until exec time. Just make a host
475 * BO and memcpy the contents in.
478 vc4_simulator_create_shader_bo_ioctl(int fd
,
479 struct drm_vc4_create_shader_bo
*args
)
482 struct drm_mode_create_dumb create
= {
485 .height
= (args
->size
+ 127) / 128,
488 ret
= drmIoctl(fd
, DRM_IOCTL_MODE_CREATE_DUMB
, &create
);
491 assert(create
.size
>= args
->size
);
493 args
->handle
= create
.handle
;
495 struct vc4_simulator_bo
*sim_bo
=
496 vc4_create_simulator_bo(fd
, create
.handle
, args
->size
);
497 struct drm_vc4_bo
*drm_bo
= &sim_bo
->base
;
498 struct drm_gem_cma_object
*obj
= &drm_bo
->base
;
500 /* Copy into the simulator's BO for validation. */
501 memcpy(obj
->vaddr
, (void *)(uintptr_t)args
->data
, args
->size
);
503 /* Copy into the GEM BO to prevent the simulator_pin_bos() from
506 memcpy(sim_bo
->gem_vaddr
, (void *)(uintptr_t)args
->data
, args
->size
);
508 drm_bo
->validated_shader
= vc4_validate_shader(obj
);
509 if (!drm_bo
->validated_shader
)
516 * Simulated ioctl(fd, DRM_VC4_MMAP_BO) implementation.
518 * We just pass this straight through to dumb mmap.
521 vc4_simulator_mmap_bo_ioctl(int fd
, struct drm_vc4_mmap_bo
*args
)
524 struct drm_mode_map_dumb map
= {
525 .handle
= args
->handle
,
528 ret
= drmIoctl(fd
, DRM_IOCTL_MODE_MAP_DUMB
, &map
);
529 args
->offset
= map
.offset
;
535 vc4_simulator_gem_close_ioctl(int fd
, struct drm_gem_close
*args
)
537 /* Free the simulator's internal tracking. */
538 struct vc4_simulator_file
*file
= vc4_get_simulator_file_for_fd(fd
);
539 struct vc4_simulator_bo
*sim_bo
= vc4_get_simulator_bo(file
,
542 vc4_free_simulator_bo(sim_bo
);
544 /* Pass the call on down. */
545 return drmIoctl(fd
, DRM_IOCTL_GEM_CLOSE
, args
);
549 vc4_simulator_get_param_ioctl(int fd
, struct drm_vc4_get_param
*args
)
551 switch (args
->param
) {
552 case DRM_VC4_PARAM_SUPPORTS_BRANCHES
:
553 case DRM_VC4_PARAM_SUPPORTS_ETC1
:
554 case DRM_VC4_PARAM_SUPPORTS_THREADED_FS
:
555 case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER
:
559 case DRM_VC4_PARAM_SUPPORTS_MADVISE
:
560 case DRM_VC4_PARAM_SUPPORTS_PERFMON
:
564 case DRM_VC4_PARAM_V3D_IDENT0
:
565 args
->value
= 0x02000000;
568 case DRM_VC4_PARAM_V3D_IDENT1
:
569 args
->value
= 0x00000001;
573 fprintf(stderr
, "Unknown DRM_IOCTL_VC4_GET_PARAM(%lld)\n",
574 (long long)args
->param
);
580 vc4_simulator_ioctl(int fd
, unsigned long request
, void *args
)
583 case DRM_IOCTL_VC4_SUBMIT_CL
:
584 return vc4_simulator_submit_cl_ioctl(fd
, args
);
585 case DRM_IOCTL_VC4_CREATE_BO
:
586 return vc4_simulator_create_bo_ioctl(fd
, args
);
587 case DRM_IOCTL_VC4_CREATE_SHADER_BO
:
588 return vc4_simulator_create_shader_bo_ioctl(fd
, args
);
589 case DRM_IOCTL_VC4_MMAP_BO
:
590 return vc4_simulator_mmap_bo_ioctl(fd
, args
);
592 case DRM_IOCTL_VC4_WAIT_BO
:
593 case DRM_IOCTL_VC4_WAIT_SEQNO
:
594 /* We do all of the vc4 rendering synchronously, so we just
595 * return immediately on the wait ioctls. This ignores any
596 * native rendering to the host BO, so it does mean we race on
597 * front buffer rendering.
601 case DRM_IOCTL_VC4_LABEL_BO
:
602 /* This is just debug information, nothing to do. */
605 case DRM_IOCTL_VC4_GET_TILING
:
606 case DRM_IOCTL_VC4_SET_TILING
:
607 /* Disable these for now, since the sharing with i965 requires
613 case DRM_IOCTL_VC4_GET_PARAM
:
614 return vc4_simulator_get_param_ioctl(fd
, args
);
616 case DRM_IOCTL_GEM_CLOSE
:
617 return vc4_simulator_gem_close_ioctl(fd
, args
);
619 case DRM_IOCTL_GEM_OPEN
:
620 case DRM_IOCTL_GEM_FLINK
:
621 return drmIoctl(fd
, request
, args
);
623 fprintf(stderr
, "Unknown ioctl 0x%08x\n", (int)request
);
629 vc4_simulator_init_global(void)
631 mtx_lock(&sim_state
.mutex
);
632 if (sim_state
.refcount
++) {
633 mtx_unlock(&sim_state
.mutex
);
637 sim_state
.mem_size
= 256 * 1024 * 1024;
638 sim_state
.mem
= calloc(sim_state
.mem_size
, 1);
641 sim_state
.heap
= u_mmInit(0, sim_state
.mem_size
);
643 /* We supply our own memory so that we can have more aperture
644 * available (256MB instead of simpenrose's default 64MB).
646 simpenrose_init_hardware_supply_mem(sim_state
.mem
, sim_state
.mem_size
);
648 /* Carve out low memory for tile allocation overflow. The kernel
649 * should be automatically handling overflow memory setup on real
650 * hardware, but for simulation we just get one shot to set up enough
651 * overflow memory before execution. This overflow mem will be used
652 * up over the whole lifetime of simpenrose (not reused on each
653 * flush), so it had better be big.
655 sim_state
.overflow
= u_mmAllocMem(sim_state
.heap
, 32 * 1024 * 1024,
657 simpenrose_supply_overflow_mem(sim_state
.overflow
->ofs
,
658 sim_state
.overflow
->size
);
660 mtx_unlock(&sim_state
.mutex
);
663 _mesa_hash_table_create(NULL
,
665 _mesa_key_pointer_equal
);
669 vc4_simulator_init(struct vc4_screen
*screen
)
671 vc4_simulator_init_global();
673 screen
->sim_file
= rzalloc(screen
, struct vc4_simulator_file
);
675 screen
->sim_file
->bo_map
=
676 _mesa_hash_table_create(screen
->sim_file
,
678 _mesa_key_pointer_equal
);
680 mtx_lock(&sim_state
.mutex
);
681 _mesa_hash_table_insert(sim_state
.fd_map
, int_to_key(screen
->fd
+ 1),
683 mtx_unlock(&sim_state
.mutex
);
685 screen
->sim_file
->dev
.screen
= screen
;
689 vc4_simulator_destroy(struct vc4_screen
*screen
)
691 mtx_lock(&sim_state
.mutex
);
692 if (!--sim_state
.refcount
) {
693 _mesa_hash_table_destroy(sim_state
.fd_map
, NULL
);
694 u_mmDestroy(sim_state
.heap
);
696 /* No memsetting it, because it contains the mutex. */
698 mtx_unlock(&sim_state
.mutex
);
701 #endif /* USE_VC4_SIMULATOR */