2 * Copyright © 2014 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #ifdef USE_VC4_SIMULATOR
28 #include "util/u_memory.h"
29 #include "util/u_mm.h"
30 #include "util/ralloc.h"
32 #include "vc4_screen.h"
33 #include "vc4_context.h"
34 #include "kernel/vc4_drv.h"
35 #include "vc4_simulator_validate.h"
36 #include "simpenrose/simpenrose.h"
38 /** Global (across GEM fds) state for the simulator */
39 static struct vc4_simulator_state
{
44 struct mem_block
*heap
;
45 struct mem_block
*overflow
;
/** Mapping from DRM fd (keyed as fd + 1) to struct vc4_simulator_file * */
48 struct hash_table
*fd_map
;
52 .mutex
= _MTX_INITIALIZER_NP
,
55 /** Per-GEM-fd state for the simulator. */
56 struct vc4_simulator_file
{
59 /* This is weird -- we make a "vc4_device" per file, even though on
60 * the kernel side this is a global. We do this so that kernel code
61 * calling us for BO allocation can get to our screen.
63 struct drm_device dev
;
65 /** Mapping from GEM handle to struct vc4_simulator_bo * */
66 struct hash_table
*bo_map
;
69 /** Wrapper for drm_vc4_bo tracking the simulator-specific state. */
70 struct vc4_simulator_bo
{
71 struct drm_vc4_bo base
;
72 struct vc4_simulator_file
*file
;
/** Area for this BO within sim_state.mem */
75 struct mem_block
*block
;
77 uint32_t winsys_stride
;
85 return (void *)(uintptr_t)key
;
88 static struct vc4_simulator_file
*
89 vc4_get_simulator_file_for_fd(int fd
)
91 struct hash_entry
*entry
= _mesa_hash_table_search(sim_state
.fd_map
,
93 return entry
? entry
->data
: NULL
;
96 /* A marker placed just after each BO, then checked after rendering to make
97 * sure it's still there.
99 #define BO_SENTINEL 0xfedcba98
101 #define PAGE_ALIGN2 12
104 * Allocates space in simulator memory and returns a tracking struct for it
105 * that also contains the drm_gem_cma_object struct.
107 static struct vc4_simulator_bo
*
108 vc4_create_simulator_bo(int fd
, int handle
, unsigned size
)
110 struct vc4_simulator_file
*file
= vc4_get_simulator_file_for_fd(fd
);
111 struct vc4_simulator_bo
*sim_bo
= rzalloc(file
,
112 struct vc4_simulator_bo
);
113 struct drm_vc4_bo
*bo
= &sim_bo
->base
;
114 struct drm_gem_cma_object
*obj
= &bo
->base
;
115 size
= align(size
, 4096);
118 sim_bo
->handle
= handle
;
120 mtx_lock(&sim_state
.mutex
);
121 sim_bo
->block
= u_mmAllocMem(sim_state
.heap
, size
+ 4, PAGE_ALIGN2
, 0);
122 mtx_unlock(&sim_state
.mutex
);
123 assert(sim_bo
->block
);
125 obj
->base
.size
= size
;
126 obj
->base
.dev
= &file
->dev
;
127 obj
->vaddr
= sim_state
.mem
+ sim_bo
->block
->ofs
;
128 obj
->paddr
= simpenrose_hw_addr(obj
->vaddr
);
130 *(uint32_t *)(obj
->vaddr
+ size
) = BO_SENTINEL
;
132 /* A handle of 0 is used for vc4_gem.c internal allocations that
133 * don't need to go in the lookup table.
136 mtx_lock(&sim_state
.mutex
);
137 _mesa_hash_table_insert(file
->bo_map
, int_to_key(handle
), bo
);
138 mtx_unlock(&sim_state
.mutex
);
145 vc4_free_simulator_bo(struct vc4_simulator_bo
*sim_bo
)
147 struct vc4_simulator_file
*sim_file
= sim_bo
->file
;
148 struct drm_vc4_bo
*bo
= &sim_bo
->base
;
149 struct drm_gem_cma_object
*obj
= &bo
->base
;
151 if (sim_bo
->winsys_map
)
152 munmap(sim_bo
->winsys_map
, obj
->base
.size
);
154 mtx_lock(&sim_state
.mutex
);
155 u_mmFreeMem(sim_bo
->block
);
156 if (sim_bo
->handle
) {
157 struct hash_entry
*entry
=
158 _mesa_hash_table_search(sim_file
->bo_map
,
159 int_to_key(sim_bo
->handle
));
160 _mesa_hash_table_remove(sim_file
->bo_map
, entry
);
162 mtx_unlock(&sim_state
.mutex
);
166 static struct vc4_simulator_bo
*
167 vc4_get_simulator_bo(struct vc4_simulator_file
*file
, int gem_handle
)
169 mtx_lock(&sim_state
.mutex
);
170 struct hash_entry
*entry
=
171 _mesa_hash_table_search(file
->bo_map
, int_to_key(gem_handle
));
172 mtx_unlock(&sim_state
.mutex
);
174 return entry
? entry
->data
: NULL
;
177 struct drm_gem_cma_object
*
178 drm_gem_cma_create(struct drm_device
*dev
, size_t size
)
180 struct vc4_screen
*screen
= dev
->screen
;
181 struct vc4_simulator_bo
*sim_bo
= vc4_create_simulator_bo(screen
->fd
,
183 return &sim_bo
->base
.base
;
187 vc4_simulator_pin_bos(struct drm_device
*dev
, struct vc4_job
*job
,
188 struct vc4_exec_info
*exec
)
190 int fd
= dev
->screen
->fd
;
191 struct vc4_simulator_file
*file
= vc4_get_simulator_file_for_fd(fd
);
192 struct drm_vc4_submit_cl
*args
= exec
->args
;
193 struct vc4_bo
**bos
= job
->bo_pointers
.base
;
195 exec
->bo_count
= args
->bo_handle_count
;
196 exec
->bo
= calloc(exec
->bo_count
, sizeof(void *));
197 for (int i
= 0; i
< exec
->bo_count
; i
++) {
198 struct vc4_bo
*bo
= bos
[i
];
199 struct vc4_simulator_bo
*sim_bo
=
200 vc4_get_simulator_bo(file
, bo
->handle
);
201 struct drm_vc4_bo
*drm_bo
= &sim_bo
->base
;
202 struct drm_gem_cma_object
*obj
= &drm_bo
->base
;
206 fprintf(stderr
, "bo hindex %d: %s\n", i
, bo
->name
);
210 memcpy(obj
->vaddr
, bo
->map
, bo
->size
);
214 /* The kernel does this validation at shader create ioctl
217 if (strcmp(bo
->name
, "code") == 0) {
218 drm_bo
->validated_shader
= vc4_validate_shader(obj
);
219 if (!drm_bo
->validated_shader
)
227 vc4_simulator_unpin_bos(struct vc4_exec_info
*exec
)
229 for (int i
= 0; i
< exec
->bo_count
; i
++) {
230 struct drm_gem_cma_object
*obj
= exec
->bo
[i
];
231 struct drm_vc4_bo
*drm_bo
= to_vc4_bo(&obj
->base
);
232 struct vc4_bo
*bo
= drm_bo
->bo
;
234 assert(*(uint32_t *)(obj
->vaddr
+
235 obj
->base
.size
) == BO_SENTINEL
);
236 memcpy(bo
->map
, obj
->vaddr
, bo
->size
);
238 if (drm_bo
->validated_shader
) {
239 free(drm_bo
->validated_shader
->texture_samples
);
240 free(drm_bo
->validated_shader
);
250 vc4_dump_to_file(struct vc4_exec_info
*exec
)
252 static int dumpno
= 0;
253 struct drm_vc4_get_hang_state
*state
;
254 struct drm_vc4_get_hang_state_bo
*bo_state
;
255 unsigned int dump_version
= 0;
257 if (!(vc4_debug
& VC4_DEBUG_DUMP
))
260 state
= calloc(1, sizeof(*state
));
263 list_for_each_entry_safe(struct drm_vc4_bo
, bo
, &exec
->unref_list
,
268 /* Add one more for the overflow area that isn't wrapped in a BO. */
269 state
->bo_count
= exec
->bo_count
+ unref_count
+ 1;
270 bo_state
= calloc(state
->bo_count
, sizeof(*bo_state
));
272 char *filename
= NULL
;
273 asprintf(&filename
, "vc4-dri-%d.dump", dumpno
++);
274 FILE *f
= fopen(filename
, "w+");
276 fprintf(stderr
, "Couldn't open %s: %s", filename
,
281 fwrite(&dump_version
, sizeof(dump_version
), 1, f
);
283 state
->ct0ca
= exec
->ct0ca
;
284 state
->ct0ea
= exec
->ct0ea
;
285 state
->ct1ca
= exec
->ct1ca
;
286 state
->ct1ea
= exec
->ct1ea
;
287 state
->start_bin
= exec
->ct0ca
;
288 state
->start_render
= exec
->ct1ca
;
289 fwrite(state
, sizeof(*state
), 1, f
);
292 for (i
= 0; i
< exec
->bo_count
; i
++) {
293 struct drm_gem_cma_object
*cma_bo
= exec
->bo
[i
];
294 bo_state
[i
].handle
= i
; /* Not used by the parser. */
295 bo_state
[i
].paddr
= cma_bo
->paddr
;
296 bo_state
[i
].size
= cma_bo
->base
.size
;
299 list_for_each_entry_safe(struct drm_vc4_bo
, bo
, &exec
->unref_list
,
301 struct drm_gem_cma_object
*cma_bo
= &bo
->base
;
302 bo_state
[i
].handle
= 0;
303 bo_state
[i
].paddr
= cma_bo
->paddr
;
304 bo_state
[i
].size
= cma_bo
->base
.size
;
308 /* Add the static overflow memory area. */
309 bo_state
[i
].handle
= exec
->bo_count
;
310 bo_state
[i
].paddr
= sim_state
.overflow
->ofs
;
311 bo_state
[i
].size
= sim_state
.overflow
->size
;
314 fwrite(bo_state
, sizeof(*bo_state
), state
->bo_count
, f
);
316 for (int i
= 0; i
< exec
->bo_count
; i
++) {
317 struct drm_gem_cma_object
*cma_bo
= exec
->bo
[i
];
318 fwrite(cma_bo
->vaddr
, cma_bo
->base
.size
, 1, f
);
321 list_for_each_entry_safe(struct drm_vc4_bo
, bo
, &exec
->unref_list
,
323 struct drm_gem_cma_object
*cma_bo
= &bo
->base
;
324 fwrite(cma_bo
->vaddr
, cma_bo
->base
.size
, 1, f
);
327 void *overflow
= calloc(1, sim_state
.overflow
->size
);
328 fwrite(overflow
, 1, sim_state
.overflow
->size
, f
);
337 vc4_simulator_flush(struct vc4_context
*vc4
,
338 struct drm_vc4_submit_cl
*args
, struct vc4_job
*job
)
340 struct vc4_screen
*screen
= vc4
->screen
;
342 struct vc4_simulator_file
*file
= vc4_get_simulator_file_for_fd(fd
);
343 struct vc4_surface
*csurf
= vc4_surface(vc4
->framebuffer
.cbufs
[0]);
344 struct vc4_resource
*ctex
= csurf
? vc4_resource(csurf
->base
.texture
) : NULL
;
345 struct vc4_simulator_bo
*csim_bo
= ctex
? vc4_get_simulator_bo(file
, ctex
->bo
->handle
) : NULL
;
346 uint32_t winsys_stride
= ctex
? csim_bo
->winsys_stride
: 0;
347 uint32_t sim_stride
= ctex
? ctex
->slices
[0].stride
: 0;
348 uint32_t row_len
= MIN2(sim_stride
, winsys_stride
);
349 struct vc4_exec_info exec
;
350 struct drm_device
*dev
= &file
->dev
;
353 memset(&exec
, 0, sizeof(exec
));
354 list_inithead(&exec
.unref_list
);
356 if (ctex
&& csim_bo
->winsys_map
) {
358 fprintf(stderr
, "%dx%d %d %d %d\n",
359 ctex
->base
.b
.width0
, ctex
->base
.b
.height0
,
365 for (int y
= 0; y
< ctex
->base
.b
.height0
; y
++) {
366 memcpy(ctex
->bo
->map
+ y
* sim_stride
,
367 csim_bo
->winsys_map
+ y
* winsys_stride
,
374 ret
= vc4_simulator_pin_bos(dev
, job
, &exec
);
378 ret
= vc4_cl_validate(dev
, &exec
);
382 if (vc4_debug
& VC4_DEBUG_CL
) {
383 fprintf(stderr
, "RCL:\n");
384 vc4_dump_cl(sim_state
.mem
+ exec
.ct1ca
,
385 exec
.ct1ea
- exec
.ct1ca
, true);
388 vc4_dump_to_file(&exec
);
390 if (exec
.ct0ca
!= exec
.ct0ea
) {
391 int bfc
= simpenrose_do_binning(exec
.ct0ca
, exec
.ct0ea
);
393 fprintf(stderr
, "Binning returned %d flushes, should be 1.\n",
395 fprintf(stderr
, "Relocated binning command list:\n");
396 vc4_dump_cl(sim_state
.mem
+ exec
.ct0ca
,
397 exec
.ct0ea
- exec
.ct0ca
, false);
401 int rfc
= simpenrose_do_rendering(exec
.ct1ca
, exec
.ct1ea
);
403 fprintf(stderr
, "Rendering returned %d frames, should be 1.\n",
405 fprintf(stderr
, "Relocated render command list:\n");
406 vc4_dump_cl(sim_state
.mem
+ exec
.ct1ca
,
407 exec
.ct1ea
- exec
.ct1ca
, true);
411 ret
= vc4_simulator_unpin_bos(&exec
);
415 list_for_each_entry_safe(struct drm_vc4_bo
, bo
, &exec
.unref_list
,
417 struct vc4_simulator_bo
*sim_bo
= (struct vc4_simulator_bo
*)bo
;
418 struct drm_gem_cma_object
*obj
= &sim_bo
->base
.base
;
419 list_del(&bo
->unref_head
);
420 assert(*(uint32_t *)(obj
->vaddr
+ obj
->base
.size
) ==
422 vc4_free_simulator_bo(sim_bo
);
425 if (ctex
&& csim_bo
->winsys_map
) {
426 for (int y
= 0; y
< ctex
->base
.b
.height0
; y
++) {
427 memcpy(csim_bo
->winsys_map
+ y
* winsys_stride
,
428 ctex
->bo
->map
+ y
* sim_stride
,
437 * Map the underlying GEM object from the real hardware GEM handle.
440 vc4_simulator_map_winsys_bo(int fd
, struct vc4_simulator_bo
*sim_bo
)
442 struct drm_vc4_bo
*bo
= &sim_bo
->base
;
443 struct drm_gem_cma_object
*obj
= &bo
->base
;
447 struct drm_mode_map_dumb map_dumb
= {
448 .handle
= sim_bo
->handle
,
450 ret
= drmIoctl(fd
, DRM_IOCTL_MODE_MAP_DUMB
, &map_dumb
);
452 fprintf(stderr
, "map ioctl failure\n");
456 map
= mmap(NULL
, obj
->base
.size
, PROT_READ
| PROT_WRITE
, MAP_SHARED
,
457 fd
, map_dumb
.offset
);
458 if (map
== MAP_FAILED
) {
460 "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
461 sim_bo
->handle
, (long long)map_dumb
.offset
,
462 (int)obj
->base
.size
);
470 * Do fixups after a BO has been opened from a handle.
472 * This could be done at DRM_IOCTL_GEM_OPEN/DRM_IOCTL_GEM_PRIME_FD_TO_HANDLE
473 * time, but we're still using drmPrimeFDToHandle() so we have this helper to
474 * be called afterward instead.
476 void vc4_simulator_open_from_handle(int fd
, uint32_t winsys_stride
,
477 int handle
, uint32_t size
)
479 struct vc4_simulator_bo
*sim_bo
=
480 vc4_create_simulator_bo(fd
, handle
, size
);
482 sim_bo
->winsys_stride
= winsys_stride
;
483 sim_bo
->winsys_map
= vc4_simulator_map_winsys_bo(fd
, sim_bo
);
487 * Simulated ioctl(fd, DRM_VC4_CREATE_BO) implementation.
489 * Making a VC4 BO is just a matter of making a corresponding BO on the host.
492 vc4_simulator_create_bo_ioctl(int fd
, struct drm_vc4_create_bo
*args
)
495 struct drm_mode_create_dumb create
= {
498 .height
= (args
->size
+ 127) / 128,
501 ret
= drmIoctl(fd
, DRM_IOCTL_MODE_CREATE_DUMB
, &create
);
502 assert(create
.size
>= args
->size
);
504 args
->handle
= create
.handle
;
506 vc4_create_simulator_bo(fd
, create
.handle
, args
->size
);
512 * Simulated ioctl(fd, DRM_VC4_CREATE_SHADER_BO) implementation.
514 * In simulation we defer shader validation until exec time. Just make a host
515 * BO and memcpy the contents in.
518 vc4_simulator_create_shader_bo_ioctl(int fd
,
519 struct drm_vc4_create_shader_bo
*args
)
522 struct drm_mode_create_dumb create
= {
525 .height
= (args
->size
+ 127) / 128,
528 ret
= drmIoctl(fd
, DRM_IOCTL_MODE_CREATE_DUMB
, &create
);
531 assert(create
.size
>= args
->size
);
533 args
->handle
= create
.handle
;
535 vc4_create_simulator_bo(fd
, create
.handle
, args
->size
);
537 struct drm_mode_map_dumb map
= {
538 .handle
= create
.handle
540 ret
= drmIoctl(fd
, DRM_IOCTL_MODE_MAP_DUMB
, &map
);
544 void *shader
= mmap(NULL
, args
->size
, PROT_READ
| PROT_WRITE
, MAP_SHARED
,
546 memcpy(shader
, (void *)(uintptr_t)args
->data
, args
->size
);
547 munmap(shader
, args
->size
);
553 * Simulated ioctl(fd, DRM_VC4_MMAP_BO) implementation.
555 * We just pass this straight through to dumb mmap.
558 vc4_simulator_mmap_bo_ioctl(int fd
, struct drm_vc4_mmap_bo
*args
)
561 struct drm_mode_map_dumb map
= {
562 .handle
= args
->handle
,
565 ret
= drmIoctl(fd
, DRM_IOCTL_MODE_MAP_DUMB
, &map
);
566 args
->offset
= map
.offset
;
572 vc4_simulator_gem_close_ioctl(int fd
, struct drm_gem_close
*args
)
574 /* Free the simulator's internal tracking. */
575 struct vc4_simulator_file
*file
= vc4_get_simulator_file_for_fd(fd
);
576 struct vc4_simulator_bo
*sim_bo
= vc4_get_simulator_bo(file
,
579 vc4_free_simulator_bo(sim_bo
);
581 /* Pass the call on down. */
582 return drmIoctl(fd
, DRM_IOCTL_GEM_CLOSE
, args
);
586 vc4_simulator_get_param_ioctl(int fd
, struct drm_vc4_get_param
*args
)
588 switch (args
->param
) {
589 case DRM_VC4_PARAM_SUPPORTS_BRANCHES
:
593 case DRM_VC4_PARAM_V3D_IDENT0
:
594 args
->value
= 0x02000000;
597 case DRM_VC4_PARAM_V3D_IDENT1
:
598 args
->value
= 0x00000001;
602 fprintf(stderr
, "Unknown DRM_IOCTL_VC4_GET_PARAM(%lld)\n",
603 (long long)args
->value
);
609 vc4_simulator_ioctl(int fd
, unsigned long request
, void *args
)
612 case DRM_IOCTL_VC4_CREATE_BO
:
613 return vc4_simulator_create_bo_ioctl(fd
, args
);
614 case DRM_IOCTL_VC4_CREATE_SHADER_BO
:
615 return vc4_simulator_create_shader_bo_ioctl(fd
, args
);
616 case DRM_IOCTL_VC4_MMAP_BO
:
617 return vc4_simulator_mmap_bo_ioctl(fd
, args
);
619 case DRM_IOCTL_VC4_WAIT_BO
:
620 case DRM_IOCTL_VC4_WAIT_SEQNO
:
621 /* We do all of the vc4 rendering synchronously, so we just
622 * return immediately on the wait ioctls. This ignores any
623 * native rendering to the host BO, so it does mean we race on
624 * front buffer rendering.
628 case DRM_IOCTL_VC4_GET_PARAM
:
629 return vc4_simulator_get_param_ioctl(fd
, args
);
631 case DRM_IOCTL_GEM_CLOSE
:
632 return vc4_simulator_gem_close_ioctl(fd
, args
);
634 case DRM_IOCTL_GEM_OPEN
:
635 case DRM_IOCTL_GEM_FLINK
:
636 return drmIoctl(fd
, request
, args
);
638 fprintf(stderr
, "Unknown ioctl 0x%08x\n", (int)request
);
644 vc4_simulator_init_global(void)
646 mtx_lock(&sim_state
.mutex
);
647 if (sim_state
.refcount
++) {
648 mtx_unlock(&sim_state
.mutex
);
652 sim_state
.mem_size
= 256 * 1024 * 1024;
653 sim_state
.mem
= calloc(sim_state
.mem_size
, 1);
656 sim_state
.heap
= u_mmInit(0, sim_state
.mem_size
);
658 /* We supply our own memory so that we can have more aperture
659 * available (256MB instead of simpenrose's default 64MB).
661 simpenrose_init_hardware_supply_mem(sim_state
.mem
, sim_state
.mem_size
);
663 /* Carve out low memory for tile allocation overflow. The kernel
664 * should be automatically handling overflow memory setup on real
665 * hardware, but for simulation we just get one shot to set up enough
666 * overflow memory before execution. This overflow mem will be used
667 * up over the whole lifetime of simpenrose (not reused on each
668 * flush), so it had better be big.
670 sim_state
.overflow
= u_mmAllocMem(sim_state
.heap
, 32 * 1024 * 1024,
672 simpenrose_supply_overflow_mem(sim_state
.overflow
->ofs
,
673 sim_state
.overflow
->size
);
675 mtx_unlock(&sim_state
.mutex
);
678 _mesa_hash_table_create(NULL
,
680 _mesa_key_pointer_equal
);
684 vc4_simulator_init(struct vc4_screen
*screen
)
686 vc4_simulator_init_global();
688 screen
->sim_file
= rzalloc(screen
, struct vc4_simulator_file
);
690 screen
->sim_file
->bo_map
=
691 _mesa_hash_table_create(screen
->sim_file
,
693 _mesa_key_pointer_equal
);
695 mtx_lock(&sim_state
.mutex
);
696 _mesa_hash_table_insert(sim_state
.fd_map
, int_to_key(screen
->fd
+ 1),
698 mtx_unlock(&sim_state
.mutex
);
700 screen
->sim_file
->dev
.screen
= screen
;
704 vc4_simulator_destroy(struct vc4_screen
*screen
)
706 mtx_lock(&sim_state
.mutex
);
707 if (!--sim_state
.refcount
) {
708 _mesa_hash_table_destroy(sim_state
.fd_map
, NULL
);
709 u_mmDestroy(sim_state
.heap
);
711 /* No memsetting it, because it contains the mutex. */
713 mtx_unlock(&sim_state
.mutex
);
716 #endif /* USE_VC4_SIMULATOR */