/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifdef USE_VC4_SIMULATOR

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "util/u_memory.h"
#include "util/ralloc.h"

#include "vc4_screen.h"
#include "vc4_context.h"
#include "kernel/vc4_drv.h"
#include "vc4_simulator_validate.h"
#include "simpenrose/simpenrose.h"

/* A marker placed just after each BO, then checked after rendering to make
 * sure it's still there.
 */
#define BO_SENTINEL             0xfedcba98

/* Size in bytes of the low simulator memory handed to simpenrose as tile
 * allocation overflow.  BO placement for a flush starts at this offset.
 */
#define OVERFLOW_SIZE           (32 * 1024 * 1024)

42 static struct drm_gem_cma_object
*
43 vc4_wrap_bo_with_cma(struct drm_device
*dev
, struct vc4_bo
*bo
)
45 struct vc4_context
*vc4
= dev
->vc4
;
46 struct vc4_screen
*screen
= vc4
->screen
;
47 struct drm_vc4_bo
*drm_bo
= CALLOC_STRUCT(drm_vc4_bo
);
48 struct drm_gem_cma_object
*obj
= &drm_bo
->base
;
49 uint32_t size
= align(bo
->size
, 4096);
52 obj
->base
.size
= size
;
54 obj
->vaddr
= screen
->simulator_mem_base
+ dev
->simulator_mem_next
;
55 obj
->paddr
= simpenrose_hw_addr(obj
->vaddr
);
57 dev
->simulator_mem_next
+= size
+ sizeof(uint32_t);
58 dev
->simulator_mem_next
= align(dev
->simulator_mem_next
, 4096);
59 assert(dev
->simulator_mem_next
<= screen
->simulator_mem_size
);
61 *(uint32_t *)(obj
->vaddr
+ bo
->size
) = BO_SENTINEL
;
66 struct drm_gem_cma_object
*
67 drm_gem_cma_create(struct drm_device
*dev
, size_t size
)
69 struct vc4_context
*vc4
= dev
->vc4
;
70 struct vc4_screen
*screen
= vc4
->screen
;
72 struct vc4_bo
*bo
= vc4_bo_alloc(screen
, size
, "simulator validate");
73 return vc4_wrap_bo_with_cma(dev
, bo
);
77 vc4_simulator_pin_bos(struct drm_device
*dev
, struct vc4_exec_info
*exec
)
79 struct drm_vc4_submit_cl
*args
= exec
->args
;
80 struct vc4_context
*vc4
= dev
->vc4
;
81 struct vc4_bo
**bos
= vc4
->bo_pointers
.base
;
83 exec
->bo_count
= args
->bo_handle_count
;
84 exec
->bo
= calloc(exec
->bo_count
, sizeof(void *));
85 for (int i
= 0; i
< exec
->bo_count
; i
++) {
86 struct vc4_bo
*bo
= bos
[i
];
87 struct drm_gem_cma_object
*obj
= vc4_wrap_bo_with_cma(dev
, bo
);
89 struct drm_vc4_bo
*drm_bo
= to_vc4_bo(&obj
->base
);
91 fprintf(stderr
, "bo hindex %d: %s\n", i
, bo
->name
);
95 memcpy(obj
->vaddr
, bo
->map
, bo
->size
);
99 /* The kernel does this validation at shader create ioctl
102 if (strcmp(bo
->name
, "code") == 0) {
103 drm_bo
->validated_shader
= vc4_validate_shader(obj
);
104 if (!drm_bo
->validated_shader
)
112 vc4_simulator_unpin_bos(struct vc4_exec_info
*exec
)
114 for (int i
= 0; i
< exec
->bo_count
; i
++) {
115 struct drm_gem_cma_object
*obj
= exec
->bo
[i
];
116 struct drm_vc4_bo
*drm_bo
= to_vc4_bo(&obj
->base
);
117 struct vc4_bo
*bo
= drm_bo
->bo
;
119 assert(*(uint32_t *)(obj
->vaddr
+ bo
->size
) == BO_SENTINEL
);
120 memcpy(bo
->map
, obj
->vaddr
, bo
->size
);
122 if (drm_bo
->validated_shader
) {
123 free(drm_bo
->validated_shader
->texture_samples
);
124 free(drm_bo
->validated_shader
);
135 vc4_dump_to_file(struct vc4_exec_info
*exec
)
137 static int dumpno
= 0;
138 struct drm_vc4_get_hang_state
*state
;
139 struct drm_vc4_get_hang_state_bo
*bo_state
;
140 unsigned int dump_version
= 0;
142 if (!(vc4_debug
& VC4_DEBUG_DUMP
))
145 state
= calloc(1, sizeof(*state
));
148 list_for_each_entry_safe(struct drm_vc4_bo
, bo
, &exec
->unref_list
,
153 /* Add one more for the overflow area that isn't wrapped in a BO. */
154 state
->bo_count
= exec
->bo_count
+ unref_count
+ 1;
155 bo_state
= calloc(state
->bo_count
, sizeof(*bo_state
));
157 char *filename
= NULL
;
158 asprintf(&filename
, "vc4-dri-%d.dump", dumpno
++);
159 FILE *f
= fopen(filename
, "w+");
161 fprintf(stderr
, "Couldn't open %s: %s", filename
,
166 fwrite(&dump_version
, sizeof(dump_version
), 1, f
);
168 state
->ct0ca
= exec
->ct0ca
;
169 state
->ct0ea
= exec
->ct0ea
;
170 state
->ct1ca
= exec
->ct1ca
;
171 state
->ct1ea
= exec
->ct1ea
;
172 state
->start_bin
= exec
->ct0ca
;
173 state
->start_render
= exec
->ct1ca
;
174 fwrite(state
, sizeof(*state
), 1, f
);
177 for (i
= 0; i
< exec
->bo_count
; i
++) {
178 struct drm_gem_cma_object
*cma_bo
= exec
->bo
[i
];
179 bo_state
[i
].handle
= i
; /* Not used by the parser. */
180 bo_state
[i
].paddr
= cma_bo
->paddr
;
181 bo_state
[i
].size
= cma_bo
->base
.size
;
184 list_for_each_entry_safe(struct drm_vc4_bo
, bo
, &exec
->unref_list
,
186 struct drm_gem_cma_object
*cma_bo
= &bo
->base
;
187 bo_state
[i
].handle
= 0;
188 bo_state
[i
].paddr
= cma_bo
->paddr
;
189 bo_state
[i
].size
= cma_bo
->base
.size
;
193 /* Add the static overflow memory area. */
194 bo_state
[i
].handle
= exec
->bo_count
;
195 bo_state
[i
].paddr
= 0;
196 bo_state
[i
].size
= OVERFLOW_SIZE
;
199 fwrite(bo_state
, sizeof(*bo_state
), state
->bo_count
, f
);
201 for (int i
= 0; i
< exec
->bo_count
; i
++) {
202 struct drm_gem_cma_object
*cma_bo
= exec
->bo
[i
];
203 fwrite(cma_bo
->vaddr
, cma_bo
->base
.size
, 1, f
);
206 list_for_each_entry_safe(struct drm_vc4_bo
, bo
, &exec
->unref_list
,
208 struct drm_gem_cma_object
*cma_bo
= &bo
->base
;
209 fwrite(cma_bo
->vaddr
, cma_bo
->base
.size
, 1, f
);
212 void *overflow
= calloc(1, OVERFLOW_SIZE
);
213 fwrite(overflow
, 1, OVERFLOW_SIZE
, f
);
222 vc4_simulator_flush(struct vc4_context
*vc4
, struct drm_vc4_submit_cl
*args
)
224 struct vc4_screen
*screen
= vc4
->screen
;
225 struct vc4_surface
*csurf
= vc4_surface(vc4
->framebuffer
.cbufs
[0]);
226 struct vc4_resource
*ctex
= csurf
? vc4_resource(csurf
->base
.texture
) : NULL
;
227 uint32_t winsys_stride
= ctex
? ctex
->bo
->simulator_winsys_stride
: 0;
228 uint32_t sim_stride
= ctex
? ctex
->slices
[0].stride
: 0;
229 uint32_t row_len
= MIN2(sim_stride
, winsys_stride
);
230 struct vc4_exec_info exec
;
231 struct drm_device local_dev
= {
233 .simulator_mem_next
= OVERFLOW_SIZE
,
235 struct drm_device
*dev
= &local_dev
;
238 memset(&exec
, 0, sizeof(exec
));
239 list_inithead(&exec
.unref_list
);
241 if (ctex
&& ctex
->bo
->simulator_winsys_map
) {
243 fprintf(stderr
, "%dx%d %d %d %d\n",
244 ctex
->base
.b
.width0
, ctex
->base
.b
.height0
,
250 for (int y
= 0; y
< ctex
->base
.b
.height0
; y
++) {
251 memcpy(ctex
->bo
->map
+ y
* sim_stride
,
252 ctex
->bo
->simulator_winsys_map
+ y
* winsys_stride
,
259 ret
= vc4_simulator_pin_bos(dev
, &exec
);
263 ret
= vc4_cl_validate(dev
, &exec
);
267 if (vc4_debug
& VC4_DEBUG_CL
) {
268 fprintf(stderr
, "RCL:\n");
269 vc4_dump_cl(screen
->simulator_mem_base
+ exec
.ct1ca
,
270 exec
.ct1ea
- exec
.ct1ca
, true);
273 vc4_dump_to_file(&exec
);
275 if (exec
.ct0ca
!= exec
.ct0ea
) {
276 int bfc
= simpenrose_do_binning(exec
.ct0ca
, exec
.ct0ea
);
278 fprintf(stderr
, "Binning returned %d flushes, should be 1.\n",
280 fprintf(stderr
, "Relocated binning command list:\n");
281 vc4_dump_cl(screen
->simulator_mem_base
+ exec
.ct0ca
,
282 exec
.ct0ea
- exec
.ct0ca
, false);
286 int rfc
= simpenrose_do_rendering(exec
.ct1ca
, exec
.ct1ea
);
288 fprintf(stderr
, "Rendering returned %d frames, should be 1.\n",
290 fprintf(stderr
, "Relocated render command list:\n");
291 vc4_dump_cl(screen
->simulator_mem_base
+ exec
.ct1ca
,
292 exec
.ct1ea
- exec
.ct1ca
, true);
296 ret
= vc4_simulator_unpin_bos(&exec
);
300 list_for_each_entry_safe(struct drm_vc4_bo
, bo
, &exec
.unref_list
,
302 list_del(&bo
->unref_head
);
303 assert(*(uint32_t *)(bo
->base
.vaddr
+ bo
->bo
->size
) ==
305 vc4_bo_unreference(&bo
->bo
);
309 if (ctex
&& ctex
->bo
->simulator_winsys_map
) {
310 for (int y
= 0; y
< ctex
->base
.b
.height0
; y
++) {
311 memcpy(ctex
->bo
->simulator_winsys_map
+ y
* winsys_stride
,
312 ctex
->bo
->map
+ y
* sim_stride
,
321 vc4_simulator_init(struct vc4_screen
*screen
)
323 screen
->simulator_mem_size
= 256 * 1024 * 1024;
324 screen
->simulator_mem_base
= ralloc_size(screen
,
325 screen
->simulator_mem_size
);
327 /* We supply our own memory so that we can have more aperture
328 * available (256MB instead of simpenrose's default 64MB).
330 simpenrose_init_hardware_supply_mem(screen
->simulator_mem_base
,
331 screen
->simulator_mem_size
);
333 /* Carve out low memory for tile allocation overflow. The kernel
334 * should be automatically handling overflow memory setup on real
335 * hardware, but for simulation we just get one shot to set up enough
336 * overflow memory before execution. This overflow mem will be used
337 * up over the whole lifetime of simpenrose (not reused on each
338 * flush), so it had better be big.
340 simpenrose_supply_overflow_mem(0, OVERFLOW_SIZE
);
343 #endif /* USE_VC4_SIMULATOR */