vc4: Convert vc4_opt_dead_code to work in the presence of control flow.
[mesa.git] / src / gallium / drivers / vc4 / vc4_simulator.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifdef USE_VC4_SIMULATOR
25
26 #include "util/u_memory.h"
27 #include "util/ralloc.h"
28
29 #include "vc4_screen.h"
30 #include "vc4_context.h"
31 #include "kernel/vc4_drv.h"
32 #include "vc4_simulator_validate.h"
33 #include "simpenrose/simpenrose.h"
34
35 /* A marker placed just after each BO, then checked after rendering to make
36 * sure it's still there.
37 */
38 #define BO_SENTINEL 0xfedcba98
39
40 #define OVERFLOW_SIZE (32 * 1024 * 1024)
41
42 static struct drm_gem_cma_object *
43 vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo)
44 {
45 struct vc4_context *vc4 = dev->vc4;
46 struct vc4_screen *screen = vc4->screen;
47 struct drm_vc4_bo *drm_bo = CALLOC_STRUCT(drm_vc4_bo);
48 struct drm_gem_cma_object *obj = &drm_bo->base;
49 uint32_t size = align(bo->size, 4096);
50
51 drm_bo->bo = bo;
52 obj->base.size = size;
53 obj->base.dev = dev;
54 obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next;
55 obj->paddr = simpenrose_hw_addr(obj->vaddr);
56
57 dev->simulator_mem_next += size + sizeof(uint32_t);
58 dev->simulator_mem_next = align(dev->simulator_mem_next, 4096);
59 assert(dev->simulator_mem_next <= screen->simulator_mem_size);
60
61 *(uint32_t *)(obj->vaddr + bo->size) = BO_SENTINEL;
62
63 return obj;
64 }
65
66 struct drm_gem_cma_object *
67 drm_gem_cma_create(struct drm_device *dev, size_t size)
68 {
69 struct vc4_context *vc4 = dev->vc4;
70 struct vc4_screen *screen = vc4->screen;
71
72 struct vc4_bo *bo = vc4_bo_alloc(screen, size, "simulator validate");
73 return vc4_wrap_bo_with_cma(dev, bo);
74 }
75
76 static int
77 vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec)
78 {
79 struct drm_vc4_submit_cl *args = exec->args;
80 struct vc4_context *vc4 = dev->vc4;
81 struct vc4_bo **bos = vc4->bo_pointers.base;
82
83 exec->bo_count = args->bo_handle_count;
84 exec->bo = calloc(exec->bo_count, sizeof(void *));
85 for (int i = 0; i < exec->bo_count; i++) {
86 struct vc4_bo *bo = bos[i];
87 struct drm_gem_cma_object *obj = vc4_wrap_bo_with_cma(dev, bo);
88
89 struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
90 #if 0
91 fprintf(stderr, "bo hindex %d: %s\n", i, bo->name);
92 #endif
93
94 vc4_bo_map(bo);
95 memcpy(obj->vaddr, bo->map, bo->size);
96
97 exec->bo[i] = obj;
98
99 /* The kernel does this validation at shader create ioctl
100 * time.
101 */
102 if (strcmp(bo->name, "code") == 0) {
103 drm_bo->validated_shader = vc4_validate_shader(obj);
104 if (!drm_bo->validated_shader)
105 abort();
106 }
107 }
108 return 0;
109 }
110
111 static int
112 vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
113 {
114 for (int i = 0; i < exec->bo_count; i++) {
115 struct drm_gem_cma_object *obj = exec->bo[i];
116 struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
117 struct vc4_bo *bo = drm_bo->bo;
118
119 assert(*(uint32_t *)(obj->vaddr + bo->size) == BO_SENTINEL);
120 memcpy(bo->map, obj->vaddr, bo->size);
121
122 if (drm_bo->validated_shader) {
123 free(drm_bo->validated_shader->texture_samples);
124 free(drm_bo->validated_shader);
125 }
126 free(obj);
127 }
128
129 free(exec->bo);
130
131 return 0;
132 }
133
134 static void
135 vc4_dump_to_file(struct vc4_exec_info *exec)
136 {
137 static int dumpno = 0;
138 struct drm_vc4_get_hang_state *state;
139 struct drm_vc4_get_hang_state_bo *bo_state;
140 unsigned int dump_version = 0;
141
142 if (!(vc4_debug & VC4_DEBUG_DUMP))
143 return;
144
145 state = calloc(1, sizeof(*state));
146
147 int unref_count = 0;
148 list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list,
149 unref_head) {
150 unref_count++;
151 }
152
153 /* Add one more for the overflow area that isn't wrapped in a BO. */
154 state->bo_count = exec->bo_count + unref_count + 1;
155 bo_state = calloc(state->bo_count, sizeof(*bo_state));
156
157 char *filename = NULL;
158 asprintf(&filename, "vc4-dri-%d.dump", dumpno++);
159 FILE *f = fopen(filename, "w+");
160 if (!f) {
161 fprintf(stderr, "Couldn't open %s: %s", filename,
162 strerror(errno));
163 return;
164 }
165
166 fwrite(&dump_version, sizeof(dump_version), 1, f);
167
168 state->ct0ca = exec->ct0ca;
169 state->ct0ea = exec->ct0ea;
170 state->ct1ca = exec->ct1ca;
171 state->ct1ea = exec->ct1ea;
172 state->start_bin = exec->ct0ca;
173 state->start_render = exec->ct1ca;
174 fwrite(state, sizeof(*state), 1, f);
175
176 int i;
177 for (i = 0; i < exec->bo_count; i++) {
178 struct drm_gem_cma_object *cma_bo = exec->bo[i];
179 bo_state[i].handle = i; /* Not used by the parser. */
180 bo_state[i].paddr = cma_bo->paddr;
181 bo_state[i].size = cma_bo->base.size;
182 }
183
184 list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list,
185 unref_head) {
186 struct drm_gem_cma_object *cma_bo = &bo->base;
187 bo_state[i].handle = 0;
188 bo_state[i].paddr = cma_bo->paddr;
189 bo_state[i].size = cma_bo->base.size;
190 i++;
191 }
192
193 /* Add the static overflow memory area. */
194 bo_state[i].handle = exec->bo_count;
195 bo_state[i].paddr = 0;
196 bo_state[i].size = OVERFLOW_SIZE;
197 i++;
198
199 fwrite(bo_state, sizeof(*bo_state), state->bo_count, f);
200
201 for (int i = 0; i < exec->bo_count; i++) {
202 struct drm_gem_cma_object *cma_bo = exec->bo[i];
203 fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
204 }
205
206 list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list,
207 unref_head) {
208 struct drm_gem_cma_object *cma_bo = &bo->base;
209 fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
210 }
211
212 void *overflow = calloc(1, OVERFLOW_SIZE);
213 fwrite(overflow, 1, OVERFLOW_SIZE, f);
214 free(overflow);
215
216 free(state);
217 free(bo_state);
218 fclose(f);
219 }
220
221 int
222 vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
223 {
224 struct vc4_screen *screen = vc4->screen;
225 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
226 struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
227 uint32_t winsys_stride = ctex ? ctex->bo->simulator_winsys_stride : 0;
228 uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0;
229 uint32_t row_len = MIN2(sim_stride, winsys_stride);
230 struct vc4_exec_info exec;
231 struct drm_device local_dev = {
232 .vc4 = vc4,
233 .simulator_mem_next = OVERFLOW_SIZE,
234 };
235 struct drm_device *dev = &local_dev;
236 int ret;
237
238 memset(&exec, 0, sizeof(exec));
239 list_inithead(&exec.unref_list);
240
241 if (ctex && ctex->bo->simulator_winsys_map) {
242 #if 0
243 fprintf(stderr, "%dx%d %d %d %d\n",
244 ctex->base.b.width0, ctex->base.b.height0,
245 winsys_stride,
246 sim_stride,
247 ctex->bo->size);
248 #endif
249
250 for (int y = 0; y < ctex->base.b.height0; y++) {
251 memcpy(ctex->bo->map + y * sim_stride,
252 ctex->bo->simulator_winsys_map + y * winsys_stride,
253 row_len);
254 }
255 }
256
257 exec.args = args;
258
259 ret = vc4_simulator_pin_bos(dev, &exec);
260 if (ret)
261 return ret;
262
263 ret = vc4_cl_validate(dev, &exec);
264 if (ret)
265 return ret;
266
267 if (vc4_debug & VC4_DEBUG_CL) {
268 fprintf(stderr, "RCL:\n");
269 vc4_dump_cl(screen->simulator_mem_base + exec.ct1ca,
270 exec.ct1ea - exec.ct1ca, true);
271 }
272
273 vc4_dump_to_file(&exec);
274
275 if (exec.ct0ca != exec.ct0ea) {
276 int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
277 if (bfc != 1) {
278 fprintf(stderr, "Binning returned %d flushes, should be 1.\n",
279 bfc);
280 fprintf(stderr, "Relocated binning command list:\n");
281 vc4_dump_cl(screen->simulator_mem_base + exec.ct0ca,
282 exec.ct0ea - exec.ct0ca, false);
283 abort();
284 }
285 }
286 int rfc = simpenrose_do_rendering(exec.ct1ca, exec.ct1ea);
287 if (rfc != 1) {
288 fprintf(stderr, "Rendering returned %d frames, should be 1.\n",
289 rfc);
290 fprintf(stderr, "Relocated render command list:\n");
291 vc4_dump_cl(screen->simulator_mem_base + exec.ct1ca,
292 exec.ct1ea - exec.ct1ca, true);
293 abort();
294 }
295
296 ret = vc4_simulator_unpin_bos(&exec);
297 if (ret)
298 return ret;
299
300 list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec.unref_list,
301 unref_head) {
302 list_del(&bo->unref_head);
303 assert(*(uint32_t *)(bo->base.vaddr + bo->bo->size) ==
304 BO_SENTINEL);
305 vc4_bo_unreference(&bo->bo);
306 free(bo);
307 }
308
309 if (ctex && ctex->bo->simulator_winsys_map) {
310 for (int y = 0; y < ctex->base.b.height0; y++) {
311 memcpy(ctex->bo->simulator_winsys_map + y * winsys_stride,
312 ctex->bo->map + y * sim_stride,
313 row_len);
314 }
315 }
316
317 return 0;
318 }
319
320 void
321 vc4_simulator_init(struct vc4_screen *screen)
322 {
323 screen->simulator_mem_size = 256 * 1024 * 1024;
324 screen->simulator_mem_base = ralloc_size(screen,
325 screen->simulator_mem_size);
326
327 /* We supply our own memory so that we can have more aperture
328 * available (256MB instead of simpenrose's default 64MB).
329 */
330 simpenrose_init_hardware_supply_mem(screen->simulator_mem_base,
331 screen->simulator_mem_size);
332
333 /* Carve out low memory for tile allocation overflow. The kernel
334 * should be automatically handling overflow memory setup on real
335 * hardware, but for simulation we just get one shot to set up enough
336 * overflow memory before execution. This overflow mem will be used
337 * up over the whole lifetime of simpenrose (not reused on each
338 * flush), so it had better be big.
339 */
340 simpenrose_supply_overflow_mem(0, OVERFLOW_SIZE);
341 }
342
343 #endif /* USE_VC4_SIMULATOR */