vc4: Don't forget to store stencil along with depth when storing either.
[mesa.git] / src / gallium / drivers / vc4 / vc4_simulator.c
1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifdef USE_VC4_SIMULATOR
25
26 #include "util/u_memory.h"
27
28 #include "vc4_screen.h"
29 #include "vc4_context.h"
30 #include "vc4_simulator_validate.h"
31 #include "simpenrose/simpenrose.h"
32
33 #define OVERFLOW_SIZE (32 * 1024 * 1024)
34
35 static struct drm_gem_cma_object *
36 vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo)
37 {
38 struct vc4_context *vc4 = dev->vc4;
39 struct vc4_screen *screen = vc4->screen;
40 struct drm_gem_cma_object *obj = CALLOC_STRUCT(drm_gem_cma_object);
41 uint32_t size = align(bo->size, 4096);
42
43 obj->bo = bo;
44 obj->base.size = size;
45 obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next;
46 obj->paddr = simpenrose_hw_addr(obj->vaddr);
47
48 dev->simulator_mem_next += size;
49 dev->simulator_mem_next = align(dev->simulator_mem_next, 4096);
50 assert(dev->simulator_mem_next <= screen->simulator_mem_size);
51
52 return obj;
53 }
54
55 static struct drm_gem_cma_object *
56 drm_gem_cma_create(struct drm_device *dev, size_t size)
57 {
58 struct vc4_context *vc4 = dev->vc4;
59 struct vc4_screen *screen = vc4->screen;
60
61 struct vc4_bo *bo = vc4_bo_alloc(screen, size, "simulator validate");
62 return vc4_wrap_bo_with_cma(dev, bo);
63 }
64
65 static int
66 vc4_simulator_pin_bos(struct drm_device *dev, struct exec_info *exec)
67 {
68 struct drm_vc4_submit_cl *args = exec->args;
69 struct vc4_context *vc4 = dev->vc4;
70 struct vc4_bo **bos = vc4->bo_pointers.base;
71
72 exec->bo_count = args->bo_handle_count;
73 exec->bo = calloc(exec->bo_count, sizeof(struct vc4_bo_exec_state));
74 for (int i = 0; i < exec->bo_count; i++) {
75 struct vc4_bo *bo = bos[i];
76 struct drm_gem_cma_object *obj = vc4_wrap_bo_with_cma(dev, bo);
77
78 #if 0
79 fprintf(stderr, "bo hindex %d: %s\n", i, bo->name);
80 #endif
81
82 vc4_bo_map(bo);
83 memcpy(obj->vaddr, bo->map, bo->size);
84
85 exec->bo[i].bo = obj;
86 }
87 return 0;
88 }
89
90 static int
91 vc4_simulator_unpin_bos(struct exec_info *exec)
92 {
93 for (int i = 0; i < exec->bo_count; i++) {
94 struct drm_gem_cma_object *obj = exec->bo[i].bo;
95 struct vc4_bo *bo = obj->bo;
96
97 memcpy(bo->map, obj->vaddr, bo->size);
98
99 free(obj);
100 }
101
102 free(exec->bo);
103
104 return 0;
105 }
106
107 static int
108 vc4_cl_validate(struct drm_device *dev, struct exec_info *exec)
109 {
110 struct drm_vc4_submit_cl *args = exec->args;
111 void *temp = NULL;
112 void *bin, *render;
113 int ret = 0;
114 uint32_t bin_offset = 0;
115 uint32_t render_offset = bin_offset + args->bin_cl_size;
116 uint32_t shader_rec_offset = roundup(render_offset +
117 args->render_cl_size, 16);
118 uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
119 uint32_t exec_size = uniforms_offset + args->uniforms_size;
120 uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
121 args->shader_rec_count);
122
123 if (shader_rec_offset < render_offset ||
124 uniforms_offset < shader_rec_offset ||
125 exec_size < uniforms_offset ||
126 args->shader_rec_count >= (UINT_MAX /
127 sizeof(struct vc4_shader_state)) ||
128 temp_size < exec_size) {
129 DRM_ERROR("overflow in exec arguments\n");
130 goto fail;
131 }
132
133 /* Allocate space where we'll store the copied in user command lists
134 * and shader records.
135 *
136 * We don't just copy directly into the BOs because we need to
137 * read the contents back for validation, and I think the
138 * bo->vaddr is uncached access.
139 */
140 temp = kmalloc(temp_size, GFP_KERNEL);
141 if (!temp) {
142 DRM_ERROR("Failed to allocate storage for copying "
143 "in bin/render CLs.\n");
144 ret = -ENOMEM;
145 goto fail;
146 }
147 bin = temp + bin_offset;
148 render = temp + render_offset;
149 exec->shader_rec_u = temp + shader_rec_offset;
150 exec->uniforms_u = temp + uniforms_offset;
151 exec->shader_state = temp + exec_size;
152 exec->shader_state_size = args->shader_rec_count;
153
154 ret = copy_from_user(bin, args->bin_cl, args->bin_cl_size);
155 if (ret) {
156 DRM_ERROR("Failed to copy in bin cl\n");
157 goto fail;
158 }
159
160 ret = copy_from_user(render, args->render_cl, args->render_cl_size);
161 if (ret) {
162 DRM_ERROR("Failed to copy in render cl\n");
163 goto fail;
164 }
165
166 ret = copy_from_user(exec->shader_rec_u, args->shader_rec,
167 args->shader_rec_size);
168 if (ret) {
169 DRM_ERROR("Failed to copy in shader recs\n");
170 goto fail;
171 }
172
173 ret = copy_from_user(exec->uniforms_u, args->uniforms,
174 args->uniforms_size);
175 if (ret) {
176 DRM_ERROR("Failed to copy in uniforms cl\n");
177 goto fail;
178 }
179
180 exec->exec_bo = drm_gem_cma_create(dev, exec_size);
181 #if 0
182 if (IS_ERR(exec->exec_bo)) {
183 DRM_ERROR("Couldn't allocate BO for exec\n");
184 ret = PTR_ERR(exec->exec_bo);
185 exec->exec_bo = NULL;
186 goto fail;
187 }
188 #endif
189
190 exec->ct0ca = exec->exec_bo->paddr + bin_offset;
191 exec->ct1ca = exec->exec_bo->paddr + render_offset;
192
193 exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
194 exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
195 exec->shader_rec_size = args->shader_rec_size;
196
197 exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
198 exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
199 exec->uniforms_size = args->uniforms_size;
200
201 ret = vc4_validate_cl(dev,
202 exec->exec_bo->vaddr + bin_offset,
203 bin,
204 args->bin_cl_size,
205 true,
206 exec);
207 if (ret)
208 goto fail;
209
210 ret = vc4_validate_cl(dev,
211 exec->exec_bo->vaddr + render_offset,
212 render,
213 args->render_cl_size,
214 false,
215 exec);
216 if (ret)
217 goto fail;
218
219 ret = vc4_validate_shader_recs(dev, exec);
220
221 fail:
222 kfree(temp);
223 return ret;
224 }
225
226 int
227 vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
228 {
229 struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
230 struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
231 uint32_t winsys_stride = ctex ? ctex->bo->simulator_winsys_stride : 0;
232 uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0;
233 uint32_t row_len = MIN2(sim_stride, winsys_stride);
234 struct exec_info exec;
235 struct drm_device local_dev = {
236 .vc4 = vc4,
237 .simulator_mem_next = OVERFLOW_SIZE,
238 };
239 struct drm_device *dev = &local_dev;
240 int ret;
241
242 memset(&exec, 0, sizeof(exec));
243
244 if (ctex && ctex->bo->simulator_winsys_map) {
245 #if 0
246 fprintf(stderr, "%dx%d %d %d %d\n",
247 ctex->base.b.width0, ctex->base.b.height0,
248 winsys_stride,
249 sim_stride,
250 ctex->bo->size);
251 #endif
252
253 for (int y = 0; y < ctex->base.b.height0; y++) {
254 memcpy(ctex->bo->map + y * sim_stride,
255 ctex->bo->simulator_winsys_map + y * winsys_stride,
256 row_len);
257 }
258 }
259
260 exec.args = args;
261
262 ret = vc4_simulator_pin_bos(dev, &exec);
263 if (ret)
264 return ret;
265
266 ret = vc4_cl_validate(dev, &exec);
267 if (ret)
268 return ret;
269
270 simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
271 simpenrose_do_rendering(exec.ct1ca, exec.ct1ea);
272
273 ret = vc4_simulator_unpin_bos(&exec);
274 if (ret)
275 return ret;
276
277 free(exec.exec_bo);
278
279 if (ctex && ctex->bo->simulator_winsys_map) {
280 for (int y = 0; y < ctex->base.b.height0; y++) {
281 memcpy(ctex->bo->simulator_winsys_map + y * winsys_stride,
282 ctex->bo->map + y * sim_stride,
283 row_len);
284 }
285 }
286
287 return 0;
288 }
289
290 void
291 vc4_simulator_init(struct vc4_screen *screen)
292 {
293 screen->simulator_mem_size = 256 * 1024 * 1024;
294 screen->simulator_mem_base = malloc(screen->simulator_mem_size);
295
296 /* We supply our own memory so that we can have more aperture
297 * available (256MB instead of simpenrose's default 64MB).
298 */
299 simpenrose_init_hardware_supply_mem(screen->simulator_mem_base,
300 screen->simulator_mem_size);
301
302 /* Carve out low memory for tile allocation overflow. The kernel
303 * should be automatically handling overflow memory setup on real
304 * hardware, but for simulation we just get one shot to set up enough
305 * overflow memory before execution. This overflow mem will be used
306 * up over the whole lifetime of simpenrose (not reused on each
307 * flush), so it had better be big.
308 */
309 simpenrose_supply_overflow_mem(0, OVERFLOW_SIZE);
310 }
311
312 #endif /* USE_VC4_SIMULATOR */