android: fix cflags and includes for amdgpu winsys
[mesa.git] / src / gallium / drivers / vc4 / vc4_simulator.c
index cedd401d47aa758e8a9f8e159fc25c6021578328..7cfd236349d3fc6b09a1bf7d4af69ab89e49b3d4 100644 (file)
 
 #ifdef USE_VC4_SIMULATOR
 
-#include <stdio.h>
-
 #include "util/u_memory.h"
+#include "util/ralloc.h"
 
 #include "vc4_screen.h"
 #include "vc4_context.h"
+#include "kernel/vc4_drv.h"
 #include "vc4_simulator_validate.h"
 #include "simpenrose/simpenrose.h"
 
@@ -39,11 +39,13 @@ vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo)
 {
         struct vc4_context *vc4 = dev->vc4;
         struct vc4_screen *screen = vc4->screen;
-        struct drm_gem_cma_object *obj = CALLOC_STRUCT(drm_gem_cma_object);
+        struct drm_vc4_bo *drm_bo = CALLOC_STRUCT(drm_vc4_bo);
+        struct drm_gem_cma_object *obj = &drm_bo->base;
         uint32_t size = align(bo->size, 4096);
 
-        obj->bo = bo;
+        drm_bo->bo = bo;
         obj->base.size = size;
+        obj->base.dev = dev;
         obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next;
         obj->paddr = simpenrose_hw_addr(obj->vaddr);
 
@@ -54,7 +56,7 @@ vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo)
         return obj;
 }
 
-static struct drm_gem_cma_object *
+struct drm_gem_cma_object *
 drm_gem_cma_create(struct drm_device *dev, size_t size)
 {
         struct vc4_context *vc4 = dev->vc4;
@@ -65,18 +67,19 @@ drm_gem_cma_create(struct drm_device *dev, size_t size)
 }
 
 static int
-vc4_simulator_pin_bos(struct drm_device *dev, struct exec_info *exec)
+vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec)
 {
         struct drm_vc4_submit_cl *args = exec->args;
         struct vc4_context *vc4 = dev->vc4;
         struct vc4_bo **bos = vc4->bo_pointers.base;
 
         exec->bo_count = args->bo_handle_count;
-        exec->bo = calloc(exec->bo_count, sizeof(struct vc4_bo_exec_state));
+        exec->bo = calloc(exec->bo_count, sizeof(void *));
         for (int i = 0; i < exec->bo_count; i++) {
                 struct vc4_bo *bo = bos[i];
                 struct drm_gem_cma_object *obj = vc4_wrap_bo_with_cma(dev, bo);
 
+                struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
 #if 0
                 fprintf(stderr, "bo hindex %d: %s\n", i, bo->name);
 #endif
@@ -84,17 +87,26 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct exec_info *exec)
                 vc4_bo_map(bo);
                 memcpy(obj->vaddr, bo->map, bo->size);
 
-                exec->bo[i].bo = obj;
+                exec->bo[i] = obj;
+
+                /* The kernel does this validation at shader create ioctl
+                 * time; here it is done when pinning a BO named "code".
+                 */
+                if (strcmp(bo->name, "code") == 0) {
+                        drm_bo->validated_shader = vc4_validate_shader(obj);
+                        if (!drm_bo->validated_shader)
+                                abort();
+                }
         }
         return 0;
 }
 
 static int
-vc4_simulator_unpin_bos(struct exec_info *exec)
+vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
 {
         for (int i = 0; i < exec->bo_count; i++) {
-                struct drm_gem_cma_object *obj = exec->bo[i].bo;
-                struct vc4_bo *bo = obj->bo;
+                struct drm_gem_cma_object *obj = exec->bo[i];
+                struct vc4_bo *bo = to_vc4_bo(&obj->base)->bo;
 
                 memcpy(bo->map, obj->vaddr, bo->size);
 
@@ -106,134 +118,16 @@ vc4_simulator_unpin_bos(struct exec_info *exec)
         return 0;
 }
 
-static int
-vc4_cl_validate(struct drm_device *dev, struct exec_info *exec)
-{
-       struct drm_vc4_submit_cl *args = exec->args;
-       void *temp = NULL;
-       void *bin, *render;
-       int ret = 0;
-       uint32_t bin_offset = 0;
-       uint32_t render_offset = bin_offset + args->bin_cl_size;
-       uint32_t shader_rec_offset = roundup(render_offset +
-                                            args->render_cl_size, 16);
-       uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
-       uint32_t exec_size = uniforms_offset + args->uniforms_size;
-       uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
-                                         args->shader_rec_count);
-
-       if (shader_rec_offset < render_offset ||
-           uniforms_offset < shader_rec_offset ||
-           exec_size < uniforms_offset ||
-           args->shader_rec_count >= (UINT_MAX /
-                                         sizeof(struct vc4_shader_state)) ||
-           temp_size < exec_size) {
-               DRM_ERROR("overflow in exec arguments\n");
-               goto fail;
-       }
-
-       /* Allocate space where we'll store the copied in user command lists
-        * and shader records.
-        *
-        * We don't just copy directly into the BOs because we need to
-        * read the contents back for validation, and I think the
-        * bo->vaddr is uncached access.
-        */
-       temp = kmalloc(temp_size, GFP_KERNEL);
-       if (!temp) {
-               DRM_ERROR("Failed to allocate storage for copying "
-                         "in bin/render CLs.\n");
-               ret = -ENOMEM;
-               goto fail;
-       }
-       bin = temp + bin_offset;
-       render = temp + render_offset;
-       exec->shader_rec_u = temp + shader_rec_offset;
-       exec->uniforms_u = temp + uniforms_offset;
-       exec->shader_state = temp + exec_size;
-       exec->shader_state_size = args->shader_rec_count;
-
-       ret = copy_from_user(bin, args->bin_cl, args->bin_cl_size);
-       if (ret) {
-               DRM_ERROR("Failed to copy in bin cl\n");
-               goto fail;
-       }
-
-       ret = copy_from_user(render, args->render_cl, args->render_cl_size);
-       if (ret) {
-               DRM_ERROR("Failed to copy in render cl\n");
-               goto fail;
-       }
-
-       ret = copy_from_user(exec->shader_rec_u, args->shader_rec,
-                            args->shader_rec_size);
-       if (ret) {
-               DRM_ERROR("Failed to copy in shader recs\n");
-               goto fail;
-       }
-
-       ret = copy_from_user(exec->uniforms_u, args->uniforms,
-                            args->uniforms_size);
-       if (ret) {
-               DRM_ERROR("Failed to copy in uniforms cl\n");
-               goto fail;
-       }
-
-       exec->exec_bo = drm_gem_cma_create(dev, exec_size);
-#if 0
-       if (IS_ERR(exec->exec_bo)) {
-               DRM_ERROR("Couldn't allocate BO for exec\n");
-               ret = PTR_ERR(exec->exec_bo);
-               exec->exec_bo = NULL;
-               goto fail;
-       }
-#endif
-
-       exec->ct0ca = exec->exec_bo->paddr + bin_offset;
-       exec->ct1ca = exec->exec_bo->paddr + render_offset;
-
-       exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
-       exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
-       exec->shader_rec_size = args->shader_rec_size;
-
-       exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
-       exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
-       exec->uniforms_size = args->uniforms_size;
-
-       ret = vc4_validate_cl(dev,
-                             exec->exec_bo->vaddr + bin_offset,
-                             bin,
-                             args->bin_cl_size,
-                             true,
-                             exec);
-       if (ret)
-               goto fail;
-
-       ret = vc4_validate_cl(dev,
-                             exec->exec_bo->vaddr + render_offset,
-                             render,
-                             args->render_cl_size,
-                             false,
-                             exec);
-       if (ret)
-               goto fail;
-
-       ret = vc4_validate_shader_recs(dev, exec);
-
-fail:
-       kfree(temp);
-       return ret;
-}
-
 int
 vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
 {
+        struct vc4_screen *screen = vc4->screen;
         struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
-        struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
-        uint32_t winsys_stride = ctex->bo->simulator_winsys_stride;
-        uint32_t sim_stride = ctex->slices[0].stride;
+        struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
+        uint32_t winsys_stride = ctex ? ctex->bo->simulator_winsys_stride : 0;
+        uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0;
         uint32_t row_len = MIN2(sim_stride, winsys_stride);
-        struct exec_info exec;
+        struct vc4_exec_info exec;
         struct drm_device local_dev = {
                 .vc4 = vc4,
                 .simulator_mem_next = OVERFLOW_SIZE,
@@ -242,8 +136,9 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
         int ret;
 
         memset(&exec, 0, sizeof(exec));
+        list_inithead(&exec.unref_list);
 
-        if (ctex->bo->simulator_winsys_map) {
+        if (ctex && ctex->bo->simulator_winsys_map) {
 #if 0
                 fprintf(stderr, "%dx%d %d %d %d\n",
                         ctex->base.b.width0, ctex->base.b.height0,
@@ -269,16 +164,39 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
         if (ret)
                 return ret;
 
-        simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
-        simpenrose_do_rendering(exec.ct1ca, exec.ct1ea);
+        if (exec.ct0ca != exec.ct0ea) {
+                int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
+                if (bfc != 1) {
+                        fprintf(stderr, "Binning returned %d flushes, should be 1.\n",
+                                bfc);
+                        fprintf(stderr, "Relocated binning command list:\n");
+                        vc4_dump_cl(screen->simulator_mem_base + exec.ct0ca,
+                                    exec.ct0ea - exec.ct0ca, false);
+                        abort();
+                }
+        }
+        int rfc = simpenrose_do_rendering(exec.ct1ca, exec.ct1ea);
+        if (rfc != 1) {
+                fprintf(stderr, "Rendering returned %d frames, should be 1.\n",
+                        rfc);
+                fprintf(stderr, "Relocated render command list:\n");
+                vc4_dump_cl(screen->simulator_mem_base + exec.ct1ca,
+                            exec.ct1ea - exec.ct1ca, true);
+                abort();
+        }
 
         ret = vc4_simulator_unpin_bos(&exec);
         if (ret)
                 return ret;
 
-        free(exec.exec_bo);
+        list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec.unref_list,
+                                 unref_head) {
+               list_del(&bo->unref_head);
+                vc4_bo_unreference(&bo->bo);
+                free(bo);
+        }
 
-        if (ctex->bo->simulator_winsys_map) {
+        if (ctex && ctex->bo->simulator_winsys_map) {
                 for (int y = 0; y < ctex->base.b.height0; y++) {
                         memcpy(ctex->bo->simulator_winsys_map + y * winsys_stride,
                                ctex->bo->map + y * sim_stride,
@@ -293,7 +211,8 @@ void
 vc4_simulator_init(struct vc4_screen *screen)
 {
         screen->simulator_mem_size = 256 * 1024 * 1024;
-        screen->simulator_mem_base = malloc(screen->simulator_mem_size);
+        screen->simulator_mem_base = ralloc_size(screen,
+                                                 screen->simulator_mem_size);
 
         /* We supply our own memory so that we can have more aperture
          * available (256MB instead of simpenrose's default 64MB).