X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_simulator.c;h=7cfd236349d3fc6b09a1bf7d4af69ab89e49b3d4;hb=6aaa814995d922d6f9cc68bc26276fd752866ceb;hp=0dada68791162537242ef363a1ff94d8d85ff0ce;hpb=a8f2bf0f51222a96a49dfb3d6f9b36d3e54d08cd;p=mesa.git diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 0dada687911..7cfd236349d 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -23,25 +23,29 @@ #ifdef USE_VC4_SIMULATOR -#include - #include "util/u_memory.h" +#include "util/ralloc.h" #include "vc4_screen.h" #include "vc4_context.h" +#include "kernel/vc4_drv.h" #include "vc4_simulator_validate.h" #include "simpenrose/simpenrose.h" +#define OVERFLOW_SIZE (32 * 1024 * 1024) + static struct drm_gem_cma_object * vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo) { struct vc4_context *vc4 = dev->vc4; struct vc4_screen *screen = vc4->screen; - struct drm_gem_cma_object *obj = CALLOC_STRUCT(drm_gem_cma_object); + struct drm_vc4_bo *drm_bo = CALLOC_STRUCT(drm_vc4_bo); + struct drm_gem_cma_object *obj = &drm_bo->base; uint32_t size = align(bo->size, 4096); - obj->bo = bo; + drm_bo->bo = bo; obj->base.size = size; + obj->base.dev = dev; obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next; obj->paddr = simpenrose_hw_addr(obj->vaddr); @@ -52,7 +56,7 @@ vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo) return obj; } -static struct drm_gem_cma_object * +struct drm_gem_cma_object * drm_gem_cma_create(struct drm_device *dev, size_t size) { struct vc4_context *vc4 = dev->vc4; @@ -63,7 +67,7 @@ drm_gem_cma_create(struct drm_device *dev, size_t size) } static int -vc4_simulator_pin_bos(struct drm_device *dev, struct exec_info *exec) +vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec) { struct drm_vc4_submit_cl *args = exec->args; struct vc4_context *vc4 = dev->vc4; @@ -75,20 +79,34 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct exec_info *exec) struct vc4_bo *bo = bos[i]; struct drm_gem_cma_object *obj = vc4_wrap_bo_with_cma(dev, bo); + struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base); +#if 0 + fprintf(stderr, "bo hindex %d: %s\n", i, bo->name); +#endif + + vc4_bo_map(bo); memcpy(obj->vaddr, bo->map, bo->size); exec->bo[i] = obj; - } + /* The kernel does this validation at shader create ioctl + * time. + */ + if (strcmp(bo->name, "code") == 0) { + drm_bo->validated_shader = vc4_validate_shader(obj); + if (!drm_bo->validated_shader) + abort(); + } + } return 0; } static int -vc4_simulator_unpin_bos(struct exec_info *exec) +vc4_simulator_unpin_bos(struct vc4_exec_info *exec) { for (int i = 0; i < exec->bo_count; i++) { struct drm_gem_cma_object *obj = exec->bo[i]; - struct vc4_bo *bo = obj->bo; + struct vc4_bo *bo = to_vc4_bo(&obj->base)->bo; memcpy(bo->map, obj->vaddr, bo->size); @@ -100,147 +118,27 @@ vc4_simulator_unpin_bos(struct exec_info *exec) return 0; } -static int -vc4_cl_validate(struct drm_device *dev, struct exec_info *exec) -{ - struct drm_vc4_submit_cl *args = exec->args; - void *temp = NULL; - void *bin, *render, *shader_rec; - int ret = 0; - uint32_t bin_offset = 0; - uint32_t render_offset = bin_offset + args->bin_cl_len; - uint32_t shader_rec_offset = roundup(render_offset + - args->render_cl_len, 16); - uint32_t uniforms_offset = shader_rec_offset + args->shader_record_len; - uint32_t exec_size = uniforms_offset + args->uniforms_len; - uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) * - args->shader_record_count); - - if (shader_rec_offset < render_offset || - uniforms_offset < shader_rec_offset || - exec_size < uniforms_offset || - args->shader_record_count >= (UINT_MAX / - sizeof(struct vc4_shader_state)) || - temp_size < exec_size) { - DRM_ERROR("overflow in exec arguments\n"); - goto fail; - } - - /* Allocate space where we'll store the copied in user command lists - * and shader records. - * - * We don't just copy directly into the BOs because we need to - * read the contents back for validation, and I think the - * bo->vaddr is uncached access. - */ - temp = kmalloc(temp_size, GFP_KERNEL); - if (!temp) { - DRM_ERROR("Failed to allocate storage for copying " - "in bin/render CLs.\n"); - ret = -ENOMEM; - goto fail; - } - bin = temp + bin_offset; - render = temp + render_offset; - shader_rec = temp + shader_rec_offset; - exec->uniforms_u = temp + uniforms_offset; - exec->shader_state = temp + exec_size; - exec->shader_state_size = args->shader_record_count; - - ret = copy_from_user(bin, args->bin_cl, args->bin_cl_len); - if (ret) { - DRM_ERROR("Failed to copy in bin cl\n"); - goto fail; - } - - ret = copy_from_user(render, args->render_cl, args->render_cl_len); - if (ret) { - DRM_ERROR("Failed to copy in render cl\n"); - goto fail; - } - - ret = copy_from_user(shader_rec, args->shader_records, - args->shader_record_len); - if (ret) { - DRM_ERROR("Failed to copy in shader recs\n"); - goto fail; - } - - ret = copy_from_user(exec->uniforms_u, args->uniforms, - args->uniforms_len); - if (ret) { - DRM_ERROR("Failed to copy in uniforms cl\n"); - goto fail; - } - - exec->exec_bo = drm_gem_cma_create(dev, exec_size); -#if 0 - if (IS_ERR(exec->exec_bo)) { - DRM_ERROR("Couldn't allocate BO for exec\n"); - ret = PTR_ERR(exec->exec_bo); - exec->exec_bo = NULL; - goto fail; - } -#endif - - exec->ct0ca = exec->exec_bo->paddr + bin_offset; - exec->ct0ea = exec->ct0ca + args->bin_cl_len; - exec->ct1ca = exec->exec_bo->paddr + render_offset; - exec->ct1ea = exec->ct1ca + args->render_cl_len; - exec->shader_paddr = exec->exec_bo->paddr + shader_rec_offset; - - exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset; - exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset; - exec->uniforms_size = args->uniforms_len; - - ret = vc4_validate_cl(dev, - exec->exec_bo->vaddr + bin_offset, - bin, - args->bin_cl_len, - true, - exec); - if (ret) - goto fail; - - ret = vc4_validate_cl(dev, - exec->exec_bo->vaddr + render_offset, - render, - args->render_cl_len, - false, - exec); - if (ret) - goto fail; - - ret = vc4_validate_shader_recs(dev, - exec->exec_bo->vaddr + shader_rec_offset, - shader_rec, - args->shader_record_len, - exec); - -fail: - kfree(temp); - return ret; -} - int -vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args, - struct vc4_surface *csurf) +vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) { - struct vc4_resource *ctex = vc4_resource(csurf->base.texture); - uint32_t winsys_stride = ctex->bo->simulator_winsys_stride; - uint32_t sim_stride = ctex->slices[0].stride; + struct vc4_screen *screen = vc4->screen; + struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]); + struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL; + uint32_t winsys_stride = ctex ? ctex->bo->simulator_winsys_stride : 0; + uint32_t sim_stride = ctex ? ctex->slices[0].stride : 0; uint32_t row_len = MIN2(sim_stride, winsys_stride); - struct exec_info exec; + struct vc4_exec_info exec; struct drm_device local_dev = { .vc4 = vc4, - .simulator_mem_next = 0, + .simulator_mem_next = OVERFLOW_SIZE, }; struct drm_device *dev = &local_dev; int ret; memset(&exec, 0, sizeof(exec)); + list_inithead(&exec.unref_list); - if (ctex->bo->simulator_winsys_map) { + if (ctex && ctex->bo->simulator_winsys_map) { #if 0 fprintf(stderr, "%dx%d %d %d %d\n", ctex->base.b.width0, ctex->base.b.height0, @@ -266,16 +164,39 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args, if (ret) return ret; - simpenrose_do_binning(exec.ct0ca, exec.ct0ea); - simpenrose_do_rendering(exec.ct1ca, exec.ct1ea); + if (exec.ct0ca != exec.ct0ea) { + int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea); + if (bfc != 1) { + fprintf(stderr, "Binning returned %d flushes, should be 1.\n", + bfc); + fprintf(stderr, "Relocated binning command list:\n"); + vc4_dump_cl(screen->simulator_mem_base + exec.ct0ca, + exec.ct0ea - exec.ct0ca, false); + abort(); + } + } + int rfc = simpenrose_do_rendering(exec.ct1ca, exec.ct1ea); + if (rfc != 1) { + fprintf(stderr, "Rendering returned %d frames, should be 1.\n", + rfc); + fprintf(stderr, "Relocated render command list:\n"); + vc4_dump_cl(screen->simulator_mem_base + exec.ct1ca, + exec.ct1ea - exec.ct1ca, true); + abort(); + } ret = vc4_simulator_unpin_bos(&exec); if (ret) return ret; - free(exec.exec_bo); + list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec.unref_list, + unref_head) { + list_del(&bo->unref_head); + vc4_bo_unreference(&bo->bo); + free(bo); + } - if (ctex->bo->simulator_winsys_map) { + if (ctex && ctex->bo->simulator_winsys_map) { for (int y = 0; y < ctex->base.b.height0; y++) { memcpy(ctex->bo->simulator_winsys_map + y * winsys_stride, ctex->bo->map + y * sim_stride, @@ -289,9 +210,24 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args, void vc4_simulator_init(struct vc4_screen *screen) { - simpenrose_init_hardware(); - screen->simulator_mem_base = simpenrose_get_mem_start(); - screen->simulator_mem_size = simpenrose_get_mem_size(); + screen->simulator_mem_size = 256 * 1024 * 1024; + screen->simulator_mem_base = ralloc_size(screen, + screen->simulator_mem_size); + + /* We supply our own memory so that we can have more aperture + * available (256MB instead of simpenrose's default 64MB). + */ + simpenrose_init_hardware_supply_mem(screen->simulator_mem_base, + screen->simulator_mem_size); + + /* Carve out low memory for tile allocation overflow. The kernel + * should be automatically handling overflow memory setup on real + * hardware, but for simulation we just get one shot to set up enough + * overflow memory before execution. This overflow mem will be used + * up over the whole lifetime of simpenrose (not reused on each + * flush), so it had better be big. + */ + simpenrose_supply_overflow_mem(0, OVERFLOW_SIZE); } #endif /* USE_VC4_SIMULATOR */