From: Eric Anholt <eric@anholt.net>
Date: Fri, 18 Jul 2014 20:06:01 +0000 (-0700)
Subject: vc4: Switch simulator to using kernel validator
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a3cd3c0d198374647df3db83198e8ce0cddcb6b7;p=mesa.git

vc4: Switch simulator to using kernel validator

This ensures that when I'm using the simulator, its behavior more closely
matches what real hardware will do.  It lets me rapidly iterate on the
kernel validation code (which otherwise has a several-minute turnaround
time), and helps catch buffer overflow bugs in the userspace driver
faster.
---

diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index b4e499b7d6c..ee351835896 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -16,5 +16,6 @@ C_SOURCES := \
 	vc4_resource.c \
 	vc4_screen.c \
 	vc4_simulator.c \
+	vc4_simulator_validate.c \
 	vc4_state.c \
 	$()
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 6cf7d163dd0..653787e1905 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -60,7 +60,10 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
         bo->handle = create.handle;
         assert(create.size >= size);
 #else /* USE_VC4_SIMULATOR */
-        bo->map = vc4_simulator_alloc(screen, size);
+        static int next_handle = 0;
+        bo->handle = next_handle++;
+
+        bo->map = malloc(size);
 #endif /* USE_VC4_SIMULATOR */
 
         return bo;
@@ -77,6 +80,8 @@ vc4_bo_free(struct vc4_bo *bo)
         int ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &c);
         if (ret != 0)
                 fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
+#else
+        free(bo->map);
 #endif
 
         free(bo);
@@ -107,7 +112,7 @@ vc4_bo_open_name(struct vc4_screen *screen, uint32_t name,
         vc4_bo_map(bo);
         bo->simulator_winsys_map = bo->map;
         bo->simulator_winsys_stride = winsys_stride;
-        bo->map = vc4_simulator_alloc(screen, bo->size);
+        bo->map = malloc(bo->size);
 #endif
 
         return bo;
diff --git a/src/gallium/drivers/vc4/vc4_cl.c b/src/gallium/drivers/vc4/vc4_cl.c
index 3bbeadc49ee..5c660d8d7a8 100644
--- a/src/gallium/drivers/vc4/vc4_cl.c
+++ b/src/gallium/drivers/vc4/vc4_cl.c
@@ -27,12 +27,6 @@
 void
 vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl)
 {
-#ifdef USE_VC4_SIMULATOR
-        uint32_t size = 256 * 1024;
-        cl->base = vc4_simulator_alloc(vc4->screen, size);
-        cl->end = cl->base + size;
-        cl->next = cl->base;
-#endif
 }
 
 void
@@ -41,9 +35,6 @@ vc4_grow_cl(struct vc4_cl *cl)
         uint32_t size = MAX2((cl->end - cl->base) * 2, 4096);
         uint32_t offset = cl->next -cl->base;
 
-#ifdef USE_VC4_SIMULATOR
-        assert(!"not reached");
-#endif
         cl->base = realloc(cl->base, size);
         cl->end = cl->base + size;
         cl->next = cl->base + offset;
@@ -70,5 +61,10 @@ vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo)
         }
 
         cl_u32(&vc4->bo_handles, bo->handle);
+
+#ifdef USE_VC4_SIMULATOR
+        cl_ptr(&vc4->bo_pointers, bo);
+#endif
+
         return hindex;
 }
diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h
index a892444d9c7..3e7c4836569 100644
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -28,10 +28,6 @@
 
 #include "util/u_math.h"
 
-#ifdef USE_VC4_SIMULATOR
-#include "simpenrose/simpenrose.h"
-#endif
-
 #include "vc4_packet.h"
 
 struct vc4_bo;
@@ -79,6 +75,16 @@ cl_u32(struct vc4_cl *cl, uint32_t n)
         cl->next += 4;
 }
 
+/* Appends a raw pointer to the CL, growing the buffer first if it is full. */
+static inline void
+cl_ptr(struct vc4_cl *cl, void *ptr)
+{
+        if (cl->end < cl->next + sizeof(void *))
+                vc4_grow_cl(cl);
+        *(void **)cl->next = ptr;
+        cl->next += sizeof(ptr);
+}
+
 static inline void
 cl_f(struct vc4_cl *cl, float f)
 {
@@ -92,12 +98,10 @@ cl_start_reloc(struct vc4_cl *cl, uint32_t n)
         assert(cl->reloc_count == 0);
         cl->reloc_count = n;
 
-#ifndef USE_VC4_SIMULATOR
         cl_u8(cl, GEM_HANDLES);
         cl->reloc_next = cl->next - cl->base;
         cl_u32(cl, 0); /* Space where hindex will be written. */
         cl_u32(cl, 0); /* Space where hindex will be written. */
-#endif
 }
 
 static inline void
@@ -107,22 +111,16 @@ cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
         cl->reloc_count = n;
         cl->reloc_next = cl->next - cl->base;
 
-#ifndef USE_VC4_SIMULATOR
         for (int i = 0; i < n; i++)
                 cl_u32(cl, 0); /* Space where hindex will be written. */
-#endif
 }
 
 static inline void
 cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
          struct vc4_bo *bo, uint32_t offset)
 {
-#ifndef USE_VC4_SIMULATOR
         *(uint32_t *)(cl->base + cl->reloc_next) = vc4_gem_hindex(vc4, bo);
         cl->reloc_next += 4;
-#else
-        offset += simpenrose_hw_addr(bo->map);
-#endif
 
         cl->reloc_count--;
 
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index 032d606eb7a..a9fa7ef70f1 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -32,8 +32,6 @@
 #include "indices/u_primconvert.h"
 #include "pipe/p_screen.h"
 
-#define __user
-#include "vc4_drm.h"
 #include "vc4_screen.h"
 #include "vc4_context.h"
 #include "vc4_resource.h"
@@ -111,19 +109,24 @@ vc4_flush(struct pipe_context *pctx)
         submit.shader_record_count = vc4->shader_rec_count;
 
         if (!(vc4_debug & VC4_DEBUG_NORAST)) {
+                int ret;
+
 #ifndef USE_VC4_SIMULATOR
-                int ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
-                if (ret)
-                        errx(1, "VC4 submit failed\n");
+                ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
 #else
-                vc4_simulator_flush(vc4, csurf);
+                ret = vc4_simulator_flush(vc4, &submit, csurf);
 #endif
+                if (ret)
+                        errx(1, "VC4 submit failed\n");
         }
 
         vc4_reset_cl(&vc4->bcl);
         vc4_reset_cl(&vc4->rcl);
         vc4_reset_cl(&vc4->shader_rec);
         vc4_reset_cl(&vc4->bo_handles);
+#ifdef USE_VC4_SIMULATOR
+        vc4_reset_cl(&vc4->bo_pointers);
+#endif
         vc4->shader_rec_count = 0;
 
         vc4->needs_flush = false;
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index c125d1b0c5d..ee9ddcfd82b 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -29,6 +29,8 @@
 #include "pipe/p_state.h"
 #include "util/u_slab.h"
 
+#define __user
+#include "vc4_drm.h"
 #include "vc4_bufmgr.h"
 #include "vc4_resource.h"
 #include "vc4_cl.h"
@@ -119,6 +121,9 @@ struct vc4_context {
         struct vc4_cl rcl;
         struct vc4_cl shader_rec;
         struct vc4_cl bo_handles;
+#ifdef USE_VC4_SIMULATOR
+        struct vc4_cl bo_pointers;
+#endif
         uint32_t shader_rec_count;
 
         struct vc4_bo *tile_alloc;
@@ -186,9 +191,9 @@ void vc4_draw_init(struct pipe_context *pctx);
 void vc4_state_init(struct pipe_context *pctx);
 void vc4_program_init(struct pipe_context *pctx);
 void vc4_simulator_init(struct vc4_screen *screen);
-void vc4_simulator_flush(struct vc4_context *vc4,
-                         struct vc4_surface *color_surf);
-void *vc4_simulator_alloc(struct vc4_screen *screen, uint32_t size);
+int vc4_simulator_flush(struct vc4_context *vc4,
+                        struct drm_vc4_submit_cl *args,
+                        struct vc4_surface *color_surf);
 
 void vc4_get_uniform_bo(struct vc4_context *vc4,
                         struct vc4_compiled_shader *shader,
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 9e4454b141d..d5628d0d3ca 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -125,15 +125,10 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;
         cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE);
         assert(vtx->num_elements <= 8);
-#ifndef USE_VC4_SIMULATOR
         /* Note that number of attributes == 0 in the packet means 8
          * attributes.  This field also contains the offset into shader_rec.
          */
         cl_u32(&vc4->bcl, vtx->num_elements & 0x7);
-#else
-        cl_u32(&vc4->bcl, simpenrose_hw_addr(vc4->shader_rec.next) |
-               (vtx->num_elements & 0x7));
-#endif
 
         /* Note that the primitive type fields match with OpenGL/gallium
          * definitions, up to but not including QUADS.
diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h
index 1fe5ce14190..a761122637f 100644
--- a/src/gallium/drivers/vc4/vc4_screen.h
+++ b/src/gallium/drivers/vc4/vc4_screen.h
@@ -44,7 +44,6 @@ struct vc4_screen {
         int fd;
 
         void *simulator_mem_base;
-        uint32_t simulator_mem_next;
         uint32_t simulator_mem_size;
 };
 
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index bd938b2644c..2b59aa53f5a 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -25,17 +25,207 @@
 
 #include <stdio.h>
 
+#include "util/u_memory.h"
+
 #include "vc4_screen.h"
 #include "vc4_context.h"
+#include "vc4_simulator_validate.h"
 #include "simpenrose/simpenrose.h"
 
-void
-vc4_simulator_flush(struct vc4_context *vc4, struct vc4_surface *csurf)
+/* Wraps a vc4_bo in a calloc'ed CMA object backed by the next slice of the
+ * simulator's memory (sized up to a multiple of 4096) and advances
+ * dev->simulator_mem_next past it. */
+static struct drm_gem_cma_object *
+vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo)
+{
+        struct vc4_screen *screen = dev->vc4->screen;
+        struct drm_gem_cma_object *obj = CALLOC_STRUCT(drm_gem_cma_object);
+        uint32_t size = align(bo->size, 4096);
+
+        obj->bo = bo;
+        obj->base.size = size;
+        obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next;
+        obj->paddr = simpenrose_hw_addr(obj->vaddr);
+
+        dev->simulator_mem_next = align(dev->simulator_mem_next + size, 4096);
+        assert(dev->simulator_mem_next <= screen->simulator_mem_size);
+        return obj;
+}
+
+/* Allocates a vc4_bo of the requested size (named "simulator validate")
+ * and wraps it in a CMA object via vc4_wrap_bo_with_cma().
+ */
+static struct drm_gem_cma_object *
+drm_gem_cma_create(struct drm_device *dev, size_t size)
+{
+        return vc4_wrap_bo_with_cma(dev, vc4_bo_alloc(dev->vc4->screen, size,
+                                                      "simulator validate"));
+}
+
+/* Copies every BO referenced by the submit into simulator memory and records
+ * the wrappers in exec->bo.  Returns 0, or -ENOMEM if the wrapper array
+ * cannot be allocated (a NULL result for a zero count is not an error). */
+static int
+vc4_simulator_pin_bos(struct drm_device *dev, struct drm_vc4_submit_cl *args,
+                      struct exec_info *exec)
+{
+        struct vc4_bo **bos = dev->vc4->bo_pointers.base;
+
+        exec->bo_count = args->bo_handle_count;
+        exec->bo = calloc(exec->bo_count, sizeof(*exec->bo));
+        if (exec->bo_count && !exec->bo)
+                return -ENOMEM;
+        for (uint32_t i = 0; i < exec->bo_count; i++) {
+                exec->bo[i] = vc4_wrap_bo_with_cma(dev, bos[i]);
+                memcpy(exec->bo[i]->vaddr, bos[i]->map, bos[i]->size);
+        }
+
+        return 0;
+}
+
+/* Copies each pinned BO's contents back from simulator memory into its
+ * bo->map, then frees the wrapper objects and the exec->bo array.
+ * Always returns 0.
+ */
+static int
+vc4_simulator_unpin_bos(struct drm_vc4_submit_cl *args,
+                        struct exec_info *exec)
+{
+        for (int idx = 0; idx < exec->bo_count; idx++) {
+                struct drm_gem_cma_object *wrapper = exec->bo[idx];
+
+                memcpy(wrapper->bo->map, wrapper->vaddr, wrapper->bo->size);
+                free(wrapper);
+        }
+        free(exec->bo);
+        return 0;
+}
+
+/* Copies the bin CL, render CL and shader records named by @args into a
+ * temporary buffer, validates/relocates them into a newly created exec BO
+ * and fills in the ct0/ct1 and shader addresses in @exec.
+ * Returns 0 on success or a negative errno value on failure.
+ */
+static int
+vc4_cl_validate(struct drm_device *dev, struct drm_vc4_submit_cl *args,
+		struct exec_info *exec)
+{
+	void *temp = NULL;
+	void *bin, *render, *shader_rec;
+	int ret = 0;
+	uint32_t bin_offset = 0;
+	uint32_t render_offset = bin_offset + args->bin_cl_len;
+	uint32_t shader_rec_offset = roundup(render_offset +
+					     args->render_cl_len, 16);
+	uint32_t exec_size = shader_rec_offset + args->shader_record_len;
+	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
+					  args->shader_record_count);
+
+	if (shader_rec_offset < render_offset ||
+	    exec_size < shader_rec_offset ||
+	    args->shader_record_count >= (UINT_MAX /
+					  sizeof(struct vc4_shader_state)) ||
+	    temp_size < exec_size) {
+		DRM_ERROR("overflow in exec arguments\n");
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Allocate space where we'll store the copied in user command lists
+	 * and shader records.
+	 *
+	 * We don't just copy directly into the BOs because we need to
+	 * read the contents back for validation, and I think the
+	 * bo->vaddr is uncached access.
+	 */
+	temp = kmalloc(temp_size, GFP_KERNEL);
+	if (!temp) {
+		DRM_ERROR("Failed to allocate storage for copying "
+			  "in bin/render CLs.\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+	bin = temp + bin_offset;
+	render = temp + render_offset;
+	shader_rec = temp + shader_rec_offset;
+	exec->shader_state = temp + exec_size;
+	exec->shader_state_size = args->shader_record_count;
+
+	ret = copy_from_user(bin, args->bin_cl, args->bin_cl_len);
+	if (ret) {
+		DRM_ERROR("Failed to copy in bin cl\n");
+		goto fail;
+	}
+
+	ret = copy_from_user(render, args->render_cl, args->render_cl_len);
+	if (ret) {
+		DRM_ERROR("Failed to copy in render cl\n");
+		goto fail;
+	}
+
+	ret = copy_from_user(shader_rec, args->shader_records,
+			     args->shader_record_len);
+	if (ret) {
+		DRM_ERROR("Failed to copy in shader recs\n");
+		goto fail;
+	}
+
+	exec->exec_bo = drm_gem_cma_create(dev, exec_size);
+#if 0
+	if (IS_ERR(exec->exec_bo)) {
+		DRM_ERROR("Couldn't allocate BO for exec\n");
+		ret = PTR_ERR(exec->exec_bo);
+		exec->exec_bo = NULL;
+		goto fail;
+	}
+#endif
+
+	exec->ct0ca = exec->exec_bo->paddr + bin_offset;
+	exec->ct0ea = exec->ct0ca + args->bin_cl_len;
+	exec->ct1ca = exec->exec_bo->paddr + render_offset;
+	exec->ct1ea = exec->ct1ca + args->render_cl_len;
+	exec->shader_paddr = exec->exec_bo->paddr + shader_rec_offset;
+
+	ret = vc4_validate_cl(dev,
+			      exec->exec_bo->vaddr + bin_offset, bin,
+			      args->bin_cl_len, true, exec);
+	if (ret)
+		goto fail;
+
+	ret = vc4_validate_cl(dev,
+			      exec->exec_bo->vaddr + render_offset, render,
+			      args->render_cl_len, false, exec);
+	if (ret)
+		goto fail;
+
+	ret = vc4_validate_shader_recs(dev,
+				       exec->exec_bo->vaddr + shader_rec_offset,
+				       shader_rec,
+				       args->shader_record_len,
+				       exec);
+
+fail:
+	kfree(temp);
+	return ret;
+}
+
+int
+vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args,
+                    struct vc4_surface *csurf)
 {
         struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
         uint32_t winsys_stride = ctex->bo->simulator_winsys_stride;
         uint32_t sim_stride = ctex->slices[0].stride;
         uint32_t row_len = MIN2(sim_stride, winsys_stride);
+        struct exec_info exec;
+        struct drm_device local_dev = {
+                .vc4 = vc4,
+                .simulator_mem_next = 0,
+        };
+        struct drm_device *dev = &local_dev;
+        int ret;
+
+        memset(&exec, 0, sizeof(exec));
 
         if (ctex->bo->simulator_winsys_map) {
 #if 0
@@ -53,10 +243,22 @@ vc4_simulator_flush(struct vc4_context *vc4, struct vc4_surface *csurf)
                 }
         }
 
-        simpenrose_do_binning(simpenrose_hw_addr(vc4->bcl.base),
-                              simpenrose_hw_addr(vc4->bcl.next));
-        simpenrose_do_rendering(simpenrose_hw_addr(vc4->rcl.base),
-                                simpenrose_hw_addr(vc4->rcl.next));
+        ret = vc4_simulator_pin_bos(dev, args, &exec);
+        if (ret)
+                return ret;
+
+        ret = vc4_cl_validate(dev, args, &exec);
+        if (ret)
+                return ret;
+
+        simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
+        simpenrose_do_rendering(exec.ct1ca, exec.ct1ea);
+
+        ret = vc4_simulator_unpin_bos(args, &exec);
+        if (ret)
+                return ret;
+
+        free(exec.exec_bo);
 
         if (ctex->bo->simulator_winsys_map) {
                 for (int y = 0; y < ctex->base.b.height0; y++) {
@@ -65,6 +267,8 @@ vc4_simulator_flush(struct vc4_context *vc4, struct vc4_surface *csurf)
                                row_len);
                 }
         }
+
+        return 0;
 }
 
 void
@@ -75,22 +279,4 @@ vc4_simulator_init(struct vc4_screen *screen)
         screen->simulator_mem_size = simpenrose_get_mem_size();
 }
 
-/**
- * Allocates GPU memory in the simulator's address space.
- *
- * We just allocate for the lifetime of the context now, but some day we'll
- * want an actual memory allocator at runtime.
- */
-void *
-vc4_simulator_alloc(struct vc4_screen *screen, uint32_t size)
-{
-        void *alloc = screen->simulator_mem_base + screen->simulator_mem_next;
-
-        screen->simulator_mem_next += size;
-        assert(screen->simulator_mem_next < screen->simulator_mem_size);
-        screen->simulator_mem_next = align(screen->simulator_mem_next, 4096);
-
-        return alloc;
-}
-
 #endif /* USE_VC4_SIMULATOR */
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.c b/src/gallium/drivers/vc4/vc4_simulator_validate.c
new file mode 100644
index 00000000000..2839136bc42
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Command list validator for VC4.
+ *
+ * The VC4 has no IOMMU between it and system memory.  So, a user with
+ * access to execute command lists could escalate privilege by
+ * overwriting system memory (drawing to it as a framebuffer) or
+ * reading system memory it shouldn't (reading it as a texture, or
+ * uniform data, or vertex data).
+ *
+ * This validates command lists to ensure that all accesses are within
+ * the bounds of the GEM objects referenced.  It explicitly whitelists
+ * packets, and looks at the offsets in any address fields to make
+ * sure they're constrained within the BOs they reference.
+ *
+ * Note that because of the validation that's happening anyway, this
+ * is where GEM relocation processing happens.
+ */
+
+#include "vc4_simulator_validate.h"
+#include "vc4_packet.h"
+
+#define VALIDATE_ARGS \
+	struct exec_info *exec,				\
+	void *validated,				\
+	void *untrusted
+
+/* Relocates the branch address: adds the paddr of the BO selected by
+ * bo_index[0] to the untrusted offset at the start of the packet body.
+ */
+static int
+validate_branch_to_sublist(VALIDATE_ARGS)
+{
+	struct drm_gem_cma_object *sublist = exec->bo[exec->bo_index[0]];
+	uint32_t offset = *(uint32_t *)untrusted;
+
+	/* XXX: Validate address jumped to */
+	*(uint32_t *)validated = offset + sublist->paddr;
+
+	return 0;
+}
+
+static int
+validate_store_tile_buffer_general(VALIDATE_ARGS)
+{
+#if 0
+	struct drm_gem_cma_object *fbo;
+
+	/* XXX: Validate address offset */
+
+	fbo = exec->bo[exec->bo_index[0]];
+
+	/* XXX */
+	/*
+	*(uint32_t *)(validated + 2) =
+		*(uint32_t *)(untrusted + 2) + fbo->paddr;
+		*/
+#endif
+	/* Relocation is disabled above, so this packet is accepted as-is. */
+	return 0;
+}
+
+static int
+validate_indexed_prim_list(VALIDATE_ARGS)
+{
+	struct drm_gem_cma_object *ib;
+	uint32_t max_index = *(uint32_t *)(untrusted + 9);
+	uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
+	uint32_t ib_access_end = (max_index + 1) * index_size;
+	/* Bounds-check the index buffer access before relocating its address.
+	 * max_index == ~0 would make max_index + 1 above wrap to 0. */
+	if (max_index == ~0) {
+		DRM_ERROR("unlimited max index\n");
+		return -EINVAL;
+	}
+	/* The multiplication by index_size wrapped around. */
+	if (ib_access_end < max_index) {
+		DRM_ERROR("IB access overflow\n");
+		return -EINVAL;
+	}
+	/* The index buffer is the BO selected by bo_index[0]. */
+	ib = exec->bo[exec->bo_index[0]];
+	if (ib_access_end > ib->base.size) {
+		DRM_ERROR("IB access out of bounds (%d/%d)\n",
+			  ib_access_end, ib->base.size);
+		return -EINVAL;
+	}
+	/* XXX: the offset at +5 is not part of the bounds check above. */
+	*(uint32_t *)(validated + 5) =
+		*(uint32_t *)(untrusted + 5) + ib->paddr;
+
+	return 0;
+}
+
+/* Records a GL shader state packet in exec->shader_state and rebases its
+ * address word onto exec->shader_paddr. */
+static int
+validate_gl_shader_state(VALIDATE_ARGS)
+{
+	struct vc4_shader_state *state;
+	uint32_t slot = exec->shader_state_count++;
+
+	if (slot >= exec->shader_state_size) { /* XXX? */
+		DRM_ERROR("More requests for shader states than declared\n");
+		return -EINVAL;
+	}
+	state = &exec->shader_state[slot];
+	state->packet = VC4_PACKET_GL_SHADER_STATE;
+	state->addr = *(uint32_t *)untrusted;
+	*(uint32_t *)validated = state->addr + exec->shader_paddr;
+	return 0;
+}
+
+/* Records an NV shader state packet, rejects addresses that are not
+ * 16-byte aligned, and rebases the address onto exec->shader_paddr.
+ */
+static int
+validate_nv_shader_state(VALIDATE_ARGS)
+{
+	struct vc4_shader_state *state;
+	uint32_t slot = exec->shader_state_count++;
+
+	if (slot >= exec->shader_state_size) {
+		DRM_ERROR("More requests for shader states than declared\n");
+		return -EINVAL;
+	}
+	state = &exec->shader_state[slot];
+	state->packet = VC4_PACKET_NV_SHADER_STATE;
+	state->addr = *(uint32_t *)untrusted;
+	if (state->addr & 15) {
+		DRM_ERROR("NV shader state address 0x%08x misaligned\n",
+			  state->addr);
+		return -EINVAL;
+	}
+	*(uint32_t *)validated = state->addr + exec->shader_paddr;
+	return 0;
+}
+
+/* Relocates the two BO-relative addresses in the packet body: the word at
+ * offset 0 against the BO selected by bo_index[0], and the word at offset 8
+ * against the BO selected by bo_index[1].
+ */
+static int
+validate_tile_binning_config(VALIDATE_ARGS)
+{
+	struct drm_gem_cma_object *alloc_bo = exec->bo[exec->bo_index[0]];
+	struct drm_gem_cma_object *state_bo = exec->bo[exec->bo_index[1]];
+	uint32_t *dst_alloc = validated, *dst_state = validated + 8;
+	const uint32_t *src_alloc = untrusted, *src_state = untrusted + 8;
+
+	/* XXX: Validate offsets */
+	*dst_alloc = *src_alloc + alloc_bo->paddr;
+	*dst_state = *src_state + state_bo->paddr;
+
+	return 0;
+}
+
+/* Relocates the address word at the start of the packet body against the
+ * BO selected by bo_index[0].
+ */
+static int
+validate_tile_rendering_mode_config(VALIDATE_ARGS)
+{
+	struct drm_gem_cma_object *target = exec->bo[exec->bo_index[0]];
+
+	/* XXX: Validate offsets */
+	*(uint32_t *)validated = target->paddr + *(uint32_t *)untrusted;
+
+	return 0;
+}
+
+/* Loads exec->bo_index from the packet body, failing with -EINVAL if any
+ * loaded index is not below exec->bo_count. */
+static int
+validate_gem_handles(VALIDATE_ARGS)
+{
+	int slot = 0;
+
+	memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
+	for (; slot < ARRAY_SIZE(exec->bo_index); slot++) {
+		if (exec->bo_index[slot] < exec->bo_count)
+			continue;
+		DRM_ERROR("Validated BO index %d >= %d\n",
+			  exec->bo_index[slot], exec->bo_count);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static const struct cmd_info {
+	bool bin;
+	bool render;
+	uint16_t len;
+	const char *name;
+	int (*func)(struct exec_info *exec, void *validated, void *untrusted);
+} cmd_info[] = {
+	[0] = { 1, 1, 1, "halt", NULL },
+	[1] = { 1, 1, 1, "nop", NULL },
+	[4] = { 1, 1, 1, "flush", NULL },
+	[5] = { 1, 0, 1, "flush all state", NULL },
+	[6] = { 1, 0, 1, "start tile binning", NULL },
+	[7] = { 1, 0, 1, "increment semaphore", NULL },
+	[8] = { 1, 1, 1, "wait on semaphore", NULL },
+	[17] = { 1, 1, 5, "branch to sublist", validate_branch_to_sublist },
+	[24] = { 0, 1, 1, "store MS resolved tile color buffer", NULL },
+	[25] = { 0, 1, 1, "store MS resolved tile color buffer and EOF", NULL },
+
+	[28] = { 0, 1, 7, "Store Tile Buffer General",
+		 validate_store_tile_buffer_general },
+
+	[32] = { 1, 1, 14, "Indexed Primitive List",
+		 validate_indexed_prim_list },
+
+	/* XXX: bounds check verts? */
+	[33] = { 1, 1, 10, "Vertex Array Primitives", NULL },
+
+	[56] = { 1, 1, 2, "primitive list format", NULL }, /* XXX: bin valid? */
+
+	[64] = { 1, 1, 5, "GL Shader State", validate_gl_shader_state },
+	[65] = { 1, 1, 5, "NV Shader State", validate_nv_shader_state },
+
+	[96] = { 1, 1, 4, "configuration bits", NULL },
+	[97] = { 1, 1, 5, "flat shade flags", NULL },
+	[98] = { 1, 1, 5, "point size", NULL },
+	[99] = { 1, 1, 5, "line width", NULL },
+	[100] = { 1, 1, 3, "RHT X boundary", NULL },
+	[101] = { 1, 1, 5, "Depth Offset", NULL },
+	[102] = { 1, 1, 9, "Clip Window", NULL },
+	[103] = { 1, 1, 5, "Viewport Offset", NULL },
+	[105] = { 1, 1, 9, "Clipper XY Scaling", NULL },
+	/* Note: The docs say this was also 105, but it was 106 in the
+	 * initial userland code drop.
+	 */
+	[106] = { 1, 1, 9, "Clipper Z Scale and Offset", NULL },
+
+	[112] = { 1, 0, 16, "tile binning configuration",
+		  validate_tile_binning_config },
+
+	/* XXX: Do we need to validate this one?  It's got width/height in it.
+	 */
+	[113] = { 0, 1, 11, "tile rendering mode configuration",
+		  validate_tile_rendering_mode_config},
+
+	[114] = { 0, 1, 14, "Clear Colors", NULL },
+
+	/* XXX: Do we need to validate here?  It's got tile x/y number for
+	 * rendering
+	 */
+	[115] = { 0, 1, 3, "Tile Coordinates", NULL },
+
+	[254] = { 1, 1, 9, "GEM handles", validate_gem_handles },
+};
+
+/* Walks the untrusted CL packet by packet, rejecting any packet that is
+ * unknown or not allowed for this CL type, and copies/relocates the rest
+ * into @validated.  Returns 0 on success or -EINVAL on a rejected packet.
+ */
+int
+vc4_validate_cl(struct drm_device *dev,
+		void *validated,
+		void *unvalidated,
+		uint32_t len,
+		bool is_bin,
+		struct exec_info *exec)
+{
+	uint32_t dst_offset = 0, src_offset = 0;
+
+	while (src_offset < len) {
+		void *dst_pkt = validated + dst_offset;
+		void *src_pkt = unvalidated + src_offset;
+		u8 cmd = *(uint8_t *)src_pkt;
+		const struct cmd_info *info;
+
+		if (cmd >= ARRAY_SIZE(cmd_info)) {
+			DRM_ERROR("0x%08x: packet %d out of bounds\n",
+				  src_offset, cmd);
+			return -EINVAL;
+		}
+
+		info = &cmd_info[cmd];
+		if (!info->name) {
+			DRM_ERROR("0x%08x: packet %d invalid\n",
+				  src_offset, cmd);
+			return -EINVAL;
+		}
+
+#if 0
+		DRM_INFO("0x%08x: packet %d (%s) size %d processing...\n",
+			 src_offset, cmd, info->name, info->len);
+#endif
+
+		if ((is_bin && !info->bin) || (!is_bin && !info->render)) {
+			DRM_ERROR("0x%08x: packet %d (%s) invalid for %s\n",
+				  src_offset, cmd, info->name,
+				  is_bin ? "binner" : "render");
+			return -EINVAL;
+		}
+
+		if (info->len > len - src_offset) { /* cannot wrap: src < len */
+			DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
+				  "exceeds bounds (0x%08x)\n",
+				  src_offset, cmd, info->name, info->len,
+				  len);
+			return -EINVAL;
+		}
+
+		if (cmd != 254)
+			memcpy(dst_pkt, src_pkt, info->len);
+
+		if (info->func && info->func(exec, dst_pkt + 1, src_pkt + 1)) {
+			DRM_ERROR("0x%08x: packet %d (%s) failed to "
+				  "validate\n",
+				  src_offset, cmd, info->name);
+			return -EINVAL;
+		}
+
+		src_offset += info->len;
+		/* GEM handle loading doesn't produce HW packets. */
+		if (cmd != 254)
+			dst_offset += info->len;
+
+		/* When the CL hits halt, it'll stop reading anything else. */
+		if (cmd == 0)
+			break;
+	}
+
+	return 0;
+}
+
+static int
+validate_shader_rec(struct drm_device *dev,
+		    struct exec_info *exec,
+		    void *validated,
+		    void *unvalidated,
+		    uint32_t len,
+		    struct vc4_shader_state *state)
+{
+	uint32_t *src_handles = unvalidated;
+	void *src_pkt;
+	void *dst_pkt = validated;
+	static const int gl_bo_offsets[] = {
+		4, 8, /* fs code, ubo */
+		16, 20, /* vs code, ubo */
+		28, 32, /* cs code, ubo */
+	};
+	static const int nv_bo_offsets[] = {
+		4, 8, /* fs code, ubo */
+		12, /* vbo */
+	};
+	struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_bo_offsets) + 8];
+	const int *bo_offsets;
+	uint32_t nr_attributes = 0, nr_bo, packet_size;
+	int i;
+	/* Pick the BO offset table and packet size for this state type. */
+	if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
+		bo_offsets = nv_bo_offsets;
+		nr_bo = ARRAY_SIZE(nv_bo_offsets);
+
+		packet_size = 16;
+	} else {
+		bo_offsets = gl_bo_offsets;
+		nr_bo = ARRAY_SIZE(gl_bo_offsets);
+
+		nr_attributes = state->addr & 0x7;
+		if (nr_attributes == 0)
+			nr_attributes = 8;
+		packet_size = 36 + nr_attributes * 8;
+	}
+	if ((nr_bo + nr_attributes) * 4 + packet_size > len) {
+		DRM_ERROR("overflowed shader packet read "
+			  "(handles %d, packet %d, len %d)\n",
+			  (nr_bo + nr_attributes) * 4, packet_size, len);
+		return -EINVAL;
+	}
+	/* Handle words come first; the packet itself follows them. */
+	src_pkt = unvalidated + 4 * (nr_bo + nr_attributes);
+	memcpy(dst_pkt, src_pkt, packet_size);
+
+	for (i = 0; i < nr_bo + nr_attributes; i++) {
+		if (src_handles[i] >= exec->bo_count) {
+			DRM_ERROR("shader rec bo index %d > %d\n",
+				  src_handles[i], exec->bo_count);
+			return -EINVAL;
+		}
+		bo[i] = exec->bo[src_handles[i]];
+	}
+	/* Relocate the fixed pointers listed in bo_offsets. */
+	for (i = 0; i < nr_bo; i++) {
+		/* XXX: validation */
+		uint32_t o = bo_offsets[i];
+		*(uint32_t *)(dst_pkt + o) =
+			bo[i]->paddr + *(uint32_t *)(src_pkt + o);
+	}
+	/* Relocate one address per attribute, starting at byte 36. */
+	for (i = 0; i < nr_attributes; i++) {
+		/* XXX: validation */
+		uint32_t o = 36 + i * 8;
+		*(uint32_t *)(dst_pkt + o) =
+			bo[nr_bo + i]->paddr + *(uint32_t *)(src_pkt + o);
+	}
+
+	return 0;
+}
+
+int
+vc4_validate_shader_recs(struct drm_device *dev,
+			 void *validated,
+			 void *unvalidated,
+			 uint32_t len,
+			 struct exec_info *exec)
+{
+	uint32_t dst_offset = 0;
+	uint32_t src_offset = 0;
+	uint32_t i;
+	int ret = 0;
+	/* Validate every shader state recorded while validating the CLs. */
+	for (i = 0; i < exec->shader_state_count; i++) {
+		if ((exec->shader_state[i].addr & ~0xf) !=
+		    (validated - exec->exec_bo->vaddr -
+		     (exec->shader_paddr - exec->exec_bo->paddr))) {
+			DRM_ERROR("unexpected shader rec offset: "
+				  "0x%08x vs 0x%08x\n",
+				  exec->shader_state[i].addr & ~0xf,
+				  (int)(validated -
+					exec->exec_bo->vaddr -
+					(exec->shader_paddr -
+					 exec->exec_bo->paddr)));
+			return -EINVAL;
+		}
+
+		ret = validate_shader_rec(dev, exec,
+					  validated + dst_offset,
+					  unvalidated + src_offset,
+					  len - src_offset,
+					  &exec->shader_state[i]);
+		if (ret)
+			return ret;
+		/* XXX: dst/src offsets are not advanced between records yet */
+	}
+
+	return ret;
+}
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h
new file mode 100644
index 00000000000..4a2a2181ab4
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC4_SIMULATOR_VALIDATE_H
+#define VC4_SIMULATOR_VALIDATE_H
+
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <errno.h>
+
+#define DRM_INFO(...) fprintf(stderr, __VA_ARGS__)
+#define DRM_ERROR(...) fprintf(stderr, __VA_ARGS__)
+#define kmalloc(size, arg) malloc(size)
+#define kfree(ptr) free(ptr)
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define roundup(x, y) align(x, y)
+
+static inline int
+copy_from_user(void *dst, void *src, size_t size)
+{
+        memcpy(dst, src, size);
+        return 0;
+}
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+
+struct drm_device {
+        struct vc4_context *vc4;
+        uint32_t simulator_mem_next;
+};
+
+struct drm_gem_cma_object {
+        struct vc4_bo *bo;
+
+        struct {
+                uint32_t size;
+        } base;
+        uint32_t paddr;
+        void *vaddr;
+};
+
+struct exec_info {
+	/* This is the array of BOs that were looked up at the start of exec.
+	 * Command validation will use indices into this array.
+	 */
+	struct drm_gem_cma_object **bo;
+	uint32_t bo_count;
+
+	/* Current indices into @bo loaded by the non-hardware packet
+	 * that passes in indices.  This can be used even without
+	 * checking that we've seen one of those packets, because
+	 * @bo_count is always >= 1, and this struct is initialized to
+	 * 0.
+	 */
+	uint32_t bo_index[2];
+	uint32_t max_width, max_height;
+
+	/**
+	 * This is the BO where we store the validated command lists
+	 * and shader records.
+	 */
+	struct drm_gem_cma_object *exec_bo;
+
+	/**
+	 * This tracks the per-shader-record state (packet 64) that
+	 * determines the length of the shader record and the offset
+	 * it's expected to be found at.  It gets read in from the
+	 * command lists.
+	 */
+	struct vc4_shader_state {
+		uint8_t packet;
+		uint32_t addr;
+	} *shader_state;
+
+	/** How many shader states the user declared they were using. */
+	uint32_t shader_state_size;
+	/** How many shader state records the validator has seen. */
+	uint32_t shader_state_count;
+
+	/**
+	 * Computed addresses pointing into exec_bo where we start the
+	 * bin thread (ct0) and render thread (ct1).
+	 */
+	uint32_t ct0ca, ct0ea;
+	uint32_t ct1ca, ct1ea;
+	uint32_t shader_paddr;
+};
+
+int vc4_validate_cl(struct drm_device *dev,
+                    void *validated,
+                    void *unvalidated,
+                    uint32_t len,
+                    bool is_bin,
+                    struct exec_info *exec);
+
+int vc4_validate_shader_recs(struct drm_device *dev,
+                             void *validated,
+                             void *unvalidated,
+                             uint32_t len,
+                             struct exec_info *exec);
+
+#endif /* VC4_SIMULATOR_VALIDATE_H */