vc4: Move tile state/alloc allocation into the kernel.
author Eric Anholt <eric@anholt.net>
Wed, 17 Jun 2015 20:24:06 +0000 (13:24 -0700)
committer Eric Anholt <eric@anholt.net>
Thu, 18 Jun 2015 06:53:49 +0000 (23:53 -0700)
This avoids a security issue where userspace could have written the tile
state/tile alloc behind the GPU's back, and will apparently be necessary
for fixing stability bugs (tile state buffers are missing some top bits
for the tile alloc's address).

src/gallium/drivers/vc4/kernel/vc4_drv.h
src/gallium/drivers/vc4/kernel/vc4_packet.h
src/gallium/drivers/vc4/kernel/vc4_render_cl.c
src/gallium/drivers/vc4/kernel/vc4_validate.c
src/gallium/drivers/vc4/vc4_context.c
src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_draw.c
src/gallium/drivers/vc4/vc4_simulator.c
src/gallium/drivers/vc4/vc4_simulator_validate.h

index 83802dd774a35d8fc77d708ec05e9bb77e551f94..1fd8aa9fb28d5ff9551233ddf91ca8ac3d059774 100644 (file)
@@ -28,8 +28,6 @@
 
 enum vc4_bo_mode {
        VC4_MODE_UNDECIDED,
-       VC4_MODE_TILE_ALLOC,
-       VC4_MODE_TSDA,
        VC4_MODE_RENDER,
        VC4_MODE_SHADER,
 };
@@ -91,7 +89,8 @@ struct vc4_exec_info {
        bool found_start_tile_binning_packet;
        bool found_increment_semaphore_packet;
        uint8_t bin_tiles_x, bin_tiles_y;
-       struct drm_gem_cma_object *tile_alloc_bo;
+       struct drm_gem_cma_object *tile_bo;
+       uint32_t tile_alloc_offset;
 
        /**
         * Computed addresses pointing into exec_bo where we start the
index 764a125c6e8fc2690d63cb0a23c70530cec3ba34..88cfc0fa9f0cf28f22a8b4855299ebb183daddcd 100644 (file)
@@ -232,15 +232,19 @@ enum vc4_packet {
 /** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
 #define VC4_BIN_CONFIG_DB_NON_MS                   (1 << 7)
 
-#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32         (0 << 5)
-#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64         (1 << 5)
-#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128        (2 << 5)
-#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256        (3 << 5)
-
-#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32    (0 << 3)
-#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64    (1 << 3)
-#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128   (2 << 3)
-#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256   (3 << 3)
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK       VC4_MASK(6, 5)
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT      5
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32         0
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64         1
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128        2
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256        3
+
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK  VC4_MASK(4, 3)
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32    0
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64    1
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128   2
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256   3
 
 #define VC4_BIN_CONFIG_AUTO_INIT_TSDA              (1 << 2)
 #define VC4_BIN_CONFIG_TILE_BUFFER_64BIT           (1 << 1)
index de6070fec727b99a8d412609e4639128296ab3ba..e2d907ad91fc89528dea97a852deaba0c01f6c12 100644 (file)
@@ -140,7 +140,8 @@ static void emit_tile(struct vc4_exec_info *exec,
 
        if (has_bin) {
                rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST);
-               rcl_u32(setup, (exec->tile_alloc_bo->paddr +
+               rcl_u32(setup, (exec->tile_bo->paddr +
+                               exec->tile_alloc_offset +
                                (y * exec->bin_tiles_x + x) * 32));
        }
 
index 80b0e653d8071de34a80a73fd9568e5042ac28d4..a0b67a7e50b784d231797b9973be2a1bb7a150ec 100644 (file)
@@ -375,15 +375,10 @@ validate_nv_shader_state(VALIDATE_ARGS)
 static int
 validate_tile_binning_config(VALIDATE_ARGS)
 {
-       struct drm_gem_cma_object *tile_allocation;
-       struct drm_gem_cma_object *tile_state_data_array;
+       struct drm_device *dev = exec->exec_bo->base.dev;
        uint8_t flags;
-       uint32_t tile_allocation_size;
-       uint32_t tile_alloc_init_block_size;
-
-       if (!vc4_use_handle(exec, 0, VC4_MODE_TILE_ALLOC, &tile_allocation) ||
-           !vc4_use_handle(exec, 1, VC4_MODE_TSDA, &tile_state_data_array))
-               return -EINVAL;
+       uint32_t tile_state_size, tile_alloc_size;
+       uint32_t tile_count;
 
        if (exec->found_tile_binning_mode_config_packet) {
                DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
@@ -393,6 +388,7 @@ validate_tile_binning_config(VALIDATE_ARGS)
 
        exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
        exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
+       tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
        flags = *(uint8_t *)(untrusted + 14);
 
        if (exec->bin_tiles_x == 0 ||
@@ -402,15 +398,6 @@ validate_tile_binning_config(VALIDATE_ARGS)
                return -EINVAL;
        }
 
-       /* Our validation relies on the user not getting to set up their own
-        * tile state/tile allocation BO contents.
-        */
-       if (!(flags & VC4_BIN_CONFIG_AUTO_INIT_TSDA)) {
-               DRM_ERROR("binning config missing "
-                         "VC4_BIN_CONFIG_AUTO_INIT_TSDA\n");
-               return -EINVAL;
-       }
-
        if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
                     VC4_BIN_CONFIG_TILE_BUFFER_64BIT |
                     VC4_BIN_CONFIG_MS_MODE_4X)) {
@@ -418,40 +405,52 @@ validate_tile_binning_config(VALIDATE_ARGS)
                return -EINVAL;
        }
 
-       if (*(uint32_t *)(untrusted + 0) != 0) {
-               DRM_ERROR("tile allocation offset != 0 unsupported\n");
-               return -EINVAL;
-       }
-       tile_allocation_size = *(uint32_t *)(untrusted + 4);
-       if (tile_allocation_size > tile_allocation->base.size) {
-               DRM_ERROR("tile allocation size %d > BO size %d\n",
-                         tile_allocation_size, tile_allocation->base.size);
-               return -EINVAL;
-       }
-       *(uint32_t *)validated = tile_allocation->paddr;
-       exec->tile_alloc_bo = tile_allocation;
-
-       tile_alloc_init_block_size = 1 << (5 + ((flags >> 5) & 3));
-       if (exec->bin_tiles_x * exec->bin_tiles_y *
-           tile_alloc_init_block_size > tile_allocation_size) {
-               DRM_ERROR("tile init exceeds tile alloc size (%d vs %d)\n",
-                         exec->bin_tiles_x * exec->bin_tiles_y *
-                         tile_alloc_init_block_size,
-                         tile_allocation_size);
-               return -EINVAL;
-       }
+       /* The tile state data array is 48 bytes per tile, and we put it at
+        * the start of a BO containing both it and the tile alloc.
+        */
+       tile_state_size = 48 * tile_count;
+
+       /* Since the tile alloc array will follow us, align. */
+       exec->tile_alloc_offset = roundup(tile_state_size, 4096);
+
+       *(uint8_t *)(validated + 14) =
+               ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
+                           VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
+                VC4_BIN_CONFIG_AUTO_INIT_TSDA |
+                VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
+                              VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
+                VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
+                              VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
+
+       /* Initial block size. */
+       tile_alloc_size = 32 * tile_count;
+
+       /*
+        * The initial allocation gets rounded to the next 256 bytes before
+        * the hardware starts fulfilling further allocations.
+        */
+       tile_alloc_size = roundup(tile_alloc_size, 256);
 
-       if (*(uint32_t *)(untrusted + 8) != 0) {
-               DRM_ERROR("TSDA offset != 0 unsupported\n");
-               return -EINVAL;
-       }
-       if (exec->bin_tiles_x * exec->bin_tiles_y * 48 >
-           tile_state_data_array->base.size) {
-               DRM_ERROR("TSDA of %db too small for %dx%d bin config\n",
-                         tile_state_data_array->base.size,
-                         exec->bin_tiles_x, exec->bin_tiles_y);
-       }
-       *(uint32_t *)(validated + 8) = tile_state_data_array->paddr;
+       /* Add space for the extra allocations.  This is what gets used first,
+        * before overflow memory.  It must have at least 4096 bytes, but we
+        * want to avoid overflow memory usage if possible.
+        */
+       tile_alloc_size += 1024 * 1024;
+
+       exec->tile_bo = drm_gem_cma_create(dev, exec->tile_alloc_offset +
+                                          tile_alloc_size);
+       if (!exec->tile_bo)
+               return -ENOMEM;
+       list_addtail(&to_vc4_bo(&exec->tile_bo->base)->unref_head,
+                    &exec->unref_list);
+
+       /* tile alloc address. */
+       *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
+                                       exec->tile_alloc_offset);
+       /* tile alloc size. */
+       *(uint32_t *)(validated + 4) = tile_alloc_size;
+       /* tile state address. */
+       *(uint32_t *)(validated + 8) = exec->tile_bo->paddr;
 
        return 0;
 }
index ebd357f70658a14d88072738f70edcd40d2a1d2b..630f8e688967abfbc27b4aa6b71515cb759e514b 100644 (file)
@@ -184,8 +184,6 @@ vc4_context_destroy(struct pipe_context *pctx)
 
         pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
         pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
-        vc4_bo_unreference(&vc4->tile_alloc);
-        vc4_bo_unreference(&vc4->tile_state);
 
         vc4_program_fini(pctx);
 
index ad5d0b153ff74ed76510d0956ae4da526726038a..d5d6be16f6edebe9d794cf7af66b487947ba453a 100644 (file)
@@ -208,9 +208,6 @@ struct vc4_context {
         uint32_t draw_height;
         /** @} */
 
-        struct vc4_bo *tile_alloc;
-        struct vc4_bo *tile_state;
-
         struct util_slab_mempool transfer_pool;
         struct blitter_context *blitter;
 
index 3e181d0606af020673db89db530327cb9e5b8881..5e6d70d6f337921bb416b8bfe1ff5dc8a98ae4f7 100644 (file)
@@ -72,44 +72,15 @@ vc4_start_draw(struct vc4_context *vc4)
         uint32_t tilew = align(width, 64) / 64;
         uint32_t tileh = align(height, 64) / 64;
 
-        /* Tile alloc memory setup: We use an initial alloc size of 32b.  The
-         * hardware then aligns that to 256b (we use 4096, because all of our
-         * BO allocations align to that anyway), then for some reason the
-         * simulator wants an extra page available, even if you have overflow
-         * memory set up.
-         *
-         * XXX: The binner only does 28-bit addressing math, so the tile alloc
-         * and tile state should be in the same BO and that BO needs to not
-         * cross a 256MB boundary, somehow.
-         */
-        uint32_t tile_alloc_size = 32 * tilew * tileh;
-        tile_alloc_size = align(tile_alloc_size, 4096);
-        tile_alloc_size += 4096;
-        uint32_t tile_state_size = 48 * tilew * tileh;
-        if (!vc4->tile_alloc || vc4->tile_alloc->size < tile_alloc_size) {
-                vc4_bo_unreference(&vc4->tile_alloc);
-                vc4->tile_alloc = vc4_bo_alloc(vc4->screen, tile_alloc_size,
-                                               "tile_alloc");
-        }
-        if (!vc4->tile_state || vc4->tile_state->size < tile_state_size) {
-                vc4_bo_unreference(&vc4->tile_state);
-                vc4->tile_state = vc4_bo_alloc(vc4->screen, tile_state_size,
-                                               "tile_state");
-        }
-
         //   Tile state data is 48 bytes per tile, I think it can be thrown away
         //   as soon as binning is finished.
-        cl_start_reloc(&vc4->bcl, 2);
         cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
-        cl_reloc(vc4, &vc4->bcl, vc4->tile_alloc, 0);
-        cl_u32(&vc4->bcl, vc4->tile_alloc->size);
-        cl_reloc(vc4, &vc4->bcl, vc4->tile_state, 0);
+        cl_u32(&vc4->bcl, 0); /* tile alloc addr, filled by kernel */
+        cl_u32(&vc4->bcl, 0); /* tile alloc size, filled by kernel */
+        cl_u32(&vc4->bcl, 0); /* tile state addr, filled by kernel */
         cl_u8(&vc4->bcl, tilew);
         cl_u8(&vc4->bcl, tileh);
-        cl_u8(&vc4->bcl,
-              VC4_BIN_CONFIG_AUTO_INIT_TSDA |
-              VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 |
-              VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32);
+        cl_u8(&vc4->bcl, 0); /* flags, filled by kernel. */
 
         /* START_TILE_BINNING resets the statechange counters in the hardware,
          * which are what is used when a primitive is binned to a tile to
index 2e4d8798f8e3f06bda305a38f1ba4028b0fcff5d..b58013dd2ee3e43802df82c7f19dcd7cbe3f195d 100644 (file)
@@ -45,6 +45,7 @@ vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo)
 
         drm_bo->bo = bo;
         obj->base.size = size;
+        obj->base.dev = dev;
         obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next;
         obj->paddr = simpenrose_hw_addr(obj->vaddr);
 
index c3b7a638f93ba89c23f8037444a558f97962a9fd..2bb36b253bba848d66c6ef0b2e71ee029b77eb51 100644 (file)
@@ -66,6 +66,7 @@ struct drm_device {
 
 struct drm_gem_object {
         uint32_t size;
+        struct drm_device *dev;
 };
 
 struct drm_gem_cma_object {