From: Eric Anholt Date: Wed, 17 Jun 2015 20:24:06 +0000 (-0700) Subject: vc4: Move tile state/alloc allocation into the kernel. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1d45e44b2f9e52d6eebe84ab08da6b7393011f95;p=mesa.git vc4: Move tile state/alloc allocation into the kernel. This avoids a security issue where userspace could have written the tile state/tile alloc behind the GPU's back, and will apparently be necessary for fixing stability bugs (tile state buffers are missing some top bits for the tile alloc's address). --- diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h index 83802dd774a..1fd8aa9fb28 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_drv.h +++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h @@ -28,8 +28,6 @@ enum vc4_bo_mode { VC4_MODE_UNDECIDED, - VC4_MODE_TILE_ALLOC, - VC4_MODE_TSDA, VC4_MODE_RENDER, VC4_MODE_SHADER, }; @@ -91,7 +89,8 @@ struct vc4_exec_info { bool found_start_tile_binning_packet; bool found_increment_semaphore_packet; uint8_t bin_tiles_x, bin_tiles_y; - struct drm_gem_cma_object *tile_alloc_bo; + struct drm_gem_cma_object *tile_bo; + uint32_t tile_alloc_offset; /** * Computed addresses pointing into exec_bo where we start the diff --git a/src/gallium/drivers/vc4/kernel/vc4_packet.h b/src/gallium/drivers/vc4/kernel/vc4_packet.h index 764a125c6e8..88cfc0fa9f0 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_packet.h +++ b/src/gallium/drivers/vc4/kernel/vc4_packet.h @@ -232,15 +232,19 @@ enum vc4_packet { /** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */ #define VC4_BIN_CONFIG_DB_NON_MS (1 << 7) -#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 (0 << 5) -#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 (1 << 5) -#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 (2 << 5) -#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 (3 << 5) - -#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 (0 << 3) -#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 (1 << 3) -#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 (2 << 3) -#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 (3 << 3) +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5) +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 0 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 1 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 2 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 3 + +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK VC4_MASK(4, 3) +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 0 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 1 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3 #define VC4_BIN_CONFIG_AUTO_INIT_TSDA (1 << 2) #define VC4_BIN_CONFIG_TILE_BUFFER_64BIT (1 << 1) diff --git a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c index de6070fec72..e2d907ad91f 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c +++ b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c @@ -140,7 +140,8 @@ static void emit_tile(struct vc4_exec_info *exec, if (has_bin) { rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST); - rcl_u32(setup, (exec->tile_alloc_bo->paddr + + rcl_u32(setup, (exec->tile_bo->paddr + + exec->tile_alloc_offset + (y * exec->bin_tiles_x + x) * 32)); } diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index 80b0e653d80..a0b67a7e50b 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -375,15 +375,10 @@ validate_nv_shader_state(VALIDATE_ARGS) static int validate_tile_binning_config(VALIDATE_ARGS) { - struct drm_gem_cma_object *tile_allocation; - struct drm_gem_cma_object *tile_state_data_array; + struct drm_device *dev = exec->exec_bo->base.dev; uint8_t flags; - uint32_t tile_allocation_size; - uint32_t tile_alloc_init_block_size; - - if (!vc4_use_handle(exec, 0, VC4_MODE_TILE_ALLOC, &tile_allocation) || - !vc4_use_handle(exec, 1, VC4_MODE_TSDA, &tile_state_data_array)) - return -EINVAL; + uint32_t tile_state_size, tile_alloc_size; + uint32_t tile_count; if (exec->found_tile_binning_mode_config_packet) { DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); @@ -393,6 +388,7 @@ validate_tile_binning_config(VALIDATE_ARGS) exec->bin_tiles_x = *(uint8_t *)(untrusted + 12); exec->bin_tiles_y = *(uint8_t *)(untrusted + 13); + tile_count = exec->bin_tiles_x * exec->bin_tiles_y; flags = *(uint8_t *)(untrusted + 14); if (exec->bin_tiles_x == 0 || @@ -402,15 +398,6 @@ validate_tile_binning_config(VALIDATE_ARGS) return -EINVAL; } - /* Our validation relies on the user not getting to set up their own - * tile state/tile allocation BO contents. - */ - if (!(flags & VC4_BIN_CONFIG_AUTO_INIT_TSDA)) { - DRM_ERROR("binning config missing " - "VC4_BIN_CONFIG_AUTO_INIT_TSDA\n"); - return -EINVAL; - } - if (flags & (VC4_BIN_CONFIG_DB_NON_MS | VC4_BIN_CONFIG_TILE_BUFFER_64BIT | VC4_BIN_CONFIG_MS_MODE_4X)) { @@ -418,40 +405,52 @@ validate_tile_binning_config(VALIDATE_ARGS) return -EINVAL; } - if (*(uint32_t *)(untrusted + 0) != 0) { - DRM_ERROR("tile allocation offset != 0 unsupported\n"); - return -EINVAL; - } - tile_allocation_size = *(uint32_t *)(untrusted + 4); - if (tile_allocation_size > tile_allocation->base.size) { - DRM_ERROR("tile allocation size %d > BO size %d\n", - tile_allocation_size, tile_allocation->base.size); - return -EINVAL; - } - *(uint32_t *)validated = tile_allocation->paddr; - exec->tile_alloc_bo = tile_allocation; - - tile_alloc_init_block_size = 1 << (5 + ((flags >> 5) & 3)); - if (exec->bin_tiles_x * exec->bin_tiles_y * - tile_alloc_init_block_size > tile_allocation_size) { - DRM_ERROR("tile init exceeds tile alloc size (%d vs %d)\n", - exec->bin_tiles_x * exec->bin_tiles_y * - tile_alloc_init_block_size, - tile_allocation_size); - return -EINVAL; - } + /* The tile state data array is 48 bytes per tile, and we put it at + * the start of a BO containing both it and the tile alloc. + */ + tile_state_size = 48 * tile_count; + + /* Since the tile alloc array will follow us, align. */ + exec->tile_alloc_offset = roundup(tile_state_size, 4096); + + *(uint8_t *)(validated + 14) = + ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK | + VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) | + VC4_BIN_CONFIG_AUTO_INIT_TSDA | + VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32, + VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) | + VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128, + VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE)); + + /* Initial block size. */ + tile_alloc_size = 32 * tile_count; + + /* + * The initial allocation gets rounded to the next 256 bytes before + * the hardware starts fulfilling further allocations. + */ + tile_alloc_size = roundup(tile_alloc_size, 256); - if (*(uint32_t *)(untrusted + 8) != 0) { - DRM_ERROR("TSDA offset != 0 unsupported\n"); - return -EINVAL; - } - if (exec->bin_tiles_x * exec->bin_tiles_y * 48 > - tile_state_data_array->base.size) { - DRM_ERROR("TSDA of %db too small for %dx%d bin config\n", - tile_state_data_array->base.size, - exec->bin_tiles_x, exec->bin_tiles_y); - } - *(uint32_t *)(validated + 8) = tile_state_data_array->paddr; + /* Add space for the extra allocations. This is what gets used first, + * before overflow memory. It must have at least 4096 bytes, but we + * want to avoid overflow memory usage if possible. + */ + tile_alloc_size += 1024 * 1024; + + exec->tile_bo = drm_gem_cma_create(dev, exec->tile_alloc_offset + + tile_alloc_size); + if (!exec->tile_bo) + return -ENOMEM; + list_addtail(&to_vc4_bo(&exec->tile_bo->base)->unref_head, + &exec->unref_list); + + /* tile alloc address. */ + *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr + + exec->tile_alloc_offset); + /* tile alloc size. */ + *(uint32_t *)(validated + 4) = tile_alloc_size; + /* tile state address. */ + *(uint32_t *)(validated + 8) = exec->tile_bo->paddr; return 0; } diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index ebd357f7065..630f8e68896 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -184,8 +184,6 @@ vc4_context_destroy(struct pipe_context *pctx) pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL); pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL); - vc4_bo_unreference(&vc4->tile_alloc); - vc4_bo_unreference(&vc4->tile_state); vc4_program_fini(pctx); diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index ad5d0b153ff..d5d6be16f6e 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -208,9 +208,6 @@ struct vc4_context { uint32_t draw_height; /** @} */ - struct vc4_bo *tile_alloc; - struct vc4_bo *tile_state; - struct util_slab_mempool transfer_pool; struct blitter_context *blitter; diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 3e181d0606a..5e6d70d6f33 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -72,44 +72,15 @@ vc4_start_draw(struct vc4_context *vc4) uint32_t tilew = align(width, 64) / 64; uint32_t tileh = align(height, 64) / 64; - /* Tile alloc memory setup: We use an initial alloc size of 32b. The - * hardware then aligns that to 256b (we use 4096, because all of our - * BO allocations align to that anyway), then for some reason the - * simulator wants an extra page available, even if you have overflow - * memory set up. - * - * XXX: The binner only does 28-bit addressing math, so the tile alloc - * and tile state should be in the same BO and that BO needs to not - * cross a 256MB boundary, somehow. - */ - uint32_t tile_alloc_size = 32 * tilew * tileh; - tile_alloc_size = align(tile_alloc_size, 4096); - tile_alloc_size += 4096; - uint32_t tile_state_size = 48 * tilew * tileh; - if (!vc4->tile_alloc || vc4->tile_alloc->size < tile_alloc_size) { - vc4_bo_unreference(&vc4->tile_alloc); - vc4->tile_alloc = vc4_bo_alloc(vc4->screen, tile_alloc_size, - "tile_alloc"); - } - if (!vc4->tile_state || vc4->tile_state->size < tile_state_size) { - vc4_bo_unreference(&vc4->tile_state); - vc4->tile_state = vc4_bo_alloc(vc4->screen, tile_state_size, - "tile_state"); - } - // Tile state data is 48 bytes per tile, I think it can be thrown away // as soon as binning is finished. - cl_start_reloc(&vc4->bcl, 2); cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG); - cl_reloc(vc4, &vc4->bcl, vc4->tile_alloc, 0); - cl_u32(&vc4->bcl, vc4->tile_alloc->size); - cl_reloc(vc4, &vc4->bcl, vc4->tile_state, 0); + cl_u32(&vc4->bcl, 0); /* tile alloc addr, filled by kernel */ + cl_u32(&vc4->bcl, 0); /* tile alloc size, filled by kernel */ + cl_u32(&vc4->bcl, 0); /* tile state addr, filled by kernel */ cl_u8(&vc4->bcl, tilew); cl_u8(&vc4->bcl, tileh); - cl_u8(&vc4->bcl, - VC4_BIN_CONFIG_AUTO_INIT_TSDA | - VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 | - VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32); + cl_u8(&vc4->bcl, 0); /* flags, filled by kernel. */ /* START_TILE_BINNING resets the statechange counters in the hardware, * which are what is used when a primitive is binned to a tile to diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 2e4d8798f8e..b58013dd2ee 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -45,6 +45,7 @@ vc4_wrap_bo_with_cma(struct drm_device *dev, struct vc4_bo *bo) drm_bo->bo = bo; obj->base.size = size; + obj->base.dev = dev; obj->vaddr = screen->simulator_mem_base + dev->simulator_mem_next; obj->paddr = simpenrose_hw_addr(obj->vaddr); diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h index c3b7a638f93..2bb36b253bb 100644 --- a/src/gallium/drivers/vc4/vc4_simulator_validate.h +++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h @@ -66,6 +66,7 @@ struct drm_device { struct drm_gem_object { uint32_t size; + struct drm_device *dev; }; struct drm_gem_cma_object {