From e7c8ad0a6c8ba263f29b7c3c5120bc6beabeba7b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sat, 20 Jun 2015 15:30:04 -0700 Subject: [PATCH] vc4: Add kernel RCL support for MSAA rendering. --- src/gallium/drivers/vc4/kernel/vc4_packet.h | 10 + .../drivers/vc4/kernel/vc4_render_cl.c | 254 +++++++++++++++--- src/gallium/drivers/vc4/kernel/vc4_validate.c | 5 +- src/gallium/drivers/vc4/vc4_drm.h | 7 +- src/gallium/drivers/vc4/vc4_job.c | 2 + 5 files changed, 239 insertions(+), 39 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_packet.h b/src/gallium/drivers/vc4/kernel/vc4_packet.h index 8e4fd364ece..c2e3a5128f0 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_packet.h +++ b/src/gallium/drivers/vc4/kernel/vc4_packet.h @@ -156,6 +156,16 @@ enum vc4_packet { #define VC4_LOADSTORE_FULL_RES_DISABLE_ZS (1 << 1) #define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR (1 << 0) +/** @{ + * + * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and + * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER. + */ +#define VC4_LOADSTORE_FULL_RES_EOF (1 << 3) +#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL (1 << 2) +#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS (1 << 1) +#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR (1 << 0) + /** @{ * * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and diff --git a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c index 3447312907b..d9c68423f8b 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c +++ b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c @@ -39,6 +39,8 @@ struct vc4_rcl_setup { struct drm_gem_cma_object *color_write; struct drm_gem_cma_object *zs_read; struct drm_gem_cma_object *zs_write; + struct drm_gem_cma_object *msaa_color_write; + struct drm_gem_cma_object *msaa_zs_write; struct drm_gem_cma_object *rcl; u32 next_offset; @@ -80,6 +82,22 @@ static void vc4_store_before_load(struct vc4_rcl_setup *setup) rcl_u32(setup, 0); /* no address, since we're in None mode */ } +/* + * Calculates the physical address of the start of a tile in a RCL surface. + * + * Unlike the other load/store packets, + * VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile + * coordinates packet, and instead just store to the address given. + */ +static uint32_t vc4_full_res_offset(struct vc4_exec_info *exec, + struct drm_gem_cma_object *bo, + struct drm_vc4_submit_rcl_surface *surf, + uint8_t x, uint8_t y) +{ + return bo->paddr + surf->offset + VC4_TILE_BUFFER_SIZE * + (DIV_ROUND_UP(exec->args->width, 32) * y + x); +} + /* * Emits a PACKET_TILE_COORDINATES if one isn't already pending. * @@ -107,22 +125,41 @@ static void emit_tile(struct vc4_exec_info *exec, * may be outstanding at a time. */ if (setup->color_read) { - rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - rcl_u16(setup, args->color_read.bits); - rcl_u32(setup, - setup->color_read->paddr + args->color_read.offset); + if (args->color_read.flags & + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { + rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER); + rcl_u32(setup, + vc4_full_res_offset(exec, setup->color_read, + &args->color_read, x, y) | + VC4_LOADSTORE_FULL_RES_DISABLE_ZS); + } else { + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); + rcl_u16(setup, args->color_read.bits); + rcl_u32(setup, setup->color_read->paddr + + args->color_read.offset); + } } if (setup->zs_read) { - if (setup->color_read) { - /* Exec previous load. */ - vc4_tile_coordinates(setup, x, y); - vc4_store_before_load(setup); + if (args->zs_read.flags & + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { + rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER); + rcl_u32(setup, + vc4_full_res_offset(exec, setup->zs_read, + &args->zs_read, x, y) | + VC4_LOADSTORE_FULL_RES_DISABLE_COLOR); + } else { + if (setup->color_read) { + /* Exec previous load. */ + vc4_tile_coordinates(setup, x, y); + vc4_store_before_load(setup); + } + + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); + rcl_u16(setup, args->zs_read.bits); + rcl_u32(setup, setup->zs_read->paddr + + args->zs_read.offset); } - - rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); - rcl_u16(setup, args->zs_read.bits); - rcl_u32(setup, setup->zs_read->paddr + args->zs_read.offset); } /* Clipping depends on tile coordinates having been @@ -143,20 +180,60 @@ static void emit_tile(struct vc4_exec_info *exec, (y * exec->bin_tiles_x + x) * 32)); } + if (setup->msaa_color_write) { + bool last_tile_write = (!setup->msaa_zs_write && + !setup->zs_write && + !setup->color_write); + uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_ZS; + + if (!last_tile_write) + bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL; + else if (last) + bits |= VC4_LOADSTORE_FULL_RES_EOF; + rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER); + rcl_u32(setup, + vc4_full_res_offset(exec, setup->msaa_color_write, + &args->msaa_color_write, x, y) | + bits); + } + + if (setup->msaa_zs_write) { + bool last_tile_write = (!setup->zs_write && + !setup->color_write); + uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_COLOR; + + if (setup->msaa_color_write) + vc4_tile_coordinates(setup, x, y); + if (!last_tile_write) + bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL; + else if (last) + bits |= VC4_LOADSTORE_FULL_RES_EOF; + rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER); + rcl_u32(setup, + vc4_full_res_offset(exec, setup->msaa_zs_write, + &args->msaa_zs_write, x, y) | + bits); + } + if (setup->zs_write) { + bool last_tile_write = !setup->color_write; + + if (setup->msaa_color_write || setup->msaa_zs_write) + vc4_tile_coordinates(setup, x, y); + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); rcl_u16(setup, args->zs_write.bits | - (setup->color_write ? - VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR : 0)); + (last_tile_write ? + 0 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR)); rcl_u32(setup, (setup->zs_write->paddr + args->zs_write.offset) | - ((last && !setup->color_write) ? + ((last && last_tile_write) ? VC4_LOADSTORE_TILE_BUFFER_EOF : 0)); } if (setup->color_write) { - if (setup->zs_write) { - /* Reset after previous store */ + if (setup->msaa_color_write || setup->msaa_zs_write || + setup->zs_write) { vc4_tile_coordinates(setup, x, y); } @@ -191,14 +268,26 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, } if (setup->color_read) { - loop_body_size += (VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE); + if (args->color_read.flags & + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { + loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE; + } else { + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; + } } if (setup->zs_read) { - if (setup->color_read) { - loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; - loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; + if (args->zs_read.flags & + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { + loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE; + } else { + if (setup->color_read && + !(args->color_read.flags & + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) { + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; + loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; + } + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; } - loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; } if (has_bin) { @@ -206,13 +295,23 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE; } + if (setup->msaa_color_write) + loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE; + if (setup->msaa_zs_write) + loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE; + if (setup->zs_write) loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; - if (setup->color_write) { - if (setup->zs_write) - loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; + if (setup->color_write) loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE; - } + + /* We need a VC4_PACKET_TILE_COORDINATES in between each store. */ + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE * + ((setup->msaa_color_write != NULL) + + (setup->msaa_zs_write != NULL) + + (setup->color_write != NULL) + + (setup->zs_write != NULL) - 1); + size += xtiles * ytiles * loop_body_size; setup->rcl = drm_gem_cma_create(dev, size); @@ -265,6 +364,56 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, return 0; } +static int vc4_full_res_bounds_check(struct vc4_exec_info *exec, + struct drm_gem_cma_object *obj, + struct drm_vc4_submit_rcl_surface *surf) +{ + struct drm_vc4_submit_cl *args = exec->args; + u32 render_tiles_stride = DIV_ROUND_UP(exec->args->width, 32); + + if (surf->offset > obj->base.size) { + DRM_ERROR("surface offset %d > BO size %zd\n", + surf->offset, obj->base.size); + return -EINVAL; + } + + if ((obj->base.size - surf->offset) / VC4_TILE_BUFFER_SIZE < + render_tiles_stride * args->max_y_tile + args->max_x_tile) { + DRM_ERROR("MSAA tile %d, %d out of bounds " + "(bo size %zd, offset %d).\n", + args->max_x_tile, args->max_y_tile, + obj->base.size, + surf->offset); + return -EINVAL; + } + + return 0; +} + +static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec, + struct drm_gem_cma_object **obj, + struct drm_vc4_submit_rcl_surface *surf) +{ + if (surf->flags != 0 || surf->bits != 0) { + DRM_ERROR("MSAA surface had nonzero flags/bits\n"); + return -EINVAL; + } + + if (surf->hindex == ~0) + return 0; + + *obj = vc4_use_bo(exec, surf->hindex); + if (!*obj) + return -EINVAL; + + if (surf->offset & 0xf) { + DRM_ERROR("MSAA write must be 16b aligned.\n"); + return -EINVAL; + } + + return vc4_full_res_bounds_check(exec, *obj, surf); +} + static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, struct drm_gem_cma_object **obj, struct drm_vc4_submit_rcl_surface *surf) @@ -276,9 +425,10 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, uint8_t format = VC4_GET_FIELD(surf->bits, VC4_LOADSTORE_TILE_BUFFER_FORMAT); int cpp; + int ret; - if (surf->pad != 0) { - DRM_ERROR("Padding unset\n"); + if (surf->flags & ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { + DRM_ERROR("Extra flags set\n"); return -EINVAL; } @@ -289,6 +439,25 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, if (!*obj) return -EINVAL; + if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { + if (surf == &exec->args->zs_write) { + DRM_ERROR("general zs write may not be a full-res.\n"); + return -EINVAL; + } + + if (surf->bits != 0) { + DRM_ERROR("load/store general bits set with " + "full res load/store.\n"); + return -EINVAL; + } + + ret = vc4_full_res_bounds_check(exec, *obj, surf); + if (!ret) + return ret; + + return 0; + } + if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK | VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK | VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) { @@ -341,6 +510,7 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, static int vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec, + struct vc4_rcl_setup *setup, struct drm_gem_cma_object **obj, struct drm_vc4_submit_rcl_surface *surf) { @@ -350,13 +520,15 @@ vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec, VC4_RENDER_CONFIG_FORMAT); int cpp; - if (surf->pad != 0) { - DRM_ERROR("Padding unset\n"); + if (surf->flags != 0) { + DRM_ERROR("No flags supported on render config.\n"); return -EINVAL; } if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK | - VC4_RENDER_CONFIG_FORMAT_MASK)) { + VC4_RENDER_CONFIG_FORMAT_MASK | + VC4_RENDER_CONFIG_MS_MODE_4X | + VC4_RENDER_CONFIG_DECIMATE_MODE_4X)) { DRM_ERROR("Unknown bits in render config: 0x%04x\n", surf->bits); return -EINVAL; @@ -420,12 +592,13 @@ int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec) return -EINVAL; } - ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read); + ret = vc4_rcl_render_config_surface_setup(exec, &setup, + &setup.color_write, + &args->color_write); if (ret) return ret; - ret = vc4_rcl_render_config_surface_setup(exec, &setup.color_write, - &args->color_write); + ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read); if (ret) return ret; @@ -437,10 +610,21 @@ int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec) if (ret) return ret; + ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_color_write, + &args->msaa_color_write); + if (ret) + return ret; + + ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_zs_write, + &args->msaa_zs_write); + if (ret) + return ret; + /* We shouldn't even have the job submitted to us if there's no * surface to write out. */ - if (!setup.color_write && !setup.zs_write) { + if (!setup.color_write && !setup.zs_write && + !setup.msaa_color_write && !setup.msaa_zs_write) { DRM_ERROR("RCL requires color or Z/S write\n"); return -EINVAL; } diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index 8dbb46cea85..c9e3934ab4b 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -359,9 +359,8 @@ validate_tile_binning_config(VALIDATE_ARGS) } if (flags & (VC4_BIN_CONFIG_DB_NON_MS | - VC4_BIN_CONFIG_TILE_BUFFER_64BIT | - VC4_BIN_CONFIG_MS_MODE_4X)) { - DRM_ERROR("unsupported bining config flags 0x%02x\n", flags); + VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) { + DRM_ERROR("unsupported binning config flags 0x%02x\n", flags); return -EINVAL; } diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h index c93454b38fe..bf58c6cd078 100644 --- a/src/gallium/drivers/vc4/vc4_drm.h +++ b/src/gallium/drivers/vc4/vc4_drm.h @@ -45,9 +45,12 @@ struct drm_vc4_submit_rcl_surface { uint32_t offset; /* Offset to start of buffer. */ /* * Bits for either render config (color_write) or load/store packet. + * Bits should all be 0 for MSAA load/stores. */ uint16_t bits; - uint16_t pad; + +#define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES (1 << 0) + uint16_t flags; }; /** @@ -129,6 +132,8 @@ struct drm_vc4_submit_cl { struct drm_vc4_submit_rcl_surface color_write; struct drm_vc4_submit_rcl_surface zs_read; struct drm_vc4_submit_rcl_surface zs_write; + struct drm_vc4_submit_rcl_surface msaa_color_write; + struct drm_vc4_submit_rcl_surface msaa_zs_write; uint32_t clear_color[2]; uint32_t clear_z; uint8_t clear_s; diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c index 41c2c5fb205..cde24dc074c 100644 --- a/src/gallium/drivers/vc4/vc4_job.c +++ b/src/gallium/drivers/vc4/vc4_job.c @@ -161,6 +161,8 @@ vc4_job_submit(struct vc4_context *vc4) vc4->zs_read, true, false); vc4_submit_setup_rcl_surface(vc4, &submit.zs_write, vc4->zs_write, true, true); + submit.msaa_color_write.hindex = ~0; + submit.msaa_zs_write.hindex = ~0; submit.bo_handles = (uintptr_t)vc4->bo_handles.base; submit.bo_handle_count = cl_offset(&vc4->bo_handles) / 4; -- 2.30.2