From a032a9665f275085c825b54b62128ae90adba1c6 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 1 May 2019 14:34:00 -0700 Subject: [PATCH] iris: Enable PIPE_CAP_SURFACE_REINTERPRET_BLOCKS This makes CompressedTexSubImage from a PBO source do proper GPU rendering to upload instead of stalling to map the PBO source on the CPU (then copying it on the CPU). Thanks to Bas Nieuwenhuizen for pointing out that Vulkan includes this functionality, and to Jason Ekstrand for writing the code I adapted. Vulkan only supports a single layer, however, and this code tries to support multiple layers as long as it's miplevel 0. Improves performance in Sid Meier's Civilization VI: Average frame time (ms): -3.67423% +/- 1.46201% (n=5) 99th percentile frame time (ms): -5.09910% +/- 3.87874% (n=5) --- src/gallium/drivers/iris/iris_screen.c | 1 + src/gallium/drivers/iris/iris_state.c | 100 +++++++++++++++++++++++-- 2 files changed, 95 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index 5b321a6f862..30d32b2ed43 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -179,6 +179,7 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES: case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: return true; case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE: case PIPE_CAP_TGSI_FS_FBFETCH: diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 677fa5aba53..1d8f61847ff 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -1901,7 +1901,8 @@ iris_create_surface(struct pipe_context *ctx, return NULL; } - surf->view = (struct isl_view) { + struct isl_view *view = &surf->view; + *view = (struct isl_view) { .format = fmt.fmt, .base_level = tmpl->u.tex.level, .levels = 1, @@ 
-1925,15 +1926,102 @@ iris_create_surface(struct pipe_context *ctx, if (!unlikely(map)) return NULL; - unsigned aux_modes = res->aux.possible_usages; - while (aux_modes) { - enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); + if (!isl_format_is_compressed(res->surf.format)) { + /* This is a normal surface. Fill out a SURFACE_STATE for each possible + * auxiliary surface mode and return the pipe_surface. + */ + unsigned aux_modes = res->aux.possible_usages; + while (aux_modes) { + enum isl_aux_usage aux_usage = u_bit_scan(&aux_modes); - fill_surface_state(&screen->isl_dev, map, res, &surf->view, aux_usage); + fill_surface_state(&screen->isl_dev, map, res, view, aux_usage); + + map += SURFACE_STATE_ALIGNMENT; + } - map += SURFACE_STATE_ALIGNMENT; + return psurf; } + /* The resource has a compressed format, which is not renderable, but we + * have a renderable view format. We must be attempting to upload blocks + * of compressed data via an uncompressed view. + * + * In this case, we can assume there are no auxiliary buffers, a single + * miplevel, and that the resource is single-sampled. Gallium may try + * and create an uncompressed view with multiple layers, however. + */ + assert(!isl_format_is_compressed(fmt.fmt)); + assert(res->aux.possible_usages == 1 << ISL_AUX_USAGE_NONE); + assert(res->surf.samples == 1); + assert(view->levels == 1); + + struct isl_surf isl_surf; + uint32_t offset_B = 0, tile_x_sa = 0, tile_y_sa = 0; + + if (view->base_level > 0) { + /* We can't rely on the hardware's miplevel selection with such + * a substantial lie about the format, so we select a single image + * using the Tile X/Y Offset fields. In this case, we can't handle + * multiple array slices. + * + * On Broadwell, HALIGN and VALIGN are specified in pixels and are + * hard-coded to align to exactly the block size of the compressed + * texture. 
This means that, when reinterpreted as a non-compressed + * texture, the tile offsets may be anything and we can't rely on + * X/Y Offset. + * + * Return NULL to force the state tracker to take fallback paths. + */ + if (view->array_len > 1 || GEN_GEN == 8) + return NULL; + + const bool is_3d = res->surf.dim == ISL_SURF_DIM_3D; + isl_surf_get_image_surf(&screen->isl_dev, &res->surf, + view->base_level, + is_3d ? 0 : view->base_array_layer, + is_3d ? view->base_array_layer : 0, + &isl_surf, + &offset_B, &tile_x_sa, &tile_y_sa); + + /* We use address and tile offsets to access a single level/layer + * as a subimage, so reset level/layer so it doesn't offset again. + */ + view->base_array_layer = 0; + view->base_level = 0; + } else { + /* Level 0 doesn't require tile offsets, and the hardware can find + * array slices using QPitch even with the format override, so we + * can allow layers in this case. Copy the original ISL surface. + */ + memcpy(&isl_surf, &res->surf, sizeof(isl_surf)); + } + + /* Scale down the image dimensions by the block size. */ + const struct isl_format_layout *fmtl = + isl_format_get_layout(res->surf.format); + isl_surf.format = fmt.fmt; + isl_surf.logical_level0_px.width = + DIV_ROUND_UP(isl_surf.logical_level0_px.width, fmtl->bw); + isl_surf.logical_level0_px.height = + DIV_ROUND_UP(isl_surf.logical_level0_px.height, fmtl->bh); + isl_surf.phys_level0_sa.width /= fmtl->bw; + isl_surf.phys_level0_sa.height /= fmtl->bh; + tile_x_sa /= fmtl->bw; + tile_y_sa /= fmtl->bh; + + psurf->width = isl_surf.logical_level0_px.width; + psurf->height = isl_surf.logical_level0_px.height; + + struct isl_surf_fill_state_info f = { + .surf = &isl_surf, + .view = view, + .mocs = mocs(res->bo), + .address = res->bo->gtt_offset + offset_B, + .x_offset_sa = tile_x_sa, + .y_offset_sa = tile_y_sa, + }; + + isl_surf_fill_state_s(&screen->isl_dev, map, &f); return psurf; } -- 2.30.2