From 3bcd0f1912a60cc9d3813923d18d29465e41ff56 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 14 Jul 2016 17:26:43 -0700 Subject: [PATCH] vc4: Speed up glGenerateMipmaps by avoiding shadow baselevel. To support general GL_TEXTURE_BASE_LEVEL we have to copy to a temporary miptree. However, if a single level is being selected, we can use the existing miptree and force all the sampling to be from that particular level. This avoids a ton of software fallbacks in glGenerateMipmaps(), which uses base levels in the blit implementation in gallium. Improves "glmark2 -b terrain" from 2 fps to 3 (perhaps some more precision would be useful?), and cuts its CPU usage during the benchmarking from ~30% to ~10% (total CPU time from 8.8s to 7.6s). --- src/gallium/drivers/vc4/vc4_context.h | 1 + src/gallium/drivers/vc4/vc4_program.c | 11 +++++++++++ src/gallium/drivers/vc4/vc4_qir.h | 1 + src/gallium/drivers/vc4/vc4_resource.c | 4 +++- src/gallium/drivers/vc4/vc4_state.c | 9 +++++++-- 5 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 7da2b554e49..751f0437807 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -74,6 +74,7 @@ struct vc4_sampler_view { struct pipe_sampler_view base; uint32_t texture_p0; uint32_t texture_p1; + bool force_first_level; }; struct vc4_sampler_state { diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index b4b62e3869a..4ee49a258f1 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -393,6 +393,12 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) } } + if (c->key->tex[unit].forced_first_level) { + lod = qir_uniform_f(c, c->key->tex[unit].forced_first_level); + is_txl = true; + is_txb = false; + } + struct qreg texture_u[] = { qir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0, unit), qir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, unit), @@ -2313,6 +2319,7 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key, { for (int i = 0; i < texstate->num_textures; i++) { struct pipe_sampler_view *sampler = texstate->textures[i]; + struct vc4_sampler_view *vc4_sampler = vc4_sampler_view(sampler); struct pipe_sampler_state *sampler_state = texstate->samplers[i]; @@ -2333,6 +2340,10 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key, key->tex[i].compare_func = sampler_state->compare_func; key->tex[i].wrap_s = sampler_state->wrap_s; key->tex[i].wrap_t = sampler_state->wrap_t; + if (vc4_sampler->force_first_level) { + key->tex[i].forced_first_level = + sampler->u.tex.first_level; + } } } diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 88eda225d80..81b55651cec 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -314,6 +314,7 @@ struct vc4_key { unsigned compare_func:3; unsigned wrap_s:3; unsigned wrap_t:3; + unsigned forced_first_level:8; }; struct { uint16_t msaa_width, msaa_height; diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index a07fa3d9979..08d7d207f79 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -877,7 +877,9 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, if (shadow->writes == orig->writes && orig->bo->private) return; - perf_debug("Updating shadow texture due to %s\n", + perf_debug("Updating %dx%d@%d shadow texture due to %s\n", + orig->base.b.width0, orig->base.b.height0, + view->u.tex.first_level, view->u.tex.first_level ? "base level" : "raster layout"); for (int i = 0; i <= shadow->base.b.last_level; i++) { diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index bf4e0232daf..df9e1a3ab03 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -603,7 +603,8 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, * Also, Raspberry Pi doesn't support sampling from raster textures, * so we also have to copy to a temporary then. */ - if (cso->u.tex.first_level || + if ((cso->u.tex.first_level && + (cso->u.tex.first_level != cso->u.tex.last_level)) || rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) { struct vc4_resource *shadow_parent = vc4_resource(prsc); struct pipe_resource tmpl = shadow_parent->base.b; @@ -626,6 +627,8 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, clone->writes = shadow_parent->writes - 1; assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R); + } else if (cso->u.tex.first_level) { + so->force_first_level = true; } so->base.texture = prsc; so->base.reference.count = 1; @@ -634,7 +637,9 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, so->texture_p0 = (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) | VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) | - VC4_SET_FIELD(cso->u.tex.last_level - + VC4_SET_FIELD(so->force_first_level ? + cso->u.tex.last_level : + cso->u.tex.last_level - cso->u.tex.first_level, VC4_TEX_P0_MIPLVLS) | VC4_SET_FIELD(cso->target == PIPE_TEXTURE_CUBE, VC4_TEX_P0_CMMODE)); -- 2.30.2