vc4: Speed up glGenerateMipmap() by avoiding shadow baselevel.
author Eric Anholt <eric@anholt.net>
Fri, 15 Jul 2016 00:26:43 +0000 (17:26 -0700)
committer Eric Anholt <eric@anholt.net>
Fri, 15 Jul 2016 20:54:00 +0000 (13:54 -0700)
To support general GL_TEXTURE_BASE_LEVEL we have to copy to a temporary
miptree.  However, if a single level is being selected, we can use the
existing miptree and force all the sampling to be from that particular
level.

This avoids a ton of software fallbacks in glGenerateMipmap(), which uses
base levels in the blit implementation in gallium.  Improves "glmark2 -b
terrain" from 2 fps to 3 (perhaps some more precision would be useful?),
and cuts its CPU usage during the benchmarking from ~30% to ~10% (total
CPU time from 8.8s to 7.6s).

src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_resource.c
src/gallium/drivers/vc4/vc4_state.c

index 7da2b554e4908088d444fd9d9b562df7ebc668b7..751f0437807e66fb86d011f49ce2ff0f4baf3858 100644 (file)
@@ -74,6 +74,7 @@ struct vc4_sampler_view {
         struct pipe_sampler_view base;
         uint32_t texture_p0;
         uint32_t texture_p1;
+        bool force_first_level;
 };
 
 struct vc4_sampler_state {
index b4b62e3869ad0748ce1957b5a80318af4c192c3f..4ee49a258f129bf9bb12b5fd594bfc6c876473af 100644 (file)
@@ -393,6 +393,12 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
                 }
         }
 
+        if (c->key->tex[unit].forced_first_level) {
+                lod = qir_uniform_f(c, c->key->tex[unit].forced_first_level);
+                is_txl = true;
+                is_txb = false;
+        }
+
         struct qreg texture_u[] = {
                 qir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0, unit),
                 qir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, unit),
@@ -2313,6 +2319,7 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
 {
         for (int i = 0; i < texstate->num_textures; i++) {
                 struct pipe_sampler_view *sampler = texstate->textures[i];
+                struct vc4_sampler_view *vc4_sampler = vc4_sampler_view(sampler);
                 struct pipe_sampler_state *sampler_state =
                         texstate->samplers[i];
 
@@ -2333,6 +2340,10 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
                         key->tex[i].compare_func = sampler_state->compare_func;
                         key->tex[i].wrap_s = sampler_state->wrap_s;
                         key->tex[i].wrap_t = sampler_state->wrap_t;
+                        if (vc4_sampler->force_first_level) {
+                                key->tex[i].forced_first_level =
+                                        sampler->u.tex.first_level;
+                        }
                 }
         }
 
index 88eda225d803ed9803a8c85620f6eada01eaee2a..81b55651cecaf8e6d8381429fc28bbfcf3a21651 100644 (file)
@@ -314,6 +314,7 @@ struct vc4_key {
                                 unsigned compare_func:3;
                                 unsigned wrap_s:3;
                                 unsigned wrap_t:3;
+                                unsigned forced_first_level:8;
                         };
                         struct {
                                 uint16_t msaa_width, msaa_height;
index a07fa3d997994e0b51889c4ef1d72691a4a7d9a2..08d7d207f7987b1c968b9ba54b837314032b4566 100644 (file)
@@ -877,7 +877,9 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
         if (shadow->writes == orig->writes && orig->bo->private)
                 return;
 
-        perf_debug("Updating shadow texture due to %s\n",
+        perf_debug("Updating %dx%d@%d shadow texture due to %s\n",
+                   orig->base.b.width0, orig->base.b.height0,
+                   view->u.tex.first_level,
                    view->u.tex.first_level ? "base level" : "raster layout");
 
         for (int i = 0; i <= shadow->base.b.last_level; i++) {
index bf4e0232daf3b96b9fe9672ddc1b3ce37e6d6c2b..df9e1a3ab03d4dd48494dd0754e95cc8b013735b 100644 (file)
@@ -603,7 +603,8 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
          * Also, Raspberry Pi doesn't support sampling from raster textures,
          * so we also have to copy to a temporary then.
          */
-        if (cso->u.tex.first_level ||
+        if ((cso->u.tex.first_level &&
+             (cso->u.tex.first_level != cso->u.tex.last_level)) ||
             rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) {
                 struct vc4_resource *shadow_parent = vc4_resource(prsc);
                 struct pipe_resource tmpl = shadow_parent->base.b;
@@ -626,6 +627,8 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                 clone->writes = shadow_parent->writes - 1;
 
                 assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R);
+        } else if (cso->u.tex.first_level) {
+                so->force_first_level = true;
         }
         so->base.texture = prsc;
         so->base.reference.count = 1;
@@ -634,7 +637,9 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
         so->texture_p0 =
                 (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
                  VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) |
-                 VC4_SET_FIELD(cso->u.tex.last_level -
+                 VC4_SET_FIELD(so->force_first_level ?
+                               cso->u.tex.last_level :
+                               cso->u.tex.last_level -
                                cso->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
                  VC4_SET_FIELD(cso->target == PIPE_TEXTURE_CUBE,
                                VC4_TEX_P0_CMMODE));