From 0f4d2b0a2dd3fa39426f2789bf2a8fc939adf001 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 14 Jul 2015 12:18:40 -0700 Subject: [PATCH] vc4: Cache texture p0/p1 setup for the sampler view. In exchange for a bit of space and computation in CSO setup, we cut vc4_uniforms.c (draw time) code size by 4.8%. --- src/gallium/drivers/vc4/vc4_context.h | 12 +++++++++ src/gallium/drivers/vc4/vc4_state.c | 35 ++++++++++++++++++-------- src/gallium/drivers/vc4/vc4_uniforms.c | 24 ++++++------------ 3 files changed, 43 insertions(+), 28 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index d5d6be16f6e..be7df1bd347 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -69,6 +69,12 @@ #define VC4_DIRTY_UNCOMPILED_FS (1 << 22) #define VC4_DIRTY_COMPILED_FS (1 << 24) +struct vc4_sampler_view { + struct pipe_sampler_view base; + uint32_t texture_p0; + uint32_t texture_p1; +}; + struct vc4_texture_stateobj { struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS]; unsigned num_textures; @@ -326,6 +332,12 @@ vc4_context(struct pipe_context *pcontext) return (struct vc4_context *)pcontext; } +static inline struct vc4_sampler_view * +vc4_sampler_view(struct pipe_sampler_view *psview) +{ + return (struct vc4_sampler_view *)psview; +} + struct pipe_context *vc4_context_create(struct pipe_screen *pscreen, void *priv); void vc4_draw_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index e0ce4aee779..46852b6628c 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -499,13 +499,13 @@ static struct pipe_sampler_view * vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, const struct pipe_sampler_view *cso) { - struct pipe_sampler_view *so = malloc(sizeof(*so)); + struct vc4_sampler_view *so = malloc(sizeof(*so)); struct vc4_resource *rsc = 
vc4_resource(prsc); if (!so) return NULL; - *so = *cso; + so->base = *cso; pipe_reference(NULL, &prsc->reference); @@ -516,18 +516,19 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, * Also, Raspberry Pi doesn't support sampling from raster textures, * so we also have to copy to a temporary then. */ - if (so->u.tex.first_level || + if (cso->u.tex.first_level || rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) { struct vc4_resource *shadow_parent = vc4_resource(prsc); struct pipe_resource tmpl = shadow_parent->base.b; struct vc4_resource *clone; tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; - tmpl.width0 = u_minify(tmpl.width0, so->u.tex.first_level); - tmpl.height0 = u_minify(tmpl.height0, so->u.tex.first_level); - tmpl.last_level = so->u.tex.last_level - so->u.tex.first_level; + tmpl.width0 = u_minify(tmpl.width0, cso->u.tex.first_level); + tmpl.height0 = u_minify(tmpl.height0, cso->u.tex.first_level); + tmpl.last_level = cso->u.tex.last_level - cso->u.tex.first_level; prsc = vc4_resource_create(pctx->screen, &tmpl); + rsc = vc4_resource(prsc); clone = vc4_resource(prsc); clone->shadow_parent = &shadow_parent->base.b; /* Flag it as needing update of the contents from the parent. 
*/ @@ -535,11 +536,23 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc, assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R); } - so->texture = prsc; - so->reference.count = 1; - so->context = pctx; - - return so; + so->base.texture = prsc; + so->base.reference.count = 1; + so->base.context = pctx; + + so->texture_p0 = + (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) | + VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) | + VC4_SET_FIELD(cso->u.tex.last_level - + cso->u.tex.first_level, VC4_TEX_P0_MIPLVLS) | + VC4_SET_FIELD(cso->target == PIPE_TEXTURE_CUBE, + VC4_TEX_P0_CMMODE)); + so->texture_p1 = + (VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) | + VC4_SET_FIELD(prsc->height0 & 2047, VC4_TEX_P1_HEIGHT) | + VC4_SET_FIELD(prsc->width0 & 2047, VC4_TEX_P1_WIDTH)); + + return &so->base; } static void diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c index e8fca3d0cbd..e8e9d6b0119 100644 --- a/src/gallium/drivers/vc4/vc4_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_uniforms.c @@ -53,16 +53,11 @@ write_texture_p0(struct vc4_context *vc4, struct vc4_texture_stateobj *texstate, uint32_t unit) { - struct pipe_sampler_view *texture = texstate->textures[unit]; - struct vc4_resource *rsc = vc4_resource(texture->texture); + struct vc4_sampler_view *sview = + vc4_sampler_view(texstate->textures[unit]); + struct vc4_resource *rsc = vc4_resource(sview->base.texture); - cl_reloc(vc4, &vc4->uniforms, uniforms, rsc->bo, - VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) | - VC4_SET_FIELD(texture->u.tex.last_level - - texture->u.tex.first_level, VC4_TEX_P0_MIPLVLS) | - VC4_SET_FIELD(texture->target == PIPE_TEXTURE_CUBE, - VC4_TEX_P0_CMMODE) | - VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE)); + cl_reloc(vc4, &vc4->uniforms, uniforms, rsc->bo, sview->texture_p0); } static void @@ -71,8 +66,8 @@ write_texture_p1(struct vc4_context *vc4, struct vc4_texture_stateobj 
*texstate, uint32_t unit) { - struct pipe_sampler_view *texture = texstate->textures[unit]; - struct vc4_resource *rsc = vc4_resource(texture->texture); + struct vc4_sampler_view *sview = + vc4_sampler_view(texstate->textures[unit]); struct pipe_sampler_state *sampler = texstate->samplers[unit]; static const uint8_t minfilter_map[6] = { VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR, @@ -91,12 +86,7 @@ write_texture_p1(struct vc4_context *vc4, (sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); - cl_aligned_u32(uniforms, - VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) | - VC4_SET_FIELD(texture->texture->height0 & 2047, - VC4_TEX_P1_HEIGHT) | - VC4_SET_FIELD(texture->texture->width0 & 2047, - VC4_TEX_P1_WIDTH) | + cl_aligned_u32(uniforms, sview->texture_p1 | VC4_SET_FIELD(magfilter_map[sampler->mag_img_filter], VC4_TEX_P1_MAGFILT) | VC4_SET_FIELD(minfilter_map[sampler->min_mip_filter * 2 + -- 2.30.2