From 39056b0e2ac10342d8a3a6000f12a510f5dbd773 Mon Sep 17 00:00:00 2001 From: "Wladimir J. van der Laan" Date: Mon, 24 Jul 2017 10:28:17 +0200 Subject: [PATCH] etnaviv: Implement ICACHE This patch adds support for large shaders on GC3000. For example the "terrain" glmark benchmark with a large fragment shader will work after this. If the GPU supports ICACHE, shaders larger than the available state area will be uploaded to a bo of their own and instructed to be loaded from memory on demand. Small shaders will be uploaded in the usual way. This mimics the behavior of the blob. On GPUs that don't support ICACHE, this patch should make no difference. Signed-off-by: Wladimir J. van der Laan Reviewed-by: Christian Gmeiner --- .../drivers/etnaviv/etnaviv_compiler.c | 3 +- .../drivers/etnaviv/etnaviv_compiler.h | 5 ++ src/gallium/drivers/etnaviv/etnaviv_emit.c | 52 +++++++++++++------ .../drivers/etnaviv/etnaviv_internal.h | 4 ++ src/gallium/drivers/etnaviv/etnaviv_screen.c | 4 +- src/gallium/drivers/etnaviv/etnaviv_shader.c | 45 +++++++++++++++- 6 files changed, 95 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c index 165ab74298a..f65a168672c 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c @@ -2277,7 +2277,7 @@ etna_compile_check_limits(struct etna_compile *c) /* round up number of uniforms, including immediates, in units of four */ int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4; - if (c->inst_ptr > c->specs->max_instructions) { + if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) { DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr, c->specs->max_instructions); return false; @@ -2501,6 +2501,7 @@ etna_compile_shader(struct etna_shader_variant *v) v->vs_pointsize_out_reg = -1; v->ps_color_out_reg = -1; v->ps_depth_out_reg = -1; + v->needs_icache = c->inst_ptr > c->specs->max_instructions; copy_uniform_state_to_shader(c, v); if (c->info.processor == PIPE_SHADER_VERTEX) { diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.h b/src/gallium/drivers/etnaviv/etnaviv_compiler.h index 88a093fd918..f5c16890a66 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.h +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.h @@ -94,12 +94,17 @@ struct etna_shader_variant { /* unknown input property (XX_INPUT_COUNT, field UNK8) */ uint32_t input_count_unk8; + /* shader is larger than GPU instruction limit, thus needs icache */ + bool needs_icache; + /* shader variants form a linked list */ struct etna_shader_variant *next; /* replicated here to avoid passing extra ptrs everywhere */ struct etna_shader *shader; struct etna_shader_key key; + + struct etna_bo *bo; /* cached code memory bo handle (for icache) */ }; struct etna_varying { diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c b/src/gallium/drivers/etnaviv/etnaviv_emit.c index 273b3d05e3f..c2117d563d1 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_emit.c +++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c @@ -421,9 +421,6 @@ etna_emit_state(struct etna_context *ctx) if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING); /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC); - if (ctx->specs.has_shader_range_registers) { - /*0085C*/ EMIT_STATE(VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16); - } } if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) { /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X); @@ -534,10 +531,6 @@ etna_emit_state(struct etna_context *ctx) : ctx->shader_state.PS_TEMP_REGISTER_CONTROL); /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL); /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC); - if (ctx->specs.has_shader_range_registers) { - /*0101C*/ EMIT_STATE(PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) | - 0x100); - } } if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) { uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG; @@ -739,14 +732,43 @@ etna_emit_state(struct etna_context *ctx) if (dirty & (ETNA_DIRTY_SHADER)) { /* Special case: a new shader was loaded; simply re-load all uniforms and * shader code at once */ - /*04000 or 0C000*/ - etna_set_state_multi(stream, ctx->specs.vs_offset, - ctx->shader_state.vs_inst_mem_size, - ctx->shader_state.VS_INST_MEM); - /*06000 or 0D000*/ - etna_set_state_multi(stream, ctx->specs.ps_offset, - ctx->shader_state.ps_inst_mem_size, - ctx->shader_state.PS_INST_MEM); + if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) { + assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers); + /* Set icache (VS) */ + etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16); + etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, + VIVS_VS_ICACHE_CONTROL_ENABLE | + VIVS_VS_ICACHE_CONTROL_FLUSH_VS); + assert(ctx->shader_state.VS_INST_ADDR.bo); + etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR); + + /* Set icache (PS) */ + etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16); + etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, + VIVS_VS_ICACHE_CONTROL_ENABLE | + VIVS_VS_ICACHE_CONTROL_FLUSH_PS); + assert(ctx->shader_state.PS_INST_ADDR.bo); + etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR); + } else { + /* Upload shader directly, first flushing and disabling icache if + * supported on this hw */ + if (ctx->specs.has_icache) { + etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, + VIVS_VS_ICACHE_CONTROL_FLUSH_PS | + VIVS_VS_ICACHE_CONTROL_FLUSH_VS); + } + if (ctx->specs.has_shader_range_registers) { + etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16); + etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) | + 0x100); + } + etna_set_state_multi(stream, ctx->specs.vs_offset, + ctx->shader_state.vs_inst_mem_size, + ctx->shader_state.VS_INST_MEM); + etna_set_state_multi(stream, ctx->specs.ps_offset, + ctx->shader_state.ps_inst_mem_size, + ctx->shader_state.PS_INST_MEM); + } if (ctx->specs.has_unified_uniforms) { etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0); diff --git a/src/gallium/drivers/etnaviv/etnaviv_internal.h b/src/gallium/drivers/etnaviv/etnaviv_internal.h index 5c13f23b063..a6544f62199 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_internal.h +++ b/src/gallium/drivers/etnaviv/etnaviv_internal.h @@ -76,6 +76,8 @@ struct etna_specs { unsigned single_buffer : 1; /* has unified uniforms memory */ unsigned has_unified_uniforms : 1; + /* can load shader instructions from memory */ + unsigned has_icache : 1; /* can use any kind of wrapping mode on npot textures */ unsigned npot_tex_any_wrap; /* number of bits per TS tile */ @@ -250,6 +252,8 @@ struct compiled_shader_state { uint32_t VS_UNIFORMS[ETNA_MAX_UNIFORMS * 4]; uint32_t *PS_INST_MEM; uint32_t PS_UNIFORMS[ETNA_MAX_UNIFORMS * 4]; + struct etna_reloc PS_INST_ADDR; + struct etna_reloc VS_INST_ADDR; }; /* state of some 3d and common registers relevant to etna driver */ diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c index fccb8dccfb8..f400e423de3 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c @@ -710,7 +710,8 @@ etna_get_specs(struct etna_screen *screen) * same. */ screen->specs.ps_offset = 0x8000 + 0x1000; - screen->specs.max_instructions = 256; + screen->specs.max_instructions = 256; /* maximum number instructions for non-icache use */ + screen->specs.has_icache = true; } else { if (instruction_count > 256) { /* unified instruction memory? */ screen->specs.vs_offset = 0xC000; @@ -721,6 +722,7 @@ etna_get_specs(struct etna_screen *screen) screen->specs.ps_offset = 0x6000; screen->specs.max_instructions = instruction_count / 2; } + screen->specs.has_icache = false; } if (VIV_FEATURE(screen, chipMinorFeatures1, HALTI0)) { diff --git a/src/gallium/drivers/etnaviv/etnaviv_shader.c b/src/gallium/drivers/etnaviv/etnaviv_shader.c index b5256e4b510..6012680624b 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_shader.c +++ b/src/gallium/drivers/etnaviv/etnaviv_shader.c @@ -29,12 +29,30 @@ #include "etnaviv_compiler.h" #include "etnaviv_context.h" #include "etnaviv_debug.h" +#include "etnaviv_screen.h" #include "etnaviv_util.h" #include "tgsi/tgsi_parse.h" #include "util/u_math.h" #include "util/u_memory.h" +/* Upload shader code to bo, if not already done */ +static bool etna_icache_upload_shader(struct etna_context *ctx, struct etna_shader_variant *v) +{ + if (v->bo) + return true; + v->bo = etna_bo_new(ctx->screen->dev, v->code_size*4, DRM_ETNA_GEM_CACHE_UNCACHED); + if (!v->bo) + return false; + + void *buf = etna_bo_map(v->bo); + etna_bo_cpu_prep(v->bo, DRM_ETNA_PREP_WRITE); + memcpy(buf, v->code, v->code_size*4); + etna_bo_cpu_fini(v->bo); + DBG("Uploaded %s of %u words to bo %p", v->processor == PIPE_SHADER_FRAGMENT ? "fs":"vs", v->code_size, v->bo); + return true; +} + /* Link vs and fs together: fill in shader_state from vs and fs * as this function is called every time a new fs or vs is bound, the goal is to * do little processing as possible here, and to precompute as much as possible in @@ -45,7 +63,7 @@ */ static bool etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs, - const struct etna_shader_variant *vs, const struct etna_shader_variant *fs) + struct etna_shader_variant *vs, struct etna_shader_variant *fs) { struct etna_shader_link_info link = { }; @@ -164,9 +182,32 @@ etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs, /* reference instruction memory */ cs->vs_inst_mem_size = vs->code_size; cs->VS_INST_MEM = vs->code; + cs->ps_inst_mem_size = fs->code_size; cs->PS_INST_MEM = fs->code; + if (vs->needs_icache | fs->needs_icache) { + /* If either of the shaders needs ICACHE, we use it for both. It is + * either switched on or off for the entire shader processor. + */ + if (!etna_icache_upload_shader(ctx, vs) || + !etna_icache_upload_shader(ctx, fs)) { + assert(0); + return false; + } + + cs->VS_INST_ADDR.bo = vs->bo; + cs->VS_INST_ADDR.offset = 0; + cs->VS_INST_ADDR.flags = ETNA_RELOC_READ; + cs->PS_INST_ADDR.bo = fs->bo; + cs->PS_INST_ADDR.offset = 0; + cs->PS_INST_ADDR.flags = ETNA_RELOC_READ; + } else { + /* clear relocs */ + memset(&cs->VS_INST_ADDR, 0, sizeof(cs->VS_INST_ADDR)); + memset(&cs->PS_INST_ADDR, 0, sizeof(cs->PS_INST_ADDR)); + } + return true; } @@ -352,6 +393,8 @@ etna_delete_shader_state(struct pipe_context *pctx, void *ss) while (v) { t = v; v = v->next; + if (t->bo) + etna_bo_del(t->bo); etna_destroy_shader(t); } -- 2.30.2