From f97acb4bb4b18f127b62aa8eeb57cdf3d8fe3aa2 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 5 Jun 2020 10:05:45 -0700 Subject: [PATCH] freedreno/ir3: disk-cache support Adds a shader disk-cache for ir3 shader variants. Note that builds with `-Dshader-cache=false` use no-op stubs, where `disk_cache_create()` returns NULL. Binning pass variants are serialized together with their draw-pass counterparts, because they share const-state. Signed-off-by: Rob Clark Part-of: --- src/freedreno/Makefile.sources | 1 + src/freedreno/ir3/ir3_compiler.c | 3 + src/freedreno/ir3/ir3_compiler.h | 13 + src/freedreno/ir3/ir3_disk_cache.c | 226 ++++++++++++++++++ src/freedreno/ir3/ir3_shader.c | 10 +- src/freedreno/ir3/ir3_shader.h | 17 ++ src/freedreno/ir3/meson.build | 3 +- .../drivers/freedreno/freedreno_screen.c | 14 ++ 8 files changed, 284 insertions(+), 3 deletions(-) create mode 100644 src/freedreno/ir3/ir3_disk_cache.c diff --git a/src/freedreno/Makefile.sources b/src/freedreno/Makefile.sources index 908c7bdcb84..99d9ace4a73 100644 --- a/src/freedreno/Makefile.sources +++ b/src/freedreno/Makefile.sources @@ -35,6 +35,7 @@ ir3_SOURCES := \ ir3/ir3_cf.c \ ir3/ir3_dce.c \ ir3/ir3_delay.c \ + ir3/ir3_disk_cache.c \ ir3/ir3_group.c \ ir3/ir3_image.c \ ir3/ir3_image.h \ diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 09d0b49dcea..9fc00d8f91f 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -40,6 +40,7 @@ static const struct debug_named_value shader_debug_options[] = { {"forces2en", IR3_DBG_FORCES2EN, "Force s2en mode for tex sampler instructions"}, {"nouboopt", IR3_DBG_NOUBOOPT, "Disable lowering UBO to uniform"}, {"nofp16", IR3_DBG_NOFP16, "Don't lower mediump to fp16"}, + {"nocache", IR3_DBG_NOCACHE, "Disable shader cache"}, #ifdef DEBUG /* DEBUG-only options: */ {"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"}, @@ -122,5 +123,7 @@ ir3_compiler_create(struct fd_device *dev, uint32_t 
gpu_id) compiler->const_upload_unit = 8; } + ir3_disk_cache_init(compiler); + return compiler; } diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index ee6ee5ba83c..54a1afd5a25 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -27,6 +27,8 @@ #ifndef IR3_COMPILER_H_ #define IR3_COMPILER_H_ +#include "util/disk_cache.h" + #include "ir3.h" struct ir3_ra_reg_set; @@ -39,6 +41,8 @@ struct ir3_compiler { struct ir3_ra_reg_set *mergedregs_set; uint32_t shader_count; + struct disk_cache *disk_cache; + /* * Configuration options for things that are handled differently on * different generations: @@ -98,6 +102,14 @@ struct ir3_compiler { void ir3_compiler_destroy(struct ir3_compiler *compiler); struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id); +void ir3_disk_cache_init(struct ir3_compiler *compiler); +void ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler, + struct ir3_shader *shader); +bool ir3_disk_cache_retrieve(struct ir3_compiler *compiler, + struct ir3_shader_variant *v); +void ir3_disk_cache_store(struct ir3_compiler *compiler, + struct ir3_shader_variant *v); + int ir3_compile_shader_nir(struct ir3_compiler *compiler, struct ir3_shader_variant *so); @@ -120,6 +132,7 @@ enum ir3_shader_debug { IR3_DBG_FORCES2EN = BITFIELD_BIT(8), IR3_DBG_NOUBOOPT = BITFIELD_BIT(9), IR3_DBG_NOFP16 = BITFIELD_BIT(10), + IR3_DBG_NOCACHE = BITFIELD_BIT(11), /* DEBUG-only options: */ IR3_DBG_SCHEDMSGS = BITFIELD_BIT(20), diff --git a/src/freedreno/ir3/ir3_disk_cache.c b/src/freedreno/ir3/ir3_disk_cache.c new file mode 100644 index 00000000000..a2249f25de3 --- /dev/null +++ b/src/freedreno/ir3/ir3_disk_cache.c @@ -0,0 +1,226 @@ +/* + * Copyright © 2020 Google, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nir_serialize.h" + +#include "ir3_compiler.h" +#include "ir3_nir.h" + +#define debug 0 + +/* + * Shader disk-cache implementation. + * + * Note that, at least with EGL_ANDROID_blob_cache, we should never + * rely on inter-dependencies between different cache entries: + * + * No guarantees are made as to whether a given key/value pair is present in + * the cache after the set call. If a different value has been associated + * with the given key in the past then it is undefined which value, if any, is + * associated with the key after the set call. Note that while there are no + * guarantees, the cache implementation should attempt to cache the most + * recently set value for a given key. + * + * For this reason, and because binning pass variants share const_state with + * their draw-pass counterpart, both variants are serialized together. 
+ */ + +void +ir3_disk_cache_init(struct ir3_compiler *compiler) +{ + if (ir3_shader_debug & IR3_DBG_NOCACHE) + return; + + /* array length = print length + nul char + 1 extra to verify it's unused */ + char renderer[7]; + ASSERTED int len = + snprintf(renderer, sizeof(renderer), "FD%03d", compiler->gpu_id); + assert(len == sizeof(renderer) - 2); + + const struct build_id_note *note = + build_id_find_nhdr_for_addr(ir3_disk_cache_init); + assert(note && build_id_length(note) == 20); /* sha1 */ + + const uint8_t *id_sha1 = build_id_data(note); + assert(id_sha1); + + char timestamp[41]; + _mesa_sha1_format(timestamp, id_sha1); + + const uint64_t driver_flags = ir3_shader_debug; + compiler->disk_cache = disk_cache_create(renderer, timestamp, driver_flags); +} + +void +ir3_disk_cache_init_shader_key(struct ir3_compiler *compiler, + struct ir3_shader *shader) +{ + if (!compiler->disk_cache) + return; + + struct mesa_sha1 ctx; + + _mesa_sha1_init(&ctx); + + /* Serialize the NIR to a binary blob that we can hash for the disk + * cache. Drop unnecessary information (like variable names) + * so the serialized NIR is smaller, and also to let us detect more + * isomorphic shaders when hashing, increasing cache hits. + */ + struct blob blob; + blob_init(&blob); + nir_serialize(&blob, shader->nir, true); + _mesa_sha1_update(&ctx, blob.data, blob.size); + blob_finish(&blob); + + /* Note that on some gens stream-out is lowered in ir3 to stg. For later + * gens we maybe don't need to include stream-out in the cache key. 
+ */ + _mesa_sha1_update(&ctx, &shader->stream_output, sizeof(shader->stream_output)); + + _mesa_sha1_final(&ctx, shader->cache_key); +} + +static void +compute_variant_key(struct ir3_compiler *compiler, + struct ir3_shader_variant *v, cache_key cache_key) +{ + struct blob blob; + blob_init(&blob); + + blob_write_bytes(&blob, &v->shader->cache_key, sizeof(v->shader->cache_key)); + blob_write_bytes(&blob, &v->key, sizeof(v->key)); + blob_write_uint8(&blob, v->binning_pass); + + disk_cache_compute_key(compiler->disk_cache, blob.data, blob.size, cache_key); + + blob_finish(&blob); +} + +static void +retrieve_variant(struct blob_reader *blob, struct ir3_shader_variant *v) +{ + blob_copy_bytes(blob, VARIANT_CACHE_PTR(v), VARIANT_CACHE_SIZE); + + /* + * pointers need special handling: + */ + + v->bin = malloc(4 * v->info.sizedwords); + blob_copy_bytes(blob, v->bin, 4 * v->info.sizedwords); + + if (!v->binning_pass) { + blob_copy_bytes(blob, v->const_state, sizeof(*v->const_state)); + unsigned immeds_sz = v->const_state->immediates_size * + sizeof(v->const_state->immediates[0]); + v->const_state->immediates = ralloc_size(v->const_state, immeds_sz); + blob_copy_bytes(blob, v->const_state->immediates, immeds_sz); + } +} + +static void +store_variant(struct blob *blob, struct ir3_shader_variant *v) +{ + blob_write_bytes(blob, VARIANT_CACHE_PTR(v), VARIANT_CACHE_SIZE); + + /* + * pointers need special handling: + */ + + blob_write_bytes(blob, v->bin, 4 * v->info.sizedwords); + + if (!v->binning_pass) { + blob_write_bytes(blob, v->const_state, sizeof(*v->const_state)); + unsigned immeds_sz = v->const_state->immediates_size * + sizeof(v->const_state->immediates[0]); + blob_write_bytes(blob, v->const_state->immediates, immeds_sz); + } +} + +bool +ir3_disk_cache_retrieve(struct ir3_compiler *compiler, + struct ir3_shader_variant *v) +{ + if (!compiler->disk_cache) + return false; + + cache_key cache_key; + + compute_variant_key(compiler, v, cache_key); + + if (debug) { + char 
sha1[41]; + _mesa_sha1_format(sha1, cache_key); + fprintf(stderr, "[mesa disk cache] retrieving variant %s: ", sha1); + } + + size_t size; + void *buffer = disk_cache_get(compiler->disk_cache, cache_key, &size); + + if (debug) + fprintf(stderr, "%s\n", buffer ? "found" : "missing"); + + if (!buffer) + return false; + + struct blob_reader blob; + blob_reader_init(&blob, buffer, size); + + retrieve_variant(&blob, v); + + if (v->binning) + retrieve_variant(&blob, v->binning); + + free(buffer); + + return true; +} + +void +ir3_disk_cache_store(struct ir3_compiler *compiler, + struct ir3_shader_variant *v) +{ + if (!compiler->disk_cache) + return; + + cache_key cache_key; + + compute_variant_key(compiler, v, cache_key); + + if (debug) { + char sha1[41]; + _mesa_sha1_format(sha1, cache_key); + fprintf(stderr, "[mesa disk cache] storing variant %s\n", sha1); + } + + struct blob blob; + blob_init(&blob); + + store_variant(&blob, v); + + if (v->binning) + store_variant(&blob, v->binning); + + disk_cache_put(compiler->disk_cache, cache_key, blob.data, blob.size, NULL); + blob_finish(&blob); +} diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index bcea1e82534..0950b62f92f 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -245,12 +245,17 @@ create_variant(struct ir3_shader *shader, const struct ir3_shader_key *key) goto fail; } + if (ir3_disk_cache_retrieve(shader->compiler, v)) + return v; + if (!compile_variant(v)) goto fail; if (needs_binning_variant(v) && !compile_variant(v->binning)) goto fail; + ir3_disk_cache_store(shader->compiler, v); + return v; fail: @@ -445,10 +450,11 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir, if (stream_output) memcpy(&shader->stream_output, stream_output, sizeof(shader->stream_output)); shader->num_reserved_user_consts = reserved_user_consts; + shader->nir = nir; - ir3_nir_post_finalize(compiler, nir); + ir3_disk_cache_init_shader_key(compiler, shader); - 
shader->nir = nir; + ir3_nir_post_finalize(compiler, nir); if (ir3_shader_debug & IR3_DBG_DISASM) { printf("dump nir%d: type=%d", shader->id, shader->type); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 2ccc9a86cd3..45a4170e683 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -33,6 +33,7 @@ #include "compiler/shader_enums.h" #include "compiler/nir/nir.h" #include "util/bitscan.h" +#include "util/disk_cache.h" #include "ir3_compiler.h" @@ -481,11 +482,25 @@ struct ir3_shader_variant { gl_shader_stage type; struct ir3_shader *shader; + /* + * Below here is serialized when written to disk cache: + */ + /* The actual binary shader instructions, size given by info.sizedwords: */ uint32_t *bin; struct ir3_const_state *const_state; + /* + * The following macros are used by the shader disk cache save/ + * restore paths to serialize/deserialize the variant. Any + * pointers that require special handling in store_variant() + * and retrieve_variant() should go above here. + */ +#define VARIANT_CACHE_START offsetof(struct ir3_shader_variant, info) +#define VARIANT_CACHE_PTR(v) (((char *)v) + VARIANT_CACHE_START) +#define VARIANT_CACHE_SIZE (sizeof(struct ir3_shader_variant) - VARIANT_CACHE_START) + struct ir3_info info; /* Levels of nesting of flow control: @@ -681,6 +696,8 @@ struct ir3_shader { struct ir3_shader_variant *variants; mtx_t variants_lock; + cache_key cache_key; /* shader disk-cache key */ + /* Bitmask of bits of the shader key used by this shader. Used to avoid * recompiles for GL NOS that doesn't actually apply to the shader. 
*/ diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build index 466b065d134..06f004322ca 100644 --- a/src/freedreno/ir3/meson.build +++ b/src/freedreno/ir3/meson.build @@ -78,6 +78,7 @@ libfreedreno_ir3_files = files( 'ir3_cp_postsched.c', 'ir3_dce.c', 'ir3_delay.c', + 'ir3_disk_cache.c', 'ir3_group.c', 'ir3_image.c', 'ir3_image.h', @@ -110,7 +111,7 @@ libfreedreno_ir3 = static_library( include_directories : [inc_freedreno, inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux], c_args : [no_override_init_args], gnu_symbol_visibility : 'hidden', - dependencies : idep_nir_headers, + dependencies : [idep_nir_headers, dep_dl], build_by_default : false, ) diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 1be5feb2e8f..0c7889dcd0b 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -716,6 +716,19 @@ fd_get_compiler_options(struct pipe_screen *pscreen, return ir2_get_compiler_options(); } +static struct disk_cache * +fd_get_disk_shader_cache(struct pipe_screen *pscreen) +{ + struct fd_screen *screen = fd_screen(pscreen); + + if (is_ir3(screen)) { + struct ir3_compiler *compiler = screen->compiler; + return compiler->disk_cache; + } + + return NULL; +} + bool fd_screen_bo_get_handle(struct pipe_screen *pscreen, struct fd_bo *bo, @@ -993,6 +1006,7 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro) pscreen->get_shader_param = fd_screen_get_shader_param; pscreen->get_compute_param = fd_get_compute_param; pscreen->get_compiler_options = fd_get_compiler_options; + pscreen->get_disk_shader_cache = fd_get_disk_shader_cache; fd_resource_screen_init(pscreen); fd_query_screen_init(pscreen); -- 2.30.2