From: Mark Menzynski Date: Tue, 28 Jan 2020 12:36:27 +0000 (+0100) Subject: nvc0: Add shader disk caching X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=da9d721323dc8b68984dda55a7814d4f9c970963;p=mesa.git nvc0: Add shader disk caching Adds shader disk caching for nvc0 to reduce the need to compile shaders every time. Shaders are saved into disk_shader_cache from the nvc0_screen structure. It serializes the input nv50_ir_prog_info to compute the hash key and also to do a byte compare between the original nv50_ir_prog_info and the one saved in the cache. If the keys match and the byte compare also reports that they are equal, the shaders are the same, and the compiled nv50_ir_prog_info_out from the cache can be used instead of compiling the input info. This seems to significantly improve loading times; these are the results from running a bunch of shaders: cache off real 2m58.574s user 21m34.018s sys 0m8.055s cache on, first run real 3m32.617s user 24m52.701s sys 0m20.400s cache on, second run real 0m23.745s user 2m43.566s sys 0m4.532s Signed-off-by: Mark Menzynski Reviewed-by: Karol Herbst Part-of: --- diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index d3d6bd2e5dd..282a643b8db 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -322,6 +322,7 @@ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); /* nvc0_program.c */ bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset, + struct disk_cache *, struct pipe_debug_callback *); bool nvc0_program_upload(struct nvc0_context *, struct nvc0_program *); void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 087493fe2e2..3b97f4781a4 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
@@ -24,6 +24,7 @@ #include "compiler/nir/nir.h" #include "tgsi/tgsi_ureg.h" +#include "util/blob.h" #include "nvc0/nvc0_context.h" @@ -570,11 +571,17 @@ nvc0_program_dump(struct nvc0_program *prog) bool nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, + struct disk_cache *disk_shader_cache, struct pipe_debug_callback *debug) { + struct blob blob; + size_t cache_size; struct nv50_ir_prog_info *info; struct nv50_ir_prog_info_out info_out = {}; - int ret; + + int ret = 0; + cache_key key; + bool shader_loaded = false; info = CALLOC_STRUCT(nv50_ir_prog_info); if (!info) @@ -634,11 +641,45 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->assignSlots = nvc0_program_assign_varying_slots; - ret = nv50_ir_generate_code(info, &info_out); - if (ret) { - NOUVEAU_ERR("shader translation failed: %i\n", ret); - goto out; + blob_init(&blob); + + if (disk_shader_cache) { + if (nv50_ir_prog_info_serialize(&blob, info)) { + void *cached_data = NULL; + + disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key); + cached_data = disk_cache_get(disk_shader_cache, key, &cache_size); + + if (cached_data && cache_size >= blob.size) { // blob.size is the size of serialized "info" + /* Blob contains only "info". 
In disk cache, "info_out" comes right after it */ + size_t offset = blob.size; + if (nv50_ir_prog_info_out_deserialize(cached_data, cache_size, offset, &info_out)) + shader_loaded = true; + else + debug_printf("WARNING: Couldn't deserialize shaders"); + } + free(cached_data); + } else { + debug_printf("WARNING: Couldn't serialize input shaders"); + } + } + if (!shader_loaded) { + cache_size = 0; + ret = nv50_ir_generate_code(info, &info_out); + if (ret) { + NOUVEAU_ERR("shader translation failed: %i\n", ret); + goto out; + } + if (disk_shader_cache) { + if (nv50_ir_prog_info_out_serialize(&blob, &info_out)) { + disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL); + cache_size = blob.size; + } else { + debug_printf("WARNING: Couldn't serialize shaders"); + } + } } + blob_finish(&blob); prog->code = info_out.bin.code; prog->code_size = info_out.bin.codeSize; @@ -711,10 +752,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, &prog->pipe.stream_output); pipe_debug_message(debug, SHADER_INFO, - "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, bytes: %d", + "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, bytes: %d, cached: %zd", prog->type, info_out.bin.tlsSpace, info_out.bin.smemSize, prog->num_gprs, info_out.bin.instructions, - info_out.bin.codeSize); + info_out.bin.codeSize, cache_size); #ifndef NDEBUG if (debug_get_option("NV50_PROG_CHIPSET", NULL) && info->dbgFlags) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 490026b2c00..5e2a6c0566e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -54,7 +54,8 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) if (!prog->translated) { prog->translated = nvc0_program_translate( - prog, nvc0->screen->base.device->chipset, &nvc0->base.debug); + prog, nvc0->screen->base.device->chipset, + 
nvc0->screen->base.disk_shader_cache, &nvc0->base.debug); if (!prog->translated) return false; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 076fe1c6dec..90c2a5e1c6e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -616,6 +616,7 @@ nvc0_sp_state_create(struct pipe_context *pipe, prog->translated = nvc0_program_translate( prog, nvc0_context(pipe)->screen->base.device->chipset, + nvc0_context(pipe)->screen->base.disk_shader_cache, &nouveau_context(pipe)->debug); return (void *)prog; @@ -755,6 +756,7 @@ nvc0_cp_state_create(struct pipe_context *pipe, prog->translated = nvc0_program_translate( prog, nvc0_context(pipe)->screen->base.device->chipset, + nvc0_context(pipe)->screen->base.disk_shader_cache, &nouveau_context(pipe)->debug); return (void *)prog;