nvc0: Add shader disk caching
author Mark Menzynski <mmenzyns@redhat.com>
Tue, 28 Jan 2020 12:36:27 +0000 (13:36 +0100)
committer Marge Bot <eric+marge@anholt.net>
Tue, 25 Aug 2020 18:56:37 +0000 (18:56 +0000)
Adds shader disk caching for nvc0 so that shaders do not have to be compiled
every time. Shaders are saved into the disk_shader_cache of the nvc0_screen
structure.

The input nv50_ir_prog_info is serialized, both to compute the hash key and
to do a byte compare between the original nv50_ir_prog_info and the one
saved in the cache. If the keys match and the byte compare also reports that
they are equal, the shaders are the same, and the compiled
nv50_ir_prog_info_out from the cache can be used instead of compiling the
input info.

This seems to significantly improve loading times. These are the results
from running a bunch of shaders:
cache off
real 2m58.574s
user 21m34.018s
sys 0m8.055s

cache on, first run
real 3m32.617s
user 24m52.701s
sys 0m20.400s

cache on, second run
real 0m23.745s
user 2m43.566s
sys 0m4.532s

Signed-off-by: Mark Menzynski <mmenzyns@redhat.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4264>

src/gallium/drivers/nouveau/nvc0/nvc0_context.h
src/gallium/drivers/nouveau/nvc0/nvc0_program.c
src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
src/gallium/drivers/nouveau/nvc0/nvc0_state.c

index d3d6bd2e5dd9f1da081b35c8bdc3b81b38f489f3..282a643b8db7fb42a4ad8f43497c42dfb348872f 100644 (file)
@@ -322,6 +322,7 @@ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
 
 /* nvc0_program.c */
 bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset,
+                            struct disk_cache *,
                             struct pipe_debug_callback *);
 bool nvc0_program_upload(struct nvc0_context *, struct nvc0_program *);
 void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
index 087493fe2e2ef8efff9564da22c9482040b08752..3b97f4781a42942ef37bf46fc57aa4e3c53f1c24 100644 (file)
@@ -24,6 +24,7 @@
 
 #include "compiler/nir/nir.h"
 #include "tgsi/tgsi_ureg.h"
+#include "util/blob.h"
 
 #include "nvc0/nvc0_context.h"
 
@@ -570,11 +571,17 @@ nvc0_program_dump(struct nvc0_program *prog)
 
 bool
 nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
+                       struct disk_cache *disk_shader_cache,
                        struct pipe_debug_callback *debug)
 {
+   struct blob blob;
+   size_t cache_size;
    struct nv50_ir_prog_info *info;
    struct nv50_ir_prog_info_out info_out = {};
-   int ret;
+
+   int ret = 0;
+   cache_key key;
+   bool shader_loaded = false;
 
    info = CALLOC_STRUCT(nv50_ir_prog_info);
    if (!info)
@@ -634,11 +641,45 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
 
    info->assignSlots = nvc0_program_assign_varying_slots;
 
-   ret = nv50_ir_generate_code(info, &info_out);
-   if (ret) {
-      NOUVEAU_ERR("shader translation failed: %i\n", ret);
-      goto out;
+   blob_init(&blob);
+
+   if (disk_shader_cache) {
+      if (nv50_ir_prog_info_serialize(&blob, info)) {
+         void *cached_data = NULL;
+
+         disk_cache_compute_key(disk_shader_cache, blob.data, blob.size, key);
+         cached_data = disk_cache_get(disk_shader_cache, key, &cache_size);
+
+         if (cached_data && cache_size >= blob.size) { // blob.size is the size of serialized "info"
+            /* Blob contains only "info". In disk cache, "info_out" comes right after it */
+            size_t offset = blob.size;
+            if (nv50_ir_prog_info_out_deserialize(cached_data, cache_size, offset, &info_out))
+               shader_loaded = true;
+            else
+               debug_printf("WARNING: Couldn't deserialize shaders");
+         }
+         free(cached_data);
+      } else {
+         debug_printf("WARNING: Couldn't serialize input shaders");
+      }
+   }
+   if (!shader_loaded) {
+      cache_size = 0;
+      ret = nv50_ir_generate_code(info, &info_out);
+      if (ret) {
+         NOUVEAU_ERR("shader translation failed: %i\n", ret);
+         goto out;
+      }
+      if (disk_shader_cache) {
+         if (nv50_ir_prog_info_out_serialize(&blob, &info_out)) {
+            disk_cache_put(disk_shader_cache, key, blob.data, blob.size, NULL);
+            cache_size = blob.size;
+         } else {
+            debug_printf("WARNING: Couldn't serialize shaders");
+         }
+      }
    }
+   blob_finish(&blob);
 
    prog->code = info_out.bin.code;
    prog->code_size = info_out.bin.codeSize;
@@ -711,10 +752,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
                                                 &prog->pipe.stream_output);
 
    pipe_debug_message(debug, SHADER_INFO,
-                      "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, bytes: %d",
+                      "type: %d, local: %d, shared: %d, gpr: %d, inst: %d, bytes: %d, cached: %zd",
                       prog->type, info_out.bin.tlsSpace, info_out.bin.smemSize,
                       prog->num_gprs, info_out.bin.instructions,
-                      info_out.bin.codeSize);
+                      info_out.bin.codeSize, cache_size);
 
 #ifndef NDEBUG
    if (debug_get_option("NV50_PROG_CHIPSET", NULL) && info->dbgFlags)
index 490026b2c003e8587615c412b095b186bfbca5c9..5e2a6c0566e5b4a4f1633baf36d3e8f93d26b7a0 100644 (file)
@@ -54,7 +54,8 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
 
    if (!prog->translated) {
       prog->translated = nvc0_program_translate(
-         prog, nvc0->screen->base.device->chipset, &nvc0->base.debug);
+         prog, nvc0->screen->base.device->chipset,
+         nvc0->screen->base.disk_shader_cache, &nvc0->base.debug);
       if (!prog->translated)
          return false;
    }
index 076fe1c6decc15ad307bac61f92bdf9d85067b32..90c2a5e1c6e222fb2e801b31b33569e09c915173 100644 (file)
@@ -616,6 +616,7 @@ nvc0_sp_state_create(struct pipe_context *pipe,
 
    prog->translated = nvc0_program_translate(
       prog, nvc0_context(pipe)->screen->base.device->chipset,
+      nvc0_context(pipe)->screen->base.disk_shader_cache,
       &nouveau_context(pipe)->debug);
 
    return (void *)prog;
@@ -755,6 +756,7 @@ nvc0_cp_state_create(struct pipe_context *pipe,
 
    prog->translated = nvc0_program_translate(
       prog, nvc0_context(pipe)->screen->base.device->chipset,
+      nvc0_context(pipe)->screen->base.disk_shader_cache,
       &nouveau_context(pipe)->debug);
 
    return (void *)prog;