i965: add initial implementation of on disk shader cache
author: Timothy Arceri <timothy.arceri@collabora.com>
Fri, 1 Jul 2016 07:02:57 +0000 (17:02 +1000)
committer: Jordan Justen <jordan.l.justen@intel.com>
Wed, 1 Nov 2017 06:36:54 +0000 (23:36 -0700)
This uses the Mesa disk_cache support to write out the final linked
binary for vertex and fragment shader programs.

This is based off the initial implementation done by Carl Worth. It
has been significantly reworked, first by Tim Arceri, and then by
Jordan Justen.

v2:
 * Squash 'i965: add image param shader cache support'
 * Squash 'i965: add shader cache support for pull param pointers'
 * Substantially simplified by a rework on top of Jason's 2975e4c56a7a.
 * Rename load_program_data to read_program_data. (Jason)

v3:
 * Simplify and align program read/write. (Jason)

v4:
 * Don't save prog_data size since we know it from the stage. (Ken)
 * Don't save program size, since prog_data includes the size. (Ken)
 * Remove `assert` that potentially could be triggered by disk
   corruption of the cache entries. (Ken)
 * Fix compute shader scratch allocation. (Ken)
 * Remove special case mapping for non-LLC. (Ken)
 * Remove SET_UPLOAD_PARAMS macro

[jordan.l.justen@intel.com: *_cached_program => brw_disk_cache_*_program]
[jordan.l.justen@intel.com: brw_shader_cache.c => brw_disk_cache.c]
[jordan.l.justen@intel.com: don't map to write program when LLC is present]
[jordan.l.justen@intel.com: set program_written_to_cache on read from cache]
[jordan.l.justen@intel.com: only try cache when status is linking_skipped]
[jordan.l.justen@intel.com: all v2-v4 changes noted above]
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/Makefile.sources
src/mesa/drivers/dri/i965/brw_disk_cache.c [new file with mode: 0644]
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/meson.build

index 053d89b81ecf9bc5780b75295bc54958d755e3ee..2980cdb3c54fb6eded8d16b6a5d1c51b8f707ade 100644 (file)
@@ -14,6 +14,7 @@ i965_FILES = \
        brw_cs.h \
        brw_curbe.c \
        brw_defines.h \
+       brw_disk_cache.c \
        brw_draw.c \
        brw_draw.h \
        brw_draw_upload.c \
diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c
new file mode 100644 (file)
index 0000000..54da1ab
--- /dev/null
@@ -0,0 +1,290 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "compiler/blob.h"
+#include "compiler/glsl/ir_uniform.h"
+#include "compiler/glsl/shader_cache.h"
+#include "main/mtypes.h"
+#include "util/disk_cache.h"
+#include "util/macros.h"
+#include "util/mesa-sha1.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_vs.h"
+#include "brw_wm.h"
+
+/**
+ * Compute the disk cache key for a stage's compiled binary.
+ *
+ * A two-line text manifest is assembled from the hex form of the program's
+ * sha1 (prog->sh.data->sha1) and the hex form of the SHA-1 of the stage key
+ * (brw_prog_key_size(stage) bytes starting at \p key).  The SHA-1 of that
+ * manifest is written to \p out_sha1, which must have room for 20 bytes.
+ */
+static void
+gen_shader_sha1(struct brw_context *brw, struct gl_program *prog,
+                gl_shader_stage stage, void *key, unsigned char *out_sha1)
+{
+   char manifest[256];
+   char hex[41];
+   unsigned char key_sha1[20];
+
+   /* First manifest line: the program's own sha1. */
+   _mesa_sha1_format(hex, prog->sh.data->sha1);
+   const int used = snprintf(manifest, sizeof(manifest),
+                             "program: %s\n", hex);
+
+   /* Second manifest line: the hash of the stage's program key. */
+   _mesa_sha1_compute(key, brw_prog_key_size(stage), key_sha1);
+   _mesa_sha1_format(hex, key_sha1);
+   snprintf(&manifest[used], sizeof(manifest) - used, "%s_key: %s\n",
+            _mesa_shader_stage_to_abbrev(stage), hex);
+
+   /* The cache key is the hash of the complete manifest text. */
+   _mesa_sha1_compute(manifest, strlen(manifest), out_sha1);
+}
+
+/**
+ * Serialize one stage's compiled program into \p binary.
+ *
+ * The layout is, in order: the stage's prog_data structure
+ * (brw_prog_data_size(stage) bytes), the program itself
+ * (prog_data->program_size bytes), the push params and the pull params
+ * (one uint32_t per entry).  read_blob_program_data() consumes the same
+ * layout.
+ */
+static void
+write_blob_program_data(struct blob *binary, gl_shader_stage stage,
+                        const void *program,
+                        struct brw_stage_prog_data *prog_data)
+{
+   const size_t push_bytes = sizeof(uint32_t) * prog_data->nr_params;
+   const size_t pull_bytes = sizeof(uint32_t) * prog_data->nr_pull_params;
+
+   /* prog_data structure, followed by the program bytes. */
+   blob_write_bytes(binary, prog_data, brw_prog_data_size(stage));
+   blob_write_bytes(binary, program, prog_data->program_size);
+
+   /* Push params, followed by pull params. */
+   blob_write_bytes(binary, prog_data->param, push_bytes);
+   blob_write_bytes(binary, prog_data->pull_param, pull_bytes);
+}
+
+/**
+ * Deserialize one stage's compiled program from \p binary.
+ *
+ * Expects the layout produced by write_blob_program_data(): the prog_data
+ * structure, the program bytes, the push params and the pull params.
+ *
+ * \param program    receives a pointer to the program bytes inside the blob
+ * \param prog_data  caller-provided storage, filled from the blob; on
+ *                   success its param and pull_param arrays are freshly
+ *                   allocated with a NULL ralloc context
+ *
+ * \return true only if the blob was consumed exactly, with no overrun.  On
+ *         failure nothing allocated here is left behind and the param
+ *         pointers in \p prog_data are NULL once the structure has been read.
+ */
+static bool
+read_blob_program_data(struct blob_reader *binary, struct gl_program *prog,
+                       gl_shader_stage stage, const uint8_t **program,
+                       struct brw_stage_prog_data *prog_data)
+{
+   /* Read shader prog_data from blob. */
+   blob_copy_bytes(binary, prog_data, brw_prog_data_size(stage));
+   if (binary->overrun)
+      return false;
+
+   /* The pointer fields just copied were written by whoever produced the
+    * cache entry and are meaningless here; clear them so a failure below
+    * never leaves stale pointers behind.
+    */
+   prog_data->param = NULL;
+   prog_data->pull_param = NULL;
+
+   /* Read shader program from blob. */
+   *program = blob_read_bytes(binary, prog_data->program_size);
+   if (binary->overrun)
+      return false;
+
+   /* Read push params */
+   prog_data->param = rzalloc_array(NULL, uint32_t, prog_data->nr_params);
+   blob_copy_bytes(binary, prog_data->param,
+                   sizeof(uint32_t) * prog_data->nr_params);
+
+   /* Read pull params */
+   prog_data->pull_param = rzalloc_array(NULL, uint32_t,
+                                         prog_data->nr_pull_params);
+   blob_copy_bytes(binary, prog_data->pull_param,
+                   sizeof(uint32_t) * prog_data->nr_pull_params);
+
+   if (binary->current != binary->end || binary->overrun) {
+      /* Corrupt or truncated entry: the arrays have no ralloc parent, so
+       * they must be released here or they would leak.
+       */
+      ralloc_free(prog_data->param);
+      ralloc_free(prog_data->pull_param);
+      prog_data->param = NULL;
+      prog_data->pull_param = NULL;
+      return false;
+   }
+
+   return true;
+}
+
+/**
+ * Look up the compiled binary for \p prog / \p stage in the disk cache and,
+ * if found and valid, upload it to the in-memory program cache.
+ *
+ * Only the vertex and fragment stages are handled.  The lookup key is built
+ * with program_string_id forced to 0; the real id is restored before the
+ * program is uploaded.
+ *
+ * \return true if the program was read from the disk cache and uploaded,
+ *         false if there was no entry or the entry was invalid (in which
+ *         case the entry is removed from the disk cache).
+ */
+static bool
+read_and_upload(struct brw_context *brw, struct disk_cache *cache,
+                struct gl_program *prog, gl_shader_stage stage)
+{
+   unsigned char binary_sha1[20];
+
+   union brw_any_prog_key prog_key;
+
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      brw_vs_populate_key(brw, &prog_key.vs);
+      /* We don't care what instance of the program it is for the disk cache
+       * hash lookup, so set the id to 0 for the sha1 hashing.
+       * program_string_id will be set below.
+       */
+      prog_key.vs.program_string_id = 0;
+      break;
+   case MESA_SHADER_FRAGMENT:
+      brw_wm_populate_key(brw, &prog_key.wm);
+      prog_key.wm.program_string_id = 0;
+      break;
+   default:
+      unreachable("Unsupported stage!");
+   }
+
+   gen_shader_sha1(brw, prog, stage, &prog_key, binary_sha1);
+
+   size_t buffer_size;
+   uint8_t *buffer = disk_cache_get(cache, binary_sha1, &buffer_size);
+   if (buffer == NULL) {
+      if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+         char sha1_buf[41];
+         _mesa_sha1_format(sha1_buf, binary_sha1);
+         fprintf(stderr, "No cached %s binary found for: %s\n",
+                 _mesa_shader_stage_to_abbrev(stage), sha1_buf);
+      }
+      return false;
+   }
+
+   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+      char sha1_buf[41];
+      _mesa_sha1_format(sha1_buf, binary_sha1);
+      fprintf(stderr, "attempting to populate bo cache with binary: %s\n",
+              sha1_buf);
+   }
+
+   struct blob_reader binary;
+   blob_reader_init(&binary, buffer, buffer_size);
+
+   /* Temporary storage large enough for any stage's prog_data.  It has no
+    * ralloc parent, so it must be freed explicitly on every path below.
+    */
+   const uint8_t *program;
+   struct brw_stage_prog_data *prog_data =
+      ralloc_size(NULL, sizeof(union brw_any_prog_data));
+   if (!read_blob_program_data(&binary, prog, stage, &program, prog_data)) {
+      /* Something very bad has gone wrong; discard the item from the cache
+       * and rebuild from source.
+       */
+      if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+         fprintf(stderr, "Error reading program from cache (invalid i965 "
+                 "cache item)\n");
+      }
+
+      disk_cache_remove(cache, binary_sha1);
+      ralloc_free(prog_data);
+      free(buffer);
+      return false;
+   }
+
+   enum brw_cache_id cache_id;
+   struct brw_stage_state *stage_state;
+
+   /* Restore the real program id now that the disk cache lookup is done. */
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      prog_key.vs.program_string_id = brw_program(prog)->id;
+      cache_id = BRW_CACHE_VS_PROG;
+      stage_state = &brw->vs.base;
+      break;
+   case MESA_SHADER_FRAGMENT:
+      prog_key.wm.program_string_id = brw_program(prog)->id;
+      cache_id = BRW_CACHE_FS_PROG;
+      stage_state = &brw->wm.base;
+      break;
+   default:
+      unreachable("Unsupported stage!");
+   }
+
+   brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch);
+
+   brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage),
+                    program, prog_data->program_size, prog_data,
+                    brw_prog_data_size(stage), &stage_state->prog_offset,
+                    &stage_state->prog_data);
+
+   /* The binary came from the disk cache, so it need not be written back. */
+   prog->program_written_to_cache = true;
+
+   /* NOTE(review): this relies on brw_upload_cache() copying the prog_data
+    * structure into the program cache (stage_state->prog_data then points at
+    * that copy) -- confirm against brw_program_cache.c.  The param and
+    * pull_param arrays are separate allocations and are not freed here.
+    */
+   ralloc_free(prog_data);
+   free(buffer);
+
+   return true;
+}
+
+/**
+ * Try to satisfy the current program for \p stage from the disk cache.
+ *
+ * The cache is only consulted when the context has a disk cache, a program
+ * is current for the stage, and that program's link status is
+ * linking_skipped.
+ *
+ * \return true if the binary was read from the disk cache and uploaded,
+ *         false otherwise.
+ */
+bool
+brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage)
+{
+   struct disk_cache *cache = brw->ctx.Cache;
+   if (!cache)
+      return false;
+
+   struct gl_program *prog = brw->ctx._Shader->CurrentProgram[stage];
+   if (!prog)
+      return false;
+
+   const bool uploaded =
+      prog->sh.data->LinkStatus == linking_skipped &&
+      read_and_upload(brw, cache, prog, stage);
+
+   if (!uploaded) {
+      /* FIXME: Fall back and compile from source here. */
+      return false;
+   }
+
+   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO)
+      fprintf(stderr, "read gen program from cache\n");
+
+   return true;
+}
+
+/**
+ * Serialize one stage's compiled program and store it in the disk cache.
+ *
+ * The program bytes are read from the program cache mapping at
+ * \p prog_offset.  The entry is stored under the key produced by
+ * gen_shader_sha1() for \p prog, \p stage and \p key, and \p prog is then
+ * marked as written to the cache.
+ */
+static void
+write_program_data(struct brw_context *brw, struct gl_program *prog,
+                   void *key, struct brw_stage_prog_data *prog_data,
+                   uint32_t prog_offset, struct disk_cache *cache,
+                   gl_shader_stage stage)
+{
+   struct blob blob;
+   unsigned char cache_key[20];
+   char cache_key_hex[41];
+
+   blob_init(&blob);
+
+   /* TODO: Improve perf for non-LLC. It would be best to save it at program
+    * generation time when the program is in normal memory accessible with
+    * cache to the CPU. Another easier change would be to use
+    * _mesa_streaming_load_memcpy to read from the program mapped memory.
+    */
+   const void *mapped_program = brw->cache.map + prog_offset;
+   write_blob_program_data(&blob, stage, mapped_program, prog_data);
+
+   gen_shader_sha1(brw, prog, stage, key, cache_key);
+   _mesa_sha1_format(cache_key_hex, cache_key);
+   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO)
+      fprintf(stderr, "putting binary in cache: %s\n", cache_key_hex);
+
+   disk_cache_put(cache, cache_key, blob.data, blob.size, NULL);
+
+   /* Remember that this program no longer needs to be written out. */
+   prog->program_written_to_cache = true;
+   blob_finish(&blob);
+}
+
+/**
+ * Write the current vertex and fragment programs to the disk cache.
+ *
+ * Does nothing when the context has no disk cache.  A stage is skipped when
+ * no program is current for it or the program is already flagged as written
+ * to the cache.  Keys are built with program_string_id forced to 0, matching
+ * the lookup performed in read_and_upload().
+ */
+void
+brw_disk_cache_write_program(struct brw_context *brw)
+{
+   struct disk_cache *cache = brw->ctx.Cache;
+   if (!cache)
+      return;
+
+   /* Vertex shader. */
+   struct gl_program *vs_prog =
+      brw->ctx._Shader->CurrentProgram[MESA_SHADER_VERTEX];
+   if (vs_prog != NULL && !vs_prog->program_written_to_cache) {
+      struct brw_vs_prog_key key;
+
+      brw_vs_populate_key(brw, &key);
+      key.program_string_id = 0;
+
+      write_program_data(brw, vs_prog, &key, brw->vs.base.prog_data,
+                         brw->vs.base.prog_offset, cache,
+                         MESA_SHADER_VERTEX);
+   }
+
+   /* Fragment shader. */
+   struct gl_program *fs_prog =
+      brw->ctx._Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
+   if (fs_prog != NULL && !fs_prog->program_written_to_cache) {
+      struct brw_wm_prog_key key;
+
+      brw_wm_populate_key(brw, &key);
+      key.program_string_id = 0;
+
+      write_program_data(brw, fs_prog, &key, brw->wm.base.prog_data,
+                         brw->wm.base.prog_offset, cache,
+                         MESA_SHADER_FRAGMENT);
+   }
+}
index 8db354cf232da0af5f7a3e15a24004cb79cac582..6f2e0501b4b8677693a0d16d526a5aeb8f0251b6 100644 (file)
@@ -131,6 +131,11 @@ void brw_upload_state_base_address(struct brw_context *brw);
 void gen8_write_pma_stall_bits(struct brw_context *brw,
                                uint32_t pma_stall_bits);
 
+/* brw_disk_cache.c */
+
+/* Try to load the current program for the given stage from the on-disk
+ * shader cache.  Returns true if the binary was read from the cache and
+ * uploaded, false otherwise.
+ */
+bool brw_disk_cache_upload_program(struct brw_context *brw,
+                                   gl_shader_stage stage);
+
+/* Write the current vertex and fragment programs to the on-disk shader
+ * cache, skipping any program already marked as written.
+ */
+void brw_disk_cache_write_program(struct brw_context *brw);
+
+
 /***********************************************************************
  * brw_state.c
  */
index 144a254bd6471318b37c380066f49a295381c183..09e1179adc45ebd26140074f739f9dda70a712b8 100644 (file)
@@ -34,6 +34,7 @@ files_i965 = files(
   'brw_cs.h',
   'brw_curbe.c',
   'brw_defines.h',
+  'brw_disk_cache.c',
   'brw_draw.c',
   'brw_draw.h',
   'brw_draw_upload.c',