From: Timothy Arceri Date: Fri, 1 Jul 2016 07:02:57 +0000 (+1000) Subject: i965: add initial implementation of on disk shader cache X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=516d50db319396c23fd599012aec8b3fbc0ee5a6;p=mesa.git i965: add initial implementation of on disk shader cache This uses the Mesa disk_cache support to write out the final linked binary for vertex and fragment shader programs. This is based off the initial implementation done by Carl Worth. It has been significantly reworked, first by Tim Arceri, and then by Jordan Justen. v2: * Squash 'i965: add image param shader cache support' * Squash 'i965: add shader cache support for pull param pointers' * Substantially simplified by a rework on top of Jason's 2975e4c56a7a. * Rename load_program_data to read_program_data. (Jason) v3: * Simplify and align program read/write. (Jason) v4: * Don't save prog_data size since we know it from the stage. (Ken) * Don't save program size, since prog_data includes the size. (Ken) * Remove `assert` that potentially could be triggered by disk corruption of the cache entries. (Ken) * Fix compute shader scratch allocation. (Ken) * Remove special case mapping for non-LLC. 
(Ken)
 * Remove SET_UPLOAD_PARAMS macro

[jordan.l.justen@intel.com: *_cached_program => brw_disk_cache_*_program]
[jordan.l.justen@intel.com: brw_shader_cache.c => brw_disk_cache.c]
[jordan.l.justen@intel.com: don't map to write program when LLC is present]
[jordan.l.justen@intel.com: set program_written_to_cache on read from cache]
[jordan.l.justen@intel.com: only try cache when status is linking_skipped]
[jordan.l.justen@intel.com: all v2-v4 changes noted above]
Signed-off-by: Jordan Justen
Reviewed-by: Jason Ekstrand
Reviewed-by: Kenneth Graunke
---

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 053d89b81ec..2980cdb3c54 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -14,6 +14,7 @@ i965_FILES = \
 	brw_cs.h \
 	brw_curbe.c \
 	brw_defines.h \
+	brw_disk_cache.c \
 	brw_draw.c \
 	brw_draw.h \
 	brw_draw_upload.c \
diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c
new file mode 100644
index 00000000000..54da1abab63
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "compiler/blob.h"
+#include "compiler/glsl/ir_uniform.h"
+#include "compiler/glsl/shader_cache.h"
+#include "main/mtypes.h"
+#include "util/disk_cache.h"
+#include "util/macros.h"
+#include "util/mesa-sha1.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_vs.h"
+#include "brw_wm.h"
+
+/**
+ * Compute the disk cache key for one stage of a linked program.
+ *
+ * A text manifest is built from the hex form of prog->sh.data->sha1 and the
+ * hex SHA-1 of the stage's program key (brw_prog_key_size(stage) bytes at
+ * key); the SHA-1 of that manifest is written to out_sha1.
+ *
+ * brw is currently unused.
+ */
+static void
+gen_shader_sha1(struct brw_context *brw, struct gl_program *prog,
+                gl_shader_stage stage, void *key, unsigned char *out_sha1)
+{
+   char sha1_buf[41];
+   unsigned char sha1[20];
+   char manifest[256];
+   int offset = 0;
+
+   _mesa_sha1_format(sha1_buf, prog->sh.data->sha1);
+   offset += snprintf(manifest, sizeof(manifest), "program: %s\n", sha1_buf);
+
+   _mesa_sha1_compute(key, brw_prog_key_size(stage), sha1);
+   _mesa_sha1_format(sha1_buf, sha1);
+   offset += snprintf(manifest + offset, sizeof(manifest) - offset,
+                      "%s_key: %s\n", _mesa_shader_stage_to_abbrev(stage),
+                      sha1_buf);
+
+   _mesa_sha1_compute(manifest, strlen(manifest), out_sha1);
+}
+
+/**
+ * Serialize a compiled stage into a blob.
+ *
+ * Written in order: the raw prog_data struct (brw_prog_data_size(stage)
+ * bytes), the program binary (prog_data->program_size bytes), the push
+ * params and the pull params (one uint32_t each).
+ * read_blob_program_data() consumes the fields in the same order.
+ */
+static void
+write_blob_program_data(struct blob *binary, gl_shader_stage stage,
+                        const void *program,
+                        struct brw_stage_prog_data *prog_data)
+{
+   /* Write prog_data to blob. */
+   blob_write_bytes(binary, prog_data, brw_prog_data_size(stage));
+
+   /* Write program to blob. */
+   blob_write_bytes(binary, program, prog_data->program_size);
+
+   /* Write push params */
+   blob_write_bytes(binary, prog_data->param,
+                    sizeof(uint32_t) * prog_data->nr_params);
+
+   /* Write pull params */
+   blob_write_bytes(binary, prog_data->pull_param,
+                    sizeof(uint32_t) * prog_data->nr_pull_params);
+}
+
+/**
+ * Deserialize a stage written by write_blob_program_data().
+ *
+ * On success *program holds the pointer returned by blob_read_bytes() for
+ * the program binary, and prog_data->param / prog_data->pull_param are newly
+ * allocated ralloc arrays with a NULL parent context.
+ *
+ * prog is currently unused.
+ *
+ * \return true only if nothing overran the blob and the blob was consumed
+ *         exactly.  On failure no param array allocated here is left alive,
+ *         and the contents of *prog_data must not be used.
+ */
+static bool
+read_blob_program_data(struct blob_reader *binary, struct gl_program *prog,
+                       gl_shader_stage stage, const uint8_t **program,
+                       struct brw_stage_prog_data *prog_data)
+{
+   /* Read shader prog_data from blob. */
+   blob_copy_bytes(binary, prog_data, brw_prog_data_size(stage));
+   if (binary->overrun)
+      return false;
+
+   /* The struct was written out raw, so the param pointers just copied are
+    * stale values from the process that wrote the entry.  Clear them so
+    * nothing can mistake them for live allocations.
+    */
+   prog_data->param = NULL;
+   prog_data->pull_param = NULL;
+
+   /* Read shader program from blob. */
+   *program = blob_read_bytes(binary, prog_data->program_size);
+   if (binary->overrun)
+      return false;
+
+   /* Read push params */
+   prog_data->param = rzalloc_array(NULL, uint32_t, prog_data->nr_params);
+   blob_copy_bytes(binary, prog_data->param,
+                   sizeof(uint32_t) * prog_data->nr_params);
+
+   /* Read pull params */
+   prog_data->pull_param = rzalloc_array(NULL, uint32_t,
+                                         prog_data->nr_pull_params);
+   blob_copy_bytes(binary, prog_data->pull_param,
+                   sizeof(uint32_t) * prog_data->nr_pull_params);
+
+   if (binary->overrun || binary->current != binary->end) {
+      /* Truncated or oversized entry: don't leak the arrays just allocated. */
+      ralloc_free(prog_data->param);
+      ralloc_free(prog_data->pull_param);
+      prog_data->param = NULL;
+      prog_data->pull_param = NULL;
+      return false;
+   }
+
+   return true;
+}
+
+/**
+ * Look up one stage of prog in the disk cache and, on a hit, upload the
+ * cached binary into the in-memory program cache (brw->cache).
+ *
+ * Only the vertex and fragment stages are handled.  An entry that fails to
+ * deserialize is removed from the disk cache.
+ *
+ * \return true if the program was found and uploaded, false otherwise.
+ */
+static bool
+read_and_upload(struct brw_context *brw, struct disk_cache *cache,
+                struct gl_program *prog, gl_shader_stage stage)
+{
+   unsigned char binary_sha1[20];
+
+   union brw_any_prog_key prog_key;
+
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      brw_vs_populate_key(brw, &prog_key.vs);
+      /* We don't care what instance of the program it is for the disk cache
+       * hash lookup, so set the id to 0 for the sha1 hashing.
+       * program_string_id will be set below.
+       */
+      prog_key.vs.program_string_id = 0;
+      break;
+   case MESA_SHADER_FRAGMENT:
+      brw_wm_populate_key(brw, &prog_key.wm);
+      prog_key.wm.program_string_id = 0;
+      break;
+   default:
+      unreachable("Unsupported stage!");
+   }
+
+   gen_shader_sha1(brw, prog, stage, &prog_key, binary_sha1);
+
+   size_t buffer_size;
+   uint8_t *buffer = disk_cache_get(cache, binary_sha1, &buffer_size);
+   if (buffer == NULL) {
+      if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+         char sha1_buf[41];
+         _mesa_sha1_format(sha1_buf, binary_sha1);
+         fprintf(stderr, "No cached %s binary found for: %s\n",
+                 _mesa_shader_stage_to_abbrev(stage), sha1_buf);
+      }
+      return false;
+   }
+
+   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+      char sha1_buf[41];
+      _mesa_sha1_format(sha1_buf, binary_sha1);
+      fprintf(stderr, "attempting to populate bo cache with binary: %s\n",
+              sha1_buf);
+   }
+
+   struct blob_reader binary;
+   blob_reader_init(&binary, buffer, buffer_size);
+
+   const uint8_t *program;
+   struct brw_stage_prog_data *prog_data =
+      ralloc_size(NULL, sizeof(union brw_any_prog_data));
+   if (!read_blob_program_data(&binary, prog, stage, &program, prog_data)) {
+      /* Something very bad has gone wrong; discard the item from the cache
+       * and rebuild from source.
+       */
+      if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+         fprintf(stderr, "Error reading program from cache (invalid i965 "
+                 "cache item)\n");
+      }
+
+      disk_cache_remove(cache, binary_sha1);
+      ralloc_free(prog_data);
+      free(buffer);
+      return false;
+   }
+
+   enum brw_cache_id cache_id;
+   struct brw_stage_state *stage_state;
+
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      prog_key.vs.program_string_id = brw_program(prog)->id;
+      cache_id = BRW_CACHE_VS_PROG;
+      stage_state = &brw->vs.base;
+      break;
+   case MESA_SHADER_FRAGMENT:
+      prog_key.wm.program_string_id = brw_program(prog)->id;
+      cache_id = BRW_CACHE_FS_PROG;
+      stage_state = &brw->wm.base;
+      break;
+   default:
+      unreachable("Unsupported stage!");
+   }
+
+   brw_alloc_stage_scratch(brw, stage_state, prog_data->total_scratch);
+
+   brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage),
+                    program, prog_data->program_size, prog_data,
+                    brw_prog_data_size(stage), &stage_state->prog_offset,
+                    &stage_state->prog_data);
+
+   prog->program_written_to_cache = true;
+
+   /* Drop the temporary struct; the param arrays have a NULL ralloc parent
+    * and are not freed with it.  NOTE(review): this assumes
+    * brw_upload_cache() copied the aux data (it is given the struct's size
+    * and returns its own pointer via stage_state->prog_data) -- confirm.
+    */
+   ralloc_free(prog_data);
+   free(buffer);
+
+   return true;
+}
+
+/**
+ * Try to satisfy the current program for a stage from the disk cache.
+ *
+ * Nothing is attempted unless a disk cache exists, a program is bound for
+ * the stage, and that program's LinkStatus is linking_skipped.
+ *
+ * \return true if the stage's binary was read from the cache and uploaded,
+ *         false otherwise.
+ */
+bool
+brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage)
+{
+   struct disk_cache *cache = brw->ctx.Cache;
+   if (cache == NULL)
+      return false;
+
+   struct gl_program *prog = brw->ctx._Shader->CurrentProgram[stage];
+   if (prog == NULL)
+      return false;
+
+   if (prog->sh.data->LinkStatus != linking_skipped)
+      goto fail;
+
+   if (!read_and_upload(brw, cache, prog, stage))
+      goto fail;
+
+   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+      fprintf(stderr, "read gen program from cache\n");
+   }
+
+   return true;
+
+fail:
+   /* FIXME: Fall back and compile from source here. */
+   return false;
+}
+
+/**
+ * Serialize one compiled stage and store it in the disk cache.
+ *
+ * The program binary is read from brw->cache.map + prog_offset.  The cache
+ * key comes from gen_shader_sha1(prog, key), so key has to be prepared the
+ * same way read_and_upload() prepares its key (program_string_id zeroed).
+ * prog is marked as written so that it is not stored again.
+ */
+static void
+write_program_data(struct brw_context *brw, struct gl_program *prog,
+                   void *key, struct brw_stage_prog_data *prog_data,
+                   uint32_t prog_offset, struct disk_cache *cache,
+                   gl_shader_stage stage)
+{
+   struct blob binary;
+   blob_init(&binary);
+
+   const void *program_map = brw->cache.map + prog_offset;
+   /* TODO: Improve perf for non-LLC. It would be best to save it at program
+    * generation time when the program is in normal memory accessible with
+    * cache to the CPU. Another easier change would be to use
+    * _mesa_streaming_load_memcpy to read from the program mapped memory. */
+   write_blob_program_data(&binary, stage, program_map, prog_data);
+
+   unsigned char sha1[20];
+   gen_shader_sha1(brw, prog, stage, key, sha1);
+   if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
+      char buf[41];
+      _mesa_sha1_format(buf, sha1);
+      fprintf(stderr, "putting binary in cache: %s\n", buf);
+   }
+
+   disk_cache_put(cache, sha1, binary.data, binary.size, NULL);
+
+   prog->program_written_to_cache = true;
+   blob_finish(&binary);
+}
+
+/**
+ * Store the currently bound vertex and fragment programs in the disk cache,
+ * skipping any program already marked program_written_to_cache.
+ *
+ * Does nothing when there is no disk cache.
+ */
+void
+brw_disk_cache_write_program(struct brw_context *brw)
+{
+   struct disk_cache *cache = brw->ctx.Cache;
+   if (cache == NULL)
+      return;
+
+   struct gl_program *prog =
+      brw->ctx._Shader->CurrentProgram[MESA_SHADER_VERTEX];
+   if (prog && !prog->program_written_to_cache) {
+      struct brw_vs_prog_key vs_key;
+      brw_vs_populate_key(brw, &vs_key);
+      vs_key.program_string_id = 0;
+
+      write_program_data(brw, prog, &vs_key, brw->vs.base.prog_data,
+                         brw->vs.base.prog_offset, cache,
+                         MESA_SHADER_VERTEX);
+   }
+
+   prog = brw->ctx._Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
+   if (prog && !prog->program_written_to_cache) {
+      struct brw_wm_prog_key wm_key;
+      brw_wm_populate_key(brw, &wm_key);
+      wm_key.program_string_id = 0;
+
+      write_program_data(brw, prog, &wm_key, brw->wm.base.prog_data,
+                         brw->wm.base.prog_offset, cache,
+                         MESA_SHADER_FRAGMENT);
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 8db354cf232..6f2e0501b4b 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -131,6 +131,11 @@ void brw_upload_state_base_address(struct brw_context *brw);
 void gen8_write_pma_stall_bits(struct brw_context *brw,
                                uint32_t pma_stall_bits);
 
+/* brw_disk_cache.c */
+bool brw_disk_cache_upload_program(struct brw_context *brw,
+                                   gl_shader_stage stage);
+void brw_disk_cache_write_program(struct brw_context *brw);
+
 /***********************************************************************
  * brw_state.c
  */
diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build
index 144a254bd64..09e1179adc4 100644
--- a/src/mesa/drivers/dri/i965/meson.build
+++ b/src/mesa/drivers/dri/i965/meson.build
@@ -34,6 +34,7 @@ files_i965 = files(
   'brw_cs.h',
   'brw_curbe.c',
   'brw_defines.h',
+  'brw_disk_cache.c',
   'brw_draw.c',
   'brw_draw.h',
   'brw_draw_upload.c',