#include "compiler/nir/nir_serialize.h"
#include "nir/tgsi_to_nir.h"
-#include "tgsi/tgsi_parse.h"
#include "util/hash_table.h"
#include "util/crc32.h"
#include "util/u_async_debug.h"
/* SHADER_CACHE */
/**
- * Return the IR binary in a buffer. For TGSI the first 4 bytes contain its
- * size as integer.
+ * Return the IR key for the shader cache.
*/
-void *si_get_ir_binary(struct si_shader_selector *sel, bool ngg, bool es)
+void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
+ unsigned char ir_sha1_cache_key[20])
{
- struct blob blob;
+ struct blob blob = {};
unsigned ir_size;
void *ir_binary;
- if (sel->tokens) {
- ir_binary = sel->tokens;
- ir_size = tgsi_num_tokens(sel->tokens) *
- sizeof(struct tgsi_token);
+ if (sel->nir_binary) {
+ ir_binary = sel->nir_binary;
+ ir_size = sel->nir_size;
} else {
assert(sel->nir);
if (sel->force_correct_derivs_after_kill)
shader_variant_flags |= 1 << 3;
- unsigned size = 4 + 4 + ir_size + sizeof(sel->so);
- char *result = (char*)MALLOC(size);
- if (!result)
- return NULL;
-
- ((uint32_t*)result)[0] = size;
- ((uint32_t*)result)[1] = shader_variant_flags;
- memcpy(result + 8, ir_binary, ir_size);
- memcpy(result + 8 + ir_size, &sel->so, sizeof(sel->so));
-
- if (sel->nir)
+ struct mesa_sha1 ctx;
+ _mesa_sha1_init(&ctx);
+ _mesa_sha1_update(&ctx, &shader_variant_flags, 4);
+ _mesa_sha1_update(&ctx, ir_binary, ir_size);
+ if (sel->type == PIPE_SHADER_VERTEX ||
+ sel->type == PIPE_SHADER_TESS_EVAL ||
+ sel->type == PIPE_SHADER_GEOMETRY)
+ _mesa_sha1_update(&ctx, &sel->so, sizeof(sel->so));
+ _mesa_sha1_final(&ctx, ir_sha1_cache_key);
+
+ if (ir_binary == blob.data)
blob_finish(&blob);
-
- return result;
}
/** Copy "data" to "ptr" and return the next dword following copied data. */
/**
* Insert a shader into the cache. It's assumed the shader is not in the cache.
* Use si_shader_cache_load_shader before calling this.
- *
- * Returns false on failure, in which case the ir_binary should be freed.
*/
-bool si_shader_cache_insert_shader(struct si_screen *sscreen, void *ir_binary,
+void si_shader_cache_insert_shader(struct si_screen *sscreen,
+ unsigned char ir_sha1_cache_key[20],
struct si_shader *shader,
bool insert_into_disk_cache)
{
struct hash_entry *entry;
uint8_t key[CACHE_KEY_SIZE];
- entry = _mesa_hash_table_search(sscreen->shader_cache, ir_binary);
+ entry = _mesa_hash_table_search(sscreen->shader_cache, ir_sha1_cache_key);
if (entry)
- return false; /* already added */
+ return; /* already added */
hw_binary = si_get_shader_binary(shader);
if (!hw_binary)
- return false;
+ return;
- if (_mesa_hash_table_insert(sscreen->shader_cache, ir_binary,
+ if (_mesa_hash_table_insert(sscreen->shader_cache,
+ mem_dup(ir_sha1_cache_key, 20),
hw_binary) == NULL) {
FREE(hw_binary);
- return false;
+ return;
}
if (sscreen->disk_shader_cache && insert_into_disk_cache) {
- disk_cache_compute_key(sscreen->disk_shader_cache, ir_binary,
- *((uint32_t *)ir_binary), key);
+ disk_cache_compute_key(sscreen->disk_shader_cache,
+ ir_sha1_cache_key, 20, key);
disk_cache_put(sscreen->disk_shader_cache, key, hw_binary,
*((uint32_t *) hw_binary), NULL);
}
-
- return true;
}
-bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary,
+bool si_shader_cache_load_shader(struct si_screen *sscreen,
+ unsigned char ir_sha1_cache_key[20],
struct si_shader *shader)
{
struct hash_entry *entry =
- _mesa_hash_table_search(sscreen->shader_cache, ir_binary);
+ _mesa_hash_table_search(sscreen->shader_cache, ir_sha1_cache_key);
if (!entry) {
if (sscreen->disk_shader_cache) {
unsigned char sha1[CACHE_KEY_SIZE];
- size_t tg_size = *((uint32_t *) ir_binary);
disk_cache_compute_key(sscreen->disk_shader_cache,
- ir_binary, tg_size, sha1);
+ ir_sha1_cache_key, 20, sha1);
size_t binary_size;
uint8_t *buffer =
}
free(buffer);
- if (!si_shader_cache_insert_shader(sscreen, ir_binary,
- shader, false))
- FREE(ir_binary);
+ si_shader_cache_insert_shader(sscreen, ir_sha1_cache_key,
+ shader, false);
} else {
return false;
}
} else {
- if (si_load_shader_binary(shader, entry->data))
- FREE(ir_binary);
- else
+ if (!si_load_shader_binary(shader, entry->data))
return false;
}
p_atomic_inc(&sscreen->num_shader_cache_hits);
static uint32_t si_shader_cache_key_hash(const void *key)
{
- /* The first dword is the key size. */
- return util_hash_crc32(key, *(uint32_t*)key);
+ /* The key is a 20-byte SHA1, so its first dword is used directly as
+ * the hash value. Copy it out with memcpy instead of dereferencing a
+ * casted pointer: lookups pass an unsigned char[20] buffer, which is
+ * not guaranteed to be dword-aligned and must not be read through a
+ * uint32_t lvalue.
+ */
+ uint32_t hash;
+
+ memcpy(&hash, key, sizeof(hash));
+ return hash;
}
static bool si_shader_cache_key_equals(const void *a, const void *b)
{
- uint32_t *keya = (uint32_t*)a;
- uint32_t *keyb = (uint32_t*)b;
-
- /* The first dword is the key size. */
- if (*keya != *keyb)
- return false;
-
- return memcmp(keya, keyb, *keya) == 0;
+ /* Compare SHA1s: both keys are fixed-size 20-byte digests, so they
+ * are equal iff all 20 bytes match and no size prefix is needed.
+ */
+ return memcmp(a, b, 20) == 0;
}
static void si_destroy_shader_cache_entry(struct hash_entry *entry)
shader->ctx_reg.ngg.vgt_primitiveid_en =
S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
- S_028A84_NGG_DISABLE_PROVOK_REUSE(es_enable_prim_id);
+ S_028A84_NGG_DISABLE_PROVOK_REUSE(shader->key.mono.u.vs_export_prim_id ||
+ gs_sel->info.writes_primid);
if (gs_type == PIPE_SHADER_GEOMETRY) {
shader->ctx_reg.ngg.vgt_esgs_ring_itemsize = es_sel->esgs_itemsize / 4;
shader->ge_cntl =
S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
- S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts) |
+ S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */
S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
/* Bug workaround for a possible hang with non-tessellation cases.
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
shader->vgt_vertex_reuse_block_cntl);
- if (initial_cdw != sctx->gfx_cs->current.cdw)
- sctx->context_roll = true;
-
/* Required programming for tessellation. (legacy pipeline only) */
if (sctx->chip_class == GFX10 &&
shader->selector->type == PIPE_SHADER_TESS_EVAL) {
shader->pa_cl_vs_out_cntl,
SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
}
+
+ if (initial_cdw != sctx->gfx_cs->current.cdw)
+ sctx->context_roll = true;
}
/**
main_part->key.as_ngg = key->as_ngg;
main_part->is_monolithic = false;
- if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
+ if (si_compile_shader(sscreen, compiler_state->compiler,
main_part, &compiler_state->debug) != 0) {
FREE(main_part);
return false;
if (!compiler->passes)
si_init_compiler(sscreen, compiler);
+ /* Serialize NIR to save memory. Monolithic shader variants
+ * have to deserialize NIR before compilation.
+ */
+ if (sel->nir) {
+ struct blob blob;
+ size_t size;
+
+ blob_init(&blob);
+ /* true = remove optional debugging data to increase
+ * the likelihood of getting more shader cache hits.
+ * It also drops variable names, so we'll save more memory.
+ */
+ nir_serialize(&blob, sel->nir, true);
+ blob_finish_get_buffer(&blob, &sel->nir_binary, &size);
+ sel->nir_size = size;
+ }
+
/* Compile the main shader part for use with a prolog and/or epilog.
* If this fails, the driver will try to compile a monolithic shader
* on demand.
*/
if (!sscreen->use_monolithic_shaders) {
struct si_shader *shader = CALLOC_STRUCT(si_shader);
- void *ir_binary = NULL;
+ unsigned char ir_sha1_cache_key[20];
if (!shader) {
fprintf(stderr, "radeonsi: can't allocate a main shader part\n");
sel->type == PIPE_SHADER_GEOMETRY))
shader->key.as_ngg = 1;
- if (sel->tokens || sel->nir) {
- ir_binary = si_get_ir_binary(sel, shader->key.as_ngg,
- shader->key.as_es);
+ if (sel->nir) {
+ si_get_ir_cache_key(sel, shader->key.as_ngg,
+ shader->key.as_es, ir_sha1_cache_key);
}
/* Try to load the shader from the shader cache. */
simple_mtx_lock(&sscreen->shader_cache_mutex);
- if (ir_binary &&
- si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
+ if (si_shader_cache_load_shader(sscreen, ir_sha1_cache_key, shader)) {
simple_mtx_unlock(&sscreen->shader_cache_mutex);
si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
} else {
simple_mtx_unlock(&sscreen->shader_cache_mutex);
/* Compile the shader if it hasn't been loaded from the cache. */
- if (si_compile_tgsi_shader(sscreen, compiler, shader,
+ if (si_compile_shader(sscreen, compiler, shader,
debug) != 0) {
FREE(shader);
- FREE(ir_binary);
fprintf(stderr, "radeonsi: can't compile a main shader part\n");
return;
}
- if (ir_binary) {
- simple_mtx_lock(&sscreen->shader_cache_mutex);
- if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
- FREE(ir_binary);
- simple_mtx_unlock(&sscreen->shader_cache_mutex);
- }
+ simple_mtx_lock(&sscreen->shader_cache_mutex);
+ si_shader_cache_insert_shader(sscreen, ir_sha1_cache_key,
+ shader, true);
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
}
*si_get_main_shader_part(sel, &shader->key) = shader;
si_shader_vs(sscreen, sel->gs_copy_shader, sel);
}
+
+ /* Free NIR. We only keep serialized NIR after this point. */
+ if (sel->nir) {
+ ralloc_free(sel->nir);
+ sel->nir = NULL;
+ }
}
void si_schedule_initial_compile(struct si_context *sctx, unsigned processor,
sel->so = state->stream_output;
- if (state->type == PIPE_SHADER_IR_TGSI &&
- !sscreen->options.enable_nir) {
- sel->tokens = tgsi_dup_tokens(state->tokens);
- if (!sel->tokens) {
- FREE(sel);
- return NULL;
- }
-
- tgsi_scan_shader(state->tokens, &sel->info);
- tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info);
-
- /* Fixup for TGSI: Set which opcode uses which (i,j) pair. */
- if (sel->info.uses_persp_opcode_interp_centroid)
- sel->info.uses_persp_centroid = true;
-
- if (sel->info.uses_linear_opcode_interp_centroid)
- sel->info.uses_linear_centroid = true;
-
- if (sel->info.uses_persp_opcode_interp_offset ||
- sel->info.uses_persp_opcode_interp_sample)
- sel->info.uses_persp_center = true;
-
- if (sel->info.uses_linear_opcode_interp_offset ||
- sel->info.uses_linear_opcode_interp_sample)
- sel->info.uses_linear_center = true;
+ if (state->type == PIPE_SHADER_IR_TGSI) {
+ sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
} else {
- if (state->type == PIPE_SHADER_IR_TGSI) {
- sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
- } else {
- assert(state->type == PIPE_SHADER_IR_NIR);
- sel->nir = state->ir.nir;
- }
-
- si_nir_scan_shader(sel->nir, &sel->info);
- si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
- si_nir_adjust_driver_locations(sel->nir);
+ assert(state->type == PIPE_SHADER_IR_NIR);
+ sel->nir = state->ir.nir;
}
+ si_nir_scan_shader(sel->nir, &sel->info);
+ si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
+ si_nir_adjust_driver_locations(sel->nir);
+
sel->type = sel->info.processor;
p_atomic_inc(&sscreen->num_shaders_created);
si_get_active_slot_masks(&sel->info,
(sel->so.output[i].stream * 4);
}
+ sel->num_vs_inputs = sel->type == PIPE_SHADER_VERTEX &&
+ !sel->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD] ?
+ sel->info.num_inputs : 0;
+
/* The prolog is a no-op if there are no inputs. */
sel->vs_needs_prolog = sel->type == PIPE_SHADER_VERTEX &&
sel->info.num_inputs &&
/* EN_MAX_VERT_OUT_PER_GS_INSTANCE does not work with tesselation. */
sel->tess_turns_off_ngg =
- (sscreen->info.family == CHIP_NAVI10 ||
- sscreen->info.family == CHIP_NAVI12 ||
- sscreen->info.family == CHIP_NAVI14) &&
+ sscreen->info.chip_class == GFX10 &&
sel->gs_num_invocations * sel->gs_max_out_vertices > 256;
break;
sctx->flags |= SI_CONTEXT_VGT_FLUSH;
sctx->ngg = new_ngg;
- sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+ sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
return true;
}
return false;
sctx->ia_multi_vgt_param_key.u.uses_gs = sel != NULL;
si_update_common_shader_state(sctx);
- sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+ sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
ngg_changed = si_update_ngg(sctx);
if (ngg_changed || enable_changed)
si_update_tess_uses_prim_id(sctx);
si_update_common_shader_state(sctx);
- sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+ sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
bool ngg_changed = si_update_ngg(sctx);
if (ngg_changed || enable_changed)
util_queue_fence_destroy(&sel->ready);
simple_mtx_destroy(&sel->mutex);
- free(sel->tokens);
ralloc_free(sel->nir);
+ free(sel->nir_binary);
free(sel);
}
}
if (key.u.ngg) {
- stages |= S_028B54_PRIMGEN_EN(1);
- if (key.u.streamout)
- stages |= S_028B54_NGG_WAVE_ID_EN(1);
+ stages |= S_028B54_PRIMGEN_EN(1) |
+ S_028B54_NGG_WAVE_ID_EN(key.u.streamout) |
+ S_028B54_PRIMGEN_PASSTHRU_EN(key.u.ngg_passthrough);
} else if (key.u.gs)
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
int r;
+ if (!sctx->compiler.passes)
+ si_init_compiler(sctx->screen, &sctx->compiler);
+
compiler_state.compiler = &sctx->compiler;
compiler_state.debug = sctx->debug;
compiler_state.is_debug_context = sctx->is_debug;
}
}
+ /* This must be done after the shader variant is selected. */
+ if (sctx->ngg)
+ key.u.ngg_passthrough = gfx10_is_ngg_passthrough(si_get_vs(sctx)->current);
+
si_update_vgt_shader_config(sctx, key);
if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable)