bool si_init_shader_cache(struct si_screen *sscreen)
{
- (void) mtx_init(&sscreen->shader_cache_mutex, mtx_plain);
+ (void) simple_mtx_init(&sscreen->shader_cache_mutex, mtx_plain);
sscreen->shader_cache =
_mesa_hash_table_create(NULL,
si_shader_cache_key_hash,
if (sscreen->shader_cache)
_mesa_hash_table_destroy(sscreen->shader_cache,
si_destroy_shader_cache_entry);
- mtx_destroy(&sscreen->shader_cache_mutex);
+ simple_mtx_destroy(&sscreen->shader_cache_mutex);
}
/* SHADER STATES */
if (thread_index < 0)
util_queue_fence_wait(&sel->ready);
- mtx_lock(&sel->mutex);
+ simple_mtx_lock(&sel->mutex);
/* Find the shader variant. */
for (iter = sel->first_variant; iter; iter = iter->next_variant) {
/* Don't check the "current" shader. We checked it above. */
if (current != iter &&
memcmp(&iter->key, key, sizeof(*key)) == 0) {
- mtx_unlock(&sel->mutex);
+ simple_mtx_unlock(&sel->mutex);
if (unlikely(!util_queue_fence_is_signalled(&iter->ready))) {
/* If it's an optimized shader and its compilation has
/* Build a new shader. */
shader = CALLOC_STRUCT(si_shader);
if (!shader) {
- mtx_unlock(&sel->mutex);
+ simple_mtx_unlock(&sel->mutex);
return -ENOMEM;
}
assert(0);
}
- mtx_lock(&previous_stage_sel->mutex);
+ simple_mtx_lock(&previous_stage_sel->mutex);
ok = si_check_missing_main_part(sscreen,
previous_stage_sel,
compiler_state, &shader1_key);
- mtx_unlock(&previous_stage_sel->mutex);
+ simple_mtx_unlock(&previous_stage_sel->mutex);
}
if (ok) {
if (!ok) {
FREE(shader);
- mtx_unlock(&sel->mutex);
+ simple_mtx_unlock(&sel->mutex);
return -ENOMEM; /* skip the draw call */
}
}
/* Compile it asynchronously. */
util_queue_add_job(&sscreen->shader_compiler_queue_low_priority,
shader, &shader->ready,
- si_build_shader_variant_low_priority, NULL);
+ si_build_shader_variant_low_priority, NULL,
+ 0);
/* Add only after the ready fence was reset, to guard against a
* race with si_bind_XX_shader. */
/* Use the default (unoptimized) shader for now. */
memset(&key->opt, 0, sizeof(key->opt));
- mtx_unlock(&sel->mutex);
+ simple_mtx_unlock(&sel->mutex);
if (sscreen->options.sync_compile)
util_queue_fence_wait(&shader->ready);
sel->last_variant = shader;
}
- mtx_unlock(&sel->mutex);
+ simple_mtx_unlock(&sel->mutex);
assert(!shader->is_optimized);
si_build_shader_variant(shader, thread_index, false);
}
/* Try to load the shader from the shader cache. */
- mtx_lock(&sscreen->shader_cache_mutex);
+ simple_mtx_lock(&sscreen->shader_cache_mutex);
if (ir_binary &&
si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
- mtx_unlock(&sscreen->shader_cache_mutex);
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
} else {
- mtx_unlock(&sscreen->shader_cache_mutex);
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
/* Compile the shader if it hasn't been loaded from the cache. */
if (si_compile_tgsi_shader(sscreen, compiler, shader,
}
if (ir_binary) {
- mtx_lock(&sscreen->shader_cache_mutex);
+ simple_mtx_lock(&sscreen->shader_cache_mutex);
if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
FREE(ir_binary);
- mtx_unlock(&sscreen->shader_cache_mutex);
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
}
}
}
util_queue_add_job(&sctx->screen->shader_compiler_queue, job,
- ready_fence, execute, NULL);
+ ready_fence, execute, NULL, 0);
if (debug) {
util_queue_fence_wait(ready_fence);
if (sel->info.properties[TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE])
sel->db_shader_control |= S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(1);
- (void) mtx_init(&sel->mutex, mtx_plain);
+ (void) simple_mtx_init(&sel->mutex, mtx_plain);
si_schedule_initial_compile(sctx, sel->info.processor, &sel->ready,
&sel->compiler_ctx_state, sel,
si_delete_shader(sctx, sel->gs_copy_shader);
util_queue_fence_destroy(&sel->ready);
- mtx_destroy(&sel->mutex);
+ simple_mtx_destroy(&sel->mutex);
free(sel->tokens);
ralloc_free(sel->nir);
free(sel);
pipe_aligned_buffer_create(sctx->b.screen,
SI_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
- esgs_ring_size, alignment);
+ esgs_ring_size,
+ sctx->screen->info.pte_fragment_size);
if (!sctx->esgs_ring)
return false;
}
pipe_aligned_buffer_create(sctx->b.screen,
SI_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
- gsvs_ring_size, alignment);
+ gsvs_ring_size,
+ sctx->screen->info.pte_fragment_size);
if (!sctx->gsvs_ring)
return false;
}
/*
 * Acquire the mutexes associated with a shader.
 *
 * The mutex of shader->selector is always taken first. If
 * shader->previous_stage_sel is set, its mutex is taken second.
 * This patch only switches the calls from mtx_lock to simple_mtx_lock;
 * the set of locks and their acquisition order are unchanged.
 */
static void si_shader_lock(struct si_shader *shader)
{
- mtx_lock(&shader->selector->mutex);
+ simple_mtx_lock(&shader->selector->mutex);
/* NOTE(review): the assert below presumably guards against taking the
 * same mutex twice on one thread - confirm against the mutex type. */
if (shader->previous_stage_sel) {
assert(shader->previous_stage_sel != shader->selector);
- mtx_lock(&shader->previous_stage_sel->mutex);
+ simple_mtx_lock(&shader->previous_stage_sel->mutex);
}
}
/*
 * Release the mutexes associated with a shader.
 *
 * If shader->previous_stage_sel is set, its mutex is released first;
 * the mutex of shader->selector is then released unconditionally.
 * This patch only switches the calls from mtx_unlock to
 * simple_mtx_unlock; the release order is unchanged.
 */
static void si_shader_unlock(struct si_shader *shader)
{
if (shader->previous_stage_sel)
- mtx_unlock(&shader->previous_stage_sel->mutex);
- mtx_unlock(&shader->selector->mutex);
+ simple_mtx_unlock(&shader->previous_stage_sel->mutex);
+ simple_mtx_unlock(&shader->selector->mutex);
}
/**
return 1;
}
-static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
-{
- return sctx->scratch_buffer ? sctx->scratch_buffer->b.b.width0 : 0;
-}
-
static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader)
{
return shader ? shader->config.scratch_bytes_per_wave : 0;
sctx->fixed_func_tcs_shader.current;
}
-static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
-{
- unsigned bytes = 0;
-
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current));
-
- if (sctx->tes_shader.cso) {
- struct si_shader *tcs = si_get_tcs_current(sctx);
-
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(tcs));
- }
- return bytes;
-}
-
static bool si_update_scratch_relocs(struct si_context *sctx)
{
struct si_shader *tcs = si_get_tcs_current(sctx);
static bool si_update_spi_tmpring_size(struct si_context *sctx)
{
- unsigned current_scratch_buffer_size =
- si_get_current_scratch_buffer_size(sctx);
- unsigned scratch_bytes_per_wave =
- si_get_max_scratch_bytes_per_wave(sctx);
- unsigned scratch_needed_size = scratch_bytes_per_wave *
- sctx->scratch_waves;
+ /* SPI_TMPRING_SIZE.WAVESIZE must be constant for each scratch buffer.
+ * There are 2 cases to handle:
+ *
+ * - If the current needed size is less than the maximum seen size,
+ * use the maximum seen size, so that WAVESIZE remains the same.
+ *
+ * - If the current needed size is greater than the maximum seen size,
+ * the scratch buffer is reallocated, so we can increase WAVESIZE.
+ *
+ * Shaders that set SCRATCH_EN=0 don't allocate scratch space.
+ * Otherwise, the number of waves that can use scratch is
+ * SPI_TMPRING_SIZE.WAVES.
+ */
+ unsigned bytes = 0;
+
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current));
+
+ if (sctx->tes_shader.cso) {
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(si_get_tcs_current(sctx)));
+ }
+
+ sctx->max_seen_scratch_bytes_per_wave =
+ MAX2(sctx->max_seen_scratch_bytes_per_wave, bytes);
+
+ unsigned scratch_needed_size =
+ sctx->max_seen_scratch_bytes_per_wave * sctx->scratch_waves;
unsigned spi_tmpring_size;
if (scratch_needed_size > 0) {
- if (scratch_needed_size > current_scratch_buffer_size) {
+ if (!sctx->scratch_buffer ||
+ scratch_needed_size > sctx->scratch_buffer->b.b.width0) {
/* Create a bigger scratch buffer */
si_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer =
si_aligned_buffer_create(&sctx->screen->b,
- SI_RESOURCE_FLAG_UNMAPPABLE,
- PIPE_USAGE_DEFAULT,
- scratch_needed_size, 256);
+ SI_RESOURCE_FLAG_UNMAPPABLE,
+ PIPE_USAGE_DEFAULT,
+ scratch_needed_size,
+ sctx->screen->info.pte_fragment_size);
if (!sctx->scratch_buffer)
return false;
"scratch size should already be aligned correctly.");
spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
- S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
+ S_0286E8_WAVESIZE(sctx->max_seen_scratch_bytes_per_wave >> 10);
if (spi_tmpring_size != sctx->spi_tmpring_size) {
sctx->spi_tmpring_size = spi_tmpring_size;
si_mark_atom_dirty(sctx, &sctx->atoms.s.scratch_state);