si_nir_opts(sel->nir);
si_nir_scan_shader(sel->nir, &sel->info);
- si_lower_nir(sel);
+ si_lower_nir(sel->screen, sel->nir);
}
/* Store the declared LDS size into tgsi_shader_info for the shader
program->num_cs_user_data_dwords =
sel->info.properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD];
- void *ir_binary = si_get_ir_binary(sel, false);
+ void *ir_binary = si_get_ir_binary(sel, false, false);
/* Try to load the shader from the shader cache. */
- mtx_lock(&sscreen->shader_cache_mutex);
+ simple_mtx_lock(&sscreen->shader_cache_mutex);
if (ir_binary &&
si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
- mtx_unlock(&sscreen->shader_cache_mutex);
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
si_shader_dump(sscreen, shader, debug, stderr, true);
if (!si_shader_binary_upload(sscreen, shader, 0))
program->shader.compilation_failed = true;
} else {
- mtx_unlock(&sscreen->shader_cache_mutex);
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
if (!si_shader_create(sscreen, compiler, &program->shader, debug)) {
program->shader.compilation_failed = true;
S_00B84C_LDS_SIZE(shader->config.lds_size);
if (ir_binary) {
- mtx_lock(&sscreen->shader_cache_mutex);
+ simple_mtx_lock(&sscreen->shader_cache_mutex);
if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
FREE(ir_binary);
- mtx_unlock(&sscreen->shader_cache_mutex);
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
}
}
&sel->compiler_ctx_state,
program, si_create_compute_state_async);
} else {
- const struct pipe_llvm_program_header *header;
- const char *code;
+ const struct pipe_binary_program_header *header;
header = cso->prog;
- code = cso->prog + sizeof(struct pipe_llvm_program_header);
program->shader.binary.elf_size = header->num_bytes;
program->shader.binary.elf_buffer = malloc(header->num_bytes);
FREE(program);
return NULL;
}
- memcpy((void *)program->shader.binary.elf_buffer, code, header->num_bytes);
+ memcpy((void *)program->shader.binary.elf_buffer, header->blob, header->num_bytes);
const amd_kernel_code_t *code_object =
si_compute_get_code_object(program, 0);
si_aligned_buffer_create(&sctx->screen->b,
SI_RESOURCE_FLAG_UNMAPPABLE,
PIPE_USAGE_DEFAULT,
- scratch_needed, 256);
+ scratch_needed,
+ sctx->screen->info.pte_fragment_size);
if (!sctx->compute_scratch_buffer)
return false;
COMPUTE_DBG(sctx->screen, "COMPUTE_PGM_RSRC1: 0x%08x "
"COMPUTE_PGM_RSRC2: 0x%08x\n", config->rsrc1, config->rsrc2);
+ sctx->max_seen_compute_scratch_bytes_per_wave =
+ MAX2(sctx->max_seen_compute_scratch_bytes_per_wave,
+ config->scratch_bytes_per_wave);
+
radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
S_00B860_WAVES(sctx->scratch_waves)
- | S_00B860_WAVESIZE(config->scratch_bytes_per_wave >> 10));
+ | S_00B860_WAVESIZE(sctx->max_seen_compute_scratch_bytes_per_wave >> 10));
sctx->cs_shader_state.emitted_program = program;
sctx->cs_shader_state.offset = offset;