- struct si_compute *program = (struct si_compute *)job;
- struct si_shader *shader = &program->shader;
- struct si_shader_selector sel;
- struct ac_llvm_compiler *compiler;
- struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug;
- struct si_screen *sscreen = program->screen;
-
- assert(!debug->debug_message || debug->async);
- assert(thread_index >= 0);
- assert(thread_index < ARRAY_SIZE(sscreen->compiler));
- compiler = &sscreen->compiler[thread_index];
-
- memset(&sel, 0, sizeof(sel));
-
- sel.screen = sscreen;
-
- if (program->ir_type == PIPE_SHADER_IR_TGSI) {
- tgsi_scan_shader(program->ir.tgsi, &sel.info);
- sel.tokens = program->ir.tgsi;
- } else {
- assert(program->ir_type == PIPE_SHADER_IR_NIR);
- sel.nir = program->ir.nir;
-
- si_nir_opts(sel.nir);
- si_nir_scan_shader(sel.nir, &sel.info);
- si_lower_nir(&sel);
- }
-
- /* Store the declared LDS size into tgsi_shader_info for the shader
- * cache to include it.
- */
- sel.info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE] = program->local_size;
-
- sel.type = PIPE_SHADER_COMPUTE;
- si_get_active_slot_masks(&sel.info,
- &program->active_const_and_shader_buffers,
- &program->active_samplers_and_images);
-
- program->shader.selector = &sel;
- program->shader.is_monolithic = true;
- program->uses_grid_size = sel.info.uses_grid_size;
- program->uses_bindless_samplers = sel.info.uses_bindless_samplers;
- program->uses_bindless_images = sel.info.uses_bindless_images;
- program->reads_variable_block_size =
- sel.info.uses_block_size &&
- sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
- program->num_cs_user_data_dwords =
- sel.info.properties[TGSI_PROPERTY_CS_USER_DATA_DWORDS];
-
- void *ir_binary = si_get_ir_binary(&sel);
-
- /* Try to load the shader from the shader cache. */
- mtx_lock(&sscreen->shader_cache_mutex);
-
- if (ir_binary &&
- si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
- mtx_unlock(&sscreen->shader_cache_mutex);
-
- si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
- si_shader_dump(sscreen, shader, debug, stderr, true);
-
- if (!si_shader_binary_upload(sscreen, shader, 0))
- program->shader.compilation_failed = true;
- } else {
- mtx_unlock(&sscreen->shader_cache_mutex);
-
- if (!si_shader_create(sscreen, compiler, &program->shader, debug)) {
- program->shader.compilation_failed = true;
-
- if (program->ir_type == PIPE_SHADER_IR_TGSI)
- FREE(program->ir.tgsi);
- program->shader.selector = NULL;
- return;
- }
-
- bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
- unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS +
- (sel.info.uses_grid_size ? 3 : 0) +
- (program->reads_variable_block_size ? 3 : 0) +
- program->num_cs_user_data_dwords;
-
- shader->config.rsrc1 =
- S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
- S_00B848_DX10_CLAMP(1) |
- S_00B848_MEM_ORDERED(sscreen->info.chip_class >= GFX10) |
- S_00B848_WGP_MODE(sscreen->info.chip_class >= GFX10) |
- S_00B848_FLOAT_MODE(shader->config.float_mode);
-
- if (program->screen->info.chip_class < GFX10) {
- shader->config.rsrc1 |=
- S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8);
- }
-
- shader->config.rsrc2 =
- S_00B84C_USER_SGPR(user_sgprs) |
- S_00B84C_SCRATCH_EN(scratch_enabled) |
- S_00B84C_TGID_X_EN(sel.info.uses_block_id[0]) |
- S_00B84C_TGID_Y_EN(sel.info.uses_block_id[1]) |
- S_00B84C_TGID_Z_EN(sel.info.uses_block_id[2]) |
- S_00B84C_TIDIG_COMP_CNT(sel.info.uses_thread_id[2] ? 2 :
- sel.info.uses_thread_id[1] ? 1 : 0) |
- S_00B84C_LDS_SIZE(shader->config.lds_size);
-
- if (ir_binary) {
- mtx_lock(&sscreen->shader_cache_mutex);
- if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
- FREE(ir_binary);
- mtx_unlock(&sscreen->shader_cache_mutex);
- }
- }
-
- if (program->ir_type == PIPE_SHADER_IR_TGSI)
- FREE(program->ir.tgsi);
-
- program->shader.selector = NULL;
+ struct si_compute *program = (struct si_compute *)job;
+ struct si_shader_selector *sel = &program->sel;
+ struct si_shader *shader = &program->shader;
+ struct ac_llvm_compiler *compiler;
+ struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
+ struct si_screen *sscreen = sel->screen;
+
+ assert(!debug->debug_message || debug->async);
+ assert(thread_index >= 0);
+ assert(thread_index < ARRAY_SIZE(sscreen->compiler));
+ compiler = &sscreen->compiler[thread_index];
+
+ if (!compiler->passes)
+ si_init_compiler(sscreen, compiler);
+
+ assert(program->ir_type == PIPE_SHADER_IR_NIR);
+ si_nir_scan_shader(sel->nir, &sel->info);
+
+ /* Store the declared LDS size into si_shader_info for the shader
+ * cache to include it.
+ */
+ sel->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE] = program->local_size;
+
+ si_get_active_slot_masks(&sel->info, &sel->active_const_and_shader_buffers,
+ &sel->active_samplers_and_images);
+
+ program->shader.is_monolithic = true;
+ program->reads_variable_block_size =
+ sel->info.uses_block_size && sel->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
+ program->num_cs_user_data_dwords =
+ sel->info.properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD];
+
+ unsigned char ir_sha1_cache_key[20];
+ si_get_ir_cache_key(sel, false, false, ir_sha1_cache_key);
+
+ /* Try to load the shader from the shader cache. */
+ simple_mtx_lock(&sscreen->shader_cache_mutex);
+
+ if (si_shader_cache_load_shader(sscreen, ir_sha1_cache_key, shader)) {
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
+
+ si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
+ si_shader_dump(sscreen, shader, debug, stderr, true);
+
+ if (!si_shader_binary_upload(sscreen, shader, 0))
+ program->shader.compilation_failed = true;
+ } else {
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
+
+ if (!si_create_shader_variant(sscreen, compiler, &program->shader, debug)) {
+ program->shader.compilation_failed = true;
+ return;
+ }
+
+ bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
+ unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS + (sel->info.uses_grid_size ? 3 : 0) +
+ (program->reads_variable_block_size ? 3 : 0) +
+ program->num_cs_user_data_dwords;
+
+ shader->config.rsrc1 = S_00B848_VGPRS((shader->config.num_vgprs - 1) /
+ (sscreen->compute_wave_size == 32 ? 8 : 4)) |
+ S_00B848_DX10_CLAMP(1) |
+ S_00B848_MEM_ORDERED(sscreen->info.chip_class >= GFX10) |
+ S_00B848_WGP_MODE(sscreen->info.chip_class >= GFX10) |
+ S_00B848_FLOAT_MODE(shader->config.float_mode);
+
+ if (sscreen->info.chip_class < GFX10) {
+ shader->config.rsrc1 |= S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8);
+ }
+
+ shader->config.rsrc2 = S_00B84C_USER_SGPR(user_sgprs) | S_00B84C_SCRATCH_EN(scratch_enabled) |
+ S_00B84C_TGID_X_EN(sel->info.uses_block_id[0]) |
+ S_00B84C_TGID_Y_EN(sel->info.uses_block_id[1]) |
+ S_00B84C_TGID_Z_EN(sel->info.uses_block_id[2]) |
+ S_00B84C_TG_SIZE_EN(sel->info.uses_subgroup_info) |
+ S_00B84C_TIDIG_COMP_CNT(sel->info.uses_thread_id[2]
+ ? 2
+ : sel->info.uses_thread_id[1] ? 1 : 0) |
+ S_00B84C_LDS_SIZE(shader->config.lds_size);
+
+ simple_mtx_lock(&sscreen->shader_cache_mutex);
+ si_shader_cache_insert_shader(sscreen, ir_sha1_cache_key, shader, true);
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
+ }
+
+ ralloc_free(sel->nir);
+ sel->nir = NULL;