- struct si_compute *program = (struct si_compute *)job;
- struct si_shader_selector *sel = &program->sel;
- struct si_shader *shader = &program->shader;
- struct ac_llvm_compiler *compiler;
- struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
- struct si_screen *sscreen = sel->screen;
-
- assert(!debug->debug_message || debug->async);
- assert(thread_index >= 0);
- assert(thread_index < ARRAY_SIZE(sscreen->compiler));
- compiler = &sscreen->compiler[thread_index];
-
- if (program->ir_type == PIPE_SHADER_IR_TGSI) {
- tgsi_scan_shader(sel->tokens, &sel->info);
- } else {
- assert(program->ir_type == PIPE_SHADER_IR_NIR);
-
- si_nir_opts(sel->nir);
- si_nir_scan_shader(sel->nir, &sel->info);
- si_lower_nir(sel, sscreen->compute_wave_size);
- }
-
- /* Store the declared LDS size into tgsi_shader_info for the shader
- * cache to include it.
- */
- sel->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE] = program->local_size;
-
- si_get_active_slot_masks(&sel->info,
- &sel->active_const_and_shader_buffers,
- &sel->active_samplers_and_images);
-
- program->shader.is_monolithic = true;
- program->reads_variable_block_size =
- sel->info.uses_block_size &&
- sel->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
- program->num_cs_user_data_dwords =
- sel->info.properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD];
-
- void *ir_binary = si_get_ir_binary(sel);
-
- /* Try to load the shader from the shader cache. */
- mtx_lock(&sscreen->shader_cache_mutex);
-
- if (ir_binary &&
- si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
- mtx_unlock(&sscreen->shader_cache_mutex);
-
- si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
- si_shader_dump(sscreen, shader, debug, stderr, true);
-
- if (!si_shader_binary_upload(sscreen, shader, 0))
- program->shader.compilation_failed = true;
- } else {
- mtx_unlock(&sscreen->shader_cache_mutex);
-
- if (!si_shader_create(sscreen, compiler, &program->shader, debug)) {
- program->shader.compilation_failed = true;
-
- if (program->ir_type == PIPE_SHADER_IR_TGSI)
- FREE(sel->tokens);
- return;
- }
-
- bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
- unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS +
- (sel->info.uses_grid_size ? 3 : 0) +
- (program->reads_variable_block_size ? 3 : 0) +
- program->num_cs_user_data_dwords;
-
- shader->config.rsrc1 =
- S_00B848_VGPRS((shader->config.num_vgprs - 1) /
- (sscreen->compute_wave_size == 32 ? 8 : 4)) |
- S_00B848_DX10_CLAMP(1) |
- S_00B848_MEM_ORDERED(sscreen->info.chip_class >= GFX10) |
- S_00B848_WGP_MODE(sscreen->info.chip_class >= GFX10) |
- S_00B848_FLOAT_MODE(shader->config.float_mode);
-
- if (sscreen->info.chip_class < GFX10) {
- shader->config.rsrc1 |=
- S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8);
- }
-
- shader->config.rsrc2 =
- S_00B84C_USER_SGPR(user_sgprs) |
- S_00B84C_SCRATCH_EN(scratch_enabled) |
- S_00B84C_TGID_X_EN(sel->info.uses_block_id[0]) |
- S_00B84C_TGID_Y_EN(sel->info.uses_block_id[1]) |
- S_00B84C_TGID_Z_EN(sel->info.uses_block_id[2]) |
- S_00B84C_TIDIG_COMP_CNT(sel->info.uses_thread_id[2] ? 2 :
- sel->info.uses_thread_id[1] ? 1 : 0) |
- S_00B84C_LDS_SIZE(shader->config.lds_size);
-
- if (ir_binary) {
- mtx_lock(&sscreen->shader_cache_mutex);
- if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
- FREE(ir_binary);
- mtx_unlock(&sscreen->shader_cache_mutex);
- }
- }
-
- if (program->ir_type == PIPE_SHADER_IR_TGSI)
- FREE(sel->tokens);
+ struct si_compute *program = (struct si_compute *)job;
+ struct si_shader_selector *sel = &program->sel;
+ struct si_shader *shader = &program->shader;
+ struct ac_llvm_compiler *compiler;
+ struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
+ struct si_screen *sscreen = sel->screen;
+
+ assert(!debug->debug_message || debug->async);
+ assert(thread_index >= 0);
+ assert(thread_index < ARRAY_SIZE(sscreen->compiler));
+ compiler = &sscreen->compiler[thread_index];
+
+ if (!compiler->passes)
+ si_init_compiler(sscreen, compiler);
+
+ assert(program->ir_type == PIPE_SHADER_IR_NIR);
+ si_nir_scan_shader(sel->nir, &sel->info);
+
+ /* Store the declared LDS size into si_shader_info for the shader
+ * cache to include it.
+ */
+ sel->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE] = program->local_size;
+
+ si_get_active_slot_masks(&sel->info, &sel->active_const_and_shader_buffers,
+ &sel->active_samplers_and_images);
+
+ program->shader.is_monolithic = true;
+ program->reads_variable_block_size =
+ sel->info.uses_block_size && sel->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
+ program->num_cs_user_data_dwords =
+ sel->info.properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD];
+
+ unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS + (sel->info.uses_grid_size ? 3 : 0) +
+ (program->reads_variable_block_size ? 3 : 0) +
+ program->num_cs_user_data_dwords;
+
+ /* Fast path for compute shaders - some descriptors passed via user SGPRs. */
+ /* Shader buffers in user SGPRs. */
+ for (unsigned i = 0; i < 3 && user_sgprs <= 12 && sel->info.shader_buffers_declared & (1 << i); i++) {
+ user_sgprs = align(user_sgprs, 4);
+ if (i == 0)
+ sel->cs_shaderbufs_sgpr_index = user_sgprs;
+ user_sgprs += 4;
+ sel->cs_num_shaderbufs_in_user_sgprs++;
+ }
+
+ /* Images in user SGPRs. */
+ unsigned non_msaa_images = sel->info.images_declared & ~sel->info.msaa_images_declared;
+
+ for (unsigned i = 0; i < 3 && non_msaa_images & (1 << i); i++) {
+ unsigned num_sgprs = sel->info.image_buffers & (1 << i) ? 4 : 8;
+
+ if (align(user_sgprs, num_sgprs) + num_sgprs > 16)
+ break;
+
+ user_sgprs = align(user_sgprs, num_sgprs);
+ if (i == 0)
+ sel->cs_images_sgpr_index = user_sgprs;
+ user_sgprs += num_sgprs;
+ sel->cs_num_images_in_user_sgprs++;
+ }
+ sel->cs_images_num_sgprs = user_sgprs - sel->cs_images_sgpr_index;
+ assert(user_sgprs <= 16);
+
+ unsigned char ir_sha1_cache_key[20];
+ si_get_ir_cache_key(sel, false, false, ir_sha1_cache_key);
+
+ /* Try to load the shader from the shader cache. */
+ simple_mtx_lock(&sscreen->shader_cache_mutex);
+
+ if (si_shader_cache_load_shader(sscreen, ir_sha1_cache_key, shader)) {
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
+
+ si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
+ si_shader_dump(sscreen, shader, debug, stderr, true);
+
+ if (!si_shader_binary_upload(sscreen, shader, 0))
+ program->shader.compilation_failed = true;
+ } else {
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
+
+ if (!si_create_shader_variant(sscreen, compiler, &program->shader, debug)) {
+ program->shader.compilation_failed = true;
+ return;
+ }
+
+ bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
+
+ shader->config.rsrc1 = S_00B848_VGPRS((shader->config.num_vgprs - 1) /
+ (sscreen->compute_wave_size == 32 ? 8 : 4)) |
+ S_00B848_DX10_CLAMP(1) |
+ S_00B848_MEM_ORDERED(sscreen->info.chip_class >= GFX10) |
+ S_00B848_WGP_MODE(sscreen->info.chip_class >= GFX10) |
+ S_00B848_FLOAT_MODE(shader->config.float_mode);
+
+ if (sscreen->info.chip_class < GFX10) {
+ shader->config.rsrc1 |= S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8);
+ }
+
+ shader->config.rsrc2 = S_00B84C_USER_SGPR(user_sgprs) | S_00B84C_SCRATCH_EN(scratch_enabled) |
+ S_00B84C_TGID_X_EN(sel->info.uses_block_id[0]) |
+ S_00B84C_TGID_Y_EN(sel->info.uses_block_id[1]) |
+ S_00B84C_TGID_Z_EN(sel->info.uses_block_id[2]) |
+ S_00B84C_TG_SIZE_EN(sel->info.uses_subgroup_info) |
+ S_00B84C_TIDIG_COMP_CNT(sel->info.uses_thread_id[2]
+ ? 2
+ : sel->info.uses_thread_id[1] ? 1 : 0) |
+ S_00B84C_LDS_SIZE(shader->config.lds_size);
+
+ simple_mtx_lock(&sscreen->shader_cache_mutex);
+ si_shader_cache_insert_shader(sscreen, ir_sha1_cache_key, shader, true);
+ simple_mtx_unlock(&sscreen->shader_cache_mutex);
+ }
+
+ ralloc_free(sel->nir);
+ sel->nir = NULL;