radeonsi/nir: simplify si_lower_nir signature
[mesa.git] / src / gallium / drivers / radeonsi / si_compute.c
index ac4a7aa3135a601f5b4804a8c1cc53757ca82e39..ba8271d3fe3f2e4420fa2cbd7b42208072d08ce3 100644 (file)
@@ -128,7 +128,7 @@ static void si_create_compute_state_async(void *job, int thread_index)
 
                si_nir_opts(sel->nir);
                si_nir_scan_shader(sel->nir, &sel->info);
-               si_lower_nir(sel);
+               si_lower_nir(sel->screen, sel->nir);
        }
 
        /* Store the declared LDS size into tgsi_shader_info for the shader
@@ -147,14 +147,14 @@ static void si_create_compute_state_async(void *job, int thread_index)
        program->num_cs_user_data_dwords =
                sel->info.properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD];
 
-       void *ir_binary = si_get_ir_binary(sel, false);
+       void *ir_binary = si_get_ir_binary(sel, false, false);
 
        /* Try to load the shader from the shader cache. */
-       mtx_lock(&sscreen->shader_cache_mutex);
+       simple_mtx_lock(&sscreen->shader_cache_mutex);
 
        if (ir_binary &&
            si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
-               mtx_unlock(&sscreen->shader_cache_mutex);
+               simple_mtx_unlock(&sscreen->shader_cache_mutex);
 
                si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
                si_shader_dump(sscreen, shader, debug, stderr, true);
@@ -162,7 +162,7 @@ static void si_create_compute_state_async(void *job, int thread_index)
                if (!si_shader_binary_upload(sscreen, shader, 0))
                        program->shader.compilation_failed = true;
        } else {
-               mtx_unlock(&sscreen->shader_cache_mutex);
+               simple_mtx_unlock(&sscreen->shader_cache_mutex);
 
                if (!si_shader_create(sscreen, compiler, &program->shader, debug)) {
                        program->shader.compilation_failed = true;
@@ -202,10 +202,10 @@ static void si_create_compute_state_async(void *job, int thread_index)
                        S_00B84C_LDS_SIZE(shader->config.lds_size);
 
                if (ir_binary) {
-                       mtx_lock(&sscreen->shader_cache_mutex);
+                       simple_mtx_lock(&sscreen->shader_cache_mutex);
                        if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
                                FREE(ir_binary);
-                       mtx_unlock(&sscreen->shader_cache_mutex);
+                       simple_mtx_unlock(&sscreen->shader_cache_mutex);
                }
        }
 
@@ -256,10 +256,8 @@ static void *si_create_compute_state(
                                            &sel->compiler_ctx_state,
                                            program, si_create_compute_state_async);
        } else {
-               const struct pipe_llvm_program_header *header;
-               const char *code;
+               const struct pipe_binary_program_header *header;
                header = cso->prog;
-               code = cso->prog + sizeof(struct pipe_llvm_program_header);
 
                program->shader.binary.elf_size = header->num_bytes;
                program->shader.binary.elf_buffer = malloc(header->num_bytes);
@@ -267,7 +265,7 @@ static void *si_create_compute_state(
                        FREE(program);
                        return NULL;
                }
-               memcpy((void *)program->shader.binary.elf_buffer, code, header->num_bytes);
+               memcpy((void *)program->shader.binary.elf_buffer, header->blob, header->num_bytes);
 
                const amd_kernel_code_t *code_object =
                        si_compute_get_code_object(program, 0);
@@ -422,7 +420,8 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
                        si_aligned_buffer_create(&sctx->screen->b,
                                                 SI_RESOURCE_FLAG_UNMAPPABLE,
                                                 PIPE_USAGE_DEFAULT,
-                                                scratch_needed, 256);
+                                                scratch_needed,
+                                                sctx->screen->info.pte_fragment_size);
 
                if (!sctx->compute_scratch_buffer)
                        return false;
@@ -531,9 +530,13 @@ static bool si_switch_compute_shader(struct si_context *sctx,
        COMPUTE_DBG(sctx->screen, "COMPUTE_PGM_RSRC1: 0x%08x "
                "COMPUTE_PGM_RSRC2: 0x%08x\n", config->rsrc1, config->rsrc2);
 
+       sctx->max_seen_compute_scratch_bytes_per_wave =
+               MAX2(sctx->max_seen_compute_scratch_bytes_per_wave,
+                    config->scratch_bytes_per_wave);
+
        radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
                  S_00B860_WAVES(sctx->scratch_waves)
-                    | S_00B860_WAVESIZE(config->scratch_bytes_per_wave >> 10));
+                    | S_00B860_WAVESIZE(sctx->max_seen_compute_scratch_bytes_per_wave >> 10));
 
        sctx->cs_shader_state.emitted_program = program;
        sctx->cs_shader_state.offset = offset;