radeonsi/nir: simplify si_lower_nir signature
[mesa.git] / src / gallium / drivers / radeonsi / si_compute.c
index 22975069c999be515daf341c9d3031e9c8910a78..ba8271d3fe3f2e4420fa2cbd7b42208072d08ce3 100644 (file)
@@ -128,7 +128,7 @@ static void si_create_compute_state_async(void *job, int thread_index)
 
                si_nir_opts(sel->nir);
                si_nir_scan_shader(sel->nir, &sel->info);
-               si_lower_nir(sel, sscreen->compute_wave_size);
+               si_lower_nir(sel->screen, sel->nir);
        }
 
        /* Store the declared LDS size into tgsi_shader_info for the shader
@@ -147,14 +147,14 @@ static void si_create_compute_state_async(void *job, int thread_index)
        program->num_cs_user_data_dwords =
                sel->info.properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD];
 
-       void *ir_binary = si_get_ir_binary(sel);
+       void *ir_binary = si_get_ir_binary(sel, false, false);
 
        /* Try to load the shader from the shader cache. */
-       mtx_lock(&sscreen->shader_cache_mutex);
+       simple_mtx_lock(&sscreen->shader_cache_mutex);
 
        if (ir_binary &&
            si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
-               mtx_unlock(&sscreen->shader_cache_mutex);
+               simple_mtx_unlock(&sscreen->shader_cache_mutex);
 
                si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
                si_shader_dump(sscreen, shader, debug, stderr, true);
@@ -162,7 +162,7 @@ static void si_create_compute_state_async(void *job, int thread_index)
                if (!si_shader_binary_upload(sscreen, shader, 0))
                        program->shader.compilation_failed = true;
        } else {
-               mtx_unlock(&sscreen->shader_cache_mutex);
+               simple_mtx_unlock(&sscreen->shader_cache_mutex);
 
                if (!si_shader_create(sscreen, compiler, &program->shader, debug)) {
                        program->shader.compilation_failed = true;
@@ -202,10 +202,10 @@ static void si_create_compute_state_async(void *job, int thread_index)
                        S_00B84C_LDS_SIZE(shader->config.lds_size);
 
                if (ir_binary) {
-                       mtx_lock(&sscreen->shader_cache_mutex);
+                       simple_mtx_lock(&sscreen->shader_cache_mutex);
                        if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
                                FREE(ir_binary);
-                       mtx_unlock(&sscreen->shader_cache_mutex);
+                       simple_mtx_unlock(&sscreen->shader_cache_mutex);
                }
        }
 
@@ -232,7 +232,7 @@ static void *si_create_compute_state(
        program->input_size = cso->req_input_mem;
 
        if (cso->ir_type != PIPE_SHADER_IR_NATIVE) {
-               if (sscreen->options.always_nir &&
+               if (sscreen->options.enable_nir &&
                    cso->ir_type == PIPE_SHADER_IR_TGSI) {
                        program->ir_type = PIPE_SHADER_IR_NIR;
                        sel->nir = tgsi_to_nir(cso->prog, ctx->screen);
@@ -256,10 +256,8 @@ static void *si_create_compute_state(
                                            &sel->compiler_ctx_state,
                                            program, si_create_compute_state_async);
        } else {
-               const struct pipe_llvm_program_header *header;
-               const char *code;
+               const struct pipe_binary_program_header *header;
                header = cso->prog;
-               code = cso->prog + sizeof(struct pipe_llvm_program_header);
 
                program->shader.binary.elf_size = header->num_bytes;
                program->shader.binary.elf_buffer = malloc(header->num_bytes);
@@ -267,7 +265,7 @@ static void *si_create_compute_state(
                        FREE(program);
                        return NULL;
                }
-               memcpy((void *)program->shader.binary.elf_buffer, code, header->num_bytes);
+               memcpy((void *)program->shader.binary.elf_buffer, header->blob, header->num_bytes);
 
                const amd_kernel_code_t *code_object =
                        si_compute_get_code_object(program, 0);
@@ -318,7 +316,22 @@ static void si_set_global_binding(
        struct si_context *sctx = (struct si_context*)ctx;
        struct si_compute *program = sctx->cs_shader_state.program;
 
-       assert(first + n <= MAX_GLOBAL_BUFFERS);
+       if (first + n > program->max_global_buffers) {
+               unsigned old_max = program->max_global_buffers;
+               unsigned new_max = first + n;
+               /* Grow via a temporary so a failed realloc keeps the old array and count valid. */
+               void *grown = realloc(program->global_buffers,
+                                     new_max * sizeof(program->global_buffers[0]));
+               if (!grown) {
+                       fprintf(stderr, "radeonsi: failed to allocate compute global_buffers\n");
+                       return;
+               }

+               program->global_buffers = grown;
+               program->max_global_buffers = new_max;
+               memset(&program->global_buffers[old_max], 0,
+                      (new_max - old_max) * sizeof(program->global_buffers[0]));
+       }
 
        if (!resources) {
                for (i = 0; i < n; i++) {
@@ -407,7 +420,8 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx,
                        si_aligned_buffer_create(&sctx->screen->b,
                                                 SI_RESOURCE_FLAG_UNMAPPABLE,
                                                 PIPE_USAGE_DEFAULT,
-                                                scratch_needed, 256);
+                                                scratch_needed,
+                                                sctx->screen->info.pte_fragment_size);
 
                if (!sctx->compute_scratch_buffer)
                        return false;
@@ -516,9 +530,13 @@ static bool si_switch_compute_shader(struct si_context *sctx,
        COMPUTE_DBG(sctx->screen, "COMPUTE_PGM_RSRC1: 0x%08x "
                "COMPUTE_PGM_RSRC2: 0x%08x\n", config->rsrc1, config->rsrc2);
 
+       sctx->max_seen_compute_scratch_bytes_per_wave =
+               MAX2(sctx->max_seen_compute_scratch_bytes_per_wave,
+                    config->scratch_bytes_per_wave);
+
        radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
                  S_00B860_WAVES(sctx->scratch_waves)
-                    | S_00B860_WAVESIZE(config->scratch_bytes_per_wave >> 10));
+                    | S_00B860_WAVESIZE(sctx->max_seen_compute_scratch_bytes_per_wave >> 10));
 
        sctx->cs_shader_state.emitted_program = program;
        sctx->cs_shader_state.offset = offset;
@@ -912,7 +930,7 @@ static void si_launch_grid(
                return;
 
        /* Global buffers */
-       for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) {
+       for (i = 0; i < program->max_global_buffers; i++) {
                struct si_resource *buffer =
                        si_resource(program->global_buffers[i]);
                if (!buffer) {
@@ -952,6 +970,10 @@ void si_destroy_compute(struct si_compute *program)
                util_queue_fence_destroy(&sel->ready);
        }
 
+       for (unsigned i = 0; i < program->max_global_buffers; i++)
+               pipe_resource_reference(&program->global_buffers[i], NULL);
+       FREE(program->global_buffers);
+
        si_shader_destroy(&program->shader);
        ralloc_free(program->sel.nir);
        FREE(program);