radeonsi/gfx10: merge main and pos/param export IF blocks into one if possible

[mesa.git] / src / gallium / drivers / radeonsi / si_compute.c
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c

index 32f934237b0245f20a58bf28887fe97316c98679..25e08c6f428c30f591c3e5f96db5854ca1c40711 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -24,7 +24,6 @@
   */
  
  #include "nir/tgsi_to_nir.h"
-#include "tgsi/tgsi_parse.h"
  #include "util/u_async_debug.h"
  #include "util/u_memory.h"
  #include "util/u_upload_mgr.h"
@@ -121,17 +120,13 @@ static void si_create_compute_state_async(void *job, int thread_index)
         assert(thread_index < ARRAY_SIZE(sscreen->compiler));
         compiler = &sscreen->compiler[thread_index];
  
-       if (program->ir_type == PIPE_SHADER_IR_TGSI) {
-               tgsi_scan_shader(sel->tokens, &sel->info);
-       } else {
-               assert(program->ir_type == PIPE_SHADER_IR_NIR);
+       if (!compiler->passes)
+               si_init_compiler(sscreen, compiler);
  
-               si_nir_opts(sel->nir);
-               si_nir_scan_shader(sel->nir, &sel->info);
-               si_lower_nir(sel);
-       }
+       assert(program->ir_type == PIPE_SHADER_IR_NIR);
+       si_nir_scan_shader(sel->nir, &sel->info);
  
-       /* Store the declared LDS size into tgsi_shader_info for the shader
+       /* Store the declared LDS size into si_shader_info for the shader
          * cache to include it.
          */
         sel->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE] = program->local_size;
@@ -147,14 +142,14 @@ static void si_create_compute_state_async(void *job, int thread_index)
         program->num_cs_user_data_dwords =
                 sel->info.properties[TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD];
  
-       void *ir_binary = si_get_ir_binary(sel, false, false);
+       unsigned char ir_sha1_cache_key[20];
+       si_get_ir_cache_key(sel, false, false, ir_sha1_cache_key);
  
         /* Try to load the shader from the shader cache. */
-       mtx_lock(&sscreen->shader_cache_mutex);
+       simple_mtx_lock(&sscreen->shader_cache_mutex);
  
-       if (ir_binary &&
-           si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
-               mtx_unlock(&sscreen->shader_cache_mutex);
+       if (si_shader_cache_load_shader(sscreen, ir_sha1_cache_key, shader)) {
+               simple_mtx_unlock(&sscreen->shader_cache_mutex);
  
                 si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
                 si_shader_dump(sscreen, shader, debug, stderr, true);
@@ -162,13 +157,10 @@ static void si_create_compute_state_async(void *job, int thread_index)
                 if (!si_shader_binary_upload(sscreen, shader, 0))
                         program->shader.compilation_failed = true;
         } else {
-               mtx_unlock(&sscreen->shader_cache_mutex);
+               simple_mtx_unlock(&sscreen->shader_cache_mutex);
  
-               if (!si_shader_create(sscreen, compiler, &program->shader, debug)) {
+               if (!si_create_shader_variant(sscreen, compiler, &program->shader, debug)) {
                         program->shader.compilation_failed = true;
-
-                       if (program->ir_type == PIPE_SHADER_IR_TGSI)
-                               FREE(sel->tokens);
                         return;
                 }
  
@@ -197,20 +189,19 @@ static void si_create_compute_state_async(void *job, int thread_index)
                         S_00B84C_TGID_X_EN(sel->info.uses_block_id[0]) |
                         S_00B84C_TGID_Y_EN(sel->info.uses_block_id[1]) |
                         S_00B84C_TGID_Z_EN(sel->info.uses_block_id[2]) |
+                       S_00B84C_TG_SIZE_EN(sel->info.uses_subgroup_info) |
                         S_00B84C_TIDIG_COMP_CNT(sel->info.uses_thread_id[2] ? 2 :
                                                 sel->info.uses_thread_id[1] ? 1 : 0) |
                         S_00B84C_LDS_SIZE(shader->config.lds_size);
  
-               if (ir_binary) {
-                       mtx_lock(&sscreen->shader_cache_mutex);
-                       if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
-                               FREE(ir_binary);
-                       mtx_unlock(&sscreen->shader_cache_mutex);
-               }
+               simple_mtx_lock(&sscreen->shader_cache_mutex);
+               si_shader_cache_insert_shader(sscreen, ir_sha1_cache_key,
+                                             shader, true);
+               simple_mtx_unlock(&sscreen->shader_cache_mutex);
         }
  
-       if (program->ir_type == PIPE_SHADER_IR_TGSI)
-               FREE(sel->tokens);
+       ralloc_free(sel->nir);
+       sel->nir = NULL;
  }
  
  static void *si_create_compute_state(
@@ -232,16 +223,9 @@ static void *si_create_compute_state(
         program->input_size = cso->req_input_mem;
  
         if (cso->ir_type != PIPE_SHADER_IR_NATIVE) {
-               if (sscreen->options.enable_nir &&
-                   cso->ir_type == PIPE_SHADER_IR_TGSI) {
+               if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
                         program->ir_type = PIPE_SHADER_IR_NIR;
                         sel->nir = tgsi_to_nir(cso->prog, ctx->screen);
-               } else if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
-                       sel->tokens = tgsi_dup_tokens(cso->prog);
-                       if (!sel->tokens) {
-                               FREE(program);
-                               return NULL;
-                       }
                 } else {
                         assert(cso->ir_type == PIPE_SHADER_IR_NIR);
                         sel->nir = (struct nir_shader *) cso->prog;
@@ -256,10 +240,8 @@ static void *si_create_compute_state(
                                             &sel->compiler_ctx_state,
                                             program, si_create_compute_state_async);
         } else {
-               const struct pipe_llvm_program_header *header;
-               const char *code;
+               const struct pipe_binary_program_header *header;
                 header = cso->prog;
-               code = cso->prog + sizeof(struct pipe_llvm_program_header);
  
                 program->shader.binary.elf_size = header->num_bytes;
                 program->shader.binary.elf_buffer = malloc(header->num_bytes);
@@ -267,7 +249,7 @@ static void *si_create_compute_state(
                         FREE(program);
                         return NULL;
                 }
-               memcpy((void *)program->shader.binary.elf_buffer, code, header->num_bytes);
+               memcpy((void *)program->shader.binary.elf_buffer, header->blob, header->num_bytes);
  
                 const amd_kernel_code_t *code_object =
                         si_compute_get_code_object(program, 0);
@@ -532,9 +514,13 @@ static bool si_switch_compute_shader(struct si_context *sctx,
         COMPUTE_DBG(sctx->screen, "COMPUTE_PGM_RSRC1: 0x%08x "
                 "COMPUTE_PGM_RSRC2: 0x%08x\n", config->rsrc1, config->rsrc2);
  
+       sctx->max_seen_compute_scratch_bytes_per_wave =
+               MAX2(sctx->max_seen_compute_scratch_bytes_per_wave,
+                    config->scratch_bytes_per_wave);
+
         radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
                   S_00B860_WAVES(sctx->scratch_waves)
-                    | S_00B860_WAVESIZE(config->scratch_bytes_per_wave >> 10));
+                    | S_00B860_WAVESIZE(sctx->max_seen_compute_scratch_bytes_per_wave >> 10));
  
         sctx->cs_shader_state.emitted_program = program;
         sctx->cs_shader_state.offset = offset;
@@ -715,8 +701,8 @@ static bool si_upload_compute_input(struct si_context *sctx,
         return true;
  }
  
-static void si_setup_tgsi_user_data(struct si_context *sctx,
-                                const struct pipe_grid_info *info)
+static void si_setup_nir_user_data(struct si_context *sctx,
+                                  const struct pipe_grid_info *info)
  {
         struct si_compute *program = sctx->cs_shader_state.program;
         struct si_shader_selector *sel = &program->sel;
@@ -940,7 +926,7 @@ static void si_launch_grid(
         }
  
         if (program->ir_type != PIPE_SHADER_IR_NATIVE)
-               si_setup_tgsi_user_data(sctx, info);
+               si_setup_nir_user_data(sctx, info);
  
         si_emit_dispatch_packets(sctx, info);