radeonsi: emit_spi_map packets optimization
[mesa.git] / src / gallium / drivers / radeonsi / si_state_shaders.c
index e7610af2fa7d315d83f47bf1a65c15cff7b2e288..4e0320a226d823dc277ed040c23ea69206e5a006 100644 (file)
@@ -45,7 +45,7 @@
  * Return the IR binary in a buffer. For TGSI the first 4 bytes contain its
  * size as integer.
  */
-static void *si_get_ir_binary(struct si_shader_selector *sel)
+void *si_get_ir_binary(struct si_shader_selector *sel)
 {
        struct blob blob;
        unsigned ir_size;
@@ -202,10 +202,9 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary)
  *
  * Returns false on failure, in which case the ir_binary should be freed.
  */
-static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
-                                         void *ir_binary,
-                                         struct si_shader *shader,
-                                         bool insert_into_disk_cache)
+bool si_shader_cache_insert_shader(struct si_screen *sscreen, void *ir_binary,
+                                  struct si_shader *shader,
+                                  bool insert_into_disk_cache)
 {
        void *hw_binary;
        struct hash_entry *entry;
@@ -235,9 +234,8 @@ static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
        return true;
 }
 
-static bool si_shader_cache_load_shader(struct si_screen *sscreen,
-                                       void *ir_binary,
-                                       struct si_shader *shader)
+bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary,
+                                struct si_shader *shader)
 {
        struct hash_entry *entry =
                _mesa_hash_table_search(sscreen->shader_cache, ir_binary);
@@ -1511,7 +1509,7 @@ static void si_build_shader_variant(struct si_shader *shader,
 {
        struct si_shader_selector *sel = shader->selector;
        struct si_screen *sscreen = sel->screen;
-       struct si_compiler *compiler;
+       struct ac_llvm_compiler *compiler;
        struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug;
        int r;
 
@@ -1582,10 +1580,10 @@ static bool si_check_missing_main_part(struct si_screen *sscreen,
                main_part->selector = sel;
                main_part->key.as_es = key->as_es;
                main_part->key.as_ls = key->as_ls;
+               main_part->is_monolithic = false;
 
                if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
-                                          main_part, false,
-                                          &compiler_state->debug) != 0) {
+                                          main_part, &compiler_state->debug) != 0) {
                        FREE(main_part);
                        return false;
                }
@@ -1858,7 +1856,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
 {
        struct si_shader_selector *sel = (struct si_shader_selector *)job;
        struct si_screen *sscreen = sel->screen;
-       struct si_compiler *compiler;
+       struct ac_llvm_compiler *compiler;
        struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
 
        assert(!debug->debug_message || debug->async);
@@ -1884,6 +1882,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
                util_queue_fence_init(&shader->ready);
 
                shader->selector = sel;
+               shader->is_monolithic = false;
                si_parse_next_shader_property(&sel->info,
                                              sel->so.num_outputs != 0,
                                              &shader->key);
@@ -1902,7 +1901,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
                        mtx_unlock(&sscreen->shader_cache_mutex);
 
                        /* Compile the shader if it hasn't been loaded from the cache. */
-                       if (si_compile_tgsi_shader(sscreen, compiler, shader, false,
+                       if (si_compile_tgsi_shader(sscreen, compiler, shader,
                                                   debug) != 0) {
                                FREE(shader);
                                FREE(ir_binary);
@@ -1975,6 +1974,34 @@ static void si_init_shader_selector_async(void *job, int thread_index)
        }
 }
 
+void si_schedule_initial_compile(struct si_context *sctx, unsigned processor,
+                                struct util_queue_fence *ready_fence,
+                                struct si_compiler_ctx_state *compiler_ctx_state,
+                                void *job, util_queue_execute_func execute)
+{
+       util_queue_fence_init(ready_fence);
+
+       struct util_async_debug_callback async_debug;
+       bool wait =
+               (sctx->debug.debug_message && !sctx->debug.async) ||
+               sctx->is_debug ||
+               si_can_dump_shader(sctx->screen, processor);
+
+       if (wait) {
+               u_async_debug_init(&async_debug);
+               compiler_ctx_state->debug = async_debug.base;
+       }
+
+       util_queue_add_job(&sctx->screen->shader_compiler_queue, job,
+                          ready_fence, execute, NULL);
+
+       if (wait) {
+               util_queue_fence_wait(ready_fence);
+               u_async_debug_drain(&async_debug, &sctx->debug);
+               u_async_debug_cleanup(&async_debug);
+       }
+}
+
 /* Return descriptor slot usage masks from the given shader info. */
 void si_get_active_slot_masks(const struct tgsi_shader_info *info,
                              uint32_t *const_and_shader_buffers,
@@ -2243,29 +2270,10 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
        }
 
        (void) mtx_init(&sel->mutex, mtx_plain);
-       util_queue_fence_init(&sel->ready);
-
-       struct util_async_debug_callback async_debug;
-       bool wait =
-               (sctx->debug.debug_message && !sctx->debug.async) ||
-               sctx->is_debug ||
-               si_can_dump_shader(sscreen, sel->info.processor);
-
-       if (wait) {
-               u_async_debug_init(&async_debug);
-               sel->compiler_ctx_state.debug = async_debug.base;
-       }
-
-       util_queue_add_job(&sscreen->shader_compiler_queue, sel,
-                          &sel->ready, si_init_shader_selector_async,
-                          NULL);
-
-       if (wait) {
-               util_queue_fence_wait(&sel->ready);
-               u_async_debug_drain(&async_debug, &sctx->debug);
-               u_async_debug_cleanup(&async_debug);
-       }
 
+       si_schedule_initial_compile(sctx, sel->info.processor, &sel->ready,
+                                   &sel->compiler_ctx_state, sel,
+                                   si_init_shader_selector_async);
        return sel;
 }
 
@@ -2626,27 +2634,25 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
 
 static void si_emit_spi_map(struct si_context *sctx)
 {
-       struct radeon_winsys_cs *cs = sctx->gfx_cs;
        struct si_shader *ps = sctx->ps_shader.current;
        struct si_shader *vs = si_get_vs_state(sctx);
        struct tgsi_shader_info *psinfo = ps ? &ps->selector->info : NULL;
        unsigned i, num_interp, num_written = 0, bcol_interp[2];
+       unsigned spi_ps_input_cntl[32];
 
        if (!ps || !ps->selector->info.num_inputs)
                return;
 
        num_interp = si_get_ps_num_interp(ps);
        assert(num_interp > 0);
-       radeon_set_context_reg_seq(cs, R_028644_SPI_PS_INPUT_CNTL_0, num_interp);
 
        for (i = 0; i < psinfo->num_inputs; i++) {
                unsigned name = psinfo->input_semantic_name[i];
                unsigned index = psinfo->input_semantic_index[i];
                unsigned interpolate = psinfo->input_interpolate[i];
 
-               radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, name, index,
-                                                    interpolate));
-               num_written++;
+               spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, name,
+                                                           index, interpolate);
 
                if (name == TGSI_SEMANTIC_COLOR) {
                        assert(index < ARRAY_SIZE(bcol_interp));
@@ -2661,12 +2667,19 @@ static void si_emit_spi_map(struct si_context *sctx)
                        if (!(psinfo->colors_read & (0xf << (i * 4))))
                                continue;
 
-                       radeon_emit(cs, si_get_ps_input_cntl(sctx, vs, bcol,
-                                                            i, bcol_interp[i]));
-                       num_written++;
+                       spi_ps_input_cntl[num_written++] =
+                         si_get_ps_input_cntl(sctx, vs, bcol, i, bcol_interp[i]);
+
                }
        }
        assert(num_interp == num_written);
+
+       /* R_028644_SPI_PS_INPUT_CNTL_0 */
+       /* Dota 2: Only ~16% of SPI map updates set different values. */
+       /* Talos: Only ~9% of SPI map updates set different values. */
+       radeon_opt_set_context_regn(sctx, R_028644_SPI_PS_INPUT_CNTL_0,
+                                   spi_ps_input_cntl,
+                                   sctx->tracked_regs.spi_ps_input_cntl, num_interp);
 }
 
 /**
@@ -3356,7 +3369,7 @@ bool si_update_shaders(struct si_context *sctx)
 
 static void si_emit_scratch_state(struct si_context *sctx)
 {
-       struct radeon_winsys_cs *cs = sctx->gfx_cs;
+       struct radeon_cmdbuf *cs = sctx->gfx_cs;
 
        radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
                               sctx->spi_tmpring_size);