radeonsi/gfx9: keep reusing the same buffer/address for the gfx9 flush fence
author: Marek Olšák <marek.olsak@amd.com>
Tue, 6 Jun 2017 22:16:46 +0000 (00:16 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Wed, 21 Jun 2017 23:51:02 +0000 (01:51 +0200)
instead of allocating a new fence slot from a monotonic suballocator on every flush

v2: initialize the memory at context creation

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state_draw.c

index a4f3c2d6149bde02d3b8b035b23a07878e4cab89..74ac0ec038936afb57ae265cc8ac00692bd753fa 100644 (file)
@@ -64,6 +64,7 @@ static void si_destroy_context(struct pipe_context *context)
        free(sctx->border_color_table);
        r600_resource_reference(&sctx->scratch_buffer, NULL);
        r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
+       r600_resource_reference(&sctx->wait_mem_scratch, NULL);
 
        si_pm4_free_state(sctx, sctx->init_config, ~0);
        if (sctx->init_config_gs_rings)
@@ -269,6 +270,23 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
        /* these must be last */
        si_begin_new_cs(sctx);
 
+       if (sctx->b.chip_class >= GFX9) {
+               sctx->wait_mem_scratch = (struct r600_resource*)
+                       pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4);
+               if (!sctx->wait_mem_scratch)
+                       goto fail;
+
+               /* Initialize the memory. */
+               struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+               radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+               radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
+                           S_370_WR_CONFIRM(1) |
+                           S_370_ENGINE_SEL(V_370_ME));
+               radeon_emit(cs, sctx->wait_mem_scratch->gpu_address);
+               radeon_emit(cs, sctx->wait_mem_scratch->gpu_address >> 32);
+               radeon_emit(cs, sctx->wait_mem_number);
+       }
+
        /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
         * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
        if (sctx->b.chip_class == CIK) {
index 1c1740838190b75ec6185ff81697e34317e7dfcc..bd724e80a06608d84b4ff9046a1c0257bf592000 100644 (file)
@@ -263,6 +263,8 @@ struct si_context {
        struct si_screen                *screen;
        LLVMTargetMachineRef            tm; /* only non-threaded compilation */
        struct si_shader_ctx_state      fixed_func_tcs_shader;
+       struct r600_resource            *wait_mem_scratch;
+       unsigned                        wait_mem_number;
 
        struct radeon_winsys_cs         *ce_ib;
        struct radeon_winsys_cs         *ce_preamble_ib;
index 2b000e7469615a26a4ede3ce155e524e9b672b7e..85ceacad80f629f22164e233b9ed186291409fbd 100644 (file)
@@ -954,9 +954,8 @@ void si_emit_cache_flush(struct si_context *sctx)
         * wait for idle on GFX9. We have to use a TS event.
         */
        if (sctx->b.chip_class >= GFX9 && flush_cb_db) {
-               struct r600_resource *rbuf = NULL;
                uint64_t va;
-               unsigned offset = 0, tc_flags, cb_db_event;
+               unsigned tc_flags, cb_db_event;
 
                /* Set the CB/DB flush event. */
                switch (flush_cb_db) {
@@ -997,14 +996,15 @@ void si_emit_cache_flush(struct si_context *sctx)
                        sctx->b.num_L2_invalidates++;
                }
 
-               /* Allocate memory for the fence. */
-               u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4,
-                                    &offset, (struct pipe_resource**)&rbuf);
-               va = rbuf->gpu_address + offset;
+               /* Do the flush (enqueue the event and wait for it). */
+               va = sctx->wait_mem_scratch->gpu_address;
+               sctx->wait_mem_number++;
 
                r600_gfx_write_event_eop(rctx, cb_db_event, tc_flags, 1,
-                                        rbuf, va, 0, 1);
-               r600_gfx_wait_fence(rctx, va, 1, 0xffffffff);
+                                        sctx->wait_mem_scratch, va,
+                                        sctx->wait_mem_number - 1,
+                                        sctx->wait_mem_number);
+               r600_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff);
        }
 
        /* Make sure ME is idle (it executes most packets) before continuing.