gallium/radeon: add r600_gfx_{write,wait}_fence
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Wed, 14 Sep 2016 13:43:18 +0000 (15:43 +0200)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Thu, 29 Sep 2016 09:14:29 +0000 (11:14 +0200)
For bottom-of-pipe fences inside the gfx command stream.

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.c
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeonsi/si_perfcounter.c

index b0d981331c85b9b651fb502b4599ec2f6c7a3b57..b681a946997995972f28e335bd06d59687849362 100644 (file)
@@ -80,6 +80,58 @@ void radeon_shader_binary_clean(struct radeon_shader_binary *b)
  * pipe_context
  */
 
+void r600_gfx_write_fence(struct r600_common_context *ctx,
+                         uint64_t va, uint32_t old_value, uint32_t new_value)
+{
+       struct radeon_winsys_cs *cs = ctx->gfx.cs;
+
+       if (ctx->chip_class == CIK) {
+               /* Two EOP events are required to make all engines go idle
+                * (and optional cache flushes executed) before the timestamp
+                * is written.
+                *
+                * This first event writes old_value to va; the
+                * unconditional event below then writes new_value to the
+                * same address. A waiter must therefore wait for
+                * new_value, i.e. for the second EOP event, and callers
+                * should pass an old_value that differs from new_value.
+                */
+               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+               radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
+                               EVENT_INDEX(5));
+               radeon_emit(cs, va);
+               radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1));
+               radeon_emit(cs, old_value); /* immediate data: intermediate value (CIK only) */
+               radeon_emit(cs, 0); /* unused */
+       }
+
+       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); /* emitted on every chip class */
+       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
+                       EVENT_INDEX(5));
+       radeon_emit(cs, va);
+       radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1));
+       radeon_emit(cs, new_value); /* immediate data: final fence value */
+       radeon_emit(cs, 0); /* unused */
+}
+
+unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
+{
+       unsigned dwords = 6; /* one EVENT_WRITE_EOP packet as emitted by r600_gfx_write_fence: header + 5 dwords */
+
+       if (screen->chip_class == CIK)
+               dwords *= 2; /* r600_gfx_write_fence emits the EOP packet twice on CIK */
+
+       return dwords;
+}
+
+void r600_gfx_wait_fence(struct r600_common_context *ctx,
+                        uint64_t va, uint32_t ref, uint32_t mask)
+{
+       struct radeon_winsys_cs *cs = ctx->gfx.cs;
+
+       radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); /* header; 6 more dwords follow (7 in total) */
+       radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); /* presumably: equality compare on a memory location - confirm against the packet spec */
+       radeon_emit(cs, va);
+       radeon_emit(cs, va >> 32);
+       radeon_emit(cs, ref); /* reference value the fence location is compared against */
+       radeon_emit(cs, mask); /* mask; presumably applied to the memory value before the compare - confirm */
+       radeon_emit(cs, 4); /* poll interval */
+}
+
 void r600_draw_rectangle(struct blitter_context *blitter,
                         int x1, int y1, int x2, int y2, float depth,
                         enum blitter_attrib_type type,
index dd33eabcbff04f5307a9610423e2a67e1ded0bc8..96b23b23b25b89a10d9b62d29bc3d9b59d828824 100644 (file)
@@ -695,6 +695,11 @@ r600_invalidate_resource(struct pipe_context *ctx,
                         struct pipe_resource *resource);
 
 /* r600_pipe_common.c */
+void r600_gfx_write_fence(struct r600_common_context *ctx,
+                         uint64_t va, uint32_t old_value, uint32_t new_value);
+unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
+void r600_gfx_wait_fence(struct r600_common_context *ctx,
+                        uint64_t va, uint32_t ref, uint32_t mask);
 void r600_draw_rectangle(struct blitter_context *blitter,
                         int x1, int y1, int x2, int y2, float depth,
                         enum blitter_attrib_type type,
index 0ced617dbc82a57fb5956c2dcb4e76378ce72ac9..d0c5392b441e1e49a6b3b0c9fbab16d92a682c50 100644 (file)
@@ -591,39 +591,8 @@ static void si_pc_emit_stop(struct r600_common_context *ctx,
 {
        struct radeon_winsys_cs *cs = ctx->gfx.cs;
 
-       if (ctx->screen->chip_class == CIK) {
-               /* Two EOP events are required to make all engines go idle
-                * (and optional cache flushes executed) before the timestamp
-                * is written.
-                *
-                * Write 1, because we need to wait for the second EOP event.
-                */
-               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-               radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
-                               EVENT_INDEX(5));
-               radeon_emit(cs, va);
-               radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1));
-               radeon_emit(cs, 1); /* immediate data */
-               radeon_emit(cs, 0); /* unused */
-       }
-
-       /* Write 0. */
-       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
-                       EVENT_INDEX(5));
-       radeon_emit(cs, va);
-       radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1));
-       radeon_emit(cs, 0); /* immediate data */
-       radeon_emit(cs, 0); /* unused */
-
-       /* Wait until the memory location is 0. */
-       radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
-       radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
-       radeon_emit(cs, va);
-       radeon_emit(cs, va >> 32);
-       radeon_emit(cs, 0); /* reference value */
-       radeon_emit(cs, 0xffffffff); /* mask */
-       radeon_emit(cs, 4); /* poll interval */
+       r600_gfx_write_fence(ctx, va, 1, 0);
+       r600_gfx_wait_fence(ctx, va, 0, 0xffffffff);
 
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
        radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
@@ -719,14 +688,10 @@ void si_init_perfcounters(struct si_screen *screen)
                return;
 
        pc->num_start_cs_dwords = 14;
-       pc->num_stop_cs_dwords = 20;
+       pc->num_stop_cs_dwords = 14 + r600_gfx_write_fence_dwords(&screen->b);
        pc->num_instance_cs_dwords = 3;
        pc->num_shaders_cs_dwords = 4;
 
-       if (screen->b.chip_class == CIK) {
-               pc->num_stop_cs_dwords += 6;
-       }
-
        pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits);
        pc->shader_type_suffixes = si_pc_shader_type_suffixes;
        pc->shader_type_bits = si_pc_shader_type_bits;