ac: import lp_create_builder() from gallivm
[mesa.git] / src / gallium / drivers / radeonsi / si_perfcounter.c
index 0ced617dbc82a57fb5956c2dcb4e76378ce72ac9..1cf004dff83b8827c8d52d2d41998585a072dfb6 100644 (file)
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * Authors:
- *  Nicolai Hähnle <nicolai.haehnle@amd.com>
- *
  */
 
 #include "radeon/r600_cs.h"
 #include "radeon/r600_query.h"
-#include "radeon/r600_pipe_common.h"
 #include "util/u_memory.h"
 
 #include "si_pipe.h"
@@ -378,7 +373,7 @@ static struct si_pc_block groups_CIK[] = {
 };
 
 static struct si_pc_block groups_VI[] = {
-       { &cik_CB, 396, 4 },
+       { &cik_CB, 405, 4 },
        { &cik_CPF, 19 },
        { &cik_DB, 257, 4 },
        { &cik_GRBM, 34 },
@@ -404,6 +399,30 @@ static struct si_pc_block groups_VI[] = {
 
 };
 
+static struct si_pc_block groups_gfx9[] = {
+       { &cik_CB, 438, 4 },
+       { &cik_CPF, 32 },
+       { &cik_DB, 328, 4 },
+       { &cik_GRBM, 38 },
+       { &cik_GRBMSE, 16 },
+       { &cik_PA_SU, 292 },
+       { &cik_PA_SC, 491 },
+       { &cik_SPI, 196 },
+       { &cik_SQ, 374 },
+       { &cik_SX, 208 },
+       { &cik_TA, 119, 16 },
+       { &cik_TCA, 35, 2 },
+       { &cik_TCC, 256, 16 },
+       { &cik_TD, 57, 16 },
+       { &cik_TCP, 85, 16 },
+       { &cik_GDS, 121 },
+       { &cik_VGT, 148 },
+       { &cik_IA, 32 },
+       { &cik_WD, 58 },
+       { &cik_CPG, 59 },
+       { &cik_CPC, 35 },
+};
+
 static void si_pc_get_size(struct r600_perfcounter_block *group,
                        unsigned count, unsigned *selectors,
                        unsigned *num_select_dw, unsigned *num_read_dw)
@@ -579,7 +598,7 @@ static void si_pc_emit_start(struct r600_common_context *ctx,
        radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                               S_036020_PERFMON_STATE(V_036020_DISABLE_AND_RESET));
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_START) | EVENT_INDEX(0));
+       radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_START) | EVENT_INDEX(0));
        radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                               S_036020_PERFMON_STATE(V_036020_START_COUNTING));
 }
@@ -591,44 +610,15 @@ static void si_pc_emit_stop(struct r600_common_context *ctx,
 {
        struct radeon_winsys_cs *cs = ctx->gfx.cs;
 
-       if (ctx->screen->chip_class == CIK) {
-               /* Two EOP events are required to make all engines go idle
-                * (and optional cache flushes executed) before the timestamp
-                * is written.
-                *
-                * Write 1, because we need to wait for the second EOP event.
-                */
-               radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-               radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
-                               EVENT_INDEX(5));
-               radeon_emit(cs, va);
-               radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1));
-               radeon_emit(cs, 1); /* immediate data */
-               radeon_emit(cs, 0); /* unused */
-       }
-
-       /* Write 0. */
-       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
-                       EVENT_INDEX(5));
-       radeon_emit(cs, va);
-       radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1));
-       radeon_emit(cs, 0); /* immediate data */
-       radeon_emit(cs, 0); /* unused */
-
-       /* Wait until the memory location is 0. */
-       radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
-       radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
-       radeon_emit(cs, va);
-       radeon_emit(cs, va >> 32);
-       radeon_emit(cs, 0); /* reference value */
-       radeon_emit(cs, 0xffffffff); /* mask */
-       radeon_emit(cs, 4); /* poll interval */
+       si_gfx_write_event_eop(ctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+                                EOP_DATA_SEL_VALUE_32BIT,
+                                buffer, va, 0, SI_NOT_QUERY);
+       si_gfx_wait_fence(ctx, va, 0, 0xffffffff);
 
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
+       radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
        radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_STOP) | EVENT_INDEX(0));
+       radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
        radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                               S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) |
                               S_036020_PERFMON_SAMPLE_ENABLE(1));
@@ -680,10 +670,10 @@ static void si_pc_emit_read(struct r600_common_context *ctx,
        }
 }
 
-static void si_pc_cleanup(struct r600_common_screen *rscreen)
+static void si_pc_cleanup(struct si_screen *sscreen)
 {
-       r600_perfcounters_do_destroy(rscreen->perfcounters);
-       rscreen->perfcounters = NULL;
+       si_perfcounters_do_destroy(sscreen->perfcounters);
+       sscreen->perfcounters = NULL;
 }
 
 void si_init_perfcounters(struct si_screen *screen)
@@ -693,7 +683,7 @@ void si_init_perfcounters(struct si_screen *screen)
        unsigned num_blocks;
        unsigned i;
 
-       switch (screen->b.chip_class) {
+       switch (screen->info.chip_class) {
        case CIK:
                blocks = groups_CIK;
                num_blocks = ARRAY_SIZE(groups_CIK);
@@ -702,16 +692,20 @@ void si_init_perfcounters(struct si_screen *screen)
                blocks = groups_VI;
                num_blocks = ARRAY_SIZE(groups_VI);
                break;
+       case GFX9:
+               blocks = groups_gfx9;
+               num_blocks = ARRAY_SIZE(groups_gfx9);
+               break;
        case SI:
        default:
                return; /* not implemented */
        }
 
-       if (screen->b.info.max_sh_per_se != 1) {
+       if (screen->info.max_sh_per_se != 1) {
                /* This should not happen on non-SI chips. */
                fprintf(stderr, "si_init_perfcounters: max_sh_per_se = %d not "
                        "supported (inaccurate performance counters)\n",
-                       screen->b.info.max_sh_per_se);
+                       screen->info.max_sh_per_se);
        }
 
        pc = CALLOC_STRUCT(r600_perfcounters);
@@ -719,14 +713,10 @@ void si_init_perfcounters(struct si_screen *screen)
                return;
 
        pc->num_start_cs_dwords = 14;
-       pc->num_stop_cs_dwords = 20;
+       pc->num_stop_cs_dwords = 14 + si_gfx_write_fence_dwords(screen);
        pc->num_instance_cs_dwords = 3;
        pc->num_shaders_cs_dwords = 4;
 
-       if (screen->b.chip_class == CIK) {
-               pc->num_stop_cs_dwords += 6;
-       }
-
        pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits);
        pc->shader_type_suffixes = si_pc_shader_type_suffixes;
        pc->shader_type_bits = si_pc_shader_type_bits;
@@ -740,7 +730,7 @@ void si_init_perfcounters(struct si_screen *screen)
        pc->emit_read = si_pc_emit_read;
        pc->cleanup = si_pc_cleanup;
 
-       if (!r600_perfcounters_init(pc, num_blocks))
+       if (!si_perfcounters_init(pc, num_blocks))
                goto error;
 
        for (i = 0; i < num_blocks; ++i) {
@@ -748,11 +738,11 @@ void si_init_perfcounters(struct si_screen *screen)
                unsigned instances = block->instances;
 
                if (!strcmp(block->b->name, "IA")) {
-                       if (screen->b.info.max_se > 2)
+                       if (screen->info.max_se > 2)
                                instances = 2;
                }
 
-               r600_perfcounters_add_block(&screen->b, pc,
+               si_perfcounters_add_block(screen, pc,
                                            block->b->name,
                                            block->b->flags,
                                            block->b->num_counters,
@@ -761,9 +751,9 @@ void si_init_perfcounters(struct si_screen *screen)
                                            block);
        }
 
-       screen->b.perfcounters = pc;
+       screen->perfcounters = pc;
        return;
 
 error:
-       r600_perfcounters_do_destroy(pc);
+       si_perfcounters_do_destroy(pc);
 }