i965: Use force_compat_profile driconf option
[mesa.git] / src / mesa / drivers / dri / i965 / brw_performance_query.c
index dc4a833f563eb55408594388f3e5c2bde5e9fa6f..16e467442bc0b0b389d8778a686efb23e82baa59 100644 (file)
@@ -111,41 +111,24 @@ brw_is_perf_query_ready(struct gl_context *ctx,
 static void
 dump_perf_query_callback(GLuint id, void *query_void, void *brw_void)
 {
-   struct gl_context *ctx = brw_void;
+   struct brw_context *ctx = brw_void;
+   struct gen_perf_context *perf_ctx = ctx->perf_ctx;
    struct gl_perf_query_object *o = query_void;
    struct brw_perf_query_object * brw_query = brw_perf_query(o);
    struct gen_perf_query_object *obj = brw_query->query;
 
-   switch (obj->queryinfo->kind) {
-   case GEN_PERF_QUERY_TYPE_OA:
-   case GEN_PERF_QUERY_TYPE_RAW:
-      DBG("%4d: %-6s %-8s BO: %-4s OA data: %-10s %-15s\n",
-          id,
-          o->Used ? "Dirty," : "New,",
-          o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"),
-          obj->oa.bo ? "yes," : "no,",
-          brw_is_perf_query_ready(ctx, o) ? "ready," : "not ready,",
-          obj->oa.results_accumulated ? "accumulated" : "not accumulated");
-      break;
-   case GEN_PERF_QUERY_TYPE_PIPELINE:
-      DBG("%4d: %-6s %-8s BO: %-4s\n",
-          id,
-          o->Used ? "Dirty," : "New,",
-          o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"),
-          obj->pipeline_stats.bo ? "yes" : "no");
-      break;
-   default:
-      unreachable("Unknown query type");
-      break;
-   }
+   DBG("%4d: %-6s %-8s ",
+       id,
+       o->Used ? "Dirty," : "New,",
+       o->Active ? "Active," : (o->Ready ? "Ready," : "Pending,"));
+   gen_perf_dump_query(perf_ctx, obj, &ctx->batch);
 }
 
 static void
 dump_perf_queries(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
-   DBG("Queries: (Open queries = %d, OA users = %d)\n",
-       brw->perf_ctx.n_active_oa_queries, brw->perf_ctx.n_oa_users);
+   gen_perf_dump_query_count(brw->perf_ctx);
    _mesa_HashWalk(ctx->PerfQuery.Objects, dump_perf_query_callback, brw);
 }
 
@@ -161,28 +144,14 @@ brw_get_perf_query_info(struct gl_context *ctx,
                         GLuint *n_active)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
-   const struct gen_perf_query_info *query =
-      &perf_ctx->perf->queries[query_index];
+   struct gen_perf_context *perf_ctx = brw->perf_ctx;
+   struct gen_perf_config *perf_cfg = gen_perf_config(perf_ctx);
+   const struct gen_perf_query_info *query = &perf_cfg->queries[query_index];
 
    *name = query->name;
    *data_size = query->data_size;
    *n_counters = query->n_counters;
-
-   switch (query->kind) {
-   case GEN_PERF_QUERY_TYPE_OA:
-   case GEN_PERF_QUERY_TYPE_RAW:
-      *n_active = perf_ctx->n_active_oa_queries;
-      break;
-
-   case GEN_PERF_QUERY_TYPE_PIPELINE:
-      *n_active = perf_ctx->n_active_pipeline_stats_queries;
-      break;
-
-   default:
-      unreachable("Unknown query type");
-      break;
-   }
+   *n_active = gen_perf_active_queries(perf_ctx, query);
 }
 
 static GLuint
@@ -230,8 +199,9 @@ brw_get_perf_counter_info(struct gl_context *ctx,
                           GLuint64 *raw_max)
 {
    struct brw_context *brw = brw_context(ctx);
+   struct gen_perf_config *perf_cfg = gen_perf_config(brw->perf_ctx);
    const struct gen_perf_query_info *query =
-      &brw->perf_ctx.perf->queries[query_index];
+      &perf_cfg->queries[query_index];
    const struct gen_perf_query_counter *counter =
       &query->counters[counter_index];
 
@@ -277,7 +247,7 @@ brw_begin_perf_query(struct gl_context *ctx,
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
    struct gen_perf_query_object *obj = brw_query->query;
-   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   struct gen_perf_context *perf_ctx = brw->perf_ctx;
 
    /* We can assume the frontend hides mistaken attempts to Begin a
     * query object multiple times before its End. Similarly if an
@@ -308,7 +278,7 @@ brw_end_perf_query(struct gl_context *ctx,
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
    struct gen_perf_query_object *obj = brw_query->query;
-   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   struct gen_perf_context *perf_ctx = brw->perf_ctx;
 
    DBG("End(%d)\n", o->Id);
    gen_perf_end_query(perf_ctx, obj);
@@ -323,7 +293,7 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o)
 
    assert(!o->Ready);
 
-   gen_perf_wait_query(&brw->perf_ctx, obj, &brw->batch);
+   gen_perf_wait_query(brw->perf_ctx, obj, &brw->batch);
 }
 
 static bool
@@ -337,7 +307,7 @@ brw_is_perf_query_ready(struct gl_context *ctx,
    if (o->Ready)
       return true;
 
-   return gen_perf_is_query_ready(&brw->perf_ctx, obj, &brw->batch);
+   return gen_perf_is_query_ready(brw->perf_ctx, obj, &brw->batch);
 }
 
 /**
@@ -366,7 +336,7 @@ brw_get_perf_query_data(struct gl_context *ctx,
     */
    assert(o->Ready);
 
-   gen_perf_get_query_data(&brw->perf_ctx, obj,
+   gen_perf_get_query_data(brw->perf_ctx, obj,
                            data_size, data, bytes_written);
 }
 
@@ -374,22 +344,17 @@ static struct gl_perf_query_object *
 brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
-   const struct gen_perf_query_info *queryinfo =
-      &perf_ctx->perf->queries[query_index];
-   struct gen_perf_query_object *obj =
-      calloc(1, sizeof(struct gen_perf_query_object));
-
-   if (!obj)
+   struct gen_perf_context *perf_ctx = brw->perf_ctx;
+   struct gen_perf_query_object * obj = gen_perf_new_query(perf_ctx, query_index);
+   if (unlikely(!obj))
       return NULL;
 
-   obj->queryinfo = queryinfo;
-
-   perf_ctx->n_query_instances++;
-
    struct brw_perf_query_object *brw_query = calloc(1, sizeof(struct brw_perf_query_object));
-   if (unlikely(!brw_query))
+   if (unlikely(!brw_query)) {
+      gen_perf_delete_query(perf_ctx, obj);
       return NULL;
+   }
+
    brw_query->query = obj;
    return &brw_query->base;
 }
@@ -404,7 +369,7 @@ brw_delete_perf_query(struct gl_context *ctx,
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
    struct gen_perf_query_object *obj = brw_query->query;
-   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   struct gen_perf_context *perf_ctx = brw->perf_ctx;
 
    /* We can assume that the frontend waits for a query to complete
     * before ever calling into here, so we don't have to worry about
@@ -420,94 +385,6 @@ brw_delete_perf_query(struct gl_context *ctx,
 }
 
 /******************************************************************************/
-
-static void
-init_pipeline_statistic_query_registers(struct brw_context *brw)
-{
-   const struct gen_device_info *devinfo = &brw->screen->devinfo;
-   struct gen_perf_config *perf = brw->perf_ctx.perf;
-   struct gen_perf_query_info *query =
-      gen_perf_query_append_query_info(perf, MAX_STAT_COUNTERS);
-
-   query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
-   query->name = "Pipeline Statistics Registers";
-
-   gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT,
-                                            "N vertices submitted");
-   gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
-                                            "N primitives submitted");
-   gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
-                                            "N vertex shader invocations");
-
-   if (devinfo->gen == 6) {
-      gen_perf_query_info_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
-                                       "SO_PRIM_STORAGE_NEEDED",
-                                       "N geometry shader stream-out primitives (total)");
-      gen_perf_query_info_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1,
-                                       "SO_NUM_PRIMS_WRITTEN",
-                                       "N geometry shader stream-out primitives (written)");
-   } else {
-      gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
-                                       "SO_PRIM_STORAGE_NEEDED (Stream 0)",
-                                       "N stream-out (stream 0) primitives (total)");
-      gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
-                                       "SO_PRIM_STORAGE_NEEDED (Stream 1)",
-                                       "N stream-out (stream 1) primitives (total)");
-      gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
-                                       "SO_PRIM_STORAGE_NEEDED (Stream 2)",
-                                       "N stream-out (stream 2) primitives (total)");
-      gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
-                                       "SO_PRIM_STORAGE_NEEDED (Stream 3)",
-                                       "N stream-out (stream 3) primitives (total)");
-      gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
-                                       "SO_NUM_PRIMS_WRITTEN (Stream 0)",
-                                       "N stream-out (stream 0) primitives (written)");
-      gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
-                                       "SO_NUM_PRIMS_WRITTEN (Stream 1)",
-                                       "N stream-out (stream 1) primitives (written)");
-      gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
-                                       "SO_NUM_PRIMS_WRITTEN (Stream 2)",
-                                       "N stream-out (stream 2) primitives (written)");
-      gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
-                                       "SO_NUM_PRIMS_WRITTEN (Stream 3)",
-                                       "N stream-out (stream 3) primitives (written)");
-   }
-
-   gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
-                                          "N TCS shader invocations");
-   gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
-                                          "N TES shader invocations");
-
-   gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
-                                          "N geometry shader invocations");
-   gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
-                                          "N geometry shader primitives emitted");
-
-   gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
-                                          "N primitives entering clipping");
-   gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
-                                          "N primitives leaving clipping");
-
-   if (devinfo->is_haswell || devinfo->gen == 8) {
-      gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
-                                       "N fragment shader invocations",
-                                       "N fragment shader invocations");
-   } else {
-      gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
-                                             "N fragment shader invocations");
-   }
-
-   gen_perf_query_info_add_basic_stat_reg(query, PS_DEPTH_COUNT,
-                                          "N z-pass fragments");
-
-   if (devinfo->gen >= 7) {
-      gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
-                                             "N compute shader invocations");
-   }
-
-   query->data_size = sizeof(uint64_t) * query->n_counters;
-}
-
 /* gen_device_info will have incorrect default topology values for unsupported kernels.
  * verify kernel support to ensure OA metrics are accurate.
  */
@@ -594,12 +471,16 @@ brw_init_perf_query_info(struct gl_context *ctx)
    struct brw_context *brw = brw_context(ctx);
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
-   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
-   if (perf_ctx->perf)
-      return perf_ctx->perf->n_queries;
+   struct gen_perf_context *perf_ctx = brw->perf_ctx;
+   struct gen_perf_config *perf_cfg = gen_perf_config(perf_ctx);
+
+   if (perf_cfg)
+      return perf_cfg->n_queries;
 
-   perf_ctx->perf = gen_perf_new(brw);
-   struct gen_perf_config *perf_cfg = perf_ctx->perf;
+   if (!oa_metrics_kernel_support(brw->screen->driScrnPriv->fd, devinfo))
+      return 0;
+
+   perf_cfg = gen_perf_new(ctx);
 
    perf_cfg->vtbl.bo_alloc = brw_oa_bo_alloc;
    perf_cfg->vtbl.bo_unreference = (bo_unreference_t)brw_bo_unreference;
@@ -619,13 +500,7 @@ brw_init_perf_query_info(struct gl_context *ctx)
 
    gen_perf_init_context(perf_ctx, perf_cfg, brw, brw->bufmgr, devinfo,
                          brw->hw_ctx, brw->screen->driScrnPriv->fd);
-
-   init_pipeline_statistic_query_registers(brw);
-   gen_perf_query_register_mdapi_statistic_query(devinfo, perf_cfg);
-
-   if ((oa_metrics_kernel_support(perf_ctx->drm_fd, devinfo)) &&
-       (gen_perf_load_oa_metrics(perf_cfg, perf_ctx->drm_fd, devinfo)))
-      gen_perf_query_register_mdapi_oa_query(devinfo, perf_cfg);
+   gen_perf_init_metrics(perf_cfg, devinfo, brw->screen->driScrnPriv->fd);
 
    return perf_cfg->n_queries;
 }