intel/perf: fix raw query kernel metric selection

[mesa.git] / src / intel / perf / gen_perf_query.c
diff --git a/src/intel/perf/gen_perf_query.c b/src/intel/perf/gen_perf_query.c

index 3d4f922b2b3c7b2e33847b6715df9e4cd4653a76..593ab5c473b66e91d74fb443701177a1443d8ffd 100644 (file)
--- a/src/intel/perf/gen_perf_query.c
+++ b/src/intel/perf/gen_perf_query.c
@@ -423,7 +423,7 @@ get_metric_id(struct gen_perf_config *perf,
     if (!gen_perf_load_metric_id(perf, query->guid,
                                  &raw_query->oa_metrics_set_id)) {
        DBG("Unable to read query guid=%s ID, falling back to test config\n", query->guid);
-      raw_query->oa_metrics_set_id = 1ULL;
+      raw_query->oa_metrics_set_id = perf->fallback_raw_oa_metric;
     } else {
        DBG("Raw query '%s'guid=%s loaded ID: %"PRIu64"\n",
            query->name, query->guid, query->oa_metrics_set_id);
@@ -938,20 +938,24 @@ read_oa_samples_until(struct gen_perf_context *perf_ctx,
        if (len <= 0) {
           exec_list_push_tail(&perf_ctx->free_sample_buffers, &buf->link);
  
-         if (len < 0) {
-            if (errno == EAGAIN) {
-               return ((last_timestamp - start_timestamp) < INT32_MAX &&
-                       (last_timestamp - start_timestamp) >=
-                       (end_timestamp - start_timestamp)) ?
-                      OA_READ_STATUS_FINISHED :
-                      OA_READ_STATUS_UNFINISHED;
-            } else {
-               DBG("Error reading i915 perf samples: %m\n");
-            }
-         } else
+         if (len == 0) {
              DBG("Spurious EOF reading i915 perf samples\n");
+            return OA_READ_STATUS_ERROR;
+         }
+
+         if (errno != EAGAIN) {
+            DBG("Error reading i915 perf samples: %m\n");
+            return OA_READ_STATUS_ERROR;
+         }
+
+         if ((last_timestamp - start_timestamp) >= INT32_MAX)
+            return OA_READ_STATUS_UNFINISHED;
+
+         if ((last_timestamp - start_timestamp) <
+              (end_timestamp - start_timestamp))
+            return OA_READ_STATUS_UNFINISHED;
  
-         return OA_READ_STATUS_ERROR;
+         return OA_READ_STATUS_FINISHED;
        }
  
        buf->len = len;
@@ -1061,17 +1065,6 @@ gen_perf_wait_query(struct gen_perf_context *perf_ctx,
        perf_cfg->vtbl.batchbuffer_flush(perf_ctx->ctx, __FILE__, __LINE__);
  
     perf_cfg->vtbl.bo_wait_rendering(bo);
-
-   /* Due to a race condition between the OA unit signaling report
-    * availability and the report actually being written into memory,
-    * we need to wait for all the reports to come in before we can
-    * read them.
-    */
-   if (query->queryinfo->kind == GEN_PERF_QUERY_TYPE_OA ||
-       query->queryinfo->kind == GEN_PERF_QUERY_TYPE_RAW) {
-      while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
-         ;
-   }
  }
  
  bool
@@ -1087,8 +1080,8 @@ gen_perf_is_query_ready(struct gen_perf_context *perf_ctx,
        return (query->oa.results_accumulated ||
                (query->oa.bo &&
                 !perf_cfg->vtbl.batch_references(current_batch, query->oa.bo) &&
-               !perf_cfg->vtbl.bo_busy(query->oa.bo) &&
-               read_oa_samples_for_query(perf_ctx, query, current_batch)));
+               !perf_cfg->vtbl.bo_busy(query->oa.bo)));
+
     case GEN_PERF_QUERY_TYPE_PIPELINE:
        return (query->pipeline_stats.bo &&
                !perf_cfg->vtbl.batch_references(current_batch, query->pipeline_stats.bo) &&
@@ -1515,6 +1508,7 @@ get_pipeline_stats_data(struct gen_perf_context *perf_ctx,
  void
  gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
                          struct gen_perf_query_object *query,
+                        void *current_batch,
                          int data_size,
                          unsigned *data,
                          unsigned *bytes_written)
@@ -1526,6 +1520,17 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
     case GEN_PERF_QUERY_TYPE_OA:
     case GEN_PERF_QUERY_TYPE_RAW:
        if (!query->oa.results_accumulated) {
+         /* Due to the sampling frequency of the OA buffer by the i915-perf
+          * driver, there can be a 5ms delay between the Mesa seeing the query
+          * complete and i915 making all the OA buffer reports available to us.
+          * We need to wait for all the reports to come in before we can do
+          * the post processing removing unrelated deltas.
+          * There is a i915-perf series to address this issue, but it's
+          * not been merged upstream yet.
+          */
+         while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
+            ;
+
           read_gt_frequency(perf_ctx, query);
           uint32_t *begin_report = query->oa.map;
           uint32_t *end_report = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES;