intel/cs_intrinsics: Handle 64-bit intrinsics
[mesa.git] / src / intel / perf / gen_perf_query.c
index 755ec9a83dd3f7e12ef1228e3fe43609cfb6492e..7bd8e0277d24d2d85e29358efd49062615626d87 100644 (file)
@@ -625,7 +625,7 @@ snapshot_statistics_registers(struct gen_perf_context *ctx,
 
       perf->vtbl.store_register_mem(ctx->ctx, obj->pipeline_stats.bo,
                                     counter->pipeline_stat.reg, 8,
-                                    offset_in_bytes + i * sizeof(uint64_t));
+                                    offset_in_bytes + counter->offset);
    }
 }
 
@@ -938,20 +938,24 @@ read_oa_samples_until(struct gen_perf_context *perf_ctx,
       if (len <= 0) {
          exec_list_push_tail(&perf_ctx->free_sample_buffers, &buf->link);
 
-         if (len < 0) {
-            if (errno == EAGAIN) {
-               return ((last_timestamp - start_timestamp) < INT32_MAX &&
-                       (last_timestamp - start_timestamp) >=
-                       (end_timestamp - start_timestamp)) ?
-                      OA_READ_STATUS_FINISHED :
-                      OA_READ_STATUS_UNFINISHED;
-            } else {
-               DBG("Error reading i915 perf samples: %m\n");
-            }
-         } else
+         if (len == 0) {
             DBG("Spurious EOF reading i915 perf samples\n");
+            return OA_READ_STATUS_ERROR;
+         }
+
+         if (errno != EAGAIN) {
+            DBG("Error reading i915 perf samples: %m\n");
+            return OA_READ_STATUS_ERROR;
+         }
+
+         if ((last_timestamp - start_timestamp) >= INT32_MAX)
+            return OA_READ_STATUS_UNFINISHED;
+
+         if ((last_timestamp - start_timestamp) <
+              (end_timestamp - start_timestamp))
+            return OA_READ_STATUS_UNFINISHED;
 
-         return OA_READ_STATUS_ERROR;
+         return OA_READ_STATUS_FINISHED;
       }
 
       buf->len = len;
@@ -1061,17 +1065,6 @@ gen_perf_wait_query(struct gen_perf_context *perf_ctx,
       perf_cfg->vtbl.batchbuffer_flush(perf_ctx->ctx, __FILE__, __LINE__);
 
    perf_cfg->vtbl.bo_wait_rendering(bo);
-
-   /* Due to a race condition between the OA unit signaling report
-    * availability and the report actually being written into memory,
-    * we need to wait for all the reports to come in before we can
-    * read them.
-    */
-   if (query->queryinfo->kind == GEN_PERF_QUERY_TYPE_OA ||
-       query->queryinfo->kind == GEN_PERF_QUERY_TYPE_RAW) {
-      while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
-         ;
-   }
 }
 
 bool
@@ -1087,8 +1080,8 @@ gen_perf_is_query_ready(struct gen_perf_context *perf_ctx,
       return (query->oa.results_accumulated ||
               (query->oa.bo &&
                !perf_cfg->vtbl.batch_references(current_batch, query->oa.bo) &&
-               !perf_cfg->vtbl.bo_busy(query->oa.bo) &&
-               read_oa_samples_for_query(perf_ctx, query, current_batch)));
+               !perf_cfg->vtbl.bo_busy(query->oa.bo)));
+
    case GEN_PERF_QUERY_TYPE_PIPELINE:
       return (query->pipeline_stats.bo &&
               !perf_cfg->vtbl.batch_references(current_batch, query->pipeline_stats.bo) &&
@@ -1515,6 +1508,7 @@ get_pipeline_stats_data(struct gen_perf_context *perf_ctx,
 void
 gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
                         struct gen_perf_query_object *query,
+                        void *current_batch,
                         int data_size,
                         unsigned *data,
                         unsigned *bytes_written)
@@ -1526,6 +1520,17 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
    case GEN_PERF_QUERY_TYPE_OA:
    case GEN_PERF_QUERY_TYPE_RAW:
       if (!query->oa.results_accumulated) {
+         /* Due to the sampling frequency of the OA buffer by the i915-perf
+          * driver, there can be a 5ms delay between the Mesa seeing the query
+          * complete and i915 making all the OA buffer reports available to us.
+          * We need to wait for all the reports to come in before we can do
+          * the post processing removing unrelated deltas.
+          * There is a i915-perf series to address this issue, but it's
+          * not been merged upstream yet.
+          */
+         while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
+            ;
+
          read_gt_frequency(perf_ctx, query);
          uint32_t *begin_report = query->oa.map;
          uint32_t *end_report = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES;