-/******************************************************************************/
-
-/**
- * Emit MI_STORE_REGISTER_MEM commands to capture all of the
- * pipeline statistics for the performance query object.
- */
-static void
-snapshot_statistics_registers(struct brw_context *brw,
- struct brw_perf_query_object *obj,
- uint32_t offset_in_bytes)
-{
- const struct brw_perf_query_info *query = obj->query;
- const int n_counters = query->n_counters;
-
- for (int i = 0; i < n_counters; i++) {
- const struct brw_perf_query_counter *counter = &query->counters[i];
-
- assert(counter->data_type == GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL);
-
- brw_store_register_mem64(brw, obj->pipeline_stats.bo,
- counter->pipeline_stat.reg,
- offset_in_bytes + i * sizeof(uint64_t));
- }
-}
-
-/**
- * Add a query to the global list of "unaccumulated queries."
- *
- * Queries are tracked here until all the associated OA reports have
- * been accumulated via accumulate_oa_reports() after the end
- * MI_REPORT_PERF_COUNT has landed in query->oa.bo.
- */
-static void
-add_to_unaccumulated_query_list(struct brw_context *brw,
- struct brw_perf_query_object *obj)
-{
- if (brw->perfquery.unaccumulated_elements >=
- brw->perfquery.unaccumulated_array_size)
- {
- brw->perfquery.unaccumulated_array_size *= 1.5;
- brw->perfquery.unaccumulated =
- reralloc(brw, brw->perfquery.unaccumulated,
- struct brw_perf_query_object *,
- brw->perfquery.unaccumulated_array_size);
- }
-
- brw->perfquery.unaccumulated[brw->perfquery.unaccumulated_elements++] = obj;
-}
-
-/**
- * Remove a query from the global list of unaccumulated queries once
- * after successfully accumulating the OA reports associated with the
- * query in accumulate_oa_reports() or when discarding unwanted query
- * results.
- */
-static void
-drop_from_unaccumulated_query_list(struct brw_context *brw,
- struct brw_perf_query_object *obj)
-{
- for (int i = 0; i < brw->perfquery.unaccumulated_elements; i++) {
- if (brw->perfquery.unaccumulated[i] == obj) {
- int last_elt = --brw->perfquery.unaccumulated_elements;
-
- if (i == last_elt)
- brw->perfquery.unaccumulated[i] = NULL;
- else {
- brw->perfquery.unaccumulated[i] =
- brw->perfquery.unaccumulated[last_elt];
- }
-
- break;
- }
- }
-
- /* Drop our samples_head reference so that associated periodic
- * sample data buffers can potentially be reaped if they aren't
- * referenced by any other queries...
- */
-
- struct brw_oa_sample_buf *buf =
- exec_node_data(struct brw_oa_sample_buf, obj->oa.samples_head, link);
-
- assert(buf->refcount > 0);
- buf->refcount--;
-
- obj->oa.samples_head = NULL;
-
- reap_old_sample_buffers(brw);
-}
-
-static uint64_t
-timebase_scale(struct brw_context *brw, uint32_t u32_time_delta)
-{
- uint64_t tmp = ((uint64_t)u32_time_delta) * 1000000000ull;
-
- return tmp ? tmp / brw->perfquery.sys_vars.timestamp_frequency : 0;
-}
-
/* Add the delta between two 32-bit counter snapshots to a 64-bit
 * accumulator.  The subtraction is done in 32 bits so that a single
 * wraparound of the hardware counter between snapshots still yields
 * the correct (positive) delta.
 */
static void
accumulate_uint32(const uint32_t *report0,
                  const uint32_t *report1,
                  uint64_t *accumulator)
{
   const uint32_t delta = *report1 - *report0;

   *accumulator += delta;
}
-
/**
 * Given pointers to starting and ending OA snapshots, add the deltas for each
 * counter to the results.
 *
 * Each counter is accumulated into its own uint64 slot in
 * obj->oa.accumulator: slot 0 holds the timestamp delta, slots 1..61
 * hold the counter deltas.
 */
static void
add_deltas(struct brw_context *brw,
           struct brw_perf_query_object *obj,
           const uint32_t *start,
           const uint32_t *end)
{
   const struct brw_perf_query_info *query = obj->query;
   uint64_t *accumulator = obj->oa.accumulator;
   int i;

   switch (query->oa_format) {
   case I915_OA_FORMAT_A45_B8_C8:
      accumulate_uint32(start + 1, end + 1, accumulator); /* timestamp */

      /* 61 counters starting at dword 3 of the report.
       * NOTE(review): 61 presumably corresponds to 45 A + 8 B + 8 C
       * counters from the format name — confirm against the i915 OA
       * report layout documentation.
       */
      for (i = 0; i < 61; i++)
         accumulate_uint32(start + 3 + i, end + 3 + i, accumulator + 1 + i);

      break;
   default:
      unreachable("Can't accumulate OA counters in unknown format");
   }
}
-
-static bool
-inc_n_oa_users(struct brw_context *brw)
-{
- if (brw->perfquery.n_oa_users == 0 &&
- drmIoctl(brw->perfquery.oa_stream_fd,
- I915_PERF_IOCTL_ENABLE, 0) < 0)
- {
- return false;
- }
- ++brw->perfquery.n_oa_users;
-
- return true;
-}
-
-static void
-dec_n_oa_users(struct brw_context *brw)
-{
- /* Disabling the i915 perf stream will effectively disable the OA
- * counters. Note it's important to be sure there are no outstanding
- * MI_RPC commands at this point since they could stall the CS
- * indefinitely once OACONTROL is disabled.
- */
- --brw->perfquery.n_oa_users;
- if (brw->perfquery.n_oa_users == 0 &&
- drmIoctl(brw->perfquery.oa_stream_fd, I915_PERF_IOCTL_DISABLE, 0) < 0)
- {
- DBG("WARNING: Error disabling i915 perf stream: %m\n");
- }
-}
-
-/* In general if we see anything spurious while accumulating results,
- * we don't try and continue accumulating the current query, hoping
- * for the best, we scrap anything outstanding, and then hope for the
- * best with new queries.
- */
-static void
-discard_all_queries(struct brw_context *brw)
-{
- while (brw->perfquery.unaccumulated_elements) {
- struct brw_perf_query_object *obj = brw->perfquery.unaccumulated[0];
-
- obj->oa.results_accumulated = true;
- drop_from_unaccumulated_query_list(brw, brw->perfquery.unaccumulated[0]);
-
- dec_n_oa_users(brw);
- }
-}
-
/**
 * Drain all currently-pending periodic OA sample records from the i915
 * perf stream fd into sample buffers on brw->perfquery.sample_buffers.
 *
 * Returns true once the stream would block (EAGAIN, i.e. no more data
 * pending for now); returns false on any other read error or on a
 * spurious end-of-file.
 */
static bool
read_oa_samples(struct brw_context *brw)
{
   while (1) {
      struct brw_oa_sample_buf *buf = get_free_sample_buf(brw);
      int len;

      /* Retry reads that are interrupted by a signal. */
      while ((len = read(brw->perfquery.oa_stream_fd, buf->buf,
                         sizeof(buf->buf))) < 0 && errno == EINTR)
         ;

      if (len <= 0) {
         /* Nothing was read, so hand the unused buffer back to the
          * free list before deciding how to report the condition.
          */
         exec_list_push_tail(&brw->perfquery.free_sample_buffers, &buf->link);

         if (len < 0) {
            if (errno == EAGAIN)
               return true; /* drained: no more samples pending */
            else {
               DBG("Error reading i915 perf samples: %m\n");
               return false;
            }
         } else {
            DBG("Spurious EOF reading i915 perf samples\n");
            return false;
         }
      }

      buf->len = len;
      exec_list_push_tail(&brw->perfquery.sample_buffers, &buf->link);
   }

   /* The loop above only exits via the return statements. */
   unreachable("not reached");
   return false;
}
-
-/**
- * Accumulate raw OA counter values based on deltas between pairs
- * of OA reports.
- *
- * Accumulation starts from the first report captured via
- * MI_REPORT_PERF_COUNT (MI_RPC) by brw_begin_perf_query() until the
- * last MI_RPC report requested by brw_end_perf_query(). Between these
- * two reports there may also some number of periodically sampled OA
- * reports collected via the i915 perf interface - depending on the
- * duration of the query.
- *
- * These periodic snapshots help to ensure we handle counter overflow
- * correctly by being frequent enough to ensure we don't miss multiple
- * overflows of a counter between snapshots.
- */
-static void
-accumulate_oa_reports(struct brw_context *brw,
- struct brw_perf_query_object *obj)
-{
- struct gl_perf_query_object *o = &obj->base;
- uint32_t *query_buffer;
- uint32_t *start;
- uint32_t *last;
- uint32_t *end;
- struct exec_node *first_samples_node;
-
- assert(o->Ready);
-
- /* Collect the latest periodic OA reports from i915 perf */
- if (!read_oa_samples(brw))
- goto error;
-
- query_buffer = brw_bo_map(brw, obj->oa.bo, MAP_READ);
-
- start = last = query_buffer;
- end = query_buffer + (MI_RPC_BO_END_OFFSET_BYTES / sizeof(uint32_t));
-
- if (start[0] != obj->oa.begin_report_id) {
- DBG("Spurious start report id=%"PRIu32"\n", start[0]);
- goto error;
- }
- if (end[0] != (obj->oa.begin_report_id + 1)) {
- DBG("Spurious end report id=%"PRIu32"\n", end[0]);
- goto error;
- }
-
- /* See if we have any periodic reports to accumulate too... */
-
- /* N.B. The oa.samples_head was set when the query began and
- * pointed to the tail of the brw->perfquery.sample_buffers list at
- * the time the query started. Since the buffer existed before the
- * first MI_REPORT_PERF_COUNT command was emitted we therefore know
- * that no data in this particular node's buffer can possibly be
- * associated with the query - so skip ahead one...
- */
- first_samples_node = obj->oa.samples_head->next;
-
- foreach_list_typed_from(struct brw_oa_sample_buf, buf, link,
- &brw->perfquery.sample_buffers,
- first_samples_node)
- {
- int offset = 0;
-
- while (offset < buf->len) {
- const struct drm_i915_perf_record_header *header =
- (const struct drm_i915_perf_record_header *)(buf->buf + offset);
-
- assert(header->size != 0);
- assert(header->size <= buf->len);
-
- offset += header->size;
-
- switch (header->type) {
- case DRM_I915_PERF_RECORD_SAMPLE: {
- uint32_t *report = (uint32_t *)(header + 1);
-
- /* Ignore reports that come before the start marker.
- * (Note: takes care to allow overflow of 32bit timestamps)
- */
- if (timebase_scale(brw, report[1] - start[1]) > 5000000000)
- continue;
-
- /* Ignore reports that come after the end marker.
- * (Note: takes care to allow overflow of 32bit timestamps)
- */
- if (timebase_scale(brw, report[1] - end[1]) <= 5000000000)
- goto end;
-
- add_deltas(brw, obj, last, report);
-
- last = report;
-
- break;
- }
-
- case DRM_I915_PERF_RECORD_OA_BUFFER_LOST:
- DBG("i915 perf: OA error: all reports lost\n");
- goto error;
- case DRM_I915_PERF_RECORD_OA_REPORT_LOST:
- DBG("i915 perf: OA report lost\n");
- break;
- }
- }
- }
-
-end:
-
- add_deltas(brw, obj, last, end);
-
- DBG("Marking %d accumulated - results gathered\n", o->Id);
-
- brw_bo_unmap(obj->oa.bo);
- obj->oa.results_accumulated = true;
- drop_from_unaccumulated_query_list(brw, obj);
- dec_n_oa_users(brw);
-
- return;
-
-error:
-
- brw_bo_unmap(obj->oa.bo);
- discard_all_queries(brw);
-}