src/mesa/drivers/dri/i965/brw_performance_query.h

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #ifndef BRW_PERFORMANCE_QUERY_H
  25 #define BRW_PERFORMANCE_QUERY_H
  26
  27 #include <stdint.h>
  28
  29 #include "brw_context.h"
  30 #include "brw_performance_query_metrics.h"
  31
  32 /*
  33  * When currently allocate only one page for pipeline statistics queries. Here
  34  * we derived the maximum number of counters for that amount.
  35  */
  36 #define STATS_BO_SIZE               4096
  37 #define STATS_BO_END_OFFSET_BYTES   (STATS_BO_SIZE / 2)
  38 #define MAX_STAT_COUNTERS           (STATS_BO_END_OFFSET_BYTES / 8)
  39
  40 /*
  41  * The largest OA formats we can use include:
  42  * For Haswell:
  43  *   1 timestamp, 45 A counters, 8 B counters and 8 C counters.
  44  * For Gen8+
  45  *   1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
  46  */
  47 #define MAX_OA_REPORT_COUNTERS 62
  48
  49 /**
  50  * i965 representation of a performance query object.
  51  *
  52  * NB: We want to keep this structure relatively lean considering that
  53  * applications may expect to allocate enough objects to be able to
  54  * query around all draw calls in a frame.
  55  */
  56 struct brw_perf_query_object
  57 {
  58    struct gl_perf_query_object base;
  59
  60    const struct brw_perf_query_info *query;
  61
  62    /* See query->kind to know which state below is in use... */
  63    union {
  64       struct {
  65
  66          /**
  67           * BO containing OA counter snapshots at query Begin/End time.
  68           */
  69          struct brw_bo *bo;
  70
  71          /**
  72           * Address of mapped of @bo
  73           */
  74          void *map;
  75
  76          /**
  77           * The MI_REPORT_PERF_COUNT command lets us specify a unique
  78           * ID that will be reflected in the resulting OA report
  79           * that's written by the GPU. This is the ID we're expecting
  80           * in the begin report and the the end report should be
  81           * @begin_report_id + 1.
  82           */
  83          int begin_report_id;
  84
  85          /**
  86           * Reference the head of the brw->perfquery.sample_buffers
  87           * list at the time that the query started (so we only need
  88           * to look at nodes after this point when looking for samples
  89           * related to this query)
  90           *
  91           * (See struct brw_oa_sample_buf description for more details)
  92           */
  93          struct exec_node *samples_head;
  94
  95          /**
  96           * Storage for the final accumulated OA counters.
  97           */
  98          uint64_t accumulator[MAX_OA_REPORT_COUNTERS];
  99
 100          /**
 101           * Hw ID used by the context on which the query was running.
 102           */
 103          uint32_t hw_id;
 104
 105          /**
 106           * false while in the unaccumulated_elements list, and set to
 107           * true when the final, end MI_RPC snapshot has been
 108           * accumulated.
 109           */
 110          bool results_accumulated;
 111
 112          /**
 113           * Number of reports accumulated to produce the results.
 114           */
 115          uint32_t reports_accumulated;
 116
 117          /**
 118           * Frequency of the GT at begin and end of the query.
 119           */
 120          uint64_t gt_frequency[2];
 121
 122          /**
 123           * Frequency in the slices of the GT at the begin and end of the
 124           * query.
 125           */
 126          uint64_t slice_frequency[2];
 127
 128          /**
 129           * Frequency in the unslice of the GT at the begin and end of the
 130           * query.
 131           */
 132          uint64_t unslice_frequency[2];
 133       } oa;
 134
 135       struct {
 136          /**
 137           * BO containing starting and ending snapshots for the
 138           * statistics counters.
 139           */
 140          struct brw_bo *bo;
 141       } pipeline_stats;
 142    };
 143 };
 144
 145 static inline struct brw_perf_query_info *
 146 brw_perf_query_append_query_info(struct brw_context *brw)
 147 {
 148    brw->perfquery.queries =
 149       reralloc(brw, brw->perfquery.queries,
 150                struct brw_perf_query_info, ++brw->perfquery.n_queries);
 151
 152    return &brw->perfquery.queries[brw->perfquery.n_queries - 1];
 153 }
 154
 155 static inline void
 156 brw_perf_query_info_add_stat_reg(struct brw_perf_query_info *query,
 157                                  uint32_t reg,
 158                                  uint32_t numerator,
 159                                  uint32_t denominator,
 160                                  const char *name,
 161                                  const char *description)
 162 {
 163    struct brw_perf_query_counter *counter;
 164
 165    assert(query->n_counters < MAX_STAT_COUNTERS);
 166
 167    counter = &query->counters[query->n_counters];
 168    counter->name = name;
 169    counter->desc = description;
 170    counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
 171    counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
 172    counter->size = sizeof(uint64_t);
 173    counter->offset = sizeof(uint64_t) * query->n_counters;
 174    counter->pipeline_stat.reg = reg;
 175    counter->pipeline_stat.numerator = numerator;
 176    counter->pipeline_stat.denominator = denominator;
 177
 178    query->n_counters++;
 179 }
 180
 181 static inline void
 182 brw_perf_query_info_add_basic_stat_reg(struct brw_perf_query_info *query,
 183                                        uint32_t reg, const char *name)
 184 {
 185    brw_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name);
 186 }
 187
 188 /* Accumulate 32bits OA counters */
 189 static inline void
 190 brw_perf_query_accumulate_uint32(const uint32_t *report0,
 191                                  const uint32_t *report1,
 192                                  uint64_t *accumulator)
 193 {
 194    *accumulator += (uint32_t)(*report1 - *report0);
 195 }
 196
 197 /* Accumulate 40bits OA counters */
 198 static inline void
 199 brw_perf_query_accumulate_uint40(int a_index,
 200                                  const uint32_t *report0,
 201                                  const uint32_t *report1,
 202                                  uint64_t *accumulator)
 203 {
 204    const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40);
 205    const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40);
 206    uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32;
 207    uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32;
 208    uint64_t value0 = report0[a_index + 4] | high0;
 209    uint64_t value1 = report1[a_index + 4] | high1;
 210    uint64_t delta;
 211
 212    if (value0 > value1)
 213       delta = (1ULL << 40) + value1 - value0;
 214    else
 215       delta = value1 - value0;
 216
 217    *accumulator += delta;
 218 }
 219
 220 int brw_perf_query_get_mdapi_oa_data(struct brw_context *brw,
 221                                      struct brw_perf_query_object *obj,
 222                                      size_t data_size,
 223                                      uint8_t *data);
 224 void brw_perf_query_register_mdapi_oa_query(struct brw_context *brw);
 225 void brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw);
 226
 227 #endif /* BRW_PERFORMANCE_QUERY_H */