f62786f7f1ce74ef035666f91e5b8ab89d34e5de
[mesa.git] / src / mesa / drivers / dri / i965 / brw_performance_query.h
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef BRW_PERFORMANCE_QUERY_H
25 #define BRW_PERFORMANCE_QUERY_H
26
27 #include <stdint.h>
28
29 #include "brw_context.h"
30 #include "brw_performance_query_metrics.h"
31
32 /*
33 * We currently allocate only one page for pipeline statistics queries. Here
34 * we derive the maximum number of counters that fit in that amount.
35 */
36 #define STATS_BO_SIZE 4096
37 #define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2)
38 #define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8)
39
40 /*
41 * The largest OA formats we can use include:
42 * For Haswell:
43 * 1 timestamp, 45 A counters, 8 B counters and 8 C counters.
44 * For Gen8+
45 * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
46 */
47 #define MAX_OA_REPORT_COUNTERS 62
48
49 /**
50 * i965 representation of a performance query object.
51 *
52 * NB: We want to keep this structure relatively lean considering that
53 * applications may expect to allocate enough objects to be able to
54 * query around all draw calls in a frame.
55 */
56 struct brw_perf_query_object
57 {
58 struct gl_perf_query_object base;
59
60 const struct brw_perf_query_info *query;
61
62 /* See query->kind to know which state below is in use... */
63 union {
64 struct {
65
66 /**
67 * BO containing OA counter snapshots at query Begin/End time.
68 */
69 struct brw_bo *bo;
70
71 /**
72 * Address of mapped of @bo
73 */
74 void *map;
75
76 /**
77 * The MI_REPORT_PERF_COUNT command lets us specify a unique
78 * ID that will be reflected in the resulting OA report
79 * that's written by the GPU. This is the ID we're expecting
80 * in the begin report and the the end report should be
81 * @begin_report_id + 1.
82 */
83 int begin_report_id;
84
85 /**
86 * Reference the head of the brw->perfquery.sample_buffers
87 * list at the time that the query started (so we only need
88 * to look at nodes after this point when looking for samples
89 * related to this query)
90 *
91 * (See struct brw_oa_sample_buf description for more details)
92 */
93 struct exec_node *samples_head;
94
95 /**
96 * Storage for the final accumulated OA counters.
97 */
98 uint64_t accumulator[MAX_OA_REPORT_COUNTERS];
99
100 /**
101 * Hw ID used by the context on which the query was running.
102 */
103 uint32_t hw_id;
104
105 /**
106 * false while in the unaccumulated_elements list, and set to
107 * true when the final, end MI_RPC snapshot has been
108 * accumulated.
109 */
110 bool results_accumulated;
111
112 /**
113 * Number of reports accumulated to produce the results.
114 */
115 uint32_t reports_accumulated;
116 } oa;
117
118 struct {
119 /**
120 * BO containing starting and ending snapshots for the
121 * statistics counters.
122 */
123 struct brw_bo *bo;
124 } pipeline_stats;
125 };
126 };
127
128 static inline struct brw_perf_query_info *
129 brw_perf_query_append_query_info(struct brw_context *brw)
130 {
131 brw->perfquery.queries =
132 reralloc(brw, brw->perfquery.queries,
133 struct brw_perf_query_info, ++brw->perfquery.n_queries);
134
135 return &brw->perfquery.queries[brw->perfquery.n_queries - 1];
136 }
137
138 static inline void
139 brw_perf_query_info_add_stat_reg(struct brw_perf_query_info *query,
140 uint32_t reg,
141 uint32_t numerator,
142 uint32_t denominator,
143 const char *name,
144 const char *description)
145 {
146 struct brw_perf_query_counter *counter;
147
148 assert(query->n_counters < MAX_STAT_COUNTERS);
149
150 counter = &query->counters[query->n_counters];
151 counter->name = name;
152 counter->desc = description;
153 counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL;
154 counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
155 counter->size = sizeof(uint64_t);
156 counter->offset = sizeof(uint64_t) * query->n_counters;
157 counter->pipeline_stat.reg = reg;
158 counter->pipeline_stat.numerator = numerator;
159 counter->pipeline_stat.denominator = denominator;
160
161 query->n_counters++;
162 }
163
/**
 * Convenience wrapper around brw_perf_query_info_add_stat_reg() for a
 * register that needs no scaling: numerator and denominator are both 1,
 * and \p name doubles as the counter's description.
 *
 * \param query  query description to extend.
 * \param reg    statistics register to add.
 * \param name   used as both the counter name and its description.
 */
static inline void
brw_perf_query_info_add_basic_stat_reg(struct brw_perf_query_info *query,
                                       uint32_t reg, const char *name)
{
   const uint32_t unit = 1;

   brw_perf_query_info_add_stat_reg(query, reg,
                                    unit /* numerator */,
                                    unit /* denominator */,
                                    name /* name */,
                                    name /* description */);
}
170
/**
 * Accumulate the delta of a 32-bit OA counter.
 *
 * Adds (*report1 - *report0), computed modulo 2^32, to *accumulator, so a
 * counter that wrapped once between the two reports still yields the
 * correct difference.
 *
 * \param report0      counter value in the earlier report.
 * \param report1      counter value in the later report.
 * \param accumulator  64-bit running total to add the delta to.
 */
static inline void
brw_perf_query_accumulate_uint32(const uint32_t *report0,
                                 const uint32_t *report1,
                                 uint64_t *accumulator)
{
   /* Unsigned 32-bit subtraction wraps, which is exactly what we want. */
   const uint32_t delta = *report1 - *report0;

   *accumulator = *accumulator + delta;
}
179
/**
 * Accumulate the delta of a 40-bit OA "A" counter.
 *
 * Each 40-bit value is assembled from two places in a report: its low
 * 32 bits are the dword at index (a_index + 4), and its high 8 bits are
 * the byte at index a_index counted from the start of dword 40. The
 * difference between the two reports, taken modulo 2^40 so that a single
 * wrap of the counter is handled, is added to *accumulator.
 *
 * \param a_index      index of the A counter within the report.
 * \param report0      the earlier report, as an array of dwords.
 * \param report1      the later report, as an array of dwords.
 * \param accumulator  64-bit running total to add the delta to.
 */
static inline void
brw_perf_query_accumulate_uint40(int a_index,
                                 const uint32_t *report0,
                                 const uint32_t *report1,
                                 uint64_t *accumulator)
{
   enum {
      A_LOW_DWORD_BASE = 4,     /* dword index of A0's low 32 bits */
      A_HIGH_BYTES_DWORD = 40   /* dword index where the high bytes start */
   };
   const uint64_t mask40 = (1ULL << 40) - 1;

   const uint8_t *top0 = (const uint8_t *)(report0 + A_HIGH_BYTES_DWORD);
   const uint8_t *top1 = (const uint8_t *)(report1 + A_HIGH_BYTES_DWORD);

   const uint64_t begin =
      ((uint64_t)top0[a_index] << 32) | report0[A_LOW_DWORD_BASE + a_index];
   const uint64_t end =
      ((uint64_t)top1[a_index] << 32) | report1[A_LOW_DWORD_BASE + a_index];

   /* Both values are below 2^40, so reducing the 64-bit difference
    * modulo 2^40 gives end - begin when the counter did not wrap and
    * 2^40 + end - begin when it did.
    */
   *accumulator += (end - begin) & mask40;
}
202
203 #endif /* BRW_PERFORMANCE_QUERY_H */