80f311be2e84e206a0db67aec5d6b366477290d8
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_query.c
1 /*
2 * Copyright 2011 Nouveau Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Christoph Bumiller
23 */
24
25 #define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
26
27 #include "nvc0/nvc0_context.h"
28 #include "nvc0/nvc0_query.h"
29 #include "nvc0/nvc0_query_sw.h"
30 #include "nvc0/nvc0_query_hw.h"
31 #include "nvc0/nvc0_query_hw_sm.h"
32
/*
 * pipe_context::create_query hook.
 *
 * The software query implementation is asked first; only when it does not
 * produce a query object is the hardware implementation tried.  The result
 * (possibly NULL if both fail) is returned as an opaque pipe_query.
 */
static struct pipe_query *
nvc0_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_query *sw_query = nvc0_sw_create_query(nvc0, type, index);

   if (sw_query)
      return (struct pipe_query *)sw_query;

   return (struct pipe_query *)nvc0_hw_create_query(nvc0, type, index);
}
45
46 static void
47 nvc0_destroy_query(struct pipe_context *pipe, struct pipe_query *pq)
48 {
49 struct nvc0_query *q = nvc0_query(pq);
50 q->funcs->destroy_query(nvc0_context(pipe), q);
51 }
52
53 static boolean
54 nvc0_begin_query(struct pipe_context *pipe, struct pipe_query *pq)
55 {
56 struct nvc0_query *q = nvc0_query(pq);
57 return q->funcs->begin_query(nvc0_context(pipe), q);
58 }
59
60 static void
61 nvc0_end_query(struct pipe_context *pipe, struct pipe_query *pq)
62 {
63 struct nvc0_query *q = nvc0_query(pq);
64 q->funcs->end_query(nvc0_context(pipe), q);
65 }
66
67 static boolean
68 nvc0_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
69 boolean wait, union pipe_query_result *result)
70 {
71 struct nvc0_query *q = nvc0_query(pq);
72 return q->funcs->get_query_result(nvc0_context(pipe), q, wait, result);
73 }
74
75 static void
76 nvc0_render_condition(struct pipe_context *pipe,
77 struct pipe_query *pq,
78 boolean condition, uint mode)
79 {
80 struct nvc0_context *nvc0 = nvc0_context(pipe);
81 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
82 struct nvc0_query *q = nvc0_query(pq);
83 struct nvc0_hw_query *hq = nvc0_hw_query(q);
84 uint32_t cond;
85 bool wait =
86 mode != PIPE_RENDER_COND_NO_WAIT &&
87 mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
88
89 if (!pq) {
90 cond = NVC0_3D_COND_MODE_ALWAYS;
91 }
92 else {
93 /* NOTE: comparison of 2 queries only works if both have completed */
94 switch (q->type) {
95 case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
96 cond = condition ? NVC0_3D_COND_MODE_EQUAL :
97 NVC0_3D_COND_MODE_NOT_EQUAL;
98 wait = true;
99 break;
100 case PIPE_QUERY_OCCLUSION_COUNTER:
101 case PIPE_QUERY_OCCLUSION_PREDICATE:
102 if (likely(!condition)) {
103 if (unlikely(hq->nesting))
104 cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
105 NVC0_3D_COND_MODE_ALWAYS;
106 else
107 cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
108 } else {
109 cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
110 }
111 break;
112 default:
113 assert(!"render condition query not a predicate");
114 cond = NVC0_3D_COND_MODE_ALWAYS;
115 break;
116 }
117 }
118
119 nvc0->cond_query = pq;
120 nvc0->cond_cond = condition;
121 nvc0->cond_condmode = cond;
122 nvc0->cond_mode = mode;
123
124 if (!pq) {
125 PUSH_SPACE(push, 1);
126 IMMED_NVC0(push, NVC0_3D(COND_MODE), cond);
127 return;
128 }
129
130 if (wait)
131 nvc0_hw_query_fifo_wait(push, q);
132
133 PUSH_SPACE(push, 7);
134 PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
135 BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
136 PUSH_DATAh(push, hq->bo->offset + hq->offset);
137 PUSH_DATA (push, hq->bo->offset + hq->offset);
138 PUSH_DATA (push, cond);
139 BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2);
140 PUSH_DATAh(push, hq->bo->offset + hq->offset);
141 PUSH_DATA (push, hq->bo->offset + hq->offset);
142 }
143
144 /* === DRIVER STATISTICS === */
145
146 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
147
/* Names of the driver statistics queries, in driver statistic index order.
 * The table is read-only, so both the strings and the pointers are const.
 */
static const char *const nvc0_sw_query_drv_stat_names[] =
{
   "drv-tex_obj_current_count",
   "drv-tex_obj_current_bytes",
   "drv-buf_obj_current_count",
   "drv-buf_obj_current_bytes_vid",
   "drv-buf_obj_current_bytes_sys",
   "drv-tex_transfers_rd",
   "drv-tex_transfers_wr",
   "drv-tex_copy_count",
   "drv-tex_blit_count",
   "drv-tex_cache_flush_count",
   "drv-buf_transfers_rd",
   "drv-buf_transfers_wr",
   "drv-buf_read_bytes_staging_vid",
   "drv-buf_write_bytes_direct",
   "drv-buf_write_bytes_staging_vid",
   "drv-buf_write_bytes_staging_sys",
   "drv-buf_copy_bytes",
   "drv-buf_non_kernel_fence_sync_count",
   "drv-any_non_kernel_fence_sync_count",
   "drv-query_sync_count",
   "drv-gpu_serialize_count",
   "drv-draw_calls_array",
   "drv-draw_calls_indexed",
   "drv-draw_calls_fallback_count",
   "drv-user_buffer_upload_bytes",
   "drv-constbuf_upload_count",
   "drv-constbuf_upload_bytes",
   "drv-pushbuf_count",
   "drv-resource_validate_count"
};
180
181 #endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
182
183 /* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */
184
185 /* NOTE: intentionally using the same names as NV */
/* Names of the NVE4+ MP performance counter queries, followed by the derived
 * metrics.  The table is read-only, so both the strings and the pointers are
 * const.
 */
static const char *const nve4_hw_sm_query_names[] =
{
   /* MP counters */
   "active_cycles",
   "active_warps",
   "atom_count",
   "branch",
   "divergent_branch",
   "gld_request",
   "global_ld_mem_divergence_replays",
   "global_store_transaction",
   "global_st_mem_divergence_replays",
   "gred_count",
   "gst_request",
   "inst_executed",
   "inst_issued",
   "inst_issued1",
   "inst_issued2",
   "l1_global_load_hit",
   "l1_global_load_miss",
   "l1_local_load_hit",
   "l1_local_load_miss",
   "l1_local_store_hit",
   "l1_local_store_miss",
   "l1_shared_load_transactions",
   "l1_shared_store_transactions",
   "local_load",
   "local_load_transactions",
   "local_store",
   "local_store_transactions",
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
   "shared_load",
   "shared_load_replay",
   "shared_store",
   "shared_store_replay",
   "sm_cta_launched",
   "threads_launched",
   "uncached_global_load_transaction",
   "warps_launched",
   /* metrics, i.e. functions of the MP counters */
   "metric-ipc",                 /* inst_executed, clock */
   "metric-ipac",                /* inst_executed, active_cycles */
   "metric-ipec",                /* inst_executed, (bool)inst_executed */
   "metric-achieved_occupancy",  /* active_warps, active_cycles */
   "metric-sm_efficiency",       /* active_cycles, clock */
   "metric-inst_replay_overhead" /* inst_issued, inst_executed */
};
240
241 /* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
/* Names of the NVC0:NVE4 MP performance counter queries.  The table is
 * read-only, so both the strings and the pointers are const.
 */
static const char *const nvc0_hw_sm_query_names[] =
{
   /* MP counters */
   "active_cycles",
   "active_warps",
   "atom_count",
   "branch",
   "divergent_branch",
   "gld_request",
   "gred_count",
   "gst_request",
   "inst_executed",
   "inst_issued1_0",
   "inst_issued1_1",
   "inst_issued2_0",
   "inst_issued2_1",
   "local_load",
   "local_store",
   "prof_trigger_00",
   "prof_trigger_01",
   "prof_trigger_02",
   "prof_trigger_03",
   "prof_trigger_04",
   "prof_trigger_05",
   "prof_trigger_06",
   "prof_trigger_07",
   "shared_load",
   "shared_store",
   "threads_launched",
   "thread_inst_executed_0",
   "thread_inst_executed_1",
   "thread_inst_executed_2",
   "thread_inst_executed_3",
   "warps_launched",
};
277
278 int
279 nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
280 unsigned id,
281 struct pipe_driver_query_info *info)
282 {
283 struct nvc0_screen *screen = nvc0_screen(pscreen);
284 int count = 0;
285
286 count += NVC0_SW_QUERY_DRV_STAT_COUNT;
287
288 if (screen->base.device->drm_version >= 0x01000101) {
289 if (screen->compute) {
290 if (screen->base.class_3d == NVE4_3D_CLASS) {
291 count += NVE4_HW_SM_QUERY_COUNT;
292 } else
293 if (screen->base.class_3d < NVE4_3D_CLASS) {
294 count += NVC0_HW_SM_QUERY_COUNT;
295 }
296 }
297 }
298
299 if (!info)
300 return count;
301
302 /* Init default values. */
303 info->name = "this_is_not_the_query_you_are_looking_for";
304 info->query_type = 0xdeadd01d;
305 info->max_value.u64 = 0;
306 info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
307 info->group_id = -1;
308
309 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
310 if (id < NVC0_SW_QUERY_DRV_STAT_COUNT) {
311 info->name = nvc0_sw_query_drv_stat_names[id];
312 info->query_type = NVC0_SW_QUERY_DRV_STAT(id);
313 info->max_value.u64 = 0;
314 if (strstr(info->name, "bytes"))
315 info->type = PIPE_DRIVER_QUERY_TYPE_BYTES;
316 info->group_id = NVC0_SW_QUERY_DRV_STAT_GROUP;
317 return 1;
318 } else
319 #endif
320 if (id < count) {
321 if (screen->compute) {
322 if (screen->base.class_3d == NVE4_3D_CLASS) {
323 info->name = nve4_hw_sm_query_names[id - NVC0_SW_QUERY_DRV_STAT_COUNT];
324 info->query_type = NVE4_HW_SM_QUERY(id - NVC0_SW_QUERY_DRV_STAT_COUNT);
325 info->max_value.u64 =
326 (id < NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY) ? 0 : 100;
327 info->group_id = NVC0_HW_SM_QUERY_GROUP;
328 return 1;
329 } else
330 if (screen->base.class_3d < NVE4_3D_CLASS) {
331 info->name = nvc0_hw_sm_query_names[id - NVC0_SW_QUERY_DRV_STAT_COUNT];
332 info->query_type = NVC0_HW_SM_QUERY(id - NVC0_SW_QUERY_DRV_STAT_COUNT);
333 info->group_id = NVC0_HW_SM_QUERY_GROUP;
334 return 1;
335 }
336 }
337 }
338 /* user asked for info about non-existing query */
339 return 0;
340 }
341
342 int
343 nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
344 unsigned id,
345 struct pipe_driver_query_group_info *info)
346 {
347 struct nvc0_screen *screen = nvc0_screen(pscreen);
348 int count = 0;
349
350 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
351 count++;
352 #endif
353
354 if (screen->base.device->drm_version >= 0x01000101) {
355 if (screen->compute) {
356 if (screen->base.class_3d == NVE4_3D_CLASS) {
357 count++;
358 } else
359 if (screen->base.class_3d < NVE4_3D_CLASS) {
360 count++;
361 }
362 }
363 }
364
365 if (!info)
366 return count;
367
368 if (id == NVC0_HW_SM_QUERY_GROUP) {
369 if (screen->compute) {
370 info->name = "MP counters";
371 info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
372
373 /* Because we can't expose the number of hardware counters needed for
374 * each different query, we don't want to allow more than one active
375 * query simultaneously to avoid failure when the maximum number of
376 * counters is reached. Note that these groups of GPU counters are
377 * currently only used by AMD_performance_monitor.
378 */
379 info->max_active_queries = 1;
380
381 if (screen->base.class_3d == NVE4_3D_CLASS) {
382 info->num_queries = NVE4_HW_SM_QUERY_COUNT;
383 return 1;
384 } else
385 if (screen->base.class_3d < NVE4_3D_CLASS) {
386 info->num_queries = NVC0_HW_SM_QUERY_COUNT;
387 return 1;
388 }
389 }
390 }
391 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
392 else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
393 info->name = "Driver statistics";
394 info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
395 info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
396 info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
397 return 1;
398 }
399 #endif
400
401 /* user asked for info about non-existing query group */
402 info->name = "this_is_not_the_query_group_you_are_looking_for";
403 info->max_active_queries = 0;
404 info->num_queries = 0;
405 info->type = 0;
406 return 0;
407 }
408
409 void
410 nvc0_init_query_functions(struct nvc0_context *nvc0)
411 {
412 struct pipe_context *pipe = &nvc0->base.pipe;
413
414 pipe->create_query = nvc0_create_query;
415 pipe->destroy_query = nvc0_destroy_query;
416 pipe->begin_query = nvc0_begin_query;
417 pipe->end_query = nvc0_end_query;
418 pipe->get_query_result = nvc0_get_query_result;
419 pipe->render_condition = nvc0_render_condition;
420 }