2 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * Jonathan Marek <jonathan@marek.ca>
25 * Rob Clark <robclark@freedesktop.org>
28 /* NOTE: perfcntrs are 48-bits but we only have 32-bit accumulate (?)
29 * so we work with 32-bits only. we accumulate start/stop separately,
30 * which differs from a5xx but works with only accumulate (no add/neg)
33 #include "freedreno_query_acc.h"
34 #include "freedreno_resource.h"
36 #include "fd2_context.h"
37 #include "fd2_query.h"
/* One element of a query's sample buffer. The member list is not visible
 * in this view; other code in this file reads and writes its `start` and
 * `stop` fields and indexes the buffer as an array of these structs.
 */
39 struct PACKED fd2_query_sample
{
44 /* Address of a single field inside an array of fd2_query_sample.
 *
 * Expands to a comma-separated argument list: first the buffer object
 * backing (aq)->prsc, then the byte offset of `field` within element
 * `idx`. It is therefore only meaningful inside a call that takes those
 * arguments, e.g. OUT_RELOCW(ring, query_sample_idx(aq, i, start)).
 *
 * NOTE(review): `idx` is not parenthesized in the expansion, so pass a
 * simple expression. The end of the macro is not visible in this view.
 */
45 #define query_sample_idx(aq, idx, field) \
46 fd_resource((aq)->prsc)->bo, \
47 (idx * sizeof(struct fd2_query_sample)) + \
48 offsetof(struct fd2_query_sample, field), \
51 /* Shorthand for query_sample_idx() with index 0: addresses `field` of the
 * first fd2_query_sample in the query's buffer.
 */
52 #define query_sample(aq, field) \
53 query_sample_idx(aq, 0, field)
56 * Performance Counter (batch) queries:
58 * Only one of these is active at a time, per the design of the gallium
59 * batch_query API. One perfcntr query tracks N query_types,
60 * each of which has a 'fd_batch_query_entry' that maps it back to
61 * the associated group and counter.
/* Identifies one requested perfcntr query type: the counter group it
 * belongs to and the countable it selects within that group.
 */
64 struct fd_batch_query_entry
{
65 uint8_t gid
; /* group-id: index into screen->perfcntr_groups[] */
66 uint8_t cid
; /* countable-id within the group: index into that group's countables[] */
/* Private data of a batch query, stored in the fd_acc_query's query_data
 * pointer by fd2_create_batch_query(). query_entries[] is a flexible array
 * member, so the struct is allocated together with its entries.
 */
69 struct fd_batch_query_data
{
/* screen of the context the query was created on */
70 struct fd_screen
*screen
;
/* number of elements in query_entries[] */
71 unsigned num_query_entries
;
/* one entry per requested query type, in request order */
72 struct fd_batch_query_entry query_entries
[];
/* Resume hook of the perfcntr sample provider.
 *
 * Emits commands on the batch's draw ring in two passes over the query
 * entries:
 *   1. for each entry, emit the countable's selector for a counter's
 *      select register;
 *   2. for each entry, emit a CP_REG_TO_MEM packet from that counter's
 *      counter_reg_lo into the `start` field of sample i.
 *
 * Counters within a group are handed out in request order: the n-th entry
 * with a given gid uses that group's counters[n]. counters_per_group[]
 * holds the next free slot per group and is cleared between the passes so
 * that both passes pick the same counter for the same entry.
 */
76 perfcntr_resume(struct fd_acc_query
*aq
, struct fd_batch
*batch
)
78 struct fd_batch_query_data
*data
= aq
->query_data
;
79 struct fd_screen
*screen
= data
->screen
;
80 struct fd_ringbuffer
*ring
= batch
->draw
;
/* next unused counter index per group, indexed by gid */
82 unsigned counters_per_group
[screen
->num_perfcntr_groups
];
83 memset(counters_per_group
, 0, sizeof(counters_per_group
));
87 /* configure performance counters for the requested queries: */
88 for (unsigned i
= 0; i
< data
->num_query_entries
; i
++) {
89 struct fd_batch_query_entry
*entry
= &data
->query_entries
[i
];
90 const struct fd_perfcntr_group
*g
= &screen
->perfcntr_groups
[entry
->gid
];
/* claim the next unused counter of this entry's group */
91 unsigned counter_idx
= counters_per_group
[entry
->gid
]++;
93 debug_assert(counter_idx
< g
->num_counters
);
/* emit the requested countable's selector for the claimed counter */
95 OUT_PKT0(ring
, g
->counters
[counter_idx
].select_reg
, 1);
96 OUT_RING(ring
, g
->countables
[entry
->cid
].selector
);
/* restart slot assignment so the pass below maps entries to the same counters */
99 memset(counters_per_group
, 0, sizeof(counters_per_group
));
101 /* and snapshot the start values */
102 for (unsigned i
= 0; i
< data
->num_query_entries
; i
++) {
103 struct fd_batch_query_entry
*entry
= &data
->query_entries
[i
];
104 const struct fd_perfcntr_group
*g
= &screen
->perfcntr_groups
[entry
->gid
];
105 unsigned counter_idx
= counters_per_group
[entry
->gid
]++;
106 const struct fd_perfcntr_counter
*counter
= &g
->counters
[counter_idx
];
/* NOTE(review): the flag is named CP_MEM_TO_REG_0_ACCUMULATE although the
 * packet is CP_REG_TO_MEM - presumably the bit is shared; confirm. */
108 OUT_PKT3(ring
, CP_REG_TO_MEM
, 2);
109 OUT_RING(ring
, counter
->counter_reg_lo
| CP_MEM_TO_REG_0_ACCUMULATE
);
/* destination: the `start` field of sample i in the query's buffer */
110 OUT_RELOCW(ring
, query_sample_idx(aq
, i
, start
));
/* Pause hook of the perfcntr sample provider.
 *
 * For each query entry, emits a CP_REG_TO_MEM packet on the batch's draw
 * ring from the entry's counter_reg_lo into the `stop` field of sample i.
 * Counters are assigned per group in request order, exactly as in
 * perfcntr_resume(), so each entry reads the counter that was configured
 * for it there.
 */
115 perfcntr_pause(struct fd_acc_query
*aq
, struct fd_batch
*batch
)
117 struct fd_batch_query_data
*data
= aq
->query_data
;
118 struct fd_screen
*screen
= data
->screen
;
119 struct fd_ringbuffer
*ring
= batch
->draw
;
/* next unused counter index per group, indexed by gid */
121 unsigned counters_per_group
[screen
->num_perfcntr_groups
];
122 memset(counters_per_group
, 0, sizeof(counters_per_group
));
126 /* TODO do we need to bother to turn anything off? */
128 /* snapshot the end values: */
129 for (unsigned i
= 0; i
< data
->num_query_entries
; i
++) {
130 struct fd_batch_query_entry
*entry
= &data
->query_entries
[i
];
131 const struct fd_perfcntr_group
*g
= &screen
->perfcntr_groups
[entry
->gid
];
/* same request-order slot assignment as in perfcntr_resume() */
132 unsigned counter_idx
= counters_per_group
[entry
->gid
]++;
133 const struct fd_perfcntr_counter
*counter
= &g
->counters
[counter_idx
];
135 OUT_PKT3(ring
, CP_REG_TO_MEM
, 2);
136 OUT_RING(ring
, counter
->counter_reg_lo
| CP_MEM_TO_REG_0_ACCUMULATE
);
/* destination: the `stop` field of sample i in the query's buffer */
137 OUT_RELOCW(ring
, query_sample_idx(aq
, i
, stop
));
/* Result hook of the perfcntr sample provider.
 *
 * Treats `buf` as an array of fd2_query_sample, one per query entry, and
 * stores stop - start of sample i into result->batch[i].u64.
 *
 * NOTE(review): the field types of fd2_query_sample are not visible here;
 * per the note at the top of the file the values are presumably 32-bit,
 * in which case the difference wraps modulo 2^32 - confirm.
 */
142 perfcntr_accumulate_result(struct fd_acc_query
*aq
, void *buf
,
143 union pipe_query_result
*result
)
145 struct fd_batch_query_data
*data
= aq
->query_data
;
146 struct fd2_query_sample
*sp
= buf
;
/* one output slot per requested query type, in request order */
148 for (unsigned i
= 0; i
< data
->num_query_entries
; i
++)
149 result
->batch
[i
].u64
= sp
[i
].stop
- sp
[i
].start
;
/* Sample provider for batch perfcntr queries; handed to
 * fd_acc_create_query2() by fd2_create_batch_query(). It is flagged active
 * for the draw and clear stages and wires up the three hooks defined in
 * this file.
 */
152 static const struct fd_acc_sample_provider perfcntr
= {
153 .query_type
= FD_QUERY_FIRST_PERFCNTR
,
154 .active
= FD_STAGE_DRAW
| FD_STAGE_CLEAR
,
155 .resume
= perfcntr_resume
,
156 .pause
= perfcntr_pause
,
157 .result
= perfcntr_accumulate_result
,
/* Batch-query constructor, installed as pctx->create_batch_query by
 * fd2_query_context_init().
 *
 * pctx:        context to create the query on
 * num_queries: number of elements in query_types
 * query_types: requested query types; each is checked to be at least
 *              FD_QUERY_FIRST_PERFCNTR and, once rebased to that value,
 *              below screen->num_perfcntr_queries
 *
 * Each valid query type is mapped to a fd_batch_query_entry whose gid is
 * taken from the matching screen->perfcntr_queries[] element. A request
 * for more counters of one group than the group has is reported through
 * debug_printf. On the path visible here the function creates an
 * accumulating query with the `perfcntr` provider, sizes its sample buffer
 * as one fd2_query_sample per query, attaches the entry table as
 * query_data and returns the query cast to struct pipe_query *.
 *
 * NOTE(review): the pointer returned by CALLOC_VARIANT_LENGTH_STRUCT is
 * dereferenced without a NULL check - presumably it can fail on
 * allocation failure; confirm and handle.
 * NOTE(review): the statements taken after each debug_printf are not
 * visible in this view.
 */
160 static struct pipe_query
*
161 fd2_create_batch_query(struct pipe_context
*pctx
,
162 unsigned num_queries
, unsigned *query_types
)
164 struct fd_context
*ctx
= fd_context(pctx
);
165 struct fd_screen
*screen
= ctx
->screen
;
167 struct fd_acc_query
*aq
;
168 struct fd_batch_query_data
*data
;
/* header plus one fd_batch_query_entry per requested query */
170 data
= CALLOC_VARIANT_LENGTH_STRUCT(fd_batch_query_data
,
171 num_queries
* sizeof(data
->query_entries
[0]));
173 data
->screen
= screen
;
174 data
->num_query_entries
= num_queries
;
176 /* validate the requested query_types and ensure we don't try
177 * to request more query_types of a given group than we have
180 unsigned counters_per_group
[screen
->num_perfcntr_groups
];
181 memset(counters_per_group
, 0, sizeof(counters_per_group
));
183 for (unsigned i
= 0; i
< num_queries
; i
++) {
184 unsigned idx
= query_types
[i
] - FD_QUERY_FIRST_PERFCNTR
;
186 /* verify valid query_type, ie. is it actually a perfcntr? */
187 if ((query_types
[i
] < FD_QUERY_FIRST_PERFCNTR
) ||
188 (idx
>= screen
->num_perfcntr_queries
)) {
189 debug_printf("invalid batch query query_type: %u\n", query_types
[i
]);
193 struct fd_batch_query_entry
*entry
= &data
->query_entries
[i
];
194 struct pipe_driver_query_info
*pq
= &screen
->perfcntr_queries
[idx
];
196 entry
->gid
= pq
->group_id
;
198 /* the perfcntr_queries[] table flattens all the countables
199 * for each group in series, ie:
201 * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
203 * So to find the countable index just step back through the
204 * table to find the first entry with the same group-id.
206 while (pq
> screen
->perfcntr_queries
) {
208 if (pq
->group_id
== entry
->gid
)
212 if (counters_per_group
[entry
->gid
] >=
213 screen
->perfcntr_groups
[entry
->gid
].num_counters
) {
214 debug_printf("too many counters for group %u\n", entry
->gid
);
218 counters_per_group
[entry
->gid
]++;
221 q
= fd_acc_create_query2(ctx
, 0, &perfcntr
);
222 aq
= fd_acc_query(q
);
224 /* sample buffer size is based on # of queries: */
225 aq
->size
= num_queries
* sizeof(struct fd2_query_sample
);
/* the hooks of the perfcntr provider read the entry table from here */
226 aq
->query_data
= data
;
228 return (struct pipe_query
*)q
;
/* Installs the query entry points for a context: the accumulating-query
 * implementation (fd_acc_create_query / fd_acc_query_set_stage) on the
 * fd_context, and fd2_create_batch_query() as the pipe_context's
 * create_batch_query hook.
 */
236 fd2_query_context_init(struct pipe_context
*pctx
)
238 struct fd_context
*ctx
= fd_context(pctx
);
240 ctx
->create_query
= fd_acc_create_query
;
241 ctx
->query_set_stage
= fd_acc_query_set_stage
;
243 pctx
->create_batch_query
= fd2_create_batch_query
;