freedreno: Add a6xx backend
[mesa.git] / src / gallium / drivers / freedreno / a6xx / fd6_query.c
1 /*
2 * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Rob Clark <robclark@freedesktop.org>
26 */
27
28 /* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */
29
30 #include "freedreno_query_acc.h"
31 #include "freedreno_resource.h"
32
33 #include "fd6_context.h"
34 #include "fd6_format.h"
35 #include "fd6_query.h"
36
37 struct PACKED fd6_query_sample {
38 uint64_t start;
39 uint64_t result;
40 uint64_t stop;
41 };
42
/*
 * Expands to the trailing four arguments of an OUT_RELOC()/OUT_RELOCW()
 * call addressing one member of the query's fd6_query_sample: the
 * buffer object backing aq->prsc, the byte offset of 'field', and two
 * zeros (presumably the "or" and "shift" relocation arguments -- confirm
 * against the OUT_RELOC prototype).
 */
#define query_sample(aq, field)                         \
	fd_resource((aq)->prsc)->bo,                        \
	offsetof(struct fd6_query_sample, field),           \
	0, 0
47
48 /*
49 * Occlusion Query:
50 *
51 * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
52 * interpret results
53 */
54
55 static void
56 occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
57 {
58 struct fd_ringbuffer *ring = batch->draw;
59
60 OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
61 OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
62
63 OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
64 OUT_RELOCW(ring, query_sample(aq, start));
65
66 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
67 OUT_RING(ring, ZPASS_DONE);
68 fd_reset_wfi(batch);
69
70 fd6_context(batch->ctx)->samples_passed_queries++;
71 }
72
73 static void
74 occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
75 {
76 struct fd_ringbuffer *ring = batch->draw;
77
78 OUT_PKT7(ring, CP_MEM_WRITE, 4);
79 OUT_RELOCW(ring, query_sample(aq, stop));
80 OUT_RING(ring, 0xffffffff);
81 OUT_RING(ring, 0xffffffff);
82
83 OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
84
85 OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
86 OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
87
88 OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
89 OUT_RELOCW(ring, query_sample(aq, stop));
90
91 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
92 OUT_RING(ring, ZPASS_DONE);
93 fd_reset_wfi(batch);
94
95 OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
96 OUT_RING(ring, 0x00000014); // XXX
97 OUT_RELOC(ring, query_sample(aq, stop));
98 OUT_RING(ring, 0xffffffff);
99 OUT_RING(ring, 0xffffffff);
100 OUT_RING(ring, 0x00000010); // XXX
101
102 /* result += stop - start: */
103 OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
104 OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
105 CP_MEM_TO_MEM_0_NEG_C);
106 OUT_RELOCW(ring, query_sample(aq, result)); /* dst */
107 OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
108 OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
109 OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
110
111 fd6_context(batch->ctx)->samples_passed_queries--;
112 }
113
114 static void
115 occlusion_counter_result(struct fd_acc_query *aq, void *buf,
116 union pipe_query_result *result)
117 {
118 struct fd6_query_sample *sp = buf;
119 result->u64 = sp->result;
120 }
121
122 static void
123 occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
124 union pipe_query_result *result)
125 {
126 struct fd6_query_sample *sp = buf;
127 result->b = !!sp->result;
128 }
129
130 static const struct fd_acc_sample_provider occlusion_counter = {
131 .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
132 .active = FD_STAGE_DRAW,
133 .size = sizeof(struct fd6_query_sample),
134 .resume = occlusion_resume,
135 .pause = occlusion_pause,
136 .result = occlusion_counter_result,
137 };
138
139 static const struct fd_acc_sample_provider occlusion_predicate = {
140 .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
141 .active = FD_STAGE_DRAW,
142 .size = sizeof(struct fd6_query_sample),
143 .resume = occlusion_resume,
144 .pause = occlusion_pause,
145 .result = occlusion_predicate_result,
146 };
147
148 static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
149 .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
150 .active = FD_STAGE_DRAW,
151 .size = sizeof(struct fd6_query_sample),
152 .resume = occlusion_resume,
153 .pause = occlusion_pause,
154 .result = occlusion_predicate_result,
155 };
156
157 /*
158 * Timestamp Queries:
159 */
160
161 static void
162 timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
163 {
164 struct fd_ringbuffer *ring = batch->draw;
165
166 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
167 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) |
168 CP_EVENT_WRITE_0_TIMESTAMP);
169 OUT_RELOCW(ring, query_sample(aq, start));
170 OUT_RING(ring, 0x00000000);
171
172 fd_reset_wfi(batch);
173 }
174
175 static void
176 timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
177 {
178 struct fd_ringbuffer *ring = batch->draw;
179
180 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
181 OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_AND_INV_EVENT) |
182 CP_EVENT_WRITE_0_TIMESTAMP);
183 OUT_RELOCW(ring, query_sample(aq, stop));
184 OUT_RING(ring, 0x00000000);
185
186 fd_reset_wfi(batch);
187 fd_wfi(batch, ring);
188
189 /* result += stop - start: */
190 OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
191 OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE |
192 CP_MEM_TO_MEM_0_NEG_C);
193 OUT_RELOCW(ring, query_sample(aq, result)); /* dst */
194 OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
195 OUT_RELOC(ring, query_sample(aq, stop)); /* srcB */
196 OUT_RELOC(ring, query_sample(aq, start)); /* srcC */
197 }
198
/*
 * Convert a tick count of the 19.2MHz always-on rbbm timer into
 * nanoseconds.
 *
 * The argument is a full 64-bit value: the callers pass the 64-bit
 * accumulated 'result' of a sample, and a 32-bit parameter would both
 * drop its upper half and make the multiplication wrap at 32 bits
 * before the result is widened.
 *
 * NOTE: the scale factor is the truncated integer 1000000000 / 19200000
 * (52 ns per tick rather than ~52.083); that is left as-is so values
 * stay comparable with any other code using the same conversion.
 *
 * TODO we should probably query the timer rate from the kernel..
 */
static uint64_t
ticks_to_ns(uint64_t ts)
{
	const uint64_t ns_per_tick = 1000000000 / 19200000;

	return ts * ns_per_tick;
}
208
209 static void
210 time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
211 union pipe_query_result *result)
212 {
213 struct fd6_query_sample *sp = buf;
214 result->u64 = ticks_to_ns(sp->result);
215 }
216
217 static void
218 timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
219 union pipe_query_result *result)
220 {
221 struct fd6_query_sample *sp = buf;
222 result->u64 = ticks_to_ns(sp->result);
223 }
224
225 static const struct fd_acc_sample_provider time_elapsed = {
226 .query_type = PIPE_QUERY_TIME_ELAPSED,
227 .active = FD_STAGE_DRAW | FD_STAGE_CLEAR,
228 .size = sizeof(struct fd6_query_sample),
229 .resume = timestamp_resume,
230 .pause = timestamp_pause,
231 .result = time_elapsed_accumulate_result,
232 };
233
/* NOTE: timestamp query isn't going to give terribly sensible results
 * on a tiler.  But it is needed by qapitrace profile heatmap.  If you
 * add in a binning pass, the results get even more nonsensical.  So
 * we just return the timestamp on the first tile and hope that is
 * kind of good enough.
 */
240
241 static const struct fd_acc_sample_provider timestamp = {
242 .query_type = PIPE_QUERY_TIMESTAMP,
243 .active = FD_STAGE_ALL,
244 .size = sizeof(struct fd6_query_sample),
245 .resume = timestamp_resume,
246 .pause = timestamp_pause,
247 .result = timestamp_accumulate_result,
248 };
249
250 void
251 fd6_query_context_init(struct pipe_context *pctx)
252 {
253 struct fd_context *ctx = fd_context(pctx);
254
255 ctx->create_query = fd_acc_create_query;
256 ctx->query_set_stage = fd_acc_query_set_stage;
257
258 fd_acc_query_register_provider(pctx, &occlusion_counter);
259 fd_acc_query_register_provider(pctx, &occlusion_predicate);
260 fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
261
262 fd_acc_query_register_provider(pctx, &time_elapsed);
263 fd_acc_query_register_provider(pctx, &timestamp);
264 }