nvc0: add some metrics to driver specific queries
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_screen.h
1 #ifndef __NVC0_SCREEN_H__
2 #define __NVC0_SCREEN_H__
3
4 #include "nouveau/nouveau_screen.h"
5 #include "nouveau/nouveau_mm.h"
6 #include "nouveau/nouveau_fence.h"
7 #include "nouveau/nouveau_heap.h"
8
9 #include "nouveau/nv_object.xml.h"
10
11 #include "nvc0_winsys.h"
12 #include "nvc0_stateobj.h"
13
/* Capacity of the TIC and TSC entry tables. */
#define NVC0_TIC_MAX_ENTRIES            2048
#define NVC0_TSC_MAX_ENTRIES            2048

/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
#define NVC0_MAX_PIPE_CONSTBUFS         14
#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE 7

#define NVC0_MAX_SURFACE_SLOTS          16
22
/* Forward declarations: this header only stores pointers to these types. */
struct nvc0_context;
struct nvc0_blitter;
26
27 struct nvc0_screen {
28 struct nouveau_screen base;
29
30 struct nvc0_context *cur_ctx;
31
32 int num_occlusion_queries_active;
33
34 struct nouveau_bo *text;
35 struct nouveau_bo *parm; /* for COMPUTE */
36 struct nouveau_bo *uniform_bo; /* for 3D */
37 struct nouveau_bo *tls;
38 struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
39 struct nouveau_bo *poly_cache;
40
41 uint16_t mp_count;
42 uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */
43
44 struct nouveau_heap *text_heap;
45 struct nouveau_heap *lib_code; /* allocated from text_heap */
46
47 struct nvc0_blitter *blitter;
48
49 struct {
50 void **entries;
51 int next;
52 uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
53 } tic;
54
55 struct {
56 void **entries;
57 int next;
58 uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32];
59 } tsc;
60
61 struct {
62 struct nouveau_bo *bo;
63 uint32_t *map;
64 } fence;
65
66 struct {
67 struct nvc0_program *prog; /* compute state object to read MP counters */
68 struct pipe_query *mp_counter[8]; /* counter to query allocation */
69 uint8_t num_mp_pm_active[2];
70 boolean mp_counters_enabled;
71 } pm;
72
73 struct nouveau_mman *mm_VRAM_fe0;
74
75 struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
76 struct nouveau_object *eng2d;
77 struct nouveau_object *m2mf;
78 struct nouveau_object *compute;
79 };
80
81 static INLINE struct nvc0_screen *
82 nvc0_screen(struct pipe_screen *screen)
83 {
84 return (struct nvc0_screen *)screen;
85 }
86
87
/* Performance counter queries:
 *
 * The NVE4_PM_QUERY_* values below are zero-based indices (0 .. COUNT - 1).
 * NVE4_PM_QUERY(i) turns an index into a query type by offsetting it from
 * PIPE_QUERY_DRIVER_SPECIFIC.
 */
#define NVE4_PM_QUERY_COUNT  38
#define NVE4_PM_QUERY(i)    (PIPE_QUERY_DRIVER_SPECIFIC + (i))
#define NVE4_PM_QUERY_LAST   NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)

#define NVE4_PM_QUERY_PROF_TRIGGER_0            0
#define NVE4_PM_QUERY_PROF_TRIGGER_1            1
#define NVE4_PM_QUERY_PROF_TRIGGER_2            2
#define NVE4_PM_QUERY_PROF_TRIGGER_3            3
#define NVE4_PM_QUERY_PROF_TRIGGER_4            4
#define NVE4_PM_QUERY_PROF_TRIGGER_5            5
#define NVE4_PM_QUERY_PROF_TRIGGER_6            6
#define NVE4_PM_QUERY_PROF_TRIGGER_7            7
#define NVE4_PM_QUERY_LAUNCHED_WARPS            8
#define NVE4_PM_QUERY_LAUNCHED_THREADS          9
#define NVE4_PM_QUERY_LAUNCHED_CTA              10
#define NVE4_PM_QUERY_INST_ISSUED1              11
#define NVE4_PM_QUERY_INST_ISSUED2              12
#define NVE4_PM_QUERY_INST_EXECUTED             13
#define NVE4_PM_QUERY_LD_LOCAL                  14
#define NVE4_PM_QUERY_ST_LOCAL                  15
#define NVE4_PM_QUERY_LD_SHARED                 16
#define NVE4_PM_QUERY_ST_SHARED                 17
#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT         18
#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS        19
#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT        20
#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS       21
#define NVE4_PM_QUERY_GLD_REQUEST               22
#define NVE4_PM_QUERY_GST_REQUEST               23
#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT        24
#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS       25
#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26
#define NVE4_PM_QUERY_GST_TRANSACTIONS          27
#define NVE4_PM_QUERY_BRANCH                    28
#define NVE4_PM_QUERY_BRANCH_DIVERGENT          29
#define NVE4_PM_QUERY_ACTIVE_WARPS              30
#define NVE4_PM_QUERY_ACTIVE_CYCLES             31

/* Metric queries. */
#define NVE4_PM_QUERY_METRIC_IPC                32
#define NVE4_PM_QUERY_METRIC_IPAC               33
#define NVE4_PM_QUERY_METRIC_IPEC               34
#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY       35
#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY      36
#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD  37
131 /*
132 #define NVE4_PM_QUERY_GR_IDLE 50
133 #define NVE4_PM_QUERY_BSP_IDLE 51
134 #define NVE4_PM_QUERY_VP_IDLE 52
135 #define NVE4_PM_QUERY_PPP_IDLE 53
136 #define NVE4_PM_QUERY_CE0_IDLE 54
137 #define NVE4_PM_QUERY_CE1_IDLE 55
138 #define NVE4_PM_QUERY_CE2_IDLE 56
139 */
140 /* L2 queries (PCOUNTER) */
141 /*
142 #define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57
143 ...
144 */
145 /* TEX queries (PCOUNTER) */
146 /*
147 #define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58
148 ...
149 */
150
/* Driver statistics queries:
 *
 * Only available when NOUVEAU_ENABLE_DRIVER_STATISTICS is defined; otherwise
 * NVC0_QUERY_DRV_STAT_COUNT is 0 and none of the other macros exist.
 * NVC0_QUERY_DRV_STAT(i) offsets a zero-based index by 1024 past
 * PIPE_QUERY_DRIVER_SPECIFIC.
 */
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS

#define NVC0_QUERY_DRV_STAT(i)    (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
#define NVC0_QUERY_DRV_STAT_COUNT  29
#define NVC0_QUERY_DRV_STAT_LAST   NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1)

#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT         0
#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES         1
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT         2
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID     3
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS     4
#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ               5
#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE              6
#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT                   7
#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT                   8
#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT            9
#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ               10
#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE              11
#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID       12
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT           13
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID      14
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS      15
#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES                   16
#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT  17
#define NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT  18
#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT                 19
#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT              20
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY                 21
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED               22
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT        23
#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES         24
#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT            25
#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES            26
#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT                    27
#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT          28

#else /* !NOUVEAU_ENABLE_DRIVER_STATISTICS */

#define NVC0_QUERY_DRV_STAT_COUNT 0

#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
193
194 int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
195 struct pipe_driver_query_info *);
196
197 boolean nvc0_blitter_create(struct nvc0_screen *);
198 void nvc0_blitter_destroy(struct nvc0_screen *);
199
200 void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
201
202 int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
203 int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
204
205 int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
206
207 boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
208 uint32_t lneg, uint32_t cstack);
209
210 static INLINE void
211 nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
212 {
213 struct nvc0_screen *screen = nvc0_screen(res->base.screen);
214
215 if (res->mm) {
216 nouveau_fence_ref(screen->base.fence.current, &res->fence);
217 if (flags & NOUVEAU_BO_WR)
218 nouveau_fence_ref(screen->base.fence.current, &res->fence_wr);
219 }
220 }
221
222 static INLINE void
223 nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
224 {
225 if (likely(res->bo)) {
226 if (flags & NOUVEAU_BO_WR)
227 res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING |
228 NOUVEAU_BUFFER_STATUS_DIRTY;
229 if (flags & NOUVEAU_BO_RD)
230 res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
231
232 nvc0_resource_fence(res, flags);
233 }
234 }
235
/* One row of the format table: four 32-bit words per format.
 * NOTE(review): judging by the names, rt/tic/vtx hold the format encodings
 * for render targets, texture descriptors and vertex fetch, and `usage` the
 * supported bindings - confirm against the table definition.
 */
struct nvc0_format {
   uint32_t rt;
   uint32_t tic;
   uint32_t vtx;
   uint32_t usage;
};

/* Format table, defined elsewhere; its length is not visible from here. */
extern const struct nvc0_format nvc0_format_table[];
244
245 static INLINE void
246 nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
247 {
248 if (tic->id >= 0)
249 screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
250 }
251
252 static INLINE void
253 nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
254 {
255 if (tsc->id >= 0)
256 screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
257 }
258
259 static INLINE void
260 nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
261 {
262 if (tic->id >= 0) {
263 screen->tic.entries[tic->id] = NULL;
264 screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
265 }
266 }
267
268 static INLINE void
269 nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
270 {
271 if (tsc->id >= 0) {
272 screen->tsc.entries[tsc->id] = NULL;
273 screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
274 }
275 }
276
277 #endif