nvc0: keep track of PGRAPH state in nvc0_screen
[mesa.git] / src / gallium / drivers / nouveau / nvc0 / nvc0_screen.h
1 #ifndef __NVC0_SCREEN_H__
2 #define __NVC0_SCREEN_H__
3
4 #include "nouveau_screen.h"
5 #include "nouveau_mm.h"
6 #include "nouveau_fence.h"
7 #include "nouveau_heap.h"
8
9 #include "nv_object.xml.h"
10
11 #include "nvc0/nvc0_winsys.h"
12 #include "nvc0/nvc0_stateobj.h"
13
/* Capacity of the TIC and TSC tables; these also size the per-slot lock
 * bitmaps in struct nvc0_screen (one bit per entry).
 */
#define NVC0_TIC_MAX_ENTRIES            2048
#define NVC0_TSC_MAX_ENTRIES            2048

/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
#define NVC0_MAX_PIPE_CONSTBUFS         14
#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE 7

#define NVC0_MAX_SURFACE_SLOTS          16

#define NVC0_MAX_VIEWPORTS              16


/* Only used through pointers in this header, so forward declarations do. */
struct nvc0_context;

struct nvc0_blitter;
29
30 struct nvc0_graph_state {
31 boolean flushed;
32 boolean rasterizer_discard;
33 boolean early_z_forced;
34 boolean prim_restart;
35 uint32_t instance_elts; /* bitmask of per-instance elements */
36 uint32_t instance_base;
37 uint32_t constant_vbos;
38 uint32_t constant_elts;
39 int32_t index_bias;
40 uint16_t scissor;
41 uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
42 uint8_t num_vtxbufs;
43 uint8_t num_vtxelts;
44 uint8_t num_textures[6];
45 uint8_t num_samplers[6];
46 uint8_t tls_required; /* bitmask of shader types using l[] */
47 uint8_t c14_bound; /* whether immediate array constbuf is bound */
48 uint8_t clip_enable;
49 uint32_t clip_mode;
50 uint32_t uniform_buffer_bound[5];
51 struct nvc0_transform_feedback_state *tfb;
52 };
53
54 struct nvc0_screen {
55 struct nouveau_screen base;
56
57 struct nvc0_context *cur_ctx;
58 struct nvc0_graph_state save_state;
59
60 int num_occlusion_queries_active;
61
62 struct nouveau_bo *text;
63 struct nouveau_bo *parm; /* for COMPUTE */
64 struct nouveau_bo *uniform_bo; /* for 3D */
65 struct nouveau_bo *tls;
66 struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
67 struct nouveau_bo *poly_cache;
68
69 uint16_t mp_count;
70 uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */
71
72 struct nouveau_heap *text_heap;
73 struct nouveau_heap *lib_code; /* allocated from text_heap */
74
75 struct nvc0_blitter *blitter;
76
77 struct {
78 void **entries;
79 int next;
80 uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
81 } tic;
82
83 struct {
84 void **entries;
85 int next;
86 uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32];
87 } tsc;
88
89 struct {
90 struct nouveau_bo *bo;
91 uint32_t *map;
92 } fence;
93
94 struct {
95 struct nvc0_program *prog; /* compute state object to read MP counters */
96 struct pipe_query *mp_counter[8]; /* counter to query allocation */
97 uint8_t num_mp_pm_active[2];
98 boolean mp_counters_enabled;
99 } pm;
100
101 struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
102 struct nouveau_object *eng2d;
103 struct nouveau_object *m2mf;
104 struct nouveau_object *compute;
105 struct nouveau_object *nvsw;
106 };
107
108 static INLINE struct nvc0_screen *
109 nvc0_screen(struct pipe_screen *screen)
110 {
111 return (struct nvc0_screen *)screen;
112 }
113
/*
 * Performance counter groups:
 */
#define NVC0_QUERY_MP_COUNTER_GROUP 0
#define NVC0_QUERY_DRV_STAT_GROUP   1

/*
 * Performance counter queries (NVE4):
 *
 * NVE4_PM_QUERY(i) turns one of the indices below into a query type by
 * offsetting it from PIPE_QUERY_DRIVER_SPECIFIC.  The indices are dense,
 * running from 0 to NVE4_PM_QUERY_COUNT - 1.
 */
#define NVE4_PM_QUERY_COUNT                     49
#define NVE4_PM_QUERY(i)                        (PIPE_QUERY_DRIVER_SPECIFIC + (i))
#define NVE4_PM_QUERY_LAST                      NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
#define NVE4_PM_QUERY_PROF_TRIGGER_0            0
#define NVE4_PM_QUERY_PROF_TRIGGER_1            1
#define NVE4_PM_QUERY_PROF_TRIGGER_2            2
#define NVE4_PM_QUERY_PROF_TRIGGER_3            3
#define NVE4_PM_QUERY_PROF_TRIGGER_4            4
#define NVE4_PM_QUERY_PROF_TRIGGER_5            5
#define NVE4_PM_QUERY_PROF_TRIGGER_6            6
#define NVE4_PM_QUERY_PROF_TRIGGER_7            7
#define NVE4_PM_QUERY_LAUNCHED_WARPS            8
#define NVE4_PM_QUERY_LAUNCHED_THREADS          9
#define NVE4_PM_QUERY_LAUNCHED_CTA              10
#define NVE4_PM_QUERY_INST_ISSUED1              11
#define NVE4_PM_QUERY_INST_ISSUED2              12
#define NVE4_PM_QUERY_INST_EXECUTED             13
#define NVE4_PM_QUERY_LD_LOCAL                  14
#define NVE4_PM_QUERY_ST_LOCAL                  15
#define NVE4_PM_QUERY_LD_SHARED                 16
#define NVE4_PM_QUERY_ST_SHARED                 17
#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT         18
#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS        19
#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT        20
#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS       21
#define NVE4_PM_QUERY_GLD_REQUEST               22
#define NVE4_PM_QUERY_GST_REQUEST               23
#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT        24
#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS       25
#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26
#define NVE4_PM_QUERY_GST_TRANSACTIONS          27
#define NVE4_PM_QUERY_BRANCH                    28
#define NVE4_PM_QUERY_BRANCH_DIVERGENT          29
#define NVE4_PM_QUERY_ACTIVE_WARPS              30
#define NVE4_PM_QUERY_ACTIVE_CYCLES             31
#define NVE4_PM_QUERY_INST_ISSUED               32
#define NVE4_PM_QUERY_ATOM_COUNT                33
#define NVE4_PM_QUERY_GRED_COUNT                34
#define NVE4_PM_QUERY_LD_SHARED_REPLAY          35
#define NVE4_PM_QUERY_ST_SHARED_REPLAY          36
#define NVE4_PM_QUERY_LD_LOCAL_TRANSACTIONS     37
#define NVE4_PM_QUERY_ST_LOCAL_TRANSACTIONS     38
#define NVE4_PM_QUERY_L1_LD_SHARED_TRANSACTIONS 39
#define NVE4_PM_QUERY_L1_ST_SHARED_TRANSACTIONS 40
#define NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY        41
#define NVE4_PM_QUERY_GST_MEM_DIV_REPLAY        42
#define NVE4_PM_QUERY_METRIC_IPC                43
#define NVE4_PM_QUERY_METRIC_IPAC               44
#define NVE4_PM_QUERY_METRIC_IPEC               45
#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY       46
#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY      47
#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD  48
174
175 /*
176 #define NVE4_PM_QUERY_GR_IDLE 50
177 #define NVE4_PM_QUERY_BSP_IDLE 51
178 #define NVE4_PM_QUERY_VP_IDLE 52
179 #define NVE4_PM_QUERY_PPP_IDLE 53
180 #define NVE4_PM_QUERY_CE0_IDLE 54
181 #define NVE4_PM_QUERY_CE1_IDLE 55
182 #define NVE4_PM_QUERY_CE2_IDLE 56
183 */
184 /* L2 queries (PCOUNTER) */
185 /*
186 #define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57
187 ...
188 */
189 /* TEX queries (PCOUNTER) */
190 /*
191 #define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58
192 ...
193 */
194
/*
 * Performance counter queries (NVC0):
 *
 * NVC0_PM_QUERY(i) turns one of the indices below into a query type, placed
 * 2048 entries past PIPE_QUERY_DRIVER_SPECIFIC.  The indices are dense,
 * running from 0 to NVC0_PM_QUERY_COUNT - 1.
 */
#define NVC0_PM_QUERY_COUNT              31
#define NVC0_PM_QUERY(i)                 (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
#define NVC0_PM_QUERY_LAST               NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1)
#define NVC0_PM_QUERY_INST_EXECUTED      0
#define NVC0_PM_QUERY_BRANCH             1
#define NVC0_PM_QUERY_BRANCH_DIVERGENT   2
#define NVC0_PM_QUERY_ACTIVE_WARPS       3
#define NVC0_PM_QUERY_ACTIVE_CYCLES      4
#define NVC0_PM_QUERY_LAUNCHED_WARPS     5
#define NVC0_PM_QUERY_LAUNCHED_THREADS   6
#define NVC0_PM_QUERY_LD_SHARED          7
#define NVC0_PM_QUERY_ST_SHARED          8
#define NVC0_PM_QUERY_LD_LOCAL           9
#define NVC0_PM_QUERY_ST_LOCAL           10
#define NVC0_PM_QUERY_GRED_COUNT         11
#define NVC0_PM_QUERY_ATOM_COUNT         12
#define NVC0_PM_QUERY_GLD_REQUEST        13
#define NVC0_PM_QUERY_GST_REQUEST        14
#define NVC0_PM_QUERY_INST_ISSUED1_0     15
#define NVC0_PM_QUERY_INST_ISSUED1_1     16
#define NVC0_PM_QUERY_INST_ISSUED2_0     17
#define NVC0_PM_QUERY_INST_ISSUED2_1     18
#define NVC0_PM_QUERY_TH_INST_EXECUTED_0 19
#define NVC0_PM_QUERY_TH_INST_EXECUTED_1 20
#define NVC0_PM_QUERY_TH_INST_EXECUTED_2 21
#define NVC0_PM_QUERY_TH_INST_EXECUTED_3 22
#define NVC0_PM_QUERY_PROF_TRIGGER_0     23
#define NVC0_PM_QUERY_PROF_TRIGGER_1     24
#define NVC0_PM_QUERY_PROF_TRIGGER_2     25
#define NVC0_PM_QUERY_PROF_TRIGGER_3     26
#define NVC0_PM_QUERY_PROF_TRIGGER_4     27
#define NVC0_PM_QUERY_PROF_TRIGGER_5     28
#define NVC0_PM_QUERY_PROF_TRIGGER_6     29
#define NVC0_PM_QUERY_PROF_TRIGGER_7     30
229
/*
 * Driver statistics queries:
 *
 * Only available when NOUVEAU_ENABLE_DRIVER_STATISTICS is defined; otherwise
 * the group is empty (NVC0_QUERY_DRV_STAT_COUNT is 0).  When enabled,
 * NVC0_QUERY_DRV_STAT(i) places index i 1024 entries past
 * PIPE_QUERY_DRIVER_SPECIFIC.
 */
#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS

#define NVC0_QUERY_DRV_STAT(i)    (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
#define NVC0_QUERY_DRV_STAT_COUNT  29
#define NVC0_QUERY_DRV_STAT_LAST   NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1)
#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT        0
#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES        1
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT        2
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID    3
#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS    4
#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ              5
#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE             6
#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT                  7
#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT                  8
#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT           9
#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ              10
#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE             11
#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID      12
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT          13
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID     14
#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS     15
#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES                  16
#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT 17
#define NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT 18
#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT                19
#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT             20
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY                21
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED              22
#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT       23
#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES        24
#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT           25
#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES           26
#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT                   27
#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT         28

#else

#define NVC0_QUERY_DRV_STAT_COUNT 0

#endif
272
273 int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
274 struct pipe_driver_query_info *);
275
276 int nvc0_screen_get_driver_query_group_info(struct pipe_screen *, unsigned,
277 struct pipe_driver_query_group_info *);
278
279 boolean nvc0_blitter_create(struct nvc0_screen *);
280 void nvc0_blitter_destroy(struct nvc0_screen *);
281
282 void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
283
284 int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
285 int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
286
287 int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
288 int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
289
290 boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
291 uint32_t lneg, uint32_t cstack);
292
293 static INLINE void
294 nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
295 {
296 struct nvc0_screen *screen = nvc0_screen(res->base.screen);
297
298 if (res->mm) {
299 nouveau_fence_ref(screen->base.fence.current, &res->fence);
300 if (flags & NOUVEAU_BO_WR)
301 nouveau_fence_ref(screen->base.fence.current, &res->fence_wr);
302 }
303 }
304
305 static INLINE void
306 nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
307 {
308 if (likely(res->bo)) {
309 if (flags & NOUVEAU_BO_WR)
310 res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING |
311 NOUVEAU_BUFFER_STATUS_DIRTY;
312 if (flags & NOUVEAU_BO_RD)
313 res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
314
315 nvc0_resource_fence(res, flags);
316 }
317 }
318
/*
 * One row of the format table: four 32-bit values per format.
 *
 * NOTE(review): going by the field names, these are presumably the hardware
 * encodings for render-target, texture (TIC) and vertex use plus a usage
 * mask; confirm against the table definition.
 */
struct nvc0_format {
   uint32_t rt;
   uint32_t tic;
   uint32_t vtx;
   uint32_t usage;
};

/* Defined elsewhere; the array size is not visible from this header. */
extern const struct nvc0_format nvc0_format_table[];
327
328 static INLINE void
329 nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
330 {
331 if (tic->id >= 0)
332 screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
333 }
334
335 static INLINE void
336 nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
337 {
338 if (tsc->id >= 0)
339 screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
340 }
341
342 static INLINE void
343 nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
344 {
345 if (tic->id >= 0) {
346 screen->tic.entries[tic->id] = NULL;
347 screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
348 }
349 }
350
351 static INLINE void
352 nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
353 {
354 if (tsc->id >= 0) {
355 screen->tsc.entries[tsc->id] = NULL;
356 screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
357 }
358 }
359
360 #endif