1 #ifndef __NVC0_SCREEN_H__
2 #define __NVC0_SCREEN_H__
4 #include "nouveau_screen.h"
5 #include "nouveau_mm.h"
6 #include "nouveau_fence.h"
7 #include "nouveau_heap.h"
9 #include "nv_object.xml.h"
11 #include "nvc0/nvc0_winsys.h"
12 #include "nvc0/nvc0_stateobj.h"
/* Capacity of the TIC and TSC tables. The per-table lock bitmaps declared
 * further down in this header are arrays of (ENTRIES / 32) uint32_t words,
 * i.e. one bit per entry. */
14 #define NVC0_TIC_MAX_ENTRIES 2048
15 #define NVC0_TSC_MAX_ENTRIES 2048
17 /* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
18 #define NVC0_MAX_PIPE_CONSTBUFS 14
19 #define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE 7
21 #define NVC0_MAX_SURFACE_SLOTS 16
23 #define NVC0_MAX_VIEWPORTS 16
/* Driver-side record of graphics state. One instance of this struct is kept
 * among the screen-level members below under the name save_state.
 * NOTE(review): the closing brace and possibly some members are not visible
 * in this view of the file; treat the member list as incomplete. */
30 struct nvc0_graph_state
{
32 bool rasterizer_discard
;
35 uint32_t instance_elts
; /* bitmask of per-instance elements */
36 uint32_t instance_base
;
37 uint32_t constant_vbos
;
38 uint32_t constant_elts
;
41 uint8_t patch_vertices
;
42 uint8_t vbo_mode
; /* 0 = normal, 1 = translate, 3 = translate, forced */
/* NOTE(review): six counters each; presumably one per shader stage
 * (TODO confirm which stage maps to which index). */
45 uint8_t num_textures
[6];
46 uint8_t num_samplers
[6];
47 uint8_t tls_required
; /* bitmask of shader types using l[] */
48 uint8_t c14_bound
; /* whether immediate array constbuf is bound */
/* NOTE(review): five 32-bit words; presumably one bitmask per graphics
 * shader stage (TODO confirm). */
51 uint32_t uniform_buffer_bound
[5];
52 struct nvc0_transform_feedback_state
*tfb
;
56 struct nouveau_screen base
;
58 struct nvc0_context
*cur_ctx
;
59 struct nvc0_graph_state save_state
;
61 int num_occlusion_queries_active
;
63 struct nouveau_bo
*text
;
64 struct nouveau_bo
*parm
; /* for COMPUTE */
65 struct nouveau_bo
*uniform_bo
; /* for 3D */
66 struct nouveau_bo
*tls
;
67 struct nouveau_bo
*txc
; /* TIC (offset 0) and TSC (65536) */
68 struct nouveau_bo
*poly_cache
;
71 uint16_t mp_count_compute
; /* magic reg can make compute use fewer MPs */
73 struct nouveau_heap
*text_heap
;
74 struct nouveau_heap
*lib_code
; /* allocated from text_heap */
76 struct nvc0_blitter
*blitter
;
81 uint32_t lock
[NVC0_TIC_MAX_ENTRIES
/ 32];
87 uint32_t lock
[NVC0_TSC_MAX_ENTRIES
/ 32];
91 struct nouveau_bo
*bo
;
96 struct nvc0_program
*prog
; /* compute state object to read MP counters */
97 struct pipe_query
*mp_counter
[8]; /* counter to query allocation */
98 uint8_t num_hw_sm_active
[2];
99 bool mp_counters_enabled
;
102 struct nouveau_object
*eng3d
; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
103 struct nouveau_object
*eng2d
;
104 struct nouveau_object
*m2mf
;
105 struct nouveau_object
*compute
;
106 struct nouveau_object
*nvsw
;
/*
 * Downcast a generic pipe_screen handle to the nvc0 driver screen.
 *
 * The pointer value is returned unchanged; only its static type differs.
 * NOTE(review): this is only valid if the pipe_screen object lives at
 * offset 0 of struct nvc0_screen -- confirm against the struct definitions.
 */
static inline struct nvc0_screen *
nvc0_screen(struct pipe_screen *screen)
{
   struct nvc0_screen *nvc0 = (struct nvc0_screen *)screen;

   return nvc0;
}
116 * Performance counter groups:
118 #define NVC0_QUERY_MP_COUNTER_GROUP 0
119 #define NVC0_QUERY_DRV_STAT_GROUP 1
121 /* Performance counter queries:
123 #define NVE4_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
124 #define NVE4_HW_SM_QUERY_LAST NVE4_HW_SM_QUERY(NVE4_HW_SM_QUERY_COUNT - 1)
127 NVE4_HW_SM_QUERY_ACTIVE_CYCLES
= 0,
128 NVE4_HW_SM_QUERY_ACTIVE_WARPS
,
129 NVE4_HW_SM_QUERY_ATOM_COUNT
,
130 NVE4_HW_SM_QUERY_BRANCH
,
131 NVE4_HW_SM_QUERY_DIVERGENT_BRANCH
,
132 NVE4_HW_SM_QUERY_GLD_REQUEST
,
133 NVE4_HW_SM_QUERY_GLD_MEM_DIV_REPLAY
,
134 NVE4_HW_SM_QUERY_GST_TRANSACTIONS
,
135 NVE4_HW_SM_QUERY_GST_MEM_DIV_REPLAY
,
136 NVE4_HW_SM_QUERY_GRED_COUNT
,
137 NVE4_HW_SM_QUERY_GST_REQUEST
,
138 NVE4_HW_SM_QUERY_INST_EXECUTED
,
139 NVE4_HW_SM_QUERY_INST_ISSUED
,
140 NVE4_HW_SM_QUERY_INST_ISSUED1
,
141 NVE4_HW_SM_QUERY_INST_ISSUED2
,
142 NVE4_HW_SM_QUERY_L1_GLD_HIT
,
143 NVE4_HW_SM_QUERY_L1_GLD_MISS
,
144 NVE4_HW_SM_QUERY_L1_LOCAL_LD_HIT
,
145 NVE4_HW_SM_QUERY_L1_LOCAL_LD_MISS
,
146 NVE4_HW_SM_QUERY_L1_LOCAL_ST_HIT
,
147 NVE4_HW_SM_QUERY_L1_LOCAL_ST_MISS
,
148 NVE4_HW_SM_QUERY_L1_SHARED_LD_TRANSACTIONS
,
149 NVE4_HW_SM_QUERY_L1_SHARED_ST_TRANSACTIONS
,
150 NVE4_HW_SM_QUERY_LOCAL_LD
,
151 NVE4_HW_SM_QUERY_LOCAL_LD_TRANSACTIONS
,
152 NVE4_HW_SM_QUERY_LOCAL_ST
,
153 NVE4_HW_SM_QUERY_LOCAL_ST_TRANSACTIONS
,
154 NVE4_HW_SM_QUERY_PROF_TRIGGER_0
,
155 NVE4_HW_SM_QUERY_PROF_TRIGGER_1
,
156 NVE4_HW_SM_QUERY_PROF_TRIGGER_2
,
157 NVE4_HW_SM_QUERY_PROF_TRIGGER_3
,
158 NVE4_HW_SM_QUERY_PROF_TRIGGER_4
,
159 NVE4_HW_SM_QUERY_PROF_TRIGGER_5
,
160 NVE4_HW_SM_QUERY_PROF_TRIGGER_6
,
161 NVE4_HW_SM_QUERY_PROF_TRIGGER_7
,
162 NVE4_HW_SM_QUERY_SHARED_LD
,
163 NVE4_HW_SM_QUERY_SHARED_LD_REPLAY
,
164 NVE4_HW_SM_QUERY_SHARED_ST
,
165 NVE4_HW_SM_QUERY_SHARED_ST_REPLAY
,
166 NVE4_HW_SM_QUERY_SM_CTA_LAUNCHED
,
167 NVE4_HW_SM_QUERY_THREADS_LAUNCHED
,
168 NVE4_HW_SM_QUERY_UNCACHED_GLD_TRANSACTIONS
,
169 NVE4_HW_SM_QUERY_WARPS_LAUNCHED
,
170 NVE4_HW_SM_QUERY_METRIC_IPC
,
171 NVE4_HW_SM_QUERY_METRIC_IPAC
,
172 NVE4_HW_SM_QUERY_METRIC_IPEC
,
173 NVE4_HW_SM_QUERY_METRIC_MP_OCCUPANCY
,
174 NVE4_HW_SM_QUERY_METRIC_MP_EFFICIENCY
,
175 NVE4_HW_SM_QUERY_METRIC_INST_REPLAY_OHEAD
,
176 NVE4_HW_SM_QUERY_COUNT
179 #define NVC0_HW_SM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
180 #define NVC0_HW_SM_QUERY_LAST NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_COUNT - 1)
183 NVC0_HW_SM_QUERY_ACTIVE_CYCLES
= 0,
184 NVC0_HW_SM_QUERY_ACTIVE_WARPS
,
185 NVC0_HW_SM_QUERY_ATOM_COUNT
,
186 NVC0_HW_SM_QUERY_BRANCH
,
187 NVC0_HW_SM_QUERY_DIVERGENT_BRANCH
,
188 NVC0_HW_SM_QUERY_GLD_REQUEST
,
189 NVC0_HW_SM_QUERY_GRED_COUNT
,
190 NVC0_HW_SM_QUERY_GST_REQUEST
,
191 NVC0_HW_SM_QUERY_INST_EXECUTED
,
192 NVC0_HW_SM_QUERY_INST_ISSUED1_0
,
193 NVC0_HW_SM_QUERY_INST_ISSUED1_1
,
194 NVC0_HW_SM_QUERY_INST_ISSUED2_0
,
195 NVC0_HW_SM_QUERY_INST_ISSUED2_1
,
196 NVC0_HW_SM_QUERY_LOCAL_LD
,
197 NVC0_HW_SM_QUERY_LOCAL_ST
,
198 NVC0_HW_SM_QUERY_PROF_TRIGGER_0
,
199 NVC0_HW_SM_QUERY_PROF_TRIGGER_1
,
200 NVC0_HW_SM_QUERY_PROF_TRIGGER_2
,
201 NVC0_HW_SM_QUERY_PROF_TRIGGER_3
,
202 NVC0_HW_SM_QUERY_PROF_TRIGGER_4
,
203 NVC0_HW_SM_QUERY_PROF_TRIGGER_5
,
204 NVC0_HW_SM_QUERY_PROF_TRIGGER_6
,
205 NVC0_HW_SM_QUERY_PROF_TRIGGER_7
,
206 NVC0_HW_SM_QUERY_SHARED_LD
,
207 NVC0_HW_SM_QUERY_SHARED_ST
,
208 NVC0_HW_SM_QUERY_THREADS_LAUNCHED
,
209 NVC0_HW_SM_QUERY_TH_INST_EXECUTED_0
,
210 NVC0_HW_SM_QUERY_TH_INST_EXECUTED_1
,
211 NVC0_HW_SM_QUERY_TH_INST_EXECUTED_2
,
212 NVC0_HW_SM_QUERY_TH_INST_EXECUTED_3
,
213 NVC0_HW_SM_QUERY_WARPS_LAUNCHED
,
214 NVC0_HW_SM_QUERY_COUNT
217 /* Driver statistics queries:
219 #define NVC0_QUERY_DRV_STAT(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
220 #define NVC0_QUERY_DRV_STAT_LAST NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1)
/* Indices of the driver-statistics queries. NVC0_QUERY_DRV_STAT(i) turns one
 * of these indices into a query type, and NVC0_QUERY_DRV_STAT_LAST is derived
 * from NVC0_QUERY_DRV_STAT_COUNT.
 * The named entries follow an #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS.
 * NOTE(review): the enum braces and the matching #endif are not visible in
 * this view; NVC0_QUERY_DRV_STAT_COUNT presumably sits after the #endif so
 * that it evaluates to 0 when statistics are compiled out -- confirm. */
221 enum nvc0_drv_stats_queries
223 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
224 NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT
= 0,
225 NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES
,
226 NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT
,
227 NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID
,
228 NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS
,
229 NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ
,
230 NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE
,
231 NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT
,
232 NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT
,
233 NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT
,
234 NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ
,
235 NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE
,
236 NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID
,
237 NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT
,
238 NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID
,
239 NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS
,
240 NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES
,
241 NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT
,
242 NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT
,
243 NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT
,
244 NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT
,
245 NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY
,
246 NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED
,
247 NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT
,
248 NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES
,
249 NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT
,
250 NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES
,
251 NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT
,
252 NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT
,
254 NVC0_QUERY_DRV_STAT_COUNT
/* Screen-level entry points. Only the prototypes appear in this header; the
 * definitions are not part of the visible source. */

/* Driver-specific query enumeration: an index in, an info struct out.
 * NOTE(review): meaning of the int return value is not visible here. */
257 int nvc0_screen_get_driver_query_info(struct pipe_screen
*, unsigned,
258 struct pipe_driver_query_info
*);
260 int nvc0_screen_get_driver_query_group_info(struct pipe_screen
*, unsigned,
261 struct pipe_driver_query_group_info
*);
/* Create / destroy pair for the screen's blitter object. */
263 bool nvc0_blitter_create(struct nvc0_screen
*);
264 void nvc0_blitter_destroy(struct nvc0_screen
*);
266 void nvc0_screen_make_buffers_resident(struct nvc0_screen
*);
/* TIC / TSC slot allocation; the void * argument is the entry being placed.
 * NOTE(review): presumably returns the slot id -- confirm in the .c file. */
268 int nvc0_screen_tic_alloc(struct nvc0_screen
*, void *);
269 int nvc0_screen_tsc_alloc(struct nvc0_screen
*, void *);
/* Compute setup, one variant per name prefix (nve4 / nvc0). */
271 int nve4_screen_compute_setup(struct nvc0_screen
*, struct nouveau_pushbuf
*);
272 int nvc0_screen_compute_setup(struct nvc0_screen
*, struct nouveau_pushbuf
*);
274 bool nvc0_screen_resize_tls_area(struct nvc0_screen
*, uint32_t lpos
,
275 uint32_t lneg
, uint32_t cstack
);
/* Attach the screen's current fence to a resource.
 *
 * Looks up the owning screen through res->base.screen, then passes
 * screen->base.fence.current and &res->fence to nouveau_fence_ref(). When
 * flags contains NOUVEAU_BO_WR the same is done for &res->fence_wr.
 *
 * NOTE(review): the return type, the braces and possibly an enclosing
 * condition are not visible in this view of the file -- check the complete
 * source before relying on when these calls execute. */
278 nvc0_resource_fence(struct nv04_resource
*res
, uint32_t flags
)
280 struct nvc0_screen
*screen
= nvc0_screen(res
->base
.screen
);
283 nouveau_fence_ref(screen
->base
.fence
.current
, &res
->fence
);
/* Writes additionally update the write fence. */
284 if (flags
& NOUVEAU_BO_WR
)
285 nouveau_fence_ref(screen
->base
.fence
.current
, &res
->fence_wr
);
/* Record pending GPU access on a resource and fence it.
 *
 * When res->bo is set (marked likely()):
 *   - NOUVEAU_BO_WR in flags sets NOUVEAU_BUFFER_STATUS_GPU_WRITING and
 *     NOUVEAU_BUFFER_STATUS_DIRTY in res->status;
 *   - NOUVEAU_BO_RD in flags sets NOUVEAU_BUFFER_STATUS_GPU_READING.
 * nvc0_resource_fence(res, flags) is then called.
 *
 * NOTE(review): the closing braces are not visible in this view, so it cannot
 * be confirmed from here whether the fence call sits inside the res->bo
 * branch -- check the complete source. */
290 nvc0_resource_validate(struct nv04_resource
*res
, uint32_t flags
)
292 if (likely(res
->bo
)) {
293 if (flags
& NOUVEAU_BO_WR
)
294 res
->status
|= NOUVEAU_BUFFER_STATUS_GPU_WRITING
|
295 NOUVEAU_BUFFER_STATUS_DIRTY
;
296 if (flags
& NOUVEAU_BO_RD
)
297 res
->status
|= NOUVEAU_BUFFER_STATUS_GPU_READING
;
299 nvc0_resource_fence(res
, flags
);
/* Read-only format table defined in another translation unit. The array
 * bound is not given in this declaration, so sizeof cannot be applied to it
 * through this header. */
310 extern const struct nvc0_format nvc0_format_table
[];
313 nvc0_screen_tic_unlock(struct nvc0_screen
*screen
, struct nv50_tic_entry
*tic
)
316 screen
->tic
.lock
[tic
->id
/ 32] &= ~(1 << (tic
->id
% 32));
320 nvc0_screen_tsc_unlock(struct nvc0_screen
*screen
, struct nv50_tsc_entry
*tsc
)
323 screen
->tsc
.lock
[tsc
->id
/ 32] &= ~(1 << (tsc
->id
% 32));
327 nvc0_screen_tic_free(struct nvc0_screen
*screen
, struct nv50_tic_entry
*tic
)
330 screen
->tic
.entries
[tic
->id
] = NULL
;
331 screen
->tic
.lock
[tic
->id
/ 32] &= ~(1 << (tic
->id
% 32));
336 nvc0_screen_tsc_free(struct nvc0_screen
*screen
, struct nv50_tsc_entry
*tsc
)
339 screen
->tsc
.entries
[tsc
->id
] = NULL
;
340 screen
->tsc
.lock
[tsc
->id
/ 32] &= ~(1 << (tsc
->id
% 32));