struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
struct nouveau_bo *poly_cache;
- uint64_t tls_size;
+ uint16_t mp_count;
+ uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */
struct nouveau_heap *text_heap;
struct nouveau_heap *lib_code; /* allocated from text_heap */
uint32_t *map;
} fence;
+ struct { /* State grouped under "pm"; the members below refer to MP counters.
+          * NOTE(review): "pm" presumably stands for performance monitor,
+          * matching the NVE4_PM_QUERY_* names in this header - confirm. */
+ struct nvc0_program *prog; /* compute state object to read MP counters */
+ struct pipe_query *mp_counter[8]; /* counter to query allocation */
+ uint8_t num_mp_pm_active[2]; /* NOTE(review): meaning of the two entries is
+                              * not visible in this header - document it at
+                              * the point of use. */
+ boolean mp_counters_enabled;
+ } pm;
+
struct nouveau_mman *mm_VRAM_fe0;
struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
return (struct nvc0_screen *)screen;
}
+
+/* Performance counter queries:
+ *
+ * NVE4_PM_QUERY(i) turns a counter index i into a value offset from
+ * PIPE_QUERY_DRIVER_SPECIFIC. The NVE4_PM_QUERY_<NAME> constants below are
+ * the raw indices (0 .. NVE4_PM_QUERY_COUNT - 1), not the offset values;
+ * wrap them in NVE4_PM_QUERY() to obtain the latter.
+ * NVE4_PM_QUERY_LAST is the offset value of the highest index.
+ *
+ * When adding an entry, keep the indices contiguous and update
+ * NVE4_PM_QUERY_COUNT so that it stays equal to the highest index plus one.
+ */
+#define NVE4_PM_QUERY_COUNT 39
+#define NVE4_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
+#define NVE4_PM_QUERY_LAST NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
+#define NVE4_PM_QUERY_PROF_TRIGGER_0 0
+#define NVE4_PM_QUERY_PROF_TRIGGER_1 1
+#define NVE4_PM_QUERY_PROF_TRIGGER_2 2
+#define NVE4_PM_QUERY_PROF_TRIGGER_3 3
+#define NVE4_PM_QUERY_PROF_TRIGGER_4 4
+#define NVE4_PM_QUERY_PROF_TRIGGER_5 5
+#define NVE4_PM_QUERY_PROF_TRIGGER_6 6
+#define NVE4_PM_QUERY_PROF_TRIGGER_7 7
+#define NVE4_PM_QUERY_LAUNCHED_WARPS 8
+#define NVE4_PM_QUERY_LAUNCHED_THREADS 9
+#define NVE4_PM_QUERY_LAUNCHED_CTA 10
+#define NVE4_PM_QUERY_INST_ISSUED1 11
+#define NVE4_PM_QUERY_INST_ISSUED2 12
+#define NVE4_PM_QUERY_INST_EXECUTED 13
+#define NVE4_PM_QUERY_LD_LOCAL 14
+#define NVE4_PM_QUERY_ST_LOCAL 15
+#define NVE4_PM_QUERY_LD_SHARED 16
+#define NVE4_PM_QUERY_ST_SHARED 17
+#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT 18
+#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS 19
+#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT 20
+#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS 21
+#define NVE4_PM_QUERY_GLD_REQUEST 22
+#define NVE4_PM_QUERY_GST_REQUEST 23
+#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT 24
+#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS 25
+#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26
+#define NVE4_PM_QUERY_GST_TRANSACTIONS 27
+#define NVE4_PM_QUERY_BRANCH 28
+#define NVE4_PM_QUERY_BRANCH_DIVERGENT 29
+#define NVE4_PM_QUERY_ACTIVE_WARPS 30
+#define NVE4_PM_QUERY_ACTIVE_CYCLES 31
+#define NVE4_PM_QUERY_INST_ISSUED 32
+#define NVE4_PM_QUERY_METRIC_IPC 33 /* 33..38: the METRIC_* group */
+#define NVE4_PM_QUERY_METRIC_IPAC 34
+#define NVE4_PM_QUERY_METRIC_IPEC 35
+#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY 36
+#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY 37
+#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD 38 /* highest index: COUNT - 1 */
+/*
+#define NVE4_PM_QUERY_GR_IDLE 50
+#define NVE4_PM_QUERY_BSP_IDLE 51
+#define NVE4_PM_QUERY_VP_IDLE 52
+#define NVE4_PM_QUERY_PPP_IDLE 53
+#define NVE4_PM_QUERY_CE0_IDLE 54
+#define NVE4_PM_QUERY_CE1_IDLE 55
+#define NVE4_PM_QUERY_CE2_IDLE 56
+*/
+/* L2 queries (PCOUNTER) */
+/*
+#define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57
+...
+*/
+/* TEX queries (PCOUNTER) */
+/*
+#define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58
+...
+*/
+
+/* Driver statistics queries:
+ *
+ * The full table is only defined when NOUVEAU_ENABLE_DRIVER_STATISTICS is
+ * set. Without it, NVC0_QUERY_DRV_STAT_COUNT is 0 and neither
+ * NVC0_QUERY_DRV_STAT() nor NVC0_QUERY_DRV_STAT_LAST nor any of the
+ * per-statistic indices exist, so code using them must be guarded by the
+ * same #ifdef.
+ *
+ * NVC0_QUERY_DRV_STAT(i) offsets statistic index i by 1024 from
+ * PIPE_QUERY_DRIVER_SPECIFIC; the NVE4_PM_QUERY() values start at offset 0
+ * from the same base, so the two ranges stay disjoint as long as there are
+ * fewer than 1024 NVE4_PM_QUERY entries.
+ * The NVC0_QUERY_DRV_STAT_<NAME> constants are raw indices
+ * (0 .. NVC0_QUERY_DRV_STAT_COUNT - 1); keep them contiguous and keep
+ * NVC0_QUERY_DRV_STAT_COUNT equal to the highest index plus one.
+ */
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+
+#define NVC0_QUERY_DRV_STAT(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
+#define NVC0_QUERY_DRV_STAT_COUNT 29
+#define NVC0_QUERY_DRV_STAT_LAST NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1)
+#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT 0
+#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES 1
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT 2
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID 3
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS 4
+#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ 5
+#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE 6
+#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT 7
+#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT 8
+#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT 9
+#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ 10
+#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE 11
+#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID 12
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT 13
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID 14
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS 15
+#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES 16
+#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT 17
+#define NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT 18
+#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT 19
+#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT 20
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY 21
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED 22
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT 23
+#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES 24
+#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT 25
+#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES 26
+#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT 27
+#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT 28 /* highest index: COUNT - 1 */
+
+#else /* !NOUVEAU_ENABLE_DRIVER_STATISTICS */
+
+#define NVC0_QUERY_DRV_STAT_COUNT 0 /* no driver statistics queries available */
+
+#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
+
+int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
+ struct pipe_driver_query_info *); /* NOTE(review): defined outside this
+                                    * header. Presumably the pipe_screen
+                                    * get_driver_query_info hook, with the
+                                    * unsigned being a query index and the
+                                    * struct filled on output - confirm and
+                                    * name the parameters accordingly. */
+
boolean nvc0_blitter_create(struct nvc0_screen *);
void nvc0_blitter_destroy(struct nvc0_screen *);
int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
+boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
+ uint32_t lneg, uint32_t cstack); /* NOTE(review): defined outside this
+                                   * header. The units and exact meaning of
+                                   * lpos, lneg and cstack, and what the
+                                   * boolean result signals, are not visible
+                                   * here - document them at the
+                                   * definition. */
+
static INLINE void
nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
{