trace: Dump result of create_stream_output_target
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_screen.h
index 81f404ada830b4302e19dac52c25053f20c41b7c..826014e88026618beecb7e6cfb84825367414b0d 100644 (file)
@@ -1,43 +1,50 @@
 #ifndef __NVC0_SCREEN_H__
 #define __NVC0_SCREEN_H__
 
-#define NOUVEAU_NVC0
 #include "nouveau/nouveau_screen.h"
 #include "nouveau/nouveau_mm.h"
-#undef NOUVEAU_NVC0
+#include "nouveau/nouveau_fence.h"
+#include "nouveau/nouveau_heap.h"
+
+#include "nouveau/nv_object.xml.h"
+
 #include "nvc0_winsys.h"
 #include "nvc0_stateobj.h"
 
 #define NVC0_TIC_MAX_ENTRIES 2048
 #define NVC0_TSC_MAX_ENTRIES 2048
 
+/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
+#define NVC0_MAX_PIPE_CONSTBUFS         14
+#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE  7
+
+#define NVC0_MAX_SURFACE_SLOTS 16
+
 struct nvc0_context;
 
-#define NVC0_SCRATCH_SIZE (2 << 20)
-#define NVC0_SCRATCH_NR_BUFFERS 2
+struct nvc0_blitter;
 
 struct nvc0_screen {
    struct nouveau_screen base;
-   struct nouveau_winsys *nvws;
 
    struct nvc0_context *cur_ctx;
 
+   int num_occlusion_queries_active;
+
    struct nouveau_bo *text;
-   struct nouveau_bo *uniforms;
+   struct nouveau_bo *parm;       /* for COMPUTE */
+   struct nouveau_bo *uniform_bo; /* for 3D */
    struct nouveau_bo *tls;
    struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
-   struct nouveau_bo *mp_stack_bo;
+   struct nouveau_bo *poly_cache;
 
-   uint64_t tls_size;
+   uint16_t mp_count;
+   uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */
 
-   struct nouveau_resource *text_heap;
+   struct nouveau_heap *text_heap;
+   struct nouveau_heap *lib_code; /* allocated from text_heap */
 
-   struct {
-      struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS];
-      uint8_t *buf;
-      int index;
-      uint32_t offset;
-   } scratch;
+   struct nvc0_blitter *blitter;
 
    struct {
       void **entries;
@@ -56,11 +63,19 @@ struct nvc0_screen {
       uint32_t *map;
    } fence;
 
+   struct {
+      struct nvc0_program *prog; /* compute state object to read MP counters */
+      struct pipe_query *mp_counter[8]; /* counter to query allocation */
+      uint8_t num_mp_pm_active[2];
+      boolean mp_counters_enabled;
+   } pm;
+
    struct nouveau_mman *mm_VRAM_fe0;
 
-   struct nouveau_grobj *fermi;
-   struct nouveau_grobj *eng2d;
-   struct nouveau_grobj *m2mf;
+   struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
+   struct nouveau_object *eng2d;
+   struct nouveau_object *m2mf;
+   struct nouveau_object *compute;
 };
 
 static INLINE struct nvc0_screen *
@@ -69,11 +84,130 @@ nvc0_screen(struct pipe_screen *screen)
    return (struct nvc0_screen *)screen;
 }
 
+
+/* Performance counter queries:
+ */
+#define NVE4_PM_QUERY_COUNT  39
+#define NVE4_PM_QUERY(i)    (PIPE_QUERY_DRIVER_SPECIFIC + (i))
+#define NVE4_PM_QUERY_LAST   NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
+#define NVE4_PM_QUERY_PROF_TRIGGER_0            0
+#define NVE4_PM_QUERY_PROF_TRIGGER_1            1
+#define NVE4_PM_QUERY_PROF_TRIGGER_2            2
+#define NVE4_PM_QUERY_PROF_TRIGGER_3            3
+#define NVE4_PM_QUERY_PROF_TRIGGER_4            4
+#define NVE4_PM_QUERY_PROF_TRIGGER_5            5
+#define NVE4_PM_QUERY_PROF_TRIGGER_6            6
+#define NVE4_PM_QUERY_PROF_TRIGGER_7            7
+#define NVE4_PM_QUERY_LAUNCHED_WARPS            8
+#define NVE4_PM_QUERY_LAUNCHED_THREADS          9
+#define NVE4_PM_QUERY_LAUNCHED_CTA              10
+#define NVE4_PM_QUERY_INST_ISSUED1              11
+#define NVE4_PM_QUERY_INST_ISSUED2              12
+#define NVE4_PM_QUERY_INST_EXECUTED             13
+#define NVE4_PM_QUERY_LD_LOCAL                  14
+#define NVE4_PM_QUERY_ST_LOCAL                  15
+#define NVE4_PM_QUERY_LD_SHARED                 16
+#define NVE4_PM_QUERY_ST_SHARED                 17
+#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT         18
+#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS        19
+#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT        20
+#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS       21
+#define NVE4_PM_QUERY_GLD_REQUEST               22
+#define NVE4_PM_QUERY_GST_REQUEST               23
+#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT        24
+#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS       25
+#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26
+#define NVE4_PM_QUERY_GST_TRANSACTIONS          27
+#define NVE4_PM_QUERY_BRANCH                    28
+#define NVE4_PM_QUERY_BRANCH_DIVERGENT          29
+#define NVE4_PM_QUERY_ACTIVE_WARPS              30
+#define NVE4_PM_QUERY_ACTIVE_CYCLES             31
+#define NVE4_PM_QUERY_INST_ISSUED               32
+#define NVE4_PM_QUERY_METRIC_IPC                33
+#define NVE4_PM_QUERY_METRIC_IPAC               34
+#define NVE4_PM_QUERY_METRIC_IPEC               35
+#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY       36
+#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY      37
+#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD  38
+/*
+#define NVE4_PM_QUERY_GR_IDLE                   50
+#define NVE4_PM_QUERY_BSP_IDLE                  51
+#define NVE4_PM_QUERY_VP_IDLE                   52
+#define NVE4_PM_QUERY_PPP_IDLE                  53
+#define NVE4_PM_QUERY_CE0_IDLE                  54
+#define NVE4_PM_QUERY_CE1_IDLE                  55
+#define NVE4_PM_QUERY_CE2_IDLE                  56
+*/
+/* L2 queries (PCOUNTER) */
+/*
+#define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57
+...
+*/
+/* TEX queries (PCOUNTER) */
+/*
+#define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58
+...
+*/
+
+/* Driver statistics queries:
+ */
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+
+#define NVC0_QUERY_DRV_STAT(i)    (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
+#define NVC0_QUERY_DRV_STAT_COUNT  29
+#define NVC0_QUERY_DRV_STAT_LAST   NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1)
+#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT         0
+#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES         1
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT         2
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID     3
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS     4
+#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ               5
+#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE              6
+#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT                   7
+#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT                   8
+#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT            9
+#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ              10
+#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE             11
+#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID      12
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT          13
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID     14
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS     15
+#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES                  16
+#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT 17
+#define NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT 18
+#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT                19
+#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT             20
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY                21
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED              22
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT       23
+#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES        24
+#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT           25
+#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES           26
+#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT                   27
+#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT         28
+
+#else
+
+#define NVC0_QUERY_DRV_STAT_COUNT 0
+
+#endif
+
+int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
+                                      struct pipe_driver_query_info *);
+
+boolean nvc0_blitter_create(struct nvc0_screen *);
+void nvc0_blitter_destroy(struct nvc0_screen *);
+
 void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
 
 int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
 int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
 
+int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
+
+boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
+                                    uint32_t lneg, uint32_t cstack);
+
 static INLINE void
 nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
 {
@@ -81,7 +215,6 @@ nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
 
    if (res->mm) {
       nouveau_fence_ref(screen->base.fence.current, &res->fence);
-
       if (flags & NOUVEAU_BO_WR)
          nouveau_fence_ref(screen->base.fence.current, &res->fence_wr);
    }
@@ -90,13 +223,10 @@ nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
 static INLINE void
 nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
 {
-   struct nvc0_screen *screen = nvc0_screen(res->base.screen);
-
    if (likely(res->bo)) {
-      nouveau_bo_validate(screen->base.channel, res->bo, flags);
-
       if (flags & NOUVEAU_BO_WR)
-         res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+         res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING |
+            NOUVEAU_BUFFER_STATUS_DIRTY;
       if (flags & NOUVEAU_BO_RD)
          res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;