trace: Dump result of create_stream_output_target
[mesa.git] / src / gallium / drivers / nvc0 / nvc0_screen.h
index 1fac142e2be588888f5db7ab2ba5a604570f7f31..826014e88026618beecb7e6cfb84825367414b0d 100644 (file)
@@ -1,44 +1,50 @@
 #ifndef __NVC0_SCREEN_H__
 #define __NVC0_SCREEN_H__
 
-#define NOUVEAU_NVC0
 #include "nouveau/nouveau_screen.h"
-#undef NOUVEAU_NVC0
+#include "nouveau/nouveau_mm.h"
+#include "nouveau/nouveau_fence.h"
+#include "nouveau/nouveau_heap.h"
+
+#include "nouveau/nv_object.xml.h"
+
 #include "nvc0_winsys.h"
 #include "nvc0_stateobj.h"
 
 #define NVC0_TIC_MAX_ENTRIES 2048
 #define NVC0_TSC_MAX_ENTRIES 2048
 
-struct nvc0_mman;
+/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
+#define NVC0_MAX_PIPE_CONSTBUFS         14
+#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE  7
+
+#define NVC0_MAX_SURFACE_SLOTS 16
+
 struct nvc0_context;
-struct nvc0_fence;
 
-#define NVC0_SCRATCH_SIZE (2 << 20)
-#define NVC0_SCRATCH_NR_BUFFERS 2
+struct nvc0_blitter;
 
 struct nvc0_screen {
    struct nouveau_screen base;
-   struct nouveau_winsys *nvws;
 
    struct nvc0_context *cur_ctx;
 
+   int num_occlusion_queries_active;
+
    struct nouveau_bo *text;
-   struct nouveau_bo *uniforms;
+   struct nouveau_bo *parm;       /* for COMPUTE */
+   struct nouveau_bo *uniform_bo; /* for 3D */
    struct nouveau_bo *tls;
    struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
-   struct nouveau_bo *mp_stack_bo;
+   struct nouveau_bo *poly_cache;
 
-   uint64_t tls_size;
+   uint16_t mp_count;
+   uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */
 
-   struct nouveau_resource *text_heap;
+   struct nouveau_heap *text_heap;
+   struct nouveau_heap *lib_code; /* allocated from text_heap */
 
-   struct {
-      struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS];
-      uint8_t *buf;
-      int index;
-      uint32_t offset;
-   } scratch;
+   struct nvc0_blitter *blitter;
 
    struct {
       void **entries;
@@ -53,22 +59,23 @@ struct nvc0_screen {
    } tsc;
 
    struct {
-      uint32_t *map;
-      struct nvc0_fence *head;
-      struct nvc0_fence *tail;
-      struct nvc0_fence *current;
-      uint32_t sequence;
-      uint32_t sequence_ack;
       struct nouveau_bo *bo;
+      uint32_t *map;
    } fence;
 
-   struct nvc0_mman *mm_GART;
-   struct nvc0_mman *mm_VRAM;
-   struct nvc0_mman *mm_VRAM_fe0;
-
-   struct nouveau_grobj *fermi;
-   struct nouveau_grobj *eng2d;
-   struct nouveau_grobj *m2mf;
+   struct {
+      struct nvc0_program *prog; /* compute state object to read MP counters */
+      struct pipe_query *mp_counter[8]; /* counter to query allocation */
+      uint8_t num_mp_pm_active[2];
+      boolean mp_counters_enabled;
+   } pm;
+
+   struct nouveau_mman *mm_VRAM_fe0;
+
+   struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
+   struct nouveau_object *eng2d;
+   struct nouveau_object *m2mf;
+   struct nouveau_object *compute;
 };
 
 static INLINE struct nvc0_screen *
@@ -77,75 +84,154 @@ nvc0_screen(struct pipe_screen *screen)
    return (struct nvc0_screen *)screen;
 }
 
-/* Since a resource can be migrated, we need to decouple allocations from
- * them. This struct is linked with fences for delayed freeing of allocs.
- */
-struct nvc0_mm_allocation {
-   struct nvc0_mm_allocation *next;
-   void *priv;
-   uint32_t offset;
-};
 
-static INLINE void
-nvc0_fence_sched_release(struct nvc0_fence *nf, struct nvc0_mm_allocation *mm)
-{
-   mm->next = nf->buffers;
-   nf->buffers = mm;
-}
+/* Performance counter queries:
+ */
+#define NVE4_PM_QUERY_COUNT  39
+#define NVE4_PM_QUERY(i)    (PIPE_QUERY_DRIVER_SPECIFIC + (i))
+#define NVE4_PM_QUERY_LAST   NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
+#define NVE4_PM_QUERY_PROF_TRIGGER_0            0
+#define NVE4_PM_QUERY_PROF_TRIGGER_1            1
+#define NVE4_PM_QUERY_PROF_TRIGGER_2            2
+#define NVE4_PM_QUERY_PROF_TRIGGER_3            3
+#define NVE4_PM_QUERY_PROF_TRIGGER_4            4
+#define NVE4_PM_QUERY_PROF_TRIGGER_5            5
+#define NVE4_PM_QUERY_PROF_TRIGGER_6            6
+#define NVE4_PM_QUERY_PROF_TRIGGER_7            7
+#define NVE4_PM_QUERY_LAUNCHED_WARPS            8
+#define NVE4_PM_QUERY_LAUNCHED_THREADS          9
+#define NVE4_PM_QUERY_LAUNCHED_CTA              10
+#define NVE4_PM_QUERY_INST_ISSUED1              11
+#define NVE4_PM_QUERY_INST_ISSUED2              12
+#define NVE4_PM_QUERY_INST_EXECUTED             13
+#define NVE4_PM_QUERY_LD_LOCAL                  14
+#define NVE4_PM_QUERY_ST_LOCAL                  15
+#define NVE4_PM_QUERY_LD_SHARED                 16
+#define NVE4_PM_QUERY_ST_SHARED                 17
+#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT         18
+#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS        19
+#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT        20
+#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS       21
+#define NVE4_PM_QUERY_GLD_REQUEST               22
+#define NVE4_PM_QUERY_GST_REQUEST               23
+#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT        24
+#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS       25
+#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26
+#define NVE4_PM_QUERY_GST_TRANSACTIONS          27
+#define NVE4_PM_QUERY_BRANCH                    28
+#define NVE4_PM_QUERY_BRANCH_DIVERGENT          29
+#define NVE4_PM_QUERY_ACTIVE_WARPS              30
+#define NVE4_PM_QUERY_ACTIVE_CYCLES             31
+#define NVE4_PM_QUERY_INST_ISSUED               32
+#define NVE4_PM_QUERY_METRIC_IPC                33
+#define NVE4_PM_QUERY_METRIC_IPAC               34
+#define NVE4_PM_QUERY_METRIC_IPEC               35
+#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY       36
+#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY      37
+#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD  38
+/*
+#define NVE4_PM_QUERY_GR_IDLE                   50
+#define NVE4_PM_QUERY_BSP_IDLE                  51
+#define NVE4_PM_QUERY_VP_IDLE                   52
+#define NVE4_PM_QUERY_PPP_IDLE                  53
+#define NVE4_PM_QUERY_CE0_IDLE                  54
+#define NVE4_PM_QUERY_CE1_IDLE                  55
+#define NVE4_PM_QUERY_CE2_IDLE                  56
+*/
+/* L2 queries (PCOUNTER) */
+/*
+#define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57
+...
+*/
+/* TEX queries (PCOUNTER) */
+/*
+#define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58
+...
+*/
+
+/* Driver statistics queries:
+ */
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+
+#define NVC0_QUERY_DRV_STAT(i)    (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
+#define NVC0_QUERY_DRV_STAT_COUNT  29
+#define NVC0_QUERY_DRV_STAT_LAST   NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1)
+#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT         0
+#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES         1
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT         2
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID     3
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS     4
+#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ               5
+#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE              6
+#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT                   7
+#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT                   8
+#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT            9
+#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ              10
+#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE             11
+#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID      12
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT          13
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID     14
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS     15
+#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES                  16
+#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT 17
+#define NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT 18
+#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT                19
+#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT             20
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY                21
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED              22
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT       23
+#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES        24
+#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT           25
+#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES           26
+#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT                   27
+#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT         28
+
+#else
+
+#define NVC0_QUERY_DRV_STAT_COUNT 0
 
-extern struct nvc0_mman *
-nvc0_mm_create(struct nouveau_device *, uint32_t domain, uint32_t storage_type);
+#endif
 
-extern void
-nvc0_mm_destroy(struct nvc0_mman *);
+int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
+                                      struct pipe_driver_query_info *);
 
-extern struct nvc0_mm_allocation *
-nvc0_mm_allocate(struct nvc0_mman *,
-                 uint32_t size, struct nouveau_bo **, uint32_t *offset);
-extern void
-nvc0_mm_free(struct nvc0_mm_allocation *);
+boolean nvc0_blitter_create(struct nvc0_screen *);
+void nvc0_blitter_destroy(struct nvc0_screen *);
 
 void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
 
 int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
 int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
 
+int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
+
+boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
+                                    uint32_t lneg, uint32_t cstack);
+
 static INLINE void
-nvc0_resource_fence(struct nvc0_resource *res, uint32_t flags)
+nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
 {
    struct nvc0_screen *screen = nvc0_screen(res->base.screen);
 
    if (res->mm) {
-      nvc0_fence_reference(&res->fence, screen->fence.current);
-
+      nouveau_fence_ref(screen->base.fence.current, &res->fence);
       if (flags & NOUVEAU_BO_WR)
-         nvc0_fence_reference(&res->fence_wr, screen->fence.current);
+         nouveau_fence_ref(screen->base.fence.current, &res->fence_wr);
    }
 }
 
 static INLINE void
-nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags)
-{
-   struct nvc0_screen *screen = nvc0_screen(res->base.screen);
-
-   nouveau_bo_validate(screen->base.channel, res->bo, flags);
-
-   nvc0_resource_fence(res, flags);
-}
-
-
-boolean
-nvc0_screen_fence_new(struct nvc0_screen *, struct nvc0_fence **, boolean emit);
-
-void
-nvc0_screen_fence_next(struct nvc0_screen *);
-
-static INLINE boolean
-nvc0_screen_fence_emit(struct nvc0_screen *screen)
+nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
 {
-   nvc0_fence_emit(screen->fence.current);
+   if (likely(res->bo)) {
+      if (flags & NOUVEAU_BO_WR)
+         res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING |
+            NOUVEAU_BUFFER_STATUS_DIRTY;
+      if (flags & NOUVEAU_BO_RD)
+         res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
 
-   return nvc0_screen_fence_new(screen, &screen->fence.current, FALSE);
+      nvc0_resource_fence(res, flags);
+   }
 }
 
 struct nvc0_format {
@@ -158,21 +244,21 @@ struct nvc0_format {
 extern const struct nvc0_format nvc0_format_table[];
 
 static INLINE void
-nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nvc0_tic_entry *tic)
+nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
 {
    if (tic->id >= 0)
       screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
 }
 
 static INLINE void
-nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc)
+nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
 {
    if (tsc->id >= 0)
       screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
 }
 
 static INLINE void
-nvc0_screen_tic_free(struct nvc0_screen *screen, struct nvc0_tic_entry *tic)
+nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
 {
    if (tic->id >= 0) {
       screen->tic.entries[tic->id] = NULL;
@@ -181,7 +267,7 @@ nvc0_screen_tic_free(struct nvc0_screen *screen, struct nvc0_tic_entry *tic)
 }
 
 static INLINE void
-nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nvc0_tsc_entry *tsc)
+nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
 {
    if (tsc->id >= 0) {
       screen->tsc.entries[tsc->id] = NULL;