mesa: add/update comments in _mesa_copy_buffer_subdata()
[mesa.git] / src / mesa / drivers / dri / i965 / brw_queryobj.c
index f6868c83ac724c5492c14fdd7e7b4ff9174cd5c1..72b83f457086792c520c97e2121cb31710d3133e 100644 (file)
 
 /** Waits on the query object's BO and totals the results for this query */
 static void
-brw_queryobj_get_results(struct brw_query_object *query)
+brw_queryobj_get_results(struct gl_context *ctx,
+                        struct brw_query_object *query)
 {
+   struct intel_context *intel = intel_context(ctx);
+
    int i;
    uint64_t *results;
 
    if (query->bo == NULL)
       return;
 
-   drm_intel_bo_map(query->bo, GL_FALSE);
+   drm_intel_bo_map(query->bo, false);
    results = query->bo->virtual;
-   if (query->Base.Target == GL_TIME_ELAPSED_EXT) {
-      query->Base.Result += 1000 * ((results[1] >> 32) - (results[0] >> 32));
-   } else {
+   switch (query->Base.Target) {
+   case GL_TIME_ELAPSED_EXT:
+      if (intel->gen >= 6)
+        query->Base.Result += 80 * (results[1] - results[0]);
+      else
+        query->Base.Result += 1000 * ((results[1] >> 32) - (results[0] >> 32));
+      break;
+
+   case GL_SAMPLES_PASSED_ARB:
       /* Map and count the pixels from the current query BO */
       for (i = query->first_index; i <= query->last_index; i++) {
         query->Base.Result += results[i * 2 + 1] - results[i * 2];
       }
+      break;
+
+   case GL_PRIMITIVES_GENERATED:
+   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
+      /* We don't actually query the hardware for this value, so query->bo
+       * should always be NULL and execution should never reach here.
+       */
+      assert(!"Unreachable");
+      break;
+
+   default:
+      assert(!"Unrecognized query target in brw_queryobj_get_results()");
+      break;
    }
    drm_intel_bo_unmap(query->bo);
 
@@ -72,7 +94,7 @@ brw_queryobj_get_results(struct brw_query_object *query)
 }
 
 static struct gl_query_object *
-brw_new_query_object(GLcontext *ctx, GLuint id)
+brw_new_query_object(struct gl_context *ctx, GLuint id)
 {
    struct brw_query_object *query;
 
@@ -80,14 +102,14 @@ brw_new_query_object(GLcontext *ctx, GLuint id)
 
    query->Base.Id = id;
    query->Base.Result = 0;
-   query->Base.Active = GL_FALSE;
-   query->Base.Ready = GL_TRUE;
+   query->Base.Active = false;
+   query->Base.Ready = true;
 
    return &query->Base;
 }
 
 static void
-brw_delete_query(GLcontext *ctx, struct gl_query_object *q)
+brw_delete_query(struct gl_context *ctx, struct gl_query_object *q)
 {
    struct brw_query_object *query = (struct brw_query_object *)q;
 
@@ -96,28 +118,44 @@ brw_delete_query(GLcontext *ctx, struct gl_query_object *q)
 }
 
 static void
-brw_begin_query(GLcontext *ctx, struct gl_query_object *q)
+brw_begin_query(struct gl_context *ctx, struct gl_query_object *q)
 {
    struct brw_context *brw = brw_context(ctx);
    struct intel_context *intel = intel_context(ctx);
    struct brw_query_object *query = (struct brw_query_object *)q;
 
-   if (query->Base.Target == GL_TIME_ELAPSED_EXT) {
+   switch (query->Base.Target) {
+   case GL_TIME_ELAPSED_EXT:
       drm_intel_bo_unreference(query->bo);
       query->bo = drm_intel_bo_alloc(intel->bufmgr, "timer query",
                                     4096, 4096);
 
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL |
-               PIPE_CONTROL_WRITE_TIMESTAMP);
-      OUT_RELOC(query->bo,
-               I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-               PIPE_CONTROL_GLOBAL_GTT_WRITE |
-               0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   } else {
+      if (intel->gen >= 6) {
+         BEGIN_BATCH(4);
+         OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+         OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP);
+         OUT_RELOC(query->bo,
+                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                 PIPE_CONTROL_GLOBAL_GTT_WRITE |
+                 0);
+         OUT_BATCH(0);
+         ADVANCE_BATCH();
+      
+      } else {
+         BEGIN_BATCH(4);
+         OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+                 PIPE_CONTROL_WRITE_TIMESTAMP);
+         OUT_RELOC(query->bo,
+                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                 PIPE_CONTROL_GLOBAL_GTT_WRITE |
+                 0);
+         OUT_BATCH(0);
+         OUT_BATCH(0);
+         ADVANCE_BATCH();
+      }
+      break;
+
+   case GL_SAMPLES_PASSED_ARB:
       /* Reset our driver's tracking of query state. */
       drm_intel_bo_unreference(query->bo);
       query->bo = NULL;
@@ -126,6 +164,25 @@ brw_begin_query(GLcontext *ctx, struct gl_query_object *q)
 
       brw->query.obj = query;
       intel->stats_wm++;
+      break;
+
+   case GL_PRIMITIVES_GENERATED:
+      /* We don't actually query the hardware for this value; we keep track of
+       * it in a software counter.  So just reset the counter.
+       */
+      brw->sol.primitives_generated = 0;
+      break;
+
+   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
+      /* We don't actually query the hardware for this value; we keep track of
+       * it in a software counter.  So just reset the counter.
+       */
+      brw->sol.primitives_written = 0;
+      break;
+
+   default:
+      assert(!"Unrecognized query target in brw_begin_query()");
+      break;
    }
 }
 
@@ -133,33 +190,49 @@ brw_begin_query(GLcontext *ctx, struct gl_query_object *q)
  * End the ARB_occlusion_query query on a query object.
  */
 static void
-brw_end_query(GLcontext *ctx, struct gl_query_object *q)
+brw_end_query(struct gl_context *ctx, struct gl_query_object *q)
 {
    struct brw_context *brw = brw_context(ctx);
    struct intel_context *intel = intel_context(ctx);
    struct brw_query_object *query = (struct brw_query_object *)q;
 
-   if (query->Base.Target == GL_TIME_ELAPSED_EXT) {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL |
-               PIPE_CONTROL_WRITE_TIMESTAMP);
-      OUT_RELOC(query->bo,
-               I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-               PIPE_CONTROL_GLOBAL_GTT_WRITE |
-               8);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-
-      intel_batchbuffer_flush(intel->batch);
-   } else {
+   switch (query->Base.Target) {
+   case GL_TIME_ELAPSED_EXT:
+      if (intel->gen >= 6) {
+         BEGIN_BATCH(4);
+         OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+         OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP);
+         OUT_RELOC(query->bo,
+                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                 PIPE_CONTROL_GLOBAL_GTT_WRITE |
+                 8);
+         OUT_BATCH(0);
+         ADVANCE_BATCH();
+      
+      } else {
+         BEGIN_BATCH(4);
+         OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+                 PIPE_CONTROL_WRITE_TIMESTAMP);
+         OUT_RELOC(query->bo,
+                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                 PIPE_CONTROL_GLOBAL_GTT_WRITE |
+                 8);
+         OUT_BATCH(0);
+         OUT_BATCH(0);
+         ADVANCE_BATCH();
+      }
+
+      intel_batchbuffer_flush(intel);
+      break;
+
+   case GL_SAMPLES_PASSED_ARB:
       /* Flush the batchbuffer in case it has writes to our query BO.
        * Have later queries write to a new query BO so that further rendering
        * doesn't delay the collection of our results.
        */
       if (query->bo) {
         brw_emit_query_end(brw);
-        intel_batchbuffer_flush(intel->batch);
+        intel_batchbuffer_flush(intel);
 
         drm_intel_bo_unreference(brw->query.bo);
         brw->query.bo = NULL;
@@ -168,24 +241,55 @@ brw_end_query(GLcontext *ctx, struct gl_query_object *q)
       brw->query.obj = NULL;
 
       intel->stats_wm--;
+      break;
+
+   case GL_PRIMITIVES_GENERATED:
+      /* We don't actually query the hardware for this value; we keep track of
+       * it in a software counter.  So just read the counter and store it in
+       * the query object.
+       */
+      query->Base.Result = brw->sol.primitives_generated;
+
+      /* And set query->bo to NULL so that this query won't try to wait
+       * for any rendering to complete.
+       */
+      query->bo = NULL;
+      break;
+
+   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
+      /* We don't actually query the hardware for this value; we keep track of
+       * it in a software counter.  So just read the counter and store it in
+       * the query object.
+       */
+      query->Base.Result = brw->sol.primitives_written;
+
+      /* And set query->bo to NULL so that this query won't try to wait
+       * for any rendering to complete.
+       */
+      query->bo = NULL;
+      break;
+
+   default:
+      assert(!"Unrecognized query target in brw_end_query()");
+      break;
    }
 }
 
-static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q)
+static void brw_wait_query(struct gl_context *ctx, struct gl_query_object *q)
 {
    struct brw_query_object *query = (struct brw_query_object *)q;
 
-   brw_queryobj_get_results(query);
-   query->Base.Ready = GL_TRUE;
+   brw_queryobj_get_results(ctx, query);
+   query->Base.Ready = true;
 }
 
-static void brw_check_query(GLcontext *ctx, struct gl_query_object *q)
+static void brw_check_query(struct gl_context *ctx, struct gl_query_object *q)
 {
    struct brw_query_object *query = (struct brw_query_object *)q;
 
    if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
-      brw_queryobj_get_results(query);
-      query->Base.Ready = GL_TRUE;
+      brw_queryobj_get_results(ctx, query);
+      query->Base.Ready = true;
    }
 }
 
@@ -206,10 +310,14 @@ brw_prepare_query_begin(struct brw_context *brw)
       brw->query.bo = NULL;
 
       brw->query.bo = drm_intel_bo_alloc(intel->bufmgr, "query", 4096, 1);
+
+      /* clear target buffer */
+      drm_intel_bo_map(brw->query.bo, true);
+      memset((char *)brw->query.bo->virtual, 0, 4096);
+      drm_intel_bo_unmap(brw->query.bo);
+
       brw->query.index = 0;
    }
-
-   brw_add_validated_bo(brw, brw->query.bo);
 }
 
 /** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
@@ -217,38 +325,60 @@ void
 brw_emit_query_begin(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
    struct brw_query_object *query = brw->query.obj;
 
    /* Skip if we're not doing any queries, or we've emitted the start. */
    if (!query || brw->query.active)
       return;
 
-   BEGIN_BATCH(4);
-   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
-            PIPE_CONTROL_DEPTH_STALL |
-            PIPE_CONTROL_WRITE_DEPTH_COUNT);
-   /* This object could be mapped cacheable, but we don't have an exposed
-    * mechanism to support that.  Since it's going uncached, tell GEM that
-    * we're writing to it.  The usual clflush should be all that's required
-    * to pick up the results.
-    */
-   OUT_RELOC(brw->query.bo,
-            I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-            PIPE_CONTROL_GLOBAL_GTT_WRITE |
-            ((brw->query.index * 2) * sizeof(uint64_t)));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
+   if (intel->gen >= 6) {
+       BEGIN_BATCH(8);
+
+       /* workaround: CS stall required before depth stall. */
+       OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+       OUT_BATCH(PIPE_CONTROL_CS_STALL);
+       OUT_BATCH(0); /* write address */
+       OUT_BATCH(0); /* write data */
+
+       OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+       OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
+                PIPE_CONTROL_WRITE_DEPTH_COUNT);
+       OUT_RELOC(brw->query.bo,
+                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                PIPE_CONTROL_GLOBAL_GTT_WRITE |
+                ((brw->query.index * 2) * sizeof(uint64_t)));
+       OUT_BATCH(0);
+       ADVANCE_BATCH();
+       
+   } else {
+       BEGIN_BATCH(4);
+       OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+              PIPE_CONTROL_DEPTH_STALL |
+              PIPE_CONTROL_WRITE_DEPTH_COUNT);
+       /* This object could be mapped cacheable, but we don't have an exposed
+       * mechanism to support that.  Since it's going uncached, tell GEM that
+       * we're writing to it.  The usual clflush should be all that's required
+       * to pick up the results.
+       */
+       OUT_RELOC(brw->query.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+              PIPE_CONTROL_GLOBAL_GTT_WRITE |
+              ((brw->query.index * 2) * sizeof(uint64_t)));
+       OUT_BATCH(0);
+       OUT_BATCH(0);
+       ADVANCE_BATCH();
+   }
 
    if (query->bo != brw->query.bo) {
       if (query->bo != NULL)
-        brw_queryobj_get_results(query);
+        brw_queryobj_get_results(ctx, query);
       drm_intel_bo_reference(brw->query.bo);
       query->bo = brw->query.bo;
       query->first_index = brw->query.index;
    }
    query->last_index = brw->query.index;
-   brw->query.active = GL_TRUE;
+   brw->query.active = true;
 }
 
 /** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */
@@ -260,19 +390,39 @@ brw_emit_query_end(struct brw_context *brw)
    if (!brw->query.active)
       return;
 
-   BEGIN_BATCH(4);
-   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
-            PIPE_CONTROL_DEPTH_STALL |
-            PIPE_CONTROL_WRITE_DEPTH_COUNT);
-   OUT_RELOC(brw->query.bo,
-            I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-            PIPE_CONTROL_GLOBAL_GTT_WRITE |
-            ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-
-   brw->query.active = GL_FALSE;
+   if (intel->gen >= 6) {
+       BEGIN_BATCH(8);
+       /* workaround: CS stall required before depth stall. */
+       OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+       OUT_BATCH(PIPE_CONTROL_CS_STALL);
+       OUT_BATCH(0); /* write address */
+       OUT_BATCH(0); /* write data */
+
+       OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+       OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
+                PIPE_CONTROL_WRITE_DEPTH_COUNT);
+       OUT_RELOC(brw->query.bo,
+                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                PIPE_CONTROL_GLOBAL_GTT_WRITE |
+                ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
+       OUT_BATCH(0);
+       ADVANCE_BATCH();
+   
+   } else {
+       BEGIN_BATCH(4);
+       OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+              PIPE_CONTROL_DEPTH_STALL |
+              PIPE_CONTROL_WRITE_DEPTH_COUNT);
+       OUT_RELOC(brw->query.bo,
+              I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+              PIPE_CONTROL_GLOBAL_GTT_WRITE |
+              ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
+       OUT_BATCH(0);
+       OUT_BATCH(0);
+       ADVANCE_BATCH();
+   }
+
+   brw->query.active = false;
    brw->query.index++;
 }