i965: Allow a per gen timebase scale factor

author Robert Bragg <robert@sixbynine.org>

Thu, 27 Oct 2016 21:08:19 +0000 (22:08 +0100)

committer Robert Bragg <robert@sixbynine.org>

Fri, 17 Mar 2017 15:45:19 +0000 (15:45 +0000)
author Robert Bragg <robert@sixbynine.org>
Thu, 27 Oct 2016 21:08:19 +0000 (22:08 +0100)
committer Robert Bragg <robert@sixbynine.org>
Fri, 17 Mar 2017 15:45:19 +0000 (15:45 +0000)
diff --git a/src/intel/common/gen_device_info.c b/src/intel/common/gen_device_info.c

index 9bf3cd5cc42ab7c72a7e4973214155b5ce7aedd1..209b293e5104678b7771bdfec212af0195a749cd 100644 (file)
--- a/src/intel/common/gen_device_info.c
+++ b/src/intel/common/gen_device_info.c
@@ -36,6 +36,7 @@ static const struct gen_device_info gen_device_info_i965 = {
     .urb = {
        .size = 256,
     },
+   .timebase_scale = 80,
  };
  
  static const struct gen_device_info gen_device_info_g4x = {
@@ -51,6 +52,7 @@ static const struct gen_device_info gen_device_info_g4x = {
     .urb = {
        .size = 384,
     },
+   .timebase_scale = 80,
  };
  
  static const struct gen_device_info gen_device_info_ilk = {
@@ -65,6 +67,7 @@ static const struct gen_device_info gen_device_info_ilk = {
     .urb = {
        .size = 1024,
     },
+   .timebase_scale = 80,
  };
  
  static const struct gen_device_info gen_device_info_snb_gt1 = {
@@ -89,6 +92,7 @@ static const struct gen_device_info gen_device_info_snb_gt1 = {
           [MESA_SHADER_GEOMETRY] = 256,
        },
     },
+   .timebase_scale = 80,
  };
  
  static const struct gen_device_info gen_device_info_snb_gt2 = {
@@ -113,6 +117,7 @@ static const struct gen_device_info gen_device_info_snb_gt2 = {
           [MESA_SHADER_GEOMETRY] = 256,
        },
     },
+   .timebase_scale = 80,
  };
  
  #define GEN7_FEATURES                               \
@@ -121,7 +126,8 @@ static const struct gen_device_info gen_device_info_snb_gt2 = {
     .must_use_separate_stencil = true,               \
     .has_llc = true,                                 \
     .has_pln = true,                                 \
-   .has_surface_tile_offset = true
+   .has_surface_tile_offset = true,                 \
+   .timebase_scale = 80
  
  static const struct gen_device_info gen_device_info_ivb_gt1 = {
     GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
@@ -287,7 +293,8 @@ static const struct gen_device_info gen_device_info_hsw_gt3 = {
     .max_tcs_threads = 504,                          \
     .max_tes_threads = 504,                          \
     .max_gs_threads = 504,                           \
-   .max_wm_threads = 384
+   .max_wm_threads = 384,                           \
+   .timebase_scale = 80
  
  static const struct gen_device_info gen_device_info_bdw_gt1 = {
     GEN8_FEATURES, .gt = 1,
@@ -385,6 +392,7 @@ static const struct gen_device_info gen_device_info_chv = {
     .max_tcs_threads = 336,                          \
     .max_tes_threads = 336,                          \
     .max_cs_threads = 56,                            \
+   .timebase_scale = 1000000000.0 / 12000000.0,     \
     .urb = {                                         \
        .size = 384,                                  \
        .min_entries = {                              \
@@ -410,6 +418,7 @@ static const struct gen_device_info gen_device_info_chv = {
     .max_tes_threads = 112,                         \
     .max_gs_threads = 112,                          \
     .max_cs_threads = 6 * 6,                        \
+   .timebase_scale = 1000000000.0 / 19200123.0,    \
     .urb = {                                        \
        .size = 192,                                 \
        .min_entries = {                             \
diff --git a/src/intel/common/gen_device_info.h b/src/intel/common/gen_device_info.h

index f0e8750d0eab24cdf1f37ba2b46ceecdeff1f6d4..80676d0e003b892fad08eb21331664c442de5fb7 100644 (file)
--- a/src/intel/common/gen_device_info.h
+++ b/src/intel/common/gen_device_info.h
@@ -147,6 +147,30 @@ struct gen_device_info
         */
        unsigned max_entries[4];
     } urb;
+
+   /**
+    * For the longest time the timestamp frequency for Gen's timestamp counter
+    * could be assumed to be 12.5MHz, where the least significant bit neatly
+    * corresponded to 80 nanoseconds.
+    *
+    * Since Gen9 the numbers aren't so round, with a a frequency of 12MHz for
+    * SKL (or scale factor of 83.33333333) and a frequency of 19200123Hz for
+    * BXT.
+    *
+    * For simplicty to fit with the current code scaling by a single constant
+    * to map from raw timestamps to nanoseconds we now do the conversion in
+    * floating point instead of integer arithmetic.
+    *
+    * In general it's probably worth noting that the documented constants we
+    * have for the per-platform timestamp frequencies aren't perfect and
+    * shouldn't be trusted for scaling and comparing timestamps with a large
+    * delta.
+    *
+    * E.g. with crude testing on my system using the 'correct' scale factor I'm
+    * seeing a drift of ~2 milliseconds per second.
+    */
+   double timebase_scale;
+
     /** @} */
  };
  
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c

index 32cfb2efe4c6c4329743f2d902af740cc162ceb8..648ae50814535f2faeefd0429d5f596cd390c28d 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -524,6 +524,21 @@ brw_initialize_context_constants(struct brw_context *brw)
     ctx->Const.MaxCombinedShaderOutputResources =
        MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
  
+   /* The timestamp register we can read for glGetTimestamp() is
+    * sometimes only 32 bits, before scaling to nanoseconds (depending
+    * on kernel).
+    *
+    * Once scaled to nanoseconds the timestamp would roll over at a
+    * non-power-of-two, so an application couldn't use
+    * GL_QUERY_COUNTER_BITS to handle rollover correctly.  Instead, we
+    * report 36 bits and truncate at that (rolling over 5 times as
+    * often as the HW counter), and when the 32-bit counter rolls
+    * over, it happens to also be at a rollover in the reported value
+    * from near (1<<36) to 0.
+    *
+    * The low 32 bits rolls over in ~343 seconds.  Our 36-bit result
+    * rolls over every ~69 seconds.
+    */
     ctx->Const.QueryCounterBits.Timestamp = 36;
  
     ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h

index 024f4b5e98bfc22e2f6d34f9fd36154a01a785e5..6f4dbe81c3795add855f8a978203011043cf62bf 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1298,6 +1298,9 @@ void brw_emit_query_begin(struct brw_context *brw);
  void brw_emit_query_end(struct brw_context *brw);
  void brw_query_counter(struct gl_context *ctx, struct gl_query_object *q);
  bool brw_is_query_pipelined(struct brw_query_object *query);
+uint64_t brw_timebase_scale(struct brw_context *brw, uint64_t gpu_timestamp);
+uint64_t brw_raw_timestamp_delta(struct brw_context *brw,
+                                 uint64_t time0, uint64_t time1);
  
  /** gen6_queryobj.c */
  void gen6_init_queryobj_functions(struct dd_function_table *functions);
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c

index 40b86a0f64bc7842f08afa1ee207fc337c5fd12f..5c3ecbafba65ffc767248b8685f89528bcd45754 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -42,6 +42,42 @@
  #include "brw_state.h"
  #include "intel_batchbuffer.h"
  
+uint64_t
+brw_timebase_scale(struct brw_context *brw, uint64_t gpu_timestamp)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   return (double)gpu_timestamp * devinfo->timebase_scale;
+}
+
+/* As best we know currently, the Gen HW timestamps are 36bits across
+ * all platforms, which we need to account for when calculating a
+ * delta to measure elapsed time.
+ *
+ * The timestamps read via glGetTimestamp() / brw_get_timestamp() sometimes
+ * only have 32bits due to a kernel bug and so in that case we make sure to
+ * treat all raw timestamps as 32bits so they overflow consistently and remain
+ * comparable. (Note: the timestamps being passed here are not from the kernel
+ * so we don't need to be taking the upper 32bits in this buggy kernel case we
+ * are just clipping to 32bits here for consistency.)
+ */
+uint64_t
+brw_raw_timestamp_delta(struct brw_context *brw, uint64_t time0, uint64_t time1)
+{
+   if (brw->screen->hw_has_timestamp == 2) {
+      /* Kernel clips timestamps to 32bits in this case, so we also clip
+       * PIPE_CONTROL timestamps for consistency.
+       */
+      return (uint32_t)time1 - (uint32_t)time0;
+   } else {
+      if (time0 > time1) {
+         return (1ULL << 36) + time1 - time0;
+      } else {
+         return time1 - time0;
+      }
+   }
+}
+
  /**
   * Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer.
   */
@@ -117,12 +153,18 @@ brw_queryobj_get_results(struct gl_context *ctx,
        /* The query BO contains the starting and ending timestamps.
         * Subtract the two and convert to nanoseconds.
         */
-      query->Base.Result += 1000 * ((results[1] >> 32) - (results[0] >> 32));
+      query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]);
+      query->Base.Result = brw_timebase_scale(brw, query->Base.Result);
        break;
  
     case GL_TIMESTAMP:
        /* The query BO contains a single timestamp value in results[0]. */
-      query->Base.Result = 1000 * (results[0] >> 32);
+      query->Base.Result = brw_timebase_scale(brw, results[0]);
+
+      /* Ensure the scaled timestamp overflows according to
+       * GL_QUERY_COUNTER_BITS
+       */
+      query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1;
        break;
  
     case GL_SAMPLES_PASSED_ARB:
@@ -508,9 +550,15 @@ brw_get_timestamp(struct gl_context *ctx)
        break;
     }
  
-   /* See logic in brw_queryobj_get_results() */
-   result *= 80;
-   result &= (1ull << 36) - 1;
+   /* Scale to nanosecond units */
+   result = brw_timebase_scale(brw, result);
+
+   /* Ensure the scaled timestamp overflows according to
+    * GL_QUERY_COUNTER_BITS.  Technically this isn't required if
+    * querying GL_TIMESTAMP via glGetInteger but it seems best to keep
+    * QueryObject and GetInteger timestamps consistent.
+    */
+   result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1;
     return result;
  }
  
diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c

index 98cbbff91401fd07c16168dccea75befa218de1f..0014a67e71900adb23425a6ee9966788c41122a1 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -219,30 +219,18 @@ gen6_queryobj_get_results(struct gl_context *ctx,
        /* The query BO contains the starting and ending timestamps.
         * Subtract the two and convert to nanoseconds.
         */
-      query->Base.Result += 80 * (results[1] - results[0]);
+      query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]);
+      query->Base.Result = brw_timebase_scale(brw, query->Base.Result);
        break;
  
     case GL_TIMESTAMP:
-      /* Our timer is a clock that increments every 80ns (regardless of
-       * other clock scaling in the system).  The timestamp register we can
-       * read for glGetTimestamp() masks out the top 32 bits, so we do that
-       * here too to let the two counters be compared against each other.
-       *
-       * If we just multiplied that 32 bits of data by 80, it would roll
-       * over at a non-power-of-two, so an application couldn't use
-       * GL_QUERY_COUNTER_BITS to handle rollover correctly.  Instead, we
-       * report 36 bits and truncate at that (rolling over 5 times as often
-       * as the HW counter), and when the 32-bit counter rolls over, it
-       * happens to also be at a rollover in the reported value from near
-       * (1<<36) to 0.
-       *
-       * The low 32 bits rolls over in ~343 seconds.  Our 36-bit result
-       * rolls over every ~69 seconds.
-       *
-       * The query BO contains a single timestamp value in results[0].
+      /* The query BO contains a single timestamp value in results[0]. */
+      query->Base.Result = brw_timebase_scale(brw, results[0]);
+
+      /* Ensure the scaled timestamp overflows according to
+       * GL_QUERY_COUNTER_BITS
         */
-      query->Base.Result = 80 * (results[0] & 0xffffffff);
-      query->Base.Result &= (1ull << 36) - 1;
+      query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1;
        break;
  
     case GL_SAMPLES_PASSED_ARB:
author	Robert Bragg <robert@sixbynine.org>
	Thu, 27 Oct 2016 21:08:19 +0000 (22:08 +0100)
committer	Robert Bragg <robert@sixbynine.org>
	Fri, 17 Mar 2017 15:45:19 +0000 (15:45 +0000)
src/intel/common/gen_device_info.c		patch \| blob \| history
src/intel/common/gen_device_info.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_context.c		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_context.h		patch \| blob \| history
src/mesa/drivers/dri/i965/brw_queryobj.c		patch \| blob \| history
src/mesa/drivers/dri/i965/gen6_queryobj.c		patch \| blob \| history