+ if (brw->batch.ring == RENDER_RING) {
+ /* Work around L3 state leaks into contexts set MI_RESTORE_INHIBIT which
+ * assume that the L3 cache is configured according to the hardware
+ * defaults.
+ */
+ if (brw->gen >= 7)
+ gen7_restore_default_l3_config(brw);
+
+ if (brw->is_haswell) {
+ /* From the Haswell PRM, Volume 2b, Command Reference: Instructions,
+ * 3DSTATE_CC_STATE_POINTERS > "Note":
+ *
+ * "SW must program 3DSTATE_CC_STATE_POINTERS command at the end of every
+ * 3D batch buffer followed by a PIPE_CONTROL with RC flush and CS stall."
+ *
+ * From the example in the docs, it seems to expect a regular pipe control
+ * flush here as well. We may have done it already, but meh.
+ *
+ * See also WaAvoidRCZCounterRollover.
+ */
+ brw_emit_mi_flush(brw);
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
+ OUT_BATCH(brw->cc.state_offset | 1);
+ ADVANCE_BATCH();
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_CS_STALL);
+ }
+ }
+
+ /* Mark that the current program cache BO has been used by the GPU.
+ * It will be reallocated if we need to put new programs in for the
+ * next batch.
+ */
+ brw->cache.bo_used_by_gpu = true;
+}
+
+static void
+throttle(struct brw_context *brw)
+{
+ /* Wait for the swapbuffers before the one we just emitted, so we
+ * don't get too many swaps outstanding for apps that are GPU-heavy
+ * but not CPU-heavy.
+ *
+ * We're using intelDRI2Flush (called from the loader before
+ * swapbuffer) and glFlush (for front buffer rendering) as the
+ * indicator that a frame is done and then throttle when we get
+ * here as we prepare to render the next frame. At this point for
+ * round trips for swap/copy and getting new buffers are done and
+ * we'll spend less time waiting on the GPU.
+ *
+ * Unfortunately, we don't have a handle to the batch containing
+ * the swap, and getting our hands on that doesn't seem worth it,
+ * so we just use the first batch we emitted after the last swap.
+ */
+ if (brw->need_swap_throttle && brw->throttle_batch[0]) {
+ if (brw->throttle_batch[1]) {
+ if (!brw->disable_throttling)
+ drm_intel_bo_wait_rendering(brw->throttle_batch[1]);
+ drm_intel_bo_unreference(brw->throttle_batch[1]);
+ }
+ brw->throttle_batch[1] = brw->throttle_batch[0];
+ brw->throttle_batch[0] = NULL;
+ brw->need_swap_throttle = false;
+ /* Throttling here is more precise than the throttle ioctl, so skip it */
+ brw->need_flush_throttle = false;
+ }
+
+ if (brw->need_flush_throttle) {
+ __DRIscreen *dri_screen = brw->screen->driScrnPriv;
+ drmCommandNone(dri_screen->fd, DRM_I915_GEM_THROTTLE);
+ brw->need_flush_throttle = false;