i965/vec4: Don't lose the force_writemask_all flag during CSE.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
index 8257fb600439fe7fb1a6a0c407733564425487a0..ed6fdffd265c8a53d87332d0853e6c98686c8a77 100644 (file)
@@ -551,7 +551,12 @@ brw_initialize_context_constants(struct brw_context *brw)
       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
    }
 
-   static const nir_shader_compiler_options nir_options = {
+   static const nir_shader_compiler_options gen4_nir_options = {
+      .native_integers = true,
+      .lower_ffma = true,
+   };
+
+   static const nir_shader_compiler_options gen6_nir_options = {
       .native_integers = true,
    };
 
@@ -568,7 +573,10 @@ brw_initialize_context_constants(struct brw_context *brw)
         (i == MESA_SHADER_FRAGMENT);
       ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
       ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true;
-      ctx->Const.ShaderCompilerOptions[i].NirOptions = &nir_options;
+      if (brw->gen >= 6)
+         ctx->Const.ShaderCompilerOptions[i].NirOptions = &gen6_nir_options;
+      else
+         ctx->Const.ShaderCompilerOptions[i].NirOptions = &gen4_nir_options;
    }
 
    ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
@@ -658,6 +666,29 @@ brw_process_driconf_options(struct brw_context *brw)
       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
 }
 
+/* drop when libdrm 2.4.61 is released */
+#ifndef I915_PARAM_REVISION
+#define I915_PARAM_REVISION 32
+#endif
+
+static int
+brw_get_revision(int fd)
+{
+   struct drm_i915_getparam gp;
+   int revision;
+   int ret;
+
+   memset(&gp, 0, sizeof(gp));
+   gp.param = I915_PARAM_REVISION;
+   gp.value = &revision;
+
+   ret = drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
+   if (ret)
+      revision = -1;
+
+   return revision;
+}
+
 GLboolean
 brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
@@ -716,6 +747,7 @@ brwCreateContext(gl_api api,
    brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
    brw->needs_unlit_centroid_workaround =
       devinfo->needs_unlit_centroid_workaround;
+   brw->revision = brw_get_revision(sPriv->fd);
 
    brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
    brw->has_swizzling = screen->hw_has_swizzling;
@@ -1231,40 +1263,6 @@ intel_prepare_render(struct brw_context *brw)
     */
    if (brw_is_front_buffer_drawing(ctx->DrawBuffer))
       brw->front_buffer_dirty = true;
-
-   /* Wait for the swapbuffers before the one we just emitted, so we
-    * don't get too many swaps outstanding for apps that are GPU-heavy
-    * but not CPU-heavy.
-    *
-    * We're using intelDRI2Flush (called from the loader before
-    * swapbuffer) and glFlush (for front buffer rendering) as the
-    * indicator that a frame is done and then throttle when we get
-    * here as we prepare to render the next frame.  At this point for
-    * round trips for swap/copy and getting new buffers are done and
-    * we'll spend less time waiting on the GPU.
-    *
-    * Unfortunately, we don't have a handle to the batch containing
-    * the swap, and getting our hands on that doesn't seem worth it,
-    * so we just us the first batch we emitted after the last swap.
-    */
-   if (brw->need_swap_throttle && brw->throttle_batch[0]) {
-      if (brw->throttle_batch[1]) {
-         if (!brw->disable_throttling)
-            drm_intel_bo_wait_rendering(brw->throttle_batch[1]);
-         drm_intel_bo_unreference(brw->throttle_batch[1]);
-      }
-      brw->throttle_batch[1] = brw->throttle_batch[0];
-      brw->throttle_batch[0] = NULL;
-      brw->need_swap_throttle = false;
-      /* Throttling here is more precise than the throttle ioctl, so skip it */
-      brw->need_flush_throttle = false;
-   }
-
-   if (brw->need_flush_throttle) {
-      __DRIscreen *psp = brw->intelScreen->driScrnPriv;
-      drmCommandNone(psp->fd, DRM_I915_GEM_THROTTLE);
-      brw->need_flush_throttle = false;
-   }
 }
 
 /**