i965: Enable resource streamer for the batchbuffer

[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c

index f0de711cedc22a2cbc8e65a736f2373c94a73d3e..05cb53b37110b8f3ed3f8de5ace28bccea958646 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -50,6 +50,7 @@
  
  #include "brw_context.h"
  #include "brw_defines.h"
+#include "brw_shader.h"
  #include "brw_draw.h"
  #include "brw_state.h"
  
@@ -68,8 +69,6 @@
  #include "tnl/t_pipeline.h"
  #include "util/ralloc.h"
  
-#include "glsl/nir/nir.h"
-
  /***************************************
   * Mesa's Driver Functions
   ***************************************/
@@ -288,6 +287,9 @@ brw_init_driver_functions(struct brw_context *brw,
        gen6_init_queryobj_functions(functions);
     else
        gen4_init_queryobj_functions(functions);
+   brw_init_compute_functions(functions);
+   if (brw->gen >= 7)
+      brw_init_conditional_render_functions(functions);
  
     functions->QuerySamplesForFormat = brw_query_samples_for_format;
  
@@ -425,11 +427,7 @@ brw_initialize_context_constants(struct brw_context *brw)
  
     ctx->Const.MinLineWidth = 1.0;
     ctx->Const.MinLineWidthAA = 1.0;
-   if (brw->gen >= 9 || brw->is_cherryview) {
-      ctx->Const.MaxLineWidth = 40.0;
-      ctx->Const.MaxLineWidthAA = 40.0;
-      ctx->Const.LineWidthGranularity = 0.125;
-   } else if (brw->gen >= 6) {
+   if (brw->gen >= 6) {
        ctx->Const.MaxLineWidth = 7.375;
        ctx->Const.MaxLineWidthAA = 7.375;
        ctx->Const.LineWidthGranularity = 0.125;
@@ -439,6 +437,13 @@ brw_initialize_context_constants(struct brw_context *brw)
        ctx->Const.LineWidthGranularity = 0.5;
     }
  
+   /* For non-antialiased lines, we have to round the line width to the
+    * nearest whole number. Make sure that we don't advertise a line
+    * width that, when rounded, will be beyond the actual hardware
+    * maximum.
+    */
+   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
+
     ctx->Const.MinPointSize = 1.0;
     ctx->Const.MinPointSizeAA = 1.0;
     ctx->Const.MaxPointSize = 255.0;
@@ -542,6 +547,7 @@ brw_initialize_context_constants(struct brw_context *brw)
      */
     ctx->Const.UniformBufferOffsetAlignment = 16;
     ctx->Const.TextureBufferOffsetAlignment = 16;
+   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
  
     if (brw->gen >= 6) {
        ctx->Const.MaxVarying = 32;
@@ -551,57 +557,18 @@ brw_initialize_context_constants(struct brw_context *brw)
        ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
     }
  
-   static const nir_shader_compiler_options nir_options = {
-      .native_integers = true,
-      /* In order to help allow for better CSE at the NIR level we tell NIR
-       * to split all ffma instructions during opt_algebraic and we then
-       * re-combine them as a later step.
-       */
-      .lower_ffma = true,
-   };
-
-   bool use_nir_default[MESA_SHADER_STAGES];
-   use_nir_default[MESA_SHADER_VERTEX] = false;
-   use_nir_default[MESA_SHADER_GEOMETRY] = false;
-   use_nir_default[MESA_SHADER_FRAGMENT] = false;
-   use_nir_default[MESA_SHADER_COMPUTE] = false;
-
     /* We want the GLSL compiler to emit code that uses condition codes */
     for (int i = 0; i < MESA_SHADER_STAGES; i++) {
-      ctx->Const.ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
-      ctx->Const.ShaderCompilerOptions[i].EmitCondCodes = true;
-      ctx->Const.ShaderCompilerOptions[i].EmitNoNoise = true;
-      ctx->Const.ShaderCompilerOptions[i].EmitNoMainReturn = true;
-      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectInput = true;
-      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectOutput =
-        (i == MESA_SHADER_FRAGMENT);
-      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectTemp =
-        (i == MESA_SHADER_FRAGMENT);
-      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
-      ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true;
-
-      if (brw_env_var_as_boolean("INTEL_USE_NIR", use_nir_default[i]))
-         ctx->Const.ShaderCompilerOptions[i].NirOptions = &nir_options;
-   }
-
-   ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
-   ctx->Const.ShaderCompilerOptions[MESA_SHADER_GEOMETRY].OptimizeForAOS = true;
-
-   if (brw->scalar_vs) {
-      /* If we're using the scalar backend for vertex shaders, we need to
-       * configure these accordingly.
-       */
-      ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectOutput = true;
-      ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectTemp = true;
-      ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = false;
+      ctx->Const.ShaderCompilerOptions[i] =
+         brw->intelScreen->compiler->glsl_compiler_options[i];
     }
  
     /* ARB_viewport_array */
-   if (brw->gen >= 7 && ctx->API == API_OPENGL_CORE) {
-      ctx->Const.MaxViewports = GEN7_NUM_VIEWPORTS;
+   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
+      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
        ctx->Const.ViewportSubpixelBits = 0;
  
-      /* Cast to float before negating becuase MaxViewportWidth is unsigned.
+      /* Cast to float before negating because MaxViewportWidth is unsigned.
         */
        ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
        ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
@@ -610,6 +577,34 @@ brw_initialize_context_constants(struct brw_context *brw)
     /* ARB_gpu_shader5 */
     if (brw->gen >= 7)
        ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
+
+   /* ARB_framebuffer_no_attachments */
+   ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
+   ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
+   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
+   ctx->Const.MaxFramebufferSamples = max_samples;
+}
+
+static void
+brw_adjust_cs_context_constants(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->ctx;
+
+   /* Set each compute work-group limit to simd_size * max_cs_threads:
+    * SIMD16 for core-profile GL, SIMD8 for every other API (e.g. ES).
+    *
+    * TODO: Once we can always generate SIMD16, we should update this.
+    * For GL, we assume we can generate a SIMD16 program, but this currently
+    * is not always true. This allows us to run more test cases, and will be
+    * required based on desktop GL compute shader requirements.
+    */
+   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;
+
+   const uint32_t max_invocations = simd_size * brw->max_cs_threads;
+   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
+   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
+   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
+   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
  }
  
  /**
@@ -671,29 +666,6 @@ brw_process_driconf_options(struct brw_context *brw)
        driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
  }
  
-/* drop when libdrm 2.4.61 is released */
-#ifndef I915_PARAM_REVISION
-#define I915_PARAM_REVISION 32
-#endif
-
-static int
-brw_get_revision(int fd)
-{
-   struct drm_i915_getparam gp;
-   int revision;
-   int ret;
-
-   memset(&gp, 0, sizeof(gp));
-   gp.param = I915_PARAM_REVISION;
-   gp.value = &revision;
-
-   ret = drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
-   if (ret)
-      revision = -1;
-
-   return revision;
-}
-
  GLboolean
  brwCreateContext(gl_api api,
                  const struct gl_config *mesaVis,
@@ -727,7 +699,7 @@ brwCreateContext(gl_api api,
  
     struct brw_context *brw = rzalloc(NULL, struct brw_context);
     if (!brw) {
-      fprintf(stderr, "%s: failed to alloc context\n", __FUNCTION__);
+      fprintf(stderr, "%s: failed to alloc context\n", __func__);
        *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
        return false;
     }
@@ -743,6 +715,7 @@ brwCreateContext(gl_api api,
     brw->is_baytrail = devinfo->is_baytrail;
     brw->is_haswell = devinfo->is_haswell;
     brw->is_cherryview = devinfo->is_cherryview;
+   brw->is_broxton = devinfo->is_broxton;
     brw->has_llc = devinfo->has_llc;
     brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
     brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
@@ -752,7 +725,6 @@ brwCreateContext(gl_api api,
     brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
     brw->needs_unlit_centroid_workaround =
        devinfo->needs_unlit_centroid_workaround;
-   brw->revision = brw_get_revision(sPriv->fd);
  
     brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
     brw->has_swizzling = screen->hw_has_swizzling;
@@ -783,7 +755,7 @@ brwCreateContext(gl_api api,
  
     if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
        *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
-      fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
+      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
        intelDestroyContext(driContextPriv);
        return false;
     }
@@ -814,10 +786,9 @@ brwCreateContext(gl_api api,
     _mesa_meta_init(ctx);
  
     brw_process_driconf_options(brw);
-   brw_process_intel_debug_variable(brw);
  
-   if (brw->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS))
-      brw->scalar_vs = true;
+   if (INTEL_DEBUG & DEBUG_PERF)
+      brw->perf_debug = true;
  
     brw_initialize_context_constants(brw);
  
@@ -848,6 +819,12 @@ brwCreateContext(gl_api api,
        }
     }
  
+   if (brw_init_pipe_control(brw, devinfo)) {
+      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
+      intelDestroyContext(driContextPriv);
+      return false;
+   }
+
     brw_init_state(brw);
  
     intelInitExtensions(ctx);
@@ -859,6 +836,7 @@ brwCreateContext(gl_api api,
     brw->max_ds_threads = devinfo->max_ds_threads;
     brw->max_gs_threads = devinfo->max_gs_threads;
     brw->max_wm_threads = devinfo->max_wm_threads;
+   brw->max_cs_threads = devinfo->max_cs_threads;
     brw->urb.size = devinfo->urb.size;
     brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
     brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
@@ -866,6 +844,8 @@ brwCreateContext(gl_api api,
     brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
     brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
  
+   brw_adjust_cs_context_constants(brw);
+
     /* Estimate the size of the mappable aperture into the GTT.  There's an
      * ioctl to get the whole GTT size, but not one to get the mappable subset.
      * It turns out it's basically always 256MB, though some ancient hardware
@@ -889,6 +869,12 @@ brwCreateContext(gl_api api,
     brw->gs.enabled = false;
     brw->sf.viewport_transform_enable = true;
  
+   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
+
+   brw->use_resource_streamer = screen->has_resource_streamer &&
+      (brw_env_var_as_boolean("INTEL_USE_HW_BT", false) ||
+       brw_env_var_as_boolean("INTEL_USE_GATHER", false));
+
     ctx->VertexProgram._MaintainTnlProgram = true;
     ctx->FragmentProgram._MaintainTexEnvProgram = true;
  
@@ -927,10 +913,6 @@ intelDestroyContext(__DRIcontext * driContextPriv)
        (struct brw_context *) driContextPriv->driverPrivate;
     struct gl_context *ctx = &brw->ctx;
  
-   assert(brw); /* should never be null */
-   if (!brw)
-      return;
-
     /* Dump a final BMP in case the application doesn't call SwapBuffers */
     if (INTEL_DEBUG & DEBUG_AUB) {
        intel_batchbuffer_flush(brw);
@@ -970,6 +952,7 @@ intelDestroyContext(__DRIcontext * driContextPriv)
     if (ctx->swrast_context)
        _swrast_DestroyContext(&brw->ctx);
  
+   brw_fini_pipe_control(brw);
     intel_batchbuffer_free(brw);
  
     drm_intel_bo_unreference(brw->throttle_batch[1]);
@@ -1004,7 +987,7 @@ intelUnbindContext(__DRIcontext * driContextPriv)
   * sRGB encode if the renderbuffer can handle it.  You can ask specifically
   * for a visual where you're guaranteed to be capable, but it turns out that
   * everyone just makes all their ARGB8888 visuals capable and doesn't offer
- * incapable ones, becuase there's no difference between the two in resources
+ * incapable ones, because there's no difference between the two in resources
   * used.  Applications thus get built that accidentally rely on the default
   * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
   * great...