i965: Move brw_query_samples_for_format() to brw_queryformat.c
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
index e70ad982f482cf339f75eb11b027f72210f5909f..c16da0d2291698986353669c2fb332b7aee4210d 100644 (file)
 #include "tnl/tnl.h"
 #include "tnl/t_pipeline.h"
 #include "util/ralloc.h"
+#include "util/debug.h"
 
 /***************************************
  * Mesa's Driver Functions
  ***************************************/
 
-static size_t
-brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
-                             GLenum internalFormat, int samples[16])
-{
-   struct brw_context *brw = brw_context(ctx);
-
-   (void) target;
-
-   switch (brw->gen) {
-   case 9:
-      samples[0] = 16;
-      samples[1] = 8;
-      samples[2] = 4;
-      samples[3] = 2;
-      return 4;
-
-   case 8:
-      samples[0] = 8;
-      samples[1] = 4;
-      samples[2] = 2;
-      return 3;
-
-   case 7:
-      samples[0] = 8;
-      samples[1] = 4;
-      return 2;
-
-   case 6:
-      samples[0] = 4;
-      return 1;
-
-   default:
-      assert(brw->gen < 6);
-      samples[0] = 1;
-      return 1;
-   }
-}
-
 const char *const brw_vendor_string = "Intel Open Source Technology Center";
 
 const char *
@@ -158,11 +121,26 @@ intel_viewport(struct gl_context *ctx)
    __DRIcontext *driContext = brw->driContext;
 
    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
-      dri2InvalidateDrawable(driContext->driDrawablePriv);
-      dri2InvalidateDrawable(driContext->driReadablePriv);
+      if (driContext->driDrawablePriv)
+         dri2InvalidateDrawable(driContext->driDrawablePriv);
+      if (driContext->driReadablePriv)
+         dri2InvalidateDrawable(driContext->driReadablePriv);
    }
 }
 
+static void
+intel_update_framebuffer(struct gl_context *ctx,
+                         struct gl_framebuffer *fb)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   /* Quantize the derived default number of samples
+    */
+   fb->DefaultGeometry._NumSamples =
+      intel_quantize_num_samples(brw->intelScreen,
+                                 fb->DefaultGeometry.NumSamples);
+}
+
 static void
 intel_update_state(struct gl_context * ctx, GLuint new_state)
 {
@@ -192,11 +170,76 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
       if (!tex_obj || !tex_obj->mt)
         continue;
       intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
-      intel_miptree_resolve_color(brw, tex_obj->mt);
+      /* Sampling engine understands lossless compression and resolving
+       * those surfaces should be skipped for performance reasons.
+       */
+      intel_miptree_resolve_color(brw, tex_obj->mt,
+                                  INTEL_MIPTREE_IGNORE_CCS_E);
       brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
    }
 
+   /* Resolve color for each active shader image. */
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
+         ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;
+
+      if (unlikely(shader && shader->NumImages)) {
+         for (unsigned j = 0; j < shader->NumImages; j++) {
+            struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
+            tex_obj = intel_texture_object(u->TexObj);
+
+            if (tex_obj && tex_obj->mt) {
+               /* Access to images is implemented using indirect messages
+                * against data port. Normal render target write understands
+                * lossless compression but unfortunately the typed/untyped
+                * read/write interface doesn't. Therefore the compressed
+                * surfaces need to be resolved prior to accessing them.
+                */
+               intel_miptree_resolve_color(brw, tex_obj->mt, 0);
+               brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
+            }
+         }
+      }
+   }
+
+   /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
+    * single-sampled color renderbuffers because the CCS buffer isn't
+    * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
+    * enabled because otherwise the surface state will be programmed with the
+    * linear equivalent format anyway.
+    */
+   if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
+      struct gl_framebuffer *fb = ctx->DrawBuffer;
+      for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
+         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
+
+         if (rb == NULL)
+            continue;
+
+         struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+         struct intel_mipmap_tree *mt = irb->mt;
+
+         if (mt == NULL ||
+             mt->num_samples > 1 ||
+             _mesa_get_srgb_format_linear(mt->format) == mt->format)
+               continue;
+
+         /* Lossless compression is not supported for SRGB formats, it
+          * should be impossible to get here with such surfaces.
+          */
+         assert(!intel_miptree_is_lossless_compressed(brw, mt));
+         intel_miptree_resolve_color(brw, mt, 0);
+         brw_render_cache_set_check_flush(brw, mt->bo);
+      }
+   }
+
    _mesa_lock_context_textures(ctx);
+
+   if (new_state & _NEW_BUFFERS) {
+      intel_update_framebuffer(ctx, ctx->DrawBuffer);
+      if (ctx->DrawBuffer != ctx->ReadBuffer)
+         intel_update_framebuffer(ctx, ctx->ReadBuffer);
+   }
 }
 
 #define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
@@ -299,6 +342,7 @@ brw_init_driver_functions(struct brw_context *brw,
       brw_init_conditional_render_functions(functions);
 
    functions->QuerySamplesForFormat = brw_query_samples_for_format;
+   functions->QueryInternalFormat = brw_query_internal_format;
 
    functions->NewTransformFeedback = brw_new_transform_feedback;
    functions->DeleteTransformFeedback = brw_delete_transform_feedback;
@@ -326,11 +370,16 @@ brw_initialize_context_constants(struct brw_context *brw)
 
    const bool stage_exists[MESA_SHADER_STAGES] = {
       [MESA_SHADER_VERTEX] = true,
-      [MESA_SHADER_TESS_CTRL] = false,
-      [MESA_SHADER_TESS_EVAL] = false,
+      [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
+      [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
       [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
       [MESA_SHADER_FRAGMENT] = true,
-      [MESA_SHADER_COMPUTE] = _mesa_extension_override_enables.ARB_compute_shader,
+      [MESA_SHADER_COMPUTE] =
+         (ctx->API == API_OPENGL_CORE &&
+          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
+         (ctx->API == API_OPENGLES2 &&
+          ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
+         _mesa_extension_override_enables.ARB_compute_shader,
    };
 
    unsigned num_stages = 0;
@@ -391,6 +440,7 @@ brw_initialize_context_constants(struct brw_context *brw)
            ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
 
    ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
+   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
    ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
    ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
    ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
@@ -482,6 +532,8 @@ brw_initialize_context_constants(struct brw_context *brw)
    if (brw->gen >= 5 || brw->is_g4x)
       ctx->Const.MaxClipPlanes = 8;
 
+   ctx->Const.LowerTessLevel = true;
+
    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
    ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
    ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
@@ -581,6 +633,10 @@ brw_initialize_context_constants(struct brw_context *brw)
       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
       ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
       ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
+      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
+      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
+      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
+      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
    }
 
    /* We want the GLSL compiler to emit code that uses condition codes */
@@ -589,6 +645,11 @@ brw_initialize_context_constants(struct brw_context *brw)
          brw->intelScreen->compiler->glsl_compiler_options[i];
    }
 
+   if (brw->gen >= 7) {
+      ctx->Const.MaxViewportWidth = 32768;
+      ctx->Const.MaxViewportHeight = 32768;
+   }
+
    /* ARB_viewport_array */
    if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
       ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
@@ -612,7 +673,7 @@ brw_initialize_context_constants(struct brw_context *brw)
 }
 
 static void
-brw_adjust_cs_context_constants(struct brw_context *brw)
+brw_initialize_cs_context_constants(struct brw_context *brw, unsigned max_threads)
 {
    struct gl_context *ctx = &brw->ctx;
 
@@ -626,11 +687,12 @@ brw_adjust_cs_context_constants(struct brw_context *brw)
     */
    const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;
 
-   const uint32_t max_invocations = simd_size * brw->max_cs_threads;
+   const uint32_t max_invocations = simd_size * max_threads;
    ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
    ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
    ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
    ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
+   ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
 }
 
 /**
@@ -690,6 +752,9 @@ brw_process_driconf_options(struct brw_context *brw)
 
    ctx->Const.AllowGLSLExtensionDirectiveMidShader =
       driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
+
+   brw->dual_color_blend_by_location =
+      driQueryOptionb(options, "dual_color_blend_by_location");
 }
 
 GLboolean
@@ -752,10 +817,12 @@ brwCreateContext(gl_api api,
    brw->needs_unlit_centroid_workaround =
       devinfo->needs_unlit_centroid_workaround;
 
-   brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
+   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
    brw->has_swizzling = screen->hw_has_swizzling;
 
    brw->vs.base.stage = MESA_SHADER_VERTEX;
+   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
+   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
    brw->gs.base.stage = MESA_SHADER_GEOMETRY;
    brw->wm.base.stage = MESA_SHADER_FRAGMENT;
    if (brw->gen >= 8) {
@@ -816,6 +883,7 @@ brwCreateContext(gl_api api,
    if (INTEL_DEBUG & DEBUG_PERF)
       brw->perf_debug = true;
 
+   brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
    brw_initialize_context_constants(brw);
 
    ctx->Const.ResetStrategy = notify_reset
@@ -870,8 +938,6 @@ brwCreateContext(gl_api api,
    brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
    brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
 
-   brw_adjust_cs_context_constants(brw);
-
    /* Estimate the size of the mappable aperture into the GTT.  There's an
     * ioctl to get the whole GTT size, but not one to get the mappable subset.
     * It turns out it's basically always 256MB, though some ancient hardware
@@ -898,8 +964,8 @@ brwCreateContext(gl_api api,
    brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
 
    brw->use_resource_streamer = screen->has_resource_streamer &&
-      (brw_env_var_as_boolean("INTEL_USE_HW_BT", false) ||
-       brw_env_var_as_boolean("INTEL_USE_GATHER", false));
+      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
+       env_var_as_boolean("INTEL_USE_GATHER", false));
 
    ctx->VertexProgram._MaintainTnlProgram = true;
    ctx->FragmentProgram._MaintainTexEnvProgram = true;
@@ -1144,7 +1210,7 @@ intel_resolve_for_dri2_flush(struct brw_context *brw,
       if (rb == NULL || rb->mt == NULL)
          continue;
       if (rb->mt->num_samples <= 1)
-         intel_miptree_resolve_color(brw, rb->mt);
+         intel_miptree_resolve_color(brw, rb->mt, 0);
       else
          intel_renderbuffer_downsample(brw, rb);
    }