i965/fs: Lower 32x32 bit multiplication on BXT.
[mesa.git] / src / mesa / drivers / dri / i965 / intel_screen.c
index 8070e9708c5d9253d7c46f1f779d2748becb16d8..147fa1ea49e77ef8d4f28541bcaa4a884fbd5735 100644 (file)
@@ -38,6 +38,8 @@
 #include "main/version.h"
 #include "swrast/s_renderbuffer.h"
 #include "util/ralloc.h"
+#include "brw_shader.h"
+#include "glsl/nir/nir.h"
 
 #include "utils.h"
 #include "xmlpool.h"
@@ -61,10 +63,6 @@ DRI_CONF_BEGIN
       DRI_CONF_OPT_BEGIN_B(hiz, "true")
         DRI_CONF_DESC(en, "Enable Hierarchical Z on gen6+")
       DRI_CONF_OPT_END
-
-      DRI_CONF_OPT_BEGIN_B(disable_derivative_optimization, "false")
-        DRI_CONF_DESC(en, "Derivatives with finer granularity by default")
-      DRI_CONF_OPT_END
    DRI_CONF_SECTION_END
 
    DRI_CONF_SECTION_QUALITY
@@ -96,7 +94,6 @@ DRI_CONF_END
 #include "intel_batchbuffer.h"
 #include "intel_buffers.h"
 #include "intel_bufmgr.h"
-#include "intel_chipset.h"
 #include "intel_fbo.h"
 #include "intel_mipmap_tree.h"
 #include "intel_screen.h"
@@ -179,10 +176,10 @@ intel_dri2_flush_with_flags(__DRIcontext *cPriv,
    if (flags & __DRI2_FLUSH_DRAWABLE)
       intel_resolve_for_dri2_flush(brw, dPriv);
 
-   if (reason == __DRI2_THROTTLE_SWAPBUFFER ||
-       reason == __DRI2_THROTTLE_FLUSHFRONT) {
-      brw->need_throttle = true;
-   }
+   if (reason == __DRI2_THROTTLE_SWAPBUFFER)
+      brw->need_swap_throttle = true;
+   if (reason == __DRI2_THROTTLE_FLUSHFRONT)
+      brw->need_flush_throttle = true;
 
    intel_batchbuffer_flush(brw);
 
@@ -232,6 +229,12 @@ static struct intel_image_format intel_image_formats[] = {
    { __DRI_IMAGE_FOURCC_RGB565, __DRI_IMAGE_COMPONENTS_RGB, 1,
      { { 0, 0, 0, __DRI_IMAGE_FORMAT_RGB565, 2 } } },
 
+   { __DRI_IMAGE_FOURCC_R8, __DRI_IMAGE_COMPONENTS_R, 1,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } },
+
+   { __DRI_IMAGE_FOURCC_GR88, __DRI_IMAGE_COMPONENTS_RG, 1,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, } },
+
    { __DRI_IMAGE_FOURCC_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
      { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
        { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 },
@@ -305,6 +308,17 @@ intel_image_format_lookup(int fourcc)
    return f;
 }
 
+static boolean intel_lookup_fourcc(int dri_format, int *fourcc)
+{
+   for (unsigned i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
+      if (intel_image_formats[i].planes[0].dri_format == dri_format) {
+         *fourcc = intel_image_formats[i].fourcc;
+         return true;
+      }
+   }
+   return false;
+}
+
 static __DRIimage *
 intel_allocate_image(int dri_format, void *loaderPrivate)
 {
@@ -564,6 +578,14 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
       if (drm_intel_bo_gem_export_to_prime(image->bo, value) == 0)
          return true;
       return false;
+   case __DRI_IMAGE_ATTRIB_FOURCC:
+      if (intel_lookup_fourcc(image->dri_format, value))
+         return true;
+      return false;
+   case __DRI_IMAGE_ATTRIB_NUM_PLANES:
+      *value = 1;
+      return true;
+
   default:
       return false;
    }
@@ -692,7 +714,7 @@ intel_create_image_from_fds(__DRIscreen *screen,
 
    if (f->nplanes == 1) {
       image->offset = image->offsets[0];
-      intel_image_warn_if_unaligned(image, __FUNCTION__);
+      intel_image_warn_if_unaligned(image, __func__);
    }
 
    return image;
@@ -783,13 +805,13 @@ intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
     image->pitch = stride;
     image->offset = offset;
 
-    intel_image_warn_if_unaligned(image, __FUNCTION__);
+    intel_image_warn_if_unaligned(image, __func__);
 
     return image;
 }
 
 static const __DRIimageExtension intelImageExtension = {
-    .base = { __DRI_IMAGE, 8 },
+    .base = { __DRI_IMAGE, 11 },
 
     .createImageFromName                = intel_create_image_from_name,
     .createImageFromRenderbuffer        = intel_create_image_from_renderbuffer,
@@ -802,7 +824,9 @@ static const __DRIimageExtension intelImageExtension = {
     .fromPlanar                         = intel_from_planar,
     .createImageFromTexture             = intel_create_image_from_texture,
     .createImageFromFds                 = intel_create_image_from_fds,
-    .createImageFromDmaBufs             = intel_create_image_from_dma_bufs
+    .createImageFromDmaBufs             = intel_create_image_from_dma_bufs,
+    .blitImage                          = NULL,
+    .getCapabilities                    = NULL
 };
 
 static int
@@ -892,6 +916,7 @@ static const __DRIrobustnessExtension dri2Robustness = {
 
 static const __DRIextension *intelScreenExtensions[] = {
     &intelTexBufferExtension.base,
+    &intelFenceExtension.base,
     &intelFlushExtension.base,
     &intelImageExtension.base,
     &intelRendererQueryExtension.base,
@@ -901,6 +926,7 @@ static const __DRIextension *intelScreenExtensions[] = {
 
 static const __DRIextension *intelRobustScreenExtensions[] = {
     &intelTexBufferExtension.base,
+    &intelFenceExtension.base,
     &intelFlushExtension.base,
     &intelImageExtension.base,
     &intelRendererQueryExtension.base,
@@ -1103,6 +1129,50 @@ intel_detect_swizzling(struct intel_screen *screen)
       return true;
 }
 
+static int
+intel_detect_timestamp(struct intel_screen *screen)
+{
+   uint64_t dummy = 0, last = 0;
+   int upper, lower, loops;
+
+   /* On 64bit systems, some old kernels trigger a hw bug resulting in the
+    * TIMESTAMP register being shifted and the low 32bits always zero.
+    *
+    * More recent kernels offer an interface to read the full 36bits
+    * everywhere.
+    */
+   if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP | 1, &dummy) == 0)
+      return 3;
+
+   /* Determine if we have a 32bit or 64bit kernel by inspecting the
+    * upper 32bits for a rapidly changing timestamp.
+    */
+   if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &last))
+      return 0;
+
+   upper = lower = 0;
+   for (loops = 0; loops < 10; loops++) {
+      /* The TIMESTAMP should change every 80ns, so several round trips
+       * through the kernel should be enough to advance it.
+       */
+      if (drm_intel_reg_read(screen->bufmgr, TIMESTAMP, &dummy))
+         return 0;
+
+      upper += (dummy >> 32) != (last >> 32);
+      if (upper > 1) /* beware 32bit counter overflow */
+         return 2; /* upper dword holds the low 32bits of the timestamp */
+
+      lower += (dummy & 0xffffffff) != (last & 0xffffffff);
+      if (lower > 1)
+         return 1; /* timestamp is unshifted */
+
+      last = dummy;
+   }
+
+   /* No advancement? No timestamp! */
+   return 0;
+}
+
 /**
  * Return array of MSAA modes supported by the hardware. The array is
  * zero-terminated and sorted in decreasing order.
@@ -1131,7 +1201,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
 {
    static const mesa_format formats[] = {
       MESA_FORMAT_B5G6R5_UNORM,
-      MESA_FORMAT_B8G8R8A8_UNORM
+      MESA_FORMAT_B8G8R8A8_UNORM,
+      MESA_FORMAT_B8G8R8X8_UNORM
    };
 
    /* GLX_SWAP_COPY_OML is not supported due to page flipping. */
@@ -1267,15 +1338,11 @@ set_max_gl_versions(struct intel_screen *screen)
    __DRIscreen *psp = screen->driScrnPriv;
 
    switch (screen->devinfo->gen) {
+   case 9:
    case 8:
    case 7:
-      psp->max_gl_core_version = 33;
-      psp->max_gl_compat_version = 30;
-      psp->max_gl_es1_version = 11;
-      psp->max_gl_es2_version = 30;
-      break;
    case 6:
-      psp->max_gl_core_version = 32;
+      psp->max_gl_core_version = 33;
       psp->max_gl_compat_version = 30;
       psp->max_gl_es1_version = 11;
       psp->max_gl_es2_version = 30;
@@ -1292,6 +1359,29 @@ set_max_gl_versions(struct intel_screen *screen)
    }
 }
 
+static int
+brw_get_revision(int fd)
+{
+   struct drm_i915_getparam gp;
+   int revision;
+   int ret;
+
+   memset(&gp, 0, sizeof(gp));
+   gp.param = I915_PARAM_REVISION;
+   gp.value = &revision;
+
+   ret = drmCommandWriteRead(fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
+   if (ret)
+      revision = -1;
+
+   return revision;
+}
+
+/* Drop when RS headers get pulled to libdrm */
+#ifndef I915_PARAM_HAS_RESOURCE_STREAMER
+#define I915_PARAM_HAS_RESOURCE_STREAMER 36
+#endif
+
 /**
  * This is the driver specific part of the createNewScreen entry point.
  * Called when using DRI2.
@@ -1328,13 +1418,17 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
        return false;
 
    intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
-   intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID);
+   intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID,
+                                              brw_get_revision(psp->fd));
    if (!intelScreen->devinfo)
       return false;
 
+   brw_process_intel_debug_variable(intelScreen);
+
    intelScreen->hw_must_use_separate_stencil = intelScreen->devinfo->gen >= 7;
 
    intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
+   intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen);
 
    const char *force_msaa = getenv("INTEL_FORCE_MSAA");
    if (force_msaa) {
@@ -1367,11 +1461,27 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
          (ret != -1 || errno != EINVAL);
    }
 
+   struct drm_i915_getparam getparam;
+   getparam.param = I915_PARAM_CMD_PARSER_VERSION;
+   getparam.value = &intelScreen->cmd_parser_version;
+   const int ret = drmIoctl(psp->fd, DRM_IOCTL_I915_GETPARAM, &getparam);
+   if (ret == -1)
+      intelScreen->cmd_parser_version = 0;
+
    psp->extensions = !intelScreen->has_context_reset_notification
       ? intelScreenExtensions : intelRobustScreenExtensions;
 
-   brw_fs_alloc_reg_sets(intelScreen);
-   brw_vec4_alloc_reg_set(intelScreen);
+   intelScreen->compiler = brw_compiler_create(intelScreen,
+                                               intelScreen->devinfo);
+
+   if (intelScreen->devinfo->has_resource_streamer) {
+      int val = -1;
+      getparam.param = I915_PARAM_HAS_RESOURCE_STREAMER;
+      getparam.value = &val;
+
+      drmIoctl(psp->fd, DRM_IOCTL_I915_GETPARAM, &getparam);
+      intelScreen->has_resource_streamer = val > 0;
+   }
 
    return (const __DRIconfig**) intel_screen_make_configs(psp);
 }