nir: Add a flag to lower_io to force "sample" interpolation
[mesa.git] / src / mesa / drivers / dri / i965 / intel_screen.c
index c2efc6ed200f453fc73e4ad1b81063bfdce883bc..a3d252d030ddb613ef886f2bdfe0b2712d6a06ee 100644 (file)
@@ -65,6 +65,8 @@ DRI_CONF_BEGIN
    DRI_CONF_SECTION_QUALITY
       DRI_CONF_FORCE_S3TC_ENABLE("false")
 
+      DRI_CONF_PRECISE_TRIG("false")
+
       DRI_CONF_OPT_BEGIN(clamp_max_samples, int, -1)
               DRI_CONF_DESC(en, "Clamp the value of GL_MAX_SAMPLES to the "
                             "given integer. If negative, then do not clamp.")
@@ -86,6 +88,10 @@ DRI_CONF_BEGIN
         DRI_CONF_DESC(en, "Perform code generation at shader link time.")
       DRI_CONF_OPT_END
    DRI_CONF_SECTION_END
+
+   DRI_CONF_SECTION_MISCELLANEOUS
+      DRI_CONF_GLSL_ZERO_INIT("false")
+   DRI_CONF_SECTION_END
 DRI_CONF_END
 };
 
@@ -258,6 +264,31 @@ static struct intel_image_format intel_image_formats[] = {
        { 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
        { 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
 
+   { __DRI_IMAGE_FOURCC_YVU410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
+       { 2, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 },
+       { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } },
+
+   { __DRI_IMAGE_FOURCC_YVU411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
+       { 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 },
+       { 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
+
+   { __DRI_IMAGE_FOURCC_YVU420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
+       { 2, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 },
+       { 1, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 } } },
+
+   { __DRI_IMAGE_FOURCC_YVU422, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
+       { 2, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 },
+       { 1, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
+
+   { __DRI_IMAGE_FOURCC_YVU444, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
+       { 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
+       { 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
+
    { __DRI_IMAGE_FOURCC_NV12, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
      { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
        { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } },
@@ -572,13 +603,9 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
       *value = image->planar_format->components;
       return true;
    case __DRI_IMAGE_ATTRIB_FD:
-      if (drm_intel_bo_gem_export_to_prime(image->bo, value) == 0)
-         return true;
-      return false;
+      return !drm_intel_bo_gem_export_to_prime(image->bo, value);
    case __DRI_IMAGE_ATTRIB_FOURCC:
-      if (intel_lookup_fourcc(image->dri_format, value))
-         return true;
-      return false;
+      return intel_lookup_fourcc(image->dri_format, value);
    case __DRI_IMAGE_ATTRIB_NUM_PLANES:
       *value = 1;
       return true;
@@ -676,9 +703,14 @@ intel_create_image_from_fds(__DRIscreen *screen,
    __DRIimage *image;
    int i, index;
 
-   if (fds == NULL || num_fds != 1)
+   if (fds == NULL || num_fds < 1)
       return NULL;
 
+   /* We only support all planes from the same bo */
+   for (i = 0; i < num_fds; i++)
+      if (fds[0] != fds[i])
+         return NULL;
+
    f = intel_image_format_lookup(fourcc);
    if (f == NULL)
       return NULL;
@@ -691,22 +723,28 @@ intel_create_image_from_fds(__DRIscreen *screen,
    if (image == NULL)
       return NULL;
 
-   image->bo = drm_intel_bo_gem_create_from_prime(intelScreen->bufmgr,
-                                                  fds[0],
-                                                  height * strides[0]);
-   if (image->bo == NULL) {
-      free(image);
-      return NULL;
-   }
    image->width = width;
    image->height = height;
    image->pitch = strides[0];
 
    image->planar_format = f;
+   int size = 0;
    for (i = 0; i < f->nplanes; i++) {
       index = f->planes[i].buffer_index;
       image->offsets[index] = offsets[index];
       image->strides[index] = strides[index];
+
+      const int plane_height = height >> f->planes[i].height_shift;
+      const int end = offsets[index] + plane_height * strides[index];
+      if (size < end)
+         size = end;
+   }
+
+   image->bo = drm_intel_bo_gem_create_from_prime(intelScreen->bufmgr,
+                                                  fds[0], size);
+   if (image->bo == NULL) {
+      free(image);
+      return NULL;
    }
 
    if (f->nplanes == 1) {
@@ -732,8 +770,7 @@ intel_create_image_from_dma_bufs(__DRIscreen *screen,
    __DRIimage *image;
    struct intel_image_format *f = intel_image_format_lookup(fourcc);
 
-   /* For now only packed formats that have native sampling are supported. */
-   if (!f || f->nplanes != 1) {
+   if (!f) {
       *error = __DRI_IMAGE_ERROR_BAD_MATCH;
       return NULL;
    }
@@ -933,27 +970,40 @@ static const __DRIextension *intelRobustScreenExtensions[] = {
 };
 
 static int
-intel_get_param(__DRIscreen *psp, int param, int *value)
+intel_get_param(struct intel_screen *screen, int param, int *value)
 {
-   int ret;
+   int ret = 0;
    struct drm_i915_getparam gp;
 
    memset(&gp, 0, sizeof(gp));
    gp.param = param;
    gp.value = value;
 
-   ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
-   if (ret < 0 && ret != -EINVAL)
-        _mesa_warning(NULL, "drm_i915_getparam: %d", ret);
+   if (drmIoctl(screen->driScrnPriv->fd, DRM_IOCTL_I915_GETPARAM, &gp) == -1) {
+      ret = -errno;
+      if (ret != -EINVAL)
+         _mesa_warning(NULL, "drm_i915_getparam: %d", ret);
+   }
 
    return ret;
 }
 
 static bool
-intel_get_boolean(__DRIscreen *psp, int param)
+intel_get_boolean(struct intel_screen *screen, int param)
 {
    int value = 0;
-   return (intel_get_param(psp, param, &value) == 0) && value;
+   return (intel_get_param(screen, param, &value) == 0) && value;
+}
+
+static int
+intel_get_integer(struct intel_screen *screen, int param)
+{
+   int value = -1;
+
+   if (intel_get_param(screen, param, &value) == 0)
+      return value;
+
+   return -1;
 }
 
 static void
@@ -1088,12 +1138,12 @@ intel_detect_sseu(struct intel_screen *intelScreen)
    intelScreen->subslice_total = -1;
    intelScreen->eu_total = -1;
 
-   ret = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_SUBSLICE_TOTAL,
+   ret = intel_get_param(intelScreen, I915_PARAM_SUBSLICE_TOTAL,
                          &intelScreen->subslice_total);
    if (ret < 0 && ret != -EINVAL)
       goto err_out;
 
-   ret = intel_get_param(intelScreen->driScrnPriv,
+   ret = intel_get_param(intelScreen,
                          I915_PARAM_EU_TOTAL, &intelScreen->eu_total);
    if (ret < 0 && ret != -EINVAL)
       goto err_out;
@@ -1111,7 +1161,7 @@ intel_detect_sseu(struct intel_screen *intelScreen)
 err_out:
    intelScreen->subslice_total = -1;
    intelScreen->eu_total = -1;
-   _mesa_warning(NULL, "Failed to query GPU properties (%s).\n", strerror(ret));
+   _mesa_warning(NULL, "Failed to query GPU properties (%s).\n", strerror(-ret));
 }
 
 static bool
@@ -1130,7 +1180,7 @@ intel_init_bufmgr(struct intel_screen *intelScreen)
 
    drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr);
 
-   if (!intel_get_boolean(spriv, I915_PARAM_HAS_RELAXED_DELTA)) {
+   if (!intel_get_boolean(intelScreen, I915_PARAM_HAS_RELAXED_DELTA)) {
       fprintf(stderr, "[%s: %u] Kernel 2.6.39 required.\n", __func__, __LINE__);
       return false;
    }
@@ -1250,7 +1300,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
    static const uint8_t multisample_samples[2]  = {4, 8};
 
    struct intel_screen *screen = dri_screen->driverPrivate;
-   const struct brw_device_info *devinfo = screen->devinfo;
+   const struct gen_device_info *devinfo = screen->devinfo;
    uint8_t depth_bits[4], stencil_bits[4];
    __DRIconfig **configs = NULL;
 
@@ -1285,7 +1335,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
                                      num_depth_stencil_bits,
                                      back_buffer_modes, 2,
                                      singlesample_samples, 1,
-                                     false);
+                                     false, false);
       configs = driConcatConfigs(configs, new_configs);
    }
 
@@ -1307,7 +1357,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
                                      depth_bits, stencil_bits, 1,
                                      back_buffer_modes, 1,
                                      singlesample_samples, 1,
-                                     true);
+                                     true, false);
       configs = driConcatConfigs(configs, new_configs);
    }
 
@@ -1355,7 +1405,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
                                      back_buffer_modes, 1,
                                      multisample_samples,
                                      num_msaa_modes,
-                                     false);
+                                     false, false);
       configs = driConcatConfigs(configs, new_configs);
    }
 
@@ -1376,12 +1426,17 @@ set_max_gl_versions(struct intel_screen *screen)
    switch (screen->devinfo->gen) {
    case 9:
    case 8:
-      psp->max_gl_core_version = 40;
+      psp->max_gl_core_version = 44;
       psp->max_gl_compat_version = 30;
       psp->max_gl_es1_version = 11;
       psp->max_gl_es2_version = 31;
       break;
    case 7:
+      psp->max_gl_core_version = 33;
+      psp->max_gl_compat_version = 30;
+      psp->max_gl_es1_version = 11;
+      psp->max_gl_es2_version = screen->devinfo->is_haswell ? 31 : 30;
+      break;
    case 6:
       psp->max_gl_core_version = 33;
       psp->max_gl_compat_version = 30;
@@ -1432,6 +1487,46 @@ brw_get_revision(int fd)
 #define I915_PARAM_HAS_RESOURCE_STREAMER 36
 #endif
 
+static void
+shader_debug_log_mesa(void *data, const char *fmt, ...)
+{
+   struct brw_context *brw = (struct brw_context *)data;
+   va_list args;
+
+   va_start(args, fmt);
+   GLuint msg_id = 0;
+   _mesa_gl_vdebug(&brw->ctx, &msg_id,
+                   MESA_DEBUG_SOURCE_SHADER_COMPILER,
+                   MESA_DEBUG_TYPE_OTHER,
+                   MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args);
+   va_end(args);
+}
+
+static void
+shader_perf_log_mesa(void *data, const char *fmt, ...)
+{
+   struct brw_context *brw = (struct brw_context *)data;
+
+   va_list args;
+   va_start(args, fmt);
+
+   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+      va_list args_copy;
+      va_copy(args_copy, args);
+      vfprintf(stderr, fmt, args_copy);
+      va_end(args_copy);
+   }
+
+   if (brw->perf_debug) {
+      GLuint msg_id = 0;
+      _mesa_gl_vdebug(&brw->ctx, &msg_id,
+                      MESA_DEBUG_SOURCE_SHADER_COMPILER,
+                      MESA_DEBUG_TYPE_PERFORMANCE,
+                      MESA_DEBUG_SEVERITY_MEDIUM, fmt, args);
+   }
+   va_end(args);
+}
+
 /**
  * This is the driver specific part of the createNewScreen entry point.
  * Called when using DRI2.
@@ -1468,7 +1563,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
        return false;
 
    intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
-   intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID);
+   intelScreen->devinfo = gen_get_device_info(intelScreen->deviceID);
    if (!intelScreen->devinfo)
       return false;
 
@@ -1486,12 +1581,52 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
    if (INTEL_DEBUG & DEBUG_AUB)
       drm_intel_bufmgr_gem_set_aub_dump(intelScreen->bufmgr, true);
 
+#ifndef I915_PARAM_MMAP_GTT_VERSION
+#define I915_PARAM_MMAP_GTT_VERSION 40 /* XXX delete me with new libdrm */
+#endif
+   if (intel_get_integer(intelScreen, I915_PARAM_MMAP_GTT_VERSION) >= 1) {
+      /* Theorectically unlimited! At least for individual objects...
+       *
+       * Currently the entire (global) address space for all GTT maps is
+       * limited to 64bits. That is all objects on the system that are
+       * setup for GTT mmapping must fit within 64bits. An attempt to use
+       * one that exceeds the limit with fail in drm_intel_bo_map_gtt().
+       *
+       * Long before we hit that limit, we will be practically limited by
+       * that any single object must fit in physical memory (RAM). The upper
+       * limit on the CPU's address space is currently 48bits (Skylake), of
+       * which only 39bits can be physical memory. (The GPU itself also has
+       * a 48bit addressable virtual space.) We can fit over 32 million
+       * objects of the current maximum allocable size before running out
+       * of mmap space.
+       */
+      intelScreen->max_gtt_map_object_size = UINT64_MAX;
+   } else {
+      /* Estimate the size of the mappable aperture into the GTT.  There's an
+       * ioctl to get the whole GTT size, but not one to get the mappable subset.
+       * It turns out it's basically always 256MB, though some ancient hardware
+       * was smaller.
+       */
+      uint32_t gtt_size = 256 * 1024 * 1024;
+
+      /* We don't want to map two objects such that a memcpy between them would
+       * just fault one mapping in and then the other over and over forever.  So
+       * we would need to divide the GTT size by 2.  Additionally, some GTT is
+       * taken up by things like the framebuffer and the ringbuffer and such, so
+       * be more conservative.
+       */
+      intelScreen->max_gtt_map_object_size = gtt_size / 4;
+   }
+
    intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
    intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen);
 
    /* GENs prior to 8 do not support EU/Subslice info */
-   if (intelScreen->devinfo->gen >= 8)
+   if (intelScreen->devinfo->gen >= 8) {
       intel_detect_sseu(intelScreen);
+   } else if (intelScreen->devinfo->gen == 7) {
+      intelScreen->subslice_total = 1 << (intelScreen->devinfo->gt - 1);
+   }
 
    const char *force_msaa = getenv("INTEL_FORCE_MSAA");
    if (force_msaa) {
@@ -1524,12 +1659,10 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
          (ret != -1 || errno != EINVAL);
    }
 
-   struct drm_i915_getparam getparam;
-   getparam.param = I915_PARAM_CMD_PARSER_VERSION;
-   getparam.value = &intelScreen->cmd_parser_version;
-   const int ret = drmIoctl(psp->fd, DRM_IOCTL_I915_GETPARAM, &getparam);
-   if (ret == -1)
+   if (intel_get_param(intelScreen, I915_PARAM_CMD_PARSER_VERSION,
+                       &intelScreen->cmd_parser_version) < 0) {
       intelScreen->cmd_parser_version = 0;
+   }
 
    /* Haswell requires command parser version 6 in order to write to the
     * MI_MATH GPR registers, and version 7 in order to use
@@ -1544,15 +1677,13 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
 
    intelScreen->compiler = brw_compiler_create(intelScreen,
                                                intelScreen->devinfo);
+   intelScreen->compiler->shader_debug_log = shader_debug_log_mesa;
+   intelScreen->compiler->shader_perf_log = shader_perf_log_mesa;
    intelScreen->program_id = 1;
 
    if (intelScreen->devinfo->has_resource_streamer) {
-      int val = -1;
-      getparam.param = I915_PARAM_HAS_RESOURCE_STREAMER;
-      getparam.value = &val;
-
-      drmIoctl(psp->fd, DRM_IOCTL_I915_GETPARAM, &getparam);
-      intelScreen->has_resource_streamer = val > 0;
+      intelScreen->has_resource_streamer =
+        intel_get_boolean(intelScreen, I915_PARAM_HAS_RESOURCE_STREAMER);
    }
 
    return (const __DRIconfig**) intel_screen_make_configs(psp);