i965/miptree: Use miptree_map in map_blit functions
[mesa.git] / src / mesa / drivers / dri / i965 / intel_fbo.c
index ca80b962956b80ffbd4e03349882fdfa5ad27c44..4ad274741007701295ed54d722ef72f9270fefcf 100644 (file)
@@ -105,7 +105,8 @@ intel_map_renderbuffer(struct gl_context *ctx,
                       GLuint x, GLuint y, GLuint w, GLuint h,
                       GLbitfield mode,
                       GLubyte **out_map,
-                      GLint *out_stride)
+                      GLint *out_stride,
+                      bool flip_y)
 {
    struct brw_context *brw = brw_context(ctx);
    struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb;
@@ -162,14 +163,14 @@ intel_map_renderbuffer(struct gl_context *ctx,
     * upside-down.  So we need to ask for a rectangle on flipped vertically, and
     * we then return a pointer to the bottom of it with a negative stride.
     */
-   if (rb->Name == 0) {
+   if (flip_y) {
       y = rb->Height - y - h;
    }
 
    intel_miptree_map(brw, mt, irb->mt_level, irb->mt_layer,
                     x, y, w, h, mode, &map, &stride);
 
-   if (rb->Name == 0) {
+   if (flip_y) {
       map += (h - 1) * stride;
       stride = -stride;
    }
@@ -249,6 +250,7 @@ static mesa_format
 intel_renderbuffer_format(struct gl_context * ctx, GLenum internalFormat)
 {
    struct brw_context *brw = brw_context(ctx);
+   MAYBE_UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
    switch (internalFormat) {
    default:
@@ -270,7 +272,7 @@ intel_renderbuffer_format(struct gl_context * ctx, GLenum internalFormat)
       if (brw->has_separate_stencil) {
         return MESA_FORMAT_S_UINT8;
       } else {
-        assert(!brw->must_use_separate_stencil);
+        assert(!devinfo->must_use_separate_stencil);
         return MESA_FORMAT_Z24_UNORM_S8_UINT;
       }
    }
@@ -288,6 +290,7 @@ intel_alloc_private_renderbuffer_storage(struct gl_context * ctx, struct gl_rend
    assert(rb->Format != MESA_FORMAT_NONE);
 
    rb->NumSamples = intel_quantize_num_samples(screen, rb->NumSamples);
+   rb->NumStorageSamples = rb->NumSamples;
    rb->Width = width;
    rb->Height = height;
    rb->_BaseFormat = _mesa_get_format_base_format(rb->Format);
@@ -363,7 +366,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
     * content.
     */
    irb->mt = intel_miptree_create_for_dri_image(brw, image, GL_TEXTURE_2D,
-                                                ISL_COLORSPACE_NONE, false);
+                                                image->format, false);
    if (!irb->mt)
       return;
 
@@ -431,6 +434,7 @@ intel_create_winsys_renderbuffer(struct intel_screen *screen,
    _mesa_init_renderbuffer(rb, 0);
    rb->ClassID = INTEL_RB_CLASS;
    rb->NumSamples = num_samples;
+   rb->NumStorageSamples = num_samples;
 
    /* The base format and internal format must be derived from the user-visible
     * format (that is, the gl_config's format), even if we internally use
@@ -634,6 +638,7 @@ static void
 intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
 {
    struct brw_context *brw = brw_context(ctx);
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
    struct intel_renderbuffer *depthRb =
       intel_get_renderbuffer(fb, BUFFER_DEPTH);
    struct intel_renderbuffer *stencilRb =
@@ -654,7 +659,7 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
    }
 
    if (depth_mt && stencil_mt) {
-      if (brw->gen >= 6) {
+      if (devinfo->gen >= 6) {
          const unsigned d_width = depth_mt->surf.phys_level0_sa.width;
          const unsigned d_height = depth_mt->surf.phys_level0_sa.height;
          const unsigned d_depth = depth_mt->surf.dim == ISL_SURF_DIM_3D ?
@@ -707,7 +712,7 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
                            "instead of S8\n",
                            _mesa_get_format_name(stencil_mt->format));
         }
-        if (brw->gen < 7 && !intel_renderbuffer_has_hiz(depthRb)) {
+        if (devinfo->gen < 7 && !intel_renderbuffer_has_hiz(depthRb)) {
            /* Before Gen7, separate depth and stencil buffers can be used
             * only if HiZ is enabled. From the Sandybridge PRM, Volume 2,
             * Part 1, Bit 3DSTATE_DEPTH_BUFFER.SeparateStencilBufferEnable:
@@ -843,11 +848,12 @@ intel_blit_framebuffer_with_blitter(struct gl_context *ctx,
          if (!intel_miptree_blit(brw,
                                  src_irb->mt,
                                  src_irb->mt_level, src_irb->mt_layer,
-                                 srcX0, srcY0, src_rb->Name == 0,
+                                 srcX0, srcY0, readFb->FlipY,
                                  dst_irb->mt,
                                  dst_irb->mt_level, dst_irb->mt_layer,
-                                 dstX0, dstY0, dst_rb->Name == 0,
-                                 dstX1 - dstX0, dstY1 - dstY0, GL_COPY)) {
+                                 dstX0, dstY0, drawFb->FlipY,
+                                 dstX1 - dstX0, dstY1 - dstY0,
+                                 COLOR_LOGICOP_COPY)) {
             perf_debug("glBlitFramebuffer(): unknown blit failure.  "
                        "Falling back to software rendering.\n");
             return mask;
@@ -869,6 +875,7 @@ intel_blit_framebuffer(struct gl_context *ctx,
                        GLbitfield mask, GLenum filter)
 {
    struct brw_context *brw = brw_context(ctx);
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
    /* Page 679 of OpenGL 4.4 spec says:
     *    "Added BlitFramebuffer to commands affected by conditional rendering in
@@ -877,7 +884,7 @@ intel_blit_framebuffer(struct gl_context *ctx,
    if (!_mesa_check_conditional_render(ctx))
       return;
 
-   if (brw->gen < 6) {
+   if (devinfo->gen < 6) {
       /* On gen4-5, try BLT first.
        *
        * Gen4-5 have a single ring for both 3D and BLT operations, so there's
@@ -907,18 +914,10 @@ intel_blit_framebuffer(struct gl_context *ctx,
    if (mask == 0x0)
       return;
 
-   if (brw->gen >= 8 && (mask & GL_STENCIL_BUFFER_BIT)) {
+   if (devinfo->gen >= 8 && (mask & GL_STENCIL_BUFFER_BIT)) {
       assert(!"Invalid blit");
    }
 
-   /* Try using the BLT engine. */
-   mask = intel_blit_framebuffer_with_blitter(ctx, readFb, drawFb,
-                                              srcX0, srcY0, srcX1, srcY1,
-                                              dstX0, dstY0, dstX1, dstY1,
-                                              mask);
-   if (mask == 0x0)
-      return;
-
    _swrast_BlitFramebuffer(ctx, readFb, drawFb,
                            srcX0, srcY0, srcX1, srcY1,
                            dstX0, dstY0, dstX1, dstY1,
@@ -967,19 +966,15 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
 }
 
 void
-brw_render_cache_set_clear(struct brw_context *brw)
+brw_cache_sets_clear(struct brw_context *brw) /* empties both BO caches */
 {
-   struct set_entry *entry;
+   struct hash_entry *render_entry;
+   hash_table_foreach(brw->render_cache, render_entry) /* drop every entry */
+      _mesa_hash_table_remove(brw->render_cache, render_entry);
 
-   set_foreach(brw->render_cache, entry) {
-      _mesa_set_remove(brw->render_cache, entry);
-   }
-}
-
-void
-brw_render_cache_set_add_bo(struct brw_context *brw, struct brw_bo *bo)
-{
-   _mesa_set_add(brw->render_cache, bo);
+   struct set_entry *depth_entry;
+   set_foreach(brw->depth_cache, depth_entry) /* same for the depth set */
+      _mesa_set_remove(brw->depth_cache, depth_entry);
 }
 
 /**
@@ -994,13 +989,12 @@ brw_render_cache_set_add_bo(struct brw_context *brw, struct brw_bo *bo)
  * necessary is flushed before another use of that BO, but for reuse from
  * different caches within a batchbuffer, it's all our responsibility.
  */
-void
-brw_render_cache_set_check_flush(struct brw_context *brw, struct brw_bo *bo)
+static void
+flush_depth_and_render_caches(struct brw_context *brw, struct brw_bo *bo)
 {
-   if (!_mesa_set_search(brw->render_cache, bo))
-      return;
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
-   if (brw->gen >= 6) {
+   if (devinfo->gen >= 6) {
       brw_emit_pipe_control_flush(brw,
                                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                   PIPE_CONTROL_RENDER_TARGET_FLUSH |
@@ -1013,7 +1007,89 @@ brw_render_cache_set_check_flush(struct brw_context *brw, struct brw_bo *bo)
       brw_emit_mi_flush(brw);
    }
 
-   brw_render_cache_set_clear(brw);
+   brw_cache_sets_clear(brw);
+}
+
+void
+brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo)
+{  /* Flush only when bo is tracked in the render cache or the depth cache. */
+   if (_mesa_hash_table_search(brw->render_cache, bo) ||
+       _mesa_set_search(brw->depth_cache, bo))
+      flush_depth_and_render_caches(brw, bo); /* also empties both caches */
+}
+
+static void *
+format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage)
+{  /* Packs (format << 8) | aux_usage into a pointer-sized hash-entry value. */
+   return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage);
+}
+
+void
+brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo,
+                           enum isl_format format,
+                           enum isl_aux_usage aux_usage)
+{
+   if (_mesa_set_search(brw->depth_cache, bo)) /* bo tracked as depth */
+      flush_depth_and_render_caches(brw, bo);
+
+   /* Check to see if this bo has been used by a previous rendering operation
+    * but with a different format or aux usage.  If it has, flush the render
+    * cache so we ensure that it's only in there with one format or aux usage
+    * at a time.
+    *
+    * Even though it's not obvious, this can easily happen in practice.
+    * Suppose a client is blending on a surface with sRGB encode enabled on
+    * gen9.  This implies that you get AUX_USAGE_CCS_D at best.  If the client
+    * then disables sRGB encode and continues blending we will flip on
+    * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
+    * perfectly valid since CCS_E is a subset of CCS_D).  However, this means
+    * that we have fragments in-flight which are rendering with UNORM+CCS_E
+    * and other fragments in-flight with SRGB+CCS_D on the same surface at the
+    * same time and the pixel scoreboard and color blender are trying to sort
+    * it all out.  This ends badly (i.e. GPU hangs).
+    *
+    * To date, we have never observed GPU hangs or even corruption to be
+    * associated with switching the format, only the aux usage.  However,
+    * there are comments in various docs which indicate that the render cache
+    * isn't 100% resilient to format changes.  We may as well be conservative
+    * and flush on format changes too.  We can always relax this later if we
+    * find it to be a performance problem.
+    */
+   struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo);
+   if (entry && entry->data != format_aux_tuple(format, aux_usage))
+      flush_depth_and_render_caches(brw, bo); /* stored tuple differs */
+}
+
+void
+brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo,
+                        enum isl_format format,
+                        enum isl_aux_usage aux_usage)
+{  /* Records bo in render_cache with its packed (format, aux_usage) data. */
+#ifndef NDEBUG /* consistency check below exists only when asserts are on */
+   struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo);
+   if (entry) {
+      /* Otherwise, someone didn't do a flush_for_render and that would be
+       * very bad indeed.
+       */
+      assert(entry->data == format_aux_tuple(format, aux_usage));
+   }
+#endif
+
+   _mesa_hash_table_insert(brw->render_cache, bo,
+                           format_aux_tuple(format, aux_usage));
+}
+
+void
+brw_cache_flush_for_depth(struct brw_context *brw, struct brw_bo *bo)
+{  /* Flush only when bo is currently tracked in the render cache. */
+   if (_mesa_hash_table_search(brw->render_cache, bo))
+      flush_depth_and_render_caches(brw, bo);
+}
+
+void
+brw_depth_cache_add_bo(struct brw_context *brw, struct brw_bo *bo)
+{  /* Starts tracking bo in the depth cache set. */
+   _mesa_set_add(brw->depth_cache, bo);
+}
 
 /**
@@ -1033,6 +1109,8 @@ intel_fbo_init(struct brw_context *brw)
    dd->EGLImageTargetRenderbufferStorage =
       intel_image_target_renderbuffer_storage;
 
-   brw->render_cache = _mesa_set_create(brw, _mesa_hash_pointer,
-                                        _mesa_key_pointer_equal);
+   brw->render_cache = _mesa_hash_table_create(brw, _mesa_hash_pointer,
+                                               _mesa_key_pointer_equal);
+   brw->depth_cache = _mesa_set_create(brw, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
 }