#include "brw_defines.h"
#include "brw_wm.h"
+/* Bit-flags threaded through the surface-state emission helpers in place of
+ * the old bool parameters.
+ */
+enum {
+ /* The renderbuffer is bound as a layered (multi-layer) attachment. */
+ INTEL_RENDERBUFFER_LAYERED = 1 << 0,
+ /* Do not program the auxiliary (MCS/CCS) buffer for this surface, even if
+ * the miptree has one.
+ */
+ INTEL_AUX_BUFFER_DISABLED = 1 << 1,
+};
+
struct surface_state_info {
unsigned num_dwords;
unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */
static void
brw_emit_surface_state(struct brw_context *brw,
- struct intel_mipmap_tree *mt,
- const struct isl_view *view,
- uint32_t mocs, bool for_gather,
- uint32_t *surf_offset, int surf_index,
+ struct intel_mipmap_tree *mt, uint32_t flags,
+ GLenum target, struct isl_view view,
+ uint32_t mocs, uint32_t *surf_offset, int surf_index,
unsigned read_domains, unsigned write_domains)
{
const struct surface_state_info ss_info = surface_state_infos[brw->gen];
+ uint32_t tile_x = 0, tile_y = 0;
+ uint32_t offset = mt->offset;
struct isl_surf surf;
intel_miptree_get_isl_surf(brw, mt, &surf);
+ surf.dim = get_isl_surf_dim(target);
+
+ const enum isl_dim_layout dim_layout =
+ get_isl_dim_layout(brw->intelScreen->devinfo, mt->tiling, target);
+
+ if (surf.dim_layout != dim_layout) {
+ /* The layout of the specified texture target is not compatible with the
+ * actual layout of the miptree structure in memory -- You're entering
+ * dangerous territory, this can only possibly work if you only intended
+ * to access a single level and slice of the texture, and the hardware
+ * supports the tile offset feature in order to allow non-tile-aligned
+ * base offsets, since we'll have to point the hardware to the first
+ * texel of the level instead of relying on the usual base level/layer
+ * controls.
+ */
+ assert(brw->has_surface_tile_offset);
+ assert(view.levels == 1 && view.array_len == 1);
+
+ offset += intel_miptree_get_tile_offsets(mt, view.base_level,
+ view.base_array_layer,
+ &tile_x, &tile_y);
+
+ /* Minify the logical dimensions of the texture. */
+ const unsigned l = view.base_level - mt->first_level;
+ surf.logical_level0_px.width = minify(surf.logical_level0_px.width, l);
+ surf.logical_level0_px.height = surf.dim <= ISL_SURF_DIM_1D ? 1 :
+ minify(surf.logical_level0_px.height, l);
+ surf.logical_level0_px.depth = surf.dim <= ISL_SURF_DIM_2D ? 1 :
+ minify(surf.logical_level0_px.depth, l);
+
+ /* Only the base level and layer can be addressed with the overridden
+ * layout.
+ */
+ surf.logical_level0_px.array_len = 1;
+ surf.levels = 1;
+ surf.dim_layout = dim_layout;
+
+ /* The requested slice of the texture is now at the base level and
+ * layer.
+ */
+ view.base_level = 0;
+ view.base_array_layer = 0;
+ }
+
union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
struct isl_surf *aux_surf = NULL, aux_surf_s;
uint64_t aux_offset = 0;
enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
- if (mt->mcs_mt &&
- ((view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) ||
- mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)) {
+ if (mt->mcs_mt && !(flags & INTEL_AUX_BUFFER_DISABLED)) {
intel_miptree_get_aux_isl_surf(brw, mt, &aux_surf_s, &aux_usage);
aux_surf = &aux_surf_s;
assert(mt->mcs_mt->offset == 0);
ss_info.num_dwords * 4, ss_info.ss_align,
surf_index, surf_offset);
- isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = view,
- .address = mt->bo->offset64 + mt->offset,
+ isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = &view,
+ .address = mt->bo->offset64 + offset,
.aux_surf = aux_surf, .aux_usage = aux_usage,
.aux_address = aux_offset,
- .mocs = mocs, .clear_color = clear_color);
+ .mocs = mocs, .clear_color = clear_color,
+ .x_offset_sa = tile_x, .y_offset_sa = tile_y);
drm_intel_bo_emit_reloc(brw->batch.bo,
*surf_offset + 4 * ss_info.reloc_dw,
- mt->bo, mt->offset,
+ mt->bo, offset,
read_domains, write_domains);
if (aux_surf) {
uint32_t
brw_update_renderbuffer_surface(struct brw_context *brw,
struct gl_renderbuffer *rb,
- bool layered, unsigned unit /* unused */,
+ uint32_t flags, unsigned unit /* unused */,
uint32_t surf_index)
{
struct gl_context *ctx = &brw->ctx;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_mipmap_tree *mt = irb->mt;
+ if (brw->gen < 9) {
+ assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
+ }
+
assert(brw_render_target_supported(brw, rb));
intel_miptree_used_for_rendering(mt);
.levels = 1,
.base_array_layer = irb->mt_layer / layer_multiplier,
.array_len = MAX2(irb->layer_count, 1),
- .channel_select = {
- ISL_CHANNEL_SELECT_RED,
- ISL_CHANNEL_SELECT_GREEN,
- ISL_CHANNEL_SELECT_BLUE,
- ISL_CHANNEL_SELECT_ALPHA,
- },
+ .swizzle = ISL_SWIZZLE_IDENTITY,
.usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
};
uint32_t offset;
- brw_emit_surface_state(brw, mt, &view,
- surface_state_infos[brw->gen].rb_mocs, false,
+ brw_emit_surface_state(brw, mt, flags, mt->target, view,
+ surface_state_infos[brw->gen].rb_mocs,
&offset, surf_index,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER);
return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
}
+/* Return the index of the color draw buffer in fb that is backed by the
+ * given miptree, or fb->_NumColorDrawBuffers if the miptree is not
+ * currently bound as a color draw buffer. Callers compare the result
+ * against fb->_NumColorDrawBuffers to detect the "not bound" case.
+ */
+static unsigned
+brw_find_matching_rb(const struct gl_framebuffer *fb,
+ const struct intel_mipmap_tree *mt)
+{
+ for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
+ const struct intel_renderbuffer *irb =
+ intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+
+ /* irb may be NULL if the draw buffer isn't an intel renderbuffer. */
+ if (irb && irb->mt == mt)
+ return i;
+ }
+
+ return fb->_NumColorDrawBuffers;
+}
+
+/* Sanity-check (for an assert) that sampling mt through a view with the
+ * given surface format is consistent with its lossless-compression (CCS_E)
+ * state: either the format can be sampled as compressed, or the surface has
+ * been resolved, and — if it is simultaneously bound as a color draw
+ * buffer — aux access has been disabled for that draw buffer as well.
+ */
+static inline bool
+brw_texture_view_sane(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt, unsigned format)
+{
+ /* There are special cases only for lossless compression. */
+ if (!intel_miptree_is_lossless_compressed(brw, mt))
+ return true;
+
+ /* Sampling the compressed surface directly is fine if the hardware can
+ * decode this format in the sampler.
+ */
+ if (isl_format_supports_lossless_compression(brw->intelScreen->devinfo,
+ format))
+ return true;
+
+ /* Logic elsewhere needs to take care to resolve the color buffer prior
+ * to sampling it as non-compressed.
+ */
+ if (mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)
+ return false;
+
+ const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
+ const unsigned rb_index = brw_find_matching_rb(fb, mt);
+
+ /* Not simultaneously bound as a draw buffer: resolved state is enough. */
+ if (rb_index == fb->_NumColorDrawBuffers)
+ return true;
+
+ /* Underlying surface is compressed but it is sampled using a format that
+ * the sampling engine doesn't support as compressed. Compression must be
+ * disabled for both sampling engine and data port in case the same surface
+ * is used also as render target.
+ */
+ return brw->draw_aux_buffer_disabled[rb_index];
+}
+
+/* Decide whether the aux (MCS/CCS) buffer of mt must be left unprogrammed
+ * when emitting a texture surface for it (i.e. whether to pass
+ * INTEL_AUX_BUFFER_DISABLED). Returns true when reading through the aux
+ * buffer would be wrong or pointless; false when compression must remain
+ * enabled on the sampling side.
+ */
+static bool
+brw_disable_aux_surface(const struct brw_context *brw,
+ const struct intel_mipmap_tree *mt)
+{
+ /* Nothing to disable. */
+ if (!mt->mcs_mt)
+ return false;
+
+ /* There are special cases only for lossless compression. */
+ if (!intel_miptree_is_lossless_compressed(brw, mt))
+ return mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED;
+
+ const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
+ const unsigned rb_index = brw_find_matching_rb(fb, mt);
+
+ /* If we are drawing into this with compression enabled, then we must also
+ * enable compression when texturing from it regardless of
+ * fast_clear_state. If we don't then, after the first draw call with
+ * this setup, there will be data in the CCS which won't get picked up by
+ * subsequent texturing operations as required by ARB_texture_barrier.
+ * Since we don't want to re-emit the binding table or do a resolve
+ * operation every draw call, the easiest thing to do is just enable
+ * compression on the texturing side. This is completely safe to do
+ * since, if compressed texturing weren't allowed, we would have already
+ * disabled compression on the render-target side wherever
+ * brw->draw_aux_buffer_disabled is computed (outside this file).
+ */
+ if (rb_index < fb->_NumColorDrawBuffers) {
+ if (brw->draw_aux_buffer_disabled[rb_index]) {
+ assert(mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED);
+ }
+
+ return brw->draw_aux_buffer_disabled[rb_index];
+ }
+
+ /* Not currently bound for drawing: safe to sample without aux once the
+ * surface has been resolved.
+ */
+ return mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED;
+}
+
void
brw_update_texture_surface(struct gl_context *ctx,
unsigned unit,
}
if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
- assert(brw->gen >= 8);
- mt = mt->stencil_mt;
+ if (brw->gen <= 7) {
+ assert(mt->r8stencil_mt && !mt->stencil_mt->r8stencil_needs_update);
+ mt = mt->r8stencil_mt;
+ } else {
+ mt = mt->stencil_mt;
+ }
+ format = BRW_SURFACEFORMAT_R8_UINT;
+ } else if (brw->gen <= 7 && mt->format == MESA_FORMAT_S_UINT8) {
+ assert(mt->r8stencil_mt && !mt->r8stencil_needs_update);
+ mt = mt->r8stencil_mt;
format = BRW_SURFACEFORMAT_R8_UINT;
}
.levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
.base_array_layer = obj->MinLayer,
.array_len = view_num_layers,
- .channel_select = {
- swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
- swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
- swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
- swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
+ .swizzle = {
+ .r = swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
+ .g = swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
+ .b = swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
+ .a = swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
},
.usage = ISL_SURF_USAGE_TEXTURE_BIT,
};
obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
view.usage |= ISL_SURF_USAGE_CUBE_BIT;
- brw_emit_surface_state(brw, mt, &view,
- surface_state_infos[brw->gen].tex_mocs, for_gather,
+ assert(brw_texture_view_sane(brw, mt, format));
+
+ const int flags =
+ brw_disable_aux_surface(brw, mt) ? INTEL_AUX_BUFFER_DISABLED : 0;
+ brw_emit_surface_state(brw, mt, flags, mt->target, view,
+ surface_state_infos[brw->gen].tex_mocs,
surf_offset, surf_index,
I915_GEM_DOMAIN_SAMPLER, 0);
}
static uint32_t
gen4_update_renderbuffer_surface(struct brw_context *brw,
struct gl_renderbuffer *rb,
- bool layered, unsigned unit,
+ uint32_t flags, unsigned unit,
uint32_t surf_index)
{
struct gl_context *ctx = &brw->ctx;
mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
/* BRW_NEW_FS_PROG_DATA */
- assert(!layered);
+ assert(!(flags & INTEL_RENDERBUFFER_LAYERED));
+ assert(!(flags & INTEL_AUX_BUFFER_DISABLED));
if (rb->TexImage && !brw->has_surface_tile_offset) {
intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
if (brw->gen < 6) {
/* _NEW_COLOR */
- if (!ctx->Color.ColorLogicOpEnabled &&
- (ctx->Color.BlendEnabled & (1 << unit)))
+ if (!ctx->Color.ColorLogicOpEnabled && !ctx->Color._AdvancedBlendMode &&
+ (ctx->Color.BlendEnabled & (1 << unit)))
surf[0] |= BRW_SURFACE_BLEND_ENABLED;
if (!ctx->Color.ColorMask[unit][0])
if (fb->_NumColorDrawBuffers >= 1) {
for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
const uint32_t surf_index = render_target_start + i;
+ const int flags = (_mesa_geometric_layers(fb) > 0 ?
+ INTEL_RENDERBUFFER_LAYERED : 0) |
+ (brw->draw_aux_buffer_disabled[i] ?
+ INTEL_AUX_BUFFER_DISABLED : 0);
if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
surf_offset[surf_index] =
brw->vtbl.update_renderbuffer_surface(
- brw, fb->_ColorDrawBuffers[i],
- _mesa_geometric_layers(fb) > 0, i, surf_index);
+ brw, fb->_ColorDrawBuffers[i], flags, i, surf_index);
} else {
brw->vtbl.emit_null_surface_state(brw, w, h, s,
&surf_offset[surf_index]);
.emit = update_renderbuffer_surfaces,
};
+/* Emit texture surface states through which the fragment shader can read
+ * back the currently bound color draw buffers. NOTE(review): gated on the
+ * fragment program reading its own outputs while coherent
+ * MESA_shader_framebuffer_fetch is not exposed — presumably this is the
+ * non-coherent framebuffer-fetch path; confirm against the shader side.
+ */
+static void
+update_renderbuffer_read_surfaces(struct brw_context *brw)
+{
+ const struct gl_context *ctx = &brw->ctx;
+
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
+ brw->fragment_program &&
+ brw->fragment_program->Base.OutputsRead) {
+ /* _NEW_BUFFERS */
+ const struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+ for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
+ struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
+ const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ /* Read surfaces live in their own binding-table range, separate
+ * from the render-target writes.
+ */
+ const unsigned surf_index =
+ brw->wm.prog_data->binding_table.render_target_read_start + i;
+ uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
+
+ if (irb) {
+ const unsigned format = brw->render_target_format[
+ _mesa_get_render_format(ctx, intel_rb_format(irb))];
+ assert(isl_format_supports_sampling(brw->intelScreen->devinfo,
+ format));
+
+ /* Override the target of the texture if the render buffer is a
+ * single slice of a 3D texture (since the minimum array element
+ * field of the surface state structure is ignored by the sampler
+ * unit for 3D textures on some hardware), or if the render buffer
+ * is a 1D array (since shaders always provide the array index
+ * coordinate at the Z component to avoid state-dependent
+ * recompiles when changing the texture target of the
+ * framebuffer).
+ */
+ const GLenum target =
+ (irb->mt->target == GL_TEXTURE_3D &&
+ irb->layer_count == 1) ? GL_TEXTURE_2D :
+ irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
+ irb->mt->target;
+
+ /* intel_renderbuffer::mt_layer is expressed in sample units for
+ * the UMS and CMS multisample layouts, but
+ * intel_renderbuffer::layer_count is expressed in units of whole
+ * logical layers regardless of the multisample layout.
+ */
+ const unsigned mt_layer_unit =
+ (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
+ irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
+ MAX2(irb->mt->num_samples, 1) : 1;
+
+ const struct isl_view view = {
+ .format = format,
+ .base_level = irb->mt_level - irb->mt->first_level,
+ .levels = 1,
+ .base_array_layer = irb->mt_layer / mt_layer_unit,
+ .array_len = irb->layer_count,
+ .swizzle = ISL_SWIZZLE_IDENTITY,
+ .usage = ISL_SURF_USAGE_TEXTURE_BIT,
+ };
+
+ /* Reading through the aux buffer must match the draw side: if aux
+ * was disabled for drawing into buffer i, disable it here too.
+ */
+ const int flags = brw->draw_aux_buffer_disabled[i] ?
+ INTEL_AUX_BUFFER_DISABLED : 0;
+ brw_emit_surface_state(brw, irb->mt, flags, target, view,
+ surface_state_infos[brw->gen].tex_mocs,
+ surf_offset, surf_index,
+ I915_GEM_DOMAIN_SAMPLER, 0);
+
+ } else {
+ brw->vtbl.emit_null_surface_state(
+ brw, _mesa_geometric_width(fb), _mesa_geometric_height(fb),
+ _mesa_geometric_samples(fb), surf_offset);
+ }
+ }
+
+ brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
+ }
+}
+
+/* State atom: re-emit renderbuffer read surfaces whenever the bound
+ * framebuffer (_NEW_BUFFERS), the batch, or the fragment program changes.
+ */
+const struct brw_tracked_state brw_renderbuffer_read_surfaces = {
+ .dirty = {
+ .mesa = _NEW_BUFFERS,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_FRAGMENT_PROGRAM,
+ },
+ .emit = update_renderbuffer_read_surfaces,
+};
static void
update_stage_texture_surfaces(struct brw_context *brw,
static uint32_t
get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
{
- const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
+ const struct gen_device_info *devinfo = brw->intelScreen->devinfo;
uint32_t hw_format = brw_format_for_mesa_format(format);
if (access == GL_WRITE_ONLY) {
return hw_format;
struct brw_image_param *param)
{
struct gl_buffer_object *obj = u->TexObj->BufferObject;
-
+ const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
update_default_image_param(brw, u, surface_idx, param);
- param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
+ param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
}
.levels = 1,
.base_array_layer = obj->MinLayer + u->_Layer,
.array_len = num_layers,
- .channel_select = {
- ISL_CHANNEL_SELECT_RED,
- ISL_CHANNEL_SELECT_GREEN,
- ISL_CHANNEL_SELECT_BLUE,
- ISL_CHANNEL_SELECT_ALPHA,
- },
+ .swizzle = ISL_SWIZZLE_IDENTITY,
.usage = ISL_SURF_USAGE_STORAGE_BIT,
};
const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
-
- brw_emit_surface_state(brw, mt, &view,
- surface_state_infos[brw->gen].tex_mocs, false,
+ const int flags =
+ mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED ?
+ INTEL_AUX_BUFFER_DISABLED : 0;
+ brw_emit_surface_state(brw, mt, flags, mt->target, view,
+ surface_state_infos[brw->gen].tex_mocs,
surf_offset, surf_index,
I915_GEM_DOMAIN_SAMPLER,
access == GL_READ_ONLY ? 0 :