i965/state: Account for the element size in emit_buffer_surface_state
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
index fcaaddd6f2f80fc9101d00272365c2d0821869c2..547cc4ec187fab940d07faaa530932a41da0f972 100644 (file)
@@ -133,6 +133,54 @@ brw_emit_surface_state(struct brw_context *brw,
    }
 }
 
+uint32_t
+brw_update_renderbuffer_surface(struct brw_context *brw,
+                                struct gl_renderbuffer *rb,
+                                bool layered, unsigned unit /* unused */,
+                                uint32_t surf_index)
+{
+   struct gl_context *ctx = &brw->ctx;
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   struct intel_mipmap_tree *mt = irb->mt;
+
+   assert(brw_render_target_supported(brw, rb));
+   intel_miptree_used_for_rendering(mt);
+
+   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
+   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
+      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
+                    __func__, _mesa_get_format_name(rb_format));
+   }
+
+   const unsigned layer_multiplier =
+      (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_UMS ||
+       irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
+      MAX2(irb->mt->num_samples, 1) : 1;
+
+   struct isl_view view = {
+      .format = brw->render_target_format[rb_format],
+      .base_level = irb->mt_level - irb->mt->first_level,
+      .levels = 1,
+      .base_array_layer = irb->mt_layer / layer_multiplier,
+      .array_len = MAX2(irb->layer_count, 1),
+      .channel_select = {
+         ISL_CHANNEL_SELECT_RED,
+         ISL_CHANNEL_SELECT_GREEN,
+         ISL_CHANNEL_SELECT_BLUE,
+         ISL_CHANNEL_SELECT_ALPHA,
+      },
+      .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
+   };
+
+   uint32_t offset;
+   brw_emit_surface_state(brw, mt, &view,
+                          surface_state_infos[brw->gen].rb_mocs, false,
+                          &offset, surf_index,
+                          I915_GEM_DOMAIN_RENDER,
+                          I915_GEM_DOMAIN_RENDER);
+   return offset;
+}
+
 GLuint
 translate_tex_target(GLenum target)
 {
@@ -300,6 +348,143 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
                         swizzles[GET_SWZ(t->_Swizzle, 3)]);
 }
 
+/**
+ * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
+ * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED).  The mappings are
+ *
+ * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
+ *         0          1          2          3             4            5
+ *         4          5          6          7             0            1
+ *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE
+ *
+ * which is simply adding 4 then modding by 8 (or anding with 7).
+ *
+ * We then may need to apply workarounds for textureGather hardware bugs.
+ */
+static unsigned
+swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
+{
+   unsigned scs = (swizzle + 4) & 7;
+
+   return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
+}
+
+void
+brw_update_texture_surface(struct gl_context *ctx,
+                           unsigned unit,
+                           uint32_t *surf_offset,
+                           bool for_gather,
+                           uint32_t plane)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct gl_texture_object *obj = ctx->Texture.Unit[unit]._Current;
+
+   if (obj->Target == GL_TEXTURE_BUFFER) {
+      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
+
+   } else {
+      struct intel_texture_object *intel_obj = intel_texture_object(obj);
+      struct intel_mipmap_tree *mt = intel_obj->mt;
+      struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
+      /* If this is a view with restricted NumLayers, then our effective depth
+       * is not just the miptree depth.
+       */
+      const unsigned mt_num_layers =
+         mt->logical_depth0 * (_mesa_is_cube_map_texture(mt->target) ? 6 : 1);
+      const unsigned view_num_layers =
+         (obj->Immutable && obj->Target != GL_TEXTURE_3D) ? obj->NumLayers :
+                                                            mt_num_layers;
+
+      /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
+       * texturing functions that return a float, as our code generation always
+       * selects the .x channel (which would always be 0).
+       */
+      struct gl_texture_image *firstImage = obj->Image[0][obj->BaseLevel];
+      const bool alpha_depth = obj->DepthMode == GL_ALPHA &&
+         (firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
+          firstImage->_BaseFormat == GL_DEPTH_STENCIL);
+      const unsigned swizzle = (unlikely(alpha_depth) ? SWIZZLE_XYZW :
+                                brw_get_texture_swizzle(&brw->ctx, obj));
+
+      unsigned format = translate_tex_format(
+         brw, intel_obj->_Format, sampler->sRGBDecode);
+
+      /* Implement gen6 and gen7 gather work-around */
+      bool need_green_to_blue = false;
+      if (for_gather) {
+         if (brw->gen == 7 && format == BRW_SURFACEFORMAT_R32G32_FLOAT) {
+            format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
+            need_green_to_blue = brw->is_haswell;
+         } else if (brw->gen == 6) {
+            /* Sandybridge's gather4 message is broken for integer formats.
+             * To work around this, we pretend the surface is UNORM for
+             * 8 or 16-bit formats, and emit shader instructions to recover
+             * the real INT/UINT value.  For 32-bit formats, we pretend
+             * the surface is FLOAT, and simply reinterpret the resulting
+             * bits.
+             */
+            switch (format) {
+            case BRW_SURFACEFORMAT_R8_SINT:
+            case BRW_SURFACEFORMAT_R8_UINT:
+               format = BRW_SURFACEFORMAT_R8_UNORM;
+               break;
+
+            case BRW_SURFACEFORMAT_R16_SINT:
+            case BRW_SURFACEFORMAT_R16_UINT:
+               format = BRW_SURFACEFORMAT_R16_UNORM;
+               break;
+
+            case BRW_SURFACEFORMAT_R32_SINT:
+            case BRW_SURFACEFORMAT_R32_UINT:
+               format = BRW_SURFACEFORMAT_R32_FLOAT;
+               break;
+
+            default:
+               break;
+            }
+         }
+      }
+
+      if (obj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
+         assert(brw->gen >= 8);
+         mt = mt->stencil_mt;
+         format = BRW_SURFACEFORMAT_R8_UINT;
+      } else if (obj->Target == GL_TEXTURE_EXTERNAL_OES) {
+         if (plane > 0)
+            mt = mt->plane[plane - 1];
+         if (mt == NULL)
+            return;
+         format = translate_tex_format(brw, mt->format, sampler->sRGBDecode);
+      }
+
+      const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
+
+      struct isl_view view = {
+         .format = format,
+         .base_level = obj->MinLevel + obj->BaseLevel,
+         .levels = intel_obj->_MaxLevel - obj->BaseLevel + 1,
+         .base_array_layer = obj->MinLayer,
+         .array_len = view_num_layers,
+         .channel_select = {
+            swizzle_to_scs(GET_SWZ(swizzle, 0), need_green_to_blue),
+            swizzle_to_scs(GET_SWZ(swizzle, 1), need_green_to_blue),
+            swizzle_to_scs(GET_SWZ(swizzle, 2), need_green_to_blue),
+            swizzle_to_scs(GET_SWZ(swizzle, 3), need_green_to_blue),
+         },
+         .usage = ISL_SURF_USAGE_TEXTURE_BIT,
+      };
+
+      if (obj->Target == GL_TEXTURE_CUBE_MAP ||
+          obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
+         view.usage |= ISL_SURF_USAGE_CUBE_BIT;
+
+      brw_emit_surface_state(brw, mt, &view,
+                             surface_state_infos[brw->gen].tex_mocs, for_gather,
+                             surf_offset, surf_index,
+                             I915_GEM_DOMAIN_SAMPLER, 0);
+   }
+}
+
 static void
 gen4_emit_buffer_surface_state(struct brw_context *brw,
                                uint32_t *out_offset,
@@ -310,6 +495,7 @@ gen4_emit_buffer_surface_state(struct brw_context *brw,
                                unsigned pitch,
                                bool rw)
 {
+   unsigned elements = buffer_size / pitch;
    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                     6 * 4, 32, out_offset);
    memset(surf, 0, 6 * 4);
@@ -318,9 +504,9 @@ gen4_emit_buffer_surface_state(struct brw_context *brw,
              surface_format << BRW_SURFACE_FORMAT_SHIFT |
              (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
    surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
-   surf[2] = ((buffer_size - 1) & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
-             (((buffer_size - 1) >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
-   surf[3] = (((buffer_size - 1) >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
+   surf[2] = ((elements - 1) & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
+             (((elements - 1) >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
+   surf[3] = (((elements - 1) >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
              (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
 
    /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
@@ -363,103 +549,11 @@ brw_update_buffer_texture_surface(struct gl_context *ctx,
    brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
                                        tObj->BufferOffset,
                                        brw_format,
-                                       size / texel_size,
+                                       size,
                                        texel_size,
                                        false /* rw */);
 }
 
-static void
-brw_update_texture_surface(struct gl_context *ctx,
-                           unsigned unit,
-                           uint32_t *surf_offset,
-                           bool for_gather,
-                           uint32_t plane)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
-   struct intel_texture_object *intelObj = intel_texture_object(tObj);
-   struct intel_mipmap_tree *mt = intelObj->mt;
-   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
-   uint32_t *surf;
-
-   /* BRW_NEW_TEXTURE_BUFFER */
-   if (tObj->Target == GL_TEXTURE_BUFFER) {
-      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
-      return;
-   }
-
-   if (plane > 0) {
-      if (mt->plane[plane - 1] == NULL)
-         return;
-      mt = mt->plane[plane - 1];
-   }
-
-   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
-                         6 * 4, 32, surf_offset);
-
-   mesa_format mesa_fmt = plane == 0 ? intelObj->_Format : mt->format;
-   uint32_t tex_format = translate_tex_format(brw, mesa_fmt,
-                                              sampler->sRGBDecode);
-
-   if (for_gather) {
-      /* Sandybridge's gather4 message is broken for integer formats.
-       * To work around this, we pretend the surface is UNORM for
-       * 8 or 16-bit formats, and emit shader instructions to recover
-       * the real INT/UINT value.  For 32-bit formats, we pretend
-       * the surface is FLOAT, and simply reinterpret the resulting
-       * bits.
-       */
-      switch (tex_format) {
-      case BRW_SURFACEFORMAT_R8_SINT:
-      case BRW_SURFACEFORMAT_R8_UINT:
-         tex_format = BRW_SURFACEFORMAT_R8_UNORM;
-         break;
-
-      case BRW_SURFACEFORMAT_R16_SINT:
-      case BRW_SURFACEFORMAT_R16_UINT:
-         tex_format = BRW_SURFACEFORMAT_R16_UNORM;
-         break;
-
-      case BRW_SURFACEFORMAT_R32_SINT:
-      case BRW_SURFACEFORMAT_R32_UINT:
-         tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
-         break;
-
-      default:
-         break;
-      }
-   }
-
-   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
-             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
-             BRW_SURFACE_CUBEFACE_ENABLES |
-             tex_format << BRW_SURFACE_FORMAT_SHIFT);
-
-   surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
-
-   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
-             (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
-             (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
-
-   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
-             (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
-             (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
-
-   const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - mt->first_level;
-   surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
-              SET_FIELD(min_lod, BRW_SURFACE_MIN_LOD) |
-              SET_FIELD(tObj->MinLayer, BRW_SURFACE_MIN_ARRAY_ELEMENT));
-
-   surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
-
-   /* Emit relocation to surface contents */
-   drm_intel_bo_emit_reloc(brw->batch.bo,
-                           *surf_offset + 4,
-                           mt->bo,
-                           surf[1] - mt->bo->offset64,
-                           I915_GEM_DOMAIN_SAMPLER, 0);
-}
-
 /**
  * Create the constant buffer surface.  Vertex/fragment shader constants will be
  * read from this buffer with Data Port Read instructions/messages.
@@ -717,10 +811,10 @@ brw_emit_null_surface_state(struct brw_context *brw,
  * usable for further buffers when doing ARB_draw_buffer support.
  */
 static uint32_t
-brw_update_renderbuffer_surface(struct brw_context *brw,
-                                struct gl_renderbuffer *rb,
-                                bool layered, unsigned unit,
-                                uint32_t surf_index)
+gen4_update_renderbuffer_surface(struct brw_context *brw,
+                                 struct gl_renderbuffer *rb,
+                                 bool layered, unsigned unit,
+                                 uint32_t surf_index)
 {
    struct gl_context *ctx = &brw->ctx;
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
@@ -1386,7 +1480,7 @@ update_image_surface(struct brw_context *brw,
 
          brw->vtbl.emit_buffer_surface_state(
             brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
-            format, intel_obj->Base.Size / texel_size, texel_size,
+            format, intel_obj->Base.Size, texel_size,
             access != GL_READ_ONLY);
 
          update_buffer_image_param(brw, u, surface_idx, param);
@@ -1402,22 +1496,33 @@ update_image_surface(struct brw_context *brw,
                access != GL_READ_ONLY);
 
          } else {
-            const unsigned min_layer = obj->MinLayer + u->_Layer;
-            const unsigned min_level = obj->MinLevel + u->Level;
             const unsigned num_layers = (!u->Layered ? 1 :
                                          obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
                                          mt->logical_depth0);
-            const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
-                                   obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
-                                   GL_TEXTURE_2D_ARRAY : obj->Target);
+
+            struct isl_view view = {
+               .format = format,
+               .base_level = obj->MinLevel + u->Level,
+               .levels = 1,
+               .base_array_layer = obj->MinLayer + u->_Layer,
+               .array_len = num_layers,
+               .channel_select = {
+                  ISL_CHANNEL_SELECT_RED,
+                  ISL_CHANNEL_SELECT_GREEN,
+                  ISL_CHANNEL_SELECT_BLUE,
+                  ISL_CHANNEL_SELECT_ALPHA,
+               },
+               .usage = ISL_SURF_USAGE_STORAGE_BIT,
+            };
+
             const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
 
-            brw->vtbl.emit_texture_surface_state(
-               brw, mt, target,
-               min_layer, min_layer + num_layers,
-               min_level, min_level + 1,
-               format, SWIZZLE_XYZW,
-               surf_offset, surf_index, access != GL_READ_ONLY, false);
+            brw_emit_surface_state(brw, mt, &view,
+                                   surface_state_infos[brw->gen].rb_mocs, false,
+                                   surf_offset, surf_index,
+                                   I915_GEM_DOMAIN_SAMPLER,
+                                   access == GL_READ_ONLY ? 0 :
+                                             I915_GEM_DOMAIN_SAMPLER);
          }
 
          update_texture_image_param(brw, u, surface_idx, param);
@@ -1487,7 +1592,7 @@ void
 gen4_init_vtable_surface_functions(struct brw_context *brw)
 {
    brw->vtbl.update_texture_surface = brw_update_texture_surface;
-   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
+   brw->vtbl.update_renderbuffer_surface = gen4_update_renderbuffer_surface;
    brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
    brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
 }