iris: Drop vtbl usage for some load_register calls

[mesa.git] / src / gallium / drivers / iris / iris_resource.c
diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c

index cc19e601a75a34a3103876528822bc2da5395480..b2f91eb32ed7b455ab276246b4372b8528bb578b 100644 (file)
--- a/src/gallium/drivers/iris/iris_resource.c
+++ b/src/gallium/drivers/iris/iris_resource.c
@@ -628,6 +628,28 @@ supports_ccs(const struct gen_device_info *devinfo,
     return true;
  }
  
+static bool
+want_ccs_e_for_format(const struct gen_device_info *devinfo,
+                      enum isl_format format)
+{
+   if (!isl_format_supports_ccs_e(devinfo, format))
+      return false;
+
+   const struct isl_format_layout *fmtl = isl_format_get_layout(format);
+
+   /* CCS_E seems to significantly hurt performance with 32-bit floating
+    * point formats.  For example, Paraview's "Wavelet Volume" case uses
+    * both R32_FLOAT and R32G32B32A32_FLOAT, and enabling CCS_E for those
+    * formats causes a 62% FPS drop.  Many benchmarks seem to use 16-bit
+    * float with no issues, so only formats whose red channel is a 32-bit
+    * float are rejected here; the other channels are not inspected.
+    */
+   if (fmtl->channels.r.bits == 32 && fmtl->channels.r.type == ISL_SFLOAT)
+      return false;
+
+   return true;
+}
+
  static struct pipe_resource *
  iris_resource_create_for_buffer(struct pipe_screen *pscreen,
                                  const struct pipe_resource *templ)
@@ -765,7 +787,7 @@ iris_resource_create_with_modifiers(struct pipe_screen *pscreen,
           res->aux.possible_usages |= 1 << ISL_AUX_USAGE_HIZ;
     } else if (likely(!(INTEL_DEBUG & DEBUG_NO_RBC)) &&
                supports_ccs(devinfo, &res->surf)) {
-      if (isl_format_supports_ccs_e(devinfo, res->surf.format))
+      if (want_ccs_e_for_format(devinfo, res->surf.format))
           res->aux.possible_usages |= 1 << ISL_AUX_USAGE_CCS_E;
  
        if (isl_format_supports_ccs_d(devinfo, res->surf.format))
@@ -1023,11 +1045,33 @@ iris_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
                                  mod ? mod->supports_clear_color : false);
  }
  
+static void
+iris_resource_disable_aux_on_first_query(struct pipe_resource *resource,
+                                         unsigned usage)
+{
+   struct iris_resource *res = (struct iris_resource *)resource;
+   bool mod_with_aux =
+      res->mod_info && res->mod_info->aux_usage != ISL_AUX_USAGE_NONE;
+
+   /* Disable aux when the resource was not created with a modifier with aux,
+    * explicit flush is not set in usage, a non-zero aux usage is in effect,
+    * and the reference count is 1 (taken to mean this is our first query).
+    */
+   if (!mod_with_aux &&
+      (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && res->aux.usage != 0) &&
+       p_atomic_read(&resource->reference.count) == 1) {
+         iris_resource_disable_aux(res);
+   }
+}
+
  static bool
  iris_resource_get_param(struct pipe_screen *screen,
+                        struct pipe_context *context,
                          struct pipe_resource *resource,
-                        unsigned int plane,
+                        unsigned plane,
+                        unsigned layer,
                          enum pipe_resource_param param,
+                        unsigned handle_usage,
                          uint64_t *value)
  {
     struct iris_resource *res = (struct iris_resource *)resource;
@@ -1038,9 +1082,18 @@ iris_resource_get_param(struct pipe_screen *screen,
     bool result;
     unsigned handle;
  
+   iris_resource_disable_aux_on_first_query(resource, handle_usage);
+
     switch (param) {
     case PIPE_RESOURCE_PARAM_NPLANES:
-      *value = mod_with_aux ? 2 : 1;
+      if (mod_with_aux) {
+         *value = 2;
+      } else {
+         unsigned count = 0;
+         for (struct pipe_resource *cur = resource; cur; cur = cur->next)
+            count++;
+         *value = count;
+      }
        return true;
     case PIPE_RESOURCE_PARAM_STRIDE:
        *value = wants_aux ? res->aux.surf.row_pitch_B : res->surf.row_pitch_B;
@@ -1081,15 +1134,7 @@ iris_resource_get_handle(struct pipe_screen *pscreen,
     bool mod_with_aux =
        res->mod_info && res->mod_info->aux_usage != ISL_AUX_USAGE_NONE;
  
-   /* Disable aux usage if explicit flush not set and this is the first time
-    * we are dealing with this resource and the resource was not created with
-    * a modifier with aux.
-    */
-   if (!mod_with_aux &&
-       (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && res->aux.usage != 0) &&
-       p_atomic_read(&resource->reference.count) == 1) {
-         iris_resource_disable_aux(res);
-   }
+   iris_resource_disable_aux_on_first_query(resource, usage);
  
     struct iris_bo *bo;
     if (mod_with_aux && whandle->plane > 0) {
@@ -1418,7 +1463,7 @@ iris_resource_get_tile_offsets(const struct iris_resource *res,
   *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
   */
  static intptr_t
-s8_offset(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
+s8_offset(uint32_t stride, uint32_t x, uint32_t y)
  {
     uint32_t tile_size = 4096;
     uint32_t tile_width = 64;
@@ -1443,17 +1488,6 @@ s8_offset(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
                 +   2 * (byte_y % 2)
                 +   1 * (byte_x % 2);
  
-   if (swizzled) {
-      /* adjust for bit6 swizzling */
-      if (((byte_x / 8) % 2) == 1) {
-         if (((byte_y / 8) % 2) == 0) {
-            u += 64;
-         } else {
-            u -= 64;
-         }
-      }
-   }
-
     return u;
  }
  
@@ -1464,7 +1498,6 @@ iris_unmap_s8(struct iris_transfer *map)
     const struct pipe_box *box = &xfer->box;
     struct iris_resource *res = (struct iris_resource *) xfer->resource;
     struct isl_surf *surf = &res->surf;
-   const bool has_swizzling = false;
  
     if (xfer->usage & PIPE_TRANSFER_WRITE) {
        uint8_t *untiled_s8_map = map->ptr;
@@ -1479,8 +1512,7 @@ iris_unmap_s8(struct iris_transfer *map)
              for (uint32_t x = 0; x < box->width; x++) {
                 ptrdiff_t offset = s8_offset(surf->row_pitch_B,
                                              x0_el + box->x + x,
-                                            y0_el + box->y + y,
-                                            has_swizzling);
+                                            y0_el + box->y + y);
                 tiled_s8_map[offset] =
                    untiled_s8_map[s * xfer->layer_stride + y * xfer->stride + x];
              }
@@ -1509,8 +1541,6 @@ iris_map_s8(struct iris_transfer *map)
     map->buffer = map->ptr = malloc(xfer->layer_stride * box->depth);
     assert(map->buffer);
  
-   const bool has_swizzling = false;
-
     /* One of either READ_BIT or WRITE_BIT or both is set.  READ_BIT implies no
      * INVALIDATE_RANGE_BIT.  WRITE_BIT needs the original values read in unless
      * invalidate is set, since we'll be writing the whole rectangle from our
@@ -1529,8 +1559,7 @@ iris_map_s8(struct iris_transfer *map)
              for (uint32_t x = 0; x < box->width; x++) {
                 ptrdiff_t offset = s8_offset(surf->row_pitch_B,
                                              x0_el + box->x + x,
-                                            y0_el + box->y + y,
-                                            has_swizzling);
+                                            y0_el + box->y + y);
                 untiled_s8_map[s * xfer->layer_stride + y * xfer->stride + x] =
                    tiled_s8_map[offset];
              }
@@ -1707,21 +1736,29 @@ iris_transfer_map(struct pipe_context *ctx,
        usage |= PIPE_TRANSFER_DISCARD_RANGE;
     }
  
-   bool map_would_stall = false;
-
-   if (resource->target != PIPE_BUFFER) {
-      iris_resource_access_raw(ice, &ice->batches[IRIS_BATCH_RENDER], res,
-                               level, box->z, box->depth,
-                               usage & PIPE_TRANSFER_WRITE);
-   }
-
     if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
         can_promote_to_async(res, box, usage)) {
        usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
     }
  
+   bool need_resolve = false;
+   bool need_color_resolve = false;
+
+   if (resource->target != PIPE_BUFFER) {
+      bool need_hiz_resolve = iris_resource_level_has_hiz(res, level);
+
+      need_color_resolve =
+         (res->aux.usage == ISL_AUX_USAGE_CCS_D ||
+          res->aux.usage == ISL_AUX_USAGE_CCS_E) &&
+         iris_has_color_unresolved(res, level, 1, box->z, box->depth);
+
+      need_resolve = need_color_resolve || need_hiz_resolve;
+   }
+
+   bool map_would_stall = false;
+
     if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
-      map_would_stall = resource_is_busy(ice, res);
+      map_would_stall = need_resolve || resource_is_busy(ice, res);
  
        if (map_would_stall && (usage & PIPE_TRANSFER_DONTBLOCK) &&
                               (usage & PIPE_TRANSFER_MAP_DIRECTLY))
@@ -1747,6 +1784,10 @@ iris_transfer_map(struct pipe_context *ctx,
     xfer->box = *box;
     *ptransfer = xfer;
  
+   map->dest_had_defined_contents =
+      util_ranges_intersect(&res->valid_buffer_range, box->x,
+                            box->x + box->width);
+
     if (usage & PIPE_TRANSFER_WRITE)
        util_range_add(&res->valid_buffer_range, box->x, box->x + box->width);
  
@@ -1769,25 +1810,29 @@ iris_transfer_map(struct pipe_context *ctx,
      * temporary and map that, to avoid the resolve.  (It might be better to
      * a tiled temporary and use the tiled_memcpy paths...)
      */
-   if (!(usage & PIPE_TRANSFER_DISCARD_RANGE) &&
-       res->aux.usage != ISL_AUX_USAGE_CCS_E &&
-       res->aux.usage != ISL_AUX_USAGE_CCS_D) {
+   if (!(usage & PIPE_TRANSFER_DISCARD_RANGE) && !need_color_resolve)
        no_gpu = true;
-   }
  
     const struct isl_format_layout *fmtl = isl_format_get_layout(surf->format);
     if (fmtl->txc == ISL_TXC_ASTC)
        no_gpu = true;
  
     if ((map_would_stall || res->aux.usage == ISL_AUX_USAGE_CCS_E) && !no_gpu) {
-      /* If we need a synchronous mapping and the resource is busy,
-       * we copy to/from a linear temporary buffer using the GPU.
+      /* If we need a synchronous mapping and the resource is busy, or needs
+       * resolving, we copy to/from a linear temporary buffer using the GPU.
         */
        map->batch = &ice->batches[IRIS_BATCH_RENDER];
        map->blorp = &ice->blorp;
        iris_map_copy_region(map);
     } else {
-      /* Otherwise we're free to map on the CPU.  Flush if needed. */
+      /* Otherwise we're free to map on the CPU. */
+
+      if (need_resolve) {
+         iris_resource_access_raw(ice, &ice->batches[IRIS_BATCH_RENDER], res,
+                                  level, box->z, box->depth,
+                                  usage & PIPE_TRANSFER_WRITE);
+      }
+
        if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
           for (int i = 0; i < IRIS_BATCH_COUNT; i++) {
              if (iris_batch_references(&ice->batches[i], res->bo))
@@ -1823,8 +1868,13 @@ iris_transfer_flush_region(struct pipe_context *ctx,
     uint32_t history_flush = 0;
  
     if (res->base.target == PIPE_BUFFER) {
-      history_flush |= iris_flush_bits_for_history(res) |
-                       (map->staging ? PIPE_CONTROL_RENDER_TARGET_FLUSH : 0);
+      if (map->staging)
+         history_flush |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
+
+      if (map->dest_had_defined_contents)
+         history_flush |= iris_flush_bits_for_history(res);
+
+      util_range_add(&res->valid_buffer_range, box->x, box->x + box->width);
     }
  
     if (history_flush & ~PIPE_CONTROL_CS_STALL) {
@@ -1851,7 +1901,8 @@ iris_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *xfer)
     struct iris_context *ice = (struct iris_context *)ctx;
     struct iris_transfer *map = (void *) xfer;
  
-   if (!(xfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+   if (!(xfer->usage & (PIPE_TRANSFER_FLUSH_EXPLICIT |
+                        PIPE_TRANSFER_COHERENT))) {
        struct pipe_box flush_box = {
           .x = 0, .y = 0, .z = 0,
           .width  = xfer->box.width,
@@ -1878,13 +1929,7 @@ iris_dirty_for_history(struct iris_context *ice,
     uint64_t dirty = 0ull;
  
     if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
-      dirty |= IRIS_DIRTY_CONSTANTS_VS |
-               IRIS_DIRTY_CONSTANTS_TCS |
-               IRIS_DIRTY_CONSTANTS_TES |
-               IRIS_DIRTY_CONSTANTS_GS |
-               IRIS_DIRTY_CONSTANTS_FS |
-               IRIS_DIRTY_CONSTANTS_CS |
-               IRIS_ALL_DIRTY_BINDINGS;
+      dirty |= ((uint64_t)res->bind_stages) << IRIS_SHIFT_FOR_DIRTY_CONSTANTS;
     }
  
     ice->state.dirty |= dirty;