i965: Revert recent tiled memcpy changes.

author Kenneth Graunke <kenneth@whitecape.org>
Sat, 26 May 2018 23:25:34 +0000 (16:25 -0700)

committer Kenneth Graunke <kenneth@whitecape.org>
Sat, 26 May 2018 23:25:50 +0000 (16:25 -0700)

diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am

index ff47add93f4a9e37404864ab918e9a02c00c870c..889d4c68a2bd25cb22ae0f6175c3382a009b3a0e 100644 (file)
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -92,14 +92,8 @@ libi965_gen11_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=110
  
  noinst_LTLIBRARIES = \
         libi965_dri.la \
-       libintel_tiled_memcpy.la \
         $(I965_PERGEN_LIBS)
  
-libintel_tiled_memcpy_la_SOURCES = \
-       $(intel_tiled_memcpy_FILES)
-libintel_tiled_memcpy_la_CFLAGS = \
-       $(AM_CFLAGS) $(SSE41_CFLAGS)
-
  libi965_dri_la_SOURCES = \
         $(i965_FILES) \
         $(i965_oa_GENERATED_FILES)
@@ -110,7 +104,6 @@ libi965_dri_la_LIBADD = \
         $(top_builddir)/src/intel/compiler/libintel_compiler.la \
         $(top_builddir)/src/intel/blorp/libblorp.la \
         $(I965_PERGEN_LIBS) \
-       libintel_tiled_memcpy.la
         $(LIBDRM_LIBS)
  
  BUILT_SOURCES = $(i965_oa_GENERATED_FILES)
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources

index ce7633c53c4097e20ade700869952b0aabfb846a..db6591ab90a2695401aa392d6b824f8c17af5d8b 100644 (file)
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -110,13 +110,11 @@ i965_FILES = \
         intel_tex_image.c \
         intel_tex_obj.h \
         intel_tex_validate.c \
+       intel_tiled_memcpy.c \
+       intel_tiled_memcpy.h \
         intel_upload.c \
         libdrm_macros.h
  
-intel_tiled_memcpy_FILES = \
-       intel_tiled_memcpy.c \
-       intel_tiled_memcpy.h
-
  i965_gen4_FILES = \
         genX_blorp_exec.c \
         genX_state_upload.c
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c

index 269bd7067739be75ac10bd42a79dec0c855dae45..7d1fa96b91953d13a4cddb5d60e3d4fa024f9994 100644 (file)
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -31,7 +31,6 @@
  #include "intel_image.h"
  #include "intel_mipmap_tree.h"
  #include "intel_tex.h"
-#include "intel_tiled_memcpy.h"
  #include "intel_blit.h"
  #include "intel_fbo.h"
  
@@ -3024,7 +3023,7 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
  }
  
  static void
-intel_miptree_unmap_map(struct brw_context *brw,
+intel_miptree_unmap_gtt(struct brw_context *brw,
                          struct intel_mipmap_tree *mt,
                          struct intel_miptree_map *map,
                          unsigned int level, unsigned int slice)
@@ -3033,7 +3032,7 @@ intel_miptree_unmap_map(struct brw_context *brw,
  }
  
  static void
-intel_miptree_map_map(struct brw_context *brw,
+intel_miptree_map_gtt(struct brw_context *brw,
                       struct intel_mipmap_tree *mt,
                       struct intel_miptree_map *map,
                       unsigned int level, unsigned int slice)
@@ -3081,7 +3080,7 @@ intel_miptree_map_map(struct brw_context *brw,
         mt, _mesa_get_format_name(mt->format),
         x, y, map->ptr, map->stride);
  
-   map->unmap = intel_miptree_unmap_map;
+   map->unmap = intel_miptree_unmap_gtt;
  }
  
  static void
@@ -3113,94 +3112,6 @@ intel_miptree_unmap_blit(struct brw_context *brw,
     intel_miptree_release(&map->linear_mt);
  }
  
-/* Compute extent parameters for use with tiled_memcpy functions.
- * xs are in units of bytes and ys are in units of strides.
- */
-static inline void
-tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
-             unsigned int level, unsigned int slice, unsigned int *x1_B,
-             unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
-{
-   unsigned int block_width, block_height;
-   unsigned int x0_el, y0_el;
-
-   _mesa_get_format_block_size(mt->format, &block_width, &block_height);
-
-   assert(map->x % block_width == 0);
-   assert(map->y % block_height == 0);
-
-   intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
-   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
-   *y1_el = map->y / block_height + y0_el;
-   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
-   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
-}
-
-static void
-intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
-                                 struct intel_mipmap_tree *mt,
-                                 struct intel_miptree_map *map,
-                                 unsigned int level,
-                                 unsigned int slice)
-{
-   if (map->mode & GL_MAP_WRITE_BIT) {
-      unsigned int x1, x2, y1, y2;
-      tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
-
-      char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
-      dst += mt->offset;
-
-      linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch,
-                      map->stride, brw->has_swizzling, mt->surf.tiling, memcpy);
-
-      intel_miptree_unmap_raw(mt);
-   }
-   _mesa_align_free(map->buffer);
-   map->buffer = map->ptr = NULL;
-}
-
-static void
-intel_miptree_map_tiled_memcpy(struct brw_context *brw,
-                               struct intel_mipmap_tree *mt,
-                               struct intel_miptree_map *map,
-                               unsigned int level, unsigned int slice)
-{
-   intel_miptree_access_raw(brw, mt, level, slice,
-                            map->mode & GL_MAP_WRITE_BIT);
-
-   unsigned int x1, x2, y1, y2;
-   tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
-   map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16);
-
-   /* The tiling and detiling functions require that the linear buffer
-    * has proper 16-byte alignment (that is, its `x0` is 16-byte
-    * aligned). Here we over-allocate the linear buffer by enough
-    * bytes to get the proper alignment.
-    */
-   map->buffer = _mesa_align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 16);
-   map->ptr = (char *)map->buffer + (x1 & 0xf);
-   assert(map->buffer);
-
-   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
-      char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
-      src += mt->offset;
-
-      const mem_copy_fn fn =
-#if defined(USE_SSE41)
-         cpu_has_sse4_1 ? (mem_copy_fn)_mesa_streaming_load_memcpy :
-#endif
-         memcpy;
-
-      tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
-                      mt->surf.row_pitch, brw->has_swizzling, mt->surf.tiling,
-                      fn);
-
-      intel_miptree_unmap_raw(mt);
-   }
-
-   map->unmap = intel_miptree_unmap_tiled_memcpy;
-}
-
  static void
  intel_miptree_map_blit(struct brw_context *brw,
                        struct intel_mipmap_tree *mt,
@@ -3732,7 +3643,6 @@ intel_miptree_map(struct brw_context *brw,
                    void **out_ptr,
                    ptrdiff_t *out_stride)
  {
-   const struct gen_device_info *devinfo = &brw->screen->devinfo;
     struct intel_miptree_map *map;
  
     assert(mt->surf.samples == 1);
@@ -3753,8 +3663,6 @@ intel_miptree_map(struct brw_context *brw,
        intel_miptree_map_depthstencil(brw, mt, map, level, slice);
     } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
        intel_miptree_map_blit(brw, mt, map, level, slice);
-   } else if (mt->surf.tiling != ISL_TILING_LINEAR && devinfo->gen > 4) {
-      intel_miptree_map_tiled_memcpy(brw, mt, map, level, slice);
  #if defined(USE_SSE41)
     } else if (!(mode & GL_MAP_WRITE_BIT) &&
                !mt->compressed && cpu_has_sse4_1 &&
@@ -3762,9 +3670,7 @@ intel_miptree_map(struct brw_context *brw,
        intel_miptree_map_movntdqa(brw, mt, map, level, slice);
  #endif
     } else {
-      if (mt->surf.tiling != ISL_TILING_LINEAR)
-         perf_debug("intel_miptree_map: mapping via gtt");
-      intel_miptree_map_map(brw, mt, map, level, slice);
+      intel_miptree_map_gtt(brw, mt, map, level, slice);
     }
  
     *out_ptr = map->ptr;
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c

index 6440dceac362806ad9eec8a40c747d97bc4bd502..7c6bde990d62dcf05906299f610ad84bcbcd20b4 100644 (file)
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -36,10 +36,6 @@
  #include "brw_context.h"
  #include "intel_tiled_memcpy.h"
  
-#if defined(USE_SSE41)
-#include "main/streaming-load-memcpy.h"
-#include <smmintrin.h>
-#endif
  #if defined(__SSSE3__)
  #include <tmmintrin.h>
  #elif defined(__SSE2__)
@@ -217,31 +213,6 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
     return dst;
  }
  
-#if defined(USE_SSE41)
-static ALWAYS_INLINE void *
-_memcpy_streaming_load(void *dest, const void *src, size_t count)
-{
-   if (count == 16) {
-      __m128i val = _mm_stream_load_si128((__m128i *)src);
-      _mm_storeu_si128((__m128i *)dest, val);
-      return dest;
-   } else if (count == 64) {
-      __m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0);
-      __m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1);
-      __m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2);
-      __m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3);
-      _mm_storeu_si128(((__m128i *)dest) + 0, val0);
-      _mm_storeu_si128(((__m128i *)dest) + 1, val1);
-      _mm_storeu_si128(((__m128i *)dest) + 2, val2);
-      _mm_storeu_si128(((__m128i *)dest) + 3, val3);
-      return dest;
-   } else {
-      assert(count < 64); /* and (count < 16) for ytiled */
-      return memcpy(dest, src, count);
-   }
-}
-#endif
-
  /**
   * Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3).
   * These ranges are in bytes, i.e. pixels * bytes-per-pixel.
@@ -706,12 +677,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
           return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
                                   dst, src, dst_pitch, swizzle_bit,
                                   rgba8_copy, rgba8_copy_aligned_src);
-#if defined(USE_SSE41)
-      else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
-         return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
-                                 dst, src, dst_pitch, swizzle_bit,
-                                 memcpy, _memcpy_streaming_load);
-#endif
        else
           unreachable("not reached");
     } else {
@@ -722,12 +687,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
           return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
                                   dst, src, dst_pitch, swizzle_bit,
                                   rgba8_copy, rgba8_copy_aligned_src);
-#if defined(USE_SSE41)
-      else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
-         return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
-                                 dst, src, dst_pitch, swizzle_bit,
-                                 memcpy, _memcpy_streaming_load);
-#endif
        else
           unreachable("not reached");
     }
@@ -760,12 +719,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
           return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
                                   dst, src, dst_pitch, swizzle_bit,
                                   rgba8_copy, rgba8_copy_aligned_src);
-#if defined(USE_SSE41)
-      else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
-         return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
-                                 dst, src, dst_pitch, swizzle_bit,
-                                 memcpy, _memcpy_streaming_load);
-#endif
        else
           unreachable("not reached");
     } else {
@@ -776,12 +729,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
           return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
                                   dst, src, dst_pitch, swizzle_bit,
                                   rgba8_copy, rgba8_copy_aligned_src);
-#if defined(USE_SSE41)
-      else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
-         return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
-                                 dst, src, dst_pitch, swizzle_bit,
-                                 memcpy, _memcpy_streaming_load);
-#endif
        else
           unreachable("not reached");
     }
@@ -921,15 +868,6 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
        unreachable("unsupported tiling");
     }
  
-#if defined(USE_SSE41)
-   if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) {
-      /* The hidden cacheline sized register used by movntdqa can apparently
-       * give you stale data, so do an mfence to invalidate it.
-       */
-      _mm_mfence();
-   }
-#endif
-
     /* Round out to tile boundaries. */
     xt0 = ALIGN_DOWN(xt1, tw);
     xt3 = ALIGN_UP  (xt2, tw);
diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build

index 1eac329f49cac6403d22e8ef81bba641cb07198e..20404d5b059736b9bc8c3e2db998b3f04226f6f1 100644 (file)
--- a/src/mesa/drivers/dri/i965/meson.build
+++ b/src/mesa/drivers/dri/i965/meson.build
@@ -129,13 +129,10 @@ files_i965 = files(
    'intel_tex_image.c',
    'intel_tex_obj.h',
    'intel_tex_validate.c',
-  'intel_upload.c',
-  'libdrm_macros.h',
-)
-
-files_intel_tiled_memcpy = files(
    'intel_tiled_memcpy.c',
    'intel_tiled_memcpy.h',
+  'intel_upload.c',
+  'libdrm_macros.h',
  )
  
  i965_gen_libs = []
@@ -179,15 +176,6 @@ i965_oa_sources = custom_target(
    ],
  )
  
-intel_tiled_memcpy = static_library(
-  'intel_tiled_memcpy',
-  [files_intel_tiled_memcpy],
-  include_directories : [
-    inc_common, inc_intel, inc_dri_common, inc_drm_uapi,
-  ],
-  c_args : [c_vis_args, no_override_init_args, '-msse2', sse41_args],
-)
-
  libi965 = static_library(
    'i965',
    [files_i965, i965_oa_sources, ir_expression_operation_h,
@@ -199,7 +187,7 @@ libi965 = static_library(
    cpp_args : [cpp_vis_args, '-msse2'],
    link_with : [
      i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler,
-    libblorp, intel_tiled_memcpy,
+    libblorp,
    ],
    dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
  )
author	Kenneth Graunke <kenneth@whitecape.org>
	Sat, 26 May 2018 23:25:34 +0000 (16:25 -0700)
committer	Kenneth Graunke <kenneth@whitecape.org>
	Sat, 26 May 2018 23:25:50 +0000 (16:25 -0700)
src/mesa/drivers/dri/i965/Makefile.am		patch | blob | history
src/mesa/drivers/dri/i965/Makefile.sources		patch | blob | history
src/mesa/drivers/dri/i965/intel_mipmap_tree.c		patch | blob | history
src/mesa/drivers/dri/i965/intel_tiled_memcpy.c		patch | blob | history
src/mesa/drivers/dri/i965/meson.build		patch | blob | history