From: Kenneth Graunke Date: Sat, 26 May 2018 23:25:34 +0000 (-0700) Subject: i965: Revert recent tiled memcpy changes. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=58fb613a51994d111ee77a65bc7f3d60b155c687;p=mesa.git i965: Revert recent tiled memcpy changes. This reverts commit 79fe00efb474b3f3f0ba4c88826ff67c53a02aef. This reverts commit f5e8b13f78a085bc95a1c0895e4a38ff6b87b375. This reverts commit d21c086d819d78fb3f6abcbb14aa492970f442aa. They broke the Android build and I'd rather not leave it broken for the long holiday weekend. --- diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index ff47add93f4..889d4c68a2b 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -92,14 +92,8 @@ libi965_gen11_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=110 noinst_LTLIBRARIES = \ libi965_dri.la \ - libintel_tiled_memcpy.la \ $(I965_PERGEN_LIBS) -libintel_tiled_memcpy_la_SOURCES = \ - $(intel_tiled_memcpy_FILES) -libintel_tiled_memcpy_la_CFLAGS = \ - $(AM_CFLAGS) $(SSE41_CFLAGS) - libi965_dri_la_SOURCES = \ $(i965_FILES) \ $(i965_oa_GENERATED_FILES) @@ -110,7 +104,6 @@ libi965_dri_la_LIBADD = \ $(top_builddir)/src/intel/compiler/libintel_compiler.la \ $(top_builddir)/src/intel/blorp/libblorp.la \ $(I965_PERGEN_LIBS) \ - libintel_tiled_memcpy.la $(LIBDRM_LIBS) BUILT_SOURCES = $(i965_oa_GENERATED_FILES) diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index ce7633c53c4..db6591ab90a 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -110,13 +110,11 @@ i965_FILES = \ intel_tex_image.c \ intel_tex_obj.h \ intel_tex_validate.c \ + intel_tiled_memcpy.c \ + intel_tiled_memcpy.h \ intel_upload.c \ libdrm_macros.h -intel_tiled_memcpy_FILES = \ - intel_tiled_memcpy.c \ - intel_tiled_memcpy.h - i965_gen4_FILES = \ genX_blorp_exec.c \ genX_state_upload.c diff --git 
a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 269bd706773..7d1fa96b919 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -31,7 +31,6 @@ #include "intel_image.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" -#include "intel_tiled_memcpy.h" #include "intel_blit.h" #include "intel_fbo.h" @@ -3024,7 +3023,7 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt) } static void -intel_miptree_unmap_map(struct brw_context *brw, +intel_miptree_unmap_gtt(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, unsigned int slice) @@ -3033,7 +3032,7 @@ intel_miptree_unmap_map(struct brw_context *brw, } static void -intel_miptree_map_map(struct brw_context *brw, +intel_miptree_map_gtt(struct brw_context *brw, struct intel_mipmap_tree *mt, struct intel_miptree_map *map, unsigned int level, unsigned int slice) @@ -3081,7 +3080,7 @@ intel_miptree_map_map(struct brw_context *brw, mt, _mesa_get_format_name(mt->format), x, y, map->ptr, map->stride); - map->unmap = intel_miptree_unmap_map; + map->unmap = intel_miptree_unmap_gtt; } static void @@ -3113,94 +3112,6 @@ intel_miptree_unmap_blit(struct brw_context *brw, intel_miptree_release(&map->linear_mt); } -/* Compute extent parameters for use with tiled_memcpy functions. - * xs are in units of bytes and ys are in units of strides. 
- */ -static inline void -tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map, - unsigned int level, unsigned int slice, unsigned int *x1_B, - unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el) -{ - unsigned int block_width, block_height; - unsigned int x0_el, y0_el; - - _mesa_get_format_block_size(mt->format, &block_width, &block_height); - - assert(map->x % block_width == 0); - assert(map->y % block_height == 0); - - intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el); - *x1_B = (map->x / block_width + x0_el) * mt->cpp; - *y1_el = map->y / block_height + y0_el; - *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp; - *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el; -} - -static void -intel_miptree_unmap_tiled_memcpy(struct brw_context *brw, - struct intel_mipmap_tree *mt, - struct intel_miptree_map *map, - unsigned int level, - unsigned int slice) -{ - if (map->mode & GL_MAP_WRITE_BIT) { - unsigned int x1, x2, y1, y2; - tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); - - char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); - dst += mt->offset; - - linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch, - map->stride, brw->has_swizzling, mt->surf.tiling, memcpy); - - intel_miptree_unmap_raw(mt); - } - _mesa_align_free(map->buffer); - map->buffer = map->ptr = NULL; -} - -static void -intel_miptree_map_tiled_memcpy(struct brw_context *brw, - struct intel_mipmap_tree *mt, - struct intel_miptree_map *map, - unsigned int level, unsigned int slice) -{ - intel_miptree_access_raw(brw, mt, level, slice, - map->mode & GL_MAP_WRITE_BIT); - - unsigned int x1, x2, y1, y2; - tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2); - map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16); - - /* The tiling and detiling functions require that the linear buffer - * has proper 16-byte alignment (that is, its `x0` is 16-byte - * aligned). 
Here we over-allocate the linear buffer by enough - * bytes to get the proper alignment. - */ - map->buffer = _mesa_align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 16); - map->ptr = (char *)map->buffer + (x1 & 0xf); - assert(map->buffer); - - if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { - char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW); - src += mt->offset; - - const mem_copy_fn fn = -#if defined(USE_SSE41) - cpu_has_sse4_1 ? (mem_copy_fn)_mesa_streaming_load_memcpy : -#endif - memcpy; - - tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride, - mt->surf.row_pitch, brw->has_swizzling, mt->surf.tiling, - fn); - - intel_miptree_unmap_raw(mt); - } - - map->unmap = intel_miptree_unmap_tiled_memcpy; -} - static void intel_miptree_map_blit(struct brw_context *brw, struct intel_mipmap_tree *mt, @@ -3732,7 +3643,6 @@ intel_miptree_map(struct brw_context *brw, void **out_ptr, ptrdiff_t *out_stride) { - const struct gen_device_info *devinfo = &brw->screen->devinfo; struct intel_miptree_map *map; assert(mt->surf.samples == 1); @@ -3753,8 +3663,6 @@ intel_miptree_map(struct brw_context *brw, intel_miptree_map_depthstencil(brw, mt, map, level, slice); } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) { intel_miptree_map_blit(brw, mt, map, level, slice); - } else if (mt->surf.tiling != ISL_TILING_LINEAR && devinfo->gen > 4) { - intel_miptree_map_tiled_memcpy(brw, mt, map, level, slice); #if defined(USE_SSE41) } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed && cpu_has_sse4_1 && @@ -3762,9 +3670,7 @@ intel_miptree_map(struct brw_context *brw, intel_miptree_map_movntdqa(brw, mt, map, level, slice); #endif } else { - if (mt->surf.tiling != ISL_TILING_LINEAR) - perf_debug("intel_miptree_map: mapping via gtt"); - intel_miptree_map_map(brw, mt, map, level, slice); + intel_miptree_map_gtt(brw, mt, map, level, slice); } *out_ptr = map->ptr; diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c 
b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c index 6440dceac36..7c6bde990d6 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c @@ -36,10 +36,6 @@ #include "brw_context.h" #include "intel_tiled_memcpy.h" -#if defined(USE_SSE41) -#include "main/streaming-load-memcpy.h" -#include <smmintrin.h> -#endif #if defined(__SSSE3__) #include <tmmintrin.h> #elif defined(__SSE2__) @@ -217,31 +213,6 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes) return dst; } -#if defined(USE_SSE41) -static ALWAYS_INLINE void * -_memcpy_streaming_load(void *dest, const void *src, size_t count) -{ - if (count == 16) { - __m128i val = _mm_stream_load_si128((__m128i *)src); - _mm_storeu_si128((__m128i *)dest, val); - return dest; - } else if (count == 64) { - __m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0); - __m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1); - __m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2); - __m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3); - _mm_storeu_si128(((__m128i *)dest) + 0, val0); - _mm_storeu_si128(((__m128i *)dest) + 1, val1); - _mm_storeu_si128(((__m128i *)dest) + 2, val2); - _mm_storeu_si128(((__m128i *)dest) + 3, val3); - return dest; - } else { - assert(count < 64); /* and (count < 16) for ytiled */ - return memcpy(dest, src, count); - } -} -#endif - /** * Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3). * These ranges are in bytes, i.e. pixels * bytes-per-pixel. 
@@ -706,12 +677,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); -#if defined(USE_SSE41) - else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) - return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif else unreachable("not reached"); } else { @@ -722,12 +687,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return xtiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); -#if defined(USE_SSE41) - else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) - return xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif else unreachable("not reached"); } @@ -760,12 +719,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); -#if defined(USE_SSE41) - else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) - return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif else unreachable("not reached"); } else { @@ -776,12 +729,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return ytiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, rgba8_copy, rgba8_copy_aligned_src); -#if defined(USE_SSE41) - else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) - return ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, - memcpy, _memcpy_streaming_load); -#endif else unreachable("not reached"); } @@ 
-921,15 +868,6 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2, unreachable("unsupported tiling"); } -#if defined(USE_SSE41) - if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) { - /* The hidden cacheline sized register used by movntdqa can apparently - * give you stale data, so do an mfence to invalidate it. - */ - _mm_mfence(); - } -#endif - /* Round out to tile boundaries. */ xt0 = ALIGN_DOWN(xt1, tw); xt3 = ALIGN_UP (xt2, tw); diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build index 1eac329f49c..20404d5b059 100644 --- a/src/mesa/drivers/dri/i965/meson.build +++ b/src/mesa/drivers/dri/i965/meson.build @@ -129,13 +129,10 @@ files_i965 = files( 'intel_tex_image.c', 'intel_tex_obj.h', 'intel_tex_validate.c', - 'intel_upload.c', - 'libdrm_macros.h', -) - -files_intel_tiled_memcpy = files( 'intel_tiled_memcpy.c', 'intel_tiled_memcpy.h', + 'intel_upload.c', + 'libdrm_macros.h', ) i965_gen_libs = [] @@ -179,15 +176,6 @@ i965_oa_sources = custom_target( ], ) -intel_tiled_memcpy = static_library( - 'intel_tiled_memcpy', - [files_intel_tiled_memcpy], - include_directories : [ - inc_common, inc_intel, inc_dri_common, inc_drm_uapi, - ], - c_args : [c_vis_args, no_override_init_args, '-msse2', sse41_args], -) - libi965 = static_library( 'i965', [files_i965, i965_oa_sources, ir_expression_operation_h, @@ -199,7 +187,7 @@ libi965 = static_library( cpp_args : [cpp_vis_args, '-msse2'], link_with : [ i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler, - libblorp, intel_tiled_memcpy, + libblorp, ], dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers], )