From 86666f3a21a2380e19b5f0de90b6b6329ce3de53 Mon Sep 17 00:00:00 2001 From: Samuel Martin Date: Thu, 4 May 2017 23:53:33 +0200 Subject: [PATCH] package/opencv3: fix build for x86_64 target This change backports a fix from upstream fixing missing symbols due to wrong compiler optimization flags. On Buildroot side, also remove deprecated options passed on the configure command. Fixes: http://autobuild.buildroot.net/results/af950c6172a578c4eeeb9440b6990ceb48263fa6/ http://autobuild.buildroot.net/results/e8832302cd4f30cec076f67965f6aacbaca5b955/ Cc: Vicente Olivert Riera Signed-off-by: Samuel Martin Signed-off-by: Thomas Petazzoni --- ...1-cmake-support-multiple-CPU-targets.patch | 2499 +++++++++++++++++ package/opencv3/opencv3.mk | 16 +- 2 files changed, 2500 insertions(+), 15 deletions(-) create mode 100644 package/opencv3/0001-cmake-support-multiple-CPU-targets.patch diff --git a/package/opencv3/0001-cmake-support-multiple-CPU-targets.patch b/package/opencv3/0001-cmake-support-multiple-CPU-targets.patch new file mode 100644 index 0000000000..44cc018700 --- /dev/null +++ b/package/opencv3/0001-cmake-support-multiple-CPU-targets.patch @@ -0,0 +1,2499 @@ +From 5628a02177eec7adac1fcaf30634a6d6e17bb15d Mon Sep 17 00:00:00 2001 +From: Alexander Alekhin +Date: Wed, 7 Sep 2016 18:02:36 +0300 +Subject: [PATCH] cmake: support multiple CPU targets + +Backported from: https://github.com/opencv/opencv/commit/e16227b53cabab1caa4b7aba8ff59a630528348f +Signed-off-by: Samuel Martin +--- + CMakeLists.txt | 50 +- + cmake/OpenCVCompilerOptimizations.cmake | 651 +++++++++++++++++++++ + cmake/OpenCVCompilerOptions.cmake | 161 +---- + cmake/OpenCVGenHeaders.cmake | 4 + + cmake/OpenCVModule.cmake | 3 + + cmake/OpenCVPCHSupport.cmake | 5 +- + cmake/OpenCVUtils.cmake | 50 +- + cmake/checks/cpu_avx.cpp | 9 + + cmake/checks/cpu_avx2.cpp | 10 + + cmake/checks/cpu_avx512.cpp | 10 + + cmake/checks/{fp16.cpp => cpu_fp16.cpp} | 2 +- + cmake/checks/cpu_popcnt.cpp | 8 + + cmake/checks/cpu_sse.cpp | 
2 + + cmake/checks/cpu_sse2.cpp | 2 + + cmake/checks/cpu_sse3.cpp | 7 + + cmake/checks/cpu_sse41.cpp | 6 + + cmake/checks/cpu_sse42.cpp | 5 + + cmake/checks/cpu_ssse3.cpp | 7 + + cmake/templates/cv_cpu_config.h.in | 5 + + cmake/templates/cvconfig.h.in | 13 + + .../core/include/opencv2/core/cv_cpu_dispatch.h | 166 ++++++ + modules/core/include/opencv2/core/cv_cpu_helper.h | 133 +++++ + modules/core/include/opencv2/core/cvdef.h | 145 +---- + modules/core/include/opencv2/core/fast_math.hpp | 60 +- + modules/core/src/system.cpp | 301 ++++++++-- + modules/highgui/CMakeLists.txt | 2 +- + modules/imgproc/src/imgwarp.cpp | 4 +- + modules/objdetect/src/haar.cpp | 5 +- + 28 files changed, 1440 insertions(+), 386 deletions(-) + create mode 100644 cmake/OpenCVCompilerOptimizations.cmake + create mode 100644 cmake/checks/cpu_avx.cpp + create mode 100644 cmake/checks/cpu_avx2.cpp + create mode 100644 cmake/checks/cpu_avx512.cpp + rename cmake/checks/{fp16.cpp => cpu_fp16.cpp} (86%) + create mode 100644 cmake/checks/cpu_popcnt.cpp + create mode 100644 cmake/checks/cpu_sse.cpp + create mode 100644 cmake/checks/cpu_sse2.cpp + create mode 100644 cmake/checks/cpu_sse3.cpp + create mode 100644 cmake/checks/cpu_sse41.cpp + create mode 100644 cmake/checks/cpu_sse42.cpp + create mode 100644 cmake/checks/cpu_ssse3.cpp + create mode 100644 cmake/templates/cv_cpu_config.h.in + create mode 100644 modules/core/include/opencv2/core/cv_cpu_dispatch.h + create mode 100644 modules/core/include/opencv2/core/cv_cpu_helper.h + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index cc45f6f39..9c9971ec0 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -85,6 +85,10 @@ if(POLICY CMP0042) + cmake_policy(SET CMP0042 NEW) + endif() + ++if(POLICY CMP0051) ++ cmake_policy(SET CMP0051 NEW) ++endif() ++ + include(cmake/OpenCVUtils.cmake) + + # must go before the project command +@@ -274,16 +278,6 @@ OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" + OCV_OPTION(ENABLE_OMIT_FRAME_POINTER 
"Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) ) + OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) ) + OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) ) +-OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) +-OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) +-OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) ) +-OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) +-OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) ) +-OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) +-OCV_OPTION(ENABLE_POPCNT "Enable POPCNT instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) +-OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) +-OCV_OPTION(ENABLE_AVX2 "Enable AVX2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) +-OCV_OPTION(ENABLE_FMA3 "Enable FMA3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) + OCV_OPTION(ENABLE_NEON "Enable NEON instructions" "${NEON}" IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) ) + OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) ) + OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF ) +@@ -292,6 +286,9 @@ OCV_OPTION(ANDROID_EXAMPLES_WITH_LIBS "Build binaries 
of Android examples with n + OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function call" OFF ) + OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF ) + OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX) ++OCV_OPTION(CV_ENABLE_INTRINSICS "Use intrinsic-based optimized code" ON ) ++OCV_OPTION(CV_DISABLE_OPTIMIZATION "Disable explicit optimized code (dispatched code/intrinsics/loop unrolling/etc)" OFF ) ++ + + OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF ) + +@@ -492,6 +489,9 @@ if(CMAKE_GENERATOR MATCHES "Makefiles|Ninja" AND "${CMAKE_BUILD_TYPE}" STREQUAL + set(CMAKE_BUILD_TYPE Release) + endif() + ++# --- Python Support --- ++include(cmake/OpenCVDetectPython.cmake) ++ + include(cmake/OpenCVCompilerOptions.cmake) + + +@@ -569,9 +569,6 @@ else() + unset(DOXYGEN_FOUND CACHE) + endif() + +-# --- Python Support --- +-include(cmake/OpenCVDetectPython.cmake) +- + # --- Java Support --- + include(cmake/OpenCVDetectApacheAnt.cmake) + if(ANDROID) +@@ -860,6 +857,33 @@ if(NOT CMAKE_GENERATOR MATCHES "Xcode|Visual Studio") + status(" Configuration:" ${CMAKE_BUILD_TYPE}) + endif() + ++ ++# ========================= CPU code generation mode ========================= ++status("") ++status(" CPU/HW features:") ++status(" Baseline:" "${CPU_BASELINE_FINAL}") ++if(NOT CPU_BASELINE STREQUAL CPU_BASELINE_FINAL) ++ status(" requested:" "${CPU_BASELINE}") ++endif() ++if(CPU_BASELINE_REQUIRE) ++ status(" required:" "${CPU_BASELINE_REQUIRE}") ++endif() ++if(CPU_BASELINE_DISABLE) ++ status(" disabled:" "${CPU_BASELINE_DISABLE}") ++endif() ++if(CPU_DISPATCH_FINAL OR CPU_DISPATCH) ++ status(" Dispatched code generation:" "${CPU_DISPATCH_FINAL}") ++ if(NOT CPU_DISPATCH STREQUAL CPU_DISPATCH_FINAL) ++ status(" requested:" "${CPU_DISPATCH}") ++ endif() ++ 
if(CPU_DISPATCH_REQUIRE) ++ status(" required:" "${CPU_DISPATCH_REQUIRE}") ++ endif() ++ foreach(OPT ${CPU_DISPATCH_FINAL}) ++ status(" ${OPT} (${CPU_${OPT}_USAGE_COUNT} files):" "+ ${CPU_DISPATCH_${OPT}_INCLUDED}") ++ endforeach() ++endif() ++ + # ========================== C/C++ options ========================== + if(CMAKE_CXX_COMPILER_VERSION) + set(OPENCV_COMPILER_STR "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1} (ver ${CMAKE_CXX_COMPILER_VERSION})") +diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake +new file mode 100644 +index 000000000..b849f02b1 +--- /dev/null ++++ b/cmake/OpenCVCompilerOptimizations.cmake +@@ -0,0 +1,651 @@ ++# x86/x86-64 arch: ++# SSE / SSE2 (always available on 64-bit CPUs) ++# SSE3 / SSSE3 ++# SSE4_1 / SSE4_2 / POPCNT ++# AVX / AVX2 / AVX512 ++# FMA3 ++ ++# CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag) ++# CPU_{opt}_IMPLIES= ++# CPU_{opt}_FORCE= - subset of "implies" list ++# CPU_{opt}_FLAGS_ON="" ++# CPU_{opt}_FEATURE_ALIAS - mapping to CV_CPU_* HWFeature enum ++ ++# Input variables: ++# CPU_BASELINE= - preferred list of baseline optimizations ++# CPU_DISPATCH= - preferred list of dispatched optimizations ++ ++# Advanced input variables: ++# CPU_BASELINE_REQUIRE= - list of required baseline optimizations ++# CPU_DISPATCH_REQUIRE= - list of required dispatched optimizations ++# CPU_BASELINE_DISABLE= - list of disabled baseline optimizations ++ ++# Output variables: ++# CPU_BASELINE_FINAL= - final list of enabled compiler optimizations ++# CPU_DISPATCH_FINAL= - final list of dispatched optimizations ++# ++# CPU_DISPATCH_FLAGS_${opt} - flags for source files compiled separately (_opt_avx2.cpp) ++ ++set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3") # without AVX512 ++list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16) ++list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS) ++ ++ocv_update(CPU_VFPV3_FEATURE_ALIAS "") ++ ++ 
++set(HELP_CPU_BASELINE "Specify list of enabled baseline CPU optimizations") ++set(HELP_CPU_BASELINE_REQUIRE "Specify list of required baseline CPU optimizations") ++set(HELP_CPU_BASELINE_DISABLE "Specify list of forbidden baseline CPU optimizations") ++set(HELP_CPU_DISPATCH "Specify list of dispatched CPU optimizations") ++set(HELP_CPU_DISPATCH_REQUIRE "Specify list of required dispatched CPU optimizations") ++ ++foreach(var CPU_BASELINE CPU_BASELINE_REQUIRE CPU_BASELINE_DISABLE CPU_DISPATCH CPU_DISPATCH_REQUIRE) ++ if(DEFINED ${var}) ++ string(REPLACE "," ";" _list "${${var}}") ++ set(${var} "${_list}" CACHE STRING "${HELP_${var}}" FORCE) ++ endif() ++endforeach() ++ ++# process legacy flags ++macro(ocv_optimization_process_obsolete_option legacy_flag OPT legacy_warn) ++ if(DEFINED ${legacy_flag}) ++ if(${legacy_warn}) ++ message(STATUS "WARNING: Option ${legacy_flag}='${${legacy_flag}}' is deprecated and should not be used anymore") ++ message(STATUS " Behaviour of this option is not backward compatible") ++ message(STATUS " Refer to 'CPU_BASELINE'/'CPU_DISPATCH' CMake options documentation") ++ endif() ++ if(${legacy_flag}) ++ if(NOT ";${CPU_BASELINE_REQUIRE};" MATCHES ";${OPT};") ++ set(CPU_BASELINE_REQUIRE "${CPU_BASELINE_REQUIRE};${OPT}" CACHE STRING "${HELP_CPU_BASELINE_REQUIRE}" FORCE) ++ endif() ++ else() ++ if(NOT ";${CPU_BASELINE_DISABLE};" MATCHES ";${OPT};") ++ set(CPU_BASELINE_DISABLE "${CPU_BASELINE_DISABLE};${OPT}" CACHE STRING "${HELP_CPU_BASELINE_DISABLE}" FORCE) ++ endif() ++ endif() ++ endif() ++endmacro() ++ocv_optimization_process_obsolete_option(ENABLE_SSE SSE ON) ++ocv_optimization_process_obsolete_option(ENABLE_SSE2 SSE2 ON) ++ocv_optimization_process_obsolete_option(ENABLE_SSE3 SSE3 ON) ++ocv_optimization_process_obsolete_option(ENABLE_SSSE3 SSSE3 ON) ++ocv_optimization_process_obsolete_option(ENABLE_SSE41 SSE4_1 ON) ++ocv_optimization_process_obsolete_option(ENABLE_SSE42 SSE4_2 ON) 
++ocv_optimization_process_obsolete_option(ENABLE_POPCNT POPCNT ON) ++ocv_optimization_process_obsolete_option(ENABLE_AVX AVX ON) ++ocv_optimization_process_obsolete_option(ENABLE_AVX2 AVX2 ON) ++ocv_optimization_process_obsolete_option(ENABLE_FMA3 FMA3 ON) ++ ++ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF) ++ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF) ++ ++ ++macro(ocv_is_optimization_in_list resultvar check_opt) ++ set(__checked "") ++ set(__queue ${ARGN}) ++ set(${resultvar} 0) ++ while(__queue AND NOT ${resultvar}) ++ list(REMOVE_DUPLICATES __queue) ++ set(__queue_current ${__queue}) ++ set(__queue "") ++ foreach(OPT ${__queue_current}) ++ if("x${OPT}" STREQUAL "x${check_opt}") ++ set(${resultvar} 1) ++ break() ++ elseif(NOT ";${__checked};" MATCHES ";${OPT};") ++ list(APPEND __queue ${CPU_${OPT}_IMPLIES}) ++ endif() ++ list(APPEND __checked ${OPT}) ++ endforeach() ++ endwhile() ++endmacro() ++ ++macro(ocv_is_optimization_in_force_list resultvar check_opt) ++ set(__checked "") ++ set(__queue ${ARGN}) ++ set(${resultvar} 0) ++ while(__queue AND NOT ${resultvar}) ++ list(REMOVE_DUPLICATES __queue) ++ set(__queue_current ${__queue}) ++ set(__queue "") ++ foreach(OPT ${__queue_current}) ++ if(OPT STREQUAL "${check_opt}") ++ set(${resultvar} 1) ++ break() ++ elseif(NOT ";${__checked};" MATCHES ";${OPT};") ++ list(APPEND __queue ${CPU_${OPT}_FORCE}) ++ endif() ++ list(APPEND __checked ${OPT}) ++ endforeach() ++ endwhile() ++endmacro() ++ ++macro(ocv_append_optimization_flag var OPT) ++ if(CPU_${OPT}_FLAGS_CONFLICT) ++ string(REGEX REPLACE " ${CPU_${OPT}_FLAGS_CONFLICT}" "" ${var} " ${${var}}") ++ string(REGEX REPLACE "^ +" "" ${var} "${${var}}") ++ endif() ++ set(${var} "${${var}} ${CPU_${OPT}_FLAGS_ON}") ++endmacro() ++ ++# Support GCC -march=native or Intel Compiler -xHost flags ++if(";${CPU_BASELINE};" MATCHES ";NATIVE;" OR ";${CPU_BASELINE};" MATCHES ";HOST;") ++ set(CPU_BASELINE_DETECT ON) ++ set(_add_native_flag ON) 
++elseif(";${CPU_BASELINE};" MATCHES ";DETECT;") ++ set(CPU_BASELINE_DETECT ON) ++elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ") ++ if(DEFINED CPU_BASELINE) ++ message(STATUS "CPU: Detected '-march=native' or '-xHost' compiler flag. Force CPU_BASELINE=DETECT.") ++ endif() ++ set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}") ++ set(CPU_BASELINE_DETECT ON) ++endif() ++ ++if(X86 OR X86_64) ++ ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX512") ++ ++ ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp") ++ ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp") ++ ocv_update(CPU_SSE3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse3.cpp") ++ ocv_update(CPU_SSSE3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_ssse3.cpp") ++ ocv_update(CPU_SSE4_1_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse41.cpp") ++ ocv_update(CPU_SSE4_2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse42.cpp") ++ ocv_update(CPU_POPCNT_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_popcnt.cpp") ++ ocv_update(CPU_AVX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx.cpp") ++ ocv_update(CPU_AVX2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx2.cpp") ++ ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp") ++ ocv_update(CPU_AVX512_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512.cpp") ++ ++ if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE) ++ ocv_update(CPU_AVX512_IMPLIES "AVX2") ++ ocv_update(CPU_AVX512_FORCE "") # Don't force other optimizations ++ ocv_update(CPU_AVX2_IMPLIES "AVX;FMA3;FP16") ++ ocv_update(CPU_FMA3_IMPLIES "AVX2") ++ ocv_update(CPU_FMA3_FORCE "") # Don't force other optimizations ++ ocv_update(CPU_FP16_IMPLIES "AVX") ++ ocv_update(CPU_FP16_FORCE "") # Don't force other optimizations ++ ocv_update(CPU_AVX_IMPLIES "SSE4_2") ++ ocv_update(CPU_SSE4_2_IMPLIES "SSE4_1;POPCNT") ++ 
ocv_update(CPU_POPCNT_IMPLIES "SSE4_1") ++ ocv_update(CPU_POPCNT_FORCE "") # Don't force other optimizations ++ ocv_update(CPU_SSE4_1_IMPLIES "SSE3;SSSE3") ++ ocv_update(CPU_SSSE3_IMPLIES "SSE3") ++ ocv_update(CPU_SSE3_IMPLIES "SSE2") ++ ocv_update(CPU_SSE2_IMPLIES "SSE") ++ endif() ++ ++ if(CV_ICC) ++ macro(ocv_intel_compiler_optimization_option name unix_flags msvc_flags) ++ ocv_update(CPU_${name}_FLAGS_NAME "${name}") ++ if(MSVC) ++ set(enable_flags "${msvc_flags}") ++ set(flags_conflict "/arch:[^ ]+") ++ else() ++ set(enable_flags "${unix_flags}") ++ set(flags_conflict "-msse[^ ]*|-mssse3|-mavx[^ ]*|-march[^ ]+") ++ endif() ++ ocv_update(CPU_${name}_FLAGS_ON "${enable_flags}") ++ if(flags_conflict) ++ ocv_update(CPU_${name}_FLAGS_CONFLICT "${flags_conflict}") ++ endif() ++ endmacro() ++ ocv_intel_compiler_optimization_option(AVX2 "-march=core-avx2" "/arch:CORE-AVX2") ++ ocv_intel_compiler_optimization_option(FP16 "-mavx" "/arch:AVX") ++ ocv_intel_compiler_optimization_option(AVX "-mavx" "/arch:AVX") ++ ocv_intel_compiler_optimization_option(FMA3 "" "") ++ ocv_intel_compiler_optimization_option(POPCNT "" "") ++ ocv_intel_compiler_optimization_option(SSE4_2 "-msse4.2" "/arch:SSE4.2") ++ ocv_intel_compiler_optimization_option(SSE4_1 "-msse4.1" "/arch:SSE4.1") ++ ocv_intel_compiler_optimization_option(SSE3 "-msse3" "/arch:SSE3") ++ ocv_intel_compiler_optimization_option(SSSE3 "-mssse3" "/arch:SSSE3") ++ ocv_intel_compiler_optimization_option(SSE2 "-msse2" "/arch:SSE2") ++ if(NOT X86_64) # x64 compiler doesn't support /arch:sse ++ ocv_intel_compiler_optimization_option(SSE "-msse" "/arch:SSE") ++ endif() ++ #ocv_intel_compiler_optimization_option(AVX512 "-march=core-avx512") ++ elseif(CMAKE_COMPILER_IS_GNUCXX) ++ ocv_update(CPU_AVX2_FLAGS_ON "-mavx2") ++ ocv_update(CPU_FP16_FLAGS_ON "-mf16c") ++ ocv_update(CPU_AVX_FLAGS_ON "-mavx") ++ ocv_update(CPU_FMA3_FLAGS_ON "-mfma") ++ ocv_update(CPU_POPCNT_FLAGS_ON "-mpopcnt") ++ ocv_update(CPU_SSE4_2_FLAGS_ON "-msse4.2") ++ 
ocv_update(CPU_SSE4_1_FLAGS_ON "-msse4.1") ++ ocv_update(CPU_SSE3_FLAGS_ON "-msse3") ++ ocv_update(CPU_SSSE3_FLAGS_ON "-mssse3") ++ ocv_update(CPU_SSE2_FLAGS_ON "-msse2") ++ ocv_update(CPU_SSE_FLAGS_ON "-msse") ++ if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") ++ ocv_update(CPU_AVX512_FLAGS_ON "-mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi") ++ endif() ++ elseif(MSVC) ++ ocv_update(CPU_AVX2_FLAGS_ON "/arch:AVX2") ++ ocv_update(CPU_AVX_FLAGS_ON "/arch:AVX") ++ if(NOT MSVC64) ++ # 64-bit MSVC compiler uses SSE/SSE2 by default ++ ocv_update(CPU_SSE_FLAGS_ON "/arch:SSE") ++ ocv_update(CPU_SSE_SUPPORTED ON) ++ ocv_update(CPU_SSE2_FLAGS_ON "/arch:SSE2") ++ ocv_update(CPU_SSE2_SUPPORTED ON) ++ else() ++ ocv_update(CPU_SSE_SUPPORTED ON) ++ ocv_update(CPU_SSE2_SUPPORTED ON) ++ endif() ++ # Other instruction sets are supported by default since MSVC 2008 at least ++ else() ++ message(WARNING "TODO: Unsupported compiler") ++ endif() ++ ++ if(NOT DEFINED CPU_DISPATCH) ++ set(CPU_DISPATCH "SSE4_1;AVX;FP16;AVX2" CACHE STRING "${HELP_CPU_DISPATCH}") ++ endif() ++ ++ if(NOT DEFINED CPU_BASELINE) ++ if(X86_64) ++ set(CPU_BASELINE "SSSE3" CACHE STRING "${HELP_CPU_BASELINE}") ++ else() ++ set(CPU_BASELINE "SSE2" CACHE STRING "${HELP_CPU_BASELINE}") ++ endif() ++ endif() ++ ++elseif(ARM OR AARCH64) ++ ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp") ++ if(NOT AARCH64) ++ ocv_update(CPU_KNOWN_OPTIMIZATIONS "VFPV3;NEON;FP16") ++ ocv_update(CPU_NEON_FLAGS_ON "-mfpu=neon") ++ ocv_update(CPU_VFPV3_FLAGS_ON "-mfpu=vfpv3") ++ ocv_update(CPU_FP16_FLAGS_ON "-mfpu=neon-fp16") ++ set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}") ++ else() ++ ocv_update(CPU_KNOWN_OPTIMIZATIONS "NEON;FP16") ++ ocv_update(CPU_NEON_FLAGS_ON "") ++ set(CPU_BASELINE "NEON" CACHE STRING "${HELP_CPU_BASELINE}") ++ endif() ++endif() ++ ++# Helper values for cmake-gui ++set(CPU_BASELINE "DETECT" CACHE STRING 
"${HELP_CPU_BASELINE}") ++set(CPU_DISPATCH "" CACHE STRING "${HELP_CPU_DISPATCH}") ++set_property(CACHE CPU_BASELINE PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS}) ++set_property(CACHE CPU_DISPATCH PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS}) ++ ++set(CPU_BASELINE_FLAGS "") ++ ++set(CPU_BASELINE_FINAL "") ++set(CPU_DISPATCH_FINAL "") ++ ++macro(ocv_check_compiler_optimization OPT) ++ if(NOT DEFINED CPU_${OPT}_SUPPORTED) ++ if((DEFINED CPU_${OPT}_FLAGS_ON AND NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") OR CPU_${OPT}_TEST_FILE) ++ set(_varname "") ++ if(CPU_${OPT}_TEST_FILE) ++ set(__available 0) ++ if(CPU_BASELINE_DETECT) ++ set(_varname "HAVE_CPU_${OPT}_SUPPORT") ++ ocv_check_compiler_flag(CXX "${CPU_BASELINE_FLAGS}" "${_varname}" "${CPU_${OPT}_TEST_FILE}") ++ if(${_varname}) ++ list(APPEND CPU_BASELINE_FINAL ${OPT}) ++ set(__available 1) ++ endif() ++ endif() ++ if(NOT __available) ++ if(NOT "x${CPU_${OPT}_FLAGS_NAME}" STREQUAL "x") ++ set(_varname "HAVE_CPU_${CPU_${OPT}_FLAGS_NAME}") ++ set(_compile_flags "${CPU_BASELINE_FLAGS}") ++ ocv_append_optimization_flag(_compile_flags ${OPT}) ++ ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}") ++ elseif(NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") ++ ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "" "${CPU_${OPT}_TEST_FILE}") ++ else() ++ set(_varname "HAVE_CPU_${OPT}_SUPPORT") ++ set(_compile_flags "${CPU_BASELINE_FLAGS}") ++ ocv_append_optimization_flag(_compile_flags ${OPT}) ++ ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}") ++ endif() ++ endif() ++ else() ++ ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "") ++ endif() ++ if(_varname AND ${_varname}) ++ set(CPU_${OPT}_SUPPORTED ON) ++ elseif(NOT CPU_${OPT}_SUPPORTED) ++ message(STATUS "${OPT} is not supported by C++ compiler") ++ endif() ++ else() ++ set(CPU_${OPT}_SUPPORTED ON) ++ endif() ++ endif() ++endmacro() ++ ++foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS}) 
++  set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "" FORCE)  # reset the per-optimization file counter on every configure
++  if(NOT DEFINED CPU_${OPT}_FORCE)
++    set(CPU_${OPT}_FORCE "${CPU_${OPT}_IMPLIES}")  # default: the "force" list equals the "implies" list
++  endif()
++endforeach()
++
++if(_add_native_flag)  # set above when CPU_BASELINE contains NATIVE or HOST
++  set(_varname "HAVE_CPU_NATIVE_SUPPORT")
++  ocv_check_compiler_flag(CXX "-march=native" "${_varname}" "")
++  if(${_varname})  # dereference: _varname only holds the NAME of the check-result variable
++    set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} -march=native")
++  else()  # -march=native rejected: fall back to the Intel-style host flag
++    set(_varname "HAVE_CPU_HOST_SUPPORT")
++    if(MSVC)
++      set(_flag "/QxHost")
++    else()
++      set(_flag "-xHost")
++    endif()
++    ocv_check_compiler_flag(CXX "${_flag}" "${_varname}" "")
++    if(${_varname})  # dereference the result variable, as above
++      set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} ${_flag}")  # append the flag that was actually tested
++    endif()
++  endif()
++endif()
++
++foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})  # sort each known optimization into the baseline or dispatch list
++  set(__is_disabled 0)
++  foreach(OPT2 ${CPU_BASELINE_DISABLE})
++    ocv_is_optimization_in_list(__is_disabled ${OPT2} ${OPT})  # true if OPT is, or transitively implies, the disabled OPT2
++    if(__is_disabled)
++      break()
++    endif()
++  endforeach()
++  if(__is_disabled)
++    set(__is_from_baseline 0)
++  else()
++    ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_REQUIRE})
++    if(NOT __is_from_baseline)
++      ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE})
++    endif()
++  endif()
++  ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH_REQUIRE})
++  if(NOT __is_from_dispatch)
++    ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH})
++  endif()
++  if(__is_from_dispatch OR __is_from_baseline OR CPU_BASELINE_DETECT)
++    ocv_check_compiler_optimization(${OPT})  # sets CPU_${OPT}_SUPPORTED; in detect mode may also append to CPU_BASELINE_FINAL
++  endif()
++  if(CPU_BASELINE_DETECT AND NOT __is_from_baseline AND NOT __is_disabled)
++    ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_FINAL})
++  endif()
++  if(CPU_${OPT}_SUPPORTED)
++    if(";${CPU_DISPATCH};" MATCHES ";${OPT};" AND NOT __is_from_baseline)
++      list(APPEND CPU_DISPATCH_FINAL ${OPT})
++    elseif(__is_from_baseline AND NOT CPU_BASELINE_DETECT)
++      list(APPEND CPU_BASELINE_FINAL ${OPT})
++      ocv_append_optimization_flag(CPU_BASELINE_FLAGS ${OPT})
++    endif()
++  endif()
++endforeach() ++ ++foreach(OPT ${CPU_BASELINE_REQUIRE}) ++ if(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") ++ message(SEND_ERROR "Required baseline optimization is not supported: ${OPT} (CPU_BASELINE_REQUIRE=${CPU_BASELINE_REQUIRE})") ++ endif() ++endforeach() ++ ++foreach(OPT ${CPU_BASELINE}) ++ if(OPT STREQUAL "DETECT" OR OPT STREQUAL "HOST" OR OPT STREQUAL "NATIVE") ++ # nothing ++ elseif(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") ++ message(STATUS "Optimization ${OPT} is not available, skipped") ++ endif() ++endforeach() ++ ++foreach(OPT ${CPU_DISPATCH_REQUIRE}) ++ if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};") ++ # OK ++ elseif(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") ++ message(SEND_ERROR "Dispatched optimization ${OPT} is in baseline list (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})") ++ else() ++ message(SEND_ERROR "Required dispatch optimization is not supported: ${OPT} (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})") ++ endif() ++endforeach() ++ ++foreach(OPT ${CPU_DISPATCH}) ++ if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};") ++ # OK ++ elseif(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") ++ # OK ++ else() ++ message(STATUS "Dispatch optimization ${OPT} is not available, skipped") ++ endif() ++endforeach() ++ ++#message(STATUS "CPU_BASELINE_FINAL=${CPU_BASELINE_FINAL}") ++#message(STATUS "CPU_DISPATCH_FINAL=${CPU_DISPATCH_FINAL}") ++ ++#if(CPU_DISPATCH_FINAL AND NOT PYTHON_DEFAULT_EXECUTABLE) ++# message(FATAL_ERROR "Python is required for CPU dispatched optimization support") ++#endif() ++ ++macro(ocv_compiler_optimization_options) ++ set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${CPU_BASELINE_FLAGS}") ++ if(NOT __flags STREQUAL CACHED_CPU_BASELINE_FLAGS) ++ set(CACHED_CPU_BASELINE_FLAGS "${__flags}" CACHE INTERNAL "" FORCE) ++ ocv_clear_vars(HAVE_CPU_BASELINE_FLAGS) ++ endif() ++ ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_BASELINE_FLAGS) ++ if(NOT HAVE_CPU_BASELINE_FLAGS) ++ message(FATAL_ERROR "Compiler doesn't support baseline 
optimization flags: ${CPU_BASELINE_FLAGS}") ++ endif() ++ add_extra_compiler_option_force("${CPU_BASELINE_FLAGS}") ++ ++ foreach(OPT ${CPU_DISPATCH_FINAL}) ++ set(__dispatch_flags "") ++ set(__dispatch_definitions "") ++ set(__dispatch_opts "") ++ set(__dispatch_opts_force "") ++ foreach(OPT2 ${CPU_KNOWN_OPTIMIZATIONS}) ++ if(NOT CPU_${OPT2}_SUPPORTED) ++ #continue() ++ else() ++ ocv_is_optimization_in_list(__is_from_baseline ${OPT2} ${CPU_BASELINE_FINAL}) ++ if(NOT __is_from_baseline) ++ ocv_is_optimization_in_list(__is_active ${OPT2} ${OPT}) ++ if(__is_active) ++ ocv_append_optimization_flag(__dispatch_flags ${OPT2}) ++ list(APPEND __dispatch_definitions "CV_CPU_COMPILE_${OPT2}=1") ++ list(APPEND __dispatch_opts "${OPT2}") ++ endif() ++ ocv_is_optimization_in_force_list(__is_force ${OPT2} ${OPT}) ++ if(__is_force) ++ list(APPEND __dispatch_opts_force "${OPT2}") ++ endif() ++ endif() ++ endif() ++ endforeach() ++ set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${__dispatch_flags}") ++ if(NOT __flags STREQUAL CACHED_CPU_DISPATCH_${OPT}_FLAGS) ++ set(CACHED_CPU_DISPATCH_${OPT}_FLAGS "${__flags}" CACHE INTERNAL "" FORCE) ++ ocv_clear_vars(HAVE_CPU_DISPATCH_FLAGS_${OPT}) ++ endif() ++ ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_DISPATCH_FLAGS_${OPT}) ++ if(NOT HAVE_CPU_DISPATCH_FLAGS_${OPT}) ++ message(FATAL_ERROR "Compiler doesn't support optimization flags for ${OPT} dispatch mode: ${__dispatch_flags}") ++ endif() ++ set(CPU_DISPATCH_FLAGS_${OPT} "${__dispatch_flags}") ++ set(CPU_DISPATCH_DEFINITIONS_${OPT} "${__dispatch_definitions}") ++ set(CPU_DISPATCH_${OPT}_INCLUDED "${__dispatch_opts}") ++ set(CPU_DISPATCH_${OPT}_FORCED "${__dispatch_opts_force}") ++ endforeach() ++ ++ if(ENABLE_POWERPC) ++ add_extra_compiler_option("-mcpu=G3 -mtune=G5") ++ endif() ++ if(ARM) ++ add_extra_compiler_option("-mfp16-format=ieee") ++ endif(ARM) ++ if(ENABLE_NEON) ++ add_extra_compiler_option("-mfpu=neon") ++ endif() ++ if(ENABLE_VFPV3 AND NOT ENABLE_NEON) ++ 
add_extra_compiler_option("-mfpu=vfpv3") ++ endif() ++endmacro() ++ ++macro(ocv_compiler_optimization_options_finalize) ++ if(CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) ++ if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4) ++ if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)") ++ add_extra_compiler_option(-mfpmath=sse) # !! important - be on the same wave with x64 compilers ++ else() ++ add_extra_compiler_option(-mfpmath=387) ++ endif() ++ endif() ++ endif() ++ ++ if(MSVC) ++ # Generate Intrinsic Functions ++ set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi") ++ ++ if((X86 OR X86_64) AND CMAKE_SIZEOF_VOID_P EQUAL 4 AND ";${CPU_BASELINE_FINAL};" MATCHES ";SSE;") ++ set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers ++ endif() ++ endif(MSVC) ++endmacro() ++ ++macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME TARGET_BASE_NAME) ++ set(__result "") ++ set(__result_libs "") ++ foreach(OPT ${CPU_DISPATCH_FINAL}) ++ set(__result_${OPT} "") ++ endforeach() ++ foreach(fname ${${SOURCES_VAR_NAME}}) ++ string(TOLOWER "${fname}" fname_LOWER) ++ if(fname_LOWER MATCHES "[.]opt_.*[.]cpp$") ++ if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS) ++ message(STATUS "Excluding from source files list: ${fname}") ++ #continue() ++ else() ++ set(__opt_found 0) ++ foreach(OPT ${CPU_BASELINE_FINAL}) ++ string(TOLOWER "${OPT}" OPT_LOWER) ++ if(fname_LOWER MATCHES "_${OPT_LOWER}[.]cpp$") ++#message("${fname} BASELINE-${OPT}") ++ set(__opt_found 1) ++ list(APPEND __result "${fname}") ++ break() ++ endif() ++ endforeach() ++ foreach(OPT ${CPU_DISPATCH_FINAL}) ++ foreach(OPT2 ${CPU_DISPATCH_${OPT}_FORCED}) ++ string(TOLOWER "${OPT2}" OPT2_LOWER) ++ if(fname_LOWER MATCHES "_${OPT2_LOWER}[.]cpp$") ++ list(APPEND __result_${OPT} "${fname}") ++ math(EXPR CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}+1") ++ set(CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}" CACHE INTERNAL "" FORCE) ++#message("${fname} 
${OPT}")
++#message(" ${CPU_DISPATCH_${OPT}_INCLUDED}")
++#message(" ${CPU_DISPATCH_DEFINITIONS_${OPT}}")
++#message(" ${CPU_DISPATCH_FLAGS_${OPT}}")
++              set(__opt_found 1)
++              break()
++            endif()
++          endforeach()
++          if(__opt_found)
++            set(__opt_found 1)
++            break()
++          endif()
++        endforeach()
++        if(NOT __opt_found)
++          message(STATUS "Excluding from source files list: ${fname}")
++        endif()
++      endif()
++    else()
++      list(APPEND __result "${fname}")
++    endif()
++  endforeach()
++
++  foreach(OPT ${CPU_DISPATCH_FINAL})  # attach per-optimization flags/definitions to the files collected above
++    if(__result_${OPT})
++#message("${OPT}: ${__result_${OPT}}")
++      if(CMAKE_GENERATOR MATCHES "^Visual")
++        # extra flags are added before common flags, so switching between optimizations doesn't work correctly
++        # Also CMAKE_CXX_FLAGS doesn't work (it is directory-based, so add_subdirectory is required)
++        add_library(${TARGET_BASE_NAME}_${OPT} OBJECT ${__result_${OPT}})
++        ocv_append_dependant_targets(${TARGET_BASE_NAME} ${TARGET_BASE_NAME}_${OPT})
++        set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
++        set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
++        #list(APPEND __result_libs ${TARGET_BASE_NAME}_${OPT})
++        list(APPEND __result "$<TARGET_OBJECTS:${TARGET_BASE_NAME}_${OPT}>")  # feed the OBJECT library's objects back into the source list
++      else()
++        foreach(fname ${__result_${OPT}})
++          set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
++          set_source_files_properties("${fname}" PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
++        endforeach()
++        list(APPEND __result ${__result_${OPT}})
++      endif()
++    endif()
++  endforeach()
++  set(${SOURCES_VAR_NAME} "${__result}")  # hand the filtered source list back through the caller's variable
++  list(APPEND ${LIBS_VAR_NAME} ${__result_libs})
++endmacro()
++
++macro(ocv_compiler_optimization_fill_cpu_config)
++  set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "")
++  foreach(OPT ${CPU_BASELINE_FINAL})
++    set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}
++#define 
CV_CPU_COMPILE_${OPT} 1 ++#define CV_CPU_BASELINE_COMPILE_${OPT} 1 ++") ++ endforeach() ++ ++ set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE} ++#define CV_CPU_BASELINE_FEATURES 0 \\") ++ foreach(OPT ${CPU_BASELINE_FINAL}) ++ if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x") ++ set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE} ++ , CV_CPU_${OPT} \\") ++ endif() ++ endforeach() ++ set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}\n") ++ ++ set(__dispatch_modes "") ++ foreach(OPT ${CPU_DISPATCH_FINAL}) ++ list(APPEND __dispatch_modes ${CPU_DISPATCH_${OPT}_FORCE} ${OPT}) ++ endforeach() ++ list(REMOVE_DUPLICATES __dispatch_modes) ++ set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "") ++ foreach(OPT ${__dispatch_modes}) ++ set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE} ++#define CV_CPU_DISPATCH_COMPILE_${OPT} 1") ++ endforeach() ++ ++ set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "// AUTOGENERATED, DO NOT EDIT\n") ++ foreach(OPT ${CPU_ALL_OPTIMIZATIONS}) ++ if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x") ++ set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE} ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_${OPT} ++# define CV_CPU_HAS_SUPPORT_${OPT} 1 ++# define CV_CPU_CALL_${OPT}(...) return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_${OPT} ++# define CV_CPU_HAS_SUPPORT_${OPT} (cv::checkHardwareSupport(CV_CPU_${OPT})) ++# define CV_CPU_CALL_${OPT}(...) if (CV_CPU_HAS_SUPPORT_${OPT}) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_${OPT} 0 ++# define CV_CPU_CALL_${OPT}(...) 
++#endif ++") ++ endif() ++ endforeach() ++ ++ set(__file "${CMAKE_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h") ++ if(EXISTS "${__file}") ++ file(READ "${__file}" __content) ++ endif() ++ if(__content STREQUAL OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE) ++ #message(STATUS "${__file} contains same content") ++ else() ++ file(WRITE "${__file}" "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}") ++ message(WARNING "${__file} is updated") ++ endif() ++endmacro() ++ ++if(CV_DISABLE_OPTIMIZATION OR CV_ICC) ++ ocv_update(CV_ENABLE_UNROLLED 0) ++else() ++ ocv_update(CV_ENABLE_UNROLLED 1) ++endif() +diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake +index 5bb047911..0eb68b656 100644 +--- a/cmake/OpenCVCompilerOptions.cmake ++++ b/cmake/OpenCVCompilerOptions.cmake +@@ -31,24 +31,21 @@ endif() + if(MINGW OR (X86 AND UNIX AND NOT APPLE)) + # mingw compiler is known to produce unstable SSE code with -O3 hence we are trying to use -O2 instead + if(CMAKE_COMPILER_IS_GNUCXX) +- foreach(flags CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) +- string(REPLACE "-O3" "-O2" ${flags} "${${flags}}") +- endforeach() +- endif() +- +- if(CMAKE_COMPILER_IS_GNUCC) +- foreach(flags CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG) ++ foreach(flags ++ CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG ++ CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG) + string(REPLACE "-O3" "-O2" ${flags} "${${flags}}") + endforeach() + endif() + endif() + + if(MSVC) +- string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +- string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT}") ++ string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS) ++ string(STRIP "${CMAKE_CXX_FLAGS_INIT}" CMAKE_CXX_FLAGS_INIT) + if(CMAKE_CXX_FLAGS STREQUAL CMAKE_CXX_FLAGS_INIT) + # override cmake default exception handling option +- string(REPLACE "/EHsc" "/EHa" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") ++ 
string(REPLACE "/EHsc" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") ++ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHa") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "Flags used by the compiler during all build types." FORCE) + endif() + endif() +@@ -63,9 +60,6 @@ set(OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE "") + set(OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG "") + + macro(add_extra_compiler_option option) +- if(CMAKE_BUILD_TYPE) +- set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE}) +- endif() + ocv_check_flag_support(CXX "${option}" _varname "${OPENCV_EXTRA_CXX_FLAGS} ${ARGN}") + if(${_varname}) + set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}") +@@ -77,6 +71,12 @@ macro(add_extra_compiler_option option) + endif() + endmacro() + ++macro(add_extra_compiler_option_force option) ++ set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}") ++ set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} ${option}") ++endmacro() ++ ++ + # Gets environment variable and puts its value to the corresponding preprocessor definition + # Useful for WINRT that has no access to environment variables + macro(add_env_definitions option) +@@ -102,7 +102,11 @@ if(MINGW) + endif() + + if(CV_ICC AND NOT ENABLE_FAST_MATH) +- add_extra_compiler_option("-fp-model precise") ++ if(MSVC) ++ add_extra_compiler_option("/fp:precise") ++ else() ++ add_extra_compiler_option("-fp-model precise") ++ endif() + endif() + + if(CMAKE_COMPILER_IS_GNUCXX) +@@ -141,7 +145,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) + endif() + + # We need pthread's +- if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX)) ++ if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX)) # TODO + add_extra_compiler_option(-pthread) + endif() + +@@ -170,83 +174,6 @@ if(CMAKE_COMPILER_IS_GNUCXX) + if(ENABLE_FAST_MATH) + add_extra_compiler_option(-ffast-math) + endif() +- if(ENABLE_POWERPC) +- add_extra_compiler_option("-mcpu=G3 -mtune=G5") +- endif() +- if(ENABLE_SSE) +- 
add_extra_compiler_option(-msse) +- endif() +- if(ENABLE_SSE2) +- add_extra_compiler_option(-msse2) +- elseif(X86 OR X86_64) +- add_extra_compiler_option(-mno-sse2) +- endif() +- if(ARM) +- add_extra_compiler_option("-mfp16-format=ieee") +- endif(ARM) +- if(ENABLE_NEON) +- add_extra_compiler_option("-mfpu=neon") +- endif() +- if(ENABLE_VFPV3 AND NOT ENABLE_NEON) +- add_extra_compiler_option("-mfpu=vfpv3") +- endif() +- +- # SSE3 and further should be disabled under MingW because it generates compiler errors +- if(NOT MINGW) +- if(ENABLE_AVX) +- add_extra_compiler_option(-mavx) +- elseif(X86 OR X86_64) +- add_extra_compiler_option(-mno-avx) +- endif() +- if(ENABLE_AVX2) +- add_extra_compiler_option(-mavx2) +- +- if(ENABLE_FMA3) +- add_extra_compiler_option(-mfma) +- endif() +- endif() +- +- # GCC depresses SSEx instructions when -mavx is used. Instead, it generates new AVX instructions or AVX equivalence for all SSEx instructions when needed. +- if(NOT OPENCV_EXTRA_CXX_FLAGS MATCHES "-mavx") +- if(ENABLE_SSE3) +- add_extra_compiler_option(-msse3) +- elseif(X86 OR X86_64) +- add_extra_compiler_option(-mno-sse3) +- endif() +- +- if(ENABLE_SSSE3) +- add_extra_compiler_option(-mssse3) +- elseif(X86 OR X86_64) +- add_extra_compiler_option(-mno-ssse3) +- endif() +- +- if(ENABLE_SSE41) +- add_extra_compiler_option(-msse4.1) +- elseif(X86 OR X86_64) +- add_extra_compiler_option(-mno-sse4.1) +- endif() +- +- if(ENABLE_SSE42) +- add_extra_compiler_option(-msse4.2) +- elseif(X86 OR X86_64) +- add_extra_compiler_option(-mno-sse4.2) +- endif() +- +- if(ENABLE_POPCNT) +- add_extra_compiler_option(-mpopcnt) +- endif() +- endif() +- endif(NOT MINGW) +- +- if(X86 OR X86_64) +- if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4) +- if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)") +- add_extra_compiler_option(-mfpmath=sse)# !! important - be on the same wave with x64 compilers +- else() +- add_extra_compiler_option(-mfpmath=387) +- endif() +- endif() +- endif() + + # Profiling? 
+ if(ENABLE_PROFILING) +@@ -257,7 +184,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) + string(REPLACE "-fomit-frame-pointer" "" ${flags} "${${flags}}") + string(REPLACE "-ffunction-sections" "" ${flags} "${${flags}}") + endforeach() +- elseif(NOT APPLE AND NOT ANDROID) ++ elseif(NOT ((IOS OR ANDROID) AND NOT BUILD_SHARED_LIBS)) + # Remove unreferenced functions: function level linking + add_extra_compiler_option(-ffunction-sections) + endif() +@@ -296,41 +223,6 @@ if(MSVC) + set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} /Zi") + endif() + +- if(ENABLE_AVX2 AND NOT MSVC_VERSION LESS 1800) +- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX2") +- endif() +- if(ENABLE_AVX AND NOT MSVC_VERSION LESS 1600 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") +- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX") +- endif() +- +- if(ENABLE_SSE4_1 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") +- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE4.1") +- endif() +- +- if(ENABLE_SSE3 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") +- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE3") +- endif() +- +- if(NOT MSVC64) +- # 64-bit MSVC compiler uses SSE/SSE2 by default +- if(ENABLE_SSE2 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") +- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE2") +- endif() +- if(ENABLE_SSE AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") +- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE") +- endif() +- endif() +- +- if(ENABLE_SSE OR ENABLE_SSE2 OR ENABLE_SSE3 OR ENABLE_SSE4_1 OR ENABLE_AVX OR ENABLE_AVX2) +- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi") +- endif() +- +- if(X86 OR X86_64) +- if(CMAKE_SIZEOF_VOID_P EQUAL 4 AND ENABLE_SSE2) +- set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! 
important - be on the same wave with x64 compilers +- endif() +- endif() +- + if(OPENCV_WARNINGS_ARE_ERRORS) + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /WX") + endif() +@@ -353,6 +245,16 @@ if(NOT BUILD_SHARED_LIBS AND CMAKE_COMPILER_IS_GNUCXX AND NOT ANDROID) + set(OPENCV_EXTRA_FLAGS "-fPIC ${OPENCV_EXTRA_FLAGS}") + endif() + ++include(cmake/OpenCVCompilerOptimizations.cmake) ++ ++if(COMMAND ocv_compiler_optimization_options) ++ ocv_compiler_optimization_options() ++endif() ++ ++if(COMMAND ocv_compiler_optimization_options_finalize) ++ ocv_compiler_optimization_options_finalize() ++endif() ++ + # Add user supplied extra options (optimization, etc...) + # ========================================================== + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS}" CACHE INTERNAL "Extra compiler options") +@@ -370,6 +272,7 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399) + add_extra_compiler_option(-fvisibility-inlines-hidden) + endif() + ++# TODO !!!!! + if(NOT OPENCV_FP16_DISABLE AND NOT IOS) + if(ARM AND ENABLE_NEON) + set(FP16_OPTION "-mfpu=neon-fp16") +@@ -378,7 +281,7 @@ if(NOT OPENCV_FP16_DISABLE AND NOT IOS) + endif() + try_compile(__VALID_FP16 + "${OpenCV_BINARY_DIR}" +- "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" ++ "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp" + COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}" + OUTPUT_VARIABLE TRY_OUT + ) +diff --git a/cmake/OpenCVGenHeaders.cmake b/cmake/OpenCVGenHeaders.cmake +index 298897904..477b91055 100644 +--- a/cmake/OpenCVGenHeaders.cmake ++++ b/cmake/OpenCVGenHeaders.cmake +@@ -3,6 +3,10 @@ configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CO + configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/cvconfig.h") + install(FILES "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2 COMPONENT dev) + ++# platform-specific config file 
++ocv_compiler_optimization_fill_cpu_config() ++configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cv_cpu_config.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cv_cpu_config.h") ++ + # ---------------------------------------------------------------------------- + # opencv_modules.hpp based on actual modules list + # ---------------------------------------------------------------------------- +diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake +index 742a287ec..3e98bf569 100644 +--- a/cmake/OpenCVModule.cmake ++++ b/cmake/OpenCVModule.cmake +@@ -65,6 +65,7 @@ foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MOD + unset(OPENCV_MODULE_${mod}_PRIVATE_OPT_DEPS CACHE) + unset(OPENCV_MODULE_${mod}_LINK_DEPS CACHE) + unset(OPENCV_MODULE_${mod}_WRAPPERS CACHE) ++ unset(OPENCV_DEPENDANT_TARGETS_${mod} CACHE) + endforeach() + + # clean modules info which needs to be recalculated +@@ -641,6 +642,8 @@ macro(ocv_set_module_sources) + # use full paths for module to be independent from the module location + ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS) + ++ ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module}) ++ + set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}") + set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}") + endmacro() +diff --git a/cmake/OpenCVPCHSupport.cmake b/cmake/OpenCVPCHSupport.cmake +index 90437cb20..45968e78b 100644 +--- a/cmake/OpenCVPCHSupport.cmake ++++ b/cmake/OpenCVPCHSupport.cmake +@@ -326,7 +326,10 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input) + + get_target_property(_sources ${_targetName} SOURCES) + foreach(src ${_sources}) +- if(NOT "${src}" MATCHES "\\.mm$") ++ if(NOT "${src}" MATCHES "\\.mm$" ++ AND NOT "${src}" MATCHES "\\.h$" AND NOT 
"${src}" MATCHES "\\.hpp$" # header files ++ AND NOT "${src}" MATCHES "^\$" # CMake generator expressions ++ ) + get_source_file_property(oldProps "${src}" COMPILE_FLAGS) + if(NOT oldProps) + set(newProperties "/Yu\"${_input}\" /FI\"${_input}\"") +diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake +index cdf257d5f..8a5ee28f6 100644 +--- a/cmake/OpenCVUtils.cmake ++++ b/cmake/OpenCVUtils.cmake +@@ -37,7 +37,11 @@ endmacro() + + macro(ocv_update VAR) + if(NOT DEFINED ${VAR}) +- set(${VAR} ${ARGN}) ++ if("x${ARGN}" STREQUAL "x") ++ set(${VAR} "") ++ else() ++ set(${VAR} ${ARGN}) ++ endif() + else() + #ocv_debug_message("Preserve old value for ${VAR}: ${${VAR}}") + endif() +@@ -151,8 +155,15 @@ function(ocv_append_target_property target prop) + endif() + endfunction() + ++function(ocv_append_dependant_targets target) ++ #ocv_debug_message("ocv_append_dependant_targets(${target} ${ARGN})") ++ _ocv_fix_target(target) ++ set(OPENCV_DEPENDANT_TARGETS_${target} "${OPENCV_DEPENDANT_TARGETS_${target}};${ARGN}" CACHE INTERNAL "" FORCE) ++endfunction() ++ + # adds include directories in such way that directories from the OpenCV source tree go first + function(ocv_target_include_directories target) ++ #ocv_debug_message("ocv_target_include_directories(${target} ${ARGN})") + _ocv_fix_target(target) + set(__params "") + if(CMAKE_COMPILER_IS_GNUCXX AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.0" AND +@@ -173,6 +184,11 @@ function(ocv_target_include_directories target) + else() + if(TARGET ${target}) + target_include_directories(${target} PRIVATE ${__params}) ++ if(OPENCV_DEPENDANT_TARGETS_${target}) ++ foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}}) ++ target_include_directories(${t} PRIVATE ${__params}) ++ endforeach() ++ endif() + else() + set(__new_inc "${OCV_TARGET_INCLUDE_DIRS_${target}};${__params}") + set(OCV_TARGET_INCLUDE_DIRS_${target} "${__new_inc}" CACHE INTERNAL "") +@@ -205,8 +221,11 @@ set(OCV_COMPILER_FAIL_REGEX + ) + + 
MACRO(ocv_check_compiler_flag LANG FLAG RESULT) ++ set(_fname "${ARGN}") + if(NOT DEFINED ${RESULT}) +- if("_${LANG}_" MATCHES "_CXX_") ++ if(_fname) ++ # nothing ++ elseif("_${LANG}_" MATCHES "_CXX_") + set(_fname "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx") + if("${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror " OR "${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror=unknown-pragmas ") + FILE(WRITE "${_fname}" "int main() { return 0; }\n") +@@ -231,7 +250,13 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT) + unset(_fname) + endif() + if(_fname) +- MESSAGE(STATUS "Performing Test ${RESULT}") ++ if(NOT "x${ARGN}" STREQUAL "x") ++ file(RELATIVE_PATH __msg "${CMAKE_SOURCE_DIR}" "${ARGN}") ++ set(__msg " (check file: ${__msg})") ++ else() ++ set(__msg "") ++ endif() ++ MESSAGE(STATUS "Performing Test ${RESULT}${__msg}") + TRY_COMPILE(${RESULT} + "${CMAKE_BINARY_DIR}" + "${_fname}" +@@ -278,7 +303,11 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT) + endif() + ENDMACRO() + +-macro(ocv_check_flag_support lang flag varname) ++macro(ocv_check_flag_support lang flag varname base_options) ++ if(CMAKE_BUILD_TYPE) ++ set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE}) ++ endif() ++ + if("_${lang}_" MATCHES "_CXX_") + set(_lang CXX) + elseif("_${lang}_" MATCHES "_C_") +@@ -293,7 +322,7 @@ macro(ocv_check_flag_support lang flag varname) + string(REGEX REPLACE "^(/|-)" "HAVE_${_lang}_" ${varname} "${${varname}}") + string(REGEX REPLACE " -|-|=| |\\." 
"_" ${varname} "${${varname}}") + +- ocv_check_compiler_flag("${_lang}" "${ARGN} ${flag}" ${${varname}}) ++ ocv_check_compiler_flag("${_lang}" "${base_options} ${flag}" ${${varname}} ${ARGN}) + endmacro() + + # turns off warnings +@@ -327,7 +356,7 @@ macro(ocv_warnings_disable) + string(REPLACE "${warning}" "" ${var} "${${var}}") + string(REPLACE "-W" "-Wno-" warning "${warning}") + endif() +- ocv_check_flag_support(${var} "${warning}" _varname) ++ ocv_check_flag_support(${var} "${warning}" _varname "") + if(${_varname}) + set(${var} "${${var}} ${warning}") + endif() +@@ -342,7 +371,7 @@ macro(ocv_warnings_disable) + else() + string(REPLACE "-wd" "-Qwd" warning "${warning}") + endif() +- ocv_check_flag_support(${var} "${warning}" _varname) ++ ocv_check_flag_support(${var} "${warning}" _varname "") + if(${_varname}) + set(${var} "${${var}} ${warning}") + endif() +@@ -357,7 +386,7 @@ macro(ocv_warnings_disable) + endmacro() + + macro(add_apple_compiler_options the_module) +- ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS) ++ ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS "") + if(HAVE_OBJC_EXCEPTIONS) + foreach(source ${OPENCV_MODULE_${the_module}_SOURCES}) + if("${source}" MATCHES "\\.mm$") +@@ -892,6 +921,11 @@ function(_ocv_append_target_includes target) + if (TARGET ${target}_object) + target_include_directories(${target}_object PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}}) + endif() ++ if(OPENCV_DEPENDANT_TARGETS_${target}) ++ foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}}) ++ target_include_directories(${t} PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}}) ++ endforeach() ++ endif() + unset(OCV_TARGET_INCLUDE_DIRS_${target} CACHE) + endif() + endfunction() +diff --git a/cmake/checks/cpu_avx.cpp b/cmake/checks/cpu_avx.cpp +new file mode 100644 +index 000000000..05536f443 +--- /dev/null ++++ b/cmake/checks/cpu_avx.cpp +@@ -0,0 +1,9 @@ ++#if !defined __AVX__ // MSVC supports this flag since MSVS 2013 ++#error "__AVX__ 
define is missing" ++#endif ++#include ++void test() ++{ ++ __m256 a = _mm256_set1_ps(0.0f); ++} ++int main() { return 0; } +diff --git a/cmake/checks/cpu_avx2.cpp b/cmake/checks/cpu_avx2.cpp +new file mode 100644 +index 000000000..3ab1143b8 +--- /dev/null ++++ b/cmake/checks/cpu_avx2.cpp +@@ -0,0 +1,10 @@ ++#if !defined __AVX2__ // MSVC supports this flag since MSVS 2013 ++#error "__AVX2__ define is missing" ++#endif ++#include ++void test() ++{ ++ int data[8] = {0,0,0,0, 0,0,0,0}; ++ __m256i a = _mm256_loadu_si256((const __m256i *)data); ++} ++int main() { return 0; } +diff --git a/cmake/checks/cpu_avx512.cpp b/cmake/checks/cpu_avx512.cpp +new file mode 100644 +index 000000000..d0898ab3e +--- /dev/null ++++ b/cmake/checks/cpu_avx512.cpp +@@ -0,0 +1,10 @@ ++#if defined __AVX512__ || defined __AVX512F__ ++#include ++void test() ++{ ++ __m512i zmm = _mm512_setzero_si512(); ++} ++#else ++#error "AVX512 is not supported" ++#endif ++int main() { return 0; } +diff --git a/cmake/checks/fp16.cpp b/cmake/checks/cpu_fp16.cpp +similarity index 86% +rename from cmake/checks/fp16.cpp +rename to cmake/checks/cpu_fp16.cpp +index c77c84483..6951f1c4f 100644 +--- a/cmake/checks/fp16.cpp ++++ b/cmake/checks/cpu_fp16.cpp +@@ -1,6 +1,6 @@ + #include + +-#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) ++#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) || (defined __INTEL_COMPILER && defined __AVX__) + #include + int test() + { +diff --git a/cmake/checks/cpu_popcnt.cpp b/cmake/checks/cpu_popcnt.cpp +new file mode 100644 +index 000000000..f55c9f3c5 +--- /dev/null ++++ b/cmake/checks/cpu_popcnt.cpp +@@ -0,0 +1,8 @@ ++#include ++#ifndef _MSC_VER ++#include ++#endif ++int main() { ++ int i = _mm_popcnt_u64(1); ++ return 0; ++} +diff --git a/cmake/checks/cpu_sse.cpp b/cmake/checks/cpu_sse.cpp +new file mode 100644 +index 000000000..c6269acdb +--- /dev/null ++++ b/cmake/checks/cpu_sse.cpp +@@ -0,0 +1,2 @@ ++#include ++int main() { return 0; } +diff --git 
a/cmake/checks/cpu_sse2.cpp b/cmake/checks/cpu_sse2.cpp +new file mode 100644 +index 000000000..68a69f88c +--- /dev/null ++++ b/cmake/checks/cpu_sse2.cpp +@@ -0,0 +1,2 @@ ++#include ++int main() { return 0; } +diff --git a/cmake/checks/cpu_sse3.cpp b/cmake/checks/cpu_sse3.cpp +new file mode 100644 +index 000000000..98ce2191e +--- /dev/null ++++ b/cmake/checks/cpu_sse3.cpp +@@ -0,0 +1,7 @@ ++#include ++int main() { ++ __m128 u, v; ++ u = _mm_set1_ps(0.0f); ++ v = _mm_moveldup_ps(u); // SSE3 ++ return 0; ++} +diff --git a/cmake/checks/cpu_sse41.cpp b/cmake/checks/cpu_sse41.cpp +new file mode 100644 +index 000000000..ddd835b0e +--- /dev/null ++++ b/cmake/checks/cpu_sse41.cpp +@@ -0,0 +1,6 @@ ++#include ++int main() { ++ __m128i a = _mm_setzero_si128(), b = _mm_setzero_si128(); ++ __m128i c = _mm_packus_epi32(a, b); ++ return 0; ++} +diff --git a/cmake/checks/cpu_sse42.cpp b/cmake/checks/cpu_sse42.cpp +new file mode 100644 +index 000000000..56f56658a +--- /dev/null ++++ b/cmake/checks/cpu_sse42.cpp +@@ -0,0 +1,5 @@ ++#include ++int main() { ++ int i = _mm_popcnt_u64(1); ++ return 0; ++} +diff --git a/cmake/checks/cpu_ssse3.cpp b/cmake/checks/cpu_ssse3.cpp +new file mode 100644 +index 000000000..e583199bc +--- /dev/null ++++ b/cmake/checks/cpu_ssse3.cpp +@@ -0,0 +1,7 @@ ++#include ++const double v = 0; ++int main() { ++ __m128i a = _mm_setzero_si128(); ++ __m128i b = _mm_abs_epi32(a); ++ return 0; ++} +diff --git a/cmake/templates/cv_cpu_config.h.in b/cmake/templates/cv_cpu_config.h.in +new file mode 100644 +index 000000000..27b27315c +--- /dev/null ++++ b/cmake/templates/cv_cpu_config.h.in +@@ -0,0 +1,5 @@ ++// OpenCV CPU baseline features ++@OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE@ ++ ++// OpenCV supported CPU dispatched features ++@OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE@ +diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in +index 05add9e2c..658d12c14 100644 +--- a/cmake/templates/cvconfig.h.in ++++ b/cmake/templates/cvconfig.h.in +@@ 
-1,6 +1,15 @@ ++#ifndef OPENCV_CVCONFIG_H_INCLUDED ++#define OPENCV_CVCONFIG_H_INCLUDED ++ + /* OpenCV compiled as static or dynamic libs */ + #cmakedefine BUILD_SHARED_LIBS + ++/* OpenCV intrinsics optimized code */ ++#cmakedefine CV_ENABLE_INTRINSICS ++ ++/* OpenCV additional optimized code */ ++#cmakedefine CV_DISABLE_OPTIMIZATION ++ + /* Compile for 'real' NVIDIA GPU architectures */ + #define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}" + +@@ -206,3 +215,7 @@ + + /* OpenVX */ + #cmakedefine HAVE_OPENVX ++ ++ ++ ++#endif // OPENCV_CVCONFIG_H_INCLUDED +diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h +new file mode 100644 +index 000000000..9a8537f90 +--- /dev/null ++++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h +@@ -0,0 +1,166 @@ ++// This file is part of OpenCV project. ++// It is subject to the license terms in the LICENSE file found in the top-level directory ++// of this distribution and at http://opencv.org/license.html. 
++ ++#if defined __OPENCV_BUILD \ ++ ++#include "cv_cpu_config.h" ++#include "cv_cpu_helper.h" ++ ++#if defined CV_ENABLE_INTRINSICS \ ++ && !defined CV_DISABLE_OPTIMIZATION \ ++ && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \ ++ ++#ifdef CV_CPU_COMPILE_SSE2 ++# include ++# define CV_MMX 1 ++# define CV_SSE 1 ++# define CV_SSE2 1 ++#endif ++#ifdef CV_CPU_COMPILE_SSE3 ++# include ++# define CV_SSE3 1 ++#endif ++#ifdef CV_CPU_COMPILE_SSSE3 ++# include ++# define CV_SSSE3 1 ++#endif ++#ifdef CV_CPU_COMPILE_SSE4_1 ++# include ++# define CV_SSE4_1 1 ++#endif ++#ifdef CV_CPU_COMPILE_SSE4_2 ++# include ++# define CV_SSE4_2 1 ++#endif ++#ifdef CV_CPU_COMPILE_POPCNT ++# ifdef _MSC_VER ++# include ++# if defined(_M_X64) ++# define CV_POPCNT_U64 _mm_popcnt_u64 ++# endif ++# define CV_POPCNT_U32 _mm_popcnt_u32 ++# else ++# include ++# if defined(__x86_64__) ++# define CV_POPCNT_U64 __builtin_popcountll ++# endif ++# define CV_POPCNT_U32 __builtin_popcount ++# endif ++# define CV_POPCNT 1 ++#endif ++#ifdef CV_CPU_COMPILE_AVX ++# include ++# define CV_AVX 1 ++#endif ++#ifdef CV_CPU_COMPILE_AVX2 ++# include ++# define CV_AVX2 1 ++#endif ++#ifdef CV_CPU_COMPILE_FMA3 ++# define CV_FMA3 1 ++#endif ++ ++#if (defined WIN32 || defined _WIN32) && defined(_M_ARM) ++# include ++# include ++# define CV_NEON 1 ++#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) ++# include ++# define CV_NEON 1 ++#endif ++ ++#if defined(__ARM_NEON__) || defined(__aarch64__) ++# include ++#endif ++ ++#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__ ++ ++#endif // __OPENCV_BUILD ++ ++ ++ ++#if !defined __OPENCV_BUILD // Compatibility code ++ ++#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) ++# include ++# define CV_MMX 1 ++# define CV_SSE 1 ++# define CV_SSE2 1 ++#elif (defined WIN32 || defined _WIN32) && defined(_M_ARM) ++# include ++# include ++# define CV_NEON 1 ++#elif 
defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) ++# include ++# define CV_NEON 1 ++#endif ++ ++#endif // !__OPENCV_BUILD (Compatibility code) ++ ++ ++ ++#ifndef CV_MMX ++# define CV_MMX 0 ++#endif ++#ifndef CV_SSE ++# define CV_SSE 0 ++#endif ++#ifndef CV_SSE2 ++# define CV_SSE2 0 ++#endif ++#ifndef CV_SSE3 ++# define CV_SSE3 0 ++#endif ++#ifndef CV_SSSE3 ++# define CV_SSSE3 0 ++#endif ++#ifndef CV_SSE4_1 ++# define CV_SSE4_1 0 ++#endif ++#ifndef CV_SSE4_2 ++# define CV_SSE4_2 0 ++#endif ++#ifndef CV_POPCNT ++# define CV_POPCNT 0 ++#endif ++#ifndef CV_AVX ++# define CV_AVX 0 ++#endif ++#ifndef CV_AVX2 ++# define CV_AVX2 0 ++#endif ++#ifndef CV_FMA3 ++# define CV_FMA3 0 ++#endif ++#ifndef CV_AVX_512F ++# define CV_AVX_512F 0 ++#endif ++#ifndef CV_AVX_512BW ++# define CV_AVX_512BW 0 ++#endif ++#ifndef CV_AVX_512CD ++# define CV_AVX_512CD 0 ++#endif ++#ifndef CV_AVX_512DQ ++# define CV_AVX_512DQ 0 ++#endif ++#ifndef CV_AVX_512ER ++# define CV_AVX_512ER 0 ++#endif ++#ifndef CV_AVX_512IFMA512 ++# define CV_AVX_512IFMA512 0 ++#endif ++#ifndef CV_AVX_512PF ++# define CV_AVX_512PF 0 ++#endif ++#ifndef CV_AVX_512VBMI ++# define CV_AVX_512VBMI 0 ++#endif ++#ifndef CV_AVX_512VL ++# define CV_AVX_512VL 0 ++#endif ++ ++#ifndef CV_NEON ++# define CV_NEON 0 ++#endif +diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h b/modules/core/include/opencv2/core/cv_cpu_helper.h +new file mode 100644 +index 000000000..cb755d615 +--- /dev/null ++++ b/modules/core/include/opencv2/core/cv_cpu_helper.h +@@ -0,0 +1,133 @@ ++// AUTOGENERATED, DO NOT EDIT ++ ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE ++# define CV_CPU_HAS_SUPPORT_SSE 1 ++# define CV_CPU_CALL_SSE(...) return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE ++# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE)) ++# define CV_CPU_CALL_SSE(...) 
if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_SSE 0 ++# define CV_CPU_CALL_SSE(...) ++#endif ++ ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2 ++# define CV_CPU_HAS_SUPPORT_SSE2 1 ++# define CV_CPU_CALL_SSE2(...) return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2 ++# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2)) ++# define CV_CPU_CALL_SSE2(...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_SSE2 0 ++# define CV_CPU_CALL_SSE2(...) ++#endif ++ ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3 ++# define CV_CPU_HAS_SUPPORT_SSE3 1 ++# define CV_CPU_CALL_SSE3(...) return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3 ++# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3)) ++# define CV_CPU_CALL_SSE3(...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_SSE3 0 ++# define CV_CPU_CALL_SSE3(...) ++#endif ++ ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3 ++# define CV_CPU_HAS_SUPPORT_SSSE3 1 ++# define CV_CPU_CALL_SSSE3(...) return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3 ++# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3)) ++# define CV_CPU_CALL_SSSE3(...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_SSSE3 0 ++# define CV_CPU_CALL_SSSE3(...) ++#endif ++ ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1 ++# define CV_CPU_HAS_SUPPORT_SSE4_1 1 ++# define CV_CPU_CALL_SSE4_1(...) 
return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1 ++# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1)) ++# define CV_CPU_CALL_SSE4_1(...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_SSE4_1 0 ++# define CV_CPU_CALL_SSE4_1(...) ++#endif ++ ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2 ++# define CV_CPU_HAS_SUPPORT_SSE4_2 1 ++# define CV_CPU_CALL_SSE4_2(...) return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2 ++# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2)) ++# define CV_CPU_CALL_SSE4_2(...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_SSE4_2 0 ++# define CV_CPU_CALL_SSE4_2(...) ++#endif ++ ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT ++# define CV_CPU_HAS_SUPPORT_POPCNT 1 ++# define CV_CPU_CALL_POPCNT(...) return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT ++# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT)) ++# define CV_CPU_CALL_POPCNT(...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_POPCNT 0 ++# define CV_CPU_CALL_POPCNT(...) ++#endif ++ ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX ++# define CV_CPU_HAS_SUPPORT_AVX 1 ++# define CV_CPU_CALL_AVX(...) return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX ++# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX)) ++# define CV_CPU_CALL_AVX(...) 
if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_AVX 0 ++# define CV_CPU_CALL_AVX(...) ++#endif ++ ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16 ++# define CV_CPU_HAS_SUPPORT_FP16 1 ++# define CV_CPU_CALL_FP16(...) return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16 ++# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16)) ++# define CV_CPU_CALL_FP16(...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_FP16 0 ++# define CV_CPU_CALL_FP16(...) ++#endif ++ ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2 ++# define CV_CPU_HAS_SUPPORT_AVX2 1 ++# define CV_CPU_CALL_AVX2(...) return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2 ++# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2)) ++# define CV_CPU_CALL_AVX2(...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_AVX2 0 ++# define CV_CPU_CALL_AVX2(...) ++#endif ++ ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3 ++# define CV_CPU_HAS_SUPPORT_FMA3 1 ++# define CV_CPU_CALL_FMA3(...) return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3 ++# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3)) ++# define CV_CPU_CALL_FMA3(...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_FMA3 0 ++# define CV_CPU_CALL_FMA3(...) ++#endif ++ ++#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON ++# define CV_CPU_HAS_SUPPORT_NEON 1 ++# define CV_CPU_CALL_NEON(...) 
return __VA_ARGS__ ++#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON ++# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON)) ++# define CV_CPU_CALL_NEON(...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__ ++#else ++# define CV_CPU_HAS_SUPPORT_NEON 0 ++# define CV_CPU_CALL_NEON(...) ++#endif +diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h +index 699b1667b..0a46e0236 100644 +--- a/modules/core/include/opencv2/core/cvdef.h ++++ b/modules/core/include/opencv2/core/cvdef.h +@@ -48,6 +48,10 @@ + //! @addtogroup core_utils + //! @{ + ++#ifdef __OPENCV_BUILD ++#include "cvconfig.h" ++#endif ++ + #if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 + # define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ + #endif +@@ -59,10 +63,6 @@ + #undef abs + #undef Complex + +-#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 +-# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ +-#endif +- + #include + #include "opencv2/core/hal/interface.h" + +@@ -88,7 +88,7 @@ + # endif + #endif + +-#if defined CV_ICC && !defined CV_ENABLE_UNROLLED ++#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED) + # define CV_ENABLE_UNROLLED 0 + #else + # define CV_ENABLE_UNROLLED 1 +@@ -161,142 +161,9 @@ enum CpuFeatures { + CPU_NEON = 100 + }; + +-// do not include SSE/AVX/NEON headers for NVCC compiler +-#ifndef __CUDACC__ +- +-#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) +-# include +-# define CV_MMX 1 +-# define CV_SSE 1 +-# define CV_SSE2 1 +-# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) +-# include +-# define CV_SSE3 1 +-# endif +-# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) +-# include +-# define CV_SSSE3 1 +-# endif +-# if defined __SSE4_1__ || (defined 
_MSC_VER && _MSC_VER >= 1500) +-# include +-# define CV_SSE4_1 1 +-# endif +-# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500) +-# include +-# define CV_SSE4_2 1 +-# endif +-# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500) +-# ifdef _MSC_VER +-# include +-# else +-# include +-# endif +-# define CV_POPCNT 1 +-# endif +-# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0) +-// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX +-// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32 +-# include +-# define CV_AVX 1 +-# if defined(_XCR_XFEATURE_ENABLED_MASK) +-# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) +-# else +-# define __xgetbv() 0 +-# endif +-# endif +-# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0) +-# include +-# define CV_AVX2 1 +-# if defined __FMA__ +-# define CV_FMA3 1 +-# endif +-# endif +-#endif +- +-#if (defined WIN32 || defined _WIN32) && defined(_M_ARM) +-# include +-# include +-# define CV_NEON 1 +-# define CPU_HAS_NEON_FEATURE (true) +-#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) +-# include +-# define CV_NEON 1 +-#endif +- +-#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ +-# define CV_VFP 1 +-#endif +- +-#endif // __CUDACC__ +- +-#ifndef CV_POPCNT +-#define CV_POPCNT 0 +-#endif +-#ifndef CV_MMX +-# define CV_MMX 0 +-#endif +-#ifndef CV_SSE +-# define CV_SSE 0 +-#endif +-#ifndef CV_SSE2 +-# define CV_SSE2 0 +-#endif +-#ifndef CV_SSE3 +-# define CV_SSE3 0 +-#endif +-#ifndef CV_SSSE3 +-# define CV_SSSE3 0 +-#endif +-#ifndef CV_SSE4_1 +-# define CV_SSE4_1 0 +-#endif +-#ifndef CV_SSE4_2 +-# define CV_SSE4_2 0 +-#endif +-#ifndef CV_AVX +-# define CV_AVX 0 +-#endif +-#ifndef CV_AVX2 +-# define CV_AVX2 0 
+-#endif +-#ifndef CV_FMA3 +-# define CV_FMA3 0 +-#endif +-#ifndef CV_AVX_512F +-# define CV_AVX_512F 0 +-#endif +-#ifndef CV_AVX_512BW +-# define CV_AVX_512BW 0 +-#endif +-#ifndef CV_AVX_512CD +-# define CV_AVX_512CD 0 +-#endif +-#ifndef CV_AVX_512DQ +-# define CV_AVX_512DQ 0 +-#endif +-#ifndef CV_AVX_512ER +-# define CV_AVX_512ER 0 +-#endif +-#ifndef CV_AVX_512IFMA512 +-# define CV_AVX_512IFMA512 0 +-#endif +-#ifndef CV_AVX_512PF +-# define CV_AVX_512PF 0 +-#endif +-#ifndef CV_AVX_512VBMI +-# define CV_AVX_512VBMI 0 +-#endif +-#ifndef CV_AVX_512VL +-# define CV_AVX_512VL 0 +-#endif + +-#ifndef CV_NEON +-# define CV_NEON 0 +-#endif ++#include "cv_cpu_dispatch.h" + +-#ifndef CV_VFP +-# define CV_VFP 0 +-#endif + + /* fundamental constants */ + #define CV_PI 3.1415926535897932384626433832795 +diff --git a/modules/core/include/opencv2/core/fast_math.hpp b/modules/core/include/opencv2/core/fast_math.hpp +index c76936afe..31c106232 100644 +--- a/modules/core/include/opencv2/core/fast_math.hpp ++++ b/modules/core/include/opencv2/core/fast_math.hpp +@@ -47,6 +47,12 @@ + + #include "opencv2/core/cvdef.h" + ++#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ ++ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) ++#include ++#endif ++ ++ + //! @addtogroup core_utils + //! @{ + +@@ -66,7 +72,7 @@ + # include "tegra_round.hpp" + #endif + +-#if CV_VFP ++#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ + // 1. general scheme + #define ARM_ROUND(_value, _asm_string) \ + int res; \ +@@ -82,7 +88,7 @@ + #endif + // 3. 
version for float + #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") +-#endif // CV_VFP ++#endif + + /** @brief Rounds floating-point number to the nearest integer + +@@ -93,7 +99,7 @@ CV_INLINE int + cvRound( double value ) + { + #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ +- && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) ++ && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__) + __m128d t = _mm_set_sd( value ); + return _mm_cvtsd_si32(t); + #elif defined _MSC_VER && defined _M_IX86 +@@ -108,7 +114,7 @@ cvRound( double value ) + defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION + TEGRA_ROUND_DBL(value); + #elif defined CV_ICC || defined __GNUC__ +-# if CV_VFP ++# if defined ARM_ROUND_DBL + ARM_ROUND_DBL(value); + # else + return (int)lrint(value); +@@ -130,18 +136,8 @@ cvRound( double value ) + */ + CV_INLINE int cvFloor( double value ) + { +-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) +- __m128d t = _mm_set_sd( value ); +- int i = _mm_cvtsd_si32(t); +- return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i))); +-#elif defined __GNUC__ + int i = (int)value; + return i - (i > value); +-#else +- int i = cvRound(value); +- float diff = (float)(value - i); +- return i - (diff < 0); +-#endif + } + + /** @brief Rounds floating-point number to the nearest integer not smaller than the original. 
+@@ -153,18 +149,8 @@ CV_INLINE int cvFloor( double value ) + */ + CV_INLINE int cvCeil( double value ) + { +-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) +- __m128d t = _mm_set_sd( value ); +- int i = _mm_cvtsd_si32(t); +- return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t)); +-#elif defined __GNUC__ + int i = (int)value; + return i + (i < value); +-#else +- int i = cvRound(value); +- float diff = (float)(i - value); +- return i + (diff < 0); +-#endif + } + + /** @brief Determines if the argument is Not A Number. +@@ -200,8 +186,8 @@ CV_INLINE int cvIsInf( double value ) + /** @overload */ + CV_INLINE int cvRound(float value) + { +-#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \ +- defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) ++#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ ++ && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__) + __m128 t = _mm_set_ss( value ); + return _mm_cvtss_si32(t); + #elif defined _MSC_VER && defined _M_IX86 +@@ -216,7 +202,7 @@ CV_INLINE int cvRound(float value) + defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION + TEGRA_ROUND_FLT(value); + #elif defined CV_ICC || defined __GNUC__ +-# if CV_VFP ++# if defined ARM_ROUND_FLT + ARM_ROUND_FLT(value); + # else + return (int)lrintf(value); +@@ -237,18 +223,8 @@ CV_INLINE int cvRound( int value ) + /** @overload */ + CV_INLINE int cvFloor( float value ) + { +-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) +- __m128 t = _mm_set_ss( value ); +- int i = _mm_cvtss_si32(t); +- return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i))); +-#elif defined __GNUC__ + int i = (int)value; + return i - (i > value); +-#else +- int i = cvRound(value); +- float diff = (float)(value - i); +- return i - 
(diff < 0); +-#endif + } + + /** @overload */ +@@ -260,18 +236,8 @@ CV_INLINE int cvFloor( int value ) + /** @overload */ + CV_INLINE int cvCeil( float value ) + { +-#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) +- __m128 t = _mm_set_ss( value ); +- int i = _mm_cvtss_si32(t); +- return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t)); +-#elif defined __GNUC__ + int i = (int)value; + return i + (i < value); +-#else +- int i = cvRound(value); +- float diff = (float)(i - value); +- return i + (diff < 0); +-#endif + } + + /** @overload */ +diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp +index 3c8f39d74..a9838382f 100644 +--- a/modules/core/src/system.cpp ++++ b/modules/core/src/system.cpp +@@ -237,24 +237,81 @@ void Exception::formatMessage() + msg = format("%s:%d: error: (%d) %s\n", file.c_str(), line, code, err.c_str()); + } + ++static const char* g_hwFeatureNames[CV_HARDWARE_MAX_FEATURE] = { NULL }; ++ ++static const char* getHWFeatureName(int id) ++{ ++ return (id < CV_HARDWARE_MAX_FEATURE) ? g_hwFeatureNames[id] : NULL; ++} ++static const char* getHWFeatureNameSafe(int id) ++{ ++ const char* name = getHWFeatureName(id); ++ return name ? 
name : "Unknown feature"; ++} ++ + struct HWFeatures + { + enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE }; + +- HWFeatures(void) ++ HWFeatures(bool run_initialize = false) + { +- memset( have, 0, sizeof(have) ); +- x86_family = 0; ++ memset( have, 0, sizeof(have[0]) * MAX_FEATURE ); ++ if (run_initialize) ++ initialize(); + } + +- static HWFeatures initialize(void) ++ static void initializeNames() + { +- HWFeatures f; ++ for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++) ++ { ++ g_hwFeatureNames[i] = 0; ++ } ++ g_hwFeatureNames[CPU_MMX] = "MMX"; ++ g_hwFeatureNames[CPU_SSE] = "SSE"; ++ g_hwFeatureNames[CPU_SSE2] = "SSE2"; ++ g_hwFeatureNames[CPU_SSE3] = "SSE3"; ++ g_hwFeatureNames[CPU_SSSE3] = "SSSE3"; ++ g_hwFeatureNames[CPU_SSE4_1] = "SSE4.1"; ++ g_hwFeatureNames[CPU_SSE4_2] = "SSE4.2"; ++ g_hwFeatureNames[CPU_POPCNT] = "POPCNT"; ++ g_hwFeatureNames[CPU_FP16] = "FP16"; ++ g_hwFeatureNames[CPU_AVX] = "AVX"; ++ g_hwFeatureNames[CPU_AVX2] = "AVX2"; ++ g_hwFeatureNames[CPU_FMA3] = "FMA3"; ++ ++ g_hwFeatureNames[CPU_AVX_512F] = "AVX512F"; ++ g_hwFeatureNames[CPU_AVX_512BW] = "AVX512BW"; ++ g_hwFeatureNames[CPU_AVX_512CD] = "AVX512CD"; ++ g_hwFeatureNames[CPU_AVX_512DQ] = "AVX512DQ"; ++ g_hwFeatureNames[CPU_AVX_512ER] = "AVX512ER"; ++ g_hwFeatureNames[CPU_AVX_512IFMA512] = "AVX512IFMA"; ++ g_hwFeatureNames[CPU_AVX_512PF] = "AVX512PF"; ++ g_hwFeatureNames[CPU_AVX_512VBMI] = "AVX512VBMI"; ++ g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL"; ++ ++ g_hwFeatureNames[CPU_NEON] = "NEON"; ++ } ++ ++ void initialize(void) ++ { ++#ifndef WINRT ++ if (getenv("OPENCV_DUMP_CONFIG")) ++ { ++ fprintf(stderr, "\nOpenCV build configuration is:\n%s\n", ++ cv::getBuildInformation().c_str()); ++ } ++#endif ++ ++ initializeNames(); ++ + int cpuid_data[4] = { 0, 0, 0, 0 }; ++ int cpuid_data_ex[4] = { 0, 0, 0, 0 }; + + #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64) ++ #define OPENCV_HAVE_X86_CPUID 1 + __cpuid(cpuid_data, 1); + #elif defined __GNUC__ && (defined __i386__ || defined 
__x86_64__) ++ #define OPENCV_HAVE_X86_CPUID 1 + #ifdef __x86_64__ + asm __volatile__ + ( +@@ -278,33 +335,36 @@ struct HWFeatures + #endif + #endif + +- f.x86_family = (cpuid_data[0] >> 8) & 15; +- if( f.x86_family >= 6 ) ++ #ifdef OPENCV_HAVE_X86_CPUID ++ int x86_family = (cpuid_data[0] >> 8) & 15; ++ if( x86_family >= 6 ) + { +- f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0; +- f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0; +- f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0; +- f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0; +- f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0; +- f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0; +- f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0; +- f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0; +- f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0; +- f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX +- f.have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0; ++ have[CV_CPU_MMX] = (cpuid_data[3] & (1<<23)) != 0; ++ have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0; ++ have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0; ++ have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0; ++ have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0; ++ have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0; ++ have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0; ++ have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0; ++ have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0; ++ have[CV_CPU_AVX] = (cpuid_data[2] & (1<<28)) != 0; ++ have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0; + + // make the second call to the cpuid command in order to get + // information about extended features like AVX2 + #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64) +- __cpuidex(cpuid_data, 7, 0); ++ #define OPENCV_HAVE_X86_CPUID_EX 1 ++ __cpuidex(cpuid_data_ex, 7, 0); + #elif defined __GNUC__ && (defined __i386__ || defined 
__x86_64__) ++ #define OPENCV_HAVE_X86_CPUID_EX 1 + #ifdef __x86_64__ + asm __volatile__ + ( + "movl $7, %%eax\n\t" + "movl $0, %%ecx\n\t" + "cpuid\n\t" +- :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3]) ++ :[eax]"=a"(cpuid_data_ex[0]),[ebx]"=b"(cpuid_data_ex[1]),[ecx]"=c"(cpuid_data_ex[2]),[edx]"=d"(cpuid_data_ex[3]) + : + : "cc" + ); +@@ -317,29 +377,76 @@ struct HWFeatures + "cpuid\n\t" + "movl %%ebx, %0\n\t" + "popl %%ebx\n\t" +- : "=r"(cpuid_data[1]), "=c"(cpuid_data[2]) ++ : "=r"(cpuid_data_ex[1]), "=c"(cpuid_data_ex[2]) + : + : "cc" + ); + #endif + #endif +- f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0; +- +- f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0; +- f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0; +- f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0; +- f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0; +- f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0; +- f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0; +- f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0; +- f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0; +- f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0; ++ ++ #ifdef OPENCV_HAVE_X86_CPUID_EX ++ have[CV_CPU_AVX2] = (cpuid_data_ex[1] & (1<<5)) != 0; ++ ++ have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0; ++ have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0; ++ have[CV_CPU_AVX_512IFMA512] = (cpuid_data_ex[1] & (1<<21)) != 0; ++ have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0; ++ have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0; ++ have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0; ++ have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0; ++ have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0; ++ have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0; ++ #else ++ CV_UNUSED(cpuid_data_ex); ++ #endif ++ ++ bool have_AVX_OS_support = 
true; ++ bool have_AVX512_OS_support = true; ++ if (!(cpuid_data[2] & (1<<27))) ++ have_AVX_OS_support = false; // OS uses XSAVE_XRSTORE and CPU support AVX ++ else ++ { ++ int xcr0 = 0; ++ #ifdef _XCR_XFEATURE_ENABLED_MASK // requires immintrin.h ++ xcr0 = (int)_xgetbv(_XCR_XFEATURE_ENABLED_MASK); ++ #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) ++ __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" ); ++ #endif ++ if ((xcr0 & 0x6) != 0x6) ++ have_AVX_OS_support = false; // YMM registers ++ if ((xcr0 & 0xe6) != 0xe6) ++ have_AVX512_OS_support = false; // ZMM registers ++ } ++ ++ if (!have_AVX_OS_support) ++ { ++ have[CV_CPU_AVX] = false; ++ have[CV_CPU_FP16] = false; ++ have[CV_CPU_AVX2] = false; ++ have[CV_CPU_FMA3] = false; ++ } ++ if (!have_AVX_OS_support || !have_AVX512_OS_support) ++ { ++ have[CV_CPU_AVX_512F] = false; ++ have[CV_CPU_AVX_512BW] = false; ++ have[CV_CPU_AVX_512CD] = false; ++ have[CV_CPU_AVX_512DQ] = false; ++ have[CV_CPU_AVX_512ER] = false; ++ have[CV_CPU_AVX_512IFMA512] = false; ++ have[CV_CPU_AVX_512PF] = false; ++ have[CV_CPU_AVX_512VBMI] = false; ++ have[CV_CPU_AVX_512VL] = false; ++ } + } ++ #else ++ CV_UNUSED(cpuid_data); ++ CV_UNUSED(cpuid_data_ex); ++ #endif // OPENCV_HAVE_X86_CPUID + + #if defined ANDROID || defined __linux__ + #ifdef __aarch64__ +- f.have[CV_CPU_NEON] = true; +- f.have[CV_CPU_FP16] = true; ++ have[CV_CPU_NEON] = true; ++ have[CV_CPU_FP16] = true; + #elif defined __arm__ + int cpufile = open("/proc/self/auxv", O_RDONLY); + +@@ -352,8 +459,8 @@ struct HWFeatures + { + if (auxv.a_type == AT_HWCAP) + { +- f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0; +- f.have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0; ++ have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0; ++ have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0; + break; + } + } +@@ -363,21 +470,133 @@ struct HWFeatures + #endif + #elif (defined __clang__ || defined __APPLE__) + #if (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__)) 
+- f.have[CV_CPU_NEON] = true; ++ have[CV_CPU_NEON] = true; + #endif + #if (defined __ARM_FP && (((__ARM_FP & 0x2) != 0) && defined __ARM_NEON__)) +- f.have[CV_CPU_FP16] = true; ++ have[CV_CPU_FP16] = true; + #endif + #endif + +- return f; ++ int baseline_features[] = { CV_CPU_BASELINE_FEATURES }; ++ if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]))) ++ { ++ fprintf(stderr, "\n" ++ "******************************************************************\n" ++ "* FATAL ERROR: *\n" ++ "* This OpenCV build doesn't support current CPU/HW configuration *\n" ++ "* *\n" ++ "* Use OPENCV_DUMP_CONFIG=1 environment variable for details *\n" ++ "******************************************************************\n"); ++ fprintf(stderr, "\nRequired baseline features:\n"); ++ checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]), true); ++ CV_ErrorNoReturn(cv::Error::StsAssert, "Missing support for required CPU baseline features. 
Check OpenCV build configuration and required CPU/HW setup."); ++ } ++ ++ readSettings(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0])); ++ } ++ ++ bool checkFeatures(const int* features, int count, bool dump = false) ++ { ++ bool result = true; ++ for (int i = 0; i < count; i++) ++ { ++ int feature = features[i]; ++ if (feature) ++ { ++ if (have[feature]) ++ { ++ if (dump) fprintf(stderr, "%s - OK\n", getHWFeatureNameSafe(feature)); ++ } ++ else ++ { ++ result = false; ++ if (dump) fprintf(stderr, "%s - NOT AVAILABLE\n", getHWFeatureNameSafe(feature)); ++ } ++ } ++ } ++ return result; ++ } ++ ++ static inline bool isSymbolSeparator(char c) ++ { ++ return c == ',' || c == ';' || c == '-'; ++ } ++ ++ void readSettings(const int* baseline_features, int baseline_count) ++ { ++ bool dump = true; ++ const char* disabled_features = ++#ifndef WINRT ++ getenv("OPENCV_CPU_DISABLE"); ++#else ++ NULL; ++#endif ++ if (disabled_features && disabled_features[0] != 0) ++ { ++ const char* start = disabled_features; ++ for (;;) ++ { ++ while (start[0] != 0 && isSymbolSeparator(start[0])) ++ { ++ start++; ++ } ++ if (start[0] == 0) ++ break; ++ const char* end = start; ++ while (end[0] != 0 && !isSymbolSeparator(end[0])) ++ { ++ end++; ++ } ++ if (end == start) ++ continue; ++ cv::String feature(start, end); ++ start = end; ++ ++ CV_Assert(feature.size() > 0); ++ ++ bool found = false; ++ for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++) ++ { ++ if (!g_hwFeatureNames[i]) continue; ++ size_t len = strlen(g_hwFeatureNames[i]); ++ if (len != feature.size()) continue; ++ if (feature.compare(g_hwFeatureNames[i]) == 0) ++ { ++ bool isBaseline = false; ++ for (int k = 0; k < baseline_count; k++) ++ { ++ if (baseline_features[k] == i) ++ { ++ isBaseline = true; ++ break; ++ } ++ } ++ if (isBaseline) ++ { ++ if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'. 
This has very limited effect, because code optimizations for this feature are executed unconditionally in the most cases.\n", getHWFeatureNameSafe(i)); ++ } ++ if (!have[i]) ++ { ++ if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n", getHWFeatureNameSafe(i)); ++ } ++ have[i] = false; ++ ++ found = true; ++ break; ++ } ++ } ++ if (!found) ++ { ++ if (dump) fprintf(stderr, "OPENCV: Trying to disable unknown CPU feature: '%s'.\n", feature.c_str()); ++ } ++ } ++ } + } + +- int x86_family; + bool have[MAX_FEATURE+1]; + }; + +-static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures(); ++static HWFeatures featuresEnabled(true), featuresDisabled = HWFeatures(false); + static HWFeatures* currentFeatures = &featuresEnabled; + + bool checkHardwareSupport(int feature) +diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt +index eb5617751..6d9c650bc 100644 +--- a/modules/highgui/CMakeLists.txt ++++ b/modules/highgui/CMakeLists.txt +@@ -65,7 +65,7 @@ elseif(HAVE_QT) + + list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES}) + list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES}) +- ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag) ++ ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag "") + if(${_have_flag}) + set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations) + endif() +diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp +index 0fa520228..dcf2e44b5 100644 +--- a/modules/imgproc/src/imgwarp.cpp ++++ b/modules/imgproc/src/imgwarp.cpp +@@ -1649,7 +1649,7 @@ struct VResizeLanczos4 + { + CastOp castOp; + VecOp vecOp; +- int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width); ++ int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width); + #if CV_ENABLE_UNROLLED + for( ; x <= width - 4; x 
+= 4 ) + { +@@ -1657,7 +1657,7 @@ struct VResizeLanczos4 + const WT* S = src[0]; + WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b; + +- for( k = 1; k < 8; k++ ) ++ for( int k = 1; k < 8; k++ ) + { + b = beta[k]; S = src[k]; + s0 += S[x]*b; s1 += S[x+1]*b; +diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp +index 51843fa48..bb37ee91e 100644 +--- a/modules/objdetect/src/haar.cpp ++++ b/modules/objdetect/src/haar.cpp +@@ -824,10 +824,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, + CvPoint pt, double& stage_sum, int start_stage ) + { + #ifdef CV_HAAR_USE_AVX +- bool haveAVX = false; +- if(cv::checkHardwareSupport(CV_CPU_AVX)) +- if(__xgetbv()&0x6)// Check if the OS will save the YMM registers +- haveAVX = true; ++ bool haveAVX = cv::checkHardwareSupport(CV_CPU_AVX); + #else + # ifdef CV_HAAR_USE_SSE + bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2); +-- +2.12.2 + diff --git a/package/opencv3/opencv3.mk b/package/opencv3/opencv3.mk index 2e50ba244e..153fd066a1 100644 --- a/package/opencv3/opencv3.mk +++ b/package/opencv3/opencv3.mk @@ -97,22 +97,8 @@ OPENCV3_CONF_OPTS += \ # # * PowerPC support is turned off since its only effect is altering CFLAGS, # adding '-mcpu=G3 -mtune=G5' to them, which is already handled by Buildroot. -# * fma3 and popcnt support is disabled because according to gcc manual [2], it -# is only available on x86_64 haswell, broadwell and knl architecture. 
-# -# [2] https://gcc.gnu.org/onlinedocs/gcc-5.1.0/gcc/x86-Options.html#x86-Options OPENCV3_CONF_OPTS += \ - -DENABLE_AVX=$(if $(BR2_X86_CPU_HAS_AVX),ON,OFF) \ - -DENABLE_AVX2=$(if $(BR2_X86_CPU_HAS_AVX2),ON,OFF) \ - -DENABLE_FMA3=OFF \ - -DENABLE_POPCNT=OFF \ - -DENABLE_POWERPC=OFF \ - -DENABLE_SSE=$(if $(BR2_X86_CPU_HAS_SSE),ON,OFF) \ - -DENABLE_SSE2=$(if $(BR2_X86_CPU_HAS_SSE2),ON,OFF) \ - -DENABLE_SSE3=$(if $(BR2_X86_CPU_HAS_SSE3),ON,OFF) \ - -DENABLE_SSE41=$(if $(BR2_X86_CPU_HAS_SSE4),ON,OFF) \ - -DENABLE_SSE42=$(if $(BR2_X86_CPU_HAS_SSE42),ON,OFF) \ - -DENABLE_SSSE3=$(if $(BR2_X86_CPU_HAS_SSSE3),ON,OFF) + -DENABLE_POWERPC=OFF # Cuda stuff OPENCV3_CONF_OPTS += \ -- 2.30.2