diff options
3 files changed, 274 insertions, 0 deletions
diff --git a/meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch b/meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch new file mode 100644 index 0000000000..507d7968a0 --- /dev/null +++ b/meta-oe/recipes-support/opencv/opencv/0001-Revert-cuda-fix-fp16-compilation.patch | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | From 69f9707678190f6a0948a547dce948251f972676 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Randy MacLeod <Randy.MacLeod@windriver.com> | ||
| 3 | Date: Wed, 26 Apr 2017 14:57:30 -0400 | ||
| 4 | Subject: [PATCH 1/2] Revert "cuda: fix fp16 compilation" | ||
| 5 | |||
| 6 | This reverts commit 12e00827be40576b686ea4438a6e6ef85208743d. | ||
| 7 | --- | ||
| 8 | modules/core/include/opencv2/core/cvdef.h | 3 +-- | ||
| 9 | 1 file changed, 1 insertion(+), 2 deletions(-) | ||
| 10 | |||
| 11 | diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h | ||
| 12 | index 699b166..efc24ca 100644 | ||
| 13 | --- a/modules/core/include/opencv2/core/cvdef.h | ||
| 14 | +++ b/modules/core/include/opencv2/core/cvdef.h | ||
| 15 | @@ -303,8 +303,7 @@ enum CpuFeatures { | ||
| 16 | #define CV_2PI 6.283185307179586476925286766559 | ||
| 17 | #define CV_LOG2 0.69314718055994530941723212145818 | ||
| 18 | |||
| 19 | -#if defined __ARM_FP16_FORMAT_IEEE \ | ||
| 20 | - && !defined __CUDACC__ | ||
| 21 | +#if defined (__ARM_FP16_FORMAT_IEEE) | ||
| 22 | # define CV_FP16_TYPE 1 | ||
| 23 | #else | ||
| 24 | # define CV_FP16_TYPE 0 | ||
| 25 | -- | ||
| 26 | 2.9.3 | ||
| 27 | |||
diff --git a/meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch b/meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch new file mode 100644 index 0000000000..d1950a9361 --- /dev/null +++ b/meta-oe/recipes-support/opencv/opencv/0002-Revert-check-FP16-build-condition-correctly.patch | |||
| @@ -0,0 +1,245 @@ | |||
| 1 | From 9108e39e5584ef9b41f80751639b4ec72b3e9538 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Randy MacLeod <Randy.MacLeod@windriver.com> | ||
| 3 | Date: Wed, 26 Apr 2017 15:00:32 -0400 | ||
| 4 | Subject: [PATCH 2/2] Revert "check FP16 build condition correctly" | ||
| 5 | |||
| 6 | This reverts commit c7cb116dc08441fe56cf82d5b21f929e5b674c13. | ||
| 7 | |||
| 8 | Resolve the revert conflicts in favour of the previous behaviour. | ||
| 9 | --- | ||
| 10 | cmake/OpenCVCompilerOptions.cmake | 45 +++++++++-------------- | ||
| 11 | modules/core/include/opencv2/core/cvdef.h | 2 +- | ||
| 12 | modules/core/src/convert.cpp | 11 +++--- | ||
| 13 | modules/core/test/test_intrin.cpp | 60 ++++++++++++++----------------- | ||
| 14 | 4 files changed, 48 insertions(+), 70 deletions(-) | ||
| 15 | |||
| 16 | diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake | ||
| 17 | index 5bb0479..4b19fdb 100644 | ||
| 18 | --- a/cmake/OpenCVCompilerOptions.cmake | ||
| 19 | +++ b/cmake/OpenCVCompilerOptions.cmake | ||
| 20 | @@ -185,7 +185,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) | ||
| 21 | add_extra_compiler_option("-mfp16-format=ieee") | ||
| 22 | endif(ARM) | ||
| 23 | if(ENABLE_NEON) | ||
| 24 | - add_extra_compiler_option("-mfpu=neon") | ||
| 25 | + add_extra_compiler_option("-mfpu=neon-fp16") | ||
| 26 | endif() | ||
| 27 | if(ENABLE_VFPV3 AND NOT ENABLE_NEON) | ||
| 28 | add_extra_compiler_option("-mfpu=vfpv3") | ||
| 29 | @@ -370,34 +370,6 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399) | ||
| 30 | add_extra_compiler_option(-fvisibility-inlines-hidden) | ||
| 31 | endif() | ||
| 32 | |||
| 33 | -if(NOT OPENCV_FP16_DISABLE AND NOT IOS) | ||
| 34 | - if(ARM AND ENABLE_NEON) | ||
| 35 | - set(FP16_OPTION "-mfpu=neon-fp16") | ||
| 36 | - elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX) | ||
| 37 | - set(FP16_OPTION "-mf16c") | ||
| 38 | - endif() | ||
| 39 | - try_compile(__VALID_FP16 | ||
| 40 | - "${OpenCV_BINARY_DIR}" | ||
| 41 | - "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" | ||
| 42 | - COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}" | ||
| 43 | - OUTPUT_VARIABLE TRY_OUT | ||
| 44 | - ) | ||
| 45 | - if(NOT __VALID_FP16) | ||
| 46 | - if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX) | ||
| 47 | - # GCC enables AVX when mf16c is passed | ||
| 48 | - message(STATUS "FP16: Feature disabled") | ||
| 49 | - else() | ||
| 50 | - message(STATUS "FP16: Compiler support is not available") | ||
| 51 | - endif() | ||
| 52 | - else() | ||
| 53 | - message(STATUS "FP16: Compiler support is available") | ||
| 54 | - set(HAVE_FP16 1) | ||
| 55 | - if(NOT ${FP16_OPTION} STREQUAL "") | ||
| 56 | - add_extra_compiler_option(${FP16_OPTION}) | ||
| 57 | - endif() | ||
| 58 | - endif() | ||
| 59 | -endif() | ||
| 60 | - | ||
| 61 | #combine all "extra" options | ||
| 62 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}") | ||
| 63 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}") | ||
| 64 | @@ -450,6 +422,21 @@ if(MSVC) | ||
| 65 | endif() | ||
| 66 | endif() | ||
| 67 | |||
| 68 | +if(NOT OPENCV_FP16_DISABLE) | ||
| 69 | + try_compile(__VALID_FP16 | ||
| 70 | + "${OpenCV_BINARY_DIR}" | ||
| 71 | + "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" | ||
| 72 | + COMPILE_DEFINITIONS "-DCHECK_FP16" | ||
| 73 | + OUTPUT_VARIABLE TRY_OUT | ||
| 74 | + ) | ||
| 75 | + if(NOT __VALID_FP16) | ||
| 76 | + message(STATUS "FP16: Compiler support is not available") | ||
| 77 | + else() | ||
| 78 | + message(STATUS "FP16: Compiler support is available") | ||
| 79 | + set(HAVE_FP16 1) | ||
| 80 | + endif() | ||
| 81 | +endif() | ||
| 82 | + | ||
| 83 | if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib") | ||
| 84 | link_directories("/usr/local/lib") | ||
| 85 | endif() | ||
| 86 | diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h | ||
| 87 | index efc24ca..a10936b 100644 | ||
| 88 | --- a/modules/core/include/opencv2/core/cvdef.h | ||
| 89 | +++ b/modules/core/include/opencv2/core/cvdef.h | ||
| 90 | @@ -312,7 +312,7 @@ enum CpuFeatures { | ||
| 91 | typedef union Cv16suf | ||
| 92 | { | ||
| 93 | short i; | ||
| 94 | -#if CV_FP16_TYPE | ||
| 95 | +#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) | ||
| 96 | __fp16 h; | ||
| 97 | #endif | ||
| 98 | struct _fp16Format | ||
| 99 | diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp | ||
| 100 | index e04d89e..46db26f 100644 | ||
| 101 | --- a/modules/core/src/convert.cpp | ||
| 102 | +++ b/modules/core/src/convert.cpp | ||
| 103 | @@ -44,7 +44,6 @@ | ||
| 104 | #include "precomp.hpp" | ||
| 105 | |||
| 106 | #include "opencl_kernels_core.hpp" | ||
| 107 | -#include "opencv2/core/hal/intrin.hpp" | ||
| 108 | |||
| 109 | #include "opencv2/core/openvx/ovx_defs.hpp" | ||
| 110 | |||
| 111 | @@ -4382,7 +4381,7 @@ struct Cvt_SIMD<float, int> | ||
| 112 | |||
| 113 | #endif | ||
| 114 | |||
| 115 | -#if !CV_FP16_TYPE | ||
| 116 | +#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) ) | ||
| 117 | // const numbers for floating points format | ||
| 118 | const unsigned int kShiftSignificand = 13; | ||
| 119 | const unsigned int kMaskFp16Significand = 0x3ff; | ||
| 120 | @@ -4390,7 +4389,7 @@ const unsigned int kBiasFp16Exponent = 15; | ||
| 121 | const unsigned int kBiasFp32Exponent = 127; | ||
| 122 | #endif | ||
| 123 | |||
| 124 | -#if CV_FP16_TYPE | ||
| 125 | +#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) | ||
| 126 | static float convertFp16SW(short fp16) | ||
| 127 | { | ||
| 128 | // Fp16 -> Fp32 | ||
| 129 | @@ -4452,7 +4451,7 @@ static float convertFp16SW(short fp16) | ||
| 130 | } | ||
| 131 | #endif | ||
| 132 | |||
| 133 | -#if CV_FP16_TYPE | ||
| 134 | +#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) | ||
| 135 | static short convertFp16SW(float fp32) | ||
| 136 | { | ||
| 137 | // Fp32 -> Fp16 | ||
| 138 | @@ -4560,7 +4559,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t | ||
| 139 | if ( ( (intptr_t)dst & 0xf ) == 0 ) | ||
| 140 | #endif | ||
| 141 | { | ||
| 142 | -#if CV_FP16 && CV_SIMD128 | ||
| 143 | +#if CV_FP16 | ||
| 144 | for ( ; x <= size.width - 4; x += 4) | ||
| 145 | { | ||
| 146 | v_float32x4 v_src = v_load(src + x); | ||
| 147 | @@ -4606,7 +4605,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t | ||
| 148 | if ( ( (intptr_t)src & 0xf ) == 0 ) | ||
| 149 | #endif | ||
| 150 | { | ||
| 151 | -#if CV_FP16 && CV_SIMD128 | ||
| 152 | +#if CV_FP16 | ||
| 153 | for ( ; x <= size.width - 4; x += 4) | ||
| 154 | { | ||
| 155 | v_float16x4 v_src = v_load_f16(src + x); | ||
| 156 | diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp | ||
| 157 | index 66b2083..7349d48 100644 | ||
| 158 | --- a/modules/core/test/test_intrin.cpp | ||
| 159 | +++ b/modules/core/test/test_intrin.cpp | ||
| 160 | @@ -729,56 +729,48 @@ template<typename R> struct TheTest | ||
| 161 | return *this; | ||
| 162 | } | ||
| 163 | |||
| 164 | +#if CV_FP16 | ||
| 165 | TheTest & test_loadstore_fp16() | ||
| 166 | { | ||
| 167 | -#if CV_FP16 | ||
| 168 | AlignedData<R> data; | ||
| 169 | AlignedData<R> out; | ||
| 170 | |||
| 171 | - if(checkHardwareSupport(CV_CPU_FP16)) | ||
| 172 | - { | ||
| 173 | - // check if addresses are aligned and unaligned respectively | ||
| 174 | - EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); | ||
| 175 | - EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); | ||
| 176 | - EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); | ||
| 177 | - EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); | ||
| 178 | - | ||
| 179 | - // check some initialization methods | ||
| 180 | - R r1 = data.u; | ||
| 181 | - R r2 = v_load_f16(data.a.d); | ||
| 182 | - R r3(r2); | ||
| 183 | - EXPECT_EQ(data.u[0], r1.get0()); | ||
| 184 | - EXPECT_EQ(data.a[0], r2.get0()); | ||
| 185 | - EXPECT_EQ(data.a[0], r3.get0()); | ||
| 186 | - | ||
| 187 | - // check some store methods | ||
| 188 | - out.a.clear(); | ||
| 189 | - v_store_f16(out.a.d, r1); | ||
| 190 | - EXPECT_EQ(data.a, out.a); | ||
| 191 | - } | ||
| 192 | + // check if addresses are aligned and unaligned respectively | ||
| 193 | + EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); | ||
| 194 | + EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); | ||
| 195 | + EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); | ||
| 196 | + EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); | ||
| 197 | + | ||
| 198 | + // check some initialization methods | ||
| 199 | + R r1 = data.u; | ||
| 200 | + R r2 = v_load_f16(data.a.d); | ||
| 201 | + R r3(r2); | ||
| 202 | + EXPECT_EQ(data.u[0], r1.get0()); | ||
| 203 | + EXPECT_EQ(data.a[0], r2.get0()); | ||
| 204 | + EXPECT_EQ(data.a[0], r3.get0()); | ||
| 205 | + | ||
| 206 | + // check some store methods | ||
| 207 | + out.a.clear(); | ||
| 208 | + v_store_f16(out.a.d, r1); | ||
| 209 | + EXPECT_EQ(data.a, out.a); | ||
| 210 | |||
| 211 | return *this; | ||
| 212 | -#endif | ||
| 213 | } | ||
| 214 | |||
| 215 | TheTest & test_float_cvt_fp16() | ||
| 216 | { | ||
| 217 | -#if CV_FP16 | ||
| 218 | AlignedData<v_float32x4> data; | ||
| 219 | |||
| 220 | - if(checkHardwareSupport(CV_CPU_FP16)) | ||
| 221 | - { | ||
| 222 | - // check conversion | ||
| 223 | - v_float32x4 r1 = v_load(data.a.d); | ||
| 224 | - v_float16x4 r2 = v_cvt_f16(r1); | ||
| 225 | - v_float32x4 r3 = v_cvt_f32(r2); | ||
| 226 | - EXPECT_EQ(0x3c00, r2.get0()); | ||
| 227 | - EXPECT_EQ(r3.get0(), r1.get0()); | ||
| 228 | - } | ||
| 229 | + // check conversion | ||
| 230 | + v_float32x4 r1 = v_load(data.a.d); | ||
| 231 | + v_float16x4 r2 = v_cvt_f16(r1); | ||
| 232 | + v_float32x4 r3 = v_cvt_f32(r2); | ||
| 233 | + EXPECT_EQ(0x3c00, r2.get0()); | ||
| 234 | + EXPECT_EQ(r3.get0(), r1.get0()); | ||
| 235 | |||
| 236 | return *this; | ||
| 237 | -#endif | ||
| 238 | } | ||
| 239 | +#endif | ||
| 240 | |||
| 241 | }; | ||
| 242 | |||
| 243 | -- | ||
| 244 | 2.9.3 | ||
| 245 | |||
diff --git a/meta-oe/recipes-support/opencv/opencv_3.2.bb b/meta-oe/recipes-support/opencv/opencv_3.2.bb index 2cff212ab0..98b6b06529 100644 --- a/meta-oe/recipes-support/opencv/opencv_3.2.bb +++ b/meta-oe/recipes-support/opencv/opencv_3.2.bb | |||
| @@ -27,6 +27,8 @@ SRC_URI = "git://github.com/opencv/opencv.git;name=opencv \ | |||
| 27 | file://fixpkgconfig.patch \ | 27 | file://fixpkgconfig.patch \ |
| 28 | file://uselocalxfeatures.patch;patchdir=../contrib/ \ | 28 | file://uselocalxfeatures.patch;patchdir=../contrib/ \ |
| 29 | file://useoeprotobuf.patch;patchdir=../contrib/ \ | 29 | file://useoeprotobuf.patch;patchdir=../contrib/ \ |
| 30 | file://0001-Revert-cuda-fix-fp16-compilation.patch \ | ||
| 31 | file://0002-Revert-check-FP16-build-condition-correctly.patch \ | ||
| 30 | " | 32 | " |
| 31 | 33 | ||
| 32 | PV = "3.2+git${SRCPV}" | 34 | PV = "3.2+git${SRCPV}" |
