diff options
Diffstat (limited to 'dynamic-layers/clang-layer')
58 files changed, 435 insertions, 15756 deletions
diff --git a/dynamic-layers/clang-layer/recipes-core/dnn/onednn_2.4.4.bb b/dynamic-layers/clang-layer/recipes-core/dnn/onednn_3.4.1.bb index 4d8bf2ff..2fa3f627 100644 --- a/dynamic-layers/clang-layer/recipes-core/dnn/onednn_2.4.4.bb +++ b/dynamic-layers/clang-layer/recipes-core/dnn/onednn_3.4.1.bb | |||
@@ -3,7 +3,7 @@ SUMMARY = "Deep Neural Network Library" | |||
3 | DESCRIPTION = "This software is a user mode library that accelerates\ | 3 | DESCRIPTION = "This software is a user mode library that accelerates\ |
4 | deep-learning applications and frameworks on Intel architecture." | 4 | deep-learning applications and frameworks on Intel architecture." |
5 | LICENSE = "Apache-2.0 & BSD-3-Clause & BSL-1.0" | 5 | LICENSE = "Apache-2.0 & BSD-3-Clause & BSL-1.0" |
6 | LIC_FILES_CHKSUM = "file://LICENSE;md5=b48e3de3bfd47c27882a0d85b20823f5 \ | 6 | LIC_FILES_CHKSUM = "file://LICENSE;md5=3b64000f6e7d52516017622a37a94ce9 \ |
7 | file://tests/gtests/gtest/LICENSE;md5=cbbd27594afd089daa160d3a16dd515a \ | 7 | file://tests/gtests/gtest/LICENSE;md5=cbbd27594afd089daa160d3a16dd515a \ |
8 | file://src/cpu/x64/xbyak/COPYRIGHT;md5=3b9bf048d063d54cdb28964db558bcc7 \ | 8 | file://src/cpu/x64/xbyak/COPYRIGHT;md5=3b9bf048d063d54cdb28964db558bcc7 \ |
9 | file://src/common/ittnotify/LICENSE.BSD;md5=e671ff178b24a95a382ba670503c66fb \ | 9 | file://src/common/ittnotify/LICENSE.BSD;md5=e671ff178b24a95a382ba670503c66fb \ |
@@ -12,9 +12,11 @@ SECTION = "lib" | |||
12 | 12 | ||
13 | inherit pkgconfig cmake ptest | 13 | inherit pkgconfig cmake ptest |
14 | 14 | ||
15 | DNN_BRANCH = "rls-v${@'.'.join(d.getVar('PV').split('.')[0:2])}" | ||
16 | |||
15 | S = "${WORKDIR}/git" | 17 | S = "${WORKDIR}/git" |
16 | SRCREV = "145c4b50196ac90ec1b946fb80cb5cef6e7d2d35" | 18 | SRCREV = "f5ff0a6de16c130053bec1a1aec3a9b826c66f78" |
17 | SRC_URI = "git://github.com/oneapi-src/oneDNN.git;branch=rls-v2.4;protocol=https \ | 19 | SRC_URI = "git://github.com/oneapi-src/oneDNN.git;branch=${DNN_BRANCH};protocol=https \ |
18 | file://run-ptest \ | 20 | file://run-ptest \ |
19 | " | 21 | " |
20 | 22 | ||
@@ -32,10 +34,11 @@ EXTRA_OECMAKE += " \ | |||
32 | -DDNNL_CPU_RUNTIME=OMP \ | 34 | -DDNNL_CPU_RUNTIME=OMP \ |
33 | -DDNNL_ARCH_OPT_FLAGS="" \ | 35 | -DDNNL_ARCH_OPT_FLAGS="" \ |
34 | -DCMAKE_SKIP_RPATH=ON \ | 36 | -DCMAKE_SKIP_RPATH=ON \ |
37 | -DONEDNN_BUILD_GRAPH=OFF \ | ||
35 | " | 38 | " |
36 | 39 | ||
37 | PACKAGECONFIG ??= "" | 40 | PACKAGECONFIG ??= "gpu" |
38 | PACKAGECONFIG[gpu] = "-DDNNL_GPU_RUNTIME=OCL, , opencl-headers ocl-icd, intel-compute-runtime" | 41 | PACKAGECONFIG[gpu] = "-DDNNL_GPU_RUNTIME=OCL, , opencl-headers virtual/opencl-icd, intel-compute-runtime" |
39 | 42 | ||
40 | do_install:append () { | 43 | do_install:append () { |
41 | install -d ${D}${bindir}/mkl-dnn/tests/benchdnn/inputs | 44 | install -d ${D}${bindir}/mkl-dnn/tests/benchdnn/inputs |
diff --git a/dynamic-layers/clang-layer/recipes-core/ispc/ispc/0001-CMakeLists.txt-link-with-libclang-cpp-library-instea.patch b/dynamic-layers/clang-layer/recipes-core/ispc/ispc/0001-CMakeLists.txt-link-with-libclang-cpp-library-instea.patch deleted file mode 100644 index 4e25420c..00000000 --- a/dynamic-layers/clang-layer/recipes-core/ispc/ispc/0001-CMakeLists.txt-link-with-libclang-cpp-library-instea.patch +++ /dev/null | |||
@@ -1,28 +0,0 @@ | |||
1 | From b9bc0df996d1e65fd70d5eb2d40866693f23bb67 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Thu, 24 Jun 2021 17:53:27 +0800 | ||
4 | Subject: [PATCH] CMakeLists.txt: link with libclang-cpp library instead | ||
5 | |||
6 | Upstream-Status: Inappropriate | ||
7 | |||
8 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
9 | --- | ||
10 | CMakeLists.txt | 2 +- | ||
11 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
12 | |||
13 | diff --git a/CMakeLists.txt b/CMakeLists.txt | ||
14 | index ef88317e..7507d6a5 100644 | ||
15 | --- a/CMakeLists.txt | ||
16 | +++ b/CMakeLists.txt | ||
17 | @@ -281,7 +281,7 @@ if (WASM_ENABLED) | ||
18 | list(APPEND ISPC_TARGETS wasm-i32x4) | ||
19 | endif() | ||
20 | |||
21 | -set(CLANG_LIBRARY_LIST clangFrontend clangDriver clangSerialization clangParse clangSema clangAnalysis clangAST clangBasic clangEdit clangLex) | ||
22 | +set(CLANG_LIBRARY_LIST clang-cpp) | ||
23 | set(LLVM_COMPONENTS engine ipo bitreader bitwriter instrumentation linker option frontendopenmp) | ||
24 | |||
25 | if (X86_ENABLED) | ||
26 | -- | ||
27 | 2.17.1 | ||
28 | |||
diff --git a/dynamic-layers/clang-layer/recipes-core/ispc/ispc/0002-cmake-don-t-build-for-32-bit-targets.patch b/dynamic-layers/clang-layer/recipes-core/ispc/ispc/0002-cmake-don-t-build-for-32-bit-targets.patch deleted file mode 100644 index 5f3c7d0b..00000000 --- a/dynamic-layers/clang-layer/recipes-core/ispc/ispc/0002-cmake-don-t-build-for-32-bit-targets.patch +++ /dev/null | |||
@@ -1,64 +0,0 @@ | |||
1 | From 3f3f81bde7d9d80921515ed0bf7fe36e69319bc4 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Wed, 30 Jun 2021 13:47:41 +0800 | ||
4 | Subject: [PATCH] cmake: don't build for 32-bit targets | ||
5 | |||
6 | Error log: | ||
7 | | tmp/work/corei7-64-poky-linux/ispc/1.16.0-r0/recipe-sysroot/usr/include/bits/long-double.h:23:10: fatal error: 'bits/long-double-32.h' file not found | ||
8 | | #include <bits/long-double-32.h> | ||
9 | | ^~~~~~~~~~~~~~~~~~~~~~~ | ||
10 | | 1 error generated. | ||
11 | |||
12 | Remove SYSTEM include search path and set -isysroot dir path | ||
13 | for root dir for cross compilation. | ||
14 | |||
15 | Upstream-Status: Inappropriate | ||
16 | |||
17 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
18 | --- | ||
19 | cmake/GenerateBuiltins.cmake | 8 ++++---- | ||
20 | 1 file changed, 4 insertions(+), 4 deletions(-) | ||
21 | |||
22 | diff --git a/cmake/GenerateBuiltins.cmake b/cmake/GenerateBuiltins.cmake | ||
23 | index 15a74788..db30f809 100644 | ||
24 | --- a/cmake/GenerateBuiltins.cmake | ||
25 | +++ b/cmake/GenerateBuiltins.cmake | ||
26 | @@ -249,7 +249,7 @@ function(builtin_to_cpp bit os_name arch supported_archs supported_oses resultFi | ||
27 | # In this case headers will be installed in /usr/arm-linux-gnueabihf/include and will not be picked up | ||
28 | # by clang by default. So the following line adds such path explicitly. If this path doesn't exist and | ||
29 | # the headers can be found in other locations, this should not be a problem. | ||
30 | - set(includePath -isystem/usr/${debian_triple}/include) | ||
31 | + set(includePath -isysroot${SYSROOT_DIR}) | ||
32 | endif() | ||
33 | endif() | ||
34 | |||
35 | @@ -331,7 +331,7 @@ function (generate_target_builtins resultList) | ||
36 | set(regular_targets ${ARGN}) | ||
37 | list(FILTER regular_targets EXCLUDE REGEX wasm) | ||
38 | foreach (ispc_target ${regular_targets}) | ||
39 | - foreach (bit 32 64) | ||
40 | + foreach (bit 64) | ||
41 | foreach (os_name ${TARGET_OS_LIST_FOR_LL}) | ||
42 | target_ll_to_cpp(target-${ispc_target} ${bit} ${os_name} output${os_name}${bit}) | ||
43 | list(APPEND tmpList ${output${os_name}${bit}}) | ||
44 | @@ -392,7 +392,7 @@ function (generate_common_builtins resultList) | ||
45 | endif() | ||
46 | |||
47 | message (STATUS "ISPC will be built with support of ${supported_oses} for ${supported_archs}") | ||
48 | - foreach (bit 32 64) | ||
49 | + foreach (bit 64) | ||
50 | foreach (os_name "windows" "linux" "freebsd" "macos" "android" "ios" "ps4" "web") | ||
51 | foreach (arch "x86" "arm" "wasm32") | ||
52 | builtin_to_cpp(${bit} ${os_name} ${arch} "${supported_archs}" "${supported_oses}" res${bit}${os_name}${arch}) | ||
53 | @@ -405,7 +405,7 @@ function (generate_common_builtins resultList) | ||
54 | endforeach() | ||
55 | endforeach() | ||
56 | if (GENX_ENABLED) | ||
57 | - foreach (bit 32 64) | ||
58 | + foreach (bit 64) | ||
59 | builtin_genx_to_cpp(${bit} res_genx_${bit}) | ||
60 | list(APPEND tmpList ${res_genx_${bit}} ) | ||
61 | if(MSVC) | ||
62 | -- | ||
63 | 2.17.1 | ||
64 | |||
diff --git a/dynamic-layers/clang-layer/recipes-core/ispc/ispc/8b5d0f26916e776bc3664e6a4dc68eff3a198d7a.patch b/dynamic-layers/clang-layer/recipes-core/ispc/ispc/8b5d0f26916e776bc3664e6a4dc68eff3a198d7a.patch deleted file mode 100644 index 1b87af4c..00000000 --- a/dynamic-layers/clang-layer/recipes-core/ispc/ispc/8b5d0f26916e776bc3664e6a4dc68eff3a198d7a.patch +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | From 8b5d0f26916e776bc3664e6a4dc68eff3a198d7a Mon Sep 17 00:00:00 2001 | ||
2 | From: Dmitry Babokin <dmitry.y.babokin@intel.com> | ||
3 | Date: Wed, 16 Jun 2021 20:38:44 -0700 | ||
4 | Subject: [PATCH] Do not use depricated file open flags | ||
5 | |||
6 | Upstream-Status: Backport [https://github.com/ispc/ispc/commit/8b5d0f26916e776bc3664e6a4dc68eff3a198d7a] | ||
7 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
8 | --- | ||
9 | src/module.cpp | 2 +- | ||
10 | src/opt.cpp | 2 +- | ||
11 | 2 files changed, 2 insertions(+), 2 deletions(-) | ||
12 | |||
13 | diff --git a/src/module.cpp b/src/module.cpp | ||
14 | index 1e68d30c4..352bcd09e 100644 | ||
15 | --- a/src/module.cpp | ||
16 | +++ b/src/module.cpp | ||
17 | @@ -1314,7 +1314,7 @@ bool Module::writeObjectFileOrAssembly(llvm::TargetMachine *targetMachine, llvm: | ||
18 | llvm::CodeGenFileType fileType = (outputType == Object) ? llvm::CGFT_ObjectFile : llvm::CGFT_AssemblyFile; | ||
19 | bool binary = (fileType == llvm::CGFT_ObjectFile); | ||
20 | |||
21 | - llvm::sys::fs::OpenFlags flags = binary ? llvm::sys::fs::F_None : llvm::sys::fs::F_Text; | ||
22 | + llvm::sys::fs::OpenFlags flags = binary ? llvm::sys::fs::OF_None : llvm::sys::fs::OF_Text; | ||
23 | |||
24 | std::error_code error; | ||
25 | |||
26 | diff --git a/src/opt.cpp b/src/opt.cpp | ||
27 | index ae1a11d3d..de1b27e1e 100644 | ||
28 | --- a/src/opt.cpp | ||
29 | +++ b/src/opt.cpp | ||
30 | @@ -4687,7 +4687,7 @@ void DebugPassFile::run(llvm::Module &module, bool init) { | ||
31 | std::error_code EC; | ||
32 | char fname[100]; | ||
33 | snprintf(fname, sizeof(fname), "%s_%d_%s.ll", init ? "init" : "ir", pnum, sanitize(std::string(pname)).c_str()); | ||
34 | - llvm::raw_fd_ostream OS(fname, EC, llvm::sys::fs::F_None); | ||
35 | + llvm::raw_fd_ostream OS(fname, EC, llvm::sys::fs::OF_None); | ||
36 | Assert(!EC && "IR dump file creation failed!"); | ||
37 | module.print(OS, 0); | ||
38 | } | ||
diff --git a/dynamic-layers/clang-layer/recipes-core/ispc/ispc_1.16.1.bb b/dynamic-layers/clang-layer/recipes-core/ispc/ispc_1.16.1.bb deleted file mode 100644 index c319885f..00000000 --- a/dynamic-layers/clang-layer/recipes-core/ispc/ispc_1.16.1.bb +++ /dev/null | |||
@@ -1,36 +0,0 @@ | |||
1 | SUMMARY = "Intel(R) Implicit SPMD Program Compiler" | ||
2 | DESCRIPTION = "ispc is a compiler for a variant of the C programming language, \ | ||
3 | with extensions for single program, multiple data programming." | ||
4 | HOMEPAGE = "https://github.com/ispc/ispc" | ||
5 | |||
6 | LICENSE = "BSD-3-Clause & Apache-2.0-with-LLVM-exception" | ||
7 | LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=da5ecffdd210b3cf776b32b41c182e87 \ | ||
8 | file://third-party-programs.txt;md5=3cd6f8a7c3bd9d2bb898fcb27c75221a" | ||
9 | |||
10 | inherit cmake python3native | ||
11 | |||
12 | S = "${WORKDIR}/git" | ||
13 | |||
14 | SRC_URI = "git://github.com/ispc/ispc.git;protocol=https;branch=releases/v1.16.x \ | ||
15 | file://0001-CMakeLists.txt-link-with-libclang-cpp-library-instea.patch \ | ||
16 | file://0002-cmake-don-t-build-for-32-bit-targets.patch \ | ||
17 | file://8b5d0f26916e776bc3664e6a4dc68eff3a198d7a.patch \ | ||
18 | " | ||
19 | SRCREV = "ae404c1da54422bc70696fbdaa4055bca0d1711e" | ||
20 | |||
21 | COMPATIBLE_HOST = '(x86_64).*-linux' | ||
22 | |||
23 | DEPENDS += " clang-native bison-native " | ||
24 | RDEPENDS:${PN} += " clang-libllvm clang" | ||
25 | |||
26 | EXTRA_OECMAKE += " \ | ||
27 | -DISPC_INCLUDE_TESTS=OFF \ | ||
28 | -DISPC_INCLUDE_EXAMPLES=OFF \ | ||
29 | -DISPC_NO_DUMPS=ON \ | ||
30 | -DARM_ENABLED=OFF \ | ||
31 | -DISPC_CROSS=ON \ | ||
32 | -DSYSROOT_DIR=${STAGING_DIR_NATIVE} \ | ||
33 | " | ||
34 | |||
35 | TOOLCHAIN = "clang" | ||
36 | BBCLASSEXTEND = "native nativesdk" | ||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch deleted file mode 100644 index cd519971..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch +++ /dev/null | |||
@@ -1,111 +0,0 @@ | |||
1 | From eeb816d95f0910bd246e37bb2bb3923acf0edf6b Mon Sep 17 00:00:00 2001 | ||
2 | From: Aleksander Us <aleksander.us@intel.com> | ||
3 | Date: Mon, 26 Aug 2019 15:47:41 +0300 | ||
4 | Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in | ||
5 | SplitBlockPredecessors. | ||
6 | |||
7 | In case when BB is header of some loop and predecessor is latch of | ||
8 | this loop, metadata was not attached to newly created basic block. | ||
9 | This led to loss of loop metadata for other passes. | ||
10 | |||
11 | Upstream-Status: Submitted [https://reviews.llvm.org/D66892] | ||
12 | |||
13 | https://github.com/intel/llvm-patches/commit/8af4449e2d201707f7f2f832b473a0439e255f32 | ||
14 | |||
15 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
16 | --- | ||
17 | lib/Transforms/Utils/BasicBlockUtils.cpp | 23 ++++++++---- | ||
18 | test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++ | ||
19 | 2 files changed, 52 insertions(+), 7 deletions(-) | ||
20 | create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll | ||
21 | |||
22 | diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp | ||
23 | index 5fa371377c8..3a90ae061fb 100644 | ||
24 | --- a/lib/Transforms/Utils/BasicBlockUtils.cpp | ||
25 | +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp | ||
26 | @@ -579,24 +579,33 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, | ||
27 | |||
28 | // The new block unconditionally branches to the old block. | ||
29 | BranchInst *BI = BranchInst::Create(BB, NewBB); | ||
30 | + bool IsBBHeader = LI && LI->isLoopHeader(BB); | ||
31 | + Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr; | ||
32 | // Splitting the predecessors of a loop header creates a preheader block. | ||
33 | - if (LI && LI->isLoopHeader(BB)) | ||
34 | + if (IsBBHeader) | ||
35 | // Using the loop start line number prevents debuggers stepping into the | ||
36 | // loop body for this instruction. | ||
37 | - BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc()); | ||
38 | + BI->setDebugLoc(BBLoop->getStartLoc()); | ||
39 | else | ||
40 | BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc()); | ||
41 | |||
42 | // Move the edges from Preds to point to NewBB instead of BB. | ||
43 | - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { | ||
44 | + for (BasicBlock *Pred : Preds) { | ||
45 | + Instruction *PI = Pred->getTerminator(); | ||
46 | // This is slightly more strict than necessary; the minimum requirement | ||
47 | // is that there be no more than one indirectbr branching to BB. And | ||
48 | // all BlockAddress uses would need to be updated. | ||
49 | - assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && | ||
50 | + assert(!isa<IndirectBrInst>(PI) && | ||
51 | "Cannot split an edge from an IndirectBrInst"); | ||
52 | - assert(!isa<CallBrInst>(Preds[i]->getTerminator()) && | ||
53 | - "Cannot split an edge from a CallBrInst"); | ||
54 | - Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); | ||
55 | + assert(!isa<CallBrInst>(PI) && "Cannot split an edge from a CallBrInst"); | ||
56 | + if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) { | ||
57 | + // Update loop metadata if it exists. | ||
58 | + if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) { | ||
59 | + BI->setMetadata(LLVMContext::MD_loop, LoopMD); | ||
60 | + PI->setMetadata(LLVMContext::MD_loop, nullptr); | ||
61 | + } | ||
62 | + } | ||
63 | + PI->replaceUsesOfWith(BB, NewBB); | ||
64 | } | ||
65 | |||
66 | // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI | ||
67 | diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll | ||
68 | new file mode 100644 | ||
69 | index 00000000000..c15c92fe3ae | ||
70 | --- /dev/null | ||
71 | +++ b/test/Transforms/LoopSimplify/loop_metadata.ll | ||
72 | @@ -0,0 +1,36 @@ | ||
73 | +; RUN: opt -S -loop-simplify < %s | FileCheck %s | ||
74 | + | ||
75 | +; CHECK: for.cond.loopexit: | ||
76 | +; CHECK: br label %for.cond, !llvm.loop !0 | ||
77 | +; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit | ||
78 | + | ||
79 | +define void @foo() { | ||
80 | +entry: | ||
81 | + br label %for.cond | ||
82 | + | ||
83 | +for.cond: ; preds = %for.cond1, %entry | ||
84 | + %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ] | ||
85 | + %cmp = icmp ult i32 %j, 8 | ||
86 | + br i1 %cmp, label %for.body, label %for.end | ||
87 | + | ||
88 | +for.body: ; preds = %for.cond | ||
89 | + %dummy1 = add i32 1, 1 | ||
90 | + %add = add nuw nsw i32 %j, 1 | ||
91 | + br label %for.cond1 | ||
92 | + | ||
93 | +for.cond1: ; preds = %for.body1, %for.body | ||
94 | + %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ] | ||
95 | + %cmp1 = icmp ult i32 %i.0, 8 | ||
96 | + br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0 | ||
97 | + | ||
98 | +for.body1: ; preds = %for.cond1 | ||
99 | + %dummy2 = add i32 1, 1 | ||
100 | + %inc = add nuw nsw i32 %i.0, 1 | ||
101 | + br label %for.cond1 | ||
102 | + | ||
103 | +for.end: ; preds = %for.cond | ||
104 | + ret void | ||
105 | +} | ||
106 | + | ||
107 | +!0 = distinct !{!0, !1} | ||
108 | +!1 = !{!"llvm.loop.unroll.full"} | ||
109 | -- | ||
110 | 2.18.0 | ||
111 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch deleted file mode 100644 index 48307deb..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch +++ /dev/null | |||
@@ -1,146 +0,0 @@ | |||
1 | From 35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001 | ||
2 | From: Aleksander Us <aleksander.us@intel.com> | ||
3 | Date: Mon, 26 Aug 2019 15:45:47 +0300 | ||
4 | Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in | ||
5 | LFTR when possible. | ||
6 | |||
7 | SCEV analysis cannot properly cache instruction with poison flags | ||
8 | (for example, add nsw outside of loop will not be reused by expander). | ||
9 | This can lead to generating of additional instructions by SCEV expander. | ||
10 | |||
11 | Example IR: | ||
12 | |||
13 | ... | ||
14 | %maxval = add nuw nsw i32 %a1, %a2 | ||
15 | ... | ||
16 | for.body: | ||
17 | ... | ||
18 | %cmp22 = icmp ult i32 %ivadd, %maxval | ||
19 | br i1 %cmp22, label %for.body, label %for.end | ||
20 | ... | ||
21 | |||
22 | SCEV expander will generate copy of %maxval in preheader but without | ||
23 | nuw/nsw flags. This can be avoided by explicit check that iv count | ||
24 | value gives the same SCEV expressions as calculated by LFTR. | ||
25 | |||
26 | Upstream-Status: Submitted [https://reviews.llvm.org/D66890] | ||
27 | |||
28 | https://github.com/intel/llvm-patches/commit/fd6a6c97341a56fd21bc32bc940afea751312e8f | ||
29 | |||
30 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
31 | --- | ||
32 | lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++- | ||
33 | test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++ | ||
34 | test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++----- | ||
35 | test/Transforms/IndVarSimplify/udiv.ll | 1 + | ||
36 | 4 files changed, 38 insertions(+), 7 deletions(-) | ||
37 | create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll | ||
38 | |||
39 | diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp | ||
40 | index f9fc698a4a9..5e04dac8aa6 100644 | ||
41 | --- a/lib/Transforms/Scalar/IndVarSimplify.cpp | ||
42 | +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp | ||
43 | @@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, | ||
44 | if (UsePostInc) | ||
45 | IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); | ||
46 | |||
47 | + // If computed limit is equal to old limit then do not use SCEV expander | ||
48 | + // because it can lost NUW/NSW flags and create extra instructions. | ||
49 | + BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); | ||
50 | + if (ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getOperand(0))) { | ||
51 | + Value *Limit = Cmp->getOperand(0); | ||
52 | + if (!L->isLoopInvariant(Limit)) | ||
53 | + Limit = Cmp->getOperand(1); | ||
54 | + if (SE->getSCEV(Limit) == IVLimit) | ||
55 | + return Limit; | ||
56 | + } | ||
57 | + | ||
58 | // Expand the code for the iteration count. | ||
59 | assert(SE->isLoopInvariant(IVLimit, L) && | ||
60 | "Computed iteration count is not loop invariant!"); | ||
61 | @@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, | ||
62 | // SCEV expression (IVInit) for a pointer type IV value (IndVar). | ||
63 | Type *LimitTy = ExitCount->getType()->isPointerTy() ? | ||
64 | IndVar->getType() : ExitCount->getType(); | ||
65 | - BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); | ||
66 | return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); | ||
67 | } | ||
68 | } | ||
69 | diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll | ||
70 | new file mode 100644 | ||
71 | index 00000000000..abd1cbb6c51 | ||
72 | --- /dev/null | ||
73 | +++ b/test/Transforms/IndVarSimplify/add_nsw.ll | ||
74 | @@ -0,0 +1,23 @@ | ||
75 | +; RUN: opt -indvars -S %s | FileCheck %s | ||
76 | + | ||
77 | +target datalayout = "e-p:32:32-i64:64-n8:16:32" | ||
78 | + | ||
79 | +; CHECK: for.body.preheader: | ||
80 | +; CHECK-NOT: add | ||
81 | +; CHECK: for.body: | ||
82 | + | ||
83 | +define void @foo(i32 %a1, i32 %a2) { | ||
84 | +entry: | ||
85 | + %maxval = add nuw nsw i32 %a1, %a2 | ||
86 | + %cmp = icmp slt i32 %maxval, 1 | ||
87 | + br i1 %cmp, label %for.end, label %for.body | ||
88 | + | ||
89 | +for.body: ; preds = %entry, %for.body | ||
90 | + %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ] | ||
91 | + %add31 = add nuw nsw i32 %j.02, 1 | ||
92 | + %cmp22 = icmp slt i32 %add31, %maxval | ||
93 | + br i1 %cmp22, label %for.body, label %for.end | ||
94 | + | ||
95 | +for.end: ; preds = %for.body | ||
96 | + ret void | ||
97 | +} | ||
98 | diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll | ||
99 | index 14ae9738696..509d662b767 100644 | ||
100 | --- a/test/Transforms/IndVarSimplify/lftr-reuse.ll | ||
101 | +++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll | ||
102 | @@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { | ||
103 | ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] | ||
104 | ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] | ||
105 | ; CHECK: outer.preheader: | ||
106 | -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1 | ||
107 | ; CHECK-NEXT: br label [[OUTER:%.*]] | ||
108 | ; CHECK: outer: | ||
109 | -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] | ||
110 | -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ] | ||
111 | +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] | ||
112 | ; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] | ||
113 | ; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 | ||
114 | ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] | ||
115 | @@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { | ||
116 | ; CHECK: inner: | ||
117 | ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ] | ||
118 | ; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 | ||
119 | -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]] | ||
120 | +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]] | ||
121 | ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] | ||
122 | ; CHECK: outer.inc.loopexit: | ||
123 | ; CHECK-NEXT: br label [[OUTER_INC]] | ||
124 | ; CHECK: outer.inc: | ||
125 | ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 | ||
126 | -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1 | ||
127 | -; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]] | ||
128 | +; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]] | ||
129 | ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] | ||
130 | ; CHECK: exit.loopexit: | ||
131 | ; CHECK-NEXT: br label [[EXIT]] | ||
132 | diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll | ||
133 | index b3f2c2a6a66..3530343ef4a 100644 | ||
134 | --- a/test/Transforms/IndVarSimplify/udiv.ll | ||
135 | +++ b/test/Transforms/IndVarSimplify/udiv.ll | ||
136 | @@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind | ||
137 | ; CHECK-LABEL: @foo( | ||
138 | ; CHECK: for.body.preheader: | ||
139 | ; CHECK-NOT: udiv | ||
140 | +; CHECK: for.body: | ||
141 | |||
142 | define void @foo(double* %p, i64 %n) nounwind { | ||
143 | entry: | ||
144 | -- | ||
145 | 2.18.0 | ||
146 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-OpenCL-3.0-support.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-OpenCL-3.0-support.patch deleted file mode 100644 index 1ab00df0..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-OpenCL-3.0-support.patch +++ /dev/null | |||
@@ -1,8259 +0,0 @@ | |||
1 | From 8dbdb2f26674a938ff43b5bfe5b3bf3d1117f9e4 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Wed, 7 Apr 2021 16:36:10 +0800 | ||
4 | Subject: [PATCH 1/7] OpenCL 3.0 support | ||
5 | |||
6 | Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0001-OpenCL-3.0-support.patch] | ||
7 | Signed-off-by: Anton Zabaznov <anton.zabaznov@intel.com> | ||
8 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
9 | --- | ||
10 | clang/include/clang/Basic/Builtins.def | 65 +- | ||
11 | clang/include/clang/Basic/Builtins.h | 13 +- | ||
12 | .../clang/Basic/DiagnosticParseKinds.td | 2 + | ||
13 | .../clang/Basic/DiagnosticSemaKinds.td | 7 + | ||
14 | clang/include/clang/Basic/LangOptions.def | 2 + | ||
15 | clang/include/clang/Basic/LangStandards.def | 4 + | ||
16 | .../include/clang/Basic/OpenCLExtensions.def | 15 + | ||
17 | clang/include/clang/Basic/OpenCLOptions.h | 175 +- | ||
18 | clang/include/clang/Driver/Options.td | 2 +- | ||
19 | clang/include/clang/Sema/Sema.h | 9 + | ||
20 | clang/lib/AST/ASTContext.cpp | 3 +- | ||
21 | clang/lib/Basic/Builtins.cpp | 28 +- | ||
22 | clang/lib/Basic/TargetInfo.cpp | 11 + | ||
23 | clang/lib/Basic/Targets.cpp | 1 - | ||
24 | clang/lib/CodeGen/CodeGenFunction.cpp | 6 +- | ||
25 | clang/lib/Frontend/CompilerInvocation.cpp | 22 +- | ||
26 | clang/lib/Frontend/InitPreprocessor.cpp | 6 +- | ||
27 | clang/lib/Headers/opencl-c-base.h | 75 +- | ||
28 | clang/lib/Headers/opencl-c.h | 3228 ++++++++++++++--- | ||
29 | clang/lib/Parse/ParseDecl.cpp | 12 +- | ||
30 | clang/lib/Parse/ParsePragma.cpp | 10 +- | ||
31 | clang/lib/Sema/OpenCLBuiltins.td | 49 +- | ||
32 | clang/lib/Sema/Sema.cpp | 47 +- | ||
33 | clang/lib/Sema/SemaChecking.cpp | 38 +- | ||
34 | clang/lib/Sema/SemaDecl.cpp | 15 +- | ||
35 | clang/lib/Sema/SemaDeclAttr.cpp | 9 +- | ||
36 | clang/lib/Sema/SemaDeclCXX.cpp | 10 + | ||
37 | clang/lib/Sema/SemaLookup.cpp | 19 +- | ||
38 | clang/lib/Sema/SemaType.cpp | 14 +- | ||
39 | .../CodeGenOpenCL/addr-space-struct-arg.cl | 7 +- | ||
40 | .../address-spaces-conversions.cl | 2 + | ||
41 | .../CodeGenOpenCL/address-spaces-mangling.cl | 3 +- | ||
42 | clang/test/CodeGenOpenCL/address-spaces.cl | 4 + | ||
43 | .../amdgcn-automatic-variable.cl | 1 + | ||
44 | .../CodeGenOpenCL/amdgpu-sizeof-alignof.cl | 21 +- | ||
45 | .../CodeGenOpenCL/arm-integer-dot-product.cl | 1 + | ||
46 | .../test/CodeGenOpenCL/cl-uniform-wg-size.cl | 2 + | ||
47 | clang/test/CodeGenOpenCL/fpmath.cl | 2 + | ||
48 | .../generic-address-space-feature.cl | 28 + | ||
49 | .../intel-subgroups-avc-ext-types.cl | 1 + | ||
50 | .../kernels-have-spir-cc-by-default.cl | 3 + | ||
51 | clang/test/CodeGenOpenCL/logical-ops.cl | 1 + | ||
52 | clang/test/CodeGenOpenCL/no-half.cl | 1 + | ||
53 | clang/test/CodeGenOpenCL/pipe_builtin.cl | 3 + | ||
54 | clang/test/CodeGenOpenCL/pipe_types.cl | 1 + | ||
55 | clang/test/CodeGenOpenCL/printf.cl | 2 + | ||
56 | clang/test/CodeGenOpenCL/unroll-hint.cl | 1 + | ||
57 | clang/test/Driver/autocomplete.c | 2 + | ||
58 | clang/test/Driver/opencl.cl | 2 + | ||
59 | clang/test/Driver/unknown-std.cl | 1 + | ||
60 | clang/test/Frontend/stdlang.c | 1 + | ||
61 | clang/test/Headers/opencl-c-header.cl | 7 +- | ||
62 | clang/test/Index/pipe-size.cl | 7 + | ||
63 | clang/test/Preprocessor/predefined-macros.c | 13 + | ||
64 | .../Sema/feature-extensions-simult-support.cl | 75 + | ||
65 | clang/test/Sema/features-ignore-pragma.cl | 24 + | ||
66 | clang/test/Sema/opencl-features-pipes.cl | 18 + | ||
67 | clang/test/Sema/opencl-features.cl | 128 + | ||
68 | clang/test/Sema/pipe_builtins_feature.cl | 21 + | ||
69 | .../address-spaces-conversions-cl2.0.cl | 3 + | ||
70 | clang/test/SemaOpenCL/address-spaces.cl | 1 + | ||
71 | .../SemaOpenCL/cl20-device-side-enqueue.cl | 16 +- | ||
72 | .../SemaOpenCL/forget-unsupported-builtins.cl | 22 + | ||
73 | clang/test/SemaOpenCL/image-features.cl | 20 + | ||
74 | .../SemaOpenCL/invalid-pipe-builtin-cl2.0.cl | 1 + | ||
75 | clang/test/SemaOpenCL/storageclass-cl20.cl | 1 + | ||
76 | .../TableGen/ClangOpenCLBuiltinEmitter.cpp | 35 +- | ||
77 | 67 files changed, 3656 insertions(+), 723 deletions(-) | ||
78 | create mode 100644 clang/test/CodeGenOpenCL/generic-address-space-feature.cl | ||
79 | create mode 100644 clang/test/Sema/feature-extensions-simult-support.cl | ||
80 | create mode 100644 clang/test/Sema/features-ignore-pragma.cl | ||
81 | create mode 100644 clang/test/Sema/opencl-features-pipes.cl | ||
82 | create mode 100644 clang/test/Sema/opencl-features.cl | ||
83 | create mode 100644 clang/test/Sema/pipe_builtins_feature.cl | ||
84 | create mode 100644 clang/test/SemaOpenCL/forget-unsupported-builtins.cl | ||
85 | create mode 100644 clang/test/SemaOpenCL/image-features.cl | ||
86 | |||
87 | diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def | ||
88 | index 1a6c85ce2dd3..b76e277f0337 100644 | ||
89 | --- a/clang/include/clang/Basic/Builtins.def | ||
90 | +++ b/clang/include/clang/Basic/Builtins.def | ||
91 | @@ -106,6 +106,10 @@ | ||
92 | # define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) | ||
93 | #endif | ||
94 | |||
95 | +#if defined(BUILTIN) && !defined(OPENCLBUILTIN) | ||
96 | +# define OPENCLBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS) | ||
97 | +#endif | ||
98 | + | ||
99 | // Standard libc/libm functions: | ||
100 | BUILTIN(__builtin_atan2 , "ddd" , "Fne") | ||
101 | BUILTIN(__builtin_atan2f, "fff" , "Fne") | ||
102 | @@ -1514,50 +1518,54 @@ BUILTIN(__builtin_coro_param, "bv*v*", "n") | ||
103 | |||
104 | // OpenCL v2.0 s6.13.16, s9.17.3.5 - Pipe functions. | ||
105 | // We need the generic prototype, since the packet type could be anything. | ||
106 | -LANGBUILTIN(read_pipe, "i.", "tn", OCLC20_LANG) | ||
107 | -LANGBUILTIN(write_pipe, "i.", "tn", OCLC20_LANG) | ||
108 | +OPENCLBUILTIN(read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
109 | +OPENCLBUILTIN(write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
110 | |||
111 | -LANGBUILTIN(reserve_read_pipe, "i.", "tn", OCLC20_LANG) | ||
112 | -LANGBUILTIN(reserve_write_pipe, "i.", "tn", OCLC20_LANG) | ||
113 | +OPENCLBUILTIN(reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
114 | +OPENCLBUILTIN(reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
115 | |||
116 | -LANGBUILTIN(commit_write_pipe, "v.", "tn", OCLC20_LANG) | ||
117 | -LANGBUILTIN(commit_read_pipe, "v.", "tn", OCLC20_LANG) | ||
118 | +OPENCLBUILTIN(commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
119 | +OPENCLBUILTIN(commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
120 | |||
121 | -LANGBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) | ||
122 | -LANGBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) | ||
123 | +OPENCLBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
124 | +OPENCLBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
125 | |||
126 | -LANGBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) | ||
127 | -LANGBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) | ||
128 | +OPENCLBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
129 | +OPENCLBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
130 | |||
131 | -LANGBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG) | ||
132 | -LANGBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG) | ||
133 | +OPENCLBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
134 | +OPENCLBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
135 | |||
136 | -LANGBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC20_LANG) | ||
137 | -LANGBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC20_LANG) | ||
138 | +OPENCLBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
139 | +OPENCLBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
140 | |||
141 | -LANGBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC20_LANG) | ||
142 | -LANGBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC20_LANG) | ||
143 | +OPENCLBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
144 | +OPENCLBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_pipes") | ||
145 | |||
146 | // OpenCL v2.0 s6.13.17 - Enqueue kernel functions. | ||
147 | // Custom builtin check allows to perform special check of passed block arguments. | ||
148 | -LANGBUILTIN(enqueue_kernel, "i.", "tn", OCLC20_LANG) | ||
149 | -LANGBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC20_LANG) | ||
150 | -LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", OCLC20_LANG) | ||
151 | -LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", OCLC20_LANG) | ||
152 | -LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC20_LANG) | ||
153 | +OPENCLBUILTIN(enqueue_kernel, "i.", "tn", OCLC2P_LANG, | ||
154 | + "__opencl_c_device_enqueue") | ||
155 | +OPENCLBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC2P_LANG, | ||
156 | + "__opencl_c_device_enqueue") | ||
157 | +OPENCLBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", | ||
158 | + OCLC2P_LANG, "__opencl_c_device_enqueue") | ||
159 | +OPENCLBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", | ||
160 | + OCLC2P_LANG, "__opencl_c_device_enqueue") | ||
161 | +OPENCLBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_device_enqueue") | ||
162 | |||
163 | // OpenCL v2.0 s6.13.9 - Address space qualifier functions. | ||
164 | // FIXME: Pointer parameters of OpenCL builtins should have their address space | ||
165 | // requirement defined. | ||
166 | -LANGBUILTIN(to_global, "v*v*", "tn", OCLC20_LANG) | ||
167 | -LANGBUILTIN(to_local, "v*v*", "tn", OCLC20_LANG) | ||
168 | -LANGBUILTIN(to_private, "v*v*", "tn", OCLC20_LANG) | ||
169 | +OPENCLBUILTIN(to_global, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") | ||
170 | +OPENCLBUILTIN(to_local, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") | ||
171 | +OPENCLBUILTIN(to_private, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space") | ||
172 | |||
173 | // OpenCL half load/store builtin | ||
174 | -LANGBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES) | ||
175 | -LANGBUILTIN(__builtin_store_halff, "vfh*", "n", ALL_OCLC_LANGUAGES) | ||
176 | -LANGBUILTIN(__builtin_load_half, "dhC*", "nc", ALL_OCLC_LANGUAGES) | ||
177 | -LANGBUILTIN(__builtin_load_halff, "fhC*", "nc", ALL_OCLC_LANGUAGES) | ||
178 | +OPENCLBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES, "") | ||
179 | +OPENCLBUILTIN(__builtin_store_halff, "vfh*", "n", ALL_OCLC_LANGUAGES, "") | ||
180 | +OPENCLBUILTIN(__builtin_load_half, "dhC*", "nc", ALL_OCLC_LANGUAGES, "") | ||
181 | +OPENCLBUILTIN(__builtin_load_halff, "fhC*", "nc", ALL_OCLC_LANGUAGES, "") | ||
182 | |||
183 | // Builtins for os_log/os_trace | ||
184 | BUILTIN(__builtin_os_log_format_buffer_size, "zcC*.", "p:0:nut") | ||
185 | @@ -1578,3 +1586,4 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") | ||
186 | #undef BUILTIN | ||
187 | #undef LIBBUILTIN | ||
188 | #undef LANGBUILTIN | ||
189 | +#undef OPENCLBUILTIN | ||
190 | diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h | ||
191 | index e4ed482d9068..713ea4bc267d 100644 | ||
192 | --- a/clang/include/clang/Basic/Builtins.h | ||
193 | +++ b/clang/include/clang/Basic/Builtins.h | ||
194 | @@ -33,13 +33,13 @@ enum LanguageID { | ||
195 | CXX_LANG = 0x4, // builtin for cplusplus only. | ||
196 | OBJC_LANG = 0x8, // builtin for objective-c and objective-c++ | ||
197 | MS_LANG = 0x10, // builtin requires MS mode. | ||
198 | - OCLC20_LANG = 0x20, // builtin for OpenCL C 2.0 only. | ||
199 | + OCLC2P_LANG = 0x20, // builtin for OpenCL C 2.0+ versions. | ||
200 | OCLC1X_LANG = 0x40, // builtin for OpenCL C 1.x only. | ||
201 | OMP_LANG = 0x80, // builtin requires OpenMP. | ||
202 | ALL_LANGUAGES = C_LANG | CXX_LANG | OBJC_LANG, // builtin for all languages. | ||
203 | ALL_GNU_LANGUAGES = ALL_LANGUAGES | GNU_LANG, // builtin requires GNU mode. | ||
204 | ALL_MS_LANGUAGES = ALL_LANGUAGES | MS_LANG, // builtin requires MS mode. | ||
205 | - ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC20_LANG // builtin for OCLC languages. | ||
206 | + ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC2P_LANG // builtin for OCLC languages. | ||
207 | }; | ||
208 | |||
209 | namespace Builtin { | ||
210 | @@ -228,6 +228,10 @@ public: | ||
211 | /// for non-builtins. | ||
212 | bool canBeRedeclared(unsigned ID) const; | ||
213 | |||
214 | + bool requiresFeatures(unsigned ID) const { | ||
215 | + return requiresFeatures(getRecord(ID)); | ||
216 | + } | ||
217 | + | ||
218 | private: | ||
219 | const Info &getRecord(unsigned ID) const; | ||
220 | |||
221 | @@ -235,6 +239,11 @@ private: | ||
222 | bool builtinIsSupported(const Builtin::Info &BuiltinInfo, | ||
223 | const LangOptions &LangOpts); | ||
224 | |||
225 | + bool OclBuiltinIsSupported(const Builtin::Info &BuiltinInfo, | ||
226 | + const LangOptions &LangOpts) const; | ||
227 | + | ||
228 | + bool requiresFeatures(const Builtin::Info &BuiltinInfo) const; | ||
229 | + | ||
230 | /// Helper function for isPrintfLike and isScanfLike. | ||
231 | bool isLike(unsigned ID, unsigned &FormatIdx, bool &HasVAListArg, | ||
232 | const char *Fmt) const; | ||
233 | diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td | ||
234 | index 33adf093693f..43ea63586f91 100644 | ||
235 | --- a/clang/include/clang/Basic/DiagnosticParseKinds.td | ||
236 | +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td | ||
237 | @@ -1199,6 +1199,8 @@ def warn_pragma_unsupported_extension : Warning< | ||
238 | "unsupported OpenCL extension %0 - ignoring">, InGroup<IgnoredPragmas>; | ||
239 | def warn_pragma_extension_is_core : Warning< | ||
240 | "OpenCL extension %0 is core feature or supported optional core feature - ignoring">, InGroup<DiagGroup<"pedantic-core-features">>, DefaultIgnore; | ||
241 | +def warn_opencl_pragma_feature_ignore : Warning< | ||
242 | + "OpenCL feature support can't be controlled via pragma, ignoring">, InGroup<IgnoredPragmas>; | ||
243 | |||
244 | // OpenCL errors. | ||
245 | def err_opencl_taking_function_address_parser : Error< | ||
246 | diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td | ||
247 | index 917377420505..91648aa27820 100644 | ||
248 | --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td | ||
249 | +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td | ||
250 | @@ -9378,6 +9378,13 @@ def ext_opencl_ext_vector_type_rgba_selector: ExtWarn< | ||
251 | def err_openclcxx_placement_new : Error< | ||
252 | "use of placement new requires explicit declaration">; | ||
253 | |||
254 | +def err_opencl_pipes_require_feat : Error< | ||
255 | + "usage of OpenCL pipes requires feature support">; | ||
256 | +def err_opencl_memory_scope_require_feat : Error< | ||
257 | + "usage of memory scope requires feature support">; | ||
258 | +def err_opencl_memory_ordering_require_feat : Error< | ||
259 | + "usage of memory ordering requires feature support">; | ||
260 | + | ||
261 | // MIG routine annotations. | ||
262 | def warn_mig_server_routine_does_not_return_kern_return_t : Warning< | ||
263 | "'mig_server_routine' attribute only applies to routines that return a kern_return_t">, | ||
264 | diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def | ||
265 | index 3319a3123976..406f45c0be69 100644 | ||
266 | --- a/clang/include/clang/Basic/LangOptions.def | ||
267 | +++ b/clang/include/clang/Basic/LangOptions.def | ||
268 | @@ -203,6 +203,8 @@ LANGOPT(OpenCL , 1, 0, "OpenCL") | ||
269 | LANGOPT(OpenCLVersion , 32, 0, "OpenCL C version") | ||
270 | LANGOPT(OpenCLCPlusPlus , 1, 0, "C++ for OpenCL") | ||
271 | LANGOPT(OpenCLCPlusPlusVersion , 32, 0, "C++ for OpenCL version") | ||
272 | +LANGOPT(OpenCLGenericKeyword , 1, 0, "OpenCL generic keyword") | ||
273 | +LANGOPT(OpenCLPipeKeyword , 1, 0, "OpenCL pipe keyword") | ||
274 | LANGOPT(NativeHalfType , 1, 0, "Native half type support") | ||
275 | LANGOPT(NativeHalfArgsAndReturns, 1, 0, "Native half args and returns") | ||
276 | LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns") | ||
277 | diff --git a/clang/include/clang/Basic/LangStandards.def b/clang/include/clang/Basic/LangStandards.def | ||
278 | index 7f1a24db7e9b..69aaba3ff5a2 100644 | ||
279 | --- a/clang/include/clang/Basic/LangStandards.def | ||
280 | +++ b/clang/include/clang/Basic/LangStandards.def | ||
281 | @@ -167,6 +167,9 @@ LANGSTANDARD(opencl12, "cl1.2", | ||
282 | LANGSTANDARD(opencl20, "cl2.0", | ||
283 | OpenCL, "OpenCL 2.0", | ||
284 | LineComment | C99 | Digraphs | HexFloat | OpenCL) | ||
285 | +LANGSTANDARD(opencl30, "cl3.0", | ||
286 | + OpenCL, "OpenCL 3.0", | ||
287 | + LineComment | C99 | Digraphs | HexFloat | OpenCL) | ||
288 | LANGSTANDARD(openclcpp, "clc++", | ||
289 | OpenCL, "C++ for OpenCL", | ||
290 | LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 | | ||
291 | @@ -176,6 +179,7 @@ LANGSTANDARD_ALIAS_DEPR(opencl10, "CL") | ||
292 | LANGSTANDARD_ALIAS_DEPR(opencl11, "CL1.1") | ||
293 | LANGSTANDARD_ALIAS_DEPR(opencl12, "CL1.2") | ||
294 | LANGSTANDARD_ALIAS_DEPR(opencl20, "CL2.0") | ||
295 | +LANGSTANDARD_ALIAS_DEPR(opencl30, "CL3.0") | ||
296 | LANGSTANDARD_ALIAS_DEPR(openclcpp, "CLC++") | ||
297 | |||
298 | // CUDA | ||
299 | diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def | ||
300 | index 517481584313..608f78a13eef 100644 | ||
301 | --- a/clang/include/clang/Basic/OpenCLExtensions.def | ||
302 | +++ b/clang/include/clang/Basic/OpenCLExtensions.def | ||
303 | @@ -93,6 +93,21 @@ OPENCLEXT_INTERNAL(cl_intel_subgroups, 120, ~0U) | ||
304 | OPENCLEXT_INTERNAL(cl_intel_subgroups_short, 120, ~0U) | ||
305 | OPENCLEXT_INTERNAL(cl_intel_device_side_avc_motion_estimation, 120, ~0U) | ||
306 | |||
307 | +OPENCLEXT_INTERNAL(__opencl_c_pipes, 200, ~0U) | ||
308 | +OPENCLEXT_INTERNAL(__opencl_c_generic_address_space, 200, ~0U) | ||
309 | +OPENCLEXT_INTERNAL(__opencl_c_work_group_collective_functions, 200, ~0U) | ||
310 | +OPENCLEXT_INTERNAL(__opencl_c_atomic_order_acq_rel, 200, ~0U) | ||
311 | +OPENCLEXT_INTERNAL(__opencl_c_atomic_order_seq_cst, 200, ~0U) | ||
312 | +OPENCLEXT_INTERNAL(__opencl_c_atomic_scope_device, 200, ~0U) | ||
313 | +OPENCLEXT_INTERNAL(__opencl_c_atomic_scope_all_devices, 200, ~0U) | ||
314 | +OPENCLEXT_INTERNAL(__opencl_c_subgroups, 200, ~0U) | ||
315 | +OPENCLEXT_INTERNAL(__opencl_c_3d_image_writes, 100, ~0U) | ||
316 | +OPENCLEXT_INTERNAL(__opencl_c_device_enqueue, 200, ~0U) | ||
317 | +OPENCLEXT_INTERNAL(__opencl_c_read_write_images, 200, ~0U) | ||
318 | +OPENCLEXT_INTERNAL(__opencl_c_program_scope_global_variables, 200, ~0U) | ||
319 | +OPENCLEXT_INTERNAL(__opencl_c_fp64, 120, ~0U) | ||
320 | +OPENCLEXT_INTERNAL(__opencl_c_int64, 100, ~0U) | ||
321 | +OPENCLEXT_INTERNAL(__opencl_c_images, 100, ~0U) | ||
322 | #undef OPENCLEXT_INTERNAL | ||
323 | |||
324 | #ifdef OPENCLEXT | ||
325 | diff --git a/clang/include/clang/Basic/OpenCLOptions.h b/clang/include/clang/Basic/OpenCLOptions.h | ||
326 | index 15661154eab5..9a3a8e33cadd 100644 | ||
327 | --- a/clang/include/clang/Basic/OpenCLOptions.h | ||
328 | +++ b/clang/include/clang/Basic/OpenCLOptions.h | ||
329 | @@ -16,11 +16,16 @@ | ||
330 | |||
331 | #include "clang/Basic/LangOptions.h" | ||
332 | #include "llvm/ADT/StringMap.h" | ||
333 | +#include "llvm/ADT/StringSwitch.h" | ||
334 | |||
335 | namespace clang { | ||
336 | |||
337 | /// OpenCL supported extensions and optional core features | ||
338 | class OpenCLOptions { | ||
339 | + // OpenCL Version | ||
340 | + unsigned CLVer = 120; | ||
341 | + bool IsOpenCLCPlusPlus = false; | ||
342 | + | ||
343 | struct Info { | ||
344 | bool Supported; // Is this option supported | ||
345 | bool Enabled; // Is this option enabled | ||
346 | @@ -31,7 +36,38 @@ class OpenCLOptions { | ||
347 | :Supported(S), Enabled(E), Avail(A), Core(C){} | ||
348 | }; | ||
349 | llvm::StringMap<Info> OptMap; | ||
350 | + | ||
351 | public: | ||
352 | + void setOpenCLVersion(const LangOptions &LO) { | ||
353 | + IsOpenCLCPlusPlus = LO.OpenCLCPlusPlus; | ||
354 | + CLVer = IsOpenCLCPlusPlus ? 200 : LO.OpenCLVersion; | ||
355 | + } | ||
356 | + | ||
357 | + // Get extension which is semantically equivalent to a given feature | ||
358 | + // if exists (e.g. __opencl_c_subgroups -> cl_khr_subgroups) | ||
359 | + llvm::Optional<StringRef> getEquivalentExtension(StringRef Feature) const { | ||
360 | + return llvm::StringSwitch<llvm::Optional<StringRef>>(Feature) | ||
361 | + .Case("__opencl_c_3d_image_writes", | ||
362 | + Optional<StringRef>("cl_khr_3d_image_writes")) | ||
363 | + .Case("__opencl_c_subgroups", Optional<StringRef>("cl_khr_subgroups")) | ||
364 | + .Case("__opencl_c_fp64", Optional<StringRef>("cl_khr_fp64")) | ||
365 | + .Default(Optional<StringRef>()); | ||
366 | + } | ||
367 | + | ||
368 | + // Same as above but for extensions | ||
369 | + llvm::Optional<StringRef> getEquivalentFeature(StringRef Extension) const { | ||
370 | + return llvm::StringSwitch<llvm::Optional<StringRef>>(Extension) | ||
371 | + .Case("cl_khr_3d_image_writes", | ||
372 | + Optional<StringRef>("__opencl_c_3d_image_writes")) | ||
373 | + .Case("cl_khr_subgroups", Optional<StringRef>("__opencl_c_subgroups")) | ||
374 | + .Case("cl_khr_fp64", Optional<StringRef>("__opencl_c_fp64")) | ||
375 | + .Default(Optional<StringRef>()); | ||
376 | + } | ||
377 | + | ||
378 | + bool isFeature(llvm::StringRef Ext) const { | ||
379 | + return Ext.startswith("__opencl_c"); | ||
380 | + } | ||
381 | + | ||
382 | bool isKnown(llvm::StringRef Ext) const { | ||
383 | return OptMap.find(Ext) != OptMap.end(); | ||
384 | } | ||
385 | @@ -42,32 +78,88 @@ public: | ||
386 | |||
387 | // Is supported as either an extension or an (optional) core feature for | ||
388 | // OpenCL version \p CLVer. | ||
389 | - bool isSupported(llvm::StringRef Ext, const LangOptions &LO) const { | ||
390 | + bool isSupported(llvm::StringRef Ext) const { | ||
391 | // In C++ mode all extensions should work at least as in v2.0. | ||
392 | - auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; | ||
393 | auto I = OptMap.find(Ext)->getValue(); | ||
394 | return I.Supported && I.Avail <= CLVer; | ||
395 | } | ||
396 | |||
397 | // Is supported (optional) OpenCL core features for OpenCL version \p CLVer. | ||
398 | // For supported extension, return false. | ||
399 | - bool isSupportedCore(llvm::StringRef Ext, const LangOptions &LO) const { | ||
400 | + bool isSupportedCore(llvm::StringRef Ext) const { | ||
401 | // In C++ mode all extensions should work at least as in v2.0. | ||
402 | - auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; | ||
403 | auto I = OptMap.find(Ext)->getValue(); | ||
404 | return I.Supported && I.Avail <= CLVer && I.Core != ~0U && CLVer >= I.Core; | ||
405 | } | ||
406 | |||
407 | // Is supported OpenCL extension for OpenCL version \p CLVer. | ||
408 | // For supported (optional) core feature, return false. | ||
409 | - bool isSupportedExtension(llvm::StringRef Ext, const LangOptions &LO) const { | ||
410 | + bool isSupportedExtension(llvm::StringRef Ext) const { | ||
411 | // In C++ mode all extensions should work at least as in v2.0. | ||
412 | - auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; | ||
413 | auto I = OptMap.find(Ext)->getValue(); | ||
414 | return I.Supported && I.Avail <= CLVer && (I.Core == ~0U || CLVer < I.Core); | ||
415 | } | ||
416 | |||
417 | + // Support features whose support is directly related to the | ||
418 | + // specific OpenCL version. For example, OpenCL 2.0 supports | ||
419 | + // all features that are optional in 3.0 | ||
420 | + void adjustFeatures() { | ||
421 | + // Support int64 by default (assume compiling for FULL profile) | ||
422 | + OptMap["__opencl_c_int64"].Supported = true; | ||
423 | + | ||
424 | + if (CLVer >= 300) { | ||
425 | + // Simultaneously support extension and corresponding feature | ||
426 | + for (llvm::StringRef F : | ||
427 | + {"__opencl_c_subgroups", "__opencl_c_3d_image_writes", | ||
428 | + "__opencl_c_fp64"}) { | ||
429 | + auto Ext = getEquivalentExtension(F); | ||
430 | + OptMap[*Ext].Supported = OptMap[F].Supported; | ||
431 | + } | ||
432 | + | ||
433 | + // OpenCL C compilers that define the feature macro __opencl_c_pipes or | ||
434 | + // or __opencl_c_device_enqueue must also define the | ||
435 | + // feature macro __opencl_c_generic_address_space. | ||
436 | + if (OptMap["__opencl_c_pipes"].Supported || | ||
437 | + OptMap["__opencl_c_device_enqueue"].Supported) | ||
438 | + OptMap["__opencl_c_generic_address_space"].Supported = true; | ||
439 | + | ||
440 | + // OpenCL C compilers that define the feature macro | ||
441 | + // __opencl_c_3d_image_writes or __opencl_c_read_write_images must also | ||
442 | + // define the feature macro __opencl_c_images | ||
443 | + if (OptMap["__opencl_c_3d_image_writes"].Supported || | ||
444 | + OptMap["__opencl_c_read_write_images"].Supported) | ||
445 | + OptMap["__opencl_c_images"].Supported = true; | ||
446 | + | ||
447 | + // All other features are already supported with options | ||
448 | + // or in target settings | ||
449 | + return; | ||
450 | + } | ||
451 | + | ||
452 | + auto FeaturesRange = llvm::make_filter_range( | ||
453 | + OptMap, [&](llvm::StringMapEntry<Info> &OptVal) { | ||
454 | + auto Opt = OptVal.getKey(); | ||
455 | + return isFeature(Opt); | ||
456 | + }); | ||
457 | + | ||
458 | + for (auto &It : FeaturesRange) { | ||
459 | + auto &Info = It.getValue(); | ||
460 | + // For OpenCL version less then 3.0 some | ||
461 | + // features should be supported simulateneously | ||
462 | + // with specific extension | ||
463 | + if (Optional<llvm::StringRef> Ext = getEquivalentExtension(It.getKey())) | ||
464 | + Info.Supported = Info.Enabled = OptMap[*Ext].Supported; | ||
465 | + else if (Info.Avail <= CLVer) | ||
466 | + Info.Supported = Info.Enabled = true; | ||
467 | + } | ||
468 | + } | ||
469 | + | ||
470 | void enable(llvm::StringRef Ext, bool V = true) { | ||
471 | + // Ignore disabling extensions if corresponding features | ||
472 | + // already supported for OpenCL version higher then 3.0 | ||
473 | + if (CLVer >= 300) | ||
474 | + if (Optional<llvm::StringRef> F = getEquivalentFeature(Ext)) | ||
475 | + if (V != OptMap[*F].Enabled) | ||
476 | + return; | ||
477 | OptMap[Ext].Enabled = V; | ||
478 | } | ||
479 | |||
480 | @@ -96,7 +188,7 @@ public: | ||
481 | OptMap[Ext].Supported = V; | ||
482 | } | ||
483 | |||
484 | - OpenCLOptions(){ | ||
485 | + OpenCLOptions() { | ||
486 | #define OPENCLEXT_INTERNAL(Ext, AvailVer, CoreVer) \ | ||
487 | OptMap[#Ext].Avail = AvailVer; \ | ||
488 | OptMap[#Ext].Core = CoreVer; | ||
489 | @@ -104,35 +196,86 @@ public: | ||
490 | } | ||
491 | |||
492 | void addSupport(const OpenCLOptions &Opts) { | ||
493 | + assert(IsOpenCLCPlusPlus == Opts.IsOpenCLCPlusPlus && CLVer == Opts.CLVer); | ||
494 | for (auto &I:Opts.OptMap) | ||
495 | - if (I.second.Supported) | ||
496 | + if (I.second.Supported) { | ||
497 | OptMap[I.getKey()].Supported = true; | ||
498 | + // All features are enabled as they are supported | ||
499 | + if (isFeature(I.getKey())) | ||
500 | + OptMap[I.getKey()].Enabled = true; | ||
501 | + } | ||
502 | + if (CLVer >= 300) { | ||
503 | + // Enabling extensions with respect to features | ||
504 | + for (llvm::StringRef Ext : | ||
505 | + {"cl_khr_3d_image_writes", "cl_khr_subgroups", "cl_khr_fp64"}) { | ||
506 | + auto Feature = getEquivalentFeature(Ext); | ||
507 | + enable(Ext, OptMap[*Feature].Enabled); | ||
508 | + } | ||
509 | + } | ||
510 | } | ||
511 | |||
512 | void copy(const OpenCLOptions &Opts) { | ||
513 | + CLVer = Opts.CLVer; | ||
514 | + IsOpenCLCPlusPlus = Opts.IsOpenCLCPlusPlus; | ||
515 | OptMap = Opts.OptMap; | ||
516 | } | ||
517 | |||
518 | // Turn on or off support of all options. | ||
519 | void supportAll(bool On = true) { | ||
520 | - for (llvm::StringMap<Info>::iterator I = OptMap.begin(), | ||
521 | - E = OptMap.end(); I != E; ++I) | ||
522 | - I->second.Supported = On; | ||
523 | + for (llvm::StringMap<Info>::iterator I = OptMap.begin(), E = OptMap.end(); | ||
524 | + I != E; ++I) | ||
525 | + if (!isFeature(I->getKey())) | ||
526 | + I->second.Supported = On; | ||
527 | } | ||
528 | |||
529 | void disableAll() { | ||
530 | - for (llvm::StringMap<Info>::iterator I = OptMap.begin(), | ||
531 | - E = OptMap.end(); I != E; ++I) | ||
532 | - I->second.Enabled = false; | ||
533 | + for (llvm::StringMap<Info>::iterator I = OptMap.begin(), E = OptMap.end(); | ||
534 | + I != E; ++I) { | ||
535 | + auto Ext = I->getKey(); | ||
536 | + if (!isFeature(Ext)) | ||
537 | + enable(Ext, false); | ||
538 | + } | ||
539 | } | ||
540 | |||
541 | - void enableSupportedCore(LangOptions LO) { | ||
542 | + void enableSupportedCore() { | ||
543 | for (llvm::StringMap<Info>::iterator I = OptMap.begin(), E = OptMap.end(); | ||
544 | I != E; ++I) | ||
545 | - if (isSupportedCore(I->getKey(), LO)) | ||
546 | + if (isSupportedCore(I->getKey())) | ||
547 | I->second.Enabled = true; | ||
548 | } | ||
549 | |||
550 | + // This enum specifies how OpenCL versions map into values | ||
551 | + // for encoding. This is used when generating built-ins | ||
552 | + // from tablegen | ||
553 | + enum OpenCLVersionsEncodings : unsigned short { | ||
554 | + OPENCL_C_100_CODE = 0x1, | ||
555 | + OPENCL_C_110_CODE = 0x2, | ||
556 | + OPENCL_C_120_CODE = 0x4, | ||
557 | + OPENCL_C_200_CODE = 0x8, | ||
558 | + OPENCL_C_300_CODE = 0x10, | ||
559 | + OPENCL_C_ALL_CODE = 0x1f | ||
560 | + }; | ||
561 | + | ||
562 | + // Encode version into single integer | ||
563 | + static unsigned short EncodeVersion(unsigned OpenCLVersion) { | ||
564 | + switch (OpenCLVersion) { | ||
565 | + default: | ||
566 | + llvm_unreachable("Unknown OpenCL version"); | ||
567 | + case 0: | ||
568 | + return OpenCLVersionsEncodings::OPENCL_C_ALL_CODE; | ||
569 | + case 100: | ||
570 | + return OpenCLVersionsEncodings::OPENCL_C_100_CODE; | ||
571 | + case 110: | ||
572 | + return OpenCLVersionsEncodings::OPENCL_C_110_CODE; | ||
573 | + case 120: | ||
574 | + return OpenCLVersionsEncodings::OPENCL_C_120_CODE; | ||
575 | + case 200: | ||
576 | + return OpenCLVersionsEncodings::OPENCL_C_200_CODE; | ||
577 | + case 300: | ||
578 | + return OpenCLVersionsEncodings::OPENCL_C_300_CODE; | ||
579 | + } | ||
580 | + } | ||
581 | + | ||
582 | friend class ASTWriter; | ||
583 | friend class ASTReader; | ||
584 | }; | ||
585 | diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td | ||
586 | index 391c895a453b..e03c22c749ad 100644 | ||
587 | --- a/clang/include/clang/Driver/Options.td | ||
588 | +++ b/clang/include/clang/Driver/Options.td | ||
589 | @@ -522,7 +522,7 @@ def cl_mad_enable : Flag<["-"], "cl-mad-enable">, Group<opencl_Group>, Flags<[CC | ||
590 | def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group<opencl_Group>, Flags<[CC1Option]>, | ||
591 | HelpText<"OpenCL only. Allow use of less precise no signed zeros computations in the generated binary.">; | ||
592 | def cl_std_EQ : Joined<["-"], "cl-std=">, Group<opencl_Group>, Flags<[CC1Option]>, | ||
593 | - HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,clc++,CLC++">; | ||
594 | + HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0,clc++,CLC++">; | ||
595 | def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group<opencl_Group>, Flags<[CC1Option]>, | ||
596 | HelpText<"OpenCL only. Allow denormals to be flushed to zero.">; | ||
597 | def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>, | ||
598 | diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h | ||
599 | index 842e49602274..d8ac91bc8a31 100644 | ||
600 | --- a/clang/include/clang/Sema/Sema.h | ||
601 | +++ b/clang/include/clang/Sema/Sema.h | ||
602 | @@ -9632,6 +9632,10 @@ public: | ||
603 | /// \return true if type is disabled. | ||
604 | bool checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E); | ||
605 | |||
606 | + bool checkOpenCLSubgroupExtForCallExpr(CallExpr *Call); | ||
607 | + | ||
608 | + bool isSupportedOpenCLOMemoryOrdering(int64_t Ordering) const; | ||
609 | + | ||
610 | //===--------------------------------------------------------------------===// | ||
611 | // OpenMP directives and clauses. | ||
612 | // | ||
613 | @@ -11102,6 +11106,11 @@ public: | ||
614 | /// that the user intended an assignment used as condition. | ||
615 | void DiagnoseEqualityWithExtraParens(ParenExpr *ParenE); | ||
616 | |||
617 | + template <typename DiagLocT, typename DiagInfoT> | ||
618 | + void DiagnoseOpenCLRequiresOption(llvm::StringRef OpenCLOptName, | ||
619 | + DiagLocT DiagLoc, DiagInfoT DiagInfo, | ||
620 | + unsigned Selector, SourceRange SrcRange); | ||
621 | + | ||
622 | /// CheckCXXBooleanCondition - Returns true if conversion to bool is invalid. | ||
623 | ExprResult CheckCXXBooleanCondition(Expr *CondExpr, bool IsConstexpr = false); | ||
624 | |||
625 | diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp | ||
626 | index 1be72efe4de8..2a81fbcde79d 100644 | ||
627 | --- a/clang/lib/AST/ASTContext.cpp | ||
628 | +++ b/clang/lib/AST/ASTContext.cpp | ||
629 | @@ -1490,7 +1490,8 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target, | ||
630 | ObjCSuperType = QualType(); | ||
631 | |||
632 | // void * type | ||
633 | - if (LangOpts.OpenCLVersion >= 200) { | ||
634 | + if (Target.getSupportedOpenCLOpts().isEnabled( | ||
635 | + "__opencl_c_generic_address_space")) { | ||
636 | auto Q = VoidTy.getQualifiers(); | ||
637 | Q.setAddressSpace(LangAS::opencl_generic); | ||
638 | VoidPtrTy = getPointerType(getCanonicalType( | ||
639 | diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp | ||
640 | index 0cd89df41b67..7a3067345098 100644 | ||
641 | --- a/clang/lib/Basic/Builtins.cpp | ||
642 | +++ b/clang/lib/Basic/Builtins.cpp | ||
643 | @@ -23,6 +23,8 @@ static const Builtin::Info BuiltinInfo[] = { | ||
644 | { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr }, | ||
645 | #define LANGBUILTIN(ID, TYPE, ATTRS, LANGS) \ | ||
646 | { #ID, TYPE, ATTRS, nullptr, LANGS, nullptr }, | ||
647 | +#define OPENCLBUILTIN(ID, TYPE, ATTRS, LANGS, FEATURE) \ | ||
648 | + {#ID, TYPE, ATTRS, nullptr, LANGS, FEATURE}, | ||
649 | #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, LANGS) \ | ||
650 | { #ID, TYPE, ATTRS, HEADER, LANGS, nullptr }, | ||
651 | #include "clang/Basic/Builtins.def" | ||
652 | @@ -69,16 +71,17 @@ bool Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo, | ||
653 | bool ObjCUnsupported = !LangOpts.ObjC && BuiltinInfo.Langs == OBJC_LANG; | ||
654 | bool OclC1Unsupported = (LangOpts.OpenCLVersion / 100) != 1 && | ||
655 | (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES ) == OCLC1X_LANG; | ||
656 | - bool OclC2Unsupported = | ||
657 | - (LangOpts.OpenCLVersion != 200 && !LangOpts.OpenCLCPlusPlus) && | ||
658 | - (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC20_LANG; | ||
659 | + bool OclC2PUnsupported = | ||
660 | + (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC2P_LANG && | ||
661 | + ((LangOpts.OpenCLVersion < 200 && !LangOpts.OpenCLCPlusPlus) || | ||
662 | + !OclBuiltinIsSupported(BuiltinInfo, LangOpts)); | ||
663 | bool OclCUnsupported = !LangOpts.OpenCL && | ||
664 | (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES); | ||
665 | bool OpenMPUnsupported = !LangOpts.OpenMP && BuiltinInfo.Langs == OMP_LANG; | ||
666 | bool CPlusPlusUnsupported = | ||
667 | !LangOpts.CPlusPlus && BuiltinInfo.Langs == CXX_LANG; | ||
668 | return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported && | ||
669 | - !OclC1Unsupported && !OclC2Unsupported && !OpenMPUnsupported && | ||
670 | + !OclC1Unsupported && !OclC2PUnsupported && !OpenMPUnsupported && | ||
671 | !GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported && | ||
672 | !CPlusPlusUnsupported; | ||
673 | } | ||
674 | @@ -191,3 +194,20 @@ bool Builtin::Context::canBeRedeclared(unsigned ID) const { | ||
675 | (!hasReferenceArgsOrResult(ID) && | ||
676 | !hasCustomTypechecking(ID)); | ||
677 | } | ||
678 | + | ||
679 | +bool Builtin::Context::OclBuiltinIsSupported( | ||
680 | + const Builtin::Info &BuiltinInfo, const LangOptions &LangOpts) const { | ||
681 | + if (!requiresFeatures(BuiltinInfo)) | ||
682 | + return true; | ||
683 | + | ||
684 | + return llvm::StringSwitch<bool>(BuiltinInfo.Features) | ||
685 | + .Case("__opencl_c_device_enqueue", LangOpts.Blocks) | ||
686 | + .Case("__opencl_c_generic_address_space", LangOpts.OpenCLGenericKeyword) | ||
687 | + .Case("__opencl_c_pipes", LangOpts.OpenCLPipeKeyword) | ||
688 | + .Default(false); | ||
689 | +} | ||
690 | + | ||
691 | +bool Builtin::Context::requiresFeatures( | ||
692 | + const Builtin::Info &BuiltinInfo) const { | ||
693 | + return BuiltinInfo.Features && llvm::StringRef(BuiltinInfo.Features) != ""; | ||
694 | +} | ||
695 | diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp | ||
696 | index 3a21a19e1f19..795311d5934e 100644 | ||
697 | --- a/clang/lib/Basic/TargetInfo.cpp | ||
698 | +++ b/clang/lib/Basic/TargetInfo.cpp | ||
699 | @@ -377,6 +377,17 @@ void TargetInfo::adjust(LangOptions &Opts) { | ||
700 | HalfFormat = &llvm::APFloat::IEEEhalf(); | ||
701 | FloatFormat = &llvm::APFloat::IEEEsingle(); | ||
702 | LongDoubleFormat = &llvm::APFloat::IEEEquad(); | ||
703 | + | ||
704 | + auto &SupportedOCLOpts = getTargetOpts().SupportedOpenCLOptions; | ||
705 | + | ||
706 | + SupportedOCLOpts.setOpenCLVersion(Opts); | ||
707 | + SupportedOCLOpts.adjustFeatures(); | ||
708 | + | ||
709 | + if (!Opts.OpenCLCPlusPlus && Opts.OpenCLVersion >= 200) | ||
710 | + Opts.Blocks = SupportedOCLOpts.isSupported("__opencl_c_device_enqueue"); | ||
711 | + Opts.OpenCLGenericKeyword = | ||
712 | + SupportedOCLOpts.isSupported("__opencl_c_generic_address_space"); | ||
713 | + Opts.OpenCLPipeKeyword = SupportedOCLOpts.isSupported("__opencl_c_pipes"); | ||
714 | } | ||
715 | |||
716 | if (Opts.LongDoubleSize) { | ||
717 | diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp | ||
718 | index c063f8ca4472..b394be18a234 100644 | ||
719 | --- a/clang/lib/Basic/Targets.cpp | ||
720 | +++ b/clang/lib/Basic/Targets.cpp | ||
721 | @@ -39,7 +39,6 @@ | ||
722 | #include "clang/Basic/Diagnostic.h" | ||
723 | #include "llvm/ADT/StringExtras.h" | ||
724 | #include "llvm/ADT/Triple.h" | ||
725 | - | ||
726 | using namespace clang; | ||
727 | |||
728 | namespace clang { | ||
729 | diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp | ||
730 | index 648e6d9c214a..6d839fa61abc 100644 | ||
731 | --- a/clang/lib/CodeGen/CodeGenFunction.cpp | ||
732 | +++ b/clang/lib/CodeGen/CodeGenFunction.cpp | ||
733 | @@ -2303,11 +2303,11 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, | ||
734 | std::string MissingFeature; | ||
735 | if (BuiltinID) { | ||
736 | SmallVector<StringRef, 1> ReqFeatures; | ||
737 | - const char *FeatureList = | ||
738 | - CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); | ||
739 | // Return if the builtin doesn't have any required features. | ||
740 | - if (!FeatureList || StringRef(FeatureList) == "") | ||
741 | + if (!CGM.getContext().BuiltinInfo.requiresFeatures(BuiltinID)) | ||
742 | return; | ||
743 | + const char *FeatureList = | ||
744 | + CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); | ||
745 | StringRef(FeatureList).split(ReqFeatures, ','); | ||
746 | if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature)) | ||
747 | CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature) | ||
748 | diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp | ||
749 | index e98a407ac42f..18fa06bf3c6d 100644 | ||
750 | --- a/clang/lib/Frontend/CompilerInvocation.cpp | ||
751 | +++ b/clang/lib/Frontend/CompilerInvocation.cpp | ||
752 | @@ -2298,6 +2298,8 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, | ||
753 | Opts.OpenCLVersion = 120; | ||
754 | else if (LangStd == LangStandard::lang_opencl20) | ||
755 | Opts.OpenCLVersion = 200; | ||
756 | + else if (LangStd == LangStandard::lang_opencl30) | ||
757 | + Opts.OpenCLVersion = 300; | ||
758 | else if (LangStd == LangStandard::lang_openclcpp) | ||
759 | Opts.OpenCLCPlusPlusVersion = 100; | ||
760 | |||
761 | @@ -2498,14 +2500,15 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, | ||
762 | // -cl-std only applies for OpenCL language standards. | ||
763 | // Override the -std option in this case. | ||
764 | if (const Arg *A = Args.getLastArg(OPT_cl_std_EQ)) { | ||
765 | - LangStandard::Kind OpenCLLangStd | ||
766 | - = llvm::StringSwitch<LangStandard::Kind>(A->getValue()) | ||
767 | - .Cases("cl", "CL", LangStandard::lang_opencl10) | ||
768 | - .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) | ||
769 | - .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) | ||
770 | - .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) | ||
771 | - .Cases("clc++", "CLC++", LangStandard::lang_openclcpp) | ||
772 | - .Default(LangStandard::lang_unspecified); | ||
773 | + LangStandard::Kind OpenCLLangStd = | ||
774 | + llvm::StringSwitch<LangStandard::Kind>(A->getValue()) | ||
775 | + .Cases("cl", "CL", LangStandard::lang_opencl10) | ||
776 | + .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) | ||
777 | + .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) | ||
778 | + .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) | ||
779 | + .Cases("cl3.0", "CL3.0", LangStandard::lang_opencl30) | ||
780 | + .Cases("clc++", "CLC++", LangStandard::lang_openclcpp) | ||
781 | + .Default(LangStandard::lang_unspecified); | ||
782 | |||
783 | if (OpenCLLangStd == LangStandard::lang_unspecified) { | ||
784 | Diags.Report(diag::err_drv_invalid_value) | ||
785 | @@ -2787,8 +2790,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, | ||
786 | |||
787 | Opts.RTTI = Opts.CPlusPlus && !Args.hasArg(OPT_fno_rtti); | ||
788 | Opts.RTTIData = Opts.RTTI && !Args.hasArg(OPT_fno_rtti_data); | ||
789 | - Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL | ||
790 | - && Opts.OpenCLVersion == 200); | ||
791 | + Opts.Blocks = Args.hasArg(OPT_fblocks); | ||
792 | Opts.BlocksRuntimeOptional = Args.hasArg(OPT_fblocks_runtime_optional); | ||
793 | Opts.Coroutines = Opts.CPlusPlus2a || Args.hasArg(OPT_fcoroutines_ts); | ||
794 | |||
795 | diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp | ||
796 | index c273cb96d9b9..aefd208e6cd3 100644 | ||
797 | --- a/clang/lib/Frontend/InitPreprocessor.cpp | ||
798 | +++ b/clang/lib/Frontend/InitPreprocessor.cpp | ||
799 | @@ -445,6 +445,9 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, | ||
800 | case 200: | ||
801 | Builder.defineMacro("__OPENCL_C_VERSION__", "200"); | ||
802 | break; | ||
803 | + case 300: | ||
804 | + Builder.defineMacro("__OPENCL_C_VERSION__", "300"); | ||
805 | + break; | ||
806 | default: | ||
807 | llvm_unreachable("Unsupported OpenCL version"); | ||
808 | } | ||
809 | @@ -453,6 +456,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, | ||
810 | Builder.defineMacro("CL_VERSION_1_1", "110"); | ||
811 | Builder.defineMacro("CL_VERSION_1_2", "120"); | ||
812 | Builder.defineMacro("CL_VERSION_2_0", "200"); | ||
813 | + Builder.defineMacro("CL_VERSION_3_0", "300"); | ||
814 | |||
815 | if (TI.isLittleEndian()) | ||
816 | Builder.defineMacro("__ENDIAN_LITTLE__"); | ||
817 | @@ -1101,7 +1105,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, | ||
818 | // OpenCL definitions. | ||
819 | if (LangOpts.OpenCL) { | ||
820 | #define OPENCLEXT(Ext) \ | ||
821 | - if (TI.getSupportedOpenCLOpts().isSupported(#Ext, LangOpts)) \ | ||
822 | + if (TI.getSupportedOpenCLOpts().isSupported(#Ext)) \ | ||
823 | Builder.defineMacro(#Ext); | ||
824 | #include "clang/Basic/OpenCLExtensions.def" | ||
825 | |||
826 | diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h | ||
827 | index 430e07d36f62..2cc688ccc3da 100644 | ||
828 | --- a/clang/lib/Headers/opencl-c-base.h | ||
829 | +++ b/clang/lib/Headers/opencl-c-base.h | ||
830 | @@ -9,6 +9,59 @@ | ||
831 | #ifndef _OPENCL_BASE_H_ | ||
832 | #define _OPENCL_BASE_H_ | ||
833 | |||
834 | +// Add predefined macros to build headers with standalone executable | ||
835 | +#ifndef CL_VERSION_3_0 | ||
836 | + #define CL_VERSION_3_0 300 | ||
837 | +#endif | ||
838 | + | ||
839 | +// Define features for 2.0 for header backward compatibility | ||
840 | +#ifndef __opencl_c_int64 | ||
841 | + #define __opencl_c_int64 1 | ||
842 | +#endif | ||
843 | +#if __OPENCL_C_VERSION__ != CL_VERSION_3_0 | ||
844 | + #ifndef __opencl_c_images | ||
845 | + #define __opencl_c_images 1 | ||
846 | + #endif | ||
847 | +#endif | ||
848 | +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) | ||
849 | +#ifndef __opencl_c_pipes | ||
850 | + #define __opencl_c_pipes 1 | ||
851 | +#endif | ||
852 | +#ifndef __opencl_c_generic_address_space | ||
853 | + #define __opencl_c_generic_address_space 1 | ||
854 | +#endif | ||
855 | +#ifndef __opencl_c_work_group_collective_functions | ||
856 | + #define __opencl_c_work_group_collective_functions 1 | ||
857 | +#endif | ||
858 | +#ifndef __opencl_c_atomic_order_acq_rel | ||
859 | + #define __opencl_c_atomic_order_acq_rel 1 | ||
860 | +#endif | ||
861 | +#ifndef __opencl_c_atomic_order_seq_cst | ||
862 | + #define __opencl_c_atomic_order_seq_cst 1 | ||
863 | +#endif | ||
864 | +#ifndef __opencl_c_atomic_scope_device | ||
865 | + #define __opencl_c_atomic_scope_device 1 | ||
866 | +#endif | ||
867 | +#ifndef __opencl_c_atomic_scope_all_devices | ||
868 | + #define __opencl_c_atomic_scope_all_devices 1 | ||
869 | +#endif | ||
870 | +#ifndef __opencl_c_subgroups | ||
871 | + #define __opencl_c_subgroups 1 | ||
872 | +#endif | ||
873 | +#ifndef __opencl_c_3d_image_writes | ||
874 | + #define __opencl_c_3d_image_writes 1 | ||
875 | +#endif | ||
876 | +#ifndef __opencl_c_device_enqueue | ||
877 | + #define __opencl_c_device_enqueue 1 | ||
878 | +#endif | ||
879 | +#ifndef __opencl_c_read_write_images | ||
880 | + #define __opencl_c_read_write_images 1 | ||
881 | +#endif | ||
882 | +#ifndef __opencl_c_program_scope_global_variables | ||
883 | + #define __opencl_c_program_scope_global_variables 1 | ||
884 | +#endif | ||
885 | +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) | ||
886 | + | ||
887 | // built-in scalar data types: | ||
888 | |||
889 | /** | ||
890 | @@ -115,7 +168,12 @@ typedef half half4 __attribute__((ext_vector_type(4))); | ||
891 | typedef half half8 __attribute__((ext_vector_type(8))); | ||
892 | typedef half half16 __attribute__((ext_vector_type(16))); | ||
893 | #endif | ||
894 | -#ifdef cl_khr_fp64 | ||
895 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
896 | + | ||
897 | +#ifndef __opencl_c_fp64 | ||
898 | + #define __opencl_c_fp64 1 | ||
899 | +#endif | ||
900 | + | ||
901 | #if __OPENCL_C_VERSION__ < CL_VERSION_1_2 | ||
902 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable | ||
903 | #endif | ||
904 | @@ -281,9 +339,17 @@ typedef uint cl_mem_fence_flags; | ||
905 | typedef enum memory_scope { | ||
906 | memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, | ||
907 | memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, | ||
908 | +#ifdef __opencl_c_atomic_scope_device | ||
909 | memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, | ||
910 | +#endif | ||
911 | +#ifdef __opencl_c_atomic_scope_all_devices | ||
912 | + #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
913 | + memory_scope_all_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, | ||
914 | + #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
915 | memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, | ||
916 | -#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) | ||
917 | +#endif | ||
918 | +#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \ | ||
919 | + defined(__opencl_c_subgroups) | ||
920 | memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP | ||
921 | #endif | ||
922 | } memory_scope; | ||
923 | @@ -301,13 +367,14 @@ typedef enum memory_scope { | ||
924 | #define ATOMIC_FLAG_INIT 0 | ||
925 | |||
926 | // enum values aligned with what clang uses in EmitAtomicExpr() | ||
927 | -typedef enum memory_order | ||
928 | -{ | ||
929 | +typedef enum memory_order { | ||
930 | memory_order_relaxed = __ATOMIC_RELAXED, | ||
931 | memory_order_acquire = __ATOMIC_ACQUIRE, | ||
932 | memory_order_release = __ATOMIC_RELEASE, | ||
933 | memory_order_acq_rel = __ATOMIC_ACQ_REL, | ||
934 | +#ifdef __opencl_c_atomic_order_seq_cst | ||
935 | memory_order_seq_cst = __ATOMIC_SEQ_CST | ||
936 | +#endif //__opencl_c_atomic_order_seq_cst | ||
937 | } memory_order; | ||
938 | |||
939 | #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
940 | diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h | ||
941 | index 3210f93cc851..93a946cec5b1 100644 | ||
942 | --- a/clang/lib/Headers/opencl-c.h | ||
943 | +++ b/clang/lib/Headers/opencl-c.h | ||
944 | @@ -35,7 +35,6 @@ | ||
945 | #define __purefn __attribute__((pure)) | ||
946 | #define __cnfn __attribute__((const)) | ||
947 | |||
948 | - | ||
949 | // OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions | ||
950 | |||
951 | char __ovld __cnfn convert_char_rte(char); | ||
952 | @@ -4632,7 +4631,7 @@ float16 __ovld __cnfn convert_float16(float16); | ||
953 | |||
954 | // Conversions with double data type parameters or return value. | ||
955 | |||
956 | -#ifdef cl_khr_fp64 | ||
957 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
958 | char __ovld __cnfn convert_char(double); | ||
959 | char __ovld __cnfn convert_char_rte(double); | ||
960 | char __ovld __cnfn convert_char_rtn(double); | ||
961 | @@ -5452,7 +5451,7 @@ double16 __ovld __cnfn convert_double16_rtz(uchar16); | ||
962 | double16 __ovld __cnfn convert_double16_rtz(uint16); | ||
963 | double16 __ovld __cnfn convert_double16_rtz(ulong16); | ||
964 | double16 __ovld __cnfn convert_double16_rtz(ushort16); | ||
965 | -#endif //cl_khr_fp64 | ||
966 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
967 | |||
968 | #ifdef cl_khr_fp16 | ||
969 | // Convert half types to non-double types. | ||
970 | @@ -6270,7 +6269,7 @@ half16 __ovld __cnfn convert_half16_rtz(float16); | ||
971 | half16 __ovld __cnfn convert_half16_rtz(half16); | ||
972 | |||
973 | // Convert half types to double types. | ||
974 | -#ifdef cl_khr_fp64 | ||
975 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
976 | double __ovld __cnfn convert_double(half); | ||
977 | double __ovld __cnfn convert_double_rte(half); | ||
978 | double __ovld __cnfn convert_double_rtp(half); | ||
979 | @@ -6333,7 +6332,7 @@ half16 __ovld __cnfn convert_half16_rte(double16); | ||
980 | half16 __ovld __cnfn convert_half16_rtp(double16); | ||
981 | half16 __ovld __cnfn convert_half16_rtn(double16); | ||
982 | half16 __ovld __cnfn convert_half16_rtz(double16); | ||
983 | -#endif //cl_khr_fp64 | ||
984 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
985 | |||
986 | #endif // cl_khr_fp16 | ||
987 | |||
988 | @@ -6404,14 +6403,14 @@ half16 __ovld __cnfn convert_half16_rtz(double16); | ||
989 | #define as_float8(x) __builtin_astype((x), float8) | ||
990 | #define as_float16(x) __builtin_astype((x), float16) | ||
991 | |||
992 | -#ifdef cl_khr_fp64 | ||
993 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
994 | #define as_double(x) __builtin_astype((x), double) | ||
995 | #define as_double2(x) __builtin_astype((x), double2) | ||
996 | #define as_double3(x) __builtin_astype((x), double3) | ||
997 | #define as_double4(x) __builtin_astype((x), double4) | ||
998 | #define as_double8(x) __builtin_astype((x), double8) | ||
999 | #define as_double16(x) __builtin_astype((x), double16) | ||
1000 | -#endif //cl_khr_fp64 | ||
1001 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1002 | |||
1003 | #ifdef cl_khr_fp16 | ||
1004 | #define as_half(x) __builtin_astype((x), half) | ||
1005 | @@ -6534,14 +6533,14 @@ float3 __ovld __cnfn acos(float3); | ||
1006 | float4 __ovld __cnfn acos(float4); | ||
1007 | float8 __ovld __cnfn acos(float8); | ||
1008 | float16 __ovld __cnfn acos(float16); | ||
1009 | -#ifdef cl_khr_fp64 | ||
1010 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1011 | double __ovld __cnfn acos(double); | ||
1012 | double2 __ovld __cnfn acos(double2); | ||
1013 | double3 __ovld __cnfn acos(double3); | ||
1014 | double4 __ovld __cnfn acos(double4); | ||
1015 | double8 __ovld __cnfn acos(double8); | ||
1016 | double16 __ovld __cnfn acos(double16); | ||
1017 | -#endif //cl_khr_fp64 | ||
1018 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1019 | #ifdef cl_khr_fp16 | ||
1020 | half __ovld __cnfn acos(half); | ||
1021 | half2 __ovld __cnfn acos(half2); | ||
1022 | @@ -6560,14 +6559,14 @@ float3 __ovld __cnfn acosh(float3); | ||
1023 | float4 __ovld __cnfn acosh(float4); | ||
1024 | float8 __ovld __cnfn acosh(float8); | ||
1025 | float16 __ovld __cnfn acosh(float16); | ||
1026 | -#ifdef cl_khr_fp64 | ||
1027 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1028 | double __ovld __cnfn acosh(double); | ||
1029 | double2 __ovld __cnfn acosh(double2); | ||
1030 | double3 __ovld __cnfn acosh(double3); | ||
1031 | double4 __ovld __cnfn acosh(double4); | ||
1032 | double8 __ovld __cnfn acosh(double8); | ||
1033 | double16 __ovld __cnfn acosh(double16); | ||
1034 | -#endif //cl_khr_fp64 | ||
1035 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1036 | #ifdef cl_khr_fp16 | ||
1037 | half __ovld __cnfn acosh(half); | ||
1038 | half2 __ovld __cnfn acosh(half2); | ||
1039 | @@ -6586,14 +6585,14 @@ float3 __ovld __cnfn acospi(float3 x); | ||
1040 | float4 __ovld __cnfn acospi(float4 x); | ||
1041 | float8 __ovld __cnfn acospi(float8 x); | ||
1042 | float16 __ovld __cnfn acospi(float16 x); | ||
1043 | -#ifdef cl_khr_fp64 | ||
1044 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1045 | double __ovld __cnfn acospi(double x); | ||
1046 | double2 __ovld __cnfn acospi(double2 x); | ||
1047 | double3 __ovld __cnfn acospi(double3 x); | ||
1048 | double4 __ovld __cnfn acospi(double4 x); | ||
1049 | double8 __ovld __cnfn acospi(double8 x); | ||
1050 | double16 __ovld __cnfn acospi(double16 x); | ||
1051 | -#endif //cl_khr_fp64 | ||
1052 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1053 | #ifdef cl_khr_fp16 | ||
1054 | half __ovld __cnfn acospi(half x); | ||
1055 | half2 __ovld __cnfn acospi(half2 x); | ||
1056 | @@ -6612,14 +6611,14 @@ float3 __ovld __cnfn asin(float3); | ||
1057 | float4 __ovld __cnfn asin(float4); | ||
1058 | float8 __ovld __cnfn asin(float8); | ||
1059 | float16 __ovld __cnfn asin(float16); | ||
1060 | -#ifdef cl_khr_fp64 | ||
1061 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1062 | double __ovld __cnfn asin(double); | ||
1063 | double2 __ovld __cnfn asin(double2); | ||
1064 | double3 __ovld __cnfn asin(double3); | ||
1065 | double4 __ovld __cnfn asin(double4); | ||
1066 | double8 __ovld __cnfn asin(double8); | ||
1067 | double16 __ovld __cnfn asin(double16); | ||
1068 | -#endif //cl_khr_fp64 | ||
1069 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1070 | #ifdef cl_khr_fp16 | ||
1071 | half __ovld __cnfn asin(half); | ||
1072 | half2 __ovld __cnfn asin(half2); | ||
1073 | @@ -6638,14 +6637,14 @@ float3 __ovld __cnfn asinh(float3); | ||
1074 | float4 __ovld __cnfn asinh(float4); | ||
1075 | float8 __ovld __cnfn asinh(float8); | ||
1076 | float16 __ovld __cnfn asinh(float16); | ||
1077 | -#ifdef cl_khr_fp64 | ||
1078 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1079 | double __ovld __cnfn asinh(double); | ||
1080 | double2 __ovld __cnfn asinh(double2); | ||
1081 | double3 __ovld __cnfn asinh(double3); | ||
1082 | double4 __ovld __cnfn asinh(double4); | ||
1083 | double8 __ovld __cnfn asinh(double8); | ||
1084 | double16 __ovld __cnfn asinh(double16); | ||
1085 | -#endif //cl_khr_fp64 | ||
1086 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1087 | #ifdef cl_khr_fp16 | ||
1088 | half __ovld __cnfn asinh(half); | ||
1089 | half2 __ovld __cnfn asinh(half2); | ||
1090 | @@ -6664,14 +6663,14 @@ float3 __ovld __cnfn asinpi(float3 x); | ||
1091 | float4 __ovld __cnfn asinpi(float4 x); | ||
1092 | float8 __ovld __cnfn asinpi(float8 x); | ||
1093 | float16 __ovld __cnfn asinpi(float16 x); | ||
1094 | -#ifdef cl_khr_fp64 | ||
1095 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1096 | double __ovld __cnfn asinpi(double x); | ||
1097 | double2 __ovld __cnfn asinpi(double2 x); | ||
1098 | double3 __ovld __cnfn asinpi(double3 x); | ||
1099 | double4 __ovld __cnfn asinpi(double4 x); | ||
1100 | double8 __ovld __cnfn asinpi(double8 x); | ||
1101 | double16 __ovld __cnfn asinpi(double16 x); | ||
1102 | -#endif //cl_khr_fp64 | ||
1103 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1104 | #ifdef cl_khr_fp16 | ||
1105 | half __ovld __cnfn asinpi(half x); | ||
1106 | half2 __ovld __cnfn asinpi(half2 x); | ||
1107 | @@ -6690,14 +6689,14 @@ float3 __ovld __cnfn atan(float3 y_over_x); | ||
1108 | float4 __ovld __cnfn atan(float4 y_over_x); | ||
1109 | float8 __ovld __cnfn atan(float8 y_over_x); | ||
1110 | float16 __ovld __cnfn atan(float16 y_over_x); | ||
1111 | -#ifdef cl_khr_fp64 | ||
1112 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1113 | double __ovld __cnfn atan(double y_over_x); | ||
1114 | double2 __ovld __cnfn atan(double2 y_over_x); | ||
1115 | double3 __ovld __cnfn atan(double3 y_over_x); | ||
1116 | double4 __ovld __cnfn atan(double4 y_over_x); | ||
1117 | double8 __ovld __cnfn atan(double8 y_over_x); | ||
1118 | double16 __ovld __cnfn atan(double16 y_over_x); | ||
1119 | -#endif //cl_khr_fp64 | ||
1120 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1121 | #ifdef cl_khr_fp16 | ||
1122 | half __ovld __cnfn atan(half y_over_x); | ||
1123 | half2 __ovld __cnfn atan(half2 y_over_x); | ||
1124 | @@ -6716,14 +6715,14 @@ float3 __ovld __cnfn atan2(float3 y, float3 x); | ||
1125 | float4 __ovld __cnfn atan2(float4 y, float4 x); | ||
1126 | float8 __ovld __cnfn atan2(float8 y, float8 x); | ||
1127 | float16 __ovld __cnfn atan2(float16 y, float16 x); | ||
1128 | -#ifdef cl_khr_fp64 | ||
1129 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1130 | double __ovld __cnfn atan2(double y, double x); | ||
1131 | double2 __ovld __cnfn atan2(double2 y, double2 x); | ||
1132 | double3 __ovld __cnfn atan2(double3 y, double3 x); | ||
1133 | double4 __ovld __cnfn atan2(double4 y, double4 x); | ||
1134 | double8 __ovld __cnfn atan2(double8 y, double8 x); | ||
1135 | double16 __ovld __cnfn atan2(double16 y, double16 x); | ||
1136 | -#endif //cl_khr_fp64 | ||
1137 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1138 | #ifdef cl_khr_fp16 | ||
1139 | half __ovld __cnfn atan2(half y, half x); | ||
1140 | half2 __ovld __cnfn atan2(half2 y, half2 x); | ||
1141 | @@ -6742,14 +6741,14 @@ float3 __ovld __cnfn atanh(float3); | ||
1142 | float4 __ovld __cnfn atanh(float4); | ||
1143 | float8 __ovld __cnfn atanh(float8); | ||
1144 | float16 __ovld __cnfn atanh(float16); | ||
1145 | -#ifdef cl_khr_fp64 | ||
1146 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1147 | double __ovld __cnfn atanh(double); | ||
1148 | double2 __ovld __cnfn atanh(double2); | ||
1149 | double3 __ovld __cnfn atanh(double3); | ||
1150 | double4 __ovld __cnfn atanh(double4); | ||
1151 | double8 __ovld __cnfn atanh(double8); | ||
1152 | double16 __ovld __cnfn atanh(double16); | ||
1153 | -#endif //cl_khr_fp64 | ||
1154 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1155 | #ifdef cl_khr_fp16 | ||
1156 | half __ovld __cnfn atanh(half); | ||
1157 | half2 __ovld __cnfn atanh(half2); | ||
1158 | @@ -6768,14 +6767,14 @@ float3 __ovld __cnfn atanpi(float3 x); | ||
1159 | float4 __ovld __cnfn atanpi(float4 x); | ||
1160 | float8 __ovld __cnfn atanpi(float8 x); | ||
1161 | float16 __ovld __cnfn atanpi(float16 x); | ||
1162 | -#ifdef cl_khr_fp64 | ||
1163 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1164 | double __ovld __cnfn atanpi(double x); | ||
1165 | double2 __ovld __cnfn atanpi(double2 x); | ||
1166 | double3 __ovld __cnfn atanpi(double3 x); | ||
1167 | double4 __ovld __cnfn atanpi(double4 x); | ||
1168 | double8 __ovld __cnfn atanpi(double8 x); | ||
1169 | double16 __ovld __cnfn atanpi(double16 x); | ||
1170 | -#endif //cl_khr_fp64 | ||
1171 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1172 | #ifdef cl_khr_fp16 | ||
1173 | half __ovld __cnfn atanpi(half x); | ||
1174 | half2 __ovld __cnfn atanpi(half2 x); | ||
1175 | @@ -6794,14 +6793,14 @@ float3 __ovld __cnfn atan2pi(float3 y, float3 x); | ||
1176 | float4 __ovld __cnfn atan2pi(float4 y, float4 x); | ||
1177 | float8 __ovld __cnfn atan2pi(float8 y, float8 x); | ||
1178 | float16 __ovld __cnfn atan2pi(float16 y, float16 x); | ||
1179 | -#ifdef cl_khr_fp64 | ||
1180 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1181 | double __ovld __cnfn atan2pi(double y, double x); | ||
1182 | double2 __ovld __cnfn atan2pi(double2 y, double2 x); | ||
1183 | double3 __ovld __cnfn atan2pi(double3 y, double3 x); | ||
1184 | double4 __ovld __cnfn atan2pi(double4 y, double4 x); | ||
1185 | double8 __ovld __cnfn atan2pi(double8 y, double8 x); | ||
1186 | double16 __ovld __cnfn atan2pi(double16 y, double16 x); | ||
1187 | -#endif //cl_khr_fp64 | ||
1188 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1189 | #ifdef cl_khr_fp16 | ||
1190 | half __ovld __cnfn atan2pi(half y, half x); | ||
1191 | half2 __ovld __cnfn atan2pi(half2 y, half2 x); | ||
1192 | @@ -6820,14 +6819,14 @@ float3 __ovld __cnfn cbrt(float3); | ||
1193 | float4 __ovld __cnfn cbrt(float4); | ||
1194 | float8 __ovld __cnfn cbrt(float8); | ||
1195 | float16 __ovld __cnfn cbrt(float16); | ||
1196 | -#ifdef cl_khr_fp64 | ||
1197 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1198 | double __ovld __cnfn cbrt(double); | ||
1199 | double2 __ovld __cnfn cbrt(double2); | ||
1200 | double3 __ovld __cnfn cbrt(double3); | ||
1201 | double4 __ovld __cnfn cbrt(double4); | ||
1202 | double8 __ovld __cnfn cbrt(double8); | ||
1203 | double16 __ovld __cnfn cbrt(double16); | ||
1204 | -#endif //cl_khr_fp64 | ||
1205 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1206 | #ifdef cl_khr_fp16 | ||
1207 | half __ovld __cnfn cbrt(half); | ||
1208 | half2 __ovld __cnfn cbrt(half2); | ||
1209 | @@ -6847,14 +6846,14 @@ float3 __ovld __cnfn ceil(float3); | ||
1210 | float4 __ovld __cnfn ceil(float4); | ||
1211 | float8 __ovld __cnfn ceil(float8); | ||
1212 | float16 __ovld __cnfn ceil(float16); | ||
1213 | -#ifdef cl_khr_fp64 | ||
1214 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1215 | double __ovld __cnfn ceil(double); | ||
1216 | double2 __ovld __cnfn ceil(double2); | ||
1217 | double3 __ovld __cnfn ceil(double3); | ||
1218 | double4 __ovld __cnfn ceil(double4); | ||
1219 | double8 __ovld __cnfn ceil(double8); | ||
1220 | double16 __ovld __cnfn ceil(double16); | ||
1221 | -#endif //cl_khr_fp64 | ||
1222 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1223 | #ifdef cl_khr_fp16 | ||
1224 | half __ovld __cnfn ceil(half); | ||
1225 | half2 __ovld __cnfn ceil(half2); | ||
1226 | @@ -6873,14 +6872,14 @@ float3 __ovld __cnfn copysign(float3 x, float3 y); | ||
1227 | float4 __ovld __cnfn copysign(float4 x, float4 y); | ||
1228 | float8 __ovld __cnfn copysign(float8 x, float8 y); | ||
1229 | float16 __ovld __cnfn copysign(float16 x, float16 y); | ||
1230 | -#ifdef cl_khr_fp64 | ||
1231 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1232 | double __ovld __cnfn copysign(double x, double y); | ||
1233 | double2 __ovld __cnfn copysign(double2 x, double2 y); | ||
1234 | double3 __ovld __cnfn copysign(double3 x, double3 y); | ||
1235 | double4 __ovld __cnfn copysign(double4 x, double4 y); | ||
1236 | double8 __ovld __cnfn copysign(double8 x, double8 y); | ||
1237 | double16 __ovld __cnfn copysign(double16 x, double16 y); | ||
1238 | -#endif //cl_khr_fp64 | ||
1239 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1240 | #ifdef cl_khr_fp16 | ||
1241 | half __ovld __cnfn copysign(half x, half y); | ||
1242 | half2 __ovld __cnfn copysign(half2 x, half2 y); | ||
1243 | @@ -6899,14 +6898,14 @@ float3 __ovld __cnfn cos(float3); | ||
1244 | float4 __ovld __cnfn cos(float4); | ||
1245 | float8 __ovld __cnfn cos(float8); | ||
1246 | float16 __ovld __cnfn cos(float16); | ||
1247 | -#ifdef cl_khr_fp64 | ||
1248 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1249 | double __ovld __cnfn cos(double); | ||
1250 | double2 __ovld __cnfn cos(double2); | ||
1251 | double3 __ovld __cnfn cos(double3); | ||
1252 | double4 __ovld __cnfn cos(double4); | ||
1253 | double8 __ovld __cnfn cos(double8); | ||
1254 | double16 __ovld __cnfn cos(double16); | ||
1255 | -#endif //cl_khr_fp64 | ||
1256 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1257 | #ifdef cl_khr_fp16 | ||
1258 | half __ovld __cnfn cos(half); | ||
1259 | half2 __ovld __cnfn cos(half2); | ||
1260 | @@ -6925,14 +6924,14 @@ float3 __ovld __cnfn cosh(float3); | ||
1261 | float4 __ovld __cnfn cosh(float4); | ||
1262 | float8 __ovld __cnfn cosh(float8); | ||
1263 | float16 __ovld __cnfn cosh(float16); | ||
1264 | -#ifdef cl_khr_fp64 | ||
1265 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1266 | double __ovld __cnfn cosh(double); | ||
1267 | double2 __ovld __cnfn cosh(double2); | ||
1268 | double3 __ovld __cnfn cosh(double3); | ||
1269 | double4 __ovld __cnfn cosh(double4); | ||
1270 | double8 __ovld __cnfn cosh(double8); | ||
1271 | double16 __ovld __cnfn cosh(double16); | ||
1272 | -#endif //cl_khr_fp64 | ||
1273 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1274 | #ifdef cl_khr_fp16 | ||
1275 | half __ovld __cnfn cosh(half); | ||
1276 | half2 __ovld __cnfn cosh(half2); | ||
1277 | @@ -6951,14 +6950,14 @@ float3 __ovld __cnfn cospi(float3 x); | ||
1278 | float4 __ovld __cnfn cospi(float4 x); | ||
1279 | float8 __ovld __cnfn cospi(float8 x); | ||
1280 | float16 __ovld __cnfn cospi(float16 x); | ||
1281 | -#ifdef cl_khr_fp64 | ||
1282 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1283 | double __ovld __cnfn cospi(double x); | ||
1284 | double2 __ovld __cnfn cospi(double2 x); | ||
1285 | double3 __ovld __cnfn cospi(double3 x); | ||
1286 | double4 __ovld __cnfn cospi(double4 x); | ||
1287 | double8 __ovld __cnfn cospi(double8 x); | ||
1288 | double16 __ovld __cnfn cospi(double16 x); | ||
1289 | -#endif //cl_khr_fp64 | ||
1290 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1291 | #ifdef cl_khr_fp16 | ||
1292 | half __ovld __cnfn cospi(half x); | ||
1293 | half2 __ovld __cnfn cospi(half2 x); | ||
1294 | @@ -6977,14 +6976,14 @@ float3 __ovld __cnfn erfc(float3); | ||
1295 | float4 __ovld __cnfn erfc(float4); | ||
1296 | float8 __ovld __cnfn erfc(float8); | ||
1297 | float16 __ovld __cnfn erfc(float16); | ||
1298 | -#ifdef cl_khr_fp64 | ||
1299 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1300 | double __ovld __cnfn erfc(double); | ||
1301 | double2 __ovld __cnfn erfc(double2); | ||
1302 | double3 __ovld __cnfn erfc(double3); | ||
1303 | double4 __ovld __cnfn erfc(double4); | ||
1304 | double8 __ovld __cnfn erfc(double8); | ||
1305 | double16 __ovld __cnfn erfc(double16); | ||
1306 | -#endif //cl_khr_fp64 | ||
1307 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1308 | #ifdef cl_khr_fp16 | ||
1309 | half __ovld __cnfn erfc(half); | ||
1310 | half2 __ovld __cnfn erfc(half2); | ||
1311 | @@ -7004,14 +7003,14 @@ float3 __ovld __cnfn erf(float3); | ||
1312 | float4 __ovld __cnfn erf(float4); | ||
1313 | float8 __ovld __cnfn erf(float8); | ||
1314 | float16 __ovld __cnfn erf(float16); | ||
1315 | -#ifdef cl_khr_fp64 | ||
1316 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1317 | double __ovld __cnfn erf(double); | ||
1318 | double2 __ovld __cnfn erf(double2); | ||
1319 | double3 __ovld __cnfn erf(double3); | ||
1320 | double4 __ovld __cnfn erf(double4); | ||
1321 | double8 __ovld __cnfn erf(double8); | ||
1322 | double16 __ovld __cnfn erf(double16); | ||
1323 | -#endif //cl_khr_fp64 | ||
1324 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1325 | #ifdef cl_khr_fp16 | ||
1326 | half __ovld __cnfn erf(half); | ||
1327 | half2 __ovld __cnfn erf(half2); | ||
1328 | @@ -7030,14 +7029,14 @@ float3 __ovld __cnfn exp(float3 x); | ||
1329 | float4 __ovld __cnfn exp(float4 x); | ||
1330 | float8 __ovld __cnfn exp(float8 x); | ||
1331 | float16 __ovld __cnfn exp(float16 x); | ||
1332 | -#ifdef cl_khr_fp64 | ||
1333 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1334 | double __ovld __cnfn exp(double x); | ||
1335 | double2 __ovld __cnfn exp(double2 x); | ||
1336 | double3 __ovld __cnfn exp(double3 x); | ||
1337 | double4 __ovld __cnfn exp(double4 x); | ||
1338 | double8 __ovld __cnfn exp(double8 x); | ||
1339 | double16 __ovld __cnfn exp(double16 x); | ||
1340 | -#endif //cl_khr_fp64 | ||
1341 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1342 | #ifdef cl_khr_fp16 | ||
1343 | half __ovld __cnfn exp(half x); | ||
1344 | half2 __ovld __cnfn exp(half2 x); | ||
1345 | @@ -7056,14 +7055,14 @@ float3 __ovld __cnfn exp2(float3); | ||
1346 | float4 __ovld __cnfn exp2(float4); | ||
1347 | float8 __ovld __cnfn exp2(float8); | ||
1348 | float16 __ovld __cnfn exp2(float16); | ||
1349 | -#ifdef cl_khr_fp64 | ||
1350 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1351 | double __ovld __cnfn exp2(double); | ||
1352 | double2 __ovld __cnfn exp2(double2); | ||
1353 | double3 __ovld __cnfn exp2(double3); | ||
1354 | double4 __ovld __cnfn exp2(double4); | ||
1355 | double8 __ovld __cnfn exp2(double8); | ||
1356 | double16 __ovld __cnfn exp2(double16); | ||
1357 | -#endif //cl_khr_fp64 | ||
1358 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1359 | #ifdef cl_khr_fp16 | ||
1360 | half __ovld __cnfn exp2(half); | ||
1361 | half2 __ovld __cnfn exp2(half2); | ||
1362 | @@ -7082,14 +7081,14 @@ float3 __ovld __cnfn exp10(float3); | ||
1363 | float4 __ovld __cnfn exp10(float4); | ||
1364 | float8 __ovld __cnfn exp10(float8); | ||
1365 | float16 __ovld __cnfn exp10(float16); | ||
1366 | -#ifdef cl_khr_fp64 | ||
1367 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1368 | double __ovld __cnfn exp10(double); | ||
1369 | double2 __ovld __cnfn exp10(double2); | ||
1370 | double3 __ovld __cnfn exp10(double3); | ||
1371 | double4 __ovld __cnfn exp10(double4); | ||
1372 | double8 __ovld __cnfn exp10(double8); | ||
1373 | double16 __ovld __cnfn exp10(double16); | ||
1374 | -#endif //cl_khr_fp64 | ||
1375 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1376 | #ifdef cl_khr_fp16 | ||
1377 | half __ovld __cnfn exp10(half); | ||
1378 | half2 __ovld __cnfn exp10(half2); | ||
1379 | @@ -7108,14 +7107,14 @@ float3 __ovld __cnfn expm1(float3 x); | ||
1380 | float4 __ovld __cnfn expm1(float4 x); | ||
1381 | float8 __ovld __cnfn expm1(float8 x); | ||
1382 | float16 __ovld __cnfn expm1(float16 x); | ||
1383 | -#ifdef cl_khr_fp64 | ||
1384 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1385 | double __ovld __cnfn expm1(double x); | ||
1386 | double2 __ovld __cnfn expm1(double2 x); | ||
1387 | double3 __ovld __cnfn expm1(double3 x); | ||
1388 | double4 __ovld __cnfn expm1(double4 x); | ||
1389 | double8 __ovld __cnfn expm1(double8 x); | ||
1390 | double16 __ovld __cnfn expm1(double16 x); | ||
1391 | -#endif //cl_khr_fp64 | ||
1392 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1393 | #ifdef cl_khr_fp16 | ||
1394 | half __ovld __cnfn expm1(half x); | ||
1395 | half2 __ovld __cnfn expm1(half2 x); | ||
1396 | @@ -7134,14 +7133,14 @@ float3 __ovld __cnfn fabs(float3); | ||
1397 | float4 __ovld __cnfn fabs(float4); | ||
1398 | float8 __ovld __cnfn fabs(float8); | ||
1399 | float16 __ovld __cnfn fabs(float16); | ||
1400 | -#ifdef cl_khr_fp64 | ||
1401 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1402 | double __ovld __cnfn fabs(double); | ||
1403 | double2 __ovld __cnfn fabs(double2); | ||
1404 | double3 __ovld __cnfn fabs(double3); | ||
1405 | double4 __ovld __cnfn fabs(double4); | ||
1406 | double8 __ovld __cnfn fabs(double8); | ||
1407 | double16 __ovld __cnfn fabs(double16); | ||
1408 | -#endif //cl_khr_fp64 | ||
1409 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1410 | #ifdef cl_khr_fp16 | ||
1411 | half __ovld __cnfn fabs(half); | ||
1412 | half2 __ovld __cnfn fabs(half2); | ||
1413 | @@ -7160,14 +7159,14 @@ float3 __ovld __cnfn fdim(float3 x, float3 y); | ||
1414 | float4 __ovld __cnfn fdim(float4 x, float4 y); | ||
1415 | float8 __ovld __cnfn fdim(float8 x, float8 y); | ||
1416 | float16 __ovld __cnfn fdim(float16 x, float16 y); | ||
1417 | -#ifdef cl_khr_fp64 | ||
1418 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1419 | double __ovld __cnfn fdim(double x, double y); | ||
1420 | double2 __ovld __cnfn fdim(double2 x, double2 y); | ||
1421 | double3 __ovld __cnfn fdim(double3 x, double3 y); | ||
1422 | double4 __ovld __cnfn fdim(double4 x, double4 y); | ||
1423 | double8 __ovld __cnfn fdim(double8 x, double8 y); | ||
1424 | double16 __ovld __cnfn fdim(double16 x, double16 y); | ||
1425 | -#endif //cl_khr_fp64 | ||
1426 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1427 | #ifdef cl_khr_fp16 | ||
1428 | half __ovld __cnfn fdim(half x, half y); | ||
1429 | half2 __ovld __cnfn fdim(half2 x, half2 y); | ||
1430 | @@ -7187,14 +7186,14 @@ float3 __ovld __cnfn floor(float3); | ||
1431 | float4 __ovld __cnfn floor(float4); | ||
1432 | float8 __ovld __cnfn floor(float8); | ||
1433 | float16 __ovld __cnfn floor(float16); | ||
1434 | -#ifdef cl_khr_fp64 | ||
1435 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1436 | double __ovld __cnfn floor(double); | ||
1437 | double2 __ovld __cnfn floor(double2); | ||
1438 | double3 __ovld __cnfn floor(double3); | ||
1439 | double4 __ovld __cnfn floor(double4); | ||
1440 | double8 __ovld __cnfn floor(double8); | ||
1441 | double16 __ovld __cnfn floor(double16); | ||
1442 | -#endif //cl_khr_fp64 | ||
1443 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1444 | #ifdef cl_khr_fp16 | ||
1445 | half __ovld __cnfn floor(half); | ||
1446 | half2 __ovld __cnfn floor(half2); | ||
1447 | @@ -7217,14 +7216,14 @@ float3 __ovld __cnfn fma(float3 a, float3 b, float3 c); | ||
1448 | float4 __ovld __cnfn fma(float4 a, float4 b, float4 c); | ||
1449 | float8 __ovld __cnfn fma(float8 a, float8 b, float8 c); | ||
1450 | float16 __ovld __cnfn fma(float16 a, float16 b, float16 c); | ||
1451 | -#ifdef cl_khr_fp64 | ||
1452 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1453 | double __ovld __cnfn fma(double a, double b, double c); | ||
1454 | double2 __ovld __cnfn fma(double2 a, double2 b, double2 c); | ||
1455 | double3 __ovld __cnfn fma(double3 a, double3 b, double3 c); | ||
1456 | double4 __ovld __cnfn fma(double4 a, double4 b, double4 c); | ||
1457 | double8 __ovld __cnfn fma(double8 a, double8 b, double8 c); | ||
1458 | double16 __ovld __cnfn fma(double16 a, double16 b, double16 c); | ||
1459 | -#endif //cl_khr_fp64 | ||
1460 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1461 | #ifdef cl_khr_fp16 | ||
1462 | half __ovld __cnfn fma(half a, half b, half c); | ||
1463 | half2 __ovld __cnfn fma(half2 a, half2 b, half2 c); | ||
1464 | @@ -7251,7 +7250,7 @@ float3 __ovld __cnfn fmax(float3 x, float y); | ||
1465 | float4 __ovld __cnfn fmax(float4 x, float y); | ||
1466 | float8 __ovld __cnfn fmax(float8 x, float y); | ||
1467 | float16 __ovld __cnfn fmax(float16 x, float y); | ||
1468 | -#ifdef cl_khr_fp64 | ||
1469 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1470 | double __ovld __cnfn fmax(double x, double y); | ||
1471 | double2 __ovld __cnfn fmax(double2 x, double2 y); | ||
1472 | double3 __ovld __cnfn fmax(double3 x, double3 y); | ||
1473 | @@ -7263,7 +7262,7 @@ double3 __ovld __cnfn fmax(double3 x, double y); | ||
1474 | double4 __ovld __cnfn fmax(double4 x, double y); | ||
1475 | double8 __ovld __cnfn fmax(double8 x, double y); | ||
1476 | double16 __ovld __cnfn fmax(double16 x, double y); | ||
1477 | -#endif //cl_khr_fp64 | ||
1478 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1479 | #ifdef cl_khr_fp16 | ||
1480 | half __ovld __cnfn fmax(half x, half y); | ||
1481 | half2 __ovld __cnfn fmax(half2 x, half2 y); | ||
1482 | @@ -7295,7 +7294,7 @@ float3 __ovld __cnfn fmin(float3 x, float y); | ||
1483 | float4 __ovld __cnfn fmin(float4 x, float y); | ||
1484 | float8 __ovld __cnfn fmin(float8 x, float y); | ||
1485 | float16 __ovld __cnfn fmin(float16 x, float y); | ||
1486 | -#ifdef cl_khr_fp64 | ||
1487 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1488 | double __ovld __cnfn fmin(double x, double y); | ||
1489 | double2 __ovld __cnfn fmin(double2 x, double2 y); | ||
1490 | double3 __ovld __cnfn fmin(double3 x, double3 y); | ||
1491 | @@ -7307,7 +7306,7 @@ double3 __ovld __cnfn fmin(double3 x, double y); | ||
1492 | double4 __ovld __cnfn fmin(double4 x, double y); | ||
1493 | double8 __ovld __cnfn fmin(double8 x, double y); | ||
1494 | double16 __ovld __cnfn fmin(double16 x, double y); | ||
1495 | -#endif //cl_khr_fp64 | ||
1496 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1497 | #ifdef cl_khr_fp16 | ||
1498 | half __ovld __cnfn fmin(half x, half y); | ||
1499 | half2 __ovld __cnfn fmin(half2 x, half2 y); | ||
1500 | @@ -7331,14 +7330,14 @@ float3 __ovld __cnfn fmod(float3 x, float3 y); | ||
1501 | float4 __ovld __cnfn fmod(float4 x, float4 y); | ||
1502 | float8 __ovld __cnfn fmod(float8 x, float8 y); | ||
1503 | float16 __ovld __cnfn fmod(float16 x, float16 y); | ||
1504 | -#ifdef cl_khr_fp64 | ||
1505 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1506 | double __ovld __cnfn fmod(double x, double y); | ||
1507 | double2 __ovld __cnfn fmod(double2 x, double2 y); | ||
1508 | double3 __ovld __cnfn fmod(double3 x, double3 y); | ||
1509 | double4 __ovld __cnfn fmod(double4 x, double4 y); | ||
1510 | double8 __ovld __cnfn fmod(double8 x, double8 y); | ||
1511 | double16 __ovld __cnfn fmod(double16 x, double16 y); | ||
1512 | -#endif //cl_khr_fp64 | ||
1513 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1514 | #ifdef cl_khr_fp16 | ||
1515 | half __ovld __cnfn fmod(half x, half y); | ||
1516 | half2 __ovld __cnfn fmod(half2 x, half2 y); | ||
1517 | @@ -7352,21 +7351,21 @@ half16 __ovld __cnfn fmod(half16 x, half16 y); | ||
1518 | * Returns fmin(x - floor (x), 0x1.fffffep-1f ). | ||
1519 | * floor(x) is returned in iptr. | ||
1520 | */ | ||
1521 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
1522 | +#ifdef __opencl_c_generic_address_space | ||
1523 | float __ovld fract(float x, float *iptr); | ||
1524 | float2 __ovld fract(float2 x, float2 *iptr); | ||
1525 | float3 __ovld fract(float3 x, float3 *iptr); | ||
1526 | float4 __ovld fract(float4 x, float4 *iptr); | ||
1527 | float8 __ovld fract(float8 x, float8 *iptr); | ||
1528 | float16 __ovld fract(float16 x, float16 *iptr); | ||
1529 | -#ifdef cl_khr_fp64 | ||
1530 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1531 | double __ovld fract(double x, double *iptr); | ||
1532 | double2 __ovld fract(double2 x, double2 *iptr); | ||
1533 | double3 __ovld fract(double3 x, double3 *iptr); | ||
1534 | double4 __ovld fract(double4 x, double4 *iptr); | ||
1535 | double8 __ovld fract(double8 x, double8 *iptr); | ||
1536 | double16 __ovld fract(double16 x, double16 *iptr); | ||
1537 | -#endif //cl_khr_fp64 | ||
1538 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1539 | #ifdef cl_khr_fp16 | ||
1540 | half __ovld fract(half x, half *iptr); | ||
1541 | half2 __ovld fract(half2 x, half2 *iptr); | ||
1542 | @@ -7375,7 +7374,9 @@ half4 __ovld fract(half4 x, half4 *iptr); | ||
1543 | half8 __ovld fract(half8 x, half8 *iptr); | ||
1544 | half16 __ovld fract(half16 x, half16 *iptr); | ||
1545 | #endif //cl_khr_fp16 | ||
1546 | -#else | ||
1547 | +#endif //__opencl_c_generic_address_space | ||
1548 | + | ||
1549 | +#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) | ||
1550 | float __ovld fract(float x, __global float *iptr); | ||
1551 | float2 __ovld fract(float2 x, __global float2 *iptr); | ||
1552 | float3 __ovld fract(float3 x, __global float3 *iptr); | ||
1553 | @@ -7394,7 +7395,7 @@ float3 __ovld fract(float3 x, __private float3 *iptr); | ||
1554 | float4 __ovld fract(float4 x, __private float4 *iptr); | ||
1555 | float8 __ovld fract(float8 x, __private float8 *iptr); | ||
1556 | float16 __ovld fract(float16 x, __private float16 *iptr); | ||
1557 | -#ifdef cl_khr_fp64 | ||
1558 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1559 | double __ovld fract(double x, __global double *iptr); | ||
1560 | double2 __ovld fract(double2 x, __global double2 *iptr); | ||
1561 | double3 __ovld fract(double3 x, __global double3 *iptr); | ||
1562 | @@ -7413,7 +7414,7 @@ double3 __ovld fract(double3 x, __private double3 *iptr); | ||
1563 | double4 __ovld fract(double4 x, __private double4 *iptr); | ||
1564 | double8 __ovld fract(double8 x, __private double8 *iptr); | ||
1565 | double16 __ovld fract(double16 x, __private double16 *iptr); | ||
1566 | -#endif //cl_khr_fp64 | ||
1567 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1568 | #ifdef cl_khr_fp16 | ||
1569 | half __ovld fract(half x, __global half *iptr); | ||
1570 | half2 __ovld fract(half2 x, __global half2 *iptr); | ||
1571 | @@ -7434,29 +7435,29 @@ half4 __ovld fract(half4 x, __private half4 *iptr); | ||
1572 | half8 __ovld fract(half8 x, __private half8 *iptr); | ||
1573 | half16 __ovld fract(half16 x, __private half16 *iptr); | ||
1574 | #endif //cl_khr_fp16 | ||
1575 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
1576 | - | ||
1577 | +#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != | ||
1578 | + //! CL_VERSION_2_0) | ||
1579 | /** | ||
1580 | * Extract mantissa and exponent from x. For each | ||
1581 | * component the mantissa returned is a float with | ||
1582 | * magnitude in the interval [1/2, 1) or 0. Each | ||
1583 | * component of x equals mantissa returned * 2^exp. | ||
1584 | */ | ||
1585 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
1586 | +#ifdef __opencl_c_generic_address_space | ||
1587 | float __ovld frexp(float x, int *exp); | ||
1588 | float2 __ovld frexp(float2 x, int2 *exp); | ||
1589 | float3 __ovld frexp(float3 x, int3 *exp); | ||
1590 | float4 __ovld frexp(float4 x, int4 *exp); | ||
1591 | float8 __ovld frexp(float8 x, int8 *exp); | ||
1592 | float16 __ovld frexp(float16 x, int16 *exp); | ||
1593 | -#ifdef cl_khr_fp64 | ||
1594 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1595 | double __ovld frexp(double x, int *exp); | ||
1596 | double2 __ovld frexp(double2 x, int2 *exp); | ||
1597 | double3 __ovld frexp(double3 x, int3 *exp); | ||
1598 | double4 __ovld frexp(double4 x, int4 *exp); | ||
1599 | double8 __ovld frexp(double8 x, int8 *exp); | ||
1600 | double16 __ovld frexp(double16 x, int16 *exp); | ||
1601 | -#endif //cl_khr_fp64 | ||
1602 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1603 | #ifdef cl_khr_fp16 | ||
1604 | half __ovld frexp(half x, int *exp); | ||
1605 | half2 __ovld frexp(half2 x, int2 *exp); | ||
1606 | @@ -7465,7 +7466,9 @@ half4 __ovld frexp(half4 x, int4 *exp); | ||
1607 | half8 __ovld frexp(half8 x, int8 *exp); | ||
1608 | half16 __ovld frexp(half16 x, int16 *exp); | ||
1609 | #endif //cl_khr_fp16 | ||
1610 | -#else | ||
1611 | +#endif //__opencl_c_generic_address_space | ||
1612 | + | ||
1613 | +#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) | ||
1614 | float __ovld frexp(float x, __global int *exp); | ||
1615 | float2 __ovld frexp(float2 x, __global int2 *exp); | ||
1616 | float3 __ovld frexp(float3 x, __global int3 *exp); | ||
1617 | @@ -7484,7 +7487,7 @@ float3 __ovld frexp(float3 x, __private int3 *exp); | ||
1618 | float4 __ovld frexp(float4 x, __private int4 *exp); | ||
1619 | float8 __ovld frexp(float8 x, __private int8 *exp); | ||
1620 | float16 __ovld frexp(float16 x, __private int16 *exp); | ||
1621 | -#ifdef cl_khr_fp64 | ||
1622 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1623 | double __ovld frexp(double x, __global int *exp); | ||
1624 | double2 __ovld frexp(double2 x, __global int2 *exp); | ||
1625 | double3 __ovld frexp(double3 x, __global int3 *exp); | ||
1626 | @@ -7503,7 +7506,7 @@ double3 __ovld frexp(double3 x, __private int3 *exp); | ||
1627 | double4 __ovld frexp(double4 x, __private int4 *exp); | ||
1628 | double8 __ovld frexp(double8 x, __private int8 *exp); | ||
1629 | double16 __ovld frexp(double16 x, __private int16 *exp); | ||
1630 | -#endif //cl_khr_fp64 | ||
1631 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1632 | #ifdef cl_khr_fp16 | ||
1633 | half __ovld frexp(half x, __global int *exp); | ||
1634 | half2 __ovld frexp(half2 x, __global int2 *exp); | ||
1635 | @@ -7524,7 +7527,8 @@ half4 __ovld frexp(half4 x, __private int4 *exp); | ||
1636 | half8 __ovld frexp(half8 x, __private int8 *exp); | ||
1637 | half16 __ovld frexp(half16 x, __private int16 *exp); | ||
1638 | #endif //cl_khr_fp16 | ||
1639 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
1640 | +#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != | ||
1641 | + //! CL_VERSION_2_0) | ||
1642 | |||
1643 | /** | ||
1644 | * Compute the value of the square root of x^2 + y^2 | ||
1645 | @@ -7536,14 +7540,14 @@ float3 __ovld __cnfn hypot(float3 x, float3 y); | ||
1646 | float4 __ovld __cnfn hypot(float4 x, float4 y); | ||
1647 | float8 __ovld __cnfn hypot(float8 x, float8 y); | ||
1648 | float16 __ovld __cnfn hypot(float16 x, float16 y); | ||
1649 | -#ifdef cl_khr_fp64 | ||
1650 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1651 | double __ovld __cnfn hypot(double x, double y); | ||
1652 | double2 __ovld __cnfn hypot(double2 x, double2 y); | ||
1653 | double3 __ovld __cnfn hypot(double3 x, double3 y); | ||
1654 | double4 __ovld __cnfn hypot(double4 x, double4 y); | ||
1655 | double8 __ovld __cnfn hypot(double8 x, double8 y); | ||
1656 | double16 __ovld __cnfn hypot(double16 x, double16 y); | ||
1657 | -#endif //cl_khr_fp64 | ||
1658 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1659 | #ifdef cl_khr_fp16 | ||
1660 | half __ovld __cnfn hypot(half x, half y); | ||
1661 | half2 __ovld __cnfn hypot(half2 x, half2 y); | ||
1662 | @@ -7562,14 +7566,14 @@ int3 __ovld __cnfn ilogb(float3 x); | ||
1663 | int4 __ovld __cnfn ilogb(float4 x); | ||
1664 | int8 __ovld __cnfn ilogb(float8 x); | ||
1665 | int16 __ovld __cnfn ilogb(float16 x); | ||
1666 | -#ifdef cl_khr_fp64 | ||
1667 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1668 | int __ovld __cnfn ilogb(double x); | ||
1669 | int2 __ovld __cnfn ilogb(double2 x); | ||
1670 | int3 __ovld __cnfn ilogb(double3 x); | ||
1671 | int4 __ovld __cnfn ilogb(double4 x); | ||
1672 | int8 __ovld __cnfn ilogb(double8 x); | ||
1673 | int16 __ovld __cnfn ilogb(double16 x); | ||
1674 | -#endif //cl_khr_fp64 | ||
1675 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1676 | #ifdef cl_khr_fp16 | ||
1677 | int __ovld __cnfn ilogb(half x); | ||
1678 | int2 __ovld __cnfn ilogb(half2 x); | ||
1679 | @@ -7593,7 +7597,7 @@ float3 __ovld __cnfn ldexp(float3 x, int n); | ||
1680 | float4 __ovld __cnfn ldexp(float4 x, int n); | ||
1681 | float8 __ovld __cnfn ldexp(float8 x, int n); | ||
1682 | float16 __ovld __cnfn ldexp(float16 x, int n); | ||
1683 | -#ifdef cl_khr_fp64 | ||
1684 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1685 | double __ovld __cnfn ldexp(double x, int n); | ||
1686 | double2 __ovld __cnfn ldexp(double2 x, int2 n); | ||
1687 | double3 __ovld __cnfn ldexp(double3 x, int3 n); | ||
1688 | @@ -7605,7 +7609,7 @@ double3 __ovld __cnfn ldexp(double3 x, int n); | ||
1689 | double4 __ovld __cnfn ldexp(double4 x, int n); | ||
1690 | double8 __ovld __cnfn ldexp(double8 x, int n); | ||
1691 | double16 __ovld __cnfn ldexp(double16 x, int n); | ||
1692 | -#endif //cl_khr_fp64 | ||
1693 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1694 | #ifdef cl_khr_fp16 | ||
1695 | half __ovld __cnfn ldexp(half x, int n); | ||
1696 | half2 __ovld __cnfn ldexp(half2 x, int2 n); | ||
1697 | @@ -7632,14 +7636,14 @@ float3 __ovld __cnfn lgamma(float3 x); | ||
1698 | float4 __ovld __cnfn lgamma(float4 x); | ||
1699 | float8 __ovld __cnfn lgamma(float8 x); | ||
1700 | float16 __ovld __cnfn lgamma(float16 x); | ||
1701 | -#ifdef cl_khr_fp64 | ||
1702 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1703 | double __ovld __cnfn lgamma(double x); | ||
1704 | double2 __ovld __cnfn lgamma(double2 x); | ||
1705 | double3 __ovld __cnfn lgamma(double3 x); | ||
1706 | double4 __ovld __cnfn lgamma(double4 x); | ||
1707 | double8 __ovld __cnfn lgamma(double8 x); | ||
1708 | double16 __ovld __cnfn lgamma(double16 x); | ||
1709 | -#endif //cl_khr_fp64 | ||
1710 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1711 | #ifdef cl_khr_fp16 | ||
1712 | half __ovld __cnfn lgamma(half x); | ||
1713 | half2 __ovld __cnfn lgamma(half2 x); | ||
1714 | @@ -7649,21 +7653,21 @@ half8 __ovld __cnfn lgamma(half8 x); | ||
1715 | half16 __ovld __cnfn lgamma(half16 x); | ||
1716 | #endif //cl_khr_fp16 | ||
1717 | |||
1718 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
1719 | +#ifdef __opencl_c_generic_address_space | ||
1720 | float __ovld lgamma_r(float x, int *signp); | ||
1721 | float2 __ovld lgamma_r(float2 x, int2 *signp); | ||
1722 | float3 __ovld lgamma_r(float3 x, int3 *signp); | ||
1723 | float4 __ovld lgamma_r(float4 x, int4 *signp); | ||
1724 | float8 __ovld lgamma_r(float8 x, int8 *signp); | ||
1725 | float16 __ovld lgamma_r(float16 x, int16 *signp); | ||
1726 | -#ifdef cl_khr_fp64 | ||
1727 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1728 | double __ovld lgamma_r(double x, int *signp); | ||
1729 | double2 __ovld lgamma_r(double2 x, int2 *signp); | ||
1730 | double3 __ovld lgamma_r(double3 x, int3 *signp); | ||
1731 | double4 __ovld lgamma_r(double4 x, int4 *signp); | ||
1732 | double8 __ovld lgamma_r(double8 x, int8 *signp); | ||
1733 | double16 __ovld lgamma_r(double16 x, int16 *signp); | ||
1734 | -#endif //cl_khr_fp64 | ||
1735 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1736 | #ifdef cl_khr_fp16 | ||
1737 | half __ovld lgamma_r(half x, int *signp); | ||
1738 | half2 __ovld lgamma_r(half2 x, int2 *signp); | ||
1739 | @@ -7672,7 +7676,9 @@ half4 __ovld lgamma_r(half4 x, int4 *signp); | ||
1740 | half8 __ovld lgamma_r(half8 x, int8 *signp); | ||
1741 | half16 __ovld lgamma_r(half16 x, int16 *signp); | ||
1742 | #endif //cl_khr_fp16 | ||
1743 | -#else | ||
1744 | +#endif //__opencl_c_generic_address_space | ||
1745 | + | ||
1746 | +#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) | ||
1747 | float __ovld lgamma_r(float x, __global int *signp); | ||
1748 | float2 __ovld lgamma_r(float2 x, __global int2 *signp); | ||
1749 | float3 __ovld lgamma_r(float3 x, __global int3 *signp); | ||
1750 | @@ -7691,7 +7697,7 @@ float3 __ovld lgamma_r(float3 x, __private int3 *signp); | ||
1751 | float4 __ovld lgamma_r(float4 x, __private int4 *signp); | ||
1752 | float8 __ovld lgamma_r(float8 x, __private int8 *signp); | ||
1753 | float16 __ovld lgamma_r(float16 x, __private int16 *signp); | ||
1754 | -#ifdef cl_khr_fp64 | ||
1755 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1756 | double __ovld lgamma_r(double x, __global int *signp); | ||
1757 | double2 __ovld lgamma_r(double2 x, __global int2 *signp); | ||
1758 | double3 __ovld lgamma_r(double3 x, __global int3 *signp); | ||
1759 | @@ -7710,7 +7716,7 @@ double3 __ovld lgamma_r(double3 x, __private int3 *signp); | ||
1760 | double4 __ovld lgamma_r(double4 x, __private int4 *signp); | ||
1761 | double8 __ovld lgamma_r(double8 x, __private int8 *signp); | ||
1762 | double16 __ovld lgamma_r(double16 x, __private int16 *signp); | ||
1763 | -#endif //cl_khr_fp64 | ||
1764 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1765 | #ifdef cl_khr_fp16 | ||
1766 | half __ovld lgamma_r(half x, __global int *signp); | ||
1767 | half2 __ovld lgamma_r(half2 x, __global int2 *signp); | ||
1768 | @@ -7731,8 +7737,8 @@ half4 __ovld lgamma_r(half4 x, __private int4 *signp); | ||
1769 | half8 __ovld lgamma_r(half8 x, __private int8 *signp); | ||
1770 | half16 __ovld lgamma_r(half16 x, __private int16 *signp); | ||
1771 | #endif //cl_khr_fp16 | ||
1772 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
1773 | - | ||
1774 | +#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != | ||
1775 | + //! CL_VERSION_2_0) | ||
1776 | /** | ||
1777 | * Compute natural logarithm. | ||
1778 | */ | ||
1779 | @@ -7742,14 +7748,14 @@ float3 __ovld __cnfn log(float3); | ||
1780 | float4 __ovld __cnfn log(float4); | ||
1781 | float8 __ovld __cnfn log(float8); | ||
1782 | float16 __ovld __cnfn log(float16); | ||
1783 | -#ifdef cl_khr_fp64 | ||
1784 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1785 | double __ovld __cnfn log(double); | ||
1786 | double2 __ovld __cnfn log(double2); | ||
1787 | double3 __ovld __cnfn log(double3); | ||
1788 | double4 __ovld __cnfn log(double4); | ||
1789 | double8 __ovld __cnfn log(double8); | ||
1790 | double16 __ovld __cnfn log(double16); | ||
1791 | -#endif //cl_khr_fp64 | ||
1792 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1793 | #ifdef cl_khr_fp16 | ||
1794 | half __ovld __cnfn log(half); | ||
1795 | half2 __ovld __cnfn log(half2); | ||
1796 | @@ -7760,7 +7766,7 @@ half16 __ovld __cnfn log(half16); | ||
1797 | #endif //cl_khr_fp16 | ||
1798 | |||
1799 | /** | ||
1800 | - * Compute a base 2 logarithm. | ||
1801 | + * Compute a base 2 logarithm | ||
1802 | */ | ||
1803 | float __ovld __cnfn log2(float); | ||
1804 | float2 __ovld __cnfn log2(float2); | ||
1805 | @@ -7768,14 +7774,14 @@ float3 __ovld __cnfn log2(float3); | ||
1806 | float4 __ovld __cnfn log2(float4); | ||
1807 | float8 __ovld __cnfn log2(float8); | ||
1808 | float16 __ovld __cnfn log2(float16); | ||
1809 | -#ifdef cl_khr_fp64 | ||
1810 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1811 | double __ovld __cnfn log2(double); | ||
1812 | double2 __ovld __cnfn log2(double2); | ||
1813 | double3 __ovld __cnfn log2(double3); | ||
1814 | double4 __ovld __cnfn log2(double4); | ||
1815 | double8 __ovld __cnfn log2(double8); | ||
1816 | double16 __ovld __cnfn log2(double16); | ||
1817 | -#endif //cl_khr_fp64 | ||
1818 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1819 | #ifdef cl_khr_fp16 | ||
1820 | half __ovld __cnfn log2(half); | ||
1821 | half2 __ovld __cnfn log2(half2); | ||
1822 | @@ -7794,14 +7800,14 @@ float3 __ovld __cnfn log10(float3); | ||
1823 | float4 __ovld __cnfn log10(float4); | ||
1824 | float8 __ovld __cnfn log10(float8); | ||
1825 | float16 __ovld __cnfn log10(float16); | ||
1826 | -#ifdef cl_khr_fp64 | ||
1827 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1828 | double __ovld __cnfn log10(double); | ||
1829 | double2 __ovld __cnfn log10(double2); | ||
1830 | double3 __ovld __cnfn log10(double3); | ||
1831 | double4 __ovld __cnfn log10(double4); | ||
1832 | double8 __ovld __cnfn log10(double8); | ||
1833 | double16 __ovld __cnfn log10(double16); | ||
1834 | -#endif //cl_khr_fp64 | ||
1835 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1836 | #ifdef cl_khr_fp16 | ||
1837 | half __ovld __cnfn log10(half); | ||
1838 | half2 __ovld __cnfn log10(half2); | ||
1839 | @@ -7820,14 +7826,14 @@ float3 __ovld __cnfn log1p(float3 x); | ||
1840 | float4 __ovld __cnfn log1p(float4 x); | ||
1841 | float8 __ovld __cnfn log1p(float8 x); | ||
1842 | float16 __ovld __cnfn log1p(float16 x); | ||
1843 | -#ifdef cl_khr_fp64 | ||
1844 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1845 | double __ovld __cnfn log1p(double x); | ||
1846 | double2 __ovld __cnfn log1p(double2 x); | ||
1847 | double3 __ovld __cnfn log1p(double3 x); | ||
1848 | double4 __ovld __cnfn log1p(double4 x); | ||
1849 | double8 __ovld __cnfn log1p(double8 x); | ||
1850 | double16 __ovld __cnfn log1p(double16 x); | ||
1851 | -#endif //cl_khr_fp64 | ||
1852 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1853 | #ifdef cl_khr_fp16 | ||
1854 | half __ovld __cnfn log1p(half x); | ||
1855 | half2 __ovld __cnfn log1p(half2 x); | ||
1856 | @@ -7847,14 +7853,14 @@ float3 __ovld __cnfn logb(float3 x); | ||
1857 | float4 __ovld __cnfn logb(float4 x); | ||
1858 | float8 __ovld __cnfn logb(float8 x); | ||
1859 | float16 __ovld __cnfn logb(float16 x); | ||
1860 | -#ifdef cl_khr_fp64 | ||
1861 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1862 | double __ovld __cnfn logb(double x); | ||
1863 | double2 __ovld __cnfn logb(double2 x); | ||
1864 | double3 __ovld __cnfn logb(double3 x); | ||
1865 | double4 __ovld __cnfn logb(double4 x); | ||
1866 | double8 __ovld __cnfn logb(double8 x); | ||
1867 | double16 __ovld __cnfn logb(double16 x); | ||
1868 | -#endif //cl_khr_fp64 | ||
1869 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1870 | #ifdef cl_khr_fp16 | ||
1871 | half __ovld __cnfn logb(half x); | ||
1872 | half2 __ovld __cnfn logb(half2 x); | ||
1873 | @@ -7877,14 +7883,14 @@ float3 __ovld __cnfn mad(float3 a, float3 b, float3 c); | ||
1874 | float4 __ovld __cnfn mad(float4 a, float4 b, float4 c); | ||
1875 | float8 __ovld __cnfn mad(float8 a, float8 b, float8 c); | ||
1876 | float16 __ovld __cnfn mad(float16 a, float16 b, float16 c); | ||
1877 | -#ifdef cl_khr_fp64 | ||
1878 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1879 | double __ovld __cnfn mad(double a, double b, double c); | ||
1880 | double2 __ovld __cnfn mad(double2 a, double2 b, double2 c); | ||
1881 | double3 __ovld __cnfn mad(double3 a, double3 b, double3 c); | ||
1882 | double4 __ovld __cnfn mad(double4 a, double4 b, double4 c); | ||
1883 | double8 __ovld __cnfn mad(double8 a, double8 b, double8 c); | ||
1884 | double16 __ovld __cnfn mad(double16 a, double16 b, double16 c); | ||
1885 | -#endif //cl_khr_fp64 | ||
1886 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1887 | #ifdef cl_khr_fp16 | ||
1888 | half __ovld __cnfn mad(half a, half b, half c); | ||
1889 | half2 __ovld __cnfn mad(half2 a, half2 b, half2 c); | ||
1890 | @@ -7904,14 +7910,14 @@ float3 __ovld __cnfn maxmag(float3 x, float3 y); | ||
1891 | float4 __ovld __cnfn maxmag(float4 x, float4 y); | ||
1892 | float8 __ovld __cnfn maxmag(float8 x, float8 y); | ||
1893 | float16 __ovld __cnfn maxmag(float16 x, float16 y); | ||
1894 | -#ifdef cl_khr_fp64 | ||
1895 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1896 | double __ovld __cnfn maxmag(double x, double y); | ||
1897 | double2 __ovld __cnfn maxmag(double2 x, double2 y); | ||
1898 | double3 __ovld __cnfn maxmag(double3 x, double3 y); | ||
1899 | double4 __ovld __cnfn maxmag(double4 x, double4 y); | ||
1900 | double8 __ovld __cnfn maxmag(double8 x, double8 y); | ||
1901 | double16 __ovld __cnfn maxmag(double16 x, double16 y); | ||
1902 | -#endif //cl_khr_fp64 | ||
1903 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1904 | #ifdef cl_khr_fp16 | ||
1905 | half __ovld __cnfn maxmag(half x, half y); | ||
1906 | half2 __ovld __cnfn maxmag(half2 x, half2 y); | ||
1907 | @@ -7931,14 +7937,14 @@ float3 __ovld __cnfn minmag(float3 x, float3 y); | ||
1908 | float4 __ovld __cnfn minmag(float4 x, float4 y); | ||
1909 | float8 __ovld __cnfn minmag(float8 x, float8 y); | ||
1910 | float16 __ovld __cnfn minmag(float16 x, float16 y); | ||
1911 | -#ifdef cl_khr_fp64 | ||
1912 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1913 | double __ovld __cnfn minmag(double x, double y); | ||
1914 | double2 __ovld __cnfn minmag(double2 x, double2 y); | ||
1915 | double3 __ovld __cnfn minmag(double3 x, double3 y); | ||
1916 | double4 __ovld __cnfn minmag(double4 x, double4 y); | ||
1917 | double8 __ovld __cnfn minmag(double8 x, double8 y); | ||
1918 | double16 __ovld __cnfn minmag(double16 x, double16 y); | ||
1919 | -#endif //cl_khr_fp64 | ||
1920 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1921 | #ifdef cl_khr_fp16 | ||
1922 | half __ovld __cnfn minmag(half x, half y); | ||
1923 | half2 __ovld __cnfn minmag(half2 x, half2 y); | ||
1924 | @@ -7955,21 +7961,21 @@ half16 __ovld __cnfn minmag(half16 x, half16 y); | ||
1925 | * the argument. It stores the integral part in the object | ||
1926 | * pointed to by iptr. | ||
1927 | */ | ||
1928 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
1929 | +#ifdef __opencl_c_generic_address_space | ||
1930 | float __ovld modf(float x, float *iptr); | ||
1931 | float2 __ovld modf(float2 x, float2 *iptr); | ||
1932 | float3 __ovld modf(float3 x, float3 *iptr); | ||
1933 | float4 __ovld modf(float4 x, float4 *iptr); | ||
1934 | float8 __ovld modf(float8 x, float8 *iptr); | ||
1935 | float16 __ovld modf(float16 x, float16 *iptr); | ||
1936 | -#ifdef cl_khr_fp64 | ||
1937 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1938 | double __ovld modf(double x, double *iptr); | ||
1939 | double2 __ovld modf(double2 x, double2 *iptr); | ||
1940 | double3 __ovld modf(double3 x, double3 *iptr); | ||
1941 | double4 __ovld modf(double4 x, double4 *iptr); | ||
1942 | double8 __ovld modf(double8 x, double8 *iptr); | ||
1943 | double16 __ovld modf(double16 x, double16 *iptr); | ||
1944 | -#endif //cl_khr_fp64 | ||
1945 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1946 | #ifdef cl_khr_fp16 | ||
1947 | half __ovld modf(half x, half *iptr); | ||
1948 | half2 __ovld modf(half2 x, half2 *iptr); | ||
1949 | @@ -7978,7 +7984,9 @@ half4 __ovld modf(half4 x, half4 *iptr); | ||
1950 | half8 __ovld modf(half8 x, half8 *iptr); | ||
1951 | half16 __ovld modf(half16 x, half16 *iptr); | ||
1952 | #endif //cl_khr_fp16 | ||
1953 | -#else | ||
1954 | +#endif //__opencl_c_generic_address_space | ||
1955 | + | ||
1956 | +#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) | ||
1957 | float __ovld modf(float x, __global float *iptr); | ||
1958 | float2 __ovld modf(float2 x, __global float2 *iptr); | ||
1959 | float3 __ovld modf(float3 x, __global float3 *iptr); | ||
1960 | @@ -7997,7 +8005,7 @@ float3 __ovld modf(float3 x, __private float3 *iptr); | ||
1961 | float4 __ovld modf(float4 x, __private float4 *iptr); | ||
1962 | float8 __ovld modf(float8 x, __private float8 *iptr); | ||
1963 | float16 __ovld modf(float16 x, __private float16 *iptr); | ||
1964 | -#ifdef cl_khr_fp64 | ||
1965 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1966 | double __ovld modf(double x, __global double *iptr); | ||
1967 | double2 __ovld modf(double2 x, __global double2 *iptr); | ||
1968 | double3 __ovld modf(double3 x, __global double3 *iptr); | ||
1969 | @@ -8016,7 +8024,7 @@ double3 __ovld modf(double3 x, __private double3 *iptr); | ||
1970 | double4 __ovld modf(double4 x, __private double4 *iptr); | ||
1971 | double8 __ovld modf(double8 x, __private double8 *iptr); | ||
1972 | double16 __ovld modf(double16 x, __private double16 *iptr); | ||
1973 | -#endif //cl_khr_fp64 | ||
1974 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1975 | #ifdef cl_khr_fp16 | ||
1976 | half __ovld modf(half x, __global half *iptr); | ||
1977 | half2 __ovld modf(half2 x, __global half2 *iptr); | ||
1978 | @@ -8037,7 +8045,8 @@ half4 __ovld modf(half4 x, __private half4 *iptr); | ||
1979 | half8 __ovld modf(half8 x, __private half8 *iptr); | ||
1980 | half16 __ovld modf(half16 x, __private half16 *iptr); | ||
1981 | #endif //cl_khr_fp16 | ||
1982 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
1983 | +#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != | ||
1984 | + //! CL_VERSION_2_0) | ||
1985 | |||
1986 | /** | ||
1987 | * Returns a quiet NaN. The nancode may be placed | ||
1988 | @@ -8049,14 +8058,14 @@ float3 __ovld __cnfn nan(uint3 nancode); | ||
1989 | float4 __ovld __cnfn nan(uint4 nancode); | ||
1990 | float8 __ovld __cnfn nan(uint8 nancode); | ||
1991 | float16 __ovld __cnfn nan(uint16 nancode); | ||
1992 | -#ifdef cl_khr_fp64 | ||
1993 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
1994 | double __ovld __cnfn nan(ulong nancode); | ||
1995 | double2 __ovld __cnfn nan(ulong2 nancode); | ||
1996 | double3 __ovld __cnfn nan(ulong3 nancode); | ||
1997 | double4 __ovld __cnfn nan(ulong4 nancode); | ||
1998 | double8 __ovld __cnfn nan(ulong8 nancode); | ||
1999 | double16 __ovld __cnfn nan(ulong16 nancode); | ||
2000 | -#endif //cl_khr_fp64 | ||
2001 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2002 | #ifdef cl_khr_fp16 | ||
2003 | half __ovld __cnfn nan(ushort nancode); | ||
2004 | half2 __ovld __cnfn nan(ushort2 nancode); | ||
2005 | @@ -8079,14 +8088,14 @@ float3 __ovld __cnfn nextafter(float3 x, float3 y); | ||
2006 | float4 __ovld __cnfn nextafter(float4 x, float4 y); | ||
2007 | float8 __ovld __cnfn nextafter(float8 x, float8 y); | ||
2008 | float16 __ovld __cnfn nextafter(float16 x, float16 y); | ||
2009 | -#ifdef cl_khr_fp64 | ||
2010 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2011 | double __ovld __cnfn nextafter(double x, double y); | ||
2012 | double2 __ovld __cnfn nextafter(double2 x, double2 y); | ||
2013 | double3 __ovld __cnfn nextafter(double3 x, double3 y); | ||
2014 | double4 __ovld __cnfn nextafter(double4 x, double4 y); | ||
2015 | double8 __ovld __cnfn nextafter(double8 x, double8 y); | ||
2016 | double16 __ovld __cnfn nextafter(double16 x, double16 y); | ||
2017 | -#endif //cl_khr_fp64 | ||
2018 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2019 | #ifdef cl_khr_fp16 | ||
2020 | half __ovld __cnfn nextafter(half x, half y); | ||
2021 | half2 __ovld __cnfn nextafter(half2 x, half2 y); | ||
2022 | @@ -8105,14 +8114,14 @@ float3 __ovld __cnfn pow(float3 x, float3 y); | ||
2023 | float4 __ovld __cnfn pow(float4 x, float4 y); | ||
2024 | float8 __ovld __cnfn pow(float8 x, float8 y); | ||
2025 | float16 __ovld __cnfn pow(float16 x, float16 y); | ||
2026 | -#ifdef cl_khr_fp64 | ||
2027 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2028 | double __ovld __cnfn pow(double x, double y); | ||
2029 | double2 __ovld __cnfn pow(double2 x, double2 y); | ||
2030 | double3 __ovld __cnfn pow(double3 x, double3 y); | ||
2031 | double4 __ovld __cnfn pow(double4 x, double4 y); | ||
2032 | double8 __ovld __cnfn pow(double8 x, double8 y); | ||
2033 | double16 __ovld __cnfn pow(double16 x, double16 y); | ||
2034 | -#endif //cl_khr_fp64 | ||
2035 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2036 | #ifdef cl_khr_fp16 | ||
2037 | half __ovld __cnfn pow(half x, half y); | ||
2038 | half2 __ovld __cnfn pow(half2 x, half2 y); | ||
2039 | @@ -8131,14 +8140,14 @@ float3 __ovld __cnfn pown(float3 x, int3 y); | ||
2040 | float4 __ovld __cnfn pown(float4 x, int4 y); | ||
2041 | float8 __ovld __cnfn pown(float8 x, int8 y); | ||
2042 | float16 __ovld __cnfn pown(float16 x, int16 y); | ||
2043 | -#ifdef cl_khr_fp64 | ||
2044 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2045 | double __ovld __cnfn pown(double x, int y); | ||
2046 | double2 __ovld __cnfn pown(double2 x, int2 y); | ||
2047 | double3 __ovld __cnfn pown(double3 x, int3 y); | ||
2048 | double4 __ovld __cnfn pown(double4 x, int4 y); | ||
2049 | double8 __ovld __cnfn pown(double8 x, int8 y); | ||
2050 | double16 __ovld __cnfn pown(double16 x, int16 y); | ||
2051 | -#endif //cl_khr_fp64 | ||
2052 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2053 | #ifdef cl_khr_fp16 | ||
2054 | half __ovld __cnfn pown(half x, int y); | ||
2055 | half2 __ovld __cnfn pown(half2 x, int2 y); | ||
2056 | @@ -8157,14 +8166,14 @@ float3 __ovld __cnfn powr(float3 x, float3 y); | ||
2057 | float4 __ovld __cnfn powr(float4 x, float4 y); | ||
2058 | float8 __ovld __cnfn powr(float8 x, float8 y); | ||
2059 | float16 __ovld __cnfn powr(float16 x, float16 y); | ||
2060 | -#ifdef cl_khr_fp64 | ||
2061 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2062 | double __ovld __cnfn powr(double x, double y); | ||
2063 | double2 __ovld __cnfn powr(double2 x, double2 y); | ||
2064 | double3 __ovld __cnfn powr(double3 x, double3 y); | ||
2065 | double4 __ovld __cnfn powr(double4 x, double4 y); | ||
2066 | double8 __ovld __cnfn powr(double8 x, double8 y); | ||
2067 | double16 __ovld __cnfn powr(double16 x, double16 y); | ||
2068 | -#endif //cl_khr_fp64 | ||
2069 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2070 | #ifdef cl_khr_fp16 | ||
2071 | half __ovld __cnfn powr(half x, half y); | ||
2072 | half2 __ovld __cnfn powr(half2 x, half2 y); | ||
2073 | @@ -8186,14 +8195,14 @@ float3 __ovld __cnfn remainder(float3 x, float3 y); | ||
2074 | float4 __ovld __cnfn remainder(float4 x, float4 y); | ||
2075 | float8 __ovld __cnfn remainder(float8 x, float8 y); | ||
2076 | float16 __ovld __cnfn remainder(float16 x, float16 y); | ||
2077 | -#ifdef cl_khr_fp64 | ||
2078 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2079 | double __ovld __cnfn remainder(double x, double y); | ||
2080 | double2 __ovld __cnfn remainder(double2 x, double2 y); | ||
2081 | double3 __ovld __cnfn remainder(double3 x, double3 y); | ||
2082 | double4 __ovld __cnfn remainder(double4 x, double4 y); | ||
2083 | double8 __ovld __cnfn remainder(double8 x, double8 y); | ||
2084 | double16 __ovld __cnfn remainder(double16 x, double16 y); | ||
2085 | -#endif //cl_khr_fp64 | ||
2086 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2087 | #ifdef cl_khr_fp16 | ||
2088 | half __ovld __cnfn remainder(half x, half y); | ||
2089 | half2 __ovld __cnfn remainder(half2 x, half2 y); | ||
2090 | @@ -8215,21 +8224,21 @@ half16 __ovld __cnfn remainder(half16 x, half16 y); | ||
2091 | * sign as x/y. It stores this signed value in the object | ||
2092 | * pointed to by quo. | ||
2093 | */ | ||
2094 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
2095 | +#ifdef __opencl_c_generic_address_space | ||
2096 | float __ovld remquo(float x, float y, int *quo); | ||
2097 | float2 __ovld remquo(float2 x, float2 y, int2 *quo); | ||
2098 | float3 __ovld remquo(float3 x, float3 y, int3 *quo); | ||
2099 | float4 __ovld remquo(float4 x, float4 y, int4 *quo); | ||
2100 | float8 __ovld remquo(float8 x, float8 y, int8 *quo); | ||
2101 | float16 __ovld remquo(float16 x, float16 y, int16 *quo); | ||
2102 | -#ifdef cl_khr_fp64 | ||
2103 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2104 | double __ovld remquo(double x, double y, int *quo); | ||
2105 | double2 __ovld remquo(double2 x, double2 y, int2 *quo); | ||
2106 | double3 __ovld remquo(double3 x, double3 y, int3 *quo); | ||
2107 | double4 __ovld remquo(double4 x, double4 y, int4 *quo); | ||
2108 | double8 __ovld remquo(double8 x, double8 y, int8 *quo); | ||
2109 | double16 __ovld remquo(double16 x, double16 y, int16 *quo); | ||
2110 | -#endif //cl_khr_fp64 | ||
2111 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2112 | #ifdef cl_khr_fp16 | ||
2113 | half __ovld remquo(half x, half y, int *quo); | ||
2114 | half2 __ovld remquo(half2 x, half2 y, int2 *quo); | ||
2115 | @@ -8237,9 +8246,10 @@ half3 __ovld remquo(half3 x, half3 y, int3 *quo); | ||
2116 | half4 __ovld remquo(half4 x, half4 y, int4 *quo); | ||
2117 | half8 __ovld remquo(half8 x, half8 y, int8 *quo); | ||
2118 | half16 __ovld remquo(half16 x, half16 y, int16 *quo); | ||
2119 | - | ||
2120 | #endif //cl_khr_fp16 | ||
2121 | -#else | ||
2122 | +#endif //__opencl_c_generic_address_space | ||
2123 | + | ||
2124 | +#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) | ||
2125 | float __ovld remquo(float x, float y, __global int *quo); | ||
2126 | float2 __ovld remquo(float2 x, float2 y, __global int2 *quo); | ||
2127 | float3 __ovld remquo(float3 x, float3 y, __global int3 *quo); | ||
2128 | @@ -8258,7 +8268,7 @@ float3 __ovld remquo(float3 x, float3 y, __private int3 *quo); | ||
2129 | float4 __ovld remquo(float4 x, float4 y, __private int4 *quo); | ||
2130 | float8 __ovld remquo(float8 x, float8 y, __private int8 *quo); | ||
2131 | float16 __ovld remquo(float16 x, float16 y, __private int16 *quo); | ||
2132 | -#ifdef cl_khr_fp64 | ||
2133 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2134 | double __ovld remquo(double x, double y, __global int *quo); | ||
2135 | double2 __ovld remquo(double2 x, double2 y, __global int2 *quo); | ||
2136 | double3 __ovld remquo(double3 x, double3 y, __global int3 *quo); | ||
2137 | @@ -8277,7 +8287,7 @@ double3 __ovld remquo(double3 x, double3 y, __private int3 *quo); | ||
2138 | double4 __ovld remquo(double4 x, double4 y, __private int4 *quo); | ||
2139 | double8 __ovld remquo(double8 x, double8 y, __private int8 *quo); | ||
2140 | double16 __ovld remquo(double16 x, double16 y, __private int16 *quo); | ||
2141 | -#endif //cl_khr_fp64 | ||
2142 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2143 | #ifdef cl_khr_fp16 | ||
2144 | half __ovld remquo(half x, half y, __global int *quo); | ||
2145 | half2 __ovld remquo(half2 x, half2 y, __global int2 *quo); | ||
2146 | @@ -8298,7 +8308,8 @@ half4 __ovld remquo(half4 x, half4 y, __private int4 *quo); | ||
2147 | half8 __ovld remquo(half8 x, half8 y, __private int8 *quo); | ||
2148 | half16 __ovld remquo(half16 x, half16 y, __private int16 *quo); | ||
2149 | #endif //cl_khr_fp16 | ||
2150 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
2151 | +#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != | ||
2152 | + //! CL_VERSION_2_0) | ||
2153 | /** | ||
2154 | * Round to integral value (using round to nearest | ||
2155 | * even rounding mode) in floating-point format. | ||
2156 | @@ -8311,14 +8322,14 @@ float3 __ovld __cnfn rint(float3); | ||
2157 | float4 __ovld __cnfn rint(float4); | ||
2158 | float8 __ovld __cnfn rint(float8); | ||
2159 | float16 __ovld __cnfn rint(float16); | ||
2160 | -#ifdef cl_khr_fp64 | ||
2161 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2162 | double __ovld __cnfn rint(double); | ||
2163 | double2 __ovld __cnfn rint(double2); | ||
2164 | double3 __ovld __cnfn rint(double3); | ||
2165 | double4 __ovld __cnfn rint(double4); | ||
2166 | double8 __ovld __cnfn rint(double8); | ||
2167 | double16 __ovld __cnfn rint(double16); | ||
2168 | -#endif //cl_khr_fp64 | ||
2169 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2170 | #ifdef cl_khr_fp16 | ||
2171 | half __ovld __cnfn rint(half); | ||
2172 | half2 __ovld __cnfn rint(half2); | ||
2173 | @@ -8337,14 +8348,14 @@ float3 __ovld __cnfn rootn(float3 x, int3 y); | ||
2174 | float4 __ovld __cnfn rootn(float4 x, int4 y); | ||
2175 | float8 __ovld __cnfn rootn(float8 x, int8 y); | ||
2176 | float16 __ovld __cnfn rootn(float16 x, int16 y); | ||
2177 | -#ifdef cl_khr_fp64 | ||
2178 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2179 | double __ovld __cnfn rootn(double x, int y); | ||
2180 | double2 __ovld __cnfn rootn(double2 x, int2 y); | ||
2181 | double3 __ovld __cnfn rootn(double3 x, int3 y); | ||
2182 | double4 __ovld __cnfn rootn(double4 x, int4 y); | ||
2183 | double8 __ovld __cnfn rootn(double8 x, int8 y); | ||
2184 | double16 __ovld __cnfn rootn(double16 x, int16 y); | ||
2185 | -#endif //cl_khr_fp64 | ||
2186 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2187 | #ifdef cl_khr_fp16 | ||
2188 | half __ovld __cnfn rootn(half x, int y); | ||
2189 | half2 __ovld __cnfn rootn(half2 x, int2 y); | ||
2190 | @@ -8365,14 +8376,14 @@ float3 __ovld __cnfn round(float3 x); | ||
2191 | float4 __ovld __cnfn round(float4 x); | ||
2192 | float8 __ovld __cnfn round(float8 x); | ||
2193 | float16 __ovld __cnfn round(float16 x); | ||
2194 | -#ifdef cl_khr_fp64 | ||
2195 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2196 | double __ovld __cnfn round(double x); | ||
2197 | double2 __ovld __cnfn round(double2 x); | ||
2198 | double3 __ovld __cnfn round(double3 x); | ||
2199 | double4 __ovld __cnfn round(double4 x); | ||
2200 | double8 __ovld __cnfn round(double8 x); | ||
2201 | double16 __ovld __cnfn round(double16 x); | ||
2202 | -#endif //cl_khr_fp64 | ||
2203 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2204 | #ifdef cl_khr_fp16 | ||
2205 | half __ovld __cnfn round(half x); | ||
2206 | half2 __ovld __cnfn round(half2 x); | ||
2207 | @@ -8391,14 +8402,14 @@ float3 __ovld __cnfn rsqrt(float3); | ||
2208 | float4 __ovld __cnfn rsqrt(float4); | ||
2209 | float8 __ovld __cnfn rsqrt(float8); | ||
2210 | float16 __ovld __cnfn rsqrt(float16); | ||
2211 | -#ifdef cl_khr_fp64 | ||
2212 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2213 | double __ovld __cnfn rsqrt(double); | ||
2214 | double2 __ovld __cnfn rsqrt(double2); | ||
2215 | double3 __ovld __cnfn rsqrt(double3); | ||
2216 | double4 __ovld __cnfn rsqrt(double4); | ||
2217 | double8 __ovld __cnfn rsqrt(double8); | ||
2218 | double16 __ovld __cnfn rsqrt(double16); | ||
2219 | -#endif //cl_khr_fp64 | ||
2220 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2221 | #ifdef cl_khr_fp16 | ||
2222 | half __ovld __cnfn rsqrt(half); | ||
2223 | half2 __ovld __cnfn rsqrt(half2); | ||
2224 | @@ -8417,14 +8428,14 @@ float3 __ovld __cnfn sin(float3); | ||
2225 | float4 __ovld __cnfn sin(float4); | ||
2226 | float8 __ovld __cnfn sin(float8); | ||
2227 | float16 __ovld __cnfn sin(float16); | ||
2228 | -#ifdef cl_khr_fp64 | ||
2229 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2230 | double __ovld __cnfn sin(double); | ||
2231 | double2 __ovld __cnfn sin(double2); | ||
2232 | double3 __ovld __cnfn sin(double3); | ||
2233 | double4 __ovld __cnfn sin(double4); | ||
2234 | double8 __ovld __cnfn sin(double8); | ||
2235 | double16 __ovld __cnfn sin(double16); | ||
2236 | -#endif //cl_khr_fp64 | ||
2237 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2238 | #ifdef cl_khr_fp16 | ||
2239 | half __ovld __cnfn sin(half); | ||
2240 | half2 __ovld __cnfn sin(half2); | ||
2241 | @@ -8439,21 +8450,21 @@ half16 __ovld __cnfn sin(half16); | ||
2242 | * is the return value and computed cosine is returned | ||
2243 | * in cosval. | ||
2244 | */ | ||
2245 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
2246 | +#ifdef __opencl_c_generic_address_space | ||
2247 | float __ovld sincos(float x, float *cosval); | ||
2248 | float2 __ovld sincos(float2 x, float2 *cosval); | ||
2249 | float3 __ovld sincos(float3 x, float3 *cosval); | ||
2250 | float4 __ovld sincos(float4 x, float4 *cosval); | ||
2251 | float8 __ovld sincos(float8 x, float8 *cosval); | ||
2252 | float16 __ovld sincos(float16 x, float16 *cosval); | ||
2253 | -#ifdef cl_khr_fp64 | ||
2254 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2255 | double __ovld sincos(double x, double *cosval); | ||
2256 | double2 __ovld sincos(double2 x, double2 *cosval); | ||
2257 | double3 __ovld sincos(double3 x, double3 *cosval); | ||
2258 | double4 __ovld sincos(double4 x, double4 *cosval); | ||
2259 | double8 __ovld sincos(double8 x, double8 *cosval); | ||
2260 | double16 __ovld sincos(double16 x, double16 *cosval); | ||
2261 | -#endif //cl_khr_fp64 | ||
2262 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2263 | #ifdef cl_khr_fp16 | ||
2264 | half __ovld sincos(half x, half *cosval); | ||
2265 | half2 __ovld sincos(half2 x, half2 *cosval); | ||
2266 | @@ -8462,7 +8473,9 @@ half4 __ovld sincos(half4 x, half4 *cosval); | ||
2267 | half8 __ovld sincos(half8 x, half8 *cosval); | ||
2268 | half16 __ovld sincos(half16 x, half16 *cosval); | ||
2269 | #endif //cl_khr_fp16 | ||
2270 | -#else | ||
2271 | +#endif //__opencl_c_generic_address_space | ||
2272 | + | ||
2273 | +#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0) | ||
2274 | float __ovld sincos(float x, __global float *cosval); | ||
2275 | float2 __ovld sincos(float2 x, __global float2 *cosval); | ||
2276 | float3 __ovld sincos(float3 x, __global float3 *cosval); | ||
2277 | @@ -8481,7 +8494,7 @@ float3 __ovld sincos(float3 x, __private float3 *cosval); | ||
2278 | float4 __ovld sincos(float4 x, __private float4 *cosval); | ||
2279 | float8 __ovld sincos(float8 x, __private float8 *cosval); | ||
2280 | float16 __ovld sincos(float16 x, __private float16 *cosval); | ||
2281 | -#ifdef cl_khr_fp64 | ||
2282 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2283 | double __ovld sincos(double x, __global double *cosval); | ||
2284 | double2 __ovld sincos(double2 x, __global double2 *cosval); | ||
2285 | double3 __ovld sincos(double3 x, __global double3 *cosval); | ||
2286 | @@ -8500,7 +8513,7 @@ double3 __ovld sincos(double3 x, __private double3 *cosval); | ||
2287 | double4 __ovld sincos(double4 x, __private double4 *cosval); | ||
2288 | double8 __ovld sincos(double8 x, __private double8 *cosval); | ||
2289 | double16 __ovld sincos(double16 x, __private double16 *cosval); | ||
2290 | -#endif //cl_khr_fp64 | ||
2291 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2292 | #ifdef cl_khr_fp16 | ||
2293 | half __ovld sincos(half x, __global half *cosval); | ||
2294 | half2 __ovld sincos(half2 x, __global half2 *cosval); | ||
2295 | @@ -8521,8 +8534,8 @@ half4 __ovld sincos(half4 x, __private half4 *cosval); | ||
2296 | half8 __ovld sincos(half8 x, __private half8 *cosval); | ||
2297 | half16 __ovld sincos(half16 x, __private half16 *cosval); | ||
2298 | #endif //cl_khr_fp16 | ||
2299 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
2300 | - | ||
2301 | +#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != | ||
2302 | + //! CL_VERSION_2_0) | ||
2303 | /** | ||
2304 | * Compute hyperbolic sine. | ||
2305 | */ | ||
2306 | @@ -8532,14 +8545,14 @@ float3 __ovld __cnfn sinh(float3); | ||
2307 | float4 __ovld __cnfn sinh(float4); | ||
2308 | float8 __ovld __cnfn sinh(float8); | ||
2309 | float16 __ovld __cnfn sinh(float16); | ||
2310 | -#ifdef cl_khr_fp64 | ||
2311 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2312 | double __ovld __cnfn sinh(double); | ||
2313 | double2 __ovld __cnfn sinh(double2); | ||
2314 | double3 __ovld __cnfn sinh(double3); | ||
2315 | double4 __ovld __cnfn sinh(double4); | ||
2316 | double8 __ovld __cnfn sinh(double8); | ||
2317 | double16 __ovld __cnfn sinh(double16); | ||
2318 | -#endif //cl_khr_fp64 | ||
2319 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2320 | #ifdef cl_khr_fp16 | ||
2321 | half __ovld __cnfn sinh(half); | ||
2322 | half2 __ovld __cnfn sinh(half2); | ||
2323 | @@ -8558,14 +8571,14 @@ float3 __ovld __cnfn sinpi(float3 x); | ||
2324 | float4 __ovld __cnfn sinpi(float4 x); | ||
2325 | float8 __ovld __cnfn sinpi(float8 x); | ||
2326 | float16 __ovld __cnfn sinpi(float16 x); | ||
2327 | -#ifdef cl_khr_fp64 | ||
2328 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2329 | double __ovld __cnfn sinpi(double x); | ||
2330 | double2 __ovld __cnfn sinpi(double2 x); | ||
2331 | double3 __ovld __cnfn sinpi(double3 x); | ||
2332 | double4 __ovld __cnfn sinpi(double4 x); | ||
2333 | double8 __ovld __cnfn sinpi(double8 x); | ||
2334 | double16 __ovld __cnfn sinpi(double16 x); | ||
2335 | -#endif //cl_khr_fp64 | ||
2336 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2337 | #ifdef cl_khr_fp16 | ||
2338 | half __ovld __cnfn sinpi(half x); | ||
2339 | half2 __ovld __cnfn sinpi(half2 x); | ||
2340 | @@ -8584,14 +8597,14 @@ float3 __ovld __cnfn sqrt(float3); | ||
2341 | float4 __ovld __cnfn sqrt(float4); | ||
2342 | float8 __ovld __cnfn sqrt(float8); | ||
2343 | float16 __ovld __cnfn sqrt(float16); | ||
2344 | -#ifdef cl_khr_fp64 | ||
2345 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2346 | double __ovld __cnfn sqrt(double); | ||
2347 | double2 __ovld __cnfn sqrt(double2); | ||
2348 | double3 __ovld __cnfn sqrt(double3); | ||
2349 | double4 __ovld __cnfn sqrt(double4); | ||
2350 | double8 __ovld __cnfn sqrt(double8); | ||
2351 | double16 __ovld __cnfn sqrt(double16); | ||
2352 | -#endif //cl_khr_fp64 | ||
2353 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2354 | #ifdef cl_khr_fp16 | ||
2355 | half __ovld __cnfn sqrt(half); | ||
2356 | half2 __ovld __cnfn sqrt(half2); | ||
2357 | @@ -8610,14 +8623,14 @@ float3 __ovld __cnfn tan(float3); | ||
2358 | float4 __ovld __cnfn tan(float4); | ||
2359 | float8 __ovld __cnfn tan(float8); | ||
2360 | float16 __ovld __cnfn tan(float16); | ||
2361 | -#ifdef cl_khr_fp64 | ||
2362 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2363 | double __ovld __cnfn tan(double); | ||
2364 | double2 __ovld __cnfn tan(double2); | ||
2365 | double3 __ovld __cnfn tan(double3); | ||
2366 | double4 __ovld __cnfn tan(double4); | ||
2367 | double8 __ovld __cnfn tan(double8); | ||
2368 | double16 __ovld __cnfn tan(double16); | ||
2369 | -#endif //cl_khr_fp64 | ||
2370 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2371 | #ifdef cl_khr_fp16 | ||
2372 | half __ovld __cnfn tan(half); | ||
2373 | half2 __ovld __cnfn tan(half2); | ||
2374 | @@ -8636,14 +8649,14 @@ float3 __ovld __cnfn tanh(float3); | ||
2375 | float4 __ovld __cnfn tanh(float4); | ||
2376 | float8 __ovld __cnfn tanh(float8); | ||
2377 | float16 __ovld __cnfn tanh(float16); | ||
2378 | -#ifdef cl_khr_fp64 | ||
2379 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2380 | double __ovld __cnfn tanh(double); | ||
2381 | double2 __ovld __cnfn tanh(double2); | ||
2382 | double3 __ovld __cnfn tanh(double3); | ||
2383 | double4 __ovld __cnfn tanh(double4); | ||
2384 | double8 __ovld __cnfn tanh(double8); | ||
2385 | double16 __ovld __cnfn tanh(double16); | ||
2386 | -#endif //cl_khr_fp64 | ||
2387 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2388 | #ifdef cl_khr_fp16 | ||
2389 | half __ovld __cnfn tanh(half); | ||
2390 | half2 __ovld __cnfn tanh(half2); | ||
2391 | @@ -8662,14 +8675,14 @@ float3 __ovld __cnfn tanpi(float3 x); | ||
2392 | float4 __ovld __cnfn tanpi(float4 x); | ||
2393 | float8 __ovld __cnfn tanpi(float8 x); | ||
2394 | float16 __ovld __cnfn tanpi(float16 x); | ||
2395 | -#ifdef cl_khr_fp64 | ||
2396 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2397 | double __ovld __cnfn tanpi(double x); | ||
2398 | double2 __ovld __cnfn tanpi(double2 x); | ||
2399 | double3 __ovld __cnfn tanpi(double3 x); | ||
2400 | double4 __ovld __cnfn tanpi(double4 x); | ||
2401 | double8 __ovld __cnfn tanpi(double8 x); | ||
2402 | double16 __ovld __cnfn tanpi(double16 x); | ||
2403 | -#endif //cl_khr_fp64 | ||
2404 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2405 | #ifdef cl_khr_fp16 | ||
2406 | half __ovld __cnfn tanpi(half x); | ||
2407 | half2 __ovld __cnfn tanpi(half2 x); | ||
2408 | @@ -8688,14 +8701,14 @@ float3 __ovld __cnfn tgamma(float3); | ||
2409 | float4 __ovld __cnfn tgamma(float4); | ||
2410 | float8 __ovld __cnfn tgamma(float8); | ||
2411 | float16 __ovld __cnfn tgamma(float16); | ||
2412 | -#ifdef cl_khr_fp64 | ||
2413 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2414 | double __ovld __cnfn tgamma(double); | ||
2415 | double2 __ovld __cnfn tgamma(double2); | ||
2416 | double3 __ovld __cnfn tgamma(double3); | ||
2417 | double4 __ovld __cnfn tgamma(double4); | ||
2418 | double8 __ovld __cnfn tgamma(double8); | ||
2419 | double16 __ovld __cnfn tgamma(double16); | ||
2420 | -#endif //cl_khr_fp64 | ||
2421 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2422 | #ifdef cl_khr_fp16 | ||
2423 | half __ovld __cnfn tgamma(half); | ||
2424 | half2 __ovld __cnfn tgamma(half2); | ||
2425 | @@ -8715,14 +8728,14 @@ float3 __ovld __cnfn trunc(float3); | ||
2426 | float4 __ovld __cnfn trunc(float4); | ||
2427 | float8 __ovld __cnfn trunc(float8); | ||
2428 | float16 __ovld __cnfn trunc(float16); | ||
2429 | -#ifdef cl_khr_fp64 | ||
2430 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2431 | double __ovld __cnfn trunc(double); | ||
2432 | double2 __ovld __cnfn trunc(double2); | ||
2433 | double3 __ovld __cnfn trunc(double3); | ||
2434 | double4 __ovld __cnfn trunc(double4); | ||
2435 | double8 __ovld __cnfn trunc(double8); | ||
2436 | double16 __ovld __cnfn trunc(double16); | ||
2437 | -#endif //cl_khr_fp64 | ||
2438 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2439 | #ifdef cl_khr_fp16 | ||
2440 | half __ovld __cnfn trunc(half); | ||
2441 | half2 __ovld __cnfn trunc(half2); | ||
2442 | @@ -10108,7 +10121,7 @@ float3 __ovld __cnfn clamp(float3 x, float minval, float maxval); | ||
2443 | float4 __ovld __cnfn clamp(float4 x, float minval, float maxval); | ||
2444 | float8 __ovld __cnfn clamp(float8 x, float minval, float maxval); | ||
2445 | float16 __ovld __cnfn clamp(float16 x, float minval, float maxval); | ||
2446 | -#ifdef cl_khr_fp64 | ||
2447 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2448 | double __ovld __cnfn clamp(double x, double minval, double maxval); | ||
2449 | double2 __ovld __cnfn clamp(double2 x, double2 minval, double2 maxval); | ||
2450 | double3 __ovld __cnfn clamp(double3 x, double3 minval, double3 maxval); | ||
2451 | @@ -10120,7 +10133,7 @@ double3 __ovld __cnfn clamp(double3 x, double minval, double maxval); | ||
2452 | double4 __ovld __cnfn clamp(double4 x, double minval, double maxval); | ||
2453 | double8 __ovld __cnfn clamp(double8 x, double minval, double maxval); | ||
2454 | double16 __ovld __cnfn clamp(double16 x, double minval, double maxval); | ||
2455 | -#endif //cl_khr_fp64 | ||
2456 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2457 | #ifdef cl_khr_fp16 | ||
2458 | half __ovld __cnfn clamp(half x, half minval, half maxval); | ||
2459 | half2 __ovld __cnfn clamp(half2 x, half2 minval, half2 maxval); | ||
2460 | @@ -10145,14 +10158,14 @@ float3 __ovld __cnfn degrees(float3 radians); | ||
2461 | float4 __ovld __cnfn degrees(float4 radians); | ||
2462 | float8 __ovld __cnfn degrees(float8 radians); | ||
2463 | float16 __ovld __cnfn degrees(float16 radians); | ||
2464 | -#ifdef cl_khr_fp64 | ||
2465 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2466 | double __ovld __cnfn degrees(double radians); | ||
2467 | double2 __ovld __cnfn degrees(double2 radians); | ||
2468 | double3 __ovld __cnfn degrees(double3 radians); | ||
2469 | double4 __ovld __cnfn degrees(double4 radians); | ||
2470 | double8 __ovld __cnfn degrees(double8 radians); | ||
2471 | double16 __ovld __cnfn degrees(double16 radians); | ||
2472 | -#endif //cl_khr_fp64 | ||
2473 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2474 | #ifdef cl_khr_fp16 | ||
2475 | half __ovld __cnfn degrees(half radians); | ||
2476 | half2 __ovld __cnfn degrees(half2 radians); | ||
2477 | @@ -10177,7 +10190,7 @@ float3 __ovld __cnfn max(float3 x, float y); | ||
2478 | float4 __ovld __cnfn max(float4 x, float y); | ||
2479 | float8 __ovld __cnfn max(float8 x, float y); | ||
2480 | float16 __ovld __cnfn max(float16 x, float y); | ||
2481 | -#ifdef cl_khr_fp64 | ||
2482 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2483 | double __ovld __cnfn max(double x, double y); | ||
2484 | double2 __ovld __cnfn max(double2 x, double2 y); | ||
2485 | double3 __ovld __cnfn max(double3 x, double3 y); | ||
2486 | @@ -10189,7 +10202,7 @@ double3 __ovld __cnfn max(double3 x, double y); | ||
2487 | double4 __ovld __cnfn max(double4 x, double y); | ||
2488 | double8 __ovld __cnfn max(double8 x, double y); | ||
2489 | double16 __ovld __cnfn max(double16 x, double y); | ||
2490 | -#endif //cl_khr_fp64 | ||
2491 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2492 | #ifdef cl_khr_fp16 | ||
2493 | half __ovld __cnfn max(half x, half y); | ||
2494 | half2 __ovld __cnfn max(half2 x, half2 y); | ||
2495 | @@ -10219,7 +10232,7 @@ float3 __ovld __cnfn min(float3 x, float y); | ||
2496 | float4 __ovld __cnfn min(float4 x, float y); | ||
2497 | float8 __ovld __cnfn min(float8 x, float y); | ||
2498 | float16 __ovld __cnfn min(float16 x, float y); | ||
2499 | -#ifdef cl_khr_fp64 | ||
2500 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2501 | double __ovld __cnfn min(double x, double y); | ||
2502 | double2 __ovld __cnfn min(double2 x, double2 y); | ||
2503 | double3 __ovld __cnfn min(double3 x, double3 y); | ||
2504 | @@ -10231,7 +10244,7 @@ double3 __ovld __cnfn min(double3 x, double y); | ||
2505 | double4 __ovld __cnfn min(double4 x, double y); | ||
2506 | double8 __ovld __cnfn min(double8 x, double y); | ||
2507 | double16 __ovld __cnfn min(double16 x, double y); | ||
2508 | -#endif //cl_khr_fp64 | ||
2509 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2510 | #ifdef cl_khr_fp16 | ||
2511 | half __ovld __cnfn min(half x, half y); | ||
2512 | half2 __ovld __cnfn min(half2 x, half2 y); | ||
2513 | @@ -10264,7 +10277,7 @@ float3 __ovld __cnfn mix(float3 x, float3 y, float a); | ||
2514 | float4 __ovld __cnfn mix(float4 x, float4 y, float a); | ||
2515 | float8 __ovld __cnfn mix(float8 x, float8 y, float a); | ||
2516 | float16 __ovld __cnfn mix(float16 x, float16 y, float a); | ||
2517 | -#ifdef cl_khr_fp64 | ||
2518 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2519 | double __ovld __cnfn mix(double x, double y, double a); | ||
2520 | double2 __ovld __cnfn mix(double2 x, double2 y, double2 a); | ||
2521 | double3 __ovld __cnfn mix(double3 x, double3 y, double3 a); | ||
2522 | @@ -10276,7 +10289,7 @@ double3 __ovld __cnfn mix(double3 x, double3 y, double a); | ||
2523 | double4 __ovld __cnfn mix(double4 x, double4 y, double a); | ||
2524 | double8 __ovld __cnfn mix(double8 x, double8 y, double a); | ||
2525 | double16 __ovld __cnfn mix(double16 x, double16 y, double a); | ||
2526 | -#endif //cl_khr_fp64 | ||
2527 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2528 | #ifdef cl_khr_fp16 | ||
2529 | half __ovld __cnfn mix(half x, half y, half a); | ||
2530 | half2 __ovld __cnfn mix(half2 x, half2 y, half2 a); | ||
2531 | @@ -10301,14 +10314,14 @@ float3 __ovld __cnfn radians(float3 degrees); | ||
2532 | float4 __ovld __cnfn radians(float4 degrees); | ||
2533 | float8 __ovld __cnfn radians(float8 degrees); | ||
2534 | float16 __ovld __cnfn radians(float16 degrees); | ||
2535 | -#ifdef cl_khr_fp64 | ||
2536 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2537 | double __ovld __cnfn radians(double degrees); | ||
2538 | double2 __ovld __cnfn radians(double2 degrees); | ||
2539 | double3 __ovld __cnfn radians(double3 degrees); | ||
2540 | double4 __ovld __cnfn radians(double4 degrees); | ||
2541 | double8 __ovld __cnfn radians(double8 degrees); | ||
2542 | double16 __ovld __cnfn radians(double16 degrees); | ||
2543 | -#endif //cl_khr_fp64 | ||
2544 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2545 | #ifdef cl_khr_fp16 | ||
2546 | half __ovld __cnfn radians(half degrees); | ||
2547 | half2 __ovld __cnfn radians(half2 degrees); | ||
2548 | @@ -10332,7 +10345,7 @@ float3 __ovld __cnfn step(float edge, float3 x); | ||
2549 | float4 __ovld __cnfn step(float edge, float4 x); | ||
2550 | float8 __ovld __cnfn step(float edge, float8 x); | ||
2551 | float16 __ovld __cnfn step(float edge, float16 x); | ||
2552 | -#ifdef cl_khr_fp64 | ||
2553 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2554 | double __ovld __cnfn step(double edge, double x); | ||
2555 | double2 __ovld __cnfn step(double2 edge, double2 x); | ||
2556 | double3 __ovld __cnfn step(double3 edge, double3 x); | ||
2557 | @@ -10344,7 +10357,7 @@ double3 __ovld __cnfn step(double edge, double3 x); | ||
2558 | double4 __ovld __cnfn step(double edge, double4 x); | ||
2559 | double8 __ovld __cnfn step(double edge, double8 x); | ||
2560 | double16 __ovld __cnfn step(double edge, double16 x); | ||
2561 | -#endif //cl_khr_fp64 | ||
2562 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2563 | #ifdef cl_khr_fp16 | ||
2564 | half __ovld __cnfn step(half edge, half x); | ||
2565 | half2 __ovld __cnfn step(half2 edge, half2 x); | ||
2566 | @@ -10383,7 +10396,7 @@ float3 __ovld __cnfn smoothstep(float edge0, float edge1, float3 x); | ||
2567 | float4 __ovld __cnfn smoothstep(float edge0, float edge1, float4 x); | ||
2568 | float8 __ovld __cnfn smoothstep(float edge0, float edge1, float8 x); | ||
2569 | float16 __ovld __cnfn smoothstep(float edge0, float edge1, float16 x); | ||
2570 | -#ifdef cl_khr_fp64 | ||
2571 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2572 | double __ovld __cnfn smoothstep(double edge0, double edge1, double x); | ||
2573 | double2 __ovld __cnfn smoothstep(double2 edge0, double2 edge1, double2 x); | ||
2574 | double3 __ovld __cnfn smoothstep(double3 edge0, double3 edge1, double3 x); | ||
2575 | @@ -10395,7 +10408,7 @@ double3 __ovld __cnfn smoothstep(double edge0, double edge1, double3 x); | ||
2576 | double4 __ovld __cnfn smoothstep(double edge0, double edge1, double4 x); | ||
2577 | double8 __ovld __cnfn smoothstep(double edge0, double edge1, double8 x); | ||
2578 | double16 __ovld __cnfn smoothstep(double edge0, double edge1, double16 x); | ||
2579 | -#endif //cl_khr_fp64 | ||
2580 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2581 | #ifdef cl_khr_fp16 | ||
2582 | half __ovld __cnfn smoothstep(half edge0, half edge1, half x); | ||
2583 | half2 __ovld __cnfn smoothstep(half2 edge0, half2 edge1, half2 x); | ||
2584 | @@ -10420,14 +10433,14 @@ float3 __ovld __cnfn sign(float3 x); | ||
2585 | float4 __ovld __cnfn sign(float4 x); | ||
2586 | float8 __ovld __cnfn sign(float8 x); | ||
2587 | float16 __ovld __cnfn sign(float16 x); | ||
2588 | -#ifdef cl_khr_fp64 | ||
2589 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2590 | double __ovld __cnfn sign(double x); | ||
2591 | double2 __ovld __cnfn sign(double2 x); | ||
2592 | double3 __ovld __cnfn sign(double3 x); | ||
2593 | double4 __ovld __cnfn sign(double4 x); | ||
2594 | double8 __ovld __cnfn sign(double8 x); | ||
2595 | double16 __ovld __cnfn sign(double16 x); | ||
2596 | -#endif //cl_khr_fp64 | ||
2597 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2598 | #ifdef cl_khr_fp16 | ||
2599 | half __ovld __cnfn sign(half x); | ||
2600 | half2 __ovld __cnfn sign(half2 x); | ||
2601 | @@ -10445,10 +10458,10 @@ half16 __ovld __cnfn sign(half16 x); | ||
2602 | */ | ||
2603 | float4 __ovld __cnfn cross(float4 p0, float4 p1); | ||
2604 | float3 __ovld __cnfn cross(float3 p0, float3 p1); | ||
2605 | -#ifdef cl_khr_fp64 | ||
2606 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2607 | double4 __ovld __cnfn cross(double4 p0, double4 p1); | ||
2608 | double3 __ovld __cnfn cross(double3 p0, double3 p1); | ||
2609 | -#endif //cl_khr_fp64 | ||
2610 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2611 | #ifdef cl_khr_fp16 | ||
2612 | half4 __ovld __cnfn cross(half4 p0, half4 p1); | ||
2613 | half3 __ovld __cnfn cross(half3 p0, half3 p1); | ||
2614 | @@ -10461,12 +10474,12 @@ float __ovld __cnfn dot(float p0, float p1); | ||
2615 | float __ovld __cnfn dot(float2 p0, float2 p1); | ||
2616 | float __ovld __cnfn dot(float3 p0, float3 p1); | ||
2617 | float __ovld __cnfn dot(float4 p0, float4 p1); | ||
2618 | -#ifdef cl_khr_fp64 | ||
2619 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2620 | double __ovld __cnfn dot(double p0, double p1); | ||
2621 | double __ovld __cnfn dot(double2 p0, double2 p1); | ||
2622 | double __ovld __cnfn dot(double3 p0, double3 p1); | ||
2623 | double __ovld __cnfn dot(double4 p0, double4 p1); | ||
2624 | -#endif //cl_khr_fp64 | ||
2625 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2626 | #ifdef cl_khr_fp16 | ||
2627 | half __ovld __cnfn dot(half p0, half p1); | ||
2628 | half __ovld __cnfn dot(half2 p0, half2 p1); | ||
2629 | @@ -10482,12 +10495,12 @@ float __ovld __cnfn distance(float p0, float p1); | ||
2630 | float __ovld __cnfn distance(float2 p0, float2 p1); | ||
2631 | float __ovld __cnfn distance(float3 p0, float3 p1); | ||
2632 | float __ovld __cnfn distance(float4 p0, float4 p1); | ||
2633 | -#ifdef cl_khr_fp64 | ||
2634 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2635 | double __ovld __cnfn distance(double p0, double p1); | ||
2636 | double __ovld __cnfn distance(double2 p0, double2 p1); | ||
2637 | double __ovld __cnfn distance(double3 p0, double3 p1); | ||
2638 | double __ovld __cnfn distance(double4 p0, double4 p1); | ||
2639 | -#endif //cl_khr_fp64 | ||
2640 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2641 | #ifdef cl_khr_fp16 | ||
2642 | half __ovld __cnfn distance(half p0, half p1); | ||
2643 | half __ovld __cnfn distance(half2 p0, half2 p1); | ||
2644 | @@ -10503,12 +10516,12 @@ float __ovld __cnfn length(float p); | ||
2645 | float __ovld __cnfn length(float2 p); | ||
2646 | float __ovld __cnfn length(float3 p); | ||
2647 | float __ovld __cnfn length(float4 p); | ||
2648 | -#ifdef cl_khr_fp64 | ||
2649 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2650 | double __ovld __cnfn length(double p); | ||
2651 | double __ovld __cnfn length(double2 p); | ||
2652 | double __ovld __cnfn length(double3 p); | ||
2653 | double __ovld __cnfn length(double4 p); | ||
2654 | -#endif //cl_khr_fp64 | ||
2655 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2656 | #ifdef cl_khr_fp16 | ||
2657 | half __ovld __cnfn length(half p); | ||
2658 | half __ovld __cnfn length(half2 p); | ||
2659 | @@ -10524,12 +10537,12 @@ float __ovld __cnfn normalize(float p); | ||
2660 | float2 __ovld __cnfn normalize(float2 p); | ||
2661 | float3 __ovld __cnfn normalize(float3 p); | ||
2662 | float4 __ovld __cnfn normalize(float4 p); | ||
2663 | -#ifdef cl_khr_fp64 | ||
2664 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2665 | double __ovld __cnfn normalize(double p); | ||
2666 | double2 __ovld __cnfn normalize(double2 p); | ||
2667 | double3 __ovld __cnfn normalize(double3 p); | ||
2668 | double4 __ovld __cnfn normalize(double4 p); | ||
2669 | -#endif //cl_khr_fp64 | ||
2670 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2671 | #ifdef cl_khr_fp16 | ||
2672 | half __ovld __cnfn normalize(half p); | ||
2673 | half2 __ovld __cnfn normalize(half2 p); | ||
2674 | @@ -10610,14 +10623,14 @@ int3 __ovld __cnfn isequal(float3 x, float3 y); | ||
2675 | int4 __ovld __cnfn isequal(float4 x, float4 y); | ||
2676 | int8 __ovld __cnfn isequal(float8 x, float8 y); | ||
2677 | int16 __ovld __cnfn isequal(float16 x, float16 y); | ||
2678 | -#ifdef cl_khr_fp64 | ||
2679 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2680 | int __ovld __cnfn isequal(double x, double y); | ||
2681 | long2 __ovld __cnfn isequal(double2 x, double2 y); | ||
2682 | long3 __ovld __cnfn isequal(double3 x, double3 y); | ||
2683 | long4 __ovld __cnfn isequal(double4 x, double4 y); | ||
2684 | long8 __ovld __cnfn isequal(double8 x, double8 y); | ||
2685 | long16 __ovld __cnfn isequal(double16 x, double16 y); | ||
2686 | -#endif //cl_khr_fp64 | ||
2687 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2688 | #ifdef cl_khr_fp16 | ||
2689 | int __ovld __cnfn isequal(half x, half y); | ||
2690 | short2 __ovld __cnfn isequal(half2 x, half2 y); | ||
2691 | @@ -10636,14 +10649,14 @@ int3 __ovld __cnfn isnotequal(float3 x, float3 y); | ||
2692 | int4 __ovld __cnfn isnotequal(float4 x, float4 y); | ||
2693 | int8 __ovld __cnfn isnotequal(float8 x, float8 y); | ||
2694 | int16 __ovld __cnfn isnotequal(float16 x, float16 y); | ||
2695 | -#ifdef cl_khr_fp64 | ||
2696 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2697 | int __ovld __cnfn isnotequal(double x, double y); | ||
2698 | long2 __ovld __cnfn isnotequal(double2 x, double2 y); | ||
2699 | long3 __ovld __cnfn isnotequal(double3 x, double3 y); | ||
2700 | long4 __ovld __cnfn isnotequal(double4 x, double4 y); | ||
2701 | long8 __ovld __cnfn isnotequal(double8 x, double8 y); | ||
2702 | long16 __ovld __cnfn isnotequal(double16 x, double16 y); | ||
2703 | -#endif //cl_khr_fp64 | ||
2704 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2705 | #ifdef cl_khr_fp16 | ||
2706 | int __ovld __cnfn isnotequal(half x, half y); | ||
2707 | short2 __ovld __cnfn isnotequal(half2 x, half2 y); | ||
2708 | @@ -10662,14 +10675,14 @@ int3 __ovld __cnfn isgreater(float3 x, float3 y); | ||
2709 | int4 __ovld __cnfn isgreater(float4 x, float4 y); | ||
2710 | int8 __ovld __cnfn isgreater(float8 x, float8 y); | ||
2711 | int16 __ovld __cnfn isgreater(float16 x, float16 y); | ||
2712 | -#ifdef cl_khr_fp64 | ||
2713 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2714 | int __ovld __cnfn isgreater(double x, double y); | ||
2715 | long2 __ovld __cnfn isgreater(double2 x, double2 y); | ||
2716 | long3 __ovld __cnfn isgreater(double3 x, double3 y); | ||
2717 | long4 __ovld __cnfn isgreater(double4 x, double4 y); | ||
2718 | long8 __ovld __cnfn isgreater(double8 x, double8 y); | ||
2719 | long16 __ovld __cnfn isgreater(double16 x, double16 y); | ||
2720 | -#endif //cl_khr_fp64 | ||
2721 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2722 | #ifdef cl_khr_fp16 | ||
2723 | int __ovld __cnfn isgreater(half x, half y); | ||
2724 | short2 __ovld __cnfn isgreater(half2 x, half2 y); | ||
2725 | @@ -10688,14 +10701,14 @@ int3 __ovld __cnfn isgreaterequal(float3 x, float3 y); | ||
2726 | int4 __ovld __cnfn isgreaterequal(float4 x, float4 y); | ||
2727 | int8 __ovld __cnfn isgreaterequal(float8 x, float8 y); | ||
2728 | int16 __ovld __cnfn isgreaterequal(float16 x, float16 y); | ||
2729 | -#ifdef cl_khr_fp64 | ||
2730 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2731 | int __ovld __cnfn isgreaterequal(double x, double y); | ||
2732 | long2 __ovld __cnfn isgreaterequal(double2 x, double2 y); | ||
2733 | long3 __ovld __cnfn isgreaterequal(double3 x, double3 y); | ||
2734 | long4 __ovld __cnfn isgreaterequal(double4 x, double4 y); | ||
2735 | long8 __ovld __cnfn isgreaterequal(double8 x, double8 y); | ||
2736 | long16 __ovld __cnfn isgreaterequal(double16 x, double16 y); | ||
2737 | -#endif //cl_khr_fp64 | ||
2738 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2739 | #ifdef cl_khr_fp16 | ||
2740 | int __ovld __cnfn isgreaterequal(half x, half y); | ||
2741 | short2 __ovld __cnfn isgreaterequal(half2 x, half2 y); | ||
2742 | @@ -10714,14 +10727,14 @@ int3 __ovld __cnfn isless(float3 x, float3 y); | ||
2743 | int4 __ovld __cnfn isless(float4 x, float4 y); | ||
2744 | int8 __ovld __cnfn isless(float8 x, float8 y); | ||
2745 | int16 __ovld __cnfn isless(float16 x, float16 y); | ||
2746 | -#ifdef cl_khr_fp64 | ||
2747 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2748 | int __ovld __cnfn isless(double x, double y); | ||
2749 | long2 __ovld __cnfn isless(double2 x, double2 y); | ||
2750 | long3 __ovld __cnfn isless(double3 x, double3 y); | ||
2751 | long4 __ovld __cnfn isless(double4 x, double4 y); | ||
2752 | long8 __ovld __cnfn isless(double8 x, double8 y); | ||
2753 | long16 __ovld __cnfn isless(double16 x, double16 y); | ||
2754 | -#endif //cl_khr_fp64 | ||
2755 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2756 | #ifdef cl_khr_fp16 | ||
2757 | int __ovld __cnfn isless(half x, half y); | ||
2758 | short2 __ovld __cnfn isless(half2 x, half2 y); | ||
2759 | @@ -10740,14 +10753,14 @@ int3 __ovld __cnfn islessequal(float3 x, float3 y); | ||
2760 | int4 __ovld __cnfn islessequal(float4 x, float4 y); | ||
2761 | int8 __ovld __cnfn islessequal(float8 x, float8 y); | ||
2762 | int16 __ovld __cnfn islessequal(float16 x, float16 y); | ||
2763 | -#ifdef cl_khr_fp64 | ||
2764 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2765 | int __ovld __cnfn islessequal(double x, double y); | ||
2766 | long2 __ovld __cnfn islessequal(double2 x, double2 y); | ||
2767 | long3 __ovld __cnfn islessequal(double3 x, double3 y); | ||
2768 | long4 __ovld __cnfn islessequal(double4 x, double4 y); | ||
2769 | long8 __ovld __cnfn islessequal(double8 x, double8 y); | ||
2770 | long16 __ovld __cnfn islessequal(double16 x, double16 y); | ||
2771 | -#endif //cl_khr_fp64 | ||
2772 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2773 | #ifdef cl_khr_fp16 | ||
2774 | int __ovld __cnfn islessequal(half x, half y); | ||
2775 | short2 __ovld __cnfn islessequal(half2 x, half2 y); | ||
2776 | @@ -10767,14 +10780,14 @@ int3 __ovld __cnfn islessgreater(float3 x, float3 y); | ||
2777 | int4 __ovld __cnfn islessgreater(float4 x, float4 y); | ||
2778 | int8 __ovld __cnfn islessgreater(float8 x, float8 y); | ||
2779 | int16 __ovld __cnfn islessgreater(float16 x, float16 y); | ||
2780 | -#ifdef cl_khr_fp64 | ||
2781 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2782 | int __ovld __cnfn islessgreater(double x, double y); | ||
2783 | long2 __ovld __cnfn islessgreater(double2 x, double2 y); | ||
2784 | long3 __ovld __cnfn islessgreater(double3 x, double3 y); | ||
2785 | long4 __ovld __cnfn islessgreater(double4 x, double4 y); | ||
2786 | long8 __ovld __cnfn islessgreater(double8 x, double8 y); | ||
2787 | long16 __ovld __cnfn islessgreater(double16 x, double16 y); | ||
2788 | -#endif //cl_khr_fp64 | ||
2789 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2790 | #ifdef cl_khr_fp16 | ||
2791 | int __ovld __cnfn islessgreater(half x, half y); | ||
2792 | short2 __ovld __cnfn islessgreater(half2 x, half2 y); | ||
2793 | @@ -10793,14 +10806,14 @@ int3 __ovld __cnfn isfinite(float3); | ||
2794 | int4 __ovld __cnfn isfinite(float4); | ||
2795 | int8 __ovld __cnfn isfinite(float8); | ||
2796 | int16 __ovld __cnfn isfinite(float16); | ||
2797 | -#ifdef cl_khr_fp64 | ||
2798 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2799 | int __ovld __cnfn isfinite(double); | ||
2800 | long2 __ovld __cnfn isfinite(double2); | ||
2801 | long3 __ovld __cnfn isfinite(double3); | ||
2802 | long4 __ovld __cnfn isfinite(double4); | ||
2803 | long8 __ovld __cnfn isfinite(double8); | ||
2804 | long16 __ovld __cnfn isfinite(double16); | ||
2805 | -#endif //cl_khr_fp64 | ||
2806 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2807 | #ifdef cl_khr_fp16 | ||
2808 | int __ovld __cnfn isfinite(half); | ||
2809 | short2 __ovld __cnfn isfinite(half2); | ||
2810 | @@ -10819,14 +10832,14 @@ int3 __ovld __cnfn isinf(float3); | ||
2811 | int4 __ovld __cnfn isinf(float4); | ||
2812 | int8 __ovld __cnfn isinf(float8); | ||
2813 | int16 __ovld __cnfn isinf(float16); | ||
2814 | -#ifdef cl_khr_fp64 | ||
2815 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2816 | int __ovld __cnfn isinf(double); | ||
2817 | long2 __ovld __cnfn isinf(double2); | ||
2818 | long3 __ovld __cnfn isinf(double3); | ||
2819 | long4 __ovld __cnfn isinf(double4); | ||
2820 | long8 __ovld __cnfn isinf(double8); | ||
2821 | long16 __ovld __cnfn isinf(double16); | ||
2822 | -#endif //cl_khr_fp64 | ||
2823 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2824 | #ifdef cl_khr_fp16 | ||
2825 | int __ovld __cnfn isinf(half); | ||
2826 | short2 __ovld __cnfn isinf(half2); | ||
2827 | @@ -10845,14 +10858,14 @@ int3 __ovld __cnfn isnan(float3); | ||
2828 | int4 __ovld __cnfn isnan(float4); | ||
2829 | int8 __ovld __cnfn isnan(float8); | ||
2830 | int16 __ovld __cnfn isnan(float16); | ||
2831 | -#ifdef cl_khr_fp64 | ||
2832 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2833 | int __ovld __cnfn isnan(double); | ||
2834 | long2 __ovld __cnfn isnan(double2); | ||
2835 | long3 __ovld __cnfn isnan(double3); | ||
2836 | long4 __ovld __cnfn isnan(double4); | ||
2837 | long8 __ovld __cnfn isnan(double8); | ||
2838 | long16 __ovld __cnfn isnan(double16); | ||
2839 | -#endif //cl_khr_fp64 | ||
2840 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2841 | #ifdef cl_khr_fp16 | ||
2842 | int __ovld __cnfn isnan(half); | ||
2843 | short2 __ovld __cnfn isnan(half2); | ||
2844 | @@ -10871,14 +10884,14 @@ int3 __ovld __cnfn isnormal(float3); | ||
2845 | int4 __ovld __cnfn isnormal(float4); | ||
2846 | int8 __ovld __cnfn isnormal(float8); | ||
2847 | int16 __ovld __cnfn isnormal(float16); | ||
2848 | -#ifdef cl_khr_fp64 | ||
2849 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2850 | int __ovld __cnfn isnormal(double); | ||
2851 | long2 __ovld __cnfn isnormal(double2); | ||
2852 | long3 __ovld __cnfn isnormal(double3); | ||
2853 | long4 __ovld __cnfn isnormal(double4); | ||
2854 | long8 __ovld __cnfn isnormal(double8); | ||
2855 | long16 __ovld __cnfn isnormal(double16); | ||
2856 | -#endif //cl_khr_fp64 | ||
2857 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2858 | #ifdef cl_khr_fp16 | ||
2859 | int __ovld __cnfn isnormal(half); | ||
2860 | short2 __ovld __cnfn isnormal(half2); | ||
2861 | @@ -10899,14 +10912,14 @@ int3 __ovld __cnfn isordered(float3 x, float3 y); | ||
2862 | int4 __ovld __cnfn isordered(float4 x, float4 y); | ||
2863 | int8 __ovld __cnfn isordered(float8 x, float8 y); | ||
2864 | int16 __ovld __cnfn isordered(float16 x, float16 y); | ||
2865 | -#ifdef cl_khr_fp64 | ||
2866 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2867 | int __ovld __cnfn isordered(double x, double y); | ||
2868 | long2 __ovld __cnfn isordered(double2 x, double2 y); | ||
2869 | long3 __ovld __cnfn isordered(double3 x, double3 y); | ||
2870 | long4 __ovld __cnfn isordered(double4 x, double4 y); | ||
2871 | long8 __ovld __cnfn isordered(double8 x, double8 y); | ||
2872 | long16 __ovld __cnfn isordered(double16 x, double16 y); | ||
2873 | -#endif //cl_khr_fp64 | ||
2874 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2875 | #ifdef cl_khr_fp16 | ||
2876 | int __ovld __cnfn isordered(half x, half y); | ||
2877 | short2 __ovld __cnfn isordered(half2 x, half2 y); | ||
2878 | @@ -10927,14 +10940,14 @@ int3 __ovld __cnfn isunordered(float3 x, float3 y); | ||
2879 | int4 __ovld __cnfn isunordered(float4 x, float4 y); | ||
2880 | int8 __ovld __cnfn isunordered(float8 x, float8 y); | ||
2881 | int16 __ovld __cnfn isunordered(float16 x, float16 y); | ||
2882 | -#ifdef cl_khr_fp64 | ||
2883 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2884 | int __ovld __cnfn isunordered(double x, double y); | ||
2885 | long2 __ovld __cnfn isunordered(double2 x, double2 y); | ||
2886 | long3 __ovld __cnfn isunordered(double3 x, double3 y); | ||
2887 | long4 __ovld __cnfn isunordered(double4 x, double4 y); | ||
2888 | long8 __ovld __cnfn isunordered(double8 x, double8 y); | ||
2889 | long16 __ovld __cnfn isunordered(double16 x, double16 y); | ||
2890 | -#endif //cl_khr_fp64 | ||
2891 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2892 | #ifdef cl_khr_fp16 | ||
2893 | int __ovld __cnfn isunordered(half x, half y); | ||
2894 | short2 __ovld __cnfn isunordered(half2 x, half2 y); | ||
2895 | @@ -10957,14 +10970,14 @@ int3 __ovld __cnfn signbit(float3); | ||
2896 | int4 __ovld __cnfn signbit(float4); | ||
2897 | int8 __ovld __cnfn signbit(float8); | ||
2898 | int16 __ovld __cnfn signbit(float16); | ||
2899 | -#ifdef cl_khr_fp64 | ||
2900 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2901 | int __ovld __cnfn signbit(double); | ||
2902 | long2 __ovld __cnfn signbit(double2); | ||
2903 | long3 __ovld __cnfn signbit(double3); | ||
2904 | long4 __ovld __cnfn signbit(double4); | ||
2905 | long8 __ovld __cnfn signbit(double8); | ||
2906 | long16 __ovld __cnfn signbit(double16); | ||
2907 | -#endif //cl_khr_fp64 | ||
2908 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2909 | #ifdef cl_khr_fp16 | ||
2910 | int __ovld __cnfn signbit(half); | ||
2911 | short2 __ovld __cnfn signbit(half2); | ||
2912 | @@ -11091,14 +11104,14 @@ float3 __ovld __cnfn bitselect(float3 a, float3 b, float3 c); | ||
2913 | float4 __ovld __cnfn bitselect(float4 a, float4 b, float4 c); | ||
2914 | float8 __ovld __cnfn bitselect(float8 a, float8 b, float8 c); | ||
2915 | float16 __ovld __cnfn bitselect(float16 a, float16 b, float16 c); | ||
2916 | -#ifdef cl_khr_fp64 | ||
2917 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2918 | double __ovld __cnfn bitselect(double a, double b, double c); | ||
2919 | double2 __ovld __cnfn bitselect(double2 a, double2 b, double2 c); | ||
2920 | double3 __ovld __cnfn bitselect(double3 a, double3 b, double3 c); | ||
2921 | double4 __ovld __cnfn bitselect(double4 a, double4 b, double4 c); | ||
2922 | double8 __ovld __cnfn bitselect(double8 a, double8 b, double8 c); | ||
2923 | double16 __ovld __cnfn bitselect(double16 a, double16 b, double16 c); | ||
2924 | -#endif //cl_khr_fp64 | ||
2925 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2926 | #ifdef cl_khr_fp16 | ||
2927 | half __ovld __cnfn bitselect(half a, half b, half c); | ||
2928 | half2 __ovld __cnfn bitselect(half2 a, half2 b, half2 c); | ||
2929 | @@ -11231,7 +11244,7 @@ ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c); | ||
2930 | long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c); | ||
2931 | ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c); | ||
2932 | |||
2933 | -#ifdef cl_khr_fp64 | ||
2934 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2935 | double __ovld __cnfn select(double a, double b, long c); | ||
2936 | double2 __ovld __cnfn select(double2 a, double2 b, long2 c); | ||
2937 | double3 __ovld __cnfn select(double3 a, double3 b, long3 c); | ||
2938 | @@ -11244,7 +11257,7 @@ double3 __ovld __cnfn select(double3 a, double3 b, ulong3 c); | ||
2939 | double4 __ovld __cnfn select(double4 a, double4 b, ulong4 c); | ||
2940 | double8 __ovld __cnfn select(double8 a, double8 b, ulong8 c); | ||
2941 | double16 __ovld __cnfn select(double16 a, double16 b, ulong16 c); | ||
2942 | -#endif //cl_khr_fp64 | ||
2943 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2944 | #ifdef cl_khr_fp16 | ||
2945 | half __ovld __cnfn select(half a, half b, short c); | ||
2946 | half2 __ovld __cnfn select(half2 a, half2 b, short2 c); | ||
2947 | @@ -11323,13 +11336,13 @@ uint16 __ovld vload16(size_t offset, const __constant uint *p); | ||
2948 | long16 __ovld vload16(size_t offset, const __constant long *p); | ||
2949 | ulong16 __ovld vload16(size_t offset, const __constant ulong *p); | ||
2950 | float16 __ovld vload16(size_t offset, const __constant float *p); | ||
2951 | -#ifdef cl_khr_fp64 | ||
2952 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2953 | double2 __ovld vload2(size_t offset, const __constant double *p); | ||
2954 | double3 __ovld vload3(size_t offset, const __constant double *p); | ||
2955 | double4 __ovld vload4(size_t offset, const __constant double *p); | ||
2956 | double8 __ovld vload8(size_t offset, const __constant double *p); | ||
2957 | double16 __ovld vload16(size_t offset, const __constant double *p); | ||
2958 | -#endif //cl_khr_fp64 | ||
2959 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2960 | |||
2961 | #ifdef cl_khr_fp16 | ||
2962 | half __ovld vload(size_t offset, const __constant half *p); | ||
2963 | @@ -11340,7 +11353,7 @@ half8 __ovld vload8(size_t offset, const __constant half *p); | ||
2964 | half16 __ovld vload16(size_t offset, const __constant half *p); | ||
2965 | #endif //cl_khr_fp16 | ||
2966 | |||
2967 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
2968 | +#ifdef __opencl_c_generic_address_space | ||
2969 | char2 __ovld vload2(size_t offset, const char *p); | ||
2970 | uchar2 __ovld vload2(size_t offset, const uchar *p); | ||
2971 | short2 __ovld vload2(size_t offset, const short *p); | ||
2972 | @@ -11387,13 +11400,13 @@ long16 __ovld vload16(size_t offset, const long *p); | ||
2973 | ulong16 __ovld vload16(size_t offset, const ulong *p); | ||
2974 | float16 __ovld vload16(size_t offset, const float *p); | ||
2975 | |||
2976 | -#ifdef cl_khr_fp64 | ||
2977 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2978 | double2 __ovld vload2(size_t offset, const double *p); | ||
2979 | double3 __ovld vload3(size_t offset, const double *p); | ||
2980 | double4 __ovld vload4(size_t offset, const double *p); | ||
2981 | double8 __ovld vload8(size_t offset, const double *p); | ||
2982 | double16 __ovld vload16(size_t offset, const double *p); | ||
2983 | -#endif //cl_khr_fp64 | ||
2984 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
2985 | |||
2986 | #ifdef cl_khr_fp16 | ||
2987 | half __ovld vload(size_t offset, const half *p); | ||
2988 | @@ -11403,7 +11416,7 @@ half4 __ovld vload4(size_t offset, const half *p); | ||
2989 | half8 __ovld vload8(size_t offset, const half *p); | ||
2990 | half16 __ovld vload16(size_t offset, const half *p); | ||
2991 | #endif //cl_khr_fp16 | ||
2992 | -#else | ||
2993 | +#endif //__opencl_c_generic_address_space | ||
2994 | char2 __ovld vload2(size_t offset, const __global char *p); | ||
2995 | uchar2 __ovld vload2(size_t offset, const __global uchar *p); | ||
2996 | short2 __ovld vload2(size_t offset, const __global short *p); | ||
2997 | @@ -11540,7 +11553,7 @@ long16 __ovld vload16(size_t offset, const __private long *p); | ||
2998 | ulong16 __ovld vload16(size_t offset, const __private ulong *p); | ||
2999 | float16 __ovld vload16(size_t offset, const __private float *p); | ||
3000 | |||
3001 | -#ifdef cl_khr_fp64 | ||
3002 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3003 | double2 __ovld vload2(size_t offset, const __global double *p); | ||
3004 | double3 __ovld vload3(size_t offset, const __global double *p); | ||
3005 | double4 __ovld vload4(size_t offset, const __global double *p); | ||
3006 | @@ -11556,7 +11569,7 @@ double3 __ovld vload3(size_t offset, const __private double *p); | ||
3007 | double4 __ovld vload4(size_t offset, const __private double *p); | ||
3008 | double8 __ovld vload8(size_t offset, const __private double *p); | ||
3009 | double16 __ovld vload16(size_t offset, const __private double *p); | ||
3010 | -#endif //cl_khr_fp64 | ||
3011 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3012 | |||
3013 | #ifdef cl_khr_fp16 | ||
3014 | half __ovld vload(size_t offset, const __global half *p); | ||
3015 | @@ -11578,9 +11591,8 @@ half4 __ovld vload4(size_t offset, const __private half *p); | ||
3016 | half8 __ovld vload8(size_t offset, const __private half *p); | ||
3017 | half16 __ovld vload16(size_t offset, const __private half *p); | ||
3018 | #endif //cl_khr_fp16 | ||
3019 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3020 | |||
3021 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3022 | +#ifdef __opencl_c_generic_address_space | ||
3023 | void __ovld vstore2(char2 data, size_t offset, char *p); | ||
3024 | void __ovld vstore2(uchar2 data, size_t offset, uchar *p); | ||
3025 | void __ovld vstore2(short2 data, size_t offset, short *p); | ||
3026 | @@ -11626,13 +11638,13 @@ void __ovld vstore16(uint16 data, size_t offset, uint *p); | ||
3027 | void __ovld vstore16(long16 data, size_t offset, long *p); | ||
3028 | void __ovld vstore16(ulong16 data, size_t offset, ulong *p); | ||
3029 | void __ovld vstore16(float16 data, size_t offset, float *p); | ||
3030 | -#ifdef cl_khr_fp64 | ||
3031 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3032 | void __ovld vstore2(double2 data, size_t offset, double *p); | ||
3033 | void __ovld vstore3(double3 data, size_t offset, double *p); | ||
3034 | void __ovld vstore4(double4 data, size_t offset, double *p); | ||
3035 | void __ovld vstore8(double8 data, size_t offset, double *p); | ||
3036 | void __ovld vstore16(double16 data, size_t offset, double *p); | ||
3037 | -#endif //cl_khr_fp64 | ||
3038 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3039 | #ifdef cl_khr_fp16 | ||
3040 | void __ovld vstore(half data, size_t offset, half *p); | ||
3041 | void __ovld vstore2(half2 data, size_t offset, half *p); | ||
3042 | @@ -11641,7 +11653,7 @@ void __ovld vstore4(half4 data, size_t offset, half *p); | ||
3043 | void __ovld vstore8(half8 data, size_t offset, half *p); | ||
3044 | void __ovld vstore16(half16 data, size_t offset, half *p); | ||
3045 | #endif //cl_khr_fp16 | ||
3046 | -#else | ||
3047 | +#endif //__opencl_c_generic_address_space | ||
3048 | void __ovld vstore2(char2 data, size_t offset, __global char *p); | ||
3049 | void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p); | ||
3050 | void __ovld vstore2(short2 data, size_t offset, __global short *p); | ||
3051 | @@ -11777,7 +11789,7 @@ void __ovld vstore16(uint16 data, size_t offset, __private uint *p); | ||
3052 | void __ovld vstore16(long16 data, size_t offset, __private long *p); | ||
3053 | void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p); | ||
3054 | void __ovld vstore16(float16 data, size_t offset, __private float *p); | ||
3055 | -#ifdef cl_khr_fp64 | ||
3056 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3057 | void __ovld vstore2(double2 data, size_t offset, __global double *p); | ||
3058 | void __ovld vstore3(double3 data, size_t offset, __global double *p); | ||
3059 | void __ovld vstore4(double4 data, size_t offset, __global double *p); | ||
3060 | @@ -11793,7 +11805,7 @@ void __ovld vstore3(double3 data, size_t offset, __private double *p); | ||
3061 | void __ovld vstore4(double4 data, size_t offset, __private double *p); | ||
3062 | void __ovld vstore8(double8 data, size_t offset, __private double *p); | ||
3063 | void __ovld vstore16(double16 data, size_t offset, __private double *p); | ||
3064 | -#endif //cl_khr_fp64 | ||
3065 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3066 | #ifdef cl_khr_fp16 | ||
3067 | void __ovld vstore(half data, size_t offset, __global half *p); | ||
3068 | void __ovld vstore2(half2 data, size_t offset, __global half *p); | ||
3069 | @@ -11814,7 +11826,6 @@ void __ovld vstore4(half4 data, size_t offset, __private half *p); | ||
3070 | void __ovld vstore8(half8 data, size_t offset, __private half *p); | ||
3071 | void __ovld vstore16(half16 data, size_t offset, __private half *p); | ||
3072 | #endif //cl_khr_fp16 | ||
3073 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3074 | |||
3075 | /** | ||
3076 | * Read sizeof (half) bytes of data from address | ||
3077 | @@ -11825,13 +11836,12 @@ void __ovld vstore16(half16 data, size_t offset, __private half *p); | ||
3078 | * must be 16-bit aligned. | ||
3079 | */ | ||
3080 | float __ovld vload_half(size_t offset, const __constant half *p); | ||
3081 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3082 | +#ifdef __opencl_c_generic_address_space | ||
3083 | float __ovld vload_half(size_t offset, const half *p); | ||
3084 | -#else | ||
3085 | +#endif //__opencl_c_generic_address_space | ||
3086 | float __ovld vload_half(size_t offset, const __global half *p); | ||
3087 | float __ovld vload_half(size_t offset, const __local half *p); | ||
3088 | float __ovld vload_half(size_t offset, const __private half *p); | ||
3089 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3090 | |||
3091 | /** | ||
3092 | * Read sizeof (halfn) bytes of data from address | ||
3093 | @@ -11846,13 +11856,13 @@ float3 __ovld vload_half3(size_t offset, const __constant half *p); | ||
3094 | float4 __ovld vload_half4(size_t offset, const __constant half *p); | ||
3095 | float8 __ovld vload_half8(size_t offset, const __constant half *p); | ||
3096 | float16 __ovld vload_half16(size_t offset, const __constant half *p); | ||
3097 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3098 | +#ifdef __opencl_c_generic_address_space | ||
3099 | float2 __ovld vload_half2(size_t offset, const half *p); | ||
3100 | float3 __ovld vload_half3(size_t offset, const half *p); | ||
3101 | float4 __ovld vload_half4(size_t offset, const half *p); | ||
3102 | float8 __ovld vload_half8(size_t offset, const half *p); | ||
3103 | float16 __ovld vload_half16(size_t offset, const half *p); | ||
3104 | -#else | ||
3105 | +#endif //__opencl_c_generic_address_space | ||
3106 | float2 __ovld vload_half2(size_t offset, const __global half *p); | ||
3107 | float3 __ovld vload_half3(size_t offset, const __global half *p); | ||
3108 | float4 __ovld vload_half4(size_t offset, const __global half *p); | ||
3109 | @@ -11868,7 +11878,6 @@ float3 __ovld vload_half3(size_t offset, const __private half *p); | ||
3110 | float4 __ovld vload_half4(size_t offset, const __private half *p); | ||
3111 | float8 __ovld vload_half8(size_t offset, const __private half *p); | ||
3112 | float16 __ovld vload_half16(size_t offset, const __private half *p); | ||
3113 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3114 | |||
3115 | /** | ||
3116 | * The float value given by data is first | ||
3117 | @@ -11881,20 +11890,20 @@ float16 __ovld vload_half16(size_t offset, const __private half *p); | ||
3118 | * The default current rounding mode is round to | ||
3119 | * nearest even. | ||
3120 | */ | ||
3121 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3122 | +#ifdef __opencl_c_generic_address_space | ||
3123 | void __ovld vstore_half(float data, size_t offset, half *p); | ||
3124 | void __ovld vstore_half_rte(float data, size_t offset, half *p); | ||
3125 | void __ovld vstore_half_rtz(float data, size_t offset, half *p); | ||
3126 | void __ovld vstore_half_rtp(float data, size_t offset, half *p); | ||
3127 | void __ovld vstore_half_rtn(float data, size_t offset, half *p); | ||
3128 | -#ifdef cl_khr_fp64 | ||
3129 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3130 | void __ovld vstore_half(double data, size_t offset, half *p); | ||
3131 | void __ovld vstore_half_rte(double data, size_t offset, half *p); | ||
3132 | void __ovld vstore_half_rtz(double data, size_t offset, half *p); | ||
3133 | void __ovld vstore_half_rtp(double data, size_t offset, half *p); | ||
3134 | void __ovld vstore_half_rtn(double data, size_t offset, half *p); | ||
3135 | -#endif //cl_khr_fp64 | ||
3136 | -#else | ||
3137 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3138 | +#endif //__opencl_c_generic_address_space | ||
3139 | void __ovld vstore_half(float data, size_t offset, __global half *p); | ||
3140 | void __ovld vstore_half_rte(float data, size_t offset, __global half *p); | ||
3141 | void __ovld vstore_half_rtz(float data, size_t offset, __global half *p); | ||
3142 | @@ -11910,7 +11919,7 @@ void __ovld vstore_half_rte(float data, size_t offset, __private half *p); | ||
3143 | void __ovld vstore_half_rtz(float data, size_t offset, __private half *p); | ||
3144 | void __ovld vstore_half_rtp(float data, size_t offset, __private half *p); | ||
3145 | void __ovld vstore_half_rtn(float data, size_t offset, __private half *p); | ||
3146 | -#ifdef cl_khr_fp64 | ||
3147 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3148 | void __ovld vstore_half(double data, size_t offset, __global half *p); | ||
3149 | void __ovld vstore_half_rte(double data, size_t offset, __global half *p); | ||
3150 | void __ovld vstore_half_rtz(double data, size_t offset, __global half *p); | ||
3151 | @@ -11926,8 +11935,7 @@ void __ovld vstore_half_rte(double data, size_t offset, __private half *p); | ||
3152 | void __ovld vstore_half_rtz(double data, size_t offset, __private half *p); | ||
3153 | void __ovld vstore_half_rtp(double data, size_t offset, __private half *p); | ||
3154 | void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); | ||
3155 | -#endif //cl_khr_fp64 | ||
3156 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3157 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3158 | |||
3159 | /** | ||
3160 | * The floatn value given by data is converted to | ||
3161 | @@ -11940,7 +11948,7 @@ void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); | ||
3162 | * The default current rounding mode is round to | ||
3163 | * nearest even. | ||
3164 | */ | ||
3165 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3166 | +#ifdef __opencl_c_generic_address_space | ||
3167 | void __ovld vstore_half2(float2 data, size_t offset, half *p); | ||
3168 | void __ovld vstore_half3(float3 data, size_t offset, half *p); | ||
3169 | void __ovld vstore_half4(float4 data, size_t offset, half *p); | ||
3170 | @@ -11966,7 +11974,7 @@ void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p); | ||
3171 | void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p); | ||
3172 | void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p); | ||
3173 | void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p); | ||
3174 | -#ifdef cl_khr_fp64 | ||
3175 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3176 | void __ovld vstore_half2(double2 data, size_t offset, half *p); | ||
3177 | void __ovld vstore_half3(double3 data, size_t offset, half *p); | ||
3178 | void __ovld vstore_half4(double4 data, size_t offset, half *p); | ||
3179 | @@ -11992,8 +12000,8 @@ void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p); | ||
3180 | void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p); | ||
3181 | void __ovld vstore_half8_rtn(double8 data, size_t offset, half *p); | ||
3182 | void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p); | ||
3183 | -#endif //cl_khr_fp64 | ||
3184 | -#else | ||
3185 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3186 | +#endif //__opencl_c_generic_address_space | ||
3187 | void __ovld vstore_half2(float2 data, size_t offset, __global half *p); | ||
3188 | void __ovld vstore_half3(float3 data, size_t offset, __global half *p); | ||
3189 | void __ovld vstore_half4(float4 data, size_t offset, __global half *p); | ||
3190 | @@ -12069,7 +12077,7 @@ void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p); | ||
3191 | void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p); | ||
3192 | void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p); | ||
3193 | void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p); | ||
3194 | -#ifdef cl_khr_fp64 | ||
3195 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3196 | void __ovld vstore_half2(double2 data, size_t offset, __global half *p); | ||
3197 | void __ovld vstore_half3(double3 data, size_t offset, __global half *p); | ||
3198 | void __ovld vstore_half4(double4 data, size_t offset, __global half *p); | ||
3199 | @@ -12145,8 +12153,7 @@ void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p); | ||
3200 | void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p); | ||
3201 | void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p); | ||
3202 | void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p); | ||
3203 | -#endif //cl_khr_fp64 | ||
3204 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3205 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3206 | |||
3207 | /** | ||
3208 | * For n = 1, 2, 4, 8 and 16 read sizeof (halfn) | ||
3209 | @@ -12167,14 +12174,14 @@ float3 __ovld vloada_half3(size_t offset, const __constant half *p); | ||
3210 | float4 __ovld vloada_half4(size_t offset, const __constant half *p); | ||
3211 | float8 __ovld vloada_half8(size_t offset, const __constant half *p); | ||
3212 | float16 __ovld vloada_half16(size_t offset, const __constant half *p); | ||
3213 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3214 | +#ifdef __opencl_c_generic_address_space | ||
3215 | float __ovld vloada_half(size_t offset, const half *p); | ||
3216 | float2 __ovld vloada_half2(size_t offset, const half *p); | ||
3217 | float3 __ovld vloada_half3(size_t offset, const half *p); | ||
3218 | float4 __ovld vloada_half4(size_t offset, const half *p); | ||
3219 | float8 __ovld vloada_half8(size_t offset, const half *p); | ||
3220 | float16 __ovld vloada_half16(size_t offset, const half *p); | ||
3221 | -#else | ||
3222 | +#endif //__opencl_c_generic_address_space | ||
3223 | float __ovld vloada_half(size_t offset, const __global half *p); | ||
3224 | float2 __ovld vloada_half2(size_t offset, const __global half *p); | ||
3225 | float3 __ovld vloada_half3(size_t offset, const __global half *p); | ||
3226 | @@ -12193,7 +12200,6 @@ float3 __ovld vloada_half3(size_t offset, const __private half *p); | ||
3227 | float4 __ovld vloada_half4(size_t offset, const __private half *p); | ||
3228 | float8 __ovld vloada_half8(size_t offset, const __private half *p); | ||
3229 | float16 __ovld vloada_half16(size_t offset, const __private half *p); | ||
3230 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3231 | |||
3232 | /** | ||
3233 | * The floatn value given by data is converted to | ||
3234 | @@ -12211,7 +12217,7 @@ float16 __ovld vloada_half16(size_t offset, const __private half *p); | ||
3235 | * mode. The default current rounding mode is | ||
3236 | * round to nearest even. | ||
3237 | */ | ||
3238 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3239 | +#ifdef __opencl_c_generic_address_space | ||
3240 | void __ovld vstorea_half(float data, size_t offset, half *p); | ||
3241 | void __ovld vstorea_half2(float2 data, size_t offset, half *p); | ||
3242 | void __ovld vstorea_half3(float3 data, size_t offset, half *p); | ||
3243 | @@ -12247,7 +12253,7 @@ void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p); | ||
3244 | void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p); | ||
3245 | void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p); | ||
3246 | |||
3247 | -#ifdef cl_khr_fp64 | ||
3248 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3249 | void __ovld vstorea_half(double data, size_t offset, half *p); | ||
3250 | void __ovld vstorea_half2(double2 data, size_t offset, half *p); | ||
3251 | void __ovld vstorea_half3(double3 data, size_t offset, half *p); | ||
3252 | @@ -12282,9 +12288,9 @@ void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p); | ||
3253 | void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p); | ||
3254 | void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p); | ||
3255 | void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p); | ||
3256 | -#endif //cl_khr_fp64 | ||
3257 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3258 | +#endif //__opencl_c_generic_address_space | ||
3259 | |||
3260 | -#else | ||
3261 | void __ovld vstorea_half(float data, size_t offset, __global half *p); | ||
3262 | void __ovld vstorea_half2(float2 data, size_t offset, __global half *p); | ||
3263 | void __ovld vstorea_half3(float3 data, size_t offset, __global half *p); | ||
3264 | @@ -12390,7 +12396,7 @@ void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p); | ||
3265 | void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p); | ||
3266 | void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p); | ||
3267 | |||
3268 | -#ifdef cl_khr_fp64 | ||
3269 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3270 | void __ovld vstorea_half(double data, size_t offset, __global half *p); | ||
3271 | void __ovld vstorea_half2(double2 data, size_t offset, __global half *p); | ||
3272 | void __ovld vstorea_half3(double3 data, size_t offset, __global half *p); | ||
3273 | @@ -12495,8 +12501,7 @@ void __ovld vstorea_half3_rtn(double3 data,size_t offset, __private half *p); | ||
3274 | void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p); | ||
3275 | void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p); | ||
3276 | void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p); | ||
3277 | -#endif //cl_khr_fp64 | ||
3278 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3279 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3280 | |||
3281 | // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions | ||
3282 | |||
3283 | @@ -12580,7 +12585,7 @@ void __ovld write_mem_fence(cl_mem_fence_flags flags); | ||
3284 | |||
3285 | // OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions | ||
3286 | |||
3287 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3288 | +#ifdef __opencl_c_generic_address_space | ||
3289 | cl_mem_fence_flags __ovld get_fence(const void *ptr); | ||
3290 | cl_mem_fence_flags __ovld get_fence(void *ptr); | ||
3291 | |||
3292 | @@ -12591,7 +12596,7 @@ cl_mem_fence_flags __ovld get_fence(void *ptr); | ||
3293 | * where gentype is builtin type or user defined type. | ||
3294 | */ | ||
3295 | |||
3296 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
3297 | +#endif //__opencl_c_generic_address_space | ||
3298 | |||
3299 | // OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch | ||
3300 | |||
3301 | @@ -12730,7 +12735,7 @@ event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16 | ||
3302 | event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event); | ||
3303 | event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event); | ||
3304 | event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event); | ||
3305 | -#ifdef cl_khr_fp64 | ||
3306 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3307 | event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event); | ||
3308 | event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event); | ||
3309 | event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event); | ||
3310 | @@ -12743,7 +12748,7 @@ event_t __ovld async_work_group_copy(__global double3 *dst, const __local double | ||
3311 | event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event); | ||
3312 | event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event); | ||
3313 | event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event); | ||
3314 | -#endif //cl_khr_fp64 | ||
3315 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3316 | #ifdef cl_khr_fp16 | ||
3317 | event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event); | ||
3318 | event_t __ovld async_work_group_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, event_t event); | ||
3319 | @@ -12893,7 +12898,7 @@ event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local | ||
3320 | event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, size_t dst_stride, event_t event); | ||
3321 | event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event); | ||
3322 | event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event); | ||
3323 | -#ifdef cl_khr_fp64 | ||
3324 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3325 | event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event); | ||
3326 | event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event); | ||
3327 | event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event); | ||
3328 | @@ -12906,7 +12911,7 @@ event_t __ovld async_work_group_strided_copy(__global double3 *dst, const __loca | ||
3329 | event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event); | ||
3330 | event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event); | ||
3331 | event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event); | ||
3332 | -#endif //cl_khr_fp64 | ||
3333 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3334 | #ifdef cl_khr_fp16 | ||
3335 | event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event); | ||
3336 | event_t __ovld async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event); | ||
3337 | @@ -12996,14 +13001,14 @@ void __ovld prefetch(const __global uint16 *p, size_t num_elements); | ||
3338 | void __ovld prefetch(const __global long16 *p, size_t num_elements); | ||
3339 | void __ovld prefetch(const __global ulong16 *p, size_t num_elements); | ||
3340 | void __ovld prefetch(const __global float16 *p, size_t num_elements); | ||
3341 | -#ifdef cl_khr_fp64 | ||
3342 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3343 | void __ovld prefetch(const __global double *p, size_t num_elements); | ||
3344 | void __ovld prefetch(const __global double2 *p, size_t num_elements); | ||
3345 | void __ovld prefetch(const __global double3 *p, size_t num_elements); | ||
3346 | void __ovld prefetch(const __global double4 *p, size_t num_elements); | ||
3347 | void __ovld prefetch(const __global double8 *p, size_t num_elements); | ||
3348 | void __ovld prefetch(const __global double16 *p, size_t num_elements); | ||
3349 | -#endif //cl_khr_fp64 | ||
3350 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3351 | #ifdef cl_khr_fp16 | ||
3352 | void __ovld prefetch(const __global half *p, size_t num_elements); | ||
3353 | void __ovld prefetch(const __global half2 *p, size_t num_elements); | ||
3354 | @@ -13026,9 +13031,11 @@ void __ovld prefetch(const __global half16 *p, size_t num_elements); | ||
3355 | * pointed by p. The function returns old. | ||
3356 | */ | ||
3357 | int __ovld atomic_add(volatile __global int *p, int val); | ||
3358 | -unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val); | ||
3359 | +unsigned int __ovld atomic_add(volatile __global unsigned int *p, | ||
3360 | + unsigned int val); | ||
3361 | int __ovld atomic_add(volatile __local int *p, int val); | ||
3362 | -unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val); | ||
3363 | +unsigned int __ovld atomic_add(volatile __local unsigned int *p, | ||
3364 | + unsigned int val); | ||
3365 | #ifdef __OPENCL_CPP_VERSION__ | ||
3366 | int __ovld atomic_add(volatile int *p, int val); | ||
3367 | unsigned int __ovld atomic_add(volatile unsigned int *p, unsigned int val); | ||
3368 | @@ -13056,9 +13063,11 @@ unsigned long __ovld atom_add(volatile __local unsigned long *p, unsigned long v | ||
3369 | * returns old. | ||
3370 | */ | ||
3371 | int __ovld atomic_sub(volatile __global int *p, int val); | ||
3372 | -unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val); | ||
3373 | +unsigned int __ovld atomic_sub(volatile __global unsigned int *p, | ||
3374 | + unsigned int val); | ||
3375 | int __ovld atomic_sub(volatile __local int *p, int val); | ||
3376 | -unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val); | ||
3377 | +unsigned int __ovld atomic_sub(volatile __local unsigned int *p, | ||
3378 | + unsigned int val); | ||
3379 | #ifdef __OPENCL_CPP_VERSION__ | ||
3380 | int __ovld atomic_sub(volatile int *p, int val); | ||
3381 | unsigned int __ovld atomic_sub(volatile unsigned int *p, unsigned int val); | ||
3382 | @@ -13086,9 +13095,11 @@ unsigned long __ovld atom_sub(volatile __local unsigned long *p, unsigned long v | ||
3383 | * value. | ||
3384 | */ | ||
3385 | int __ovld atomic_xchg(volatile __global int *p, int val); | ||
3386 | -unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, unsigned int val); | ||
3387 | +unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, | ||
3388 | + unsigned int val); | ||
3389 | int __ovld atomic_xchg(volatile __local int *p, int val); | ||
3390 | -unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val); | ||
3391 | +unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, | ||
3392 | + unsigned int val); | ||
3393 | float __ovld atomic_xchg(volatile __global float *p, float val); | ||
3394 | float __ovld atomic_xchg(volatile __local float *p, float val); | ||
3395 | #ifdef __OPENCL_CPP_VERSION__ | ||
3396 | @@ -13183,12 +13194,15 @@ unsigned long __ovld atom_dec(volatile __local unsigned long *p); | ||
3397 | * returns old. | ||
3398 | */ | ||
3399 | int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val); | ||
3400 | -unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val); | ||
3401 | +unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, | ||
3402 | + unsigned int cmp, unsigned int val); | ||
3403 | int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val); | ||
3404 | -unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val); | ||
3405 | +unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, | ||
3406 | + unsigned int cmp, unsigned int val); | ||
3407 | #ifdef __OPENCL_CPP_VERSION__ | ||
3408 | int __ovld atomic_cmpxchg(volatile int *p, int cmp, int val); | ||
3409 | -unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp, unsigned int val); | ||
3410 | +unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp, | ||
3411 | + unsigned int val); | ||
3412 | #endif | ||
3413 | |||
3414 | #if defined(cl_khr_global_int32_base_atomics) | ||
3415 | @@ -13215,9 +13229,11 @@ unsigned long __ovld atom_cmpxchg(volatile __local unsigned long *p, unsigned lo | ||
3416 | * returns old. | ||
3417 | */ | ||
3418 | int __ovld atomic_min(volatile __global int *p, int val); | ||
3419 | -unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val); | ||
3420 | +unsigned int __ovld atomic_min(volatile __global unsigned int *p, | ||
3421 | + unsigned int val); | ||
3422 | int __ovld atomic_min(volatile __local int *p, int val); | ||
3423 | -unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val); | ||
3424 | +unsigned int __ovld atomic_min(volatile __local unsigned int *p, | ||
3425 | + unsigned int val); | ||
3426 | #ifdef __OPENCL_CPP_VERSION__ | ||
3427 | int __ovld atomic_min(volatile int *p, int val); | ||
3428 | unsigned int __ovld atomic_min(volatile unsigned int *p, unsigned int val); | ||
3429 | @@ -13247,9 +13263,11 @@ unsigned long __ovld atom_min(volatile __local unsigned long *p, unsigned long v | ||
3430 | * returns old. | ||
3431 | */ | ||
3432 | int __ovld atomic_max(volatile __global int *p, int val); | ||
3433 | -unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val); | ||
3434 | +unsigned int __ovld atomic_max(volatile __global unsigned int *p, | ||
3435 | + unsigned int val); | ||
3436 | int __ovld atomic_max(volatile __local int *p, int val); | ||
3437 | -unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val); | ||
3438 | +unsigned int __ovld atomic_max(volatile __local unsigned int *p, | ||
3439 | + unsigned int val); | ||
3440 | #ifdef __OPENCL_CPP_VERSION__ | ||
3441 | int __ovld atomic_max(volatile int *p, int val); | ||
3442 | unsigned int __ovld atomic_max(volatile unsigned int *p, unsigned int val); | ||
3443 | @@ -13278,9 +13296,11 @@ unsigned long __ovld atom_max(volatile __local unsigned long *p, unsigned long v | ||
3444 | * pointed by p. The function returns old. | ||
3445 | */ | ||
3446 | int __ovld atomic_and(volatile __global int *p, int val); | ||
3447 | -unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val); | ||
3448 | +unsigned int __ovld atomic_and(volatile __global unsigned int *p, | ||
3449 | + unsigned int val); | ||
3450 | int __ovld atomic_and(volatile __local int *p, int val); | ||
3451 | -unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val); | ||
3452 | +unsigned int __ovld atomic_and(volatile __local unsigned int *p, | ||
3453 | + unsigned int val); | ||
3454 | #ifdef __OPENCL_CPP_VERSION__ | ||
3455 | int __ovld atomic_and(volatile int *p, int val); | ||
3456 | unsigned int __ovld atomic_and(volatile unsigned int *p, unsigned int val); | ||
3457 | @@ -13309,9 +13329,11 @@ unsigned long __ovld atom_and(volatile __local unsigned long *p, unsigned long v | ||
3458 | * pointed by p. The function returns old. | ||
3459 | */ | ||
3460 | int __ovld atomic_or(volatile __global int *p, int val); | ||
3461 | -unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val); | ||
3462 | +unsigned int __ovld atomic_or(volatile __global unsigned int *p, | ||
3463 | + unsigned int val); | ||
3464 | int __ovld atomic_or(volatile __local int *p, int val); | ||
3465 | -unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val); | ||
3466 | +unsigned int __ovld atomic_or(volatile __local unsigned int *p, | ||
3467 | + unsigned int val); | ||
3468 | #ifdef __OPENCL_CPP_VERSION__ | ||
3469 | int __ovld atomic_or(volatile int *p, int val); | ||
3470 | unsigned int __ovld atomic_or(volatile unsigned int *p, unsigned int val); | ||
3471 | @@ -13340,9 +13362,11 @@ unsigned long __ovld atom_or(volatile __local unsigned long *p, unsigned long va | ||
3472 | * pointed by p. The function returns old. | ||
3473 | */ | ||
3474 | int __ovld atomic_xor(volatile __global int *p, int val); | ||
3475 | -unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val); | ||
3476 | +unsigned int __ovld atomic_xor(volatile __global unsigned int *p, | ||
3477 | + unsigned int val); | ||
3478 | int __ovld atomic_xor(volatile __local int *p, int val); | ||
3479 | -unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val); | ||
3480 | +unsigned int __ovld atomic_xor(volatile __local unsigned int *p, | ||
3481 | + unsigned int val); | ||
3482 | #ifdef __OPENCL_CPP_VERSION__ | ||
3483 | int __ovld atomic_xor(volatile int *p, int val); | ||
3484 | unsigned int __ovld atomic_xor(volatile unsigned int *p, unsigned int val); | ||
3485 | @@ -13380,120 +13404,78 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v | ||
3486 | #endif | ||
3487 | |||
3488 | // atomic_init() | ||
3489 | +#ifdef __opencl_c_generic_address_space | ||
3490 | void __ovld atomic_init(volatile atomic_int *object, int value); | ||
3491 | void __ovld atomic_init(volatile atomic_uint *object, uint value); | ||
3492 | void __ovld atomic_init(volatile atomic_float *object, float value); | ||
3493 | #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
3494 | void __ovld atomic_init(volatile atomic_long *object, long value); | ||
3495 | void __ovld atomic_init(volatile atomic_ulong *object, ulong value); | ||
3496 | -#ifdef cl_khr_fp64 | ||
3497 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3498 | void __ovld atomic_init(volatile atomic_double *object, double value); | ||
3499 | -#endif //cl_khr_fp64 | ||
3500 | -#endif | ||
3501 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3502 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
3503 | + // defined(cl_khr_int64_extended_atomics) | ||
3504 | +#endif // __opencl_c_generic_address_space | ||
3505 | + | ||
3506 | +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
3507 | +void __ovld atomic_init(volatile atomic_int __global *object, int value); | ||
3508 | +void __ovld atomic_init(volatile atomic_int __local *object, int value); | ||
3509 | +void __ovld atomic_init(volatile atomic_uint __global *object, uint value); | ||
3510 | +void __ovld atomic_init(volatile atomic_uint __local *object, uint value); | ||
3511 | +void __ovld atomic_init(volatile atomic_float __global *object, float value); | ||
3512 | +void __ovld atomic_init(volatile atomic_float __local *object, float value); | ||
3513 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
3514 | +void __ovld atomic_init(volatile atomic_long __global *object, long value); | ||
3515 | +void __ovld atomic_init(volatile atomic_long __local *object, long value); | ||
3516 | +void __ovld atomic_init(volatile atomic_ulong __global *object, ulong value); | ||
3517 | +void __ovld atomic_init(volatile atomic_ulong __local *object, ulong value); | ||
3518 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
3519 | +void __ovld atomic_init(volatile atomic_double __global *object, double value); | ||
3520 | +void __ovld atomic_init(volatile atomic_double __local *object, double value); | ||
3521 | +#endif // cl_khr_fp64 | ||
3522 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
3523 | + // defined(cl_khr_int64_extended_atomics) | ||
3524 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
3525 | |||
3526 | // atomic_work_item_fence() | ||
3527 | -void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope); | ||
3528 | +void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, | ||
3529 | + memory_scope scope); | ||
3530 | |||
3531 | // atomic_fetch() | ||
3532 | - | ||
3533 | +#if defined(__opencl_c_atomic_scope_device) && \ | ||
3534 | + defined(__opencl_c_atomic_order_seq_cst) | ||
3535 | +#ifdef __opencl_c_generic_address_space | ||
3536 | int __ovld atomic_fetch_add(volatile atomic_int *object, int operand); | ||
3537 | -int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order); | ||
3538 | -int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); | ||
3539 | uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand); | ||
3540 | -uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order); | ||
3541 | -uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); | ||
3542 | int __ovld atomic_fetch_sub(volatile atomic_int *object, int operand); | ||
3543 | -int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order); | ||
3544 | -int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); | ||
3545 | uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand); | ||
3546 | -uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order); | ||
3547 | -uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); | ||
3548 | int __ovld atomic_fetch_or(volatile atomic_int *object, int operand); | ||
3549 | -int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order); | ||
3550 | -int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); | ||
3551 | uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand); | ||
3552 | -uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order); | ||
3553 | -uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); | ||
3554 | int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand); | ||
3555 | -int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order); | ||
3556 | -int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); | ||
3557 | uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand); | ||
3558 | -uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order); | ||
3559 | -uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); | ||
3560 | int __ovld atomic_fetch_and(volatile atomic_int *object, int operand); | ||
3561 | -int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order); | ||
3562 | -int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); | ||
3563 | uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand); | ||
3564 | -uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order); | ||
3565 | -uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); | ||
3566 | int __ovld atomic_fetch_min(volatile atomic_int *object, int operand); | ||
3567 | -int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order); | ||
3568 | -int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); | ||
3569 | uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand); | ||
3570 | -uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order); | ||
3571 | -uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); | ||
3572 | -uint __ovld atomic_fetch_min(volatile atomic_uint *object, int operand); | ||
3573 | -uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order); | ||
3574 | -uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope); | ||
3575 | int __ovld atomic_fetch_max(volatile atomic_int *object, int operand); | ||
3576 | -int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order); | ||
3577 | -int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope); | ||
3578 | uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand); | ||
3579 | -uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order); | ||
3580 | -uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope); | ||
3581 | -uint __ovld atomic_fetch_max(volatile atomic_uint *object, int operand); | ||
3582 | -uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order); | ||
3583 | -uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope); | ||
3584 | |||
3585 | #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
3586 | long __ovld atomic_fetch_add(volatile atomic_long *object, long operand); | ||
3587 | -long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order); | ||
3588 | -long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); | ||
3589 | ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand); | ||
3590 | -ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); | ||
3591 | -ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); | ||
3592 | long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand); | ||
3593 | -long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order); | ||
3594 | -long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); | ||
3595 | ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand); | ||
3596 | -ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); | ||
3597 | -ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); | ||
3598 | long __ovld atomic_fetch_or(volatile atomic_long *object, long operand); | ||
3599 | -long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order); | ||
3600 | -long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); | ||
3601 | ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand); | ||
3602 | -ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); | ||
3603 | -ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); | ||
3604 | long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand); | ||
3605 | -long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order); | ||
3606 | -long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); | ||
3607 | ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand); | ||
3608 | -ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); | ||
3609 | -ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); | ||
3610 | long __ovld atomic_fetch_and(volatile atomic_long *object, long operand); | ||
3611 | -long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order); | ||
3612 | -long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); | ||
3613 | ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand); | ||
3614 | -ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); | ||
3615 | -ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); | ||
3616 | long __ovld atomic_fetch_min(volatile atomic_long *object, long operand); | ||
3617 | -long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order); | ||
3618 | -long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); | ||
3619 | ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand); | ||
3620 | -ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); | ||
3621 | -ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); | ||
3622 | -ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, long operand); | ||
3623 | -ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order); | ||
3624 | -ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope); | ||
3625 | long __ovld atomic_fetch_max(volatile atomic_long *object, long operand); | ||
3626 | -long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order); | ||
3627 | -long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope); | ||
3628 | ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand); | ||
3629 | -ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order); | ||
3630 | -ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope); | ||
3631 | -ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, long operand); | ||
3632 | -ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order); | ||
3633 | -ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope); | ||
3634 | #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
3635 | |||
3636 | // OpenCL v2.0 s6.13.11.7.5: | ||
3637 | @@ -13501,196 +13483,2239 @@ ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long opera | ||
3638 | // or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. | ||
3639 | |||
3640 | #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
3641 | -uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand); | ||
3642 | -uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); | ||
3643 | -uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); | ||
3644 | -uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand); | ||
3645 | -uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order); | ||
3646 | -uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope); | ||
3647 | - | ||
3648 | -uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand); | ||
3649 | -uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); | ||
3650 | -uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); | ||
3651 | -uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand); | ||
3652 | -uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); | ||
3653 | -uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); | ||
3654 | -uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand); | ||
3655 | -uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order); | ||
3656 | -uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope); | ||
3657 | -uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t opermax); | ||
3658 | -uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder); | ||
3659 | -uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope); | ||
3660 | -uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t opermax); | ||
3661 | -uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder); | ||
3662 | -uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope); | ||
3663 | - | ||
3664 | -intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand); | ||
3665 | -intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); | ||
3666 | -intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); | ||
3667 | -intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand); | ||
3668 | -intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); | ||
3669 | -intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); | ||
3670 | -intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand); | ||
3671 | -intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order); | ||
3672 | -intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope); | ||
3673 | -intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t opermax); | ||
3674 | -intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder); | ||
3675 | -intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); | ||
3676 | -intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t opermax); | ||
3677 | -intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder); | ||
3678 | -intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); | ||
3679 | +uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, | ||
3680 | + ptrdiff_t operand); | ||
3681 | +uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, | ||
3682 | + ptrdiff_t operand); | ||
3683 | + | ||
3684 | +uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, | ||
3685 | + intptr_t operand); | ||
3686 | +uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, | ||
3687 | + intptr_t operand); | ||
3688 | +uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, | ||
3689 | + intptr_t operand); | ||
3690 | +uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, | ||
3691 | + intptr_t opermax); | ||
3692 | +uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, | ||
3693 | + intptr_t opermax); | ||
3694 | + | ||
3695 | +intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, | ||
3696 | + uintptr_t operand); | ||
3697 | +intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, | ||
3698 | + uintptr_t operand); | ||
3699 | +intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, | ||
3700 | + uintptr_t operand); | ||
3701 | +intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, | ||
3702 | + uintptr_t opermax); | ||
3703 | +intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, | ||
3704 | + uintptr_t opermax); | ||
3705 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
3706 | + // defined(cl_khr_int64_extended_atomics) | ||
3707 | +#endif // __opencl_c_generic_address_space | ||
3708 | + | ||
3709 | +#if(__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
3710 | +int __ovld atomic_fetch_add(volatile atomic_int __global *object, int operand); | ||
3711 | +uint __ovld atomic_fetch_add(volatile atomic_uint __local *object, | ||
3712 | + uint operand); | ||
3713 | +int __ovld atomic_fetch_sub(volatile atomic_int __global *object, int operand); | ||
3714 | +int __ovld atomic_fetch_sub(volatile atomic_int __local *object, int operand); | ||
3715 | +uint __ovld atomic_fetch_sub(volatile atomic_uint __local *object, | ||
3716 | + uint operand); | ||
3717 | +uint __ovld atomic_fetch_sub(volatile atomic_uint __global *object, | ||
3718 | + uint operand); | ||
3719 | +int __ovld atomic_fetch_or(volatile atomic_int __global *object, int operand); | ||
3720 | +uint __ovld atomic_fetch_sub(volatile atomic_uint __local *object, | ||
3721 | + uint operand); | ||
3722 | +uint __ovld atomic_fetch_or(volatile atomic_uint __global *object, | ||
3723 | + uint operand); | ||
3724 | +uint __ovld atomic_fetch_or(volatile atomic_uint __local *object, uint operand); | ||
3725 | +int __ovld atomic_fetch_xor(volatile atomic_int __global *object, int operand); | ||
3726 | +int __ovld atomic_fetch_xor(volatile atomic_int __local *object, int operand); | ||
3727 | +uint __ovld atomic_fetch_xor(volatile atomic_uint __global *object, | ||
3728 | + uint operand); | ||
3729 | +uint __ovld atomic_fetch_xor(volatile atomic_uint __local *object, | ||
3730 | + uint operand); | ||
3731 | +int __ovld atomic_fetch_and(volatile atomic_int __global *object, int operand); | ||
3732 | +int __ovld atomic_fetch_and(volatile atomic_int __local *object, int operand); | ||
3733 | +uint __ovld atomic_fetch_and(volatile atomic_uint __global *object, | ||
3734 | + uint operand); | ||
3735 | +uint __ovld atomic_fetch_and(volatile atomic_uint __local *object, | ||
3736 | + uint operand); | ||
3737 | +int __ovld atomic_fetch_min(volatile atomic_int __global *object, int operand); | ||
3738 | +int __ovld atomic_fetch_min(volatile atomic_int __local *object, int operand); | ||
3739 | +uint __ovld atomic_fetch_min(volatile atomic_uint __global *object, | ||
3740 | + uint operand); | ||
3741 | +uint __ovld atomic_fetch_min(volatile atomic_uint __local *object, | ||
3742 | + uint operand); | ||
3743 | +int __ovld atomic_fetch_max(volatile atomic_int __global *object, int operand); | ||
3744 | +int __ovld atomic_fetch_max(volatile atomic_int __local *object, int operand); | ||
3745 | +uint __ovld atomic_fetch_max(volatile atomic_uint __global *object, | ||
3746 | + uint operand); | ||
3747 | +uint __ovld atomic_fetch_max(volatile atomic_uint __local *object, | ||
3748 | + uint operand); | ||
3749 | + | ||
3750 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
3751 | +long __ovld atomic_fetch_add(volatile atomic_long __global *object, | ||
3752 | + long operand); | ||
3753 | +long __ovld atomic_fetch_add(volatile atomic_long __local *object, | ||
3754 | + long operand); | ||
3755 | +ulong __ovld atomic_fetch_add(volatile atomic_ulong __global *object, | ||
3756 | + ulong operand); | ||
3757 | +ulong __ovld atomic_fetch_add(volatile atomic_ulong __local *object, | ||
3758 | + ulong operand); | ||
3759 | +long __ovld atomic_fetch_sub(volatile atomic_long __global *object, | ||
3760 | + long operand); | ||
3761 | +long __ovld atomic_fetch_sub(volatile atomic_long __local *object, | ||
3762 | + long operand); | ||
3763 | +ulong __ovld atomic_fetch_sub(volatile atomic_ulong __global *object, | ||
3764 | + ulong operand); | ||
3765 | +ulong __ovld atomic_fetch_sub(volatile atomic_ulong __local *object, | ||
3766 | + ulong operand); | ||
3767 | +long __ovld atomic_fetch_or(volatile atomic_long __global *object, | ||
3768 | + long operand); | ||
3769 | +long __ovld atomic_fetch_or(volatile atomic_long __local *object, long operand); | ||
3770 | +ulong __ovld atomic_fetch_or(volatile atomic_ulong __global *object, | ||
3771 | + ulong operand); | ||
3772 | +ulong __ovld atomic_fetch_or(volatile atomic_ulong __local *object, | ||
3773 | + ulong operand); | ||
3774 | +long __ovld atomic_fetch_xor(volatile atomic_long __global *object, | ||
3775 | + long operand); | ||
3776 | +long __ovld atomic_fetch_xor(volatile atomic_long __local *object, | ||
3777 | + long operand); | ||
3778 | +ulong __ovld atomic_fetch_xor(volatile atomic_ulong __global *object, | ||
3779 | + ulong operand); | ||
3780 | +ulong __ovld atomic_fetch_xor(volatile atomic_ulong __local *object, | ||
3781 | + ulong operand); | ||
3782 | +long __ovld atomic_fetch_and(volatile atomic_long __global *object, | ||
3783 | + long operand); | ||
3784 | +long __ovld atomic_fetch_and(volatile atomic_long __local *object, | ||
3785 | + long operand); | ||
3786 | +ulong __ovld atomic_fetch_and(volatile atomic_ulong __global *object, | ||
3787 | + ulong operand); | ||
3788 | +ulong __ovld atomic_fetch_and(volatile atomic_ulong __local *object, | ||
3789 | + ulong operand); | ||
3790 | +long __ovld atomic_fetch_min(volatile atomic_long __global *object, | ||
3791 | + long operand); | ||
3792 | +long __ovld atomic_fetch_min(volatile atomic_long __local *object, | ||
3793 | + long operand); | ||
3794 | +ulong __ovld atomic_fetch_min(volatile atomic_ulong __global *object, | ||
3795 | + ulong operand); | ||
3796 | +ulong __ovld atomic_fetch_min(volatile atomic_ulong __local *object, | ||
3797 | + ulong operand); | ||
3798 | +long __ovld atomic_fetch_max(volatile atomic_long __global *object, | ||
3799 | + long operand); | ||
3800 | +long __ovld atomic_fetch_max(volatile atomic_long __local *object, | ||
3801 | + long operand); | ||
3802 | +ulong __ovld atomic_fetch_max(volatile atomic_ulong __global *object, | ||
3803 | + ulong operand); | ||
3804 | +ulong __ovld atomic_fetch_max(volatile atomic_ulong __local *object, | ||
3805 | + ulong operand); | ||
3806 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
3807 | + // defined(cl_khr_int64_extended_atomics) | ||
3808 | + | ||
3809 | +// OpenCL v2.0 s6.13.11.7.5: | ||
3810 | +// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument | ||
3811 | +// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be | ||
3812 | +// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. | ||
3813 | + | ||
3814 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
3815 | +uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t __global *object, | ||
3816 | + ptrdiff_t operand); | ||
3817 | +uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t __local *object, | ||
3818 | + ptrdiff_t operand); | ||
3819 | +uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t __global *object, | ||
3820 | + ptrdiff_t operand); | ||
3821 | +uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t __local *object, | ||
3822 | + ptrdiff_t operand); | ||
3823 | + | ||
3824 | +uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t __global *object, | ||
3825 | + intptr_t operand); | ||
3826 | +uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t __local *object, | ||
3827 | + intptr_t operand); | ||
3828 | +uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t __global *object, | ||
3829 | + intptr_t operand); | ||
3830 | +uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t __local *object, | ||
3831 | + intptr_t operand); | ||
3832 | +uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t __global *object, | ||
3833 | + intptr_t operand); | ||
3834 | +uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t __local *object, | ||
3835 | + intptr_t operand); | ||
3836 | +uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t __global *object, | ||
3837 | + intptr_t opermax); | ||
3838 | +uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t __local *object, | ||
3839 | + intptr_t opermax); | ||
3840 | +uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t __global *object, | ||
3841 | + intptr_t opermax); | ||
3842 | +uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t __local *object, | ||
3843 | + intptr_t opermax); | ||
3844 | + | ||
3845 | +intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t __global *object, | ||
3846 | + uintptr_t operand); | ||
3847 | +intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t __local *object, | ||
3848 | + uintptr_t operand); | ||
3849 | +intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t __global *object, | ||
3850 | + uintptr_t operand); | ||
3851 | +intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t __local *object, | ||
3852 | + uintptr_t operand); | ||
3853 | +intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t __global *object, | ||
3854 | + uintptr_t operand); | ||
3855 | +intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t __local *object, | ||
3856 | + uintptr_t operand); | ||
3857 | +intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t __global *object, | ||
3858 | + uintptr_t opermax); | ||
3859 | +intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t __local *object, | ||
3860 | + uintptr_t opermax); | ||
3861 | +intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t __global *object, | ||
3862 | + uintptr_t opermax); | ||
3863 | +intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t __local *object, | ||
3864 | + uintptr_t opermax); | ||
3865 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
3866 | + // defined(cl_khr_int64_extended_atomics) | ||
3867 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
3868 | + | ||
3869 | +#endif // defined(__opencl_c_atomic_scope_device) && | ||
3870 | + // defined(__opencl_c_atomic_order_seq_cst) | ||
3871 | + | ||
3872 | +#ifdef __opencl_c_generic_address_space | ||
3873 | +#ifdef __opencl_c_atomic_scope_device | ||
3874 | +int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, | ||
3875 | + memory_order order); | ||
3876 | +uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, | ||
3877 | + uint operand, memory_order order); | ||
3878 | +int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, | ||
3879 | + memory_order order); | ||
3880 | +uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, | ||
3881 | + uint operand, memory_order order); | ||
3882 | +int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, | ||
3883 | + memory_order order); | ||
3884 | +uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, | ||
3885 | + memory_order order); | ||
3886 | +int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, | ||
3887 | + memory_order order); | ||
3888 | +uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, | ||
3889 | + uint operand, memory_order order); | ||
3890 | +int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, | ||
3891 | + memory_order order); | ||
3892 | +uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, | ||
3893 | + uint operand, memory_order order); | ||
3894 | +int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, | ||
3895 | + memory_order order); | ||
3896 | +uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, | ||
3897 | + uint operand, memory_order order); | ||
3898 | +int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, | ||
3899 | + memory_order order); | ||
3900 | +uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, | ||
3901 | + uint operand, memory_order order); | ||
3902 | +#endif // __opencl_c_atomic_scope_device | ||
3903 | +int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, | ||
3904 | + memory_order order, memory_scope scope); | ||
3905 | +uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, | ||
3906 | + uint operand, memory_order order, | ||
3907 | + memory_scope scope); | ||
3908 | +int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, | ||
3909 | + memory_order order, memory_scope scope); | ||
3910 | +uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, | ||
3911 | + uint operand, memory_order order, | ||
3912 | + memory_scope scope); | ||
3913 | +int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, | ||
3914 | + memory_order order, memory_scope scope); | ||
3915 | +uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, | ||
3916 | + memory_order order, memory_scope scope); | ||
3917 | +int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, | ||
3918 | + memory_order order, memory_scope scope); | ||
3919 | +uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, | ||
3920 | + uint operand, memory_order order, | ||
3921 | + memory_scope scope); | ||
3922 | +int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, | ||
3923 | + memory_order order, memory_scope scope); | ||
3924 | +uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, | ||
3925 | + uint operand, memory_order order, | ||
3926 | + memory_scope scope); | ||
3927 | +int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, | ||
3928 | + memory_order order, memory_scope scope); | ||
3929 | +uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, | ||
3930 | + uint operand, memory_order order, | ||
3931 | + memory_scope scope); | ||
3932 | +int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, | ||
3933 | + memory_order order, memory_scope scope); | ||
3934 | +uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, | ||
3935 | + uint operand, memory_order order, | ||
3936 | + memory_scope scope); | ||
3937 | + | ||
3938 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
3939 | +#ifdef __opencl_c_atomic_scope_device | ||
3940 | +long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, | ||
3941 | + long operand, memory_order order); | ||
3942 | +ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, | ||
3943 | + ulong operand, memory_order order); | ||
3944 | +long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, | ||
3945 | + long operand, memory_order order); | ||
3946 | +ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, | ||
3947 | + ulong operand, memory_order order); | ||
3948 | +long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, | ||
3949 | + memory_order order); | ||
3950 | +ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, | ||
3951 | + ulong operand, memory_order order); | ||
3952 | +long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, | ||
3953 | + long operand, memory_order order); | ||
3954 | +ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, | ||
3955 | + ulong operand, memory_order order); | ||
3956 | +long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, | ||
3957 | + long operand, memory_order order); | ||
3958 | +ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, | ||
3959 | + ulong operand, memory_order order); | ||
3960 | +long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, | ||
3961 | + long operand, memory_order order); | ||
3962 | +ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, | ||
3963 | + ulong operand, memory_order order); | ||
3964 | +long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, | ||
3965 | + long operand, memory_order order); | ||
3966 | +ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, | ||
3967 | + ulong operand, memory_order order); | ||
3968 | +#endif // __opencl_c_atomic_scope_device | ||
3969 | +long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, | ||
3970 | + long operand, memory_order order, | ||
3971 | + memory_scope scope); | ||
3972 | +ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, | ||
3973 | + ulong operand, memory_order order, | ||
3974 | + memory_scope scope); | ||
3975 | +long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, | ||
3976 | + long operand, memory_order order, | ||
3977 | + memory_scope scope); | ||
3978 | +ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, | ||
3979 | + ulong operand, memory_order order, | ||
3980 | + memory_scope scope); | ||
3981 | +long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, | ||
3982 | + memory_order order, memory_scope scope); | ||
3983 | +ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, | ||
3984 | + ulong operand, memory_order order, | ||
3985 | + memory_scope scope); | ||
3986 | +long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, | ||
3987 | + long operand, memory_order order, | ||
3988 | + memory_scope scope); | ||
3989 | +ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, | ||
3990 | + ulong operand, memory_order order, | ||
3991 | + memory_scope scope); | ||
3992 | +long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, | ||
3993 | + long operand, memory_order order, | ||
3994 | + memory_scope scope); | ||
3995 | +ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, | ||
3996 | + ulong operand, memory_order order, | ||
3997 | + memory_scope scope); | ||
3998 | +long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, | ||
3999 | + long operand, memory_order order, | ||
4000 | + memory_scope scope); | ||
4001 | +ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, | ||
4002 | + ulong operand, memory_order order, | ||
4003 | + memory_scope scope); | ||
4004 | +long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, | ||
4005 | + long operand, memory_order order, | ||
4006 | + memory_scope scope); | ||
4007 | +ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, | ||
4008 | + ulong operand, memory_order order, | ||
4009 | + memory_scope scope); | ||
4010 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4011 | + // defined(cl_khr_int64_extended_atomics) | ||
4012 | + | ||
4013 | +// OpenCL v2.0 s6.13.11.7.5: | ||
4014 | +// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument | ||
4015 | +// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be | ||
4016 | +// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. | ||
4017 | + | ||
4018 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4019 | +#ifdef __opencl_c_atomic_scope_device | ||
4020 | +uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, | ||
4021 | + ptrdiff_t operand, | ||
4022 | + memory_order order); | ||
4023 | +uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, | ||
4024 | + ptrdiff_t operand, | ||
4025 | + memory_order order); | ||
4026 | +uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, | ||
4027 | + intptr_t operand, memory_order order); | ||
4028 | +uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, | ||
4029 | + intptr_t operand, | ||
4030 | + memory_order order); | ||
4031 | +uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, | ||
4032 | + intptr_t operand, | ||
4033 | + memory_order order); | ||
4034 | +uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, | ||
4035 | + intptr_t opermax, | ||
4036 | + memory_order minder); | ||
4037 | +uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, | ||
4038 | + intptr_t opermax, | ||
4039 | + memory_order minder); | ||
4040 | +intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, | ||
4041 | + uintptr_t operand, memory_order order); | ||
4042 | +intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, | ||
4043 | + uintptr_t operand, | ||
4044 | + memory_order order); | ||
4045 | +intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, | ||
4046 | + uintptr_t operand, | ||
4047 | + memory_order order); | ||
4048 | +intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, | ||
4049 | + uintptr_t opermax, | ||
4050 | + memory_order minder); | ||
4051 | +intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, | ||
4052 | + uintptr_t opermax, | ||
4053 | + memory_order minder); | ||
4054 | +#endif // __opencl_c_atomic_scope_device | ||
4055 | +uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, | ||
4056 | + ptrdiff_t operand, | ||
4057 | + memory_order order, | ||
4058 | + memory_scope scope); | ||
4059 | +uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, | ||
4060 | + ptrdiff_t operand, | ||
4061 | + memory_order order, | ||
4062 | + memory_scope scope); | ||
4063 | + | ||
4064 | +uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, | ||
4065 | + intptr_t operand, memory_order order, | ||
4066 | + memory_scope scope); | ||
4067 | +uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, | ||
4068 | + intptr_t operand, memory_order order, | ||
4069 | + memory_scope scope); | ||
4070 | +uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, | ||
4071 | + intptr_t operand, memory_order order, | ||
4072 | + memory_scope scope); | ||
4073 | +uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, | ||
4074 | + intptr_t opermax, | ||
4075 | + memory_order minder, | ||
4076 | + memory_scope scope); | ||
4077 | +uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, | ||
4078 | + intptr_t opermax, | ||
4079 | + memory_order minder, | ||
4080 | + memory_scope scope); | ||
4081 | + | ||
4082 | +intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, | ||
4083 | + uintptr_t operand, memory_order order, | ||
4084 | + memory_scope scope); | ||
4085 | +intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, | ||
4086 | + uintptr_t operand, memory_order order, | ||
4087 | + memory_scope scope); | ||
4088 | +intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, | ||
4089 | + uintptr_t operand, memory_order order, | ||
4090 | + memory_scope scope); | ||
4091 | +intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, | ||
4092 | + uintptr_t opermax, | ||
4093 | + memory_order minder, | ||
4094 | + memory_scope scope); | ||
4095 | +intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, | ||
4096 | + uintptr_t opermax, | ||
4097 | + memory_order minder, | ||
4098 | + memory_scope scope); | ||
4099 | #endif | ||
4100 | +#endif // __opencl_c_generic_address_space | ||
4101 | + | ||
4102 | +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4103 | +#ifdef __opencl_c_atomic_scope_device | ||
4104 | +int __ovld atomic_fetch_add_explicit(volatile atomic_int __global *object, | ||
4105 | + int operand, memory_order order); | ||
4106 | +int __ovld atomic_fetch_add_explicit(volatile atomic_int __local *object, | ||
4107 | + int operand, memory_order order); | ||
4108 | +uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __global *object, | ||
4109 | + uint operand, memory_order order); | ||
4110 | +uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __local *object, | ||
4111 | + uint operand, memory_order order); | ||
4112 | +int __ovld atomic_fetch_sub_explicit(volatile atomic_int __global *object, | ||
4113 | + int operand, memory_order order); | ||
4114 | +int __ovld atomic_fetch_sub_explicit(volatile atomic_int __local *object, | ||
4115 | + int operand, memory_order order); | ||
4116 | +uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __global *object, | ||
4117 | + uint operand, memory_order order); | ||
4118 | +uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __local *object, | ||
4119 | + uint operand, memory_order order); | ||
4120 | +int __ovld atomic_fetch_or_explicit(volatile atomic_int __global *object, | ||
4121 | + int operand, memory_order order); | ||
4122 | +int __ovld atomic_fetch_or_explicit(volatile atomic_int __local *object, | ||
4123 | + int operand, memory_order order); | ||
4124 | +uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __global *object, | ||
4125 | + uint operand, memory_order order); | ||
4126 | +uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __local *object, | ||
4127 | + uint operand, memory_order order); | ||
4128 | +int __ovld atomic_fetch_xor_explicit(volatile atomic_int __global *object, | ||
4129 | + int operand, memory_order order); | ||
4130 | +int __ovld atomic_fetch_xor_explicit(volatile atomic_int __local *object, | ||
4131 | + int operand, memory_order order); | ||
4132 | +uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __global *object, | ||
4133 | + uint operand, memory_order order); | ||
4134 | +uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __local *object, | ||
4135 | + uint operand, memory_order order); | ||
4136 | +int __ovld atomic_fetch_and_explicit(volatile atomic_int __global *object, | ||
4137 | + int operand, memory_order order); | ||
4138 | +int __ovld atomic_fetch_and_explicit(volatile atomic_int __local *object, | ||
4139 | + int operand, memory_order order); | ||
4140 | +uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __global *object, | ||
4141 | + uint operand, memory_order order); | ||
4142 | +uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __local *object, | ||
4143 | + uint operand, memory_order order); | ||
4144 | +int __ovld atomic_fetch_min_explicit(volatile atomic_int __global *object, | ||
4145 | + int operand, memory_order order); | ||
4146 | +int __ovld atomic_fetch_min_explicit(volatile atomic_int __local *object, | ||
4147 | + int operand, memory_order order); | ||
4148 | +uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __global *object, | ||
4149 | + uint operand, memory_order order); | ||
4150 | +uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __local *object, | ||
4151 | + uint operand, memory_order order); | ||
4152 | +int __ovld atomic_fetch_max_explicit(volatile atomic_int __global *object, | ||
4153 | + int operand, memory_order order); | ||
4154 | +int __ovld atomic_fetch_max_explicit(volatile atomic_int __local *object, | ||
4155 | + int operand, memory_order order); | ||
4156 | +uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __global *object, | ||
4157 | + uint operand, memory_order order); | ||
4158 | +uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __local *object, | ||
4159 | + uint operand, memory_order order); | ||
4160 | +#endif // __opencl_c_atomic_scope_device | ||
4161 | +int __ovld atomic_fetch_add_explicit(volatile atomic_int __global *object, | ||
4162 | + int operand, memory_order order, | ||
4163 | + memory_scope scope); | ||
4164 | +int __ovld atomic_fetch_add_explicit(volatile atomic_int __local *object, | ||
4165 | + int operand, memory_order order, | ||
4166 | + memory_scope scope); | ||
4167 | +uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __global *object, | ||
4168 | + uint operand, memory_order order, | ||
4169 | + memory_scope scope); | ||
4170 | +uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __local *object, | ||
4171 | + uint operand, memory_order order, | ||
4172 | + memory_scope scope); | ||
4173 | +int __ovld atomic_fetch_sub_explicit(volatile atomic_int __global *object, | ||
4174 | + int operand, memory_order order, | ||
4175 | + memory_scope scope); | ||
4176 | +int __ovld atomic_fetch_sub_explicit(volatile atomic_int __local *object, | ||
4177 | + int operand, memory_order order, | ||
4178 | + memory_scope scope); | ||
4179 | +uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __global *object, | ||
4180 | + uint operand, memory_order order, | ||
4181 | + memory_scope scope); | ||
4182 | +uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __local *object, | ||
4183 | + uint operand, memory_order order, | ||
4184 | + memory_scope scope); | ||
4185 | +int __ovld atomic_fetch_or_explicit(volatile atomic_int __global *object, | ||
4186 | + int operand, memory_order order, | ||
4187 | + memory_scope scope); | ||
4188 | +int __ovld atomic_fetch_or_explicit(volatile atomic_int __local *object, | ||
4189 | + int operand, memory_order order, | ||
4190 | + memory_scope scope); | ||
4191 | +uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __global *object, | ||
4192 | + uint operand, memory_order order, | ||
4193 | + memory_scope scope); | ||
4194 | +uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __local *object, | ||
4195 | + uint operand, memory_order order, | ||
4196 | + memory_scope scope); | ||
4197 | +int __ovld atomic_fetch_xor_explicit(volatile atomic_int __global *object, | ||
4198 | + int operand, memory_order order, | ||
4199 | + memory_scope scope); | ||
4200 | +int __ovld atomic_fetch_xor_explicit(volatile atomic_int __local *object, | ||
4201 | + int operand, memory_order order, | ||
4202 | + memory_scope scope); | ||
4203 | +uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __global *object, | ||
4204 | + uint operand, memory_order order, | ||
4205 | + memory_scope scope); | ||
4206 | +uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __local *object, | ||
4207 | + uint operand, memory_order order, | ||
4208 | + memory_scope scope); | ||
4209 | +int __ovld atomic_fetch_and_explicit(volatile atomic_int __global *object, | ||
4210 | + int operand, memory_order order, | ||
4211 | + memory_scope scope); | ||
4212 | +int __ovld atomic_fetch_and_explicit(volatile atomic_int __local *object, | ||
4213 | + int operand, memory_order order, | ||
4214 | + memory_scope scope); | ||
4215 | +uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __global *object, | ||
4216 | + uint operand, memory_order order, | ||
4217 | + memory_scope scope); | ||
4218 | +uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __local *object, | ||
4219 | + uint operand, memory_order order, | ||
4220 | + memory_scope scope); | ||
4221 | +int __ovld atomic_fetch_min_explicit(volatile atomic_int __global *object, | ||
4222 | + int operand, memory_order order, | ||
4223 | + memory_scope scope); | ||
4224 | +int __ovld atomic_fetch_min_explicit(volatile atomic_int __local *object, | ||
4225 | + int operand, memory_order order, | ||
4226 | + memory_scope scope); | ||
4227 | +uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __global *object, | ||
4228 | + uint operand, memory_order order, | ||
4229 | + memory_scope scope); | ||
4230 | +uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __local *object, | ||
4231 | + uint operand, memory_order order, | ||
4232 | + memory_scope scope); | ||
4233 | +int __ovld atomic_fetch_max_explicit(volatile atomic_int __global *object, | ||
4234 | + int operand, memory_order order, | ||
4235 | + memory_scope scope); | ||
4236 | +int __ovld atomic_fetch_max_explicit(volatile atomic_int __local *object, | ||
4237 | + int operand, memory_order order, | ||
4238 | + memory_scope scope); | ||
4239 | +uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __global *object, | ||
4240 | + uint operand, memory_order order, | ||
4241 | + memory_scope scope); | ||
4242 | +uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __local *object, | ||
4243 | + uint operand, memory_order order, | ||
4244 | + memory_scope scope); | ||
4245 | + | ||
4246 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4247 | +#ifdef __opencl_c_atomic_scope_device | ||
4248 | +long __ovld atomic_fetch_add_explicit(volatile atomic_long __global *object, | ||
4249 | + long operand, memory_order order); | ||
4250 | +long __ovld atomic_fetch_add_explicit(volatile atomic_long __local *object, | ||
4251 | + long operand, memory_order order); | ||
4252 | +ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __global *object, | ||
4253 | + ulong operand, memory_order order); | ||
4254 | +ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __local *object, | ||
4255 | + ulong operand, memory_order order); | ||
4256 | +long __ovld atomic_fetch_sub_explicit(volatile atomic_long __global *object, | ||
4257 | + long operand, memory_order order); | ||
4258 | +long __ovld atomic_fetch_sub_explicit(volatile atomic_long __local *object, | ||
4259 | + long operand, memory_order order); | ||
4260 | +ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __global *object, | ||
4261 | + ulong operand, memory_order order); | ||
4262 | +ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __local *object, | ||
4263 | + ulong operand, memory_order order); | ||
4264 | +long __ovld atomic_fetch_or_explicit(volatile atomic_long __global *object, | ||
4265 | + long operand, memory_order order); | ||
4266 | +long __ovld atomic_fetch_or_explicit(volatile atomic_long __local *object, | ||
4267 | + long operand, memory_order order); | ||
4268 | +ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __global *object, | ||
4269 | + ulong operand, memory_order order); | ||
4270 | +ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __local *object, | ||
4271 | + ulong operand, memory_order order); | ||
4272 | +long __ovld atomic_fetch_xor_explicit(volatile atomic_long __global *object, | ||
4273 | + long operand, memory_order order); | ||
4274 | +long __ovld atomic_fetch_xor_explicit(volatile atomic_long __local *object, | ||
4275 | + long operand, memory_order order); | ||
4276 | +ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __global *object, | ||
4277 | + ulong operand, memory_order order); | ||
4278 | +ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __local *object, | ||
4279 | + ulong operand, memory_order order); | ||
4280 | +long __ovld atomic_fetch_and_explicit(volatile atomic_long __global *object, | ||
4281 | + long operand, memory_order order); | ||
4282 | +long __ovld atomic_fetch_and_explicit(volatile atomic_long __local *object, | ||
4283 | + long operand, memory_order order); | ||
4284 | +ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __global *object, | ||
4285 | + ulong operand, memory_order order); | ||
4286 | +ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __local *object, | ||
4287 | + ulong operand, memory_order order); | ||
4288 | +long __ovld atomic_fetch_min_explicit(volatile atomic_long __global *object, | ||
4289 | + long operand, memory_order order); | ||
4290 | +long __ovld atomic_fetch_min_explicit(volatile atomic_long __local *object, | ||
4291 | + long operand, memory_order order); | ||
4292 | +ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __global *object, | ||
4293 | + ulong operand, memory_order order); | ||
4294 | +ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __local *object, | ||
4295 | + ulong operand, memory_order order); | ||
4296 | +long __ovld atomic_fetch_max_explicit(volatile atomic_long __global *object, | ||
4297 | + long operand, memory_order order); | ||
4298 | +long __ovld atomic_fetch_max_explicit(volatile atomic_long __local *object, | ||
4299 | + long operand, memory_order order); | ||
4300 | +ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __global *object, | ||
4301 | + ulong operand, memory_order order); | ||
4302 | +ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __local *object, | ||
4303 | + ulong operand, memory_order order); | ||
4304 | +#endif // __opencl_c_atomic_scope_device | ||
4305 | +long __ovld atomic_fetch_add_explicit(volatile atomic_long __global *object, | ||
4306 | + long operand, memory_order order, | ||
4307 | + memory_scope scope); | ||
4308 | +long __ovld atomic_fetch_add_explicit(volatile atomic_long __local *object, | ||
4309 | + long operand, memory_order order, | ||
4310 | + memory_scope scope); | ||
4311 | +ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __global *object, | ||
4312 | + ulong operand, memory_order order, | ||
4313 | + memory_scope scope); | ||
4314 | +ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __local *object, | ||
4315 | + ulong operand, memory_order order, | ||
4316 | + memory_scope scope); | ||
4317 | +long __ovld atomic_fetch_sub_explicit(volatile atomic_long __global *object, | ||
4318 | + long operand, memory_order order, | ||
4319 | + memory_scope scope); | ||
4320 | +long __ovld atomic_fetch_sub_explicit(volatile atomic_long __local *object, | ||
4321 | + long operand, memory_order order, | ||
4322 | + memory_scope scope); | ||
4323 | +ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __global *object, | ||
4324 | + ulong operand, memory_order order, | ||
4325 | + memory_scope scope); | ||
4326 | +ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __local *object, | ||
4327 | + ulong operand, memory_order order, | ||
4328 | + memory_scope scope); | ||
4329 | +long __ovld atomic_fetch_or_explicit(volatile atomic_long __global *object, | ||
4330 | + long operand, memory_order order, | ||
4331 | + memory_scope scope); | ||
4332 | +long __ovld atomic_fetch_or_explicit(volatile atomic_long __local *object, | ||
4333 | + long operand, memory_order order, | ||
4334 | + memory_scope scope); | ||
4335 | +ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __global *object, | ||
4336 | + ulong operand, memory_order order, | ||
4337 | + memory_scope scope); | ||
4338 | +ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __local *object, | ||
4339 | + ulong operand, memory_order order, | ||
4340 | + memory_scope scope); | ||
4341 | +long __ovld atomic_fetch_xor_explicit(volatile atomic_long __global *object, | ||
4342 | + long operand, memory_order order, | ||
4343 | + memory_scope scope); | ||
4344 | +long __ovld atomic_fetch_xor_explicit(volatile atomic_long __local *object, | ||
4345 | + long operand, memory_order order, | ||
4346 | + memory_scope scope); | ||
4347 | +ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __global *object, | ||
4348 | + ulong operand, memory_order order, | ||
4349 | + memory_scope scope); | ||
4350 | +ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __local *object, | ||
4351 | + ulong operand, memory_order order, | ||
4352 | + memory_scope scope); | ||
4353 | +long __ovld atomic_fetch_and_explicit(volatile atomic_long __global *object, | ||
4354 | + long operand, memory_order order, | ||
4355 | + memory_scope scope); | ||
4356 | +long __ovld atomic_fetch_and_explicit(volatile atomic_long __local *object, | ||
4357 | + long operand, memory_order order, | ||
4358 | + memory_scope scope); | ||
4359 | +ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __global *object, | ||
4360 | + ulong operand, memory_order order, | ||
4361 | + memory_scope scope); | ||
4362 | +ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __local *object, | ||
4363 | + ulong operand, memory_order order, | ||
4364 | + memory_scope scope); | ||
4365 | +long __ovld atomic_fetch_min_explicit(volatile atomic_long __global *object, | ||
4366 | + long operand, memory_order order, | ||
4367 | + memory_scope scope); | ||
4368 | +long __ovld atomic_fetch_min_explicit(volatile atomic_long __local *object, | ||
4369 | + long operand, memory_order order, | ||
4370 | + memory_scope scope); | ||
4371 | +ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __global *object, | ||
4372 | + ulong operand, memory_order order, | ||
4373 | + memory_scope scope); | ||
4374 | +ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __local *object, | ||
4375 | + ulong operand, memory_order order, | ||
4376 | + memory_scope scope); | ||
4377 | +long __ovld atomic_fetch_max_explicit(volatile atomic_long __global *object, | ||
4378 | + long operand, memory_order order, | ||
4379 | + memory_scope scope); | ||
4380 | +long __ovld atomic_fetch_max_explicit(volatile atomic_long __local *object, | ||
4381 | + long operand, memory_order order, | ||
4382 | + memory_scope scope); | ||
4383 | +ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __global *object, | ||
4384 | + ulong operand, memory_order order, | ||
4385 | + memory_scope scope); | ||
4386 | +ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __local *object, | ||
4387 | + ulong operand, memory_order order, | ||
4388 | + memory_scope scope); | ||
4389 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4390 | + // defined(cl_khr_int64_extended_atomics) | ||
4391 | + | ||
4392 | +// OpenCL v2.0 s6.13.11.7.5: | ||
4393 | +// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument | ||
4394 | +// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be | ||
4395 | +// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t. | ||
4396 | + | ||
4397 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4398 | +#ifdef __opencl_c_atomic_scope_device | ||
4399 | +uintptr_t __ovld | ||
4400 | +atomic_fetch_add_explicit(volatile atomic_uintptr_t __global *object, | ||
4401 | + ptrdiff_t operand, memory_order order); | ||
4402 | +uintptr_t __ovld | ||
4403 | +atomic_fetch_add_explicit(volatile atomic_uintptr_t __local *object, | ||
4404 | + ptrdiff_t operand, memory_order order); | ||
4405 | +uintptr_t __ovld | ||
4406 | +atomic_fetch_sub_explicit(volatile atomic_uintptr_t __global *object, | ||
4407 | + ptrdiff_t operand, memory_order order); | ||
4408 | +uintptr_t __ovld | ||
4409 | +atomic_fetch_sub_explicit(volatile atomic_uintptr_t __local *object, | ||
4410 | + ptrdiff_t operand, memory_order order); | ||
4411 | +uintptr_t __ovld | ||
4412 | +atomic_fetch_or_explicit(volatile atomic_uintptr_t __global *object, | ||
4413 | + intptr_t operand, memory_order order); | ||
4414 | +uintptr_t __ovld | ||
4415 | +atomic_fetch_or_explicit(volatile atomic_uintptr_t __local *object, | ||
4416 | + intptr_t operand, memory_order order); | ||
4417 | +uintptr_t __ovld | ||
4418 | +atomic_fetch_xor_explicit(volatile atomic_uintptr_t __global *object, | ||
4419 | + intptr_t operand, memory_order order); | ||
4420 | +uintptr_t __ovld | ||
4421 | +atomic_fetch_xor_explicit(volatile atomic_uintptr_t __local *object, | ||
4422 | + intptr_t operand, memory_order order); | ||
4423 | +uintptr_t __ovld | ||
4424 | +atomic_fetch_and_explicit(volatile atomic_uintptr_t __global *object, | ||
4425 | + intptr_t operand, memory_order order); | ||
4426 | +uintptr_t __ovld | ||
4427 | +atomic_fetch_and_explicit(volatile atomic_uintptr_t __local *object, | ||
4428 | + intptr_t operand, memory_order order); | ||
4429 | +uintptr_t __ovld | ||
4430 | +atomic_fetch_min_explicit(volatile atomic_uintptr_t __global *object, | ||
4431 | + intptr_t operand, memory_order order); | ||
4432 | +uintptr_t __ovld | ||
4433 | +atomic_fetch_min_explicit(volatile atomic_uintptr_t __local *object, | ||
4434 | + intptr_t operand, memory_order order); | ||
4435 | +uintptr_t __ovld | ||
4436 | +atomic_fetch_max_explicit(volatile atomic_uintptr_t __global *object, | ||
4437 | + intptr_t operand, memory_order order); | ||
4438 | +uintptr_t __ovld | ||
4439 | +atomic_fetch_max_explicit(volatile atomic_uintptr_t __local *object, | ||
4440 | + intptr_t operand, memory_order order); | ||
4441 | +intptr_t __ovld | ||
4442 | +atomic_fetch_or_explicit(volatile atomic_intptr_t __global *object, | ||
4443 | + uintptr_t operand, memory_order order); | ||
4444 | +intptr_t __ovld | ||
4445 | +atomic_fetch_or_explicit(volatile atomic_intptr_t __local *object, | ||
4446 | + uintptr_t operand, memory_order order); | ||
4447 | +intptr_t __ovld | ||
4448 | +atomic_fetch_xor_explicit(volatile atomic_intptr_t __global *object, | ||
4449 | + uintptr_t operand, memory_order order); | ||
4450 | +intptr_t __ovld | ||
4451 | +atomic_fetch_xor_explicit(volatile atomic_intptr_t __local *object, | ||
4452 | + uintptr_t operand, memory_order order); | ||
4453 | +intptr_t __ovld | ||
4454 | +atomic_fetch_and_explicit(volatile atomic_intptr_t __global *object, | ||
4455 | + uintptr_t operand, memory_order order); | ||
4456 | +intptr_t __ovld | ||
4457 | +atomic_fetch_and_explicit(volatile atomic_intptr_t __local *object, | ||
4458 | + uintptr_t operand, memory_order order); | ||
4459 | +intptr_t __ovld | ||
4460 | +atomic_fetch_min_explicit(volatile atomic_intptr_t __global *object, | ||
4461 | + uintptr_t operand, memory_order order); | ||
4462 | +intptr_t __ovld | ||
4463 | +atomic_fetch_min_explicit(volatile atomic_intptr_t __local *object, | ||
4464 | + uintptr_t operand, memory_order order); | ||
4465 | +intptr_t __ovld | ||
4466 | +atomic_fetch_max_explicit(volatile atomic_intptr_t __global *object, | ||
4467 | + uintptr_t operand, memory_order order); | ||
4468 | +intptr_t __ovld | ||
4469 | +atomic_fetch_max_explicit(volatile atomic_intptr_t __local *object, | ||
4470 | + uintptr_t operand, memory_order order); | ||
4471 | +#endif // __opencl_c_atomic_scope_device | ||
4472 | +uintptr_t __ovld atomic_fetch_add_explicit( | ||
4473 | + volatile atomic_uintptr_t __global *object, ptrdiff_t operand, | ||
4474 | + memory_order order, memory_scope scope); | ||
4475 | +uintptr_t __ovld atomic_fetch_add_explicit( | ||
4476 | + volatile atomic_uintptr_t __local *object, ptrdiff_t operand, | ||
4477 | + memory_order order, memory_scope scope); | ||
4478 | +uintptr_t __ovld atomic_fetch_sub_explicit( | ||
4479 | + volatile atomic_uintptr_t __global *object, ptrdiff_t operand, | ||
4480 | + memory_order order, memory_scope scope); | ||
4481 | +uintptr_t __ovld atomic_fetch_sub_explicit( | ||
4482 | + volatile atomic_uintptr_t __local *object, ptrdiff_t operand, | ||
4483 | + memory_order order, memory_scope scope); | ||
4484 | + | ||
4485 | +uintptr_t __ovld atomic_fetch_or_explicit( | ||
4486 | + volatile atomic_uintptr_t __global *object, intptr_t operand, | ||
4487 | + memory_order order, memory_scope scope); | ||
4488 | +uintptr_t __ovld atomic_fetch_or_explicit( | ||
4489 | + volatile atomic_uintptr_t __local *object, intptr_t operand, | ||
4490 | + memory_order order, memory_scope scope); | ||
4491 | +uintptr_t __ovld atomic_fetch_xor_explicit( | ||
4492 | + volatile atomic_uintptr_t __global *object, intptr_t operand, | ||
4493 | + memory_order order, memory_scope scope); | ||
4494 | +uintptr_t __ovld atomic_fetch_xor_explicit( | ||
4495 | + volatile atomic_uintptr_t __local *object, intptr_t operand, | ||
4496 | + memory_order order, memory_scope scope); | ||
4497 | +uintptr_t __ovld atomic_fetch_and_explicit( | ||
4498 | + volatile atomic_uintptr_t __global *object, intptr_t operand, | ||
4499 | + memory_order order, memory_scope scope); | ||
4500 | +uintptr_t __ovld atomic_fetch_and_explicit( | ||
4501 | + volatile atomic_uintptr_t __local *object, intptr_t operand, | ||
4502 | + memory_order order, memory_scope scope); | ||
4503 | +uintptr_t __ovld atomic_fetch_min_explicit( | ||
4504 | + volatile atomic_uintptr_t __global *object, intptr_t operand, | ||
4505 | + memory_order order, memory_scope scope); | ||
4506 | +uintptr_t __ovld atomic_fetch_min_explicit( | ||
4507 | + volatile atomic_uintptr_t __local *object, intptr_t operand, | ||
4508 | + memory_order order, memory_scope scope); | ||
4509 | +uintptr_t __ovld atomic_fetch_max_explicit( | ||
4510 | + volatile atomic_uintptr_t __global *object, intptr_t operand, | ||
4511 | + memory_order order, memory_scope scope); | ||
4512 | +uintptr_t __ovld atomic_fetch_max_explicit( | ||
4513 | + volatile atomic_uintptr_t __local *object, intptr_t operand, | ||
4514 | + memory_order order, memory_scope scope); | ||
4515 | + | ||
4516 | +intptr_t __ovld atomic_fetch_or_explicit( | ||
4517 | + volatile atomic_intptr_t __global *object, uintptr_t operand, | ||
4518 | + memory_order order, memory_scope scope); | ||
4519 | +intptr_t __ovld atomic_fetch_or_explicit( | ||
4520 | + volatile atomic_intptr_t __local *object, uintptr_t operand, | ||
4521 | + memory_order order, memory_scope scope); | ||
4522 | +intptr_t __ovld atomic_fetch_xor_explicit( | ||
4523 | + volatile atomic_intptr_t __global *object, uintptr_t operand, | ||
4524 | + memory_order order, memory_scope scope); | ||
4525 | +intptr_t __ovld atomic_fetch_xor_explicit( | ||
4526 | + volatile atomic_intptr_t __local *object, uintptr_t operand, | ||
4527 | + memory_order order, memory_scope scope); | ||
4528 | +intptr_t __ovld atomic_fetch_and_explicit( | ||
4529 | + volatile atomic_intptr_t __global *object, uintptr_t operand, | ||
4530 | + memory_order order, memory_scope scope); | ||
4531 | +intptr_t __ovld atomic_fetch_and_explicit( | ||
4532 | + volatile atomic_intptr_t __local *object, uintptr_t operand, | ||
4533 | + memory_order order, memory_scope scope); | ||
4534 | +intptr_t __ovld atomic_fetch_min_explicit( | ||
4535 | + volatile atomic_intptr_t __global *object, uintptr_t operand, | ||
4536 | + memory_order order, memory_scope scope); | ||
4537 | +intptr_t __ovld atomic_fetch_min_explicit( | ||
4538 | + volatile atomic_intptr_t __local *object, uintptr_t operand, | ||
4539 | + memory_order order, memory_scope scope); | ||
4540 | +intptr_t __ovld atomic_fetch_max_explicit( | ||
4541 | + volatile atomic_intptr_t __global *object, uintptr_t operand, | ||
4542 | + memory_order order, memory_scope scope); | ||
4543 | +intptr_t __ovld atomic_fetch_max_explicit( | ||
4544 | + volatile atomic_intptr_t __local *object, uintptr_t operand, | ||
4545 | + memory_order order, memory_scope scope); | ||
4546 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4547 | + // defined(cl_khr_int64_extended_atomics) | ||
4548 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4549 | |||
4550 | // atomic_store() | ||
4551 | |||
4552 | +#if defined(__opencl_c_atomic_scope_device) && \ | ||
4553 | + defined(__opencl_c_atomic_order_seq_cst) | ||
4554 | + | ||
4555 | +#ifdef __opencl_c_generic_address_space | ||
4556 | void __ovld atomic_store(volatile atomic_int *object, int desired); | ||
4557 | -void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order); | ||
4558 | -void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); | ||
4559 | void __ovld atomic_store(volatile atomic_uint *object, uint desired); | ||
4560 | -void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order); | ||
4561 | -void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); | ||
4562 | void __ovld atomic_store(volatile atomic_float *object, float desired); | ||
4563 | -void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order); | ||
4564 | -void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); | ||
4565 | #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4566 | -#ifdef cl_khr_fp64 | ||
4567 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4568 | void __ovld atomic_store(volatile atomic_double *object, double desired); | ||
4569 | -void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order); | ||
4570 | -void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); | ||
4571 | -#endif //cl_khr_fp64 | ||
4572 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4573 | void __ovld atomic_store(volatile atomic_long *object, long desired); | ||
4574 | -void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order); | ||
4575 | -void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); | ||
4576 | void __ovld atomic_store(volatile atomic_ulong *object, ulong desired); | ||
4577 | -void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order); | ||
4578 | -void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope); | ||
4579 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4580 | + // defined(cl_khr_int64_extended_atomics) | ||
4581 | +#endif // __opencl_c_generic_address_space | ||
4582 | + | ||
4583 | +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4584 | +void __ovld atomic_store(volatile atomic_int __global *object, int desired); | ||
4585 | +void __ovld atomic_store(volatile atomic_int __local *object, int desired); | ||
4586 | +void __ovld atomic_store(volatile atomic_uint __global *object, uint desired); | ||
4587 | +void __ovld atomic_store(volatile atomic_uint __local *object, uint desired); | ||
4588 | +void __ovld atomic_store(volatile atomic_float __global *object, float desired); | ||
4589 | +void __ovld atomic_store(volatile atomic_float __local *object, float desired); | ||
4590 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4591 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4592 | +void __ovld atomic_store(volatile atomic_double __global *object, | ||
4593 | + double desired); | ||
4594 | +void __ovld atomic_store(volatile atomic_double __local *object, | ||
4595 | + double desired); | ||
4596 | #endif | ||
4597 | +void __ovld atomic_store(volatile atomic_long __global *object, long desired); | ||
4598 | +void __ovld atomic_store(volatile atomic_long __local *object, long desired); | ||
4599 | +void __ovld atomic_store(volatile atomic_ulong __global *object, ulong desired); | ||
4600 | +void __ovld atomic_store(volatile atomic_ulong __local *object, ulong desired); | ||
4601 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4602 | + // defined(cl_khr_int64_extended_atomics) | ||
4603 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4604 | + | ||
4605 | +#endif // defined(__opencl_c_atomic_scope_device) && | ||
4606 | + // defined(__opencl_c_atomic_order_seq_cst) | ||
4607 | + | ||
4608 | +#ifdef __opencl_c_generic_address_space | ||
4609 | +#ifdef __opencl_c_atomic_scope_device | ||
4610 | +void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, | ||
4611 | + memory_order order); | ||
4612 | +void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, | ||
4613 | + memory_order order); | ||
4614 | +void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, | ||
4615 | + memory_order order); | ||
4616 | +#endif // __opencl_c_atomic_scope_device | ||
4617 | +void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, | ||
4618 | + memory_order order, memory_scope scope); | ||
4619 | +void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, | ||
4620 | + memory_order order, memory_scope scope); | ||
4621 | +void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, | ||
4622 | + memory_order order, memory_scope scope); | ||
4623 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4624 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4625 | +#ifdef __opencl_c_atomic_scope_device | ||
4626 | +void __ovld atomic_store_explicit(volatile atomic_double *object, | ||
4627 | + double desired, memory_order order); | ||
4628 | +#endif //__opencl_c_atomic_scope_device | ||
4629 | +void __ovld atomic_store_explicit(volatile atomic_double *object, | ||
4630 | + double desired, memory_order order, | ||
4631 | + memory_scope scope); | ||
4632 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4633 | +#ifdef __opencl_c_atomic_scope_device | ||
4634 | +void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, | ||
4635 | + memory_order order); | ||
4636 | +void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, | ||
4637 | + memory_order order); | ||
4638 | +#endif //__opencl_c_atomic_scope_device | ||
4639 | +void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, | ||
4640 | + memory_order order, memory_scope scope); | ||
4641 | +void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, | ||
4642 | + memory_order order, memory_scope scope); | ||
4643 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4644 | + // defined(cl_khr_int64_extended_atomics) | ||
4645 | +#endif // __opencl_c_generic_address_space | ||
4646 | + | ||
4647 | +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4648 | +#ifdef __opencl_c_atomic_scope_device | ||
4649 | +void __ovld atomic_store_explicit(volatile atomic_int __global *object, | ||
4650 | + int desired, memory_order order); | ||
4651 | +void __ovld atomic_store_explicit(volatile atomic_int __local *object, | ||
4652 | + int desired, memory_order order); | ||
4653 | +void __ovld atomic_store_explicit(volatile atomic_uint __global *object, | ||
4654 | + uint desired, memory_order order); | ||
4655 | +void __ovld atomic_store_explicit(volatile atomic_uint __local *object, | ||
4656 | + uint desired, memory_order order); | ||
4657 | +void __ovld atomic_store_explicit(volatile atomic_float __global *object, | ||
4658 | + float desired, memory_order order); | ||
4659 | +void __ovld atomic_store_explicit(volatile atomic_float __local *object, | ||
4660 | + float desired, memory_order order); | ||
4661 | +#endif // __opencl_c_atomic_scope_device | ||
4662 | +void __ovld atomic_store_explicit(volatile atomic_int __global *object, | ||
4663 | + int desired, memory_order order, | ||
4664 | + memory_scope scope); | ||
4665 | +void __ovld atomic_store_explicit(volatile atomic_int __local *object, | ||
4666 | + int desired, memory_order order, | ||
4667 | + memory_scope scope); | ||
4668 | +void __ovld atomic_store_explicit(volatile atomic_uint __global *object, | ||
4669 | + uint desired, memory_order order, | ||
4670 | + memory_scope scope); | ||
4671 | +void __ovld atomic_store_explicit(volatile atomic_uint __local *object, | ||
4672 | + uint desired, memory_order order, | ||
4673 | + memory_scope scope); | ||
4674 | +void __ovld atomic_store_explicit(volatile atomic_float __global *object, | ||
4675 | + float desired, memory_order order, | ||
4676 | + memory_scope scope); | ||
4677 | +void __ovld atomic_store_explicit(volatile atomic_float __local *object, | ||
4678 | + float desired, memory_order order, | ||
4679 | + memory_scope scope); | ||
4680 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4681 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4682 | +#ifdef __opencl_c_atomic_scope_device | ||
4683 | +void __ovld atomic_store_explicit(volatile atomic_double __global *object, | ||
4684 | + double desired, memory_order order); | ||
4685 | +void __ovld atomic_store_explicit(volatile atomic_double __local *object, | ||
4686 | + double desired, memory_order order); | ||
4687 | +#endif //__opencl_c_atomic_scope_device | ||
4688 | +void __ovld atomic_store_explicit(volatile atomic_double __global *object, | ||
4689 | + double desired, memory_order order, | ||
4690 | + memory_scope scope); | ||
4691 | +void __ovld atomic_store_explicit(volatile atomic_double __local *object, | ||
4692 | + double desired, memory_order order, | ||
4693 | + memory_scope scope); | ||
4694 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4695 | +#ifdef __opencl_c_atomic_scope_device | ||
4696 | +void __ovld atomic_store_explicit(volatile atomic_long __global *object, | ||
4697 | + long desired, memory_order order); | ||
4698 | +void __ovld atomic_store_explicit(volatile atomic_long __local *object, | ||
4699 | + long desired, memory_order order); | ||
4700 | +void __ovld atomic_store_explicit(volatile atomic_ulong __global *object, | ||
4701 | + ulong desired, memory_order order); | ||
4702 | +void __ovld atomic_store_explicit(volatile atomic_ulong __local *object, | ||
4703 | + ulong desired, memory_order order); | ||
4704 | +#endif // __opencl_c_atomic_scope_device | ||
4705 | +void __ovld atomic_store_explicit(volatile atomic_long __global *object, | ||
4706 | + long desired, memory_order order, | ||
4707 | + memory_scope scope); | ||
4708 | +void __ovld atomic_store_explicit(volatile atomic_long __local *object, | ||
4709 | + long desired, memory_order order, | ||
4710 | + memory_scope scope); | ||
4711 | +void __ovld atomic_store_explicit(volatile atomic_ulong __global *object, | ||
4712 | + ulong desired, memory_order order, | ||
4713 | + memory_scope scope); | ||
4714 | +void __ovld atomic_store_explicit(volatile atomic_ulong __local *object, | ||
4715 | + ulong desired, memory_order order, | ||
4716 | + memory_scope scope); | ||
4717 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4718 | + // defined(cl_khr_int64_extended_atomics) | ||
4719 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4720 | |||
4721 | // atomic_load() | ||
4722 | - | ||
4723 | +#ifdef __opencl_c_generic_address_space | ||
4724 | +#if defined(__opencl_c_atomic_scope_device) && \ | ||
4725 | + defined(__opencl_c_atomic_order_seq_cst) | ||
4726 | int __ovld atomic_load(volatile atomic_int *object); | ||
4727 | -int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order); | ||
4728 | -int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope); | ||
4729 | uint __ovld atomic_load(volatile atomic_uint *object); | ||
4730 | -uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order); | ||
4731 | -uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope); | ||
4732 | float __ovld atomic_load(volatile atomic_float *object); | ||
4733 | -float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order); | ||
4734 | -float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope); | ||
4735 | #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4736 | -#ifdef cl_khr_fp64 | ||
4737 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4738 | double __ovld atomic_load(volatile atomic_double *object); | ||
4739 | -double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order); | ||
4740 | -double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope); | ||
4741 | -#endif //cl_khr_fp64 | ||
4742 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4743 | long __ovld atomic_load(volatile atomic_long *object); | ||
4744 | -long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order); | ||
4745 | -long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope); | ||
4746 | ulong __ovld atomic_load(volatile atomic_ulong *object); | ||
4747 | -ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order); | ||
4748 | -ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope); | ||
4749 | -#endif | ||
4750 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4751 | + // defined(cl_khr_int64_extended_atomics) | ||
4752 | +#endif // defined(__opencl_c_atomic_scope_device) && | ||
4753 | + // defined(__opencl_c_atomic_order_seq_cst) | ||
4754 | +#endif //__opencl_c_generic_address_space | ||
4755 | + | ||
4756 | +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4757 | +#if defined(__opencl_c_atomic_scope_device) && \ | ||
4758 | + defined(__opencl_c_atomic_order_seq_cst) | ||
4759 | +int __ovld atomic_load(volatile atomic_int __global *object); | ||
4760 | +int __ovld atomic_load(volatile atomic_int __local *object); | ||
4761 | +uint __ovld atomic_load(volatile atomic_uint __global *object); | ||
4762 | +uint __ovld atomic_load(volatile atomic_uint __local *object); | ||
4763 | +float __ovld atomic_load(volatile atomic_float __global *object); | ||
4764 | +float __ovld atomic_load(volatile atomic_float __local *object); | ||
4765 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4766 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4767 | +double __ovld atomic_load(volatile atomic_double __global *object); | ||
4768 | +double __ovld atomic_load(volatile atomic_double __local *object); | ||
4769 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4770 | +long __ovld atomic_load(volatile atomic_long __global *object); | ||
4771 | +long __ovld atomic_load(volatile atomic_long __local *object); | ||
4772 | +ulong __ovld atomic_load(volatile atomic_ulong __global *object); | ||
4773 | +ulong __ovld atomic_load(volatile atomic_ulong __local *object); | ||
4774 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4775 | + // defined(cl_khr_int64_extended_atomics) | ||
4776 | +#endif // defined(__opencl_c_atomic_scope_device) && | ||
4777 | + // defined(__opencl_c_atomic_order_seq_cst) | ||
4778 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4779 | + | ||
4780 | +#ifdef __opencl_c_generic_address_space | ||
4781 | +#ifdef __opencl_c_atomic_scope_device | ||
4782 | +int __ovld atomic_load_explicit(volatile atomic_int *object, | ||
4783 | + memory_order order); | ||
4784 | +uint __ovld atomic_load_explicit(volatile atomic_uint *object, | ||
4785 | + memory_order order); | ||
4786 | +float __ovld atomic_load_explicit(volatile atomic_float *object, | ||
4787 | + memory_order order); | ||
4788 | +#endif // __opencl_c_atomic_scope_device | ||
4789 | +int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, | ||
4790 | + memory_scope scope); | ||
4791 | +uint __ovld atomic_load_explicit(volatile atomic_uint *object, | ||
4792 | + memory_order order, memory_scope scope); | ||
4793 | +float __ovld atomic_load_explicit(volatile atomic_float *object, | ||
4794 | + memory_order order, memory_scope scope); | ||
4795 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4796 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4797 | +#ifdef __opencl_c_atomic_scope_device | ||
4798 | +double __ovld atomic_load_explicit(volatile atomic_double *object, | ||
4799 | + memory_order order); | ||
4800 | +#endif // __opencl_c_atomic_scope_device | ||
4801 | +double __ovld atomic_load_explicit(volatile atomic_double *object, | ||
4802 | + memory_order order, memory_scope scope); | ||
4803 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4804 | +#ifdef __opencl_c_atomic_scope_device | ||
4805 | +long __ovld atomic_load_explicit(volatile atomic_long *object, | ||
4806 | + memory_order order); | ||
4807 | +ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, | ||
4808 | + memory_order order); | ||
4809 | +#endif //__opencl_c_atomic_scope_device | ||
4810 | +long __ovld atomic_load_explicit(volatile atomic_long *object, | ||
4811 | + memory_order order, memory_scope scope); | ||
4812 | +ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, | ||
4813 | + memory_order order, memory_scope scope); | ||
4814 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4815 | + // defined(cl_khr_int64_extended_atomics) | ||
4816 | +#endif // __opencl_c_generic_address_space | ||
4817 | + | ||
4818 | +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4819 | +#ifdef __opencl_c_atomic_scope_device | ||
4820 | +int __ovld atomic_load_explicit(volatile atomic_int __global *object, | ||
4821 | + memory_order order); | ||
4822 | +int __ovld atomic_load_explicit(volatile atomic_int __local *object, | ||
4823 | + memory_order order); | ||
4824 | +uint __ovld atomic_load_explicit(volatile atomic_uint __global *object, | ||
4825 | + memory_order order); | ||
4826 | +uint __ovld atomic_load_explicit(volatile atomic_uint __local *object, | ||
4827 | + memory_order order); | ||
4828 | +float __ovld atomic_load_explicit(volatile atomic_float __global *object, | ||
4829 | + memory_order order); | ||
4830 | +float __ovld atomic_load_explicit(volatile atomic_float __local *object, | ||
4831 | + memory_order order); | ||
4832 | +#endif // __opencl_c_atomic_scope_device | ||
4833 | +int __ovld atomic_load_explicit(volatile atomic_int __global *object, | ||
4834 | + memory_order order, memory_scope scope); | ||
4835 | +int __ovld atomic_load_explicit(volatile atomic_int __local *object, | ||
4836 | + memory_order order, memory_scope scope); | ||
4837 | +uint __ovld atomic_load_explicit(volatile atomic_uint __global *object, | ||
4838 | + memory_order order, memory_scope scope); | ||
4839 | +uint __ovld atomic_load_explicit(volatile atomic_uint __local *object, | ||
4840 | + memory_order order, memory_scope scope); | ||
4841 | +float __ovld atomic_load_explicit(volatile atomic_float __global *object, | ||
4842 | + memory_order order, memory_scope scope); | ||
4843 | +float __ovld atomic_load_explicit(volatile atomic_float __local *object, | ||
4844 | + memory_order order, memory_scope scope); | ||
4845 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4846 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4847 | +#ifdef __opencl_c_atomic_scope_device | ||
4848 | +double __ovld atomic_load_explicit(volatile atomic_double __global *object, | ||
4849 | + memory_order order); | ||
4850 | +double __ovld atomic_load_explicit(volatile atomic_double __local *object, | ||
4851 | + memory_order order); | ||
4852 | +#endif // __opencl_c_atomic_scope_device | ||
4853 | +double __ovld atomic_load_explicit(volatile atomic_double __global *object, | ||
4854 | + memory_order order, memory_scope scope); | ||
4855 | +double __ovld atomic_load_explicit(volatile atomic_double __local *object, | ||
4856 | + memory_order order, memory_scope scope); | ||
4857 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4858 | +#ifdef __opencl_c_atomic_scope_device | ||
4859 | +long __ovld atomic_load_explicit(volatile atomic_long __global *object, | ||
4860 | + memory_order order); | ||
4861 | +long __ovld atomic_load_explicit(volatile atomic_long __local *object, | ||
4862 | + memory_order order); | ||
4863 | +ulong __ovld atomic_load_explicit(volatile atomic_ulong __global *object, | ||
4864 | + memory_order order); | ||
4865 | +ulong __ovld atomic_load_explicit(volatile atomic_ulong __local *object, | ||
4866 | + memory_order order); | ||
4867 | +#endif // __opencl_c_atomic_scope_device | ||
4868 | +long __ovld atomic_load_explicit(volatile atomic_long __global *object, | ||
4869 | + memory_order order, memory_scope scope); | ||
4870 | +long __ovld atomic_load_explicit(volatile atomic_long __local *object, | ||
4871 | + memory_order order, memory_scope scope); | ||
4872 | +ulong __ovld atomic_load_explicit(volatile atomic_ulong __global *object, | ||
4873 | + memory_order order, memory_scope scope); | ||
4874 | +ulong __ovld atomic_load_explicit(volatile atomic_ulong __local *object, | ||
4875 | + memory_order order, memory_scope scope); | ||
4876 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4877 | + // defined(cl_khr_int64_extended_atomics) | ||
4878 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4879 | |||
4880 | // atomic_exchange() | ||
4881 | - | ||
4882 | +#ifdef __opencl_c_generic_address_space | ||
4883 | +#if defined(__opencl_c_atomic_scope_device) && \ | ||
4884 | + defined(__opencl_c_atomic_order_seq_cst) | ||
4885 | int __ovld atomic_exchange(volatile atomic_int *object, int desired); | ||
4886 | -int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order); | ||
4887 | -int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope); | ||
4888 | uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired); | ||
4889 | -uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order); | ||
4890 | -uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope); | ||
4891 | float __ovld atomic_exchange(volatile atomic_float *object, float desired); | ||
4892 | -float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order); | ||
4893 | -float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope); | ||
4894 | #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4895 | -#ifdef cl_khr_fp64 | ||
4896 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4897 | double __ovld atomic_exchange(volatile atomic_double *object, double desired); | ||
4898 | -double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order); | ||
4899 | -double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope); | ||
4900 | -#endif //cl_khr_fp64 | ||
4901 | +#endif | ||
4902 | long __ovld atomic_exchange(volatile atomic_long *object, long desired); | ||
4903 | -long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order); | ||
4904 | -long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope); | ||
4905 | ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired); | ||
4906 | -ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order); | ||
4907 | -ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope); | ||
4908 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4909 | + // defined(cl_khr_int64_extended_atomics) | ||
4910 | +#endif // defined(__opencl_c_atomic_scope_device) && | ||
4911 | + // defined(__opencl_c_atomic_order_seq_cst) | ||
4912 | +#endif // __opencl_c_generic_address_space | ||
4913 | + | ||
4914 | +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4915 | +#if defined(__opencl_c_atomic_scope_device) && \ | ||
4916 | + defined(__opencl_c_atomic_order_seq_cst) | ||
4917 | +int __ovld atomic_exchange(volatile atomic_int __global *object, int desired); | ||
4918 | +int __ovld atomic_exchange(volatile atomic_int __local *object, int desired); | ||
4919 | +uint __ovld atomic_exchange(volatile atomic_uint __global *object, | ||
4920 | + uint desired); | ||
4921 | +uint __ovld atomic_exchange(volatile atomic_uint __local *object, uint desired); | ||
4922 | +float __ovld atomic_exchange(volatile atomic_float __global *object, | ||
4923 | + float desired); | ||
4924 | +float __ovld atomic_exchange(volatile atomic_float __local *object, | ||
4925 | + float desired); | ||
4926 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4927 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4928 | +double __ovld atomic_exchange(volatile atomic_double __global *object, | ||
4929 | + double desired); | ||
4930 | +double __ovld atomic_exchange(volatile atomic_double __local *object, | ||
4931 | + double desired); | ||
4932 | #endif | ||
4933 | +long __ovld atomic_exchange(volatile atomic_long __global *object, | ||
4934 | + long desired); | ||
4935 | +long __ovld atomic_exchange(volatile atomic_long __local *object, long desired); | ||
4936 | +ulong __ovld atomic_exchange(volatile atomic_ulong __global *object, | ||
4937 | + ulong desired); | ||
4938 | +ulong __ovld atomic_exchange(volatile atomic_ulong __local *object, | ||
4939 | + ulong desired); | ||
4940 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4941 | + // defined(cl_khr_int64_extended_atomics) | ||
4942 | +#endif // defined(__opencl_c_atomic_scope_device) && | ||
4943 | + // defined(__opencl_c_atomic_order_seq_cst) | ||
4944 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4945 | + | ||
4946 | +#ifdef __opencl_c_generic_address_space | ||
4947 | +#ifdef __opencl_c_atomic_scope_device | ||
4948 | +int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, | ||
4949 | + memory_order order); | ||
4950 | +uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, | ||
4951 | + memory_order order); | ||
4952 | +float __ovld atomic_exchange_explicit(volatile atomic_float *object, | ||
4953 | + float desired, memory_order order); | ||
4954 | +#endif // __opencl_c_atomic_scope_device | ||
4955 | +int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, | ||
4956 | + memory_order order, memory_scope scope); | ||
4957 | +uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, | ||
4958 | + memory_order order, memory_scope scope); | ||
4959 | +float __ovld atomic_exchange_explicit(volatile atomic_float *object, | ||
4960 | + float desired, memory_order order, | ||
4961 | + memory_scope scope); | ||
4962 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
4963 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4964 | +#ifdef __opencl_c_atomic_scope_device | ||
4965 | +double __ovld atomic_exchange_explicit(volatile atomic_double *object, | ||
4966 | + double desired, memory_order order); | ||
4967 | +#endif // __opencl_c_atomic_scope_device | ||
4968 | +double __ovld atomic_exchange_explicit(volatile atomic_double *object, | ||
4969 | + double desired, memory_order order, | ||
4970 | + memory_scope scope); | ||
4971 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
4972 | +#ifdef __opencl_c_atomic_scope_device | ||
4973 | +long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, | ||
4974 | + memory_order order); | ||
4975 | +ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, | ||
4976 | + ulong desired, memory_order order); | ||
4977 | +#endif // __opencl_c_atomic_scope_device | ||
4978 | +long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, | ||
4979 | + memory_order order, memory_scope scope); | ||
4980 | +ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, | ||
4981 | + ulong desired, memory_order order, | ||
4982 | + memory_scope scope); | ||
4983 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
4984 | + // defined(cl_khr_int64_extended_atomics) | ||
4985 | +#endif // __opencl_c_generic_address_space | ||
4986 | + | ||
4987 | +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
4988 | +#ifdef __opencl_c_atomic_scope_device | ||
4989 | +int __ovld atomic_exchange_explicit(volatile atomic_int __global *object, | ||
4990 | + int desired, memory_order order); | ||
4991 | +int __ovld atomic_exchange_explicit(volatile atomic_int __local *object, | ||
4992 | + int desired, memory_order order); | ||
4993 | +uint __ovld atomic_exchange_explicit(volatile atomic_uint __global *object, | ||
4994 | + uint desired, memory_order order); | ||
4995 | +uint __ovld atomic_exchange_explicit(volatile atomic_uint __local *object, | ||
4996 | + uint desired, memory_order order); | ||
4997 | +float __ovld atomic_exchange_explicit(volatile atomic_float __global *object, | ||
4998 | + float desired, memory_order order); | ||
4999 | +float __ovld atomic_exchange_explicit(volatile atomic_float __local *object, | ||
5000 | + float desired, memory_order order); | ||
5001 | +#endif // __opencl_c_atomic_scope_device | ||
5002 | +int __ovld atomic_exchange_explicit(volatile atomic_int __global *object, | ||
5003 | + int desired, memory_order order, | ||
5004 | + memory_scope scope); | ||
5005 | +int __ovld atomic_exchange_explicit(volatile atomic_int __local *object, | ||
5006 | + int desired, memory_order order, | ||
5007 | + memory_scope scope); | ||
5008 | +uint __ovld atomic_exchange_explicit(volatile atomic_uint __global *object, | ||
5009 | + uint desired, memory_order order, | ||
5010 | + memory_scope scope); | ||
5011 | +uint __ovld atomic_exchange_explicit(volatile atomic_uint __local *object, | ||
5012 | + uint desired, memory_order order, | ||
5013 | + memory_scope scope); | ||
5014 | +float __ovld atomic_exchange_explicit(volatile atomic_float __global *object, | ||
5015 | + float desired, memory_order order, | ||
5016 | + memory_scope scope); | ||
5017 | +float __ovld atomic_exchange_explicit(volatile atomic_float __local *object, | ||
5018 | + float desired, memory_order order, | ||
5019 | + memory_scope scope); | ||
5020 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
5021 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
5022 | +#ifdef __opencl_c_atomic_scope_device | ||
5023 | +double __ovld atomic_exchange_explicit(volatile atomic_double __global *object, | ||
5024 | + double desired, memory_order order); | ||
5025 | +double __ovld atomic_exchange_explicit(volatile atomic_double __local *object, | ||
5026 | + double desired, memory_order order); | ||
5027 | +#endif // __opencl_c_atomic_scope_device | ||
5028 | +double __ovld atomic_exchange_explicit(volatile atomic_double __global *object, | ||
5029 | + double desired, memory_order order, | ||
5030 | + memory_scope scope); | ||
5031 | +double __ovld atomic_exchange_explicit(volatile atomic_double __local *object, | ||
5032 | + double desired, memory_order order, | ||
5033 | + memory_scope scope); | ||
5034 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
5035 | +#ifdef __opencl_c_atomic_scope_device | ||
5036 | +long __ovld atomic_exchange_explicit(volatile atomic_long __global *object, | ||
5037 | + long desired, memory_order order); | ||
5038 | +long __ovld atomic_exchange_explicit(volatile atomic_long __local *object, | ||
5039 | + long desired, memory_order order); | ||
5040 | +ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __global *object, | ||
5041 | + ulong desired, memory_order order); | ||
5042 | +ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __local *object, | ||
5043 | + ulong desired, memory_order order); | ||
5044 | +#endif // __opencl_c_atomic_scope_device | ||
5045 | +long __ovld atomic_exchange_explicit(volatile atomic_long __global *object, | ||
5046 | + long desired, memory_order order, | ||
5047 | + memory_scope scope); | ||
5048 | +long __ovld atomic_exchange_explicit(volatile atomic_long __local *object, | ||
5049 | + long desired, memory_order order, | ||
5050 | + memory_scope scope); | ||
5051 | +ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __global *object, | ||
5052 | + ulong desired, memory_order order, | ||
5053 | + memory_scope scope); | ||
5054 | +ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __local *object, | ||
5055 | + ulong desired, memory_order order, | ||
5056 | + memory_scope scope); | ||
5057 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
5058 | + // defined(cl_khr_int64_extended_atomics) | ||
5059 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
5060 | |||
5061 | // atomic_compare_exchange_strong() and atomic_compare_exchange_weak() | ||
5062 | - | ||
5063 | -bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired); | ||
5064 | -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, | ||
5065 | - int desired, memory_order success, memory_order failure); | ||
5066 | -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected, | ||
5067 | - int desired, memory_order success, memory_order failure, memory_scope scope); | ||
5068 | -bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired); | ||
5069 | -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected, | ||
5070 | - uint desired, memory_order success, memory_order failure); | ||
5071 | -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected, | ||
5072 | - uint desired, memory_order success, memory_order failure, memory_scope scope); | ||
5073 | -bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired); | ||
5074 | -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, | ||
5075 | - int desired, memory_order success, memory_order failure); | ||
5076 | -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected, | ||
5077 | - int desired, memory_order success, memory_order failure, memory_scope scope); | ||
5078 | -bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired); | ||
5079 | -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, | ||
5080 | - uint desired, memory_order success, memory_order failure); | ||
5081 | -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected, | ||
5082 | - uint desired, memory_order success, memory_order failure, memory_scope scope); | ||
5083 | -bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired); | ||
5084 | -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected, | ||
5085 | - float desired, memory_order success, memory_order failure); | ||
5086 | -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected, | ||
5087 | - float desired, memory_order success, memory_order failure, memory_scope scope); | ||
5088 | -bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired); | ||
5089 | -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, | ||
5090 | - float desired, memory_order success, memory_order failure); | ||
5091 | -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected, | ||
5092 | - float desired, memory_order success, memory_order failure, memory_scope scope); | ||
5093 | +#ifdef __opencl_c_generic_address_space | ||
5094 | +#if defined(__opencl_c_atomic_scope_device) && \ | ||
5095 | + defined(__opencl_c_atomic_order_seq_cst) | ||
5096 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, | ||
5097 | + int *expected, int desired); | ||
5098 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, | ||
5099 | + uint *expected, uint desired); | ||
5100 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, | ||
5101 | + int *expected, int desired); | ||
5102 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, | ||
5103 | + uint *expected, uint desired); | ||
5104 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, | ||
5105 | + float *expected, float desired); | ||
5106 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, | ||
5107 | + float *expected, float desired); | ||
5108 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
5109 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
5110 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, | ||
5111 | + double *expected, double desired); | ||
5112 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, | ||
5113 | + double *expected, double desired); | ||
5114 | +#endif | ||
5115 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, | ||
5116 | + long *expected, long desired); | ||
5117 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, | ||
5118 | + long *expected, long desired); | ||
5119 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, | ||
5120 | + ulong *expected, ulong desired); | ||
5121 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, | ||
5122 | + ulong *expected, ulong desired); | ||
5123 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
5124 | + // defined(cl_khr_int64_extended_atomics) | ||
5125 | +#endif // defined(__opencl_c_atomic_scope_device) && | ||
5126 | + // defined(__opencl_c_atomic_order_seq_cst) | ||
5127 | +#endif // __opencl_c_generic_address_space | ||
5128 | + | ||
5129 | +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
5130 | +#if defined(__opencl_c_atomic_scope_device) && \ | ||
5131 | + defined(__opencl_c_atomic_order_seq_cst) | ||
5132 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, | ||
5133 | + int __global *expected, int desired); | ||
5134 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, | ||
5135 | + int __global *expected, int desired); | ||
5136 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, | ||
5137 | + int __local *expected, int desired); | ||
5138 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, | ||
5139 | + int __local *expected, int desired); | ||
5140 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object, | ||
5141 | + int __private *expected, | ||
5142 | + int desired); | ||
5143 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object, | ||
5144 | + int __private *expected, | ||
5145 | + int desired); | ||
5146 | +bool __ovld | ||
5147 | +atomic_compare_exchange_strong(volatile atomic_uint __global *object, | ||
5148 | + uint __global *expected, uint desired); | ||
5149 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, | ||
5150 | + uint __global *expected, | ||
5151 | + uint desired); | ||
5152 | +bool __ovld | ||
5153 | +atomic_compare_exchange_strong(volatile atomic_uint __global *object, | ||
5154 | + uint __local *expected, uint desired); | ||
5155 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, | ||
5156 | + uint __local *expected, | ||
5157 | + uint desired); | ||
5158 | +bool __ovld | ||
5159 | +atomic_compare_exchange_strong(volatile atomic_uint __global *object, | ||
5160 | + uint __private *expected, uint desired); | ||
5161 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object, | ||
5162 | + uint __private *expected, | ||
5163 | + uint desired); | ||
5164 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, | ||
5165 | + int __global *expected, int desired); | ||
5166 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, | ||
5167 | + int __global *expected, int desired); | ||
5168 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, | ||
5169 | + int __local *expected, int desired); | ||
5170 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, | ||
5171 | + int __local *expected, int desired); | ||
5172 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object, | ||
5173 | + int __private *expected, int desired); | ||
5174 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object, | ||
5175 | + int __private *expected, int desired); | ||
5176 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, | ||
5177 | + uint __global *expected, uint desired); | ||
5178 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, | ||
5179 | + uint __global *expected, uint desired); | ||
5180 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, | ||
5181 | + uint __local *expected, uint desired); | ||
5182 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, | ||
5183 | + uint __local *expected, uint desired); | ||
5184 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object, | ||
5185 | + uint __private *expected, | ||
5186 | + uint desired); | ||
5187 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object, | ||
5188 | + uint __private *expected, | ||
5189 | + uint desired); | ||
5190 | +bool __ovld | ||
5191 | +atomic_compare_exchange_strong(volatile atomic_float __global *object, | ||
5192 | + float __global *expected, float desired); | ||
5193 | +bool __ovld | ||
5194 | +atomic_compare_exchange_strong(volatile atomic_float __local *object, | ||
5195 | + float __global *expected, float desired); | ||
5196 | +bool __ovld | ||
5197 | +atomic_compare_exchange_strong(volatile atomic_float __global *object, | ||
5198 | + float __local *expected, float desired); | ||
5199 | +bool __ovld | ||
5200 | +atomic_compare_exchange_strong(volatile atomic_float __local *object, | ||
5201 | + float __local *expected, float desired); | ||
5202 | +bool __ovld | ||
5203 | +atomic_compare_exchange_strong(volatile atomic_float __global *object, | ||
5204 | + float __private *expected, float desired); | ||
5205 | +bool __ovld | ||
5206 | +atomic_compare_exchange_strong(volatile atomic_float __local *object, | ||
5207 | + float __private *expected, float desired); | ||
5208 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, | ||
5209 | + float __global *expected, | ||
5210 | + float desired); | ||
5211 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, | ||
5212 | + float __global *expected, | ||
5213 | + float desired); | ||
5214 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, | ||
5215 | + float __local *expected, | ||
5216 | + float desired); | ||
5217 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, | ||
5218 | + float __local *expected, | ||
5219 | + float desired); | ||
5220 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object, | ||
5221 | + float __private *expected, | ||
5222 | + float desired); | ||
5223 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object, | ||
5224 | + float __private *expected, | ||
5225 | + float desired); | ||
5226 | #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
5227 | -#ifdef cl_khr_fp64 | ||
5228 | -bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired); | ||
5229 | -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, | ||
5230 | - double desired, memory_order success, memory_order failure); | ||
5231 | -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected, | ||
5232 | - double desired, memory_order success, memory_order failure, memory_scope scope); | ||
5233 | -bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired); | ||
5234 | -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, | ||
5235 | - double desired, memory_order success, memory_order failure); | ||
5236 | -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected, | ||
5237 | - double desired, memory_order success, memory_order failure, memory_scope scope); | ||
5238 | -#endif //cl_khr_fp64 | ||
5239 | -bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired); | ||
5240 | -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, | ||
5241 | - long desired, memory_order success, memory_order failure); | ||
5242 | -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected, | ||
5243 | - long desired, memory_order success, memory_order failure, memory_scope scope); | ||
5244 | -bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired); | ||
5245 | -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, | ||
5246 | - long desired, memory_order success, memory_order failure); | ||
5247 | -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected, | ||
5248 | - long desired, memory_order success, memory_order failure, memory_scope scope); | ||
5249 | -bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired); | ||
5250 | -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, | ||
5251 | - ulong desired, memory_order success, memory_order failure); | ||
5252 | -bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected, | ||
5253 | - ulong desired, memory_order success, memory_order failure, memory_scope scope); | ||
5254 | -bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired); | ||
5255 | -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected, | ||
5256 | - ulong desired, memory_order success, memory_order failure); | ||
5257 | -bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected, | ||
5258 | - ulong desired, memory_order success, memory_order failure, memory_scope scope); | ||
5259 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
5260 | +bool __ovld | ||
5261 | +atomic_compare_exchange_strong(volatile atomic_double __global *object, | ||
5262 | + double __global *expected, double desired); | ||
5263 | +bool __ovld | ||
5264 | +atomic_compare_exchange_strong(volatile atomic_double __local *object, | ||
5265 | + double __global *expected, double desired); | ||
5266 | +bool __ovld | ||
5267 | +atomic_compare_exchange_strong(volatile atomic_double __global *object, | ||
5268 | + double __local *expected, double desired); | ||
5269 | +bool __ovld | ||
5270 | +atomic_compare_exchange_strong(volatile atomic_double __local *object, | ||
5271 | + double __local *expected, double desired); | ||
5272 | +bool __ovld | ||
5273 | +atomic_compare_exchange_strong(volatile atomic_double __global *object, | ||
5274 | + double __private *expected, double desired); | ||
5275 | +bool __ovld | ||
5276 | +atomic_compare_exchange_strong(volatile atomic_double __local *object, | ||
5277 | + double __private *expected, double desired); | ||
5278 | +bool __ovld | ||
5279 | +atomic_compare_exchange_weak(volatile atomic_double __global *object, | ||
5280 | + double __global *expected, double desired); | ||
5281 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, | ||
5282 | + double __global *expected, | ||
5283 | + double desired); | ||
5284 | +bool __ovld | ||
5285 | +atomic_compare_exchange_weak(volatile atomic_double __global *object, | ||
5286 | + double __local *expected, double desired); | ||
5287 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, | ||
5288 | + double __local *expected, | ||
5289 | + double desired); | ||
5290 | +bool __ovld | ||
5291 | +atomic_compare_exchange_weak(volatile atomic_double __global *object, | ||
5292 | + double __private *expected, double desired); | ||
5293 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object, | ||
5294 | + double __private *expected, | ||
5295 | + double desired); | ||
5296 | #endif | ||
5297 | +bool __ovld | ||
5298 | +atomic_compare_exchange_strong(volatile atomic_long __global *object, | ||
5299 | + long __global *expected, long desired); | ||
5300 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, | ||
5301 | + long __global *expected, | ||
5302 | + long desired); | ||
5303 | +bool __ovld | ||
5304 | +atomic_compare_exchange_strong(volatile atomic_long __global *object, | ||
5305 | + long __local *expected, long desired); | ||
5306 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, | ||
5307 | + long __local *expected, | ||
5308 | + long desired); | ||
5309 | +bool __ovld | ||
5310 | +atomic_compare_exchange_strong(volatile atomic_long __global *object, | ||
5311 | + long __private *expected, long desired); | ||
5312 | +bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object, | ||
5313 | + long __private *expected, | ||
5314 | + long desired); | ||
5315 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, | ||
5316 | + long __global *expected, long desired); | ||
5317 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, | ||
5318 | + long __global *expected, long desired); | ||
5319 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, | ||
5320 | + long __local *expected, long desired); | ||
5321 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, | ||
5322 | + long __local *expected, long desired); | ||
5323 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object, | ||
5324 | + long __private *expected, | ||
5325 | + long desired); | ||
5326 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object, | ||
5327 | + long __private *expected, | ||
5328 | + long desired); | ||
5329 | +bool __ovld | ||
5330 | +atomic_compare_exchange_strong(volatile atomic_ulong __global *object, | ||
5331 | + ulong __global *expected, ulong desired); | ||
5332 | +bool __ovld | ||
5333 | +atomic_compare_exchange_strong(volatile atomic_ulong __local *object, | ||
5334 | + ulong __global *expected, ulong desired); | ||
5335 | +bool __ovld | ||
5336 | +atomic_compare_exchange_strong(volatile atomic_ulong __global *object, | ||
5337 | + ulong __local *expected, ulong desired); | ||
5338 | +bool __ovld | ||
5339 | +atomic_compare_exchange_strong(volatile atomic_ulong __local *object, | ||
5340 | + ulong __local *expected, ulong desired); | ||
5341 | +bool __ovld | ||
5342 | +atomic_compare_exchange_strong(volatile atomic_ulong __global *object, | ||
5343 | + ulong __private *expected, ulong desired); | ||
5344 | +bool __ovld | ||
5345 | +atomic_compare_exchange_strong(volatile atomic_ulong __local *object, | ||
5346 | + ulong __private *expected, ulong desired); | ||
5347 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, | ||
5348 | + ulong __global *expected, | ||
5349 | + ulong desired); | ||
5350 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, | ||
5351 | + ulong __global *expected, | ||
5352 | + ulong desired); | ||
5353 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, | ||
5354 | + ulong __local *expected, | ||
5355 | + ulong desired); | ||
5356 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, | ||
5357 | + ulong __local *expected, | ||
5358 | + ulong desired); | ||
5359 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object, | ||
5360 | + ulong __private *expected, | ||
5361 | + ulong desired); | ||
5362 | +bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object, | ||
5363 | + ulong __private *expected, | ||
5364 | + ulong desired); | ||
5365 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
5366 | + // defined(cl_khr_int64_extended_atomics) | ||
5367 | +#endif // defined(__opencl_c_atomic_scope_device) && | ||
5368 | + // defined(__opencl_c_atomic_order_seq_cst) | ||
5369 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
5370 | + | ||
5371 | +#ifdef __opencl_c_generic_address_space | ||
5372 | +#ifdef __opencl_c_atomic_scope_device | ||
5373 | +bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, | ||
5374 | + int *expected, int desired, | ||
5375 | + memory_order success, | ||
5376 | + memory_order failure); | ||
5377 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5378 | + volatile atomic_uint *object, uint *expected, uint desired, | ||
5379 | + memory_order success, memory_order failure); | ||
5380 | +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, | ||
5381 | + int *expected, int desired, | ||
5382 | + memory_order success, | ||
5383 | + memory_order failure); | ||
5384 | +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, | ||
5385 | + uint *expected, uint desired, | ||
5386 | + memory_order success, | ||
5387 | + memory_order failure); | ||
5388 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5389 | + volatile atomic_float *object, float *expected, float desired, | ||
5390 | + memory_order success, memory_order failure); | ||
5391 | +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, | ||
5392 | + float *expected, | ||
5393 | + float desired, | ||
5394 | + memory_order success, | ||
5395 | + memory_order failure); | ||
5396 | +#endif // __opencl_c_atomic_scope_device | ||
5397 | +bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, | ||
5398 | + int *expected, int desired, | ||
5399 | + memory_order success, | ||
5400 | + memory_order failure, | ||
5401 | + memory_scope scope); | ||
5402 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5403 | + volatile atomic_uint *object, uint *expected, uint desired, | ||
5404 | + memory_order success, memory_order failure, memory_scope scope); | ||
5405 | +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, | ||
5406 | + int *expected, int desired, | ||
5407 | + memory_order success, | ||
5408 | + memory_order failure, | ||
5409 | + memory_scope scope); | ||
5410 | +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, | ||
5411 | + uint *expected, uint desired, | ||
5412 | + memory_order success, | ||
5413 | + memory_order failure, | ||
5414 | + memory_scope scope); | ||
5415 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5416 | + volatile atomic_float *object, float *expected, float desired, | ||
5417 | + memory_order success, memory_order failure, memory_scope scope); | ||
5418 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5419 | + volatile atomic_float *object, float *expected, float desired, | ||
5420 | + memory_order success, memory_order failure, memory_scope scope); | ||
5421 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
5422 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
5423 | +#ifdef __opencl_c_atomic_scope_device | ||
5424 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5425 | + volatile atomic_double *object, double *expected, double desired, | ||
5426 | + memory_order success, memory_order failure); | ||
5427 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5428 | + volatile atomic_double *object, double *expected, double desired, | ||
5429 | + memory_order success, memory_order failure); | ||
5430 | +#endif // __opencl_c_atomic_scope_device | ||
5431 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5432 | + volatile atomic_double *object, double *expected, double desired, | ||
5433 | + memory_order success, memory_order failure, memory_scope scope); | ||
5434 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5435 | + volatile atomic_double *object, double *expected, double desired, | ||
5436 | + memory_order success, memory_order failure, memory_scope scope); | ||
5437 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
5438 | +#ifdef __opencl_c_atomic_scope_device | ||
5439 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5440 | + volatile atomic_long *object, long *expected, long desired, | ||
5441 | + memory_order success, memory_order failure); | ||
5442 | +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, | ||
5443 | + long *expected, long desired, | ||
5444 | + memory_order success, | ||
5445 | + memory_order failure); | ||
5446 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5447 | + volatile atomic_ulong *object, ulong *expected, ulong desired, | ||
5448 | + memory_order success, memory_order failure); | ||
5449 | +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, | ||
5450 | + ulong *expected, | ||
5451 | + ulong desired, | ||
5452 | + memory_order success, | ||
5453 | + memory_order failure); | ||
5454 | +#endif // __opencl_c_atomic_scope_device | ||
5455 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5456 | + volatile atomic_long *object, long *expected, long desired, | ||
5457 | + memory_order success, memory_order failure, memory_scope scope); | ||
5458 | +bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, | ||
5459 | + long *expected, long desired, | ||
5460 | + memory_order success, | ||
5461 | + memory_order failure, | ||
5462 | + memory_scope scope); | ||
5463 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5464 | + volatile atomic_ulong *object, ulong *expected, ulong desired, | ||
5465 | + memory_order success, memory_order failure, memory_scope scope); | ||
5466 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5467 | + volatile atomic_ulong *object, ulong *expected, ulong desired, | ||
5468 | + memory_order success, memory_order failure, memory_scope scope); | ||
5469 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
5470 | + // defined(cl_khr_int64_extended_atomics) | ||
5471 | +#endif // __opencl_c_generic_address_space | ||
5472 | + | ||
5473 | +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
5474 | +#ifdef __opencl_c_atomic_scope_device | ||
5475 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5476 | + volatile atomic_int __global *object, int __global *expected, int desired, | ||
5477 | + memory_order success, memory_order failure); | ||
5478 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5479 | + volatile atomic_int __local *object, int __global *expected, int desired, | ||
5480 | + memory_order success, memory_order failure); | ||
5481 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5482 | + volatile atomic_int __global *object, int __local *expected, int desired, | ||
5483 | + memory_order success, memory_order failure); | ||
5484 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5485 | + volatile atomic_int __local *object, int __local *expected, int desired, | ||
5486 | + memory_order success, memory_order failure); | ||
5487 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5488 | + volatile atomic_int __global *object, int __private *expected, int desired, | ||
5489 | + memory_order success, memory_order failure); | ||
5490 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5491 | + volatile atomic_int __local *object, int __private *expected, int desired, | ||
5492 | + memory_order success, memory_order failure); | ||
5493 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5494 | + volatile atomic_uint __global *object, uint __global *expected, | ||
5495 | + uint desired, memory_order success, memory_order failure); | ||
5496 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5497 | + volatile atomic_uint __local *object, uint __global *expected, uint desired, | ||
5498 | + memory_order success, memory_order failure); | ||
5499 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5500 | + volatile atomic_uint __global *object, uint __local *expected, uint desired, | ||
5501 | + memory_order success, memory_order failure); | ||
5502 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5503 | + volatile atomic_uint __local *object, uint __local *expected, uint desired, | ||
5504 | + memory_order success, memory_order failure); | ||
5505 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5506 | + volatile atomic_uint __global *object, uint __private *expected, | ||
5507 | + uint desired, memory_order success, memory_order failure); | ||
5508 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5509 | + volatile atomic_uint __local *object, uint __private *expected, | ||
5510 | + uint desired, memory_order success, memory_order failure); | ||
5511 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5512 | + volatile atomic_int __global *object, int __global *expected, int desired, | ||
5513 | + memory_order success, memory_order failure); | ||
5514 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5515 | + volatile atomic_int __local *object, int __global *expected, int desired, | ||
5516 | + memory_order success, memory_order failure); | ||
5517 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5518 | + volatile atomic_int __global *object, int __local *expected, int desired, | ||
5519 | + memory_order success, memory_order failure); | ||
5520 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5521 | + volatile atomic_int __local *object, int __local *expected, int desired, | ||
5522 | + memory_order success, memory_order failure); | ||
5523 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5524 | + volatile atomic_int __global *object, int __private *expected, int desired, | ||
5525 | + memory_order success, memory_order failure); | ||
5526 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5527 | + volatile atomic_int __local *object, int __private *expected, int desired, | ||
5528 | + memory_order success, memory_order failure); | ||
5529 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5530 | + volatile atomic_uint __global *object, uint __global *expected, | ||
5531 | + uint desired, memory_order success, memory_order failure); | ||
5532 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5533 | + volatile atomic_uint __local *object, uint __global *expected, uint desired, | ||
5534 | + memory_order success, memory_order failure); | ||
5535 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5536 | + volatile atomic_uint __global *object, uint __local *expected, uint desired, | ||
5537 | + memory_order success, memory_order failure); | ||
5538 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5539 | + volatile atomic_uint __local *object, uint __local *expected, uint desired, | ||
5540 | + memory_order success, memory_order failure); | ||
5541 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5542 | + volatile atomic_uint __global *object, uint __private *expected, | ||
5543 | + uint desired, memory_order success, memory_order failure); | ||
5544 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5545 | + volatile atomic_uint __local *object, uint __private *expected, | ||
5546 | + uint desired, memory_order success, memory_order failure); | ||
5547 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5548 | + volatile atomic_float __global *object, float __global *expected, | ||
5549 | + float desired, memory_order success, memory_order failure); | ||
5550 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5551 | + volatile atomic_float __local *object, float __global *expected, | ||
5552 | + float desired, memory_order success, memory_order failure); | ||
5553 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5554 | + volatile atomic_float __global *object, float __local *expected, | ||
5555 | + float desired, memory_order success, memory_order failure); | ||
5556 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5557 | + volatile atomic_float __local *object, float __local *expected, | ||
5558 | + float desired, memory_order success, memory_order failure); | ||
5559 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5560 | + volatile atomic_float __global *object, float __private *expected, | ||
5561 | + float desired, memory_order success, memory_order failure); | ||
5562 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5563 | + volatile atomic_float __local *object, float __private *expected, | ||
5564 | + float desired, memory_order success, memory_order failure); | ||
5565 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5566 | + volatile atomic_float __global *object, float __global *expected, | ||
5567 | + float desired, memory_order success, memory_order failure); | ||
5568 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5569 | + volatile atomic_float __local *object, float __global *expected, | ||
5570 | + float desired, memory_order success, memory_order failure); | ||
5571 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5572 | + volatile atomic_float __global *object, float __local *expected, | ||
5573 | + float desired, memory_order success, memory_order failure); | ||
5574 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5575 | + volatile atomic_float __local *object, float __local *expected, | ||
5576 | + float desired, memory_order success, memory_order failure); | ||
5577 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5578 | + volatile atomic_float __global *object, float __private *expected, | ||
5579 | + float desired, memory_order success, memory_order failure); | ||
5580 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5581 | + volatile atomic_float __local *object, float __private *expected, | ||
5582 | + float desired, memory_order success, memory_order failure); | ||
5583 | +#endif // __opencl_c_atomic_scope_device | ||
5584 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5585 | + volatile atomic_int __global *object, int __global *expected, int desired, | ||
5586 | + memory_order success, memory_order failure, memory_scope scope); | ||
5587 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5588 | + volatile atomic_int __local *object, int __global *expected, int desired, | ||
5589 | + memory_order success, memory_order failure, memory_scope scope); | ||
5590 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5591 | + volatile atomic_int __global *object, int __local *expected, int desired, | ||
5592 | + memory_order success, memory_order failure, memory_scope scope); | ||
5593 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5594 | + volatile atomic_int __local *object, int __local *expected, int desired, | ||
5595 | + memory_order success, memory_order failure, memory_scope scope); | ||
5596 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5597 | + volatile atomic_int __global *object, int __private *expected, int desired, | ||
5598 | + memory_order success, memory_order failure, memory_scope scope); | ||
5599 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5600 | + volatile atomic_int __local *object, int __private *expected, int desired, | ||
5601 | + memory_order success, memory_order failure, memory_scope scope); | ||
5602 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5603 | + volatile atomic_uint __global *object, uint __global *expected, | ||
5604 | + uint desired, memory_order success, memory_order failure, | ||
5605 | + memory_scope scope); | ||
5606 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5607 | + volatile atomic_uint __local *object, uint __global *expected, uint desired, | ||
5608 | + memory_order success, memory_order failure, memory_scope scope); | ||
5609 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5610 | + volatile atomic_uint __global *object, uint __local *expected, uint desired, | ||
5611 | + memory_order success, memory_order failure, memory_scope scope); | ||
5612 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5613 | + volatile atomic_uint __local *object, uint __local *expected, uint desired, | ||
5614 | + memory_order success, memory_order failure, memory_scope scope); | ||
5615 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5616 | + volatile atomic_uint __global *object, uint __private *expected, | ||
5617 | + uint desired, memory_order success, memory_order failure, | ||
5618 | + memory_scope scope); | ||
5619 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5620 | + volatile atomic_uint __local *object, uint __private *expected, | ||
5621 | + uint desired, memory_order success, memory_order failure, | ||
5622 | + memory_scope scope); | ||
5623 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5624 | + volatile atomic_int __global *object, int __global *expected, int desired, | ||
5625 | + memory_order success, memory_order failure, memory_scope scope); | ||
5626 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5627 | + volatile atomic_int __local *object, int __global *expected, int desired, | ||
5628 | + memory_order success, memory_order failure, memory_scope scope); | ||
5629 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5630 | + volatile atomic_int __global *object, int __local *expected, int desired, | ||
5631 | + memory_order success, memory_order failure, memory_scope scope); | ||
5632 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5633 | + volatile atomic_int __local *object, int __local *expected, int desired, | ||
5634 | + memory_order success, memory_order failure, memory_scope scope); | ||
5635 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5636 | + volatile atomic_int __global *object, int __private *expected, int desired, | ||
5637 | + memory_order success, memory_order failure, memory_scope scope); | ||
5638 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5639 | + volatile atomic_int __local *object, int __private *expected, int desired, | ||
5640 | + memory_order success, memory_order failure, memory_scope scope); | ||
5641 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5642 | + volatile atomic_uint __global *object, uint __global *expected, | ||
5643 | + uint desired, memory_order success, memory_order failure, | ||
5644 | + memory_scope scope); | ||
5645 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5646 | + volatile atomic_uint __local *object, uint __global *expected, uint desired, | ||
5647 | + memory_order success, memory_order failure, memory_scope scope); | ||
5648 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5649 | + volatile atomic_uint __global *object, uint __local *expected, uint desired, | ||
5650 | + memory_order success, memory_order failure, memory_scope scope); | ||
5651 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5652 | + volatile atomic_uint __local *object, uint __local *expected, uint desired, | ||
5653 | + memory_order success, memory_order failure, memory_scope scope); | ||
5654 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5655 | + volatile atomic_uint __global *object, uint __private *expected, | ||
5656 | + uint desired, memory_order success, memory_order failure, | ||
5657 | + memory_scope scope); | ||
5658 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5659 | + volatile atomic_uint __local *object, uint __private *expected, | ||
5660 | + uint desired, memory_order success, memory_order failure, | ||
5661 | + memory_scope scope); | ||
5662 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5663 | + volatile atomic_float __global *object, float __global *expected, | ||
5664 | + float desired, memory_order success, memory_order failure, | ||
5665 | + memory_scope scope); | ||
5666 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5667 | + volatile atomic_float __local *object, float __global *expected, | ||
5668 | + float desired, memory_order success, memory_order failure, | ||
5669 | + memory_scope scope); | ||
5670 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5671 | + volatile atomic_float __global *object, float __local *expected, | ||
5672 | + float desired, memory_order success, memory_order failure, | ||
5673 | + memory_scope scope); | ||
5674 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5675 | + volatile atomic_float __local *object, float __local *expected, | ||
5676 | + float desired, memory_order success, memory_order failure, | ||
5677 | + memory_scope scope); | ||
5678 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5679 | + volatile atomic_float __global *object, float __private *expected, | ||
5680 | + float desired, memory_order success, memory_order failure, | ||
5681 | + memory_scope scope); | ||
5682 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5683 | + volatile atomic_float __local *object, float __private *expected, | ||
5684 | + float desired, memory_order success, memory_order failure, | ||
5685 | + memory_scope scope); | ||
5686 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5687 | + volatile atomic_float __global *object, float __global *expected, | ||
5688 | + float desired, memory_order success, memory_order failure, | ||
5689 | + memory_scope scope); | ||
5690 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5691 | + volatile atomic_float __local *object, float __global *expected, | ||
5692 | + float desired, memory_order success, memory_order failure, | ||
5693 | + memory_scope scope); | ||
5694 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5695 | + volatile atomic_float __global *object, float __local *expected, | ||
5696 | + float desired, memory_order success, memory_order failure, | ||
5697 | + memory_scope scope); | ||
5698 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5699 | + volatile atomic_float __local *object, float __local *expected, | ||
5700 | + float desired, memory_order success, memory_order failure, | ||
5701 | + memory_scope scope); | ||
5702 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5703 | + volatile atomic_float __global *object, float __private *expected, | ||
5704 | + float desired, memory_order success, memory_order failure, | ||
5705 | + memory_scope scope); | ||
5706 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5707 | + volatile atomic_float __local *object, float __private *expected, | ||
5708 | + float desired, memory_order success, memory_order failure, | ||
5709 | + memory_scope scope); | ||
5710 | +#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) | ||
5711 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
5712 | +#ifdef __opencl_c_atomic_scope_device | ||
5713 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5714 | + volatile atomic_double __global *object, double __global *expected, | ||
5715 | + double desired, memory_order success, memory_order failure); | ||
5716 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5717 | + volatile atomic_double __local *object, double __global *expected, | ||
5718 | + double desired, memory_order success, memory_order failure); | ||
5719 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5720 | + volatile atomic_double __global *object, double __local *expected, | ||
5721 | + double desired, memory_order success, memory_order failure); | ||
5722 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5723 | + volatile atomic_double __local *object, double __local *expected, | ||
5724 | + double desired, memory_order success, memory_order failure); | ||
5725 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5726 | + volatile atomic_double __global *object, double __private *expected, | ||
5727 | + double desired, memory_order success, memory_order failure); | ||
5728 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5729 | + volatile atomic_double __local *object, double __private *expected, | ||
5730 | + double desired, memory_order success, memory_order failure); | ||
5731 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5732 | + volatile atomic_double __global *object, double __global *expected, | ||
5733 | + double desired, memory_order success, memory_order failure); | ||
5734 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5735 | + volatile atomic_double __local *object, double __global *expected, | ||
5736 | + double desired, memory_order success, memory_order failure); | ||
5737 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5738 | + volatile atomic_double __global *object, double __local *expected, | ||
5739 | + double desired, memory_order success, memory_order failure); | ||
5740 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5741 | + volatile atomic_double __local *object, double __local *expected, | ||
5742 | + double desired, memory_order success, memory_order failure); | ||
5743 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5744 | + volatile atomic_double __global *object, double __private *expected, | ||
5745 | + double desired, memory_order success, memory_order failure); | ||
5746 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5747 | + volatile atomic_double __local *object, double __private *expected, | ||
5748 | + double desired, memory_order success, memory_order failure); | ||
5749 | +#endif // __opencl_c_atomic_scope_device | ||
5750 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5751 | + volatile atomic_double __global *object, double __global *expected, | ||
5752 | + double desired, memory_order success, memory_order failure, | ||
5753 | + memory_scope scope); | ||
5754 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5755 | + volatile atomic_double __local *object, double __global *expected, | ||
5756 | + double desired, memory_order success, memory_order failure, | ||
5757 | + memory_scope scope); | ||
5758 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5759 | + volatile atomic_double __global *object, double __local *expected, | ||
5760 | + double desired, memory_order success, memory_order failure, | ||
5761 | + memory_scope scope); | ||
5762 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5763 | + volatile atomic_double __local *object, double __local *expected, | ||
5764 | + double desired, memory_order success, memory_order failure, | ||
5765 | + memory_scope scope); | ||
5766 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5767 | + volatile atomic_double __global *object, double __private *expected, | ||
5768 | + double desired, memory_order success, memory_order failure, | ||
5769 | + memory_scope scope); | ||
5770 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5771 | + volatile atomic_double __local *object, double __private *expected, | ||
5772 | + double desired, memory_order success, memory_order failure, | ||
5773 | + memory_scope scope); | ||
5774 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5775 | + volatile atomic_double __global *object, double __global *expected, | ||
5776 | + double desired, memory_order success, memory_order failure, | ||
5777 | + memory_scope scope); | ||
5778 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5779 | + volatile atomic_double __local *object, double __global *expected, | ||
5780 | + double desired, memory_order success, memory_order failure, | ||
5781 | + memory_scope scope); | ||
5782 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5783 | + volatile atomic_double __global *object, double __local *expected, | ||
5784 | + double desired, memory_order success, memory_order failure, | ||
5785 | + memory_scope scope); | ||
5786 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5787 | + volatile atomic_double __local *object, double __local *expected, | ||
5788 | + double desired, memory_order success, memory_order failure, | ||
5789 | + memory_scope scope); | ||
5790 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5791 | + volatile atomic_double __global *object, double __private *expected, | ||
5792 | + double desired, memory_order success, memory_order failure, | ||
5793 | + memory_scope scope); | ||
5794 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5795 | + volatile atomic_double __local *object, double __private *expected, | ||
5796 | + double desired, memory_order success, memory_order failure, | ||
5797 | + memory_scope scope); | ||
5798 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
5799 | +#ifdef __opencl_c_atomic_scope_device | ||
5800 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5801 | + volatile atomic_long __global *object, long __global *expected, | ||
5802 | + long desired, memory_order success, memory_order failure); | ||
5803 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5804 | + volatile atomic_long __local *object, long __global *expected, long desired, | ||
5805 | + memory_order success, memory_order failure); | ||
5806 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5807 | + volatile atomic_long __global *object, long __local *expected, long desired, | ||
5808 | + memory_order success, memory_order failure); | ||
5809 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5810 | + volatile atomic_long __local *object, long __local *expected, long desired, | ||
5811 | + memory_order success, memory_order failure); | ||
5812 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5813 | + volatile atomic_long __global *object, long __private *expected, | ||
5814 | + long desired, memory_order success, memory_order failure); | ||
5815 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5816 | + volatile atomic_long __local *object, long __private *expected, | ||
5817 | + long desired, memory_order success, memory_order failure); | ||
5818 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5819 | + volatile atomic_long __global *object, long __global *expected, | ||
5820 | + long desired, memory_order success, memory_order failure); | ||
5821 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5822 | + volatile atomic_long __local *object, long __global *expected, long desired, | ||
5823 | + memory_order success, memory_order failure); | ||
5824 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5825 | + volatile atomic_long __global *object, long __local *expected, long desired, | ||
5826 | + memory_order success, memory_order failure); | ||
5827 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5828 | + volatile atomic_long __local *object, long __local *expected, long desired, | ||
5829 | + memory_order success, memory_order failure); | ||
5830 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5831 | + volatile atomic_long __global *object, long __private *expected, | ||
5832 | + long desired, memory_order success, memory_order failure); | ||
5833 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5834 | + volatile atomic_long __local *object, long __private *expected, | ||
5835 | + long desired, memory_order success, memory_order failure); | ||
5836 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5837 | + volatile atomic_ulong __global *object, ulong __global *expected, | ||
5838 | + ulong desired, memory_order success, memory_order failure); | ||
5839 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5840 | + volatile atomic_ulong __local *object, ulong __global *expected, | ||
5841 | + ulong desired, memory_order success, memory_order failure); | ||
5842 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5843 | + volatile atomic_ulong __global *object, ulong __local *expected, | ||
5844 | + ulong desired, memory_order success, memory_order failure); | ||
5845 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5846 | + volatile atomic_ulong __local *object, ulong __local *expected, | ||
5847 | + ulong desired, memory_order success, memory_order failure); | ||
5848 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5849 | + volatile atomic_ulong __global *object, ulong __private *expected, | ||
5850 | + ulong desired, memory_order success, memory_order failure); | ||
5851 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5852 | + volatile atomic_ulong __local *object, ulong __private *expected, | ||
5853 | + ulong desired, memory_order success, memory_order failure); | ||
5854 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5855 | + volatile atomic_ulong __global *object, ulong __global *expected, | ||
5856 | + ulong desired, memory_order success, memory_order failure); | ||
5857 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5858 | + volatile atomic_ulong __local *object, ulong __global *expected, | ||
5859 | + ulong desired, memory_order success, memory_order failure); | ||
5860 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5861 | + volatile atomic_ulong __global *object, ulong __local *expected, | ||
5862 | + ulong desired, memory_order success, memory_order failure); | ||
5863 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5864 | + volatile atomic_ulong __local *object, ulong __local *expected, | ||
5865 | + ulong desired, memory_order success, memory_order failure); | ||
5866 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5867 | + volatile atomic_ulong __global *object, ulong __private *expected, | ||
5868 | + ulong desired, memory_order success, memory_order failure); | ||
5869 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5870 | + volatile atomic_ulong __local *object, ulong __private *expected, | ||
5871 | + ulong desired, memory_order success, memory_order failure); | ||
5872 | +#endif // __opencl_c_atomic_scope_device | ||
5873 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5874 | + volatile atomic_long __global *object, long __global *expected, | ||
5875 | + long desired, memory_order success, memory_order failure, | ||
5876 | + memory_scope scope); | ||
5877 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5878 | + volatile atomic_long __local *object, long __global *expected, long desired, | ||
5879 | + memory_order success, memory_order failure, memory_scope scope); | ||
5880 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5881 | + volatile atomic_long __global *object, long __local *expected, long desired, | ||
5882 | + memory_order success, memory_order failure, memory_scope scope); | ||
5883 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5884 | + volatile atomic_long __local *object, long __local *expected, long desired, | ||
5885 | + memory_order success, memory_order failure, memory_scope scope); | ||
5886 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5887 | + volatile atomic_long __global *object, long __private *expected, | ||
5888 | + long desired, memory_order success, memory_order failure, | ||
5889 | + memory_scope scope); | ||
5890 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5891 | + volatile atomic_long __local *object, long __private *expected, | ||
5892 | + long desired, memory_order success, memory_order failure, | ||
5893 | + memory_scope scope); | ||
5894 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5895 | + volatile atomic_long __global *object, long __global *expected, | ||
5896 | + long desired, memory_order success, memory_order failure, | ||
5897 | + memory_scope scope); | ||
5898 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5899 | + volatile atomic_long __local *object, long __global *expected, long desired, | ||
5900 | + memory_order success, memory_order failure, memory_scope scope); | ||
5901 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5902 | + volatile atomic_long __global *object, long __local *expected, long desired, | ||
5903 | + memory_order success, memory_order failure, memory_scope scope); | ||
5904 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5905 | + volatile atomic_long __local *object, long __local *expected, long desired, | ||
5906 | + memory_order success, memory_order failure, memory_scope scope); | ||
5907 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5908 | + volatile atomic_long __global *object, long __private *expected, | ||
5909 | + long desired, memory_order success, memory_order failure, | ||
5910 | + memory_scope scope); | ||
5911 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5912 | + volatile atomic_long __local *object, long __private *expected, | ||
5913 | + long desired, memory_order success, memory_order failure, | ||
5914 | + memory_scope scope); | ||
5915 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5916 | + volatile atomic_ulong __global *object, ulong __global *expected, | ||
5917 | + ulong desired, memory_order success, memory_order failure, | ||
5918 | + memory_scope scope); | ||
5919 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5920 | + volatile atomic_ulong __local *object, ulong __global *expected, | ||
5921 | + ulong desired, memory_order success, memory_order failure, | ||
5922 | + memory_scope scope); | ||
5923 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5924 | + volatile atomic_ulong __global *object, ulong __local *expected, | ||
5925 | + ulong desired, memory_order success, memory_order failure, | ||
5926 | + memory_scope scope); | ||
5927 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5928 | + volatile atomic_ulong __local *object, ulong __local *expected, | ||
5929 | + ulong desired, memory_order success, memory_order failure, | ||
5930 | + memory_scope scope); | ||
5931 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5932 | + volatile atomic_ulong __global *object, ulong __private *expected, | ||
5933 | + ulong desired, memory_order success, memory_order failure, | ||
5934 | + memory_scope scope); | ||
5935 | +bool __ovld atomic_compare_exchange_strong_explicit( | ||
5936 | + volatile atomic_ulong __local *object, ulong __private *expected, | ||
5937 | + ulong desired, memory_order success, memory_order failure, | ||
5938 | + memory_scope scope); | ||
5939 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5940 | + volatile atomic_ulong __global *object, ulong __global *expected, | ||
5941 | + ulong desired, memory_order success, memory_order failure, | ||
5942 | + memory_scope scope); | ||
5943 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5944 | + volatile atomic_ulong __local *object, ulong __global *expected, | ||
5945 | + ulong desired, memory_order success, memory_order failure, | ||
5946 | + memory_scope scope); | ||
5947 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5948 | + volatile atomic_ulong __global *object, ulong __local *expected, | ||
5949 | + ulong desired, memory_order success, memory_order failure, | ||
5950 | + memory_scope scope); | ||
5951 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5952 | + volatile atomic_ulong __local *object, ulong __local *expected, | ||
5953 | + ulong desired, memory_order success, memory_order failure, | ||
5954 | + memory_scope scope); | ||
5955 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5956 | + volatile atomic_ulong __global *object, ulong __private *expected, | ||
5957 | + ulong desired, memory_order success, memory_order failure, | ||
5958 | + memory_scope scope); | ||
5959 | +bool __ovld atomic_compare_exchange_weak_explicit( | ||
5960 | + volatile atomic_ulong __local *object, ulong __private *expected, | ||
5961 | + ulong desired, memory_order success, memory_order failure, | ||
5962 | + memory_scope scope); | ||
5963 | +#endif // defined(cl_khr_int64_base_atomics) && | ||
5964 | + // defined(cl_khr_int64_extended_atomics) | ||
5965 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
5966 | |||
5967 | // atomic_flag_test_and_set() and atomic_flag_clear() | ||
5968 | - | ||
5969 | +#if defined(__opencl_c_atomic_scope_device) && \ | ||
5970 | + defined(__opencl_c_atomic_order_seq_cst) | ||
5971 | +#ifdef __opencl_c_generic_address_space | ||
5972 | bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object); | ||
5973 | -bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order); | ||
5974 | -bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); | ||
5975 | void __ovld atomic_flag_clear(volatile atomic_flag *object); | ||
5976 | -void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order); | ||
5977 | -void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); | ||
5978 | +#endif // __opencl_c_generic_address_space | ||
5979 | + | ||
5980 | +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
5981 | +bool __ovld atomic_flag_test_and_set(volatile atomic_flag __global *object); | ||
5982 | +bool __ovld atomic_flag_test_and_set(volatile atomic_flag __local *object); | ||
5983 | +void __ovld atomic_flag_clear(volatile atomic_flag __global *object); | ||
5984 | +void __ovld atomic_flag_clear(volatile atomic_flag __local *object); | ||
5985 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
5986 | +#endif // defined(__opencl_c_atomic_scope_device) && | ||
5987 | + // defined(__opencl_c_atomic_order_seq_cst) | ||
5988 | + | ||
5989 | +#ifdef __opencl_c_generic_address_space | ||
5990 | +#ifdef __opencl_c_atomic_scope_device | ||
5991 | +bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, | ||
5992 | + memory_order order); | ||
5993 | +void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, | ||
5994 | + memory_order order); | ||
5995 | +#endif // __opencl_c_atomic_scope_device | ||
5996 | +bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, | ||
5997 | + memory_order order, | ||
5998 | + memory_scope scope); | ||
5999 | +void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, | ||
6000 | + memory_order order, memory_scope scope); | ||
6001 | +#endif // __opencl_c_generic_address_space | ||
6002 | + | ||
6003 | +#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
6004 | +#ifdef __opencl_c_atomic_scope_device | ||
6005 | +bool __ovld atomic_flag_test_and_set_explicit( | ||
6006 | + volatile atomic_flag __global *object, memory_order order); | ||
6007 | +bool __ovld atomic_flag_test_and_set_explicit( | ||
6008 | + volatile atomic_flag __local *object, memory_order order); | ||
6009 | +void __ovld atomic_flag_clear_explicit(volatile atomic_flag __global *object, | ||
6010 | + memory_order order); | ||
6011 | +void __ovld atomic_flag_clear_explicit(volatile atomic_flag __local *object, | ||
6012 | + memory_order order); | ||
6013 | +#endif // __opencl_c_atomic_scope_device | ||
6014 | +bool __ovld | ||
6015 | +atomic_flag_test_and_set_explicit(volatile atomic_flag __global *object, | ||
6016 | + memory_order order, memory_scope scope); | ||
6017 | +bool __ovld | ||
6018 | +atomic_flag_test_and_set_explicit(volatile atomic_flag __local *object, | ||
6019 | + memory_order order, memory_scope scope); | ||
6020 | +void __ovld atomic_flag_clear_explicit(volatile atomic_flag __global *object, | ||
6021 | + memory_order order, memory_scope scope); | ||
6022 | +void __ovld atomic_flag_clear_explicit(volatile atomic_flag __local *object, | ||
6023 | + memory_order order, memory_scope scope); | ||
6024 | +#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
6025 | |||
6026 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6027 | |||
6028 | @@ -13918,7 +15943,7 @@ float16 __ovld __cnfn shuffle(float4 x, uint16 mask); | ||
6029 | float16 __ovld __cnfn shuffle(float8 x, uint16 mask); | ||
6030 | float16 __ovld __cnfn shuffle(float16 x, uint16 mask); | ||
6031 | |||
6032 | -#ifdef cl_khr_fp64 | ||
6033 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
6034 | double2 __ovld __cnfn shuffle(double2 x, ulong2 mask); | ||
6035 | double2 __ovld __cnfn shuffle(double4 x, ulong2 mask); | ||
6036 | double2 __ovld __cnfn shuffle(double8 x, ulong2 mask); | ||
6037 | @@ -13938,7 +15963,7 @@ double16 __ovld __cnfn shuffle(double2 x, ulong16 mask); | ||
6038 | double16 __ovld __cnfn shuffle(double4 x, ulong16 mask); | ||
6039 | double16 __ovld __cnfn shuffle(double8 x, ulong16 mask); | ||
6040 | double16 __ovld __cnfn shuffle(double16 x, ulong16 mask); | ||
6041 | -#endif //cl_khr_fp64 | ||
6042 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
6043 | |||
6044 | #ifdef cl_khr_fp16 | ||
6045 | half2 __ovld __cnfn shuffle(half2 x, ushort2 mask); | ||
6046 | @@ -14142,7 +16167,7 @@ float16 __ovld __cnfn shuffle2(float4 x, float4 y, uint16 mask); | ||
6047 | float16 __ovld __cnfn shuffle2(float8 x, float8 y, uint16 mask); | ||
6048 | float16 __ovld __cnfn shuffle2(float16 x, float16 y, uint16 mask); | ||
6049 | |||
6050 | -#ifdef cl_khr_fp64 | ||
6051 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
6052 | double2 __ovld __cnfn shuffle2(double2 x, double2 y, ulong2 mask); | ||
6053 | double2 __ovld __cnfn shuffle2(double4 x, double4 y, ulong2 mask); | ||
6054 | double2 __ovld __cnfn shuffle2(double8 x, double8 y, ulong2 mask); | ||
6055 | @@ -14162,7 +16187,7 @@ double16 __ovld __cnfn shuffle2(double2 x, double2 y, ulong16 mask); | ||
6056 | double16 __ovld __cnfn shuffle2(double4 x, double4 y, ulong16 mask); | ||
6057 | double16 __ovld __cnfn shuffle2(double8 x, double8 y, ulong16 mask); | ||
6058 | double16 __ovld __cnfn shuffle2(double16 x, double16 y, ulong16 mask); | ||
6059 | -#endif //cl_khr_fp64 | ||
6060 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
6061 | |||
6062 | #ifdef cl_khr_fp16 | ||
6063 | half2 __ovld __cnfn shuffle2(half2 x, half2 y, ushort2 mask); | ||
6064 | @@ -14198,6 +16223,7 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))) | ||
6065 | #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable | ||
6066 | #endif //cl_khr_gl_msaa_sharing | ||
6067 | |||
6068 | +#ifdef __opencl_c_images | ||
6069 | /** | ||
6070 | * Use the coordinate (coord.xy) to do an element lookup in | ||
6071 | * the 2D image object specified by image. | ||
6072 | @@ -14476,6 +16502,7 @@ half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord); | ||
6073 | |||
6074 | // Image read functions for read_write images | ||
6075 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6076 | +#ifdef __opencl_c_read_write_images | ||
6077 | float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord); | ||
6078 | int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord); | ||
6079 | uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord); | ||
6080 | @@ -14519,6 +16546,7 @@ float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, i | ||
6081 | #endif //cl_khr_gl_msaa_sharing | ||
6082 | |||
6083 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6084 | +#ifdef __opencl_c_read_write_images | ||
6085 | #ifdef cl_khr_mipmap_image | ||
6086 | float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod); | ||
6087 | int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod); | ||
6088 | @@ -14569,6 +16597,7 @@ int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, | ||
6089 | uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY); | ||
6090 | |||
6091 | #endif //cl_khr_mipmap_image | ||
6092 | +#endif //__opencl_c_read_write_images | ||
6093 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6094 | |||
6095 | // Image read functions returning half4 type | ||
6096 | @@ -14580,6 +16609,7 @@ half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord); | ||
6097 | half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord); | ||
6098 | half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord); | ||
6099 | #endif //cl_khr_fp16 | ||
6100 | +#endif //__opencl_c_read_write_images | ||
6101 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6102 | |||
6103 | /** | ||
6104 | @@ -14669,7 +16699,7 @@ void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, flo | ||
6105 | void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color); | ||
6106 | void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color); | ||
6107 | |||
6108 | -#ifdef cl_khr_3d_image_writes | ||
6109 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6110 | void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color); | ||
6111 | void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color); | ||
6112 | void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color); | ||
6113 | @@ -14702,7 +16732,7 @@ void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, in | ||
6114 | void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float depth); | ||
6115 | void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float depth); | ||
6116 | |||
6117 | -#ifdef cl_khr_3d_image_writes | ||
6118 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6119 | void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color); | ||
6120 | void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 color); | ||
6121 | void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color); | ||
6122 | @@ -14715,7 +16745,7 @@ void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 | ||
6123 | #ifdef cl_khr_fp16 | ||
6124 | void __ovld write_imageh(write_only image1d_t image, int coord, half4 color); | ||
6125 | void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 color); | ||
6126 | -#ifdef cl_khr_3d_image_writes | ||
6127 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6128 | void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color); | ||
6129 | #endif | ||
6130 | void __ovld write_imageh(write_only image1d_array_t image, int2 coord, half4 color); | ||
6131 | @@ -14725,6 +16755,7 @@ void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 col | ||
6132 | |||
6133 | // Image write functions for read_write images | ||
6134 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6135 | +#ifdef __opencl_c_read_write_images | ||
6136 | void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color); | ||
6137 | void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color); | ||
6138 | void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color); | ||
6139 | @@ -14745,7 +16776,7 @@ void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, flo | ||
6140 | void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int4 color); | ||
6141 | void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, uint4 color); | ||
6142 | |||
6143 | -#ifdef cl_khr_3d_image_writes | ||
6144 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6145 | void __ovld write_imagef(read_write image3d_t image, int4 coord, float4 color); | ||
6146 | void __ovld write_imagei(read_write image3d_t image, int4 coord, int4 color); | ||
6147 | void __ovld write_imageui(read_write image3d_t image, int4 coord, uint4 color); | ||
6148 | @@ -14777,7 +16808,7 @@ void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, in | ||
6149 | void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, int lod, float color); | ||
6150 | void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, int lod, float color); | ||
6151 | |||
6152 | -#ifdef cl_khr_3d_image_writes | ||
6153 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6154 | void __ovld write_imagef(read_write image3d_t image, int4 coord, int lod, float4 color); | ||
6155 | void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 color); | ||
6156 | void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color); | ||
6157 | @@ -14790,13 +16821,14 @@ void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 | ||
6158 | #ifdef cl_khr_fp16 | ||
6159 | void __ovld write_imageh(read_write image1d_t image, int coord, half4 color); | ||
6160 | void __ovld write_imageh(read_write image2d_t image, int2 coord, half4 color); | ||
6161 | -#ifdef cl_khr_3d_image_writes | ||
6162 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6163 | void __ovld write_imageh(read_write image3d_t image, int4 coord, half4 color); | ||
6164 | #endif | ||
6165 | void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 color); | ||
6166 | void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color); | ||
6167 | void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color); | ||
6168 | #endif //cl_khr_fp16 | ||
6169 | +#endif //__opencl_c_read_write_images | ||
6170 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6171 | |||
6172 | // Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have | ||
6173 | @@ -14810,7 +16842,7 @@ void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 col | ||
6174 | int __ovld __cnfn get_image_width(read_only image1d_t image); | ||
6175 | int __ovld __cnfn get_image_width(read_only image1d_buffer_t image); | ||
6176 | int __ovld __cnfn get_image_width(read_only image2d_t image); | ||
6177 | -#ifdef cl_khr_3d_image_writes | ||
6178 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6179 | int __ovld __cnfn get_image_width(read_only image3d_t image); | ||
6180 | #endif | ||
6181 | int __ovld __cnfn get_image_width(read_only image1d_array_t image); | ||
6182 | @@ -14829,7 +16861,7 @@ int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image); | ||
6183 | int __ovld __cnfn get_image_width(write_only image1d_t image); | ||
6184 | int __ovld __cnfn get_image_width(write_only image1d_buffer_t image); | ||
6185 | int __ovld __cnfn get_image_width(write_only image2d_t image); | ||
6186 | -#ifdef cl_khr_3d_image_writes | ||
6187 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6188 | int __ovld __cnfn get_image_width(write_only image3d_t image); | ||
6189 | #endif | ||
6190 | int __ovld __cnfn get_image_width(write_only image1d_array_t image); | ||
6191 | @@ -14846,6 +16878,7 @@ int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image); | ||
6192 | #endif //cl_khr_gl_msaa_sharing | ||
6193 | |||
6194 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6195 | +#ifdef __opencl_c_read_write_images | ||
6196 | int __ovld __cnfn get_image_width(read_write image1d_t image); | ||
6197 | int __ovld __cnfn get_image_width(read_write image1d_buffer_t image); | ||
6198 | int __ovld __cnfn get_image_width(read_write image2d_t image); | ||
6199 | @@ -14862,6 +16895,7 @@ int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image); | ||
6200 | int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image); | ||
6201 | int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image); | ||
6202 | #endif //cl_khr_gl_msaa_sharing | ||
6203 | +#endif //__opencl_c_read_write_images | ||
6204 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6205 | |||
6206 | /** | ||
6207 | @@ -14882,7 +16916,7 @@ int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image); | ||
6208 | #endif //cl_khr_gl_msaa_sharing | ||
6209 | |||
6210 | int __ovld __cnfn get_image_height(write_only image2d_t image); | ||
6211 | -#ifdef cl_khr_3d_image_writes | ||
6212 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6213 | int __ovld __cnfn get_image_height(write_only image3d_t image); | ||
6214 | #endif | ||
6215 | int __ovld __cnfn get_image_height(write_only image2d_array_t image); | ||
6216 | @@ -14898,6 +16932,7 @@ int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image); | ||
6217 | #endif //cl_khr_gl_msaa_sharing | ||
6218 | |||
6219 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6220 | +#ifdef __opencl_c_read_write_images | ||
6221 | int __ovld __cnfn get_image_height(read_write image2d_t image); | ||
6222 | int __ovld __cnfn get_image_height(read_write image3d_t image); | ||
6223 | int __ovld __cnfn get_image_height(read_write image2d_array_t image); | ||
6224 | @@ -14911,6 +16946,7 @@ int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image); | ||
6225 | int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image); | ||
6226 | int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image); | ||
6227 | #endif //cl_khr_gl_msaa_sharing | ||
6228 | +#endif //__opencl_c_read_write_images | ||
6229 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6230 | |||
6231 | /** | ||
6232 | @@ -14918,12 +16954,14 @@ int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image); | ||
6233 | */ | ||
6234 | int __ovld __cnfn get_image_depth(read_only image3d_t image); | ||
6235 | |||
6236 | -#ifdef cl_khr_3d_image_writes | ||
6237 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6238 | int __ovld __cnfn get_image_depth(write_only image3d_t image); | ||
6239 | #endif | ||
6240 | |||
6241 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6242 | +#ifdef __opencl_c_read_write_images | ||
6243 | int __ovld __cnfn get_image_depth(read_write image3d_t image); | ||
6244 | +#endif //__opencl_c_read_write_images | ||
6245 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6246 | |||
6247 | // OpenCL Extension v2.0 s9.18 - Mipmaps | ||
6248 | @@ -14939,13 +16977,15 @@ int __ovld get_image_num_mip_levels(read_only image3d_t image); | ||
6249 | |||
6250 | int __ovld get_image_num_mip_levels(write_only image1d_t image); | ||
6251 | int __ovld get_image_num_mip_levels(write_only image2d_t image); | ||
6252 | -#ifdef cl_khr_3d_image_writes | ||
6253 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6254 | int __ovld get_image_num_mip_levels(write_only image3d_t image); | ||
6255 | #endif | ||
6256 | |||
6257 | +#ifdef __opencl_c_read_write_images | ||
6258 | int __ovld get_image_num_mip_levels(read_write image1d_t image); | ||
6259 | int __ovld get_image_num_mip_levels(read_write image2d_t image); | ||
6260 | int __ovld get_image_num_mip_levels(read_write image3d_t image); | ||
6261 | +#endif //__opencl_c_read_write_images | ||
6262 | |||
6263 | int __ovld get_image_num_mip_levels(read_only image1d_array_t image); | ||
6264 | int __ovld get_image_num_mip_levels(read_only image2d_array_t image); | ||
6265 | @@ -14957,10 +16997,12 @@ int __ovld get_image_num_mip_levels(write_only image2d_array_t image); | ||
6266 | int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image); | ||
6267 | int __ovld get_image_num_mip_levels(write_only image2d_depth_t image); | ||
6268 | |||
6269 | +#ifdef __opencl_c_read_write_images | ||
6270 | int __ovld get_image_num_mip_levels(read_write image1d_array_t image); | ||
6271 | int __ovld get_image_num_mip_levels(read_write image2d_array_t image); | ||
6272 | int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image); | ||
6273 | int __ovld get_image_num_mip_levels(read_write image2d_depth_t image); | ||
6274 | +#endif //__opencl_c_read_write_images | ||
6275 | |||
6276 | #endif //cl_khr_mipmap_image | ||
6277 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6278 | @@ -15004,7 +17046,7 @@ int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth | ||
6279 | int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image); | ||
6280 | int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image); | ||
6281 | int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image); | ||
6282 | -#ifdef cl_khr_3d_image_writes | ||
6283 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6284 | int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image); | ||
6285 | #endif | ||
6286 | int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image); | ||
6287 | @@ -15021,6 +17063,7 @@ int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_dept | ||
6288 | #endif //cl_khr_gl_msaa_sharing | ||
6289 | |||
6290 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6291 | +#ifdef __opencl_c_read_write_images | ||
6292 | int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image); | ||
6293 | int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image); | ||
6294 | int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image); | ||
6295 | @@ -15037,6 +17080,7 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t im | ||
6296 | int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image); | ||
6297 | int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image); | ||
6298 | #endif //cl_khr_gl_msaa_sharing | ||
6299 | +#endif //__opencl_c_read_write_images | ||
6300 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6301 | |||
6302 | /** | ||
6303 | @@ -15076,7 +17120,7 @@ int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t i | ||
6304 | int __ovld __cnfn get_image_channel_order(write_only image1d_t image); | ||
6305 | int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image); | ||
6306 | int __ovld __cnfn get_image_channel_order(write_only image2d_t image); | ||
6307 | -#ifdef cl_khr_3d_image_writes | ||
6308 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6309 | int __ovld __cnfn get_image_channel_order(write_only image3d_t image); | ||
6310 | #endif | ||
6311 | int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image); | ||
6312 | @@ -15093,6 +17137,7 @@ int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t | ||
6313 | #endif //cl_khr_gl_msaa_sharing | ||
6314 | |||
6315 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6316 | +#ifdef __opencl_c_read_write_images | ||
6317 | int __ovld __cnfn get_image_channel_order(read_write image1d_t image); | ||
6318 | int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image); | ||
6319 | int __ovld __cnfn get_image_channel_order(read_write image2d_t image); | ||
6320 | @@ -15109,6 +17154,7 @@ int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image) | ||
6321 | int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image); | ||
6322 | int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image); | ||
6323 | #endif //cl_khr_gl_msaa_sharing | ||
6324 | +#endif //__opencl_c_read_write_images | ||
6325 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6326 | |||
6327 | /** | ||
6328 | @@ -15143,6 +17189,7 @@ int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image); | ||
6329 | #endif //cl_khr_gl_msaa_sharing | ||
6330 | |||
6331 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6332 | +#ifdef __opencl_c_read_write_images | ||
6333 | int2 __ovld __cnfn get_image_dim(read_write image2d_t image); | ||
6334 | int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image); | ||
6335 | #ifdef cl_khr_depth_images | ||
6336 | @@ -15155,6 +17202,7 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image); | ||
6337 | int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image); | ||
6338 | int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); | ||
6339 | #endif //cl_khr_gl_msaa_sharing | ||
6340 | +#endif //__opencl_c_read_write_images | ||
6341 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6342 | |||
6343 | /** | ||
6344 | @@ -15164,11 +17212,13 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); | ||
6345 | * component and the w component is 0. | ||
6346 | */ | ||
6347 | int4 __ovld __cnfn get_image_dim(read_only image3d_t image); | ||
6348 | -#ifdef cl_khr_3d_image_writes | ||
6349 | +#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes) | ||
6350 | int4 __ovld __cnfn get_image_dim(write_only image3d_t image); | ||
6351 | #endif | ||
6352 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6353 | +#ifdef __opencl_c_read_write_images | ||
6354 | int4 __ovld __cnfn get_image_dim(read_write image3d_t image); | ||
6355 | +#endif //__opencl_c_read_write_images | ||
6356 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6357 | |||
6358 | /** | ||
6359 | @@ -15196,6 +17246,7 @@ size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t | ||
6360 | #endif //cl_khr_gl_msaa_sharing | ||
6361 | |||
6362 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6363 | +#ifdef __opencl_c_read_write_images | ||
6364 | size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array); | ||
6365 | size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array); | ||
6366 | #ifdef cl_khr_depth_images | ||
6367 | @@ -15205,6 +17256,7 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image | ||
6368 | size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array); | ||
6369 | size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array); | ||
6370 | #endif //cl_khr_gl_msaa_sharing | ||
6371 | +#endif //__opencl_c_read_write_images | ||
6372 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6373 | |||
6374 | /** | ||
6375 | @@ -15222,16 +17274,21 @@ int __ovld get_image_num_samples(write_only image2d_array_msaa_t image); | ||
6376 | int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image); | ||
6377 | |||
6378 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6379 | +#ifdef __opencl_c_read_write_images | ||
6380 | int __ovld get_image_num_samples(read_write image2d_msaa_t image); | ||
6381 | int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image); | ||
6382 | int __ovld get_image_num_samples(read_write image2d_array_msaa_t image); | ||
6383 | int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image); | ||
6384 | +#endif //__opencl_c_read_write_images | ||
6385 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6386 | #endif | ||
6387 | |||
6388 | +#endif //__opencl_c_images | ||
6389 | + | ||
6390 | // OpenCL v2.0 s6.13.15 - Work-group Functions | ||
6391 | |||
6392 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6393 | +#ifdef __opencl_c_work_group_collective_functions | ||
6394 | int __ovld __conv work_group_all(int predicate); | ||
6395 | int __ovld __conv work_group_any(int predicate); | ||
6396 | |||
6397 | @@ -15255,11 +17312,11 @@ ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z); | ||
6398 | float __ovld __conv work_group_broadcast(float a, size_t local_id); | ||
6399 | float __ovld __conv work_group_broadcast(float a, size_t x, size_t y); | ||
6400 | float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z); | ||
6401 | -#ifdef cl_khr_fp64 | ||
6402 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
6403 | double __ovld __conv work_group_broadcast(double a, size_t local_id); | ||
6404 | double __ovld __conv work_group_broadcast(double a, size_t x, size_t y); | ||
6405 | double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z); | ||
6406 | -#endif //cl_khr_fp64 | ||
6407 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
6408 | |||
6409 | #ifdef cl_khr_fp16 | ||
6410 | half __ovld __conv work_group_reduce_add(half x); | ||
6411 | @@ -15317,7 +17374,7 @@ float __ovld __conv work_group_scan_exclusive_max(float x); | ||
6412 | float __ovld __conv work_group_scan_inclusive_add(float x); | ||
6413 | float __ovld __conv work_group_scan_inclusive_min(float x); | ||
6414 | float __ovld __conv work_group_scan_inclusive_max(float x); | ||
6415 | -#ifdef cl_khr_fp64 | ||
6416 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
6417 | double __ovld __conv work_group_reduce_add(double x); | ||
6418 | double __ovld __conv work_group_reduce_min(double x); | ||
6419 | double __ovld __conv work_group_reduce_max(double x); | ||
6420 | @@ -15327,19 +17384,18 @@ double __ovld __conv work_group_scan_exclusive_max(double x); | ||
6421 | double __ovld __conv work_group_scan_inclusive_add(double x); | ||
6422 | double __ovld __conv work_group_scan_inclusive_min(double x); | ||
6423 | double __ovld __conv work_group_scan_inclusive_max(double x); | ||
6424 | -#endif //cl_khr_fp64 | ||
6425 | - | ||
6426 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
6427 | +#endif //__opencl_c_work_group_collective_functions | ||
6428 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6429 | |||
6430 | // OpenCL v2.0 s6.13.16 - Pipe Functions | ||
6431 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6432 | +#ifdef __opencl_c_pipes | ||
6433 | bool __ovld is_valid_reserve_id(reserve_id_t reserve_id); | ||
6434 | -#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6435 | - | ||
6436 | +#endif //__opencl_c_pipes | ||
6437 | |||
6438 | // OpenCL v2.0 s6.13.17 - Enqueue Kernels | ||
6439 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6440 | - | ||
6441 | +#ifdef __opencl_c_device_enqueue | ||
6442 | ndrange_t __ovld ndrange_1D(size_t); | ||
6443 | ndrange_t __ovld ndrange_1D(size_t, size_t); | ||
6444 | ndrange_t __ovld ndrange_1D(size_t, size_t, size_t); | ||
6445 | @@ -15367,11 +17423,13 @@ bool __ovld is_valid_event (clk_event_t event); | ||
6446 | void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value); | ||
6447 | |||
6448 | queue_t __ovld get_default_queue(void); | ||
6449 | +#endif //__opencl_c_device_enqueue | ||
6450 | #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6451 | |||
6452 | // OpenCL Extension v2.0 s9.17 - Sub-groups | ||
6453 | |||
6454 | -#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) | ||
6455 | +#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \ | ||
6456 | + defined(__opencl_c_subgroups) | ||
6457 | // Shared Sub Group Functions | ||
6458 | uint __ovld get_sub_group_size(void); | ||
6459 | uint __ovld get_max_sub_group_size(void); | ||
6460 | @@ -15457,7 +17515,7 @@ half __ovld __conv sub_group_scan_inclusive_min(half x); | ||
6461 | half __ovld __conv sub_group_scan_inclusive_max(half x); | ||
6462 | #endif //cl_khr_fp16 | ||
6463 | |||
6464 | -#ifdef cl_khr_fp64 | ||
6465 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
6466 | double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id); | ||
6467 | double __ovld __conv sub_group_reduce_add(double x); | ||
6468 | double __ovld __conv sub_group_reduce_min(double x); | ||
6469 | @@ -15468,7 +17526,7 @@ double __ovld __conv sub_group_scan_exclusive_max(double x); | ||
6470 | double __ovld __conv sub_group_scan_inclusive_add(double x); | ||
6471 | double __ovld __conv sub_group_scan_inclusive_min(double x); | ||
6472 | double __ovld __conv sub_group_scan_inclusive_max(double x); | ||
6473 | -#endif //cl_khr_fp64 | ||
6474 | +#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
6475 | |||
6476 | #endif //cl_khr_subgroups cl_intel_subgroups | ||
6477 | |||
6478 | @@ -15570,16 +17628,22 @@ uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c ); | ||
6479 | long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c ); | ||
6480 | ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c ); | ||
6481 | |||
6482 | +#ifdef __opencl_c_images | ||
6483 | uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord ); | ||
6484 | uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord ); | ||
6485 | uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord ); | ||
6486 | uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord ); | ||
6487 | +#endif //__opencl_c_images | ||
6488 | |||
6489 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6490 | +#ifdef __opencl_c_images | ||
6491 | +#ifdef __opencl_c_read_write_images | ||
6492 | uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord); | ||
6493 | uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord); | ||
6494 | uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord); | ||
6495 | uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord); | ||
6496 | +#endif //__opencl_c_read_write_images | ||
6497 | +#endif //__opencl_c_images | ||
6498 | #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6499 | |||
6500 | uint __ovld __conv intel_sub_group_block_read( const __global uint* p ); | ||
6501 | @@ -15587,16 +17651,22 @@ uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p ); | ||
6502 | uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p ); | ||
6503 | uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p ); | ||
6504 | |||
6505 | +#ifdef __opencl_c_images | ||
6506 | void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data); | ||
6507 | void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data); | ||
6508 | void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data); | ||
6509 | void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data); | ||
6510 | +#endif //__opencl_c_images | ||
6511 | |||
6512 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6513 | +#ifdef __opencl_c_images | ||
6514 | +#ifdef __opencl_c_read_write_images | ||
6515 | void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data); | ||
6516 | void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data); | ||
6517 | void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data); | ||
6518 | void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data); | ||
6519 | +#endif // __opencl_c_read_write_images | ||
6520 | +#endif // __opencl_c_images | ||
6521 | #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6522 | |||
6523 | void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data ); | ||
6524 | @@ -15611,7 +17681,7 @@ half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c ); | ||
6525 | half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c ); | ||
6526 | #endif | ||
6527 | |||
6528 | -#if defined(cl_khr_fp64) | ||
6529 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
6530 | double __ovld __conv intel_sub_group_shuffle( double x, uint c ); | ||
6531 | double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c ); | ||
6532 | double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c ); | ||
6533 | @@ -15710,16 +17780,22 @@ ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x ); | ||
6534 | short __ovld __conv intel_sub_group_scan_inclusive_max( short x ); | ||
6535 | ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x ); | ||
6536 | |||
6537 | +#ifdef __opencl_c_images | ||
6538 | uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord ); | ||
6539 | uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord ); | ||
6540 | uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord ); | ||
6541 | uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord ); | ||
6542 | +#endif //__opencl_c_images | ||
6543 | |||
6544 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6545 | +#ifdef __opencl_c_images | ||
6546 | +#ifdef __opencl_c_read_write_images | ||
6547 | uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord ); | ||
6548 | uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord ); | ||
6549 | uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord ); | ||
6550 | uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord ); | ||
6551 | +#endif //__opencl_c_read_write_images | ||
6552 | +#endif //__opencl_c_images | ||
6553 | #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6554 | |||
6555 | uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p ); | ||
6556 | @@ -15727,16 +17803,22 @@ uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p | ||
6557 | uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p ); | ||
6558 | uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p ); | ||
6559 | |||
6560 | +#ifdef __opencl_c_images | ||
6561 | void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data ); | ||
6562 | void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data ); | ||
6563 | void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data ); | ||
6564 | void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data ); | ||
6565 | +#endif //__opencl_c_images | ||
6566 | |||
6567 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6568 | +#ifdef __opencl_c_images | ||
6569 | +#ifdef __opencl_c_read_write_images | ||
6570 | void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data ); | ||
6571 | void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data ); | ||
6572 | void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data ); | ||
6573 | void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data ); | ||
6574 | +#endif //__opencl_c_read_write_images | ||
6575 | +#endif //__opencl_c_images | ||
6576 | #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6577 | |||
6578 | void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data ); | ||
6579 | @@ -15744,16 +17826,22 @@ void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint | ||
6580 | void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data ); | ||
6581 | void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data ); | ||
6582 | |||
6583 | +#ifdef __opencl_c_images | ||
6584 | ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord ); | ||
6585 | ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord ); | ||
6586 | ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord ); | ||
6587 | ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord ); | ||
6588 | +#endif //__opencl_c_images | ||
6589 | |||
6590 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6591 | +#ifdef __opencl_c_images | ||
6592 | +#ifdef __opencl_c_read_write_images | ||
6593 | ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord); | ||
6594 | ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord); | ||
6595 | ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord); | ||
6596 | ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord); | ||
6597 | +#endif //__opencl_c_read_write_images | ||
6598 | +#endif //__opencl_c_images | ||
6599 | #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6600 | |||
6601 | ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p ); | ||
6602 | @@ -15761,16 +17849,22 @@ ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* | ||
6603 | ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p ); | ||
6604 | ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p ); | ||
6605 | |||
6606 | +#ifdef __opencl_c_images | ||
6607 | void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data); | ||
6608 | void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data); | ||
6609 | void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data); | ||
6610 | void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data); | ||
6611 | +#endif //__opencl_c_images | ||
6612 | |||
6613 | #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6614 | +#ifdef __opencl_c_images | ||
6615 | +#ifdef __opencl_c_read_write_images | ||
6616 | void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data); | ||
6617 | void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data); | ||
6618 | void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data); | ||
6619 | void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data); | ||
6620 | +#endif //__opencl_c_read_write_images | ||
6621 | +#endif //__opencl_c_images | ||
6622 | #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
6623 | |||
6624 | void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data ); | ||
6625 | @@ -15889,6 +17983,7 @@ short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset( | ||
6626 | short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size, | ||
6627 | ushort2 image_size); | ||
6628 | |||
6629 | +#ifdef __opencl_c_images | ||
6630 | intel_sub_group_avc_ime_result_t __ovld | ||
6631 | intel_sub_group_avc_ime_evaluate_with_single_reference( | ||
6632 | read_only image2d_t src_image, read_only image2d_t ref_image, | ||
6633 | @@ -15929,6 +18024,7 @@ intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout( | ||
6634 | read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler, | ||
6635 | intel_sub_group_avc_ime_payload_t payload, | ||
6636 | intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components); | ||
6637 | +#endif //__opencl_c_images | ||
6638 | |||
6639 | intel_sub_group_avc_ime_single_reference_streamin_t __ovld | ||
6640 | intel_sub_group_avc_ime_get_single_reference_streamin( | ||
6641 | @@ -15993,6 +18089,7 @@ intel_sub_group_avc_ref_payload_t __ovld | ||
6642 | intel_sub_group_avc_ref_set_bilinear_filter_enable( | ||
6643 | intel_sub_group_avc_ref_payload_t payload); | ||
6644 | |||
6645 | +#ifdef __opencl_c_images | ||
6646 | intel_sub_group_avc_ref_result_t __ovld | ||
6647 | intel_sub_group_avc_ref_evaluate_with_single_reference( | ||
6648 | read_only image2d_t src_image, read_only image2d_t ref_image, | ||
6649 | @@ -16011,6 +18108,7 @@ intel_sub_group_avc_ref_evaluate_with_multi_reference( | ||
6650 | read_only image2d_t src_image, uint packed_reference_ids, | ||
6651 | uchar packed_reference_field_polarities, sampler_t vme_media_sampler, | ||
6652 | intel_sub_group_avc_ref_payload_t payload); | ||
6653 | +#endif //__opencl_c_images | ||
6654 | |||
6655 | // SIC built-in functions | ||
6656 | intel_sub_group_avc_sic_payload_t __ovld | ||
6657 | @@ -16061,6 +18159,7 @@ intel_sub_group_avc_sic_set_block_based_raw_skip_sad( | ||
6658 | uchar block_based_skip_type, | ||
6659 | intel_sub_group_avc_sic_payload_t payload); | ||
6660 | |||
6661 | +#ifdef __opencl_c_images | ||
6662 | intel_sub_group_avc_sic_result_t __ovld | ||
6663 | intel_sub_group_avc_sic_evaluate_ipe( | ||
6664 | read_only image2d_t src_image, sampler_t vme_media_sampler, | ||
6665 | @@ -16083,6 +18182,7 @@ intel_sub_group_avc_sic_evaluate_with_multi_reference( | ||
6666 | read_only image2d_t src_image, uint packed_reference_ids, | ||
6667 | uchar packed_reference_field_polarities, sampler_t vme_media_sampler, | ||
6668 | intel_sub_group_avc_sic_payload_t payload); | ||
6669 | +#endif //__opencl_c_images | ||
6670 | |||
6671 | uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape( | ||
6672 | intel_sub_group_avc_sic_result_t result); | ||
6673 | diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp | ||
6674 | index 6353e14bc41a..a8c216de6e04 100644 | ||
6675 | --- a/clang/lib/Parse/ParseDecl.cpp | ||
6676 | +++ b/clang/lib/Parse/ParseDecl.cpp | ||
6677 | @@ -3904,8 +3904,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, | ||
6678 | isInvalid = DS.SetTypeAltiVecBool(true, Loc, PrevSpec, DiagID, Policy); | ||
6679 | break; | ||
6680 | case tok::kw_pipe: | ||
6681 | - if (!getLangOpts().OpenCL || (getLangOpts().OpenCLVersion < 200 && | ||
6682 | - !getLangOpts().OpenCLCPlusPlus)) { | ||
6683 | + if (!getLangOpts().OpenCLPipeKeyword) { | ||
6684 | // OpenCL 2.0 defined this keyword. OpenCL 1.2 and earlier should | ||
6685 | // support the "pipe" word as identifier. | ||
6686 | Tok.getIdentifierInfo()->revertTokenIDToIdentifier(); | ||
6687 | @@ -4027,8 +4026,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, | ||
6688 | case tok::kw___generic: | ||
6689 | // generic address space is introduced only in OpenCL v2.0 | ||
6690 | // see OpenCL C Spec v2.0 s6.5.5 | ||
6691 | - if (Actions.getLangOpts().OpenCLVersion < 200 && | ||
6692 | - !Actions.getLangOpts().OpenCLCPlusPlus) { | ||
6693 | + if (!Actions.getLangOpts().OpenCLGenericKeyword) { | ||
6694 | DiagID = diag::err_opencl_unknown_type_specifier; | ||
6695 | PrevSpec = Tok.getIdentifierInfo()->getNameStart(); | ||
6696 | isInvalid = true; | ||
6697 | @@ -5050,8 +5048,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { | ||
6698 | default: return false; | ||
6699 | |||
6700 | case tok::kw_pipe: | ||
6701 | - return (getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200) || | ||
6702 | - getLangOpts().OpenCLCPlusPlus; | ||
6703 | + return getLangOpts().OpenCLPipeKeyword; | ||
6704 | |||
6705 | case tok::identifier: // foo::bar | ||
6706 | // Unfortunate hack to support "Class.factoryMethod" notation. | ||
6707 | @@ -5557,8 +5554,7 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang, | ||
6708 | if (Kind == tok::star || Kind == tok::caret) | ||
6709 | return true; | ||
6710 | |||
6711 | - if (Kind == tok::kw_pipe && | ||
6712 | - ((Lang.OpenCL && Lang.OpenCLVersion >= 200) || Lang.OpenCLCPlusPlus)) | ||
6713 | + if (Kind == tok::kw_pipe && Lang.OpenCLPipeKeyword) | ||
6714 | return true; | ||
6715 | |||
6716 | if (!Lang.CPlusPlus) | ||
6717 | diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp | ||
6718 | index df411e1928d6..9a40ce888695 100644 | ||
6719 | --- a/clang/lib/Parse/ParsePragma.cpp | ||
6720 | +++ b/clang/lib/Parse/ParsePragma.cpp | ||
6721 | @@ -697,12 +697,14 @@ void Parser::HandlePragmaOpenCLExtension() { | ||
6722 | if (Name == "all") { | ||
6723 | if (State == Disable) { | ||
6724 | Opt.disableAll(); | ||
6725 | - Opt.enableSupportedCore(getLangOpts()); | ||
6726 | + Opt.enableSupportedCore(); | ||
6727 | } else { | ||
6728 | PP.Diag(NameLoc, diag::warn_pragma_expected_predicate) << 1; | ||
6729 | } | ||
6730 | + } else if (Opt.isFeature(Name)) { | ||
6731 | + PP.Diag(NameLoc, diag::warn_opencl_pragma_feature_ignore) << Ident; | ||
6732 | } else if (State == Begin) { | ||
6733 | - if (!Opt.isKnown(Name) || !Opt.isSupported(Name, getLangOpts())) { | ||
6734 | + if (!Opt.isKnown(Name) || !Opt.isSupported(Name)) { | ||
6735 | Opt.support(Name); | ||
6736 | } | ||
6737 | Actions.setCurrentOpenCLExtension(Name); | ||
6738 | @@ -712,9 +714,9 @@ void Parser::HandlePragmaOpenCLExtension() { | ||
6739 | Actions.setCurrentOpenCLExtension(""); | ||
6740 | } else if (!Opt.isKnown(Name)) | ||
6741 | PP.Diag(NameLoc, diag::warn_pragma_unknown_extension) << Ident; | ||
6742 | - else if (Opt.isSupportedExtension(Name, getLangOpts())) | ||
6743 | + else if (Opt.isSupportedExtension(Name)) | ||
6744 | Opt.enable(Name, State == Enable); | ||
6745 | - else if (Opt.isSupportedCore(Name, getLangOpts())) | ||
6746 | + else if (Opt.isSupportedCore(Name)) | ||
6747 | PP.Diag(NameLoc, diag::warn_pragma_extension_is_core) << Ident; | ||
6748 | else | ||
6749 | PP.Diag(NameLoc, diag::warn_pragma_unsupported_extension) << Ident; | ||
6750 | diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td | ||
6751 | index 9d6bb411eff8..d352d35f1e46 100644 | ||
6752 | --- a/clang/lib/Sema/OpenCLBuiltins.td | ||
6753 | +++ b/clang/lib/Sema/OpenCLBuiltins.td | ||
6754 | @@ -22,11 +22,13 @@ | ||
6755 | class Version<int _Version> { | ||
6756 | int ID = _Version; | ||
6757 | } | ||
6758 | + | ||
6759 | def CLAll : Version< 0>; | ||
6760 | def CL10 : Version<100>; | ||
6761 | def CL11 : Version<110>; | ||
6762 | def CL12 : Version<120>; | ||
6763 | def CL20 : Version<200>; | ||
6764 | +def CL30 : Version<300>; | ||
6765 | |||
6766 | // Address spaces | ||
6767 | // Pointer types need to be assigned an address space. | ||
6768 | @@ -65,6 +67,14 @@ def FuncExtKhrGlMsaaSharing : FunctionExtension<"cl_khr_gl_msaa_sha | ||
6769 | // Multiple extensions | ||
6770 | def FuncExtKhrMipmapAndWrite3d : FunctionExtension<"cl_khr_mipmap_image cl_khr_3d_image_writes">; | ||
6771 | |||
6772 | +// Features | ||
6773 | +def FuncExtGenericAddressSpace : FunctionExtension<"__opencl_c_generic_address_space">; | ||
6774 | +def FuncExtWorkGroupCollective : FunctionExtension<"__opencl_c_work_group_collective_functions">; | ||
6775 | +def FuncExtPipes : FunctionExtension<"__opencl_c_pipes">; | ||
6776 | +def FuncExtDeviceSidEenqueue : FunctionExtension<"__opencl_c_device_enqueue">; | ||
6777 | +def FuncNonExplicitAtomicFeatures : FunctionExtension<"__opencl_c_atomic_order_seq_cst __opencl_c_atomic_scope_device">; | ||
6778 | +def FuncNonExplicitAtomicFeaturesAndGenericAS : FunctionExtension<"__opencl_c_atomic_order_seq_cst __opencl_c_atomic_scope_device __opencl_c_generic_address_space">; | ||
6779 | + | ||
6780 | // Qualified Type. These map to ASTContext::QualType. | ||
6781 | class QualType<string _Name, bit _IsAbstract=0> { | ||
6782 | // Name of the field or function in a clang::ASTContext | ||
6783 | @@ -230,13 +240,9 @@ class Builtin<string _Name, list<Type> _Signature, list<bit> _Attributes = Attr. | ||
6784 | bit IsConv = _Attributes[2]; | ||
6785 | // OpenCL extensions to which the function belongs. | ||
6786 | FunctionExtension Extension = FuncExtNone; | ||
6787 | - // Version of OpenCL from which the function is available (e.g.: CL10). | ||
6788 | - // MinVersion is inclusive. | ||
6789 | - Version MinVersion = CL10; | ||
6790 | - // Version of OpenCL from which the function is not supported anymore. | ||
6791 | - // MaxVersion is exclusive. | ||
6792 | + // List of OpenCL version in which this function available. | ||
6793 | // CLAll makes the function available for all versions. | ||
6794 | - Version MaxVersion = CLAll; | ||
6795 | + list<Version> Versions = [CLAll]; | ||
6796 | } | ||
6797 | |||
6798 | //===----------------------------------------------------------------------===// | ||
6799 | @@ -398,7 +404,7 @@ foreach name = ["get_global_size", "get_global_id", "get_local_size", | ||
6800 | def : Builtin<name, [Size, UInt], Attr.Const>; | ||
6801 | } | ||
6802 | |||
6803 | -let MinVersion = CL20 in { | ||
6804 | +let Versions = [CL20, CL30] in { | ||
6805 | def : Builtin<"get_enqueued_local_size", [Size, UInt]>; | ||
6806 | foreach name = ["get_global_linear_id", "get_local_linear_id"] in { | ||
6807 | def : Builtin<name, [Size]>; | ||
6808 | @@ -471,7 +477,7 @@ foreach name = ["fma", "mad"] in { | ||
6809 | } | ||
6810 | |||
6811 | // --- Version dependent --- | ||
6812 | -let MaxVersion = CL20 in { | ||
6813 | +let Versions = [CL10, CL11, CL12, CL30] in { | ||
6814 | foreach AS = [GlobalAS, LocalAS, PrivateAS] in { | ||
6815 | foreach name = ["fract", "modf", "sincos"] in { | ||
6816 | def : Builtin<name, [FGenTypeN, FGenTypeN, PointerType<FGenTypeN, AS>]>; | ||
6817 | @@ -488,7 +494,9 @@ let MaxVersion = CL20 in { | ||
6818 | } | ||
6819 | } | ||
6820 | } | ||
6821 | -let MinVersion = CL20 in { | ||
6822 | + | ||
6823 | +let Versions = [CL20, CL30] in { | ||
6824 | + let Extension = FuncExtGenericAddressSpace in { | ||
6825 | foreach name = ["fract", "modf", "sincos"] in { | ||
6826 | def : Builtin<name, [FGenTypeN, FGenTypeN, PointerType<FGenTypeN, GenericAS>]>; | ||
6827 | } | ||
6828 | @@ -501,6 +509,7 @@ let MinVersion = CL20 in { | ||
6829 | def : Builtin<name, [Type, Type, Type, PointerType<GenTypeIntVecAndScalar, GenericAS>]>; | ||
6830 | } | ||
6831 | } | ||
6832 | + } | ||
6833 | } | ||
6834 | |||
6835 | // --- Table 9 --- | ||
6836 | @@ -531,7 +540,7 @@ foreach name = ["abs"] in { | ||
6837 | foreach name = ["clz", "popcount"] in { | ||
6838 | def : Builtin<name, [AIGenTypeN, AIGenTypeN], Attr.Const>; | ||
6839 | } | ||
6840 | -let MinVersion = CL20 in { | ||
6841 | +let Versions = [CL20, CL30] in { | ||
6842 | foreach name = ["ctz"] in { | ||
6843 | def : Builtin<name, [AIGenTypeN, AIGenTypeN]>; | ||
6844 | } | ||
6845 | @@ -705,7 +714,7 @@ foreach name = ["select"] in { | ||
6846 | // --- Table 15 --- | ||
6847 | // Variants for OpenCL versions below 2.0, using pointers to the global, local | ||
6848 | // and private address spaces. | ||
6849 | -let MaxVersion = CL20 in { | ||
6850 | +let Versions = [CL10, CL11, CL12, CL30] in { | ||
6851 | foreach AS = [GlobalAS, LocalAS, PrivateAS] in { | ||
6852 | foreach VSize = [2, 3, 4, 8, 16] in { | ||
6853 | foreach name = ["vload" # VSize] in { | ||
6854 | @@ -748,7 +757,8 @@ let MaxVersion = CL20 in { | ||
6855 | } | ||
6856 | // Variants for OpenCL versions above 2.0, using pointers to the generic | ||
6857 | // address space. | ||
6858 | -let MinVersion = CL20 in { | ||
6859 | +let Versions = [CL20, CL30] in { | ||
6860 | + let Extension = FuncExtGenericAddressSpace in { | ||
6861 | foreach VSize = [2, 3, 4, 8, 16] in { | ||
6862 | foreach name = ["vload" # VSize] in { | ||
6863 | def : Builtin<name, [VectorType<Char, VSize>, Size, PointerType<ConstType<Char>, GenericAS>]>; | ||
6864 | @@ -786,6 +796,7 @@ let MinVersion = CL20 in { | ||
6865 | } | ||
6866 | } | ||
6867 | } | ||
6868 | + } | ||
6869 | } | ||
6870 | // Variants using pointers to the constant address space. | ||
6871 | foreach VSize = [2, 3, 4, 8, 16] in { | ||
6872 | @@ -812,7 +823,7 @@ foreach VSize = [2, 3, 4, 8, 16] in { | ||
6873 | } | ||
6874 | } | ||
6875 | } | ||
6876 | -let MaxVersion = CL20 in { | ||
6877 | +let Versions = [CL10, CL11, CL12, CL30] in { | ||
6878 | foreach AS = [GlobalAS, LocalAS, PrivateAS] in { | ||
6879 | def : Builtin<"vload_half", [Float, Size, PointerType<ConstType<Half>, AS>]>; | ||
6880 | foreach VSize = [2, 3, 4, 8, 16] in { | ||
6881 | @@ -832,7 +843,8 @@ let MaxVersion = CL20 in { | ||
6882 | } | ||
6883 | } | ||
6884 | } | ||
6885 | -let MinVersion = CL20 in { | ||
6886 | +let Versions = [CL20, CL30] in { | ||
6887 | + let Extension = FuncExtGenericAddressSpace in { | ||
6888 | foreach AS = [GenericAS] in { | ||
6889 | def : Builtin<"vload_half", [Float, Size, PointerType<ConstType<Half>, AS>]>; | ||
6890 | foreach VSize = [2, 3, 4, 8, 16] in { | ||
6891 | @@ -851,6 +863,7 @@ let MinVersion = CL20 in { | ||
6892 | } | ||
6893 | } | ||
6894 | } | ||
6895 | + } | ||
6896 | } | ||
6897 | |||
6898 | foreach AS = [ConstantAS] in { | ||
6899 | @@ -875,7 +888,9 @@ foreach name = ["async_work_group_strided_copy"] in { | ||
6900 | def : Builtin<name, [Event, PointerType<AGenTypeN, GlobalAS>, PointerType<ConstType<AGenTypeN>, LocalAS>, Size, Size, Event]>; | ||
6901 | } | ||
6902 | foreach name = ["wait_group_events"] in { | ||
6903 | + let Extension = FuncExtGenericAddressSpace in { | ||
6904 | def : Builtin<name, [Void, Int, PointerType<Event, GenericAS>]>; | ||
6905 | + } | ||
6906 | } | ||
6907 | foreach name = ["prefetch"] in { | ||
6908 | def : Builtin<name, [Void, PointerType<ConstType<AGenTypeN>, GlobalAS>, Size]>; | ||
6909 | @@ -1154,7 +1169,8 @@ foreach aQual = ["WO", "RW"] in { | ||
6910 | //-------------------------------------------------------------------- | ||
6911 | // OpenCL v2.0 s6.13.15 - Work-group Functions | ||
6912 | // --- Table 26 --- | ||
6913 | -let MinVersion = CL20 in { | ||
6914 | +let Versions = [CL20, CL30] in { | ||
6915 | + let Extension = FuncExtWorkGroupCollective in { | ||
6916 | foreach name = ["work_group_all", "work_group_any"] in { | ||
6917 | def : Builtin<name, [Int, Int], Attr.Convergent>; | ||
6918 | } | ||
6919 | @@ -1169,11 +1185,12 @@ let MinVersion = CL20 in { | ||
6920 | def : Builtin<name # op, [IntLongFloatGenType1, IntLongFloatGenType1], Attr.Convergent>; | ||
6921 | } | ||
6922 | } | ||
6923 | + } | ||
6924 | } | ||
6925 | |||
6926 | |||
6927 | // OpenCL v2.0 s9.17.3: Additions to section 6.13.1: Work-Item Functions | ||
6928 | -let MinVersion = CL20 in { | ||
6929 | +let Versions = [CL20] in { | ||
6930 | let Extension = FuncExtKhrSubgroups in { | ||
6931 | def get_sub_group_size : Builtin<"get_sub_group_size", [UInt]>; | ||
6932 | def get_max_sub_group_size : Builtin<"get_max_sub_group_size", [UInt]>; | ||
6933 | diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp | ||
6934 | index 9cfce5a63b1d..c26f45f62668 100644 | ||
6935 | --- a/clang/lib/Sema/Sema.cpp | ||
6936 | +++ b/clang/lib/Sema/Sema.cpp | ||
6937 | @@ -286,9 +286,10 @@ void Sema::Initialize() { | ||
6938 | // Initialize predefined OpenCL types and supported extensions and (optional) | ||
6939 | // core features. | ||
6940 | if (getLangOpts().OpenCL) { | ||
6941 | + getOpenCLOptions().setOpenCLVersion(getLangOpts()); | ||
6942 | getOpenCLOptions().addSupport( | ||
6943 | Context.getTargetInfo().getSupportedOpenCLOpts()); | ||
6944 | - getOpenCLOptions().enableSupportedCore(getLangOpts()); | ||
6945 | + getOpenCLOptions().enableSupportedCore(); | ||
6946 | addImplicitTypedef("sampler_t", Context.OCLSamplerTy); | ||
6947 | addImplicitTypedef("event_t", Context.OCLEventTy); | ||
6948 | if (getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) { | ||
6949 | @@ -344,12 +345,18 @@ void Sema::Initialize() { | ||
6950 | "cl_khr_int64_base_atomics cl_khr_int64_extended_atomics"); | ||
6951 | |||
6952 | setOpenCLExtensionForType(AtomicDoubleT, "cl_khr_fp64"); | ||
6953 | + setOpenCLExtensionForType(Context.OCLReserveIDTy, "__opencl_c_pipes"); | ||
6954 | + setOpenCLExtensionForType(Context.OCLClkEventTy, | ||
6955 | + "__opencl_c_device_enqueue"); | ||
6956 | + setOpenCLExtensionForType(Context.OCLQueueTy, | ||
6957 | + "__opencl_c_device_enqueue"); | ||
6958 | } | ||
6959 | |||
6960 | setOpenCLExtensionForType(Context.DoubleTy, "cl_khr_fp64"); | ||
6961 | |||
6962 | -#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \ | ||
6963 | - setOpenCLExtensionForType(Context.Id, Ext); | ||
6964 | +#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \ | ||
6965 | + setOpenCLExtensionForType(Context.Id, Ext); \ | ||
6966 | + setOpenCLExtensionForType(Context.Id, "__opencl_c_images"); | ||
6967 | #include "clang/Basic/OpenCLImageTypes.def" | ||
6968 | #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ | ||
6969 | addImplicitTypedef(#ExtType, Context.Id##Ty); \ | ||
6970 | @@ -2293,6 +2300,27 @@ bool Sema::isOpenCLDisabledDecl(Decl *FD) { | ||
6971 | return false; | ||
6972 | } | ||
6973 | |||
6974 | +template <typename DiagLocT, typename DiagInfoT> | ||
6975 | +void Sema::DiagnoseOpenCLRequiresOption(llvm::StringRef OpenCLOptName, | ||
6976 | + DiagLocT DiagLoc, DiagInfoT DiagInfo, | ||
6977 | + unsigned Selector, | ||
6978 | + SourceRange SrcRange) { | ||
6979 | + const auto &LO = getLangOpts(); | ||
6980 | + auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion; | ||
6981 | + // For versions higher that 3.0 diagnosing feature | ||
6982 | + if (CLVer >= 300) { | ||
6983 | + OpenCLOptName = | ||
6984 | + llvm::StringSwitch<llvm::StringRef>(OpenCLOptName) | ||
6985 | + .Case("cl_khr_3d_image_writes", "__opencl_c_3d_image_writes") | ||
6986 | + .Case("cl_khr_subgroups", "__opencl_c_subgroups") | ||
6987 | + .Case("cl_khr_fp64", "__opencl_c_fp64") | ||
6988 | + .Default(OpenCLOptName); | ||
6989 | + } | ||
6990 | + | ||
6991 | + Diag(DiagLoc, diag::err_opencl_requires_extension) | ||
6992 | + << Selector << DiagInfo << OpenCLOptName << SrcRange; | ||
6993 | +} | ||
6994 | + | ||
6995 | template <typename T, typename DiagLocT, typename DiagInfoT, typename MapT> | ||
6996 | bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc, | ||
6997 | DiagInfoT DiagInfo, MapT &Map, | ||
6998 | @@ -2304,8 +2332,7 @@ bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc, | ||
6999 | bool Disabled = false; | ||
7000 | for (auto &I : Loc->second) { | ||
7001 | if (I != CurrOpenCLExtension && !getOpenCLOptions().isEnabled(I)) { | ||
7002 | - Diag(DiagLoc, diag::err_opencl_requires_extension) << Selector << DiagInfo | ||
7003 | - << I << SrcRange; | ||
7004 | + DiagnoseOpenCLRequiresOption(I, DiagLoc, DiagInfo, Selector, SrcRange); | ||
7005 | Disabled = true; | ||
7006 | } | ||
7007 | } | ||
7008 | @@ -2341,3 +2368,13 @@ bool Sema::checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E) { | ||
7009 | return checkOpenCLDisabledTypeOrDecl(&D, E.getBeginLoc(), FnName, | ||
7010 | OpenCLDeclExtMap, 1, D.getSourceRange()); | ||
7011 | } | ||
7012 | + | ||
7013 | +bool Sema::checkOpenCLSubgroupExtForCallExpr(CallExpr *Call) { | ||
7014 | + if (!getOpenCLOptions().isEnabled("cl_khr_subgroups")) { | ||
7015 | + DiagnoseOpenCLRequiresOption("cl_khr_subgroups", Call->getBeginLoc(), | ||
7016 | + Call->getDirectCallee(), 1, | ||
7017 | + Call->getSourceRange()); | ||
7018 | + return true; | ||
7019 | + } | ||
7020 | + return false; | ||
7021 | +} | ||
7022 | diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp | ||
7023 | index 74742023d1b3..efa3b6ab0eb6 100644 | ||
7024 | --- a/clang/lib/Sema/SemaChecking.cpp | ||
7025 | +++ b/clang/lib/Sema/SemaChecking.cpp | ||
7026 | @@ -597,20 +597,11 @@ static bool checkOpenCLBlockArgs(Sema &S, Expr *BlockArg) { | ||
7027 | return IllegalParams; | ||
7028 | } | ||
7029 | |||
7030 | -static bool checkOpenCLSubgroupExt(Sema &S, CallExpr *Call) { | ||
7031 | - if (!S.getOpenCLOptions().isEnabled("cl_khr_subgroups")) { | ||
7032 | - S.Diag(Call->getBeginLoc(), diag::err_opencl_requires_extension) | ||
7033 | - << 1 << Call->getDirectCallee() << "cl_khr_subgroups"; | ||
7034 | - return true; | ||
7035 | - } | ||
7036 | - return false; | ||
7037 | -} | ||
7038 | - | ||
7039 | static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) { | ||
7040 | if (checkArgCount(S, TheCall, 2)) | ||
7041 | return true; | ||
7042 | |||
7043 | - if (checkOpenCLSubgroupExt(S, TheCall)) | ||
7044 | + if (S.checkOpenCLSubgroupExtForCallExpr(TheCall)) | ||
7045 | return true; | ||
7046 | |||
7047 | // First argument is an ndrange_t type. | ||
7048 | @@ -1564,7 +1555,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, | ||
7049 | break; | ||
7050 | case Builtin::BIsub_group_reserve_read_pipe: | ||
7051 | case Builtin::BIsub_group_reserve_write_pipe: | ||
7052 | - if (checkOpenCLSubgroupExt(*this, TheCall) || | ||
7053 | + if (checkOpenCLSubgroupExtForCallExpr(TheCall) || | ||
7054 | SemaBuiltinReserveRWPipe(*this, TheCall)) | ||
7055 | return ExprError(); | ||
7056 | break; | ||
7057 | @@ -1577,7 +1568,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, | ||
7058 | break; | ||
7059 | case Builtin::BIsub_group_commit_read_pipe: | ||
7060 | case Builtin::BIsub_group_commit_write_pipe: | ||
7061 | - if (checkOpenCLSubgroupExt(*this, TheCall) || | ||
7062 | + if (checkOpenCLSubgroupExtForCallExpr(TheCall) || | ||
7063 | SemaBuiltinCommitRWPipe(*this, TheCall)) | ||
7064 | return ExprError(); | ||
7065 | break; | ||
7066 | @@ -4314,6 +4305,20 @@ DiagnoseCStringFormatDirectiveInCFAPI(Sema &S, | ||
7067 | } | ||
7068 | } | ||
7069 | |||
7070 | +bool Sema::isSupportedOpenCLOMemoryOrdering(int64_t Ordering) const { | ||
7071 | + assert(llvm::isValidAtomicOrderingCABI(Ordering)); | ||
7072 | + auto OrderingCABI = (llvm::AtomicOrderingCABI)Ordering; | ||
7073 | + switch (OrderingCABI) { | ||
7074 | + case llvm::AtomicOrderingCABI::acquire: | ||
7075 | + case llvm::AtomicOrderingCABI::release: | ||
7076 | + case llvm::AtomicOrderingCABI::acq_rel: | ||
7077 | + return OpenCLFeatures.isEnabled("__opencl_c_atomic_order_acq_rel"); | ||
7078 | + | ||
7079 | + default: | ||
7080 | + return true; | ||
7081 | + } | ||
7082 | +} | ||
7083 | + | ||
7084 | /// Determine whether the given type has a non-null nullability annotation. | ||
7085 | static bool isNonNullType(ASTContext &ctx, QualType type) { | ||
7086 | if (auto nullability = type->getNullability(ctx)) | ||
7087 | @@ -5067,10 +5072,17 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, | ||
7088 | if (SubExprs.size() >= 2 && Form != Init) { | ||
7089 | llvm::APSInt Result(32); | ||
7090 | if (SubExprs[1]->isIntegerConstantExpr(Result, Context) && | ||
7091 | - !isValidOrderingForOp(Result.getSExtValue(), Op)) | ||
7092 | + !isValidOrderingForOp(Result.getSExtValue(), Op)) { | ||
7093 | Diag(SubExprs[1]->getBeginLoc(), | ||
7094 | diag::warn_atomic_op_has_invalid_memory_order) | ||
7095 | << SubExprs[1]->getSourceRange(); | ||
7096 | + } else if (IsOpenCL && | ||
7097 | + !isSupportedOpenCLOMemoryOrdering(Result.getSExtValue())) { | ||
7098 | + Diag(SubExprs[1]->getBeginLoc(), | ||
7099 | + diag::err_opencl_memory_ordering_require_feat) | ||
7100 | + << SubExprs[1]->getSourceRange(); | ||
7101 | + return ExprError(); | ||
7102 | + } | ||
7103 | } | ||
7104 | |||
7105 | if (auto ScopeModel = AtomicExpr::getScopeModel(Op)) { | ||
7106 | diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp | ||
7107 | index 64146f4a912f..c1e629b5dc76 100644 | ||
7108 | --- a/clang/lib/Sema/SemaDecl.cpp | ||
7109 | +++ b/clang/lib/Sema/SemaDecl.cpp | ||
7110 | @@ -6152,7 +6152,9 @@ void Sema::deduceOpenCLAddressSpace(ValueDecl *Decl) { | ||
7111 | if (Type->isSamplerT() || Type->isVoidType()) | ||
7112 | return; | ||
7113 | LangAS ImplAS = LangAS::opencl_private; | ||
7114 | - if ((getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) && | ||
7115 | + if ((getLangOpts().OpenCLCPlusPlus || | ||
7116 | + getOpenCLOptions().isEnabled( | ||
7117 | + "__opencl_c_program_scope_global_variables")) && | ||
7118 | Var->hasGlobalStorage()) | ||
7119 | ImplAS = LangAS::opencl_global; | ||
7120 | // If the original type from a decayed type is an array type and that array | ||
7121 | @@ -7682,6 +7684,10 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { | ||
7122 | // OpenCL C v2.0 s6.5.1 - Variables defined at program scope and static | ||
7123 | // variables inside a function can also be declared in the global | ||
7124 | // address space. | ||
7125 | + // OpenCL C v3.0 s5.5 - For OpenCL C 2.0, or with the | ||
7126 | + // __opencl_c_program_scope_global_variables feature macro, the | ||
7127 | + // address space for a variable at program scope or a static variable | ||
7128 | + // also be __global | ||
7129 | // C++ for OpenCL inherits rule from OpenCL C v2.0. | ||
7130 | // FIXME: Adding local AS in C++ for OpenCL might make sense. | ||
7131 | if (NewVD->isFileVarDecl() || NewVD->isStaticLocal() || | ||
7132 | @@ -7689,10 +7695,11 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { | ||
7133 | if (!T->isSamplerT() && | ||
7134 | !(T.getAddressSpace() == LangAS::opencl_constant || | ||
7135 | (T.getAddressSpace() == LangAS::opencl_global && | ||
7136 | - (getLangOpts().OpenCLVersion == 200 || | ||
7137 | - getLangOpts().OpenCLCPlusPlus)))) { | ||
7138 | + (OpenCLFeatures.isEnabled( | ||
7139 | + "__opencl_c_program_scope_global_variables"))))) { | ||
7140 | int Scope = NewVD->isStaticLocal() | NewVD->hasExternalStorage() << 1; | ||
7141 | - if (getLangOpts().OpenCLVersion == 200 || getLangOpts().OpenCLCPlusPlus) | ||
7142 | + if (OpenCLFeatures.isEnabled( | ||
7143 | + "__opencl_c_program_scope_global_variables")) | ||
7144 | Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space) | ||
7145 | << Scope << "global or constant"; | ||
7146 | else | ||
7147 | diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp | ||
7148 | index 849bc09063b3..c963b95b131a 100644 | ||
7149 | --- a/clang/lib/Sema/SemaDeclAttr.cpp | ||
7150 | +++ b/clang/lib/Sema/SemaDeclAttr.cpp | ||
7151 | @@ -6362,7 +6362,7 @@ static void handleInternalLinkageAttr(Sema &S, Decl *D, const ParsedAttr &AL) { | ||
7152 | } | ||
7153 | |||
7154 | static void handleOpenCLNoSVMAttr(Sema &S, Decl *D, const ParsedAttr &AL) { | ||
7155 | - if (S.LangOpts.OpenCLVersion != 200) | ||
7156 | + if (S.LangOpts.OpenCLVersion < 200) | ||
7157 | S.Diag(AL.getLoc(), diag::err_attribute_requires_opencl_version) | ||
7158 | << AL << "2.0" << 0; | ||
7159 | else | ||
7160 | @@ -6446,6 +6446,13 @@ static void handleOpenCLAccessAttr(Sema &S, Decl *D, const ParsedAttr &AL) { | ||
7161 | << AL << PDecl->getType() << DeclTy->isImageType(); | ||
7162 | D->setInvalidDecl(true); | ||
7163 | return; | ||
7164 | + } else if ((!S.getLangOpts().OpenCLCPlusPlus && | ||
7165 | + S.getLangOpts().OpenCLVersion >= 200) && | ||
7166 | + !S.getOpenCLOptions().isEnabled( | ||
7167 | + "__opencl_c_read_write_images")) { | ||
7168 | + S.Diag(AL.getLoc(), diag::err_opencl_requires_extension) | ||
7169 | + << 0 << PDecl->getType() << "__opencl_c_read_write_images"; | ||
7170 | + return; | ||
7171 | } | ||
7172 | } | ||
7173 | } | ||
7174 | diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp | ||
7175 | index 831e55046e80..4481a59b4517 100644 | ||
7176 | --- a/clang/lib/Sema/SemaDeclCXX.cpp | ||
7177 | +++ b/clang/lib/Sema/SemaDeclCXX.cpp | ||
7178 | @@ -14906,6 +14906,11 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl, | ||
7179 | if (auto *PtrTy = ResultType->getAs<PointerType>()) { | ||
7180 | ResultType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy); | ||
7181 | } | ||
7182 | + if (CanQual<PointerType> ExpectedPtrTy = | ||
7183 | + ExpectedResultType->getAs<PointerType>()) { | ||
7184 | + ExpectedResultType = SemaRef.Context.getCanonicalType( | ||
7185 | + RemoveAddressSpaceFromPtr(SemaRef, ExpectedPtrTy->getTypePtr())); | ||
7186 | + } | ||
7187 | } | ||
7188 | |||
7189 | // Check that the result type is what we expect. | ||
7190 | @@ -14939,6 +14944,11 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl, | ||
7191 | FnDecl->getParamDecl(0)->getType()->getAs<PointerType>()) { | ||
7192 | FirstParamType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy); | ||
7193 | } | ||
7194 | + if (CanQual<PointerType> ExpectedPtrTy = | ||
7195 | + ExpectedFirstParamType->getAs<PointerType>()) { | ||
7196 | + ExpectedFirstParamType = SemaRef.Context.getCanonicalType( | ||
7197 | + RemoveAddressSpaceFromPtr(SemaRef, ExpectedPtrTy->getTypePtr())); | ||
7198 | + } | ||
7199 | } | ||
7200 | if (SemaRef.Context.getCanonicalType(FirstParamType).getUnqualifiedType() != | ||
7201 | ExpectedFirstParamType) | ||
7202 | diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp | ||
7203 | index 8d96404a5c27..8f21203b9508 100644 | ||
7204 | --- a/clang/lib/Sema/SemaLookup.cpp | ||
7205 | +++ b/clang/lib/Sema/SemaLookup.cpp | ||
7206 | @@ -771,19 +771,20 @@ static void InsertOCLBuiltinDeclarationsFromTable(Sema &S, LookupResult &LR, | ||
7207 | // as argument. Only meaningful for generic types, otherwise equals 1. | ||
7208 | unsigned GenTypeMaxCnt; | ||
7209 | |||
7210 | + ASTContext &Context = S.Context; | ||
7211 | + | ||
7212 | + // Ignore this BIF if its version does not match the language options. | ||
7213 | + unsigned OpenCLVersion = Context.getLangOpts().OpenCLVersion; | ||
7214 | + if (Context.getLangOpts().OpenCLCPlusPlus) | ||
7215 | + OpenCLVersion = 200; | ||
7216 | + | ||
7217 | + unsigned short VersionCode = OpenCLOptions::EncodeVersion(OpenCLVersion); | ||
7218 | + | ||
7219 | for (unsigned SignatureIndex = 0; SignatureIndex < Len; SignatureIndex++) { | ||
7220 | const OpenCLBuiltinStruct &OpenCLBuiltin = | ||
7221 | BuiltinTable[FctIndex + SignatureIndex]; | ||
7222 | - ASTContext &Context = S.Context; | ||
7223 | |||
7224 | - // Ignore this BIF if its version does not match the language options. | ||
7225 | - unsigned OpenCLVersion = Context.getLangOpts().OpenCLVersion; | ||
7226 | - if (Context.getLangOpts().OpenCLCPlusPlus) | ||
7227 | - OpenCLVersion = 200; | ||
7228 | - if (OpenCLVersion < OpenCLBuiltin.MinVersion) | ||
7229 | - continue; | ||
7230 | - if ((OpenCLBuiltin.MaxVersion != 0) && | ||
7231 | - (OpenCLVersion >= OpenCLBuiltin.MaxVersion)) | ||
7232 | + if (!(OpenCLBuiltin.AllVersions & VersionCode)) | ||
7233 | continue; | ||
7234 | |||
7235 | SmallVector<QualType, 1> RetTypes; | ||
7236 | diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp | ||
7237 | index 93ddd047e09b..93211b801f8d 100644 | ||
7238 | --- a/clang/lib/Sema/SemaType.cpp | ||
7239 | +++ b/clang/lib/Sema/SemaType.cpp | ||
7240 | @@ -2016,7 +2016,7 @@ static QualType deduceOpenCLPointeeAddrSpace(Sema &S, QualType PointeeType) { | ||
7241 | !PointeeType.hasAddressSpace()) | ||
7242 | PointeeType = S.getASTContext().getAddrSpaceQualType( | ||
7243 | PointeeType, | ||
7244 | - S.getLangOpts().OpenCLCPlusPlus || S.getLangOpts().OpenCLVersion == 200 | ||
7245 | + S.getOpenCLOptions().isEnabled("__opencl_c_generic_address_space") | ||
7246 | ? LangAS::opencl_generic | ||
7247 | : LangAS::opencl_private); | ||
7248 | return PointeeType; | ||
7249 | @@ -5160,9 +5160,15 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, | ||
7250 | } | ||
7251 | |||
7252 | case DeclaratorChunk::Pipe: { | ||
7253 | - T = S.BuildReadPipeType(T, DeclType.Loc); | ||
7254 | - processTypeAttrs(state, T, TAL_DeclSpec, | ||
7255 | - D.getMutableDeclSpec().getAttributes()); | ||
7256 | + if (S.getOpenCLOptions().isEnabled("__opencl_c_pipes")) { | ||
7257 | + T = S.BuildReadPipeType(T, DeclType.Loc); | ||
7258 | + processTypeAttrs(state, T, TAL_DeclSpec, | ||
7259 | + D.getMutableDeclSpec().getAttributes()); | ||
7260 | + } else { | ||
7261 | + D.setInvalidType(true); | ||
7262 | + T = Context.IntTy; | ||
7263 | + S.Diag(D.getIdentifierLoc(), diag::err_opencl_pipes_require_feat); | ||
7264 | + } | ||
7265 | break; | ||
7266 | } | ||
7267 | } | ||
7268 | diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl | ||
7269 | index cdbf28bbcad8..0bedff5ef0f3 100644 | ||
7270 | --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl | ||
7271 | +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl | ||
7272 | @@ -2,7 +2,8 @@ | ||
7273 | // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s | ||
7274 | // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s | ||
7275 | // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s | ||
7276 | - | ||
7277 | +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s | ||
7278 | +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -cl-ext=__opencl_c_program_scope_global_variables -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s | ||
7279 | typedef int int2 __attribute__((ext_vector_type(2))); | ||
7280 | |||
7281 | typedef struct { | ||
7282 | @@ -39,7 +40,7 @@ struct LargeStructTwoMember { | ||
7283 | int2 y[20]; | ||
7284 | }; | ||
7285 | |||
7286 | -#if __OPENCL_C_VERSION__ >= 200 | ||
7287 | +#ifdef __opencl_c_program_scope_global_variables | ||
7288 | struct LargeStructOneMember g_s; | ||
7289 | #endif | ||
7290 | |||
7291 | @@ -98,7 +99,7 @@ void FuncOneLargeMember(struct LargeStructOneMember u) { | ||
7292 | // AMDGCN20: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)* | ||
7293 | // AMDGCN20: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false) | ||
7294 | // AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]]) | ||
7295 | -#if __OPENCL_C_VERSION__ >= 200 | ||
7296 | +#ifdef __opencl_c_program_scope_global_variables | ||
7297 | void test_indirect_arg_globl(void) { | ||
7298 | FuncOneLargeMember(g_s); | ||
7299 | } | ||
7300 | diff --git a/clang/test/CodeGenOpenCL/address-spaces-conversions.cl b/clang/test/CodeGenOpenCL/address-spaces-conversions.cl | ||
7301 | index 52feccc540bb..9ecffcca5ee9 100644 | ||
7302 | --- a/clang/test/CodeGenOpenCL/address-spaces-conversions.cl | ||
7303 | +++ b/clang/test/CodeGenOpenCL/address-spaces-conversions.cl | ||
7304 | @@ -1,5 +1,7 @@ | ||
7305 | // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s | ||
7306 | +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -emit-llvm -o - | FileCheck %s | ||
7307 | // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s | ||
7308 | +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s | ||
7309 | // When -ffake-address-space-map is not used, all addr space mapped to 0 for x86_64. | ||
7310 | |||
7311 | // test that we generate address space casts everywhere we need conversions of | ||
7312 | diff --git a/clang/test/CodeGenOpenCL/address-spaces-mangling.cl b/clang/test/CodeGenOpenCL/address-spaces-mangling.cl | ||
7313 | index 50622f099143..e19ec8451d0d 100644 | ||
7314 | --- a/clang/test/CodeGenOpenCL/address-spaces-mangling.cl | ||
7315 | +++ b/clang/test/CodeGenOpenCL/address-spaces-mangling.cl | ||
7316 | @@ -6,6 +6,7 @@ | ||
7317 | // We check that the address spaces are mangled the same in both version of OpenCL | ||
7318 | // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s | ||
7319 | // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s | ||
7320 | +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s | ||
7321 | |||
7322 | // We can't name this f as private is equivalent to default | ||
7323 | // no specifier given address space so we get multiple definition | ||
7324 | @@ -47,7 +48,7 @@ void f(constant int *arg) { } | ||
7325 | // OCL-20-DAG: @_Z1fPU3AS2i | ||
7326 | // OCL-12-DAG: @_Z1fPU3AS2i | ||
7327 | |||
7328 | -#if __OPENCL_C_VERSION__ >= 200 | ||
7329 | +#if __OPENCL_C_VERSION__ == 200 | ||
7330 | __attribute__((overloadable)) | ||
7331 | void f(generic int *arg) { } | ||
7332 | // ASMANG20: @_Z1fPU3AS4i | ||
7333 | diff --git a/clang/test/CodeGenOpenCL/address-spaces.cl b/clang/test/CodeGenOpenCL/address-spaces.cl | ||
7334 | index 3c8fea2a80bc..26a741338b3a 100644 | ||
7335 | --- a/clang/test/CodeGenOpenCL/address-spaces.cl | ||
7336 | +++ b/clang/test/CodeGenOpenCL/address-spaces.cl | ||
7337 | @@ -1,9 +1,13 @@ | ||
7338 | // RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR | ||
7339 | +// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR | ||
7340 | // RUN: %clang_cc1 %s -O0 -DCL20 -cl-std=CL2.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR | ||
7341 | // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s | ||
7342 | +// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s | ||
7343 | // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20AMDGCN | ||
7344 | // RUN: %clang_cc1 %s -O0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s | ||
7345 | +// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s | ||
7346 | // RUN: %clang_cc1 %s -O0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s | ||
7347 | +// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s | ||
7348 | |||
7349 | // SPIR: %struct.S = type { i32, i32, i32* } | ||
7350 | // CL20SPIR: %struct.S = type { i32, i32, i32 addrspace(4)* } | ||
7351 | diff --git a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl | ||
7352 | index 7216cb517495..8d3b30fe8074 100644 | ||
7353 | --- a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl | ||
7354 | +++ b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl | ||
7355 | @@ -1,4 +1,5 @@ | ||
7356 | // RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s | ||
7357 | +// RUN: %clang_cc1 -O0 -cl-std=CL3.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s | ||
7358 | // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL20 %s | ||
7359 | |||
7360 | // CL12-LABEL: define void @func1(i32 addrspace(5)* %x) | ||
7361 | diff --git a/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl b/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl | ||
7362 | index a5d438933fa4..8073c7756eb6 100644 | ||
7363 | --- a/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl | ||
7364 | +++ b/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl | ||
7365 | @@ -4,6 +4,17 @@ | ||
7366 | // RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s | ||
7367 | // RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL1.2 %s -emit-llvm -o - | FileCheck %s | ||
7368 | // RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s | ||
7369 | +// RUN: %clang_cc1 -triple r600 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s | ||
7370 | +// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s | ||
7371 | +// RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s | ||
7372 | +// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s | ||
7373 | +// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s | ||
7374 | +// RUN: %clang_cc1 -triple amdgcn---opencl -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s | ||
7375 | +// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s | ||
7376 | +// RUN: %clang_cc1 -triple r600 -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s | ||
7377 | +// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s | ||
7378 | +// RUN: %clang_cc1 -triple amdgcn---opencl -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s | ||
7379 | +// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s | ||
7380 | |||
7381 | #ifdef __AMDGCN__ | ||
7382 | #define PTSIZE 8 | ||
7383 | @@ -11,7 +22,7 @@ | ||
7384 | #define PTSIZE 4 | ||
7385 | #endif | ||
7386 | |||
7387 | -#ifdef cl_khr_fp64 | ||
7388 | +#if defined(cl_khr_fp64) || defined(__opencl_c_fp64) | ||
7389 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable | ||
7390 | #endif | ||
7391 | #ifdef cl_khr_fp16 | ||
7392 | @@ -59,8 +70,12 @@ void test() { | ||
7393 | check(__alignof__(double) == 8); | ||
7394 | #endif | ||
7395 | |||
7396 | - check(sizeof(void*) == (__OPENCL_C_VERSION__ >= 200 ? 8 : 4)); | ||
7397 | - check(__alignof__(void*) == (__OPENCL_C_VERSION__ >= 200 ? 8 : 4)); | ||
7398 | + check(sizeof(private void*) == 4); | ||
7399 | + check(__alignof__(private void*) == 4); | ||
7400 | +#ifdef __opencl_c_generic_address_space | ||
7401 | + check(sizeof(generic void*) == 8); | ||
7402 | + check(__alignof__(generic void*) == 8); | ||
7403 | +#endif | ||
7404 | check(sizeof(global_ptr_t) == PTSIZE); | ||
7405 | check(__alignof__(global_ptr_t) == PTSIZE); | ||
7406 | check(sizeof(constant_ptr_t) == PTSIZE); | ||
7407 | diff --git a/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl b/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl | ||
7408 | index d1ab6aceac5c..70c5bace023b 100644 | ||
7409 | --- a/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl | ||
7410 | +++ b/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl | ||
7411 | @@ -1,4 +1,5 @@ | ||
7412 | // RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL1.2 -emit-llvm -o - -O0 | FileCheck %s | ||
7413 | +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL3.0 -emit-llvm -o - -O0 | FileCheck %s | ||
7414 | |||
7415 | #pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable | ||
7416 | void test_int8(uchar4 ua, uchar4 ub, char4 sa, char4 sb) { | ||
7417 | diff --git a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl | ||
7418 | index 76ace5dca21e..5dc43e222f75 100644 | ||
7419 | --- a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl | ||
7420 | +++ b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl | ||
7421 | @@ -1,6 +1,8 @@ | ||
7422 | // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM | ||
7423 | // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM | ||
7424 | // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM | ||
7425 | +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM | ||
7426 | +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM | ||
7427 | |||
7428 | kernel void ker() {}; | ||
7429 | // CHECK: define{{.*}}@ker() #0 | ||
7430 | diff --git a/clang/test/CodeGenOpenCL/fpmath.cl b/clang/test/CodeGenOpenCL/fpmath.cl | ||
7431 | index 0108d909c94e..b28392739c71 100644 | ||
7432 | --- a/clang/test/CodeGenOpenCL/fpmath.cl | ||
7433 | +++ b/clang/test/CodeGenOpenCL/fpmath.cl | ||
7434 | @@ -2,6 +2,8 @@ | ||
7435 | // RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s | ||
7436 | // RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s | ||
7437 | // RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL1.2 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s | ||
7438 | +// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL3.0 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s | ||
7439 | +// RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s | ||
7440 | |||
7441 | typedef __attribute__(( ext_vector_type(4) )) float float4; | ||
7442 | |||
7443 | diff --git a/clang/test/CodeGenOpenCL/generic-address-space-feature.cl b/clang/test/CodeGenOpenCL/generic-address-space-feature.cl | ||
7444 | new file mode 100644 | ||
7445 | index 000000000000..890860abe4d9 | ||
7446 | --- /dev/null | ||
7447 | +++ b/clang/test/CodeGenOpenCL/generic-address-space-feature.cl | ||
7448 | @@ -0,0 +1,28 @@ | ||
7449 | +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL12 | ||
7450 | +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL12 | ||
7451 | +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL20 | ||
7452 | +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL20 | ||
7453 | +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL30 | ||
7454 | +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,__opencl_c_generic_address_space -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL30-GENERIC | ||
7455 | +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL30 | ||
7456 | +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,__opencl_c_generic_address_space -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL30-GENERIC | ||
7457 | + | ||
7458 | +void test(global float* src1, local float *src2, private float *src3, float *src4, float tmp) { | ||
7459 | + // CL20: %{{.+}} = addrspacecast float addrspace(1)* %{{.+}} to float addrspace(4)* | ||
7460 | + // CL12-NOT: addrspacecast | ||
7461 | + // CL30-NOT: addrspacecast | ||
7462 | + // CL30-GENERIC-NOT: addrspacecast | ||
7463 | + tmp = sincos(tmp, src1); | ||
7464 | + // CL20: %{{.+}} = addrspacecast float addrspace(3)* %{{.+}} to float addrspace(4)* | ||
7465 | + // CL12-NOT: addrspacecast | ||
7466 | + // CL30-NOT: addrspacecast | ||
7467 | + // CL30-GENERIC-NOT: addrspacecast | ||
7468 | + tmp = sincos(tmp, src2); | ||
7469 | + | ||
7470 | + // CL12: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float* {{.+}}) | ||
7471 | + // CL20: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float addrspace(4)* {{.+}}) | ||
7472 | + // CL30: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float* {{.+}}) | ||
7473 | + // CL30-GENERIC: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float addrspace(4)* {{.+}}) | ||
7474 | + // CHECK: addrspacecast | ||
7475 | + tmp = sincos(tmp, src4); | ||
7476 | +} | ||
7477 | diff --git a/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl b/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl | ||
7478 | index 515f13f6e768..5aa31ac6f345 100644 | ||
7479 | --- a/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl | ||
7480 | +++ b/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl | ||
7481 | @@ -1,4 +1,5 @@ | ||
7482 | // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s | ||
7483 | +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s | ||
7484 | |||
7485 | // CHECK: %opencl.intel_sub_group_avc_mce_payload_t = type opaque | ||
7486 | // CHECK: %opencl.intel_sub_group_avc_ime_payload_t = type opaque | ||
7487 | diff --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl | ||
7488 | index e89237623478..3d6708ac361f 100644 | ||
7489 | --- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl | ||
7490 | +++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl | ||
7491 | @@ -1,5 +1,8 @@ | ||
7492 | // RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s | ||
7493 | // RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s | ||
7494 | +// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s | ||
7495 | +// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s | ||
7496 | + | ||
7497 | // Test that the kernels always use the SPIR calling convention | ||
7498 | // to have unambiguous mapping of arguments to feasibly implement | ||
7499 | // clSetKernelArg(). | ||
7500 | diff --git a/clang/test/CodeGenOpenCL/logical-ops.cl b/clang/test/CodeGenOpenCL/logical-ops.cl | ||
7501 | index f083a8580ee7..499eab65039b 100644 | ||
7502 | --- a/clang/test/CodeGenOpenCL/logical-ops.cl | ||
7503 | +++ b/clang/test/CodeGenOpenCL/logical-ops.cl | ||
7504 | @@ -1,4 +1,5 @@ | ||
7505 | // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s | ||
7506 | +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s | ||
7507 | // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=clc++ -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s | ||
7508 | |||
7509 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable | ||
7510 | diff --git a/clang/test/CodeGenOpenCL/no-half.cl b/clang/test/CodeGenOpenCL/no-half.cl | ||
7511 | index aee8f678f01a..46da7fa339e8 100644 | ||
7512 | --- a/clang/test/CodeGenOpenCL/no-half.cl | ||
7513 | +++ b/clang/test/CodeGenOpenCL/no-half.cl | ||
7514 | @@ -1,6 +1,7 @@ | ||
7515 | // RUN: %clang_cc1 %s -cl-std=cl2.0 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s | ||
7516 | // RUN: %clang_cc1 %s -cl-std=cl1.2 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s | ||
7517 | // RUN: %clang_cc1 %s -cl-std=cl1.1 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s | ||
7518 | +// RUN: %clang_cc1 %s -cl-std=cl3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s | ||
7519 | |||
7520 | #pragma OPENCL EXTENSION cl_khr_fp64:enable | ||
7521 | |||
7522 | diff --git a/clang/test/CodeGenOpenCL/pipe_builtin.cl b/clang/test/CodeGenOpenCL/pipe_builtin.cl | ||
7523 | index 02b9669b7ab1..0bf35c336199 100644 | ||
7524 | --- a/clang/test/CodeGenOpenCL/pipe_builtin.cl | ||
7525 | +++ b/clang/test/CodeGenOpenCL/pipe_builtin.cl | ||
7526 | @@ -1,4 +1,7 @@ | ||
7527 | // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=clc++ -o - %s | FileCheck %s | ||
7528 | +// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=cl2.0 -o - %s | FileCheck %s | ||
7529 | +// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=cl3.0 -cl-ext=__opencl_c_pipes,__opencl_c_subgroups -o - %s | FileCheck %s | ||
7530 | + | ||
7531 | // FIXME: Add MS ABI manglings of OpenCL things and remove %itanium_abi_triple | ||
7532 | // above to support OpenCL in the MS C++ ABI. | ||
7533 | |||
7534 | diff --git a/clang/test/CodeGenOpenCL/pipe_types.cl b/clang/test/CodeGenOpenCL/pipe_types.cl | ||
7535 | index ba064c6d7557..b7a523d4f084 100644 | ||
7536 | --- a/clang/test/CodeGenOpenCL/pipe_types.cl | ||
7537 | +++ b/clang/test/CodeGenOpenCL/pipe_types.cl | ||
7538 | @@ -1,4 +1,5 @@ | ||
7539 | // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s | ||
7540 | +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -o - %s | FileCheck %s | ||
7541 | |||
7542 | // CHECK: %opencl.pipe_ro_t = type opaque | ||
7543 | // CHECK: %opencl.pipe_wo_t = type opaque | ||
7544 | diff --git a/clang/test/CodeGenOpenCL/printf.cl b/clang/test/CodeGenOpenCL/printf.cl | ||
7545 | index fc139d776db6..0133c5595d81 100644 | ||
7546 | --- a/clang/test/CodeGenOpenCL/printf.cl | ||
7547 | +++ b/clang/test/CodeGenOpenCL/printf.cl | ||
7548 | @@ -1,5 +1,7 @@ | ||
7549 | // RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-+cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s | ||
7550 | // RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s | ||
7551 | +// RUN: %clang_cc1 -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s | ||
7552 | +// RUN: %clang_cc1 -cl-std=CL3.0 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s | ||
7553 | |||
7554 | typedef __attribute__((ext_vector_type(2))) float float2; | ||
7555 | typedef __attribute__((ext_vector_type(2))) half half2; | ||
7556 | diff --git a/clang/test/CodeGenOpenCL/unroll-hint.cl b/clang/test/CodeGenOpenCL/unroll-hint.cl | ||
7557 | index 0f84450a1ae6..9347c935869b 100644 | ||
7558 | --- a/clang/test/CodeGenOpenCL/unroll-hint.cl | ||
7559 | +++ b/clang/test/CodeGenOpenCL/unroll-hint.cl | ||
7560 | @@ -1,5 +1,6 @@ | ||
7561 | // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s | ||
7562 | // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s | FileCheck %s | ||
7563 | +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -o - %s | FileCheck %s | ||
7564 | |||
7565 | /*** for ***/ | ||
7566 | void for_count() | ||
7567 | diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c | ||
7568 | index 5c0bfb69f9a3..eabdf0b5938d 100644 | ||
7569 | --- a/clang/test/Driver/autocomplete.c | ||
7570 | +++ b/clang/test/Driver/autocomplete.c | ||
7571 | @@ -43,6 +43,8 @@ | ||
7572 | // CLSTDALL-NEXT: CL1.2 | ||
7573 | // CLSTDALL-NEXT: cl2.0 | ||
7574 | // CLSTDALL-NEXT: CL2.0 | ||
7575 | +// CLSTDALL-NEXT: cl3.0 | ||
7576 | +// CLSTDALL-NEXT: CL3.0 | ||
7577 | // CLSTDALL-NEXT: clc++ | ||
7578 | // CLSTDALL-NEXT: CLC++ | ||
7579 | // RUN: %clang --autocomplete=-fno-sanitize-coverage=,f | FileCheck %s -check-prefix=FNOSANICOVER | ||
7580 | diff --git a/clang/test/Driver/opencl.cl b/clang/test/Driver/opencl.cl | ||
7581 | index 63b04bc1af41..ffdd4f7d65bb 100644 | ||
7582 | --- a/clang/test/Driver/opencl.cl | ||
7583 | +++ b/clang/test/Driver/opencl.cl | ||
7584 | @@ -2,6 +2,7 @@ | ||
7585 | // RUN: %clang -S -### -cl-std=CL1.1 %s 2>&1 | FileCheck --check-prefix=CHECK-CL11 %s | ||
7586 | // RUN: %clang -S -### -cl-std=CL1.2 %s 2>&1 | FileCheck --check-prefix=CHECK-CL12 %s | ||
7587 | // RUN: %clang -S -### -cl-std=CL2.0 %s 2>&1 | FileCheck --check-prefix=CHECK-CL20 %s | ||
7588 | +// RUN: %clang -S -### -cl-std=CL3.0 %s 2>&1 | FileCheck --check-prefix=CHECK-CL30 %s | ||
7589 | // RUN: %clang -S -### -cl-std=clc++ %s 2>&1 | FileCheck --check-prefix=CHECK-CLCPP %s | ||
7590 | // RUN: %clang -S -### -cl-opt-disable %s 2>&1 | FileCheck --check-prefix=CHECK-OPT-DISABLE %s | ||
7591 | // RUN: %clang -S -### -cl-strict-aliasing %s 2>&1 | FileCheck --check-prefix=CHECK-STRICT-ALIASING %s | ||
7592 | @@ -22,6 +23,7 @@ | ||
7593 | // CHECK-CL11: "-cc1" {{.*}} "-cl-std=CL1.1" | ||
7594 | // CHECK-CL12: "-cc1" {{.*}} "-cl-std=CL1.2" | ||
7595 | // CHECK-CL20: "-cc1" {{.*}} "-cl-std=CL2.0" | ||
7596 | +// CHECK-CL30: "-cc1" {{.*}} "-cl-std=CL3.0" | ||
7597 | // CHECK-CLCPP: "-cc1" {{.*}} "-cl-std=clc++" | ||
7598 | // CHECK-OPT-DISABLE: "-cc1" {{.*}} "-cl-opt-disable" | ||
7599 | // CHECK-STRICT-ALIASING: "-cc1" {{.*}} "-cl-strict-aliasing" | ||
7600 | diff --git a/clang/test/Driver/unknown-std.cl b/clang/test/Driver/unknown-std.cl | ||
7601 | index 6f371bac13ac..00209fb62556 100644 | ||
7602 | --- a/clang/test/Driver/unknown-std.cl | ||
7603 | +++ b/clang/test/Driver/unknown-std.cl | ||
7604 | @@ -10,6 +10,7 @@ | ||
7605 | // CHECK-NEXT: note: use 'cl1.1' for 'OpenCL 1.1' standard | ||
7606 | // CHECK-NEXT: note: use 'cl1.2' for 'OpenCL 1.2' standard | ||
7607 | // CHECK-NEXT: note: use 'cl2.0' for 'OpenCL 2.0' standard | ||
7608 | +// CHECK-NEXT: note: use 'cl3.0' for 'OpenCL 3.0' standard | ||
7609 | // CHECK-NEXT: note: use 'clc++' for 'C++ for OpenCL' standard | ||
7610 | |||
7611 | // Make sure that no other output is present. | ||
7612 | diff --git a/clang/test/Frontend/stdlang.c b/clang/test/Frontend/stdlang.c | ||
7613 | index 51484999e37a..eac4632fbdd6 100644 | ||
7614 | --- a/clang/test/Frontend/stdlang.c | ||
7615 | +++ b/clang/test/Frontend/stdlang.c | ||
7616 | @@ -9,6 +9,7 @@ | ||
7617 | // RUN: %clang_cc1 -x cl -cl-std=CL1.1 -DOPENCL %s | ||
7618 | // RUN: %clang_cc1 -x cl -cl-std=CL1.2 -DOPENCL %s | ||
7619 | // RUN: %clang_cc1 -x cl -cl-std=CL2.0 -DOPENCL %s | ||
7620 | +// RUN: %clang_cc1 -x cl -cl-std=CL3.0 -DOPENCL %s | ||
7621 | // RUN: %clang_cc1 -x cl -cl-std=CLC++ -DOPENCL %s | ||
7622 | // RUN: not %clang_cc1 -x cl -std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s | ||
7623 | // RUN: not %clang_cc1 -x cl -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s | ||
7624 | diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl | ||
7625 | index 1b151ffdd16a..2716076acdcf 100644 | ||
7626 | --- a/clang/test/Headers/opencl-c-header.cl | ||
7627 | +++ b/clang/test/Headers/opencl-c-header.cl | ||
7628 | @@ -1,6 +1,7 @@ | ||
7629 | // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify | FileCheck %s | ||
7630 | // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.1 | FileCheck %s | ||
7631 | // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.2 | FileCheck %s | ||
7632 | +// RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL3.0 | FileCheck %s | ||
7633 | // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=clc++ | FileCheck %s --check-prefix=CHECK20 | ||
7634 | |||
7635 | // Test including the default header as a module. | ||
7636 | @@ -39,9 +40,11 @@ | ||
7637 | // RUN: rm -rf %t | ||
7638 | // RUN: mkdir -p %t | ||
7639 | // RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL1.2 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s | ||
7640 | +// RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL3.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s | ||
7641 | // RUN: %clang_cc1 -triple amdgcn--amdhsa -O0 -emit-llvm -o - -cl-std=CL2.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK20 --check-prefix=CHECK-MOD %s | ||
7642 | // RUN: chmod u-w %t | ||
7643 | // RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL1.2 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s | ||
7644 | +// RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL3.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s | ||
7645 | // RUN: %clang_cc1 -triple amdgcn--amdhsa -O0 -emit-llvm -o - -cl-std=CL2.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK20 --check-prefix=CHECK-MOD %s | ||
7646 | // RUN: chmod u+w %t | ||
7647 | |||
7648 | @@ -67,7 +70,7 @@ char f(char x) { | ||
7649 | // from OpenCL 2.0 onwards. | ||
7650 | |||
7651 | // CHECK20: _Z12write_imagef14ocl_image3d_wo | ||
7652 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
7653 | +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) | ||
7654 | void test_image3dwo(write_only image3d_t img) { | ||
7655 | write_imagef(img, (0), (0.0f)); | ||
7656 | } | ||
7657 | @@ -81,7 +84,7 @@ void test_atomics(__generic volatile unsigned int* a) { | ||
7658 | #endif | ||
7659 | |||
7660 | // Verify that ATOMIC_VAR_INIT is defined. | ||
7661 | -#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) | ||
7662 | +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) | ||
7663 | global atomic_int z = ATOMIC_VAR_INIT(99); | ||
7664 | #endif //__OPENCL_C_VERSION__ | ||
7665 | |||
7666 | diff --git a/clang/test/Index/pipe-size.cl b/clang/test/Index/pipe-size.cl | ||
7667 | index 94a1255f0a48..59b76051eda1 100644 | ||
7668 | --- a/clang/test/Index/pipe-size.cl | ||
7669 | +++ b/clang/test/Index/pipe-size.cl | ||
7670 | @@ -2,6 +2,13 @@ | ||
7671 | // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR | ||
7672 | // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64 | ||
7673 | // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN | ||
7674 | +// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple x86_64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=X86 | ||
7675 | +// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR | ||
7676 | +// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64 | ||
7677 | +// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN | ||
7678 | + | ||
7679 | + | ||
7680 | + | ||
7681 | __kernel void testPipe( pipe int test ) | ||
7682 | { | ||
7683 | int s = sizeof(test); | ||
7684 | diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c | ||
7685 | index def105f4c52e..b088a37ba665 100644 | ||
7686 | --- a/clang/test/Preprocessor/predefined-macros.c | ||
7687 | +++ b/clang/test/Preprocessor/predefined-macros.c | ||
7688 | @@ -129,6 +129,8 @@ | ||
7689 | // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL12 | ||
7690 | // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=CL2.0 \ | ||
7691 | // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL20 | ||
7692 | +// RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=CL3.0 \ | ||
7693 | +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL30 | ||
7694 | // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-fast-relaxed-math \ | ||
7695 | // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FRM | ||
7696 | // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=clc++ \ | ||
7697 | @@ -137,26 +139,37 @@ | ||
7698 | // CHECK-CL10: #define CL_VERSION_1_1 110 | ||
7699 | // CHECK-CL10: #define CL_VERSION_1_2 120 | ||
7700 | // CHECK-CL10: #define CL_VERSION_2_0 200 | ||
7701 | +// CHECK-CL10: #define CL_VERSION_3_0 300 | ||
7702 | // CHECK-CL10: #define __OPENCL_C_VERSION__ 100 | ||
7703 | // CHECK-CL10-NOT: #define __FAST_RELAXED_MATH__ 1 | ||
7704 | // CHECK-CL11: #define CL_VERSION_1_0 100 | ||
7705 | // CHECK-CL11: #define CL_VERSION_1_1 110 | ||
7706 | // CHECK-CL11: #define CL_VERSION_1_2 120 | ||
7707 | // CHECK-CL11: #define CL_VERSION_2_0 200 | ||
7708 | +// CHECK-CL11: #define CL_VERSION_3_0 300 | ||
7709 | // CHECK-CL11: #define __OPENCL_C_VERSION__ 110 | ||
7710 | // CHECK-CL11-NOT: #define __FAST_RELAXED_MATH__ 1 | ||
7711 | // CHECK-CL12: #define CL_VERSION_1_0 100 | ||
7712 | // CHECK-CL12: #define CL_VERSION_1_1 110 | ||
7713 | // CHECK-CL12: #define CL_VERSION_1_2 120 | ||
7714 | // CHECK-CL12: #define CL_VERSION_2_0 200 | ||
7715 | +// CHECK-CL12: #define CL_VERSION_3_0 300 | ||
7716 | // CHECK-CL12: #define __OPENCL_C_VERSION__ 120 | ||
7717 | // CHECK-CL12-NOT: #define __FAST_RELAXED_MATH__ 1 | ||
7718 | // CHECK-CL20: #define CL_VERSION_1_0 100 | ||
7719 | // CHECK-CL20: #define CL_VERSION_1_1 110 | ||
7720 | // CHECK-CL20: #define CL_VERSION_1_2 120 | ||
7721 | // CHECK-CL20: #define CL_VERSION_2_0 200 | ||
7722 | +// CHECK-CL20: #define CL_VERSION_3_0 300 | ||
7723 | // CHECK-CL20: #define __OPENCL_C_VERSION__ 200 | ||
7724 | // CHECK-CL20-NOT: #define __FAST_RELAXED_MATH__ 1 | ||
7725 | +// CHECK-CL30: #define CL_VERSION_1_0 100 | ||
7726 | +// CHECK-CL30: #define CL_VERSION_1_1 110 | ||
7727 | +// CHECK-CL30: #define CL_VERSION_1_2 120 | ||
7728 | +// CHECK-CL30: #define CL_VERSION_2_0 200 | ||
7729 | +// CHECK-CL30: #define CL_VERSION_3_0 300 | ||
7730 | +// CHECK-CL30: #define __OPENCL_C_VERSION__ 300 | ||
7731 | +// CHECK-CL30-NOT: #define __FAST_RELAXED_MATH__ 1 | ||
7732 | // CHECK-FRM: #define __FAST_RELAXED_MATH__ 1 | ||
7733 | // CHECK-CLCPP10: #define __CL_CPP_VERSION_1_0__ 100 | ||
7734 | // CHECK-CLCPP10: #define __OPENCL_CPP_VERSION__ 100 | ||
7735 | diff --git a/clang/test/Sema/feature-extensions-simult-support.cl b/clang/test/Sema/feature-extensions-simult-support.cl | ||
7736 | new file mode 100644 | ||
7737 | index 000000000000..0789105002b2 | ||
7738 | --- /dev/null | ||
7739 | +++ b/clang/test/Sema/feature-extensions-simult-support.cl | ||
7740 | @@ -0,0 +1,75 @@ | ||
7741 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 | ||
7742 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_fp64 | ||
7743 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_3d_image_writes | ||
7744 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_3d_image_writes | ||
7745 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_subgroups | ||
7746 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_subgroups | ||
7747 | + | ||
7748 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 | ||
7749 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_fp64 | ||
7750 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_3d_image_writes | ||
7751 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_3d_image_writes | ||
7752 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_subgroups | ||
7753 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_subgroups | ||
7754 | + | ||
7755 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64 | ||
7756 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_3d_image_writes | ||
7757 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_subgroups | ||
7758 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes | ||
7759 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_device_enqueue | ||
7760 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_read_write_images | ||
7761 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64,-cl_khr_fp64 | ||
7762 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_3d_image_writes,-cl_khr_3d_image_writes | ||
7763 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_subgroups,-cl_khr_subgroups | ||
7764 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_fp64 | ||
7765 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_3d_image_writes | ||
7766 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_subgroups | ||
7767 | + | ||
7768 | +// expected-no-diagnostics | ||
7769 | + | ||
7770 | +#ifdef cl_khr_fp64 | ||
7771 | + #ifndef __opencl_c_fp64 | ||
7772 | + #error macros were not properly set up | ||
7773 | + #endif | ||
7774 | +#endif | ||
7775 | +#ifdef __opencl_c_fp64 | ||
7776 | + #ifndef cl_khr_fp64 | ||
7777 | + #error macros were not properly set up | ||
7778 | + #endif | ||
7779 | +#endif | ||
7780 | + | ||
7781 | +#ifdef cl_khr_3d_image_writes | ||
7782 | + #ifndef __opencl_c_3d_image_writes | ||
7783 | + #error macros were not properly set up | ||
7784 | + #endif | ||
7785 | +#endif | ||
7786 | +#ifdef __opencl_c_3d_image_writes | ||
7787 | + #ifndef cl_khr_3d_image_writes | ||
7788 | + #error macros were not properly set up | ||
7789 | + #endif | ||
7790 | +#endif | ||
7791 | + | ||
7792 | +#ifdef cl_khr_subgroups | ||
7793 | + #ifndef __opencl_c_subgroups | ||
7794 | + #error macros were not properly set up | ||
7795 | + #endif | ||
7796 | +#endif | ||
7797 | +#ifdef __opencl_c_subgroups | ||
7798 | + #ifndef cl_khr_subgroups | ||
7799 | + #error macros were not properly set up | ||
7800 | + #endif | ||
7801 | +#endif | ||
7802 | + | ||
7803 | +#if defined(__opencl_c_pipes) || defined(__opencl_c_device_enqueue) | ||
7804 | + #ifndef __opencl_c_generic_address_space | ||
7805 | + #error macros were not properly set up | ||
7806 | + #endif | ||
7807 | +#endif | ||
7808 | + | ||
7809 | +#if defined(__opencl_c_3d_image_writes) || defined(__opencl_c_read_write_images) | ||
7810 | + #ifndef __opencl_c_images | ||
7811 | + #error macros were not properly set up | ||
7812 | + #endif | ||
7813 | +#endif | ||
7814 | + | ||
7815 | +kernel void test(){} | ||
7816 | diff --git a/clang/test/Sema/features-ignore-pragma.cl b/clang/test/Sema/features-ignore-pragma.cl | ||
7817 | new file mode 100644 | ||
7818 | index 000000000000..046ce5390754 | ||
7819 | --- /dev/null | ||
7820 | +++ b/clang/test/Sema/features-ignore-pragma.cl | ||
7821 | @@ -0,0 +1,24 @@ | ||
7822 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 | ||
7823 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 | ||
7824 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_subgroups | ||
7825 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_3d_image_writes | ||
7826 | + | ||
7827 | +#pragma OPENCL EXTENSION __opencl_c_fp64 : enable | ||
7828 | +// expected-warning@-1 {{OpenCL feature support can't be controlled via pragma, ignoring}} | ||
7829 | + | ||
7830 | +#pragma OPENCL EXTENSION cl_khr_fp64 : enable | ||
7831 | +#ifndef __opencl_c_fp64 | ||
7832 | +// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_fp64' - ignoring}} | ||
7833 | +#endif | ||
7834 | + | ||
7835 | +#pragma OPENCL EXTENSION cl_khr_subgroups : enable | ||
7836 | +#ifndef __opencl_c_subgroups | ||
7837 | +// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_subgroups' - ignoring}} | ||
7838 | +#endif | ||
7839 | + | ||
7840 | +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable | ||
7841 | +#ifndef __opencl_c_3d_image_writes | ||
7842 | +// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_3d_image_writes' - ignoring}} | ||
7843 | +#endif | ||
7844 | + | ||
7845 | +kernel void foo() {} | ||
7846 | diff --git a/clang/test/Sema/opencl-features-pipes.cl b/clang/test/Sema/opencl-features-pipes.cl | ||
7847 | new file mode 100644 | ||
7848 | index 000000000000..c0ac778f24a6 | ||
7849 | --- /dev/null | ||
7850 | +++ b/clang/test/Sema/opencl-features-pipes.cl | ||
7851 | @@ -0,0 +1,18 @@ | ||
7852 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.1 | ||
7853 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 | ||
7854 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -DHAS | ||
7855 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 | ||
7856 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -DHAS | ||
7857 | +// expected-no-diagnostics | ||
7858 | + | ||
7859 | +#ifdef HAS | ||
7860 | + #ifndef __opencl_c_pipes | ||
7861 | + #error Feature should be defined | ||
7862 | + #endif | ||
7863 | +#else | ||
7864 | + #ifdef __opencl_c_pipes | ||
7865 | + #error Feature should not be defined | ||
7866 | + #endif | ||
7867 | +#endif | ||
7868 | + | ||
7869 | +kernel void foo() {} | ||
7870 | diff --git a/clang/test/Sema/opencl-features.cl b/clang/test/Sema/opencl-features.cl | ||
7871 | new file mode 100644 | ||
7872 | index 000000000000..aa432f6b60bf | ||
7873 | --- /dev/null | ||
7874 | +++ b/clang/test/Sema/opencl-features.cl | ||
7875 | @@ -0,0 +1,128 @@ | ||
7876 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 | ||
7877 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-__opencl_c_device_enqueue,-__opencl_c_pipes,-__opencl_c_read_write_images | ||
7878 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CLC++ | ||
7879 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 | ||
7880 | +// expected-no-diagnostics | ||
7881 | + | ||
7882 | +#ifndef __opencl_c_int64 | ||
7883 | + #error Feature __opencl_c_int64 should be defined | ||
7884 | +#endif | ||
7885 | + | ||
7886 | +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) | ||
7887 | + #ifndef __opencl_c_3d_image_writes | ||
7888 | + #error Feature __opencl_c_3d_image_writes should be defined | ||
7889 | + #endif | ||
7890 | + | ||
7891 | + #ifndef __opencl_c_atomic_order_acq_rel | ||
7892 | + #error Feature __opencl_c_atomic_order_acq_rel should be defined | ||
7893 | + #endif | ||
7894 | + | ||
7895 | + #ifndef __opencl_c_atomic_order_seq_cst | ||
7896 | + #error Feature __opencl_c_atomic_order_seq_cst should be defined | ||
7897 | + #endif | ||
7898 | + | ||
7899 | + #ifndef __opencl_c_atomic_scope_device | ||
7900 | + #error Feature __opencl_c_atomic_scope_device should be defined | ||
7901 | + #endif | ||
7902 | + | ||
7903 | + #ifndef __opencl_c_atomic_scope_all_devices | ||
7904 | + #error Feature __opencl_c_atomic_scope_all_devices should be defined | ||
7905 | + #endif | ||
7906 | + | ||
7907 | + #ifndef __opencl_c_device_enqueue | ||
7908 | + #error Feature __opencl_c_device_enqueue should be defined | ||
7909 | + #endif | ||
7910 | + | ||
7911 | + #ifndef __opencl_c_generic_address_space | ||
7912 | + #error Feature __opencl_c_generic_address_space should be defined | ||
7913 | + #endif | ||
7914 | + | ||
7915 | + #ifndef __opencl_c_pipes | ||
7916 | + #error Feature __opencl_c_pipes should be defined | ||
7917 | + #endif | ||
7918 | + | ||
7919 | + #ifndef __opencl_c_program_scope_global_variables | ||
7920 | + #error Feature __opencl_c_program_scope_global_variables should be defined | ||
7921 | + #endif | ||
7922 | + | ||
7923 | + #ifndef __opencl_c_read_write_images | ||
7924 | + #error Feature __opencl_c_read_write_images should be defined | ||
7925 | + #endif | ||
7926 | + | ||
7927 | + #ifndef __opencl_c_subgroups | ||
7928 | + #error Feature __opencl_c_subgroups should be defined | ||
7929 | + #endif | ||
7930 | + | ||
7931 | + #ifndef __opencl_c_work_group_collective_functions | ||
7932 | + #error Feature __opencl_c_work_group_collective_functions should be defined | ||
7933 | + #endif | ||
7934 | + | ||
7935 | + #ifndef __opencl_c_fp64 | ||
7936 | + #error Feature __opencl_c_fp64 should be defined | ||
7937 | + #endif | ||
7938 | + | ||
7939 | + #ifndef __opencl_c_images | ||
7940 | + #error Feature __opencl_c_images should be defined | ||
7941 | + #endif | ||
7942 | +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0) | ||
7943 | + | ||
7944 | + | ||
7945 | +#if __OPENCL_C_VERSION__ == CL_VERSION_3_0 | ||
7946 | + #ifdef __opencl_c_3d_image_writes | ||
7947 | + #error Feature __opencl_c_3d_image_writes shouldn't be defined | ||
7948 | + #endif | ||
7949 | + | ||
7950 | + #ifdef __opencl_c_atomic_order_acq_rel | ||
7951 | + #error Feature __opencl_c_atomic_order_acq_rel shouldn't be defined | ||
7952 | + #endif | ||
7953 | + | ||
7954 | + #ifdef __opencl_c_atomic_order_seq_cst | ||
7955 | + #error Feature __opencl_c_atomic_order_seq_cst shouldn't be defined | ||
7956 | + #endif | ||
7957 | + | ||
7958 | + #ifdef __opencl_c_atomic_scope_device | ||
7959 | + #error Feature __opencl_c_atomic_scope_device shouldn't be defined | ||
7960 | + #endif | ||
7961 | + | ||
7962 | + #ifdef __opencl_c_atomic_scope_all_devices | ||
7963 | + #error Feature __opencl_c_atomic_scope_all_devices shouldn't be defined | ||
7964 | + #endif | ||
7965 | + | ||
7966 | + #ifdef __opencl_c_device_enqueue | ||
7967 | + #error Feature __opencl_c_device_enqueue shouldn't be defined | ||
7968 | + #endif | ||
7969 | + | ||
7970 | + #ifdef __opencl_c_generic_address_space | ||
7971 | + #error Feature __opencl_c_generic_address_space shouldn't be defined | ||
7972 | + #endif | ||
7973 | + | ||
7974 | + #ifdef __opencl_c_pipes | ||
7975 | + #error Feature __opencl_c_pipes shouldn't be defined | ||
7976 | + #endif | ||
7977 | + | ||
7978 | + #ifdef __opencl_c_program_scope_global_variables | ||
7979 | + #error Feature __opencl_c_program_scope_global_variables shouldn't be defined | ||
7980 | + #endif | ||
7981 | + | ||
7982 | + #ifdef __opencl_c_read_write_images | ||
7983 | + #error Feature __opencl_c_read_write_images shouldn't be defined | ||
7984 | + #endif | ||
7985 | + | ||
7986 | + #ifdef __opencl_c_subgroups | ||
7987 | + #error Feature __opencl_c_subgroups shouldn't be defined | ||
7988 | + #endif | ||
7989 | + | ||
7990 | + #ifdef __opencl_c_work_group_collective_functions | ||
7991 | + #error Feature __opencl_c_work_group_collective_functions shouldn't be defined | ||
7992 | + #endif | ||
7993 | + | ||
7994 | + #ifdef __opencl_c_fp64 | ||
7995 | + #error Feature __opencl_c_fp64 shouldn't be defined | ||
7996 | + #endif | ||
7997 | + | ||
7998 | + #ifdef __opencl_c_images | ||
7999 | + #error Feature __opencl_c_images shouldn't be defined | ||
8000 | + #endif | ||
8001 | +#endif // __OPENCL_C_VERSION__ == CL_VERSION_3_0 | ||
8002 | + | ||
8003 | +kernel void foo() {} | ||
8004 | diff --git a/clang/test/Sema/pipe_builtins_feature.cl b/clang/test/Sema/pipe_builtins_feature.cl | ||
8005 | new file mode 100644 | ||
8006 | index 000000000000..56fa94fc7705 | ||
8007 | --- /dev/null | ||
8008 | +++ b/clang/test/Sema/pipe_builtins_feature.cl | ||
8009 | @@ -0,0 +1,21 @@ | ||
8010 | +// RUN: %clang_cc1 -cl-std=CL2.0 -fsyntax-only -verify %s | ||
8011 | +// RUN: %clang_cc1 -cl-std=CL3.0 -fsyntax-only -verify %s | ||
8012 | +// RUN: %clang_cc1 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -fsyntax-only -verify %s | ||
8013 | + | ||
8014 | +#ifdef __opencl_c_pipes | ||
8015 | + #ifndef __opencl_c_generic_address_space | ||
8016 | + #error Generic address space feature must also be defined | ||
8017 | + #endif | ||
8018 | +// CHECK: expected-no-diagnostics | ||
8019 | +// check that pragma disable all doesn't touch feature support | ||
8020 | + #pragma OPENCL EXTENSION all : disable | ||
8021 | +#endif | ||
8022 | + | ||
8023 | +void test(read_only pipe int p, global int *ptr) { | ||
8024 | + reserve_id_t rid; | ||
8025 | +} | ||
8026 | + | ||
8027 | +#ifndef __opencl_c_pipes | ||
8028 | +// expected-error@-5 {{expected parameter declarator}} expected-error@-5 {{expected ')'}} expected-note@-5 {{to match this '('}} | ||
8029 | +// expected-error@-5 {{use of type 'reserve_id_t' requires __opencl_c_pipes extension to be enabled}} | ||
8030 | +#endif | ||
8031 | diff --git a/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl b/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl | ||
8032 | index 5efea216346a..09aea27ae6de 100644 | ||
8033 | --- a/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl | ||
8034 | +++ b/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl | ||
8035 | @@ -1,6 +1,9 @@ | ||
8036 | // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL2.0 | ||
8037 | // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL2.0 | ||
8038 | // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL2.0 | ||
8039 | +// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space | ||
8040 | +// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space | ||
8041 | +// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space | ||
8042 | // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=clc++ | ||
8043 | // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=clc++ | ||
8044 | // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=clc++ | ||
8045 | diff --git a/clang/test/SemaOpenCL/address-spaces.cl b/clang/test/SemaOpenCL/address-spaces.cl | ||
8046 | index 07547ea19680..e367a7c57292 100644 | ||
8047 | --- a/clang/test/SemaOpenCL/address-spaces.cl | ||
8048 | +++ b/clang/test/SemaOpenCL/address-spaces.cl | ||
8049 | @@ -1,5 +1,6 @@ | ||
8050 | // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only | ||
8051 | // RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -pedantic -fsyntax-only | ||
8052 | +// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -verify -pedantic -fsyntax-only | ||
8053 | // RUN: %clang_cc1 %s -cl-std=clc++ -verify -pedantic -fsyntax-only | ||
8054 | |||
8055 | __constant int ci = 1; | ||
8056 | diff --git a/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl b/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl | ||
8057 | index f63e2913c749..727141190a0b 100644 | ||
8058 | --- a/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl | ||
8059 | +++ b/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl | ||
8060 | @@ -2,6 +2,12 @@ | ||
8061 | // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir-unknown-unknown" -verify -pedantic -fsyntax-only -DB32 -DQUALS="const volatile" | ||
8062 | // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS= | ||
8063 | // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS="const volatile" | ||
8064 | +// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -DB32 -DQUALS= | ||
8065 | +// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -DB32 -DQUALS="const volatile" | ||
8066 | +// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir64-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS= | ||
8067 | +// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir64-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS="const volatile" | ||
8068 | + | ||
8069 | + | ||
8070 | |||
8071 | typedef struct {int a;} ndrange_t; | ||
8072 | // Diagnostic tests for different overloads of enqueue_kernel from Table 6.13.17.1 of OpenCL 2.0 Spec. | ||
8073 | @@ -235,11 +241,17 @@ kernel void bar(global unsigned int *buf) | ||
8074 | kernel void foo1(global unsigned int *buf) | ||
8075 | { | ||
8076 | ndrange_t n; | ||
8077 | - buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} | ||
8078 | + buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); | ||
8079 | +#if __OPENCL_C_VERSION__ < 300 | ||
8080 | +// expected-error@-2 {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}} | ||
8081 | +#endif | ||
8082 | } | ||
8083 | |||
8084 | kernel void bar1(global unsigned int *buf) | ||
8085 | { | ||
8086 | ndrange_t n; | ||
8087 | - buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} | ||
8088 | + buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); | ||
8089 | +#if __OPENCL_C_VERSION__ < 300 | ||
8090 | +// expected-error@-2 {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}} | ||
8091 | +#endif | ||
8092 | } | ||
8093 | diff --git a/clang/test/SemaOpenCL/forget-unsupported-builtins.cl b/clang/test/SemaOpenCL/forget-unsupported-builtins.cl | ||
8094 | new file mode 100644 | ||
8095 | index 000000000000..14dd03e2c7db | ||
8096 | --- /dev/null | ||
8097 | +++ b/clang/test/SemaOpenCL/forget-unsupported-builtins.cl | ||
8098 | @@ -0,0 +1,22 @@ | ||
8099 | +// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -verify %s -triple spir-unknown-unknown | ||
8100 | +// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -cl-ext=__opencl_c_pipes,__opencl_c_generic_address_space,__opencl_c_device_enqueue -verify %s -triple spir-unknown-unknown -DFEATURES | ||
8101 | + | ||
8102 | +#ifndef FEATURES | ||
8103 | + // expected-no-diagnostics | ||
8104 | +#else | ||
8105 | + // expected-error@+10 {{cannot redeclare builtin function 'get_pipe_max_packets'}} | ||
8106 | + // expected-note@+9 {{'get_pipe_max_packets' is a builtin with type 'unsigned int ()'}} | ||
8107 | + // expected-error@+9 {{cannot redeclare builtin function 'to_local'}} | ||
8108 | + // expected-note@+8 {{'to_local' is a builtin with type 'void *(void *)'}} | ||
8109 | + // expected-error@+8 {{cannot redeclare builtin function 'to_global'}} | ||
8110 | + // expected-note@+7 {{'to_global' is a builtin with type 'void *(void *)'}} | ||
8111 | + // expected-error@+7 {{cannot redeclare builtin function 'get_kernel_work_group_size'}} | ||
8112 | + // expected-note@+6 {{'get_kernel_work_group_size' is a builtin with type 'unsigned int ()'}} | ||
8113 | +#endif | ||
8114 | + | ||
8115 | +int get_pipe_max_packets(int); | ||
8116 | +int to_local(int); | ||
8117 | +int to_global(int); | ||
8118 | +int get_kernel_work_group_size(int); | ||
8119 | + | ||
8120 | +kernel void test(global int *dst) {} | ||
8121 | diff --git a/clang/test/SemaOpenCL/image-features.cl b/clang/test/SemaOpenCL/image-features.cl | ||
8122 | new file mode 100644 | ||
8123 | index 000000000000..ace6913bb31e | ||
8124 | --- /dev/null | ||
8125 | +++ b/clang/test/SemaOpenCL/image-features.cl | ||
8126 | @@ -0,0 +1,20 @@ | ||
8127 | +// RUN: %clang_cc1 -cl-std=cl2.0 -fsyntax-only -verify %s -triple spir-unknown-unknown | ||
8128 | +// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_images -fsyntax-only -verify %s -triple spir-unknown-unknown | ||
8129 | +// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_3d_image_writes -fsyntax-only -verify %s -triple spir-unknown-unknown | ||
8130 | +// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_read_write_images -fsyntax-only -verify %s -triple spir-unknown-unknown | ||
8131 | + | ||
8132 | +#if defined(__opencl_c_read_write_images) && defined(__opencl_c_3d_image_writes) | ||
8133 | + // expected-no-diagnostics | ||
8134 | +#endif | ||
8135 | + | ||
8136 | +__kernel void write_3d_image(__write_only image3d_t i) {} | ||
8137 | + | ||
8138 | +#ifndef __opencl_c_3d_image_writes | ||
8139 | + // expected-error@-3 {{use of type '__write_only image3d_t' requires __opencl_c_3d_image_writes extension to be enabled}} | ||
8140 | +#endif | ||
8141 | + | ||
8142 | +__kernel void read_write_3d_image(__read_write image3d_t i) { } | ||
8143 | + | ||
8144 | +#ifndef __opencl_c_read_write_images | ||
8145 | + // expected-error@-3 {{use of type '__read_write image3d_t' requires __opencl_c_read_write_images extension to be enabled}} | ||
8146 | +#endif | ||
8147 | diff --git a/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl b/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl | ||
8148 | index 36e76621d24a..38b0a04726e3 100644 | ||
8149 | --- a/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl | ||
8150 | +++ b/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl | ||
8151 | @@ -1,4 +1,5 @@ | ||
8152 | // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_subgroups | ||
8153 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_subgroups,__opencl_c_pipes | ||
8154 | |||
8155 | #pragma OPENCL EXTENSION cl_khr_subgroups : enable | ||
8156 | |||
8157 | diff --git a/clang/test/SemaOpenCL/storageclass-cl20.cl b/clang/test/SemaOpenCL/storageclass-cl20.cl | ||
8158 | index 581701d2a6a5..469c526ebc30 100644 | ||
8159 | --- a/clang/test/SemaOpenCL/storageclass-cl20.cl | ||
8160 | +++ b/clang/test/SemaOpenCL/storageclass-cl20.cl | ||
8161 | @@ -1,4 +1,5 @@ | ||
8162 | // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 | ||
8163 | +// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_program_scope_global_variables,__opencl_c_generic_address_space | ||
8164 | |||
8165 | int G2 = 0; | ||
8166 | global int G3 = 0; | ||
8167 | diff --git a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp | ||
8168 | index 41d33b550680..f50652493e5e 100644 | ||
8169 | --- a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp | ||
8170 | +++ b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp | ||
8171 | @@ -56,6 +56,7 @@ | ||
8172 | //===----------------------------------------------------------------------===// | ||
8173 | |||
8174 | #include "TableGenBackends.h" | ||
8175 | +#include "clang/Basic/OpenCLOptions.h" | ||
8176 | #include "llvm/ADT/MapVector.h" | ||
8177 | #include "llvm/ADT/STLExtras.h" | ||
8178 | #include "llvm/ADT/SmallString.h" | ||
8179 | @@ -69,6 +70,7 @@ | ||
8180 | #include "llvm/TableGen/Record.h" | ||
8181 | #include "llvm/TableGen/StringMatcher.h" | ||
8182 | #include "llvm/TableGen/TableGenBackend.h" | ||
8183 | +#include <numeric> | ||
8184 | #include <set> | ||
8185 | |||
8186 | using namespace llvm; | ||
8187 | @@ -228,6 +230,10 @@ private: | ||
8188 | // The function "tan", having the same signatures, would be mapped to the | ||
8189 | // same entry (<I1, I2, I3>). | ||
8190 | MapVector<BuiltinIndexListTy *, BuiltinTableEntries> SignatureListMap; | ||
8191 | + | ||
8192 | + // OR together the encodings of all listed OpenCL versions. | ||
8193 | + unsigned short | ||
8194 | + EncodeBuiltinVersions(std::vector<Record *> BuiltinVersionsRecords) const; | ||
8195 | }; | ||
8196 | } // namespace | ||
8197 | |||
8198 | @@ -338,12 +344,10 @@ struct OpenCLBuiltinStruct { | ||
8199 | const bool IsConst; | ||
8200 | // Function attribute __attribute__((convergent)) | ||
8201 | const bool IsConv; | ||
8202 | + // All opencl versions encoded | ||
8203 | + const unsigned char AllVersions : 5; | ||
8204 | // OpenCL extension(s) required for this overload. | ||
8205 | const unsigned short Extension; | ||
8206 | - // First OpenCL version in which this overload was introduced (e.g. CL20). | ||
8207 | - const unsigned short MinVersion; | ||
8208 | - // First OpenCL version in which this overload was removed (e.g. CL20). | ||
8209 | - const unsigned short MaxVersion; | ||
8210 | }; | ||
8211 | |||
8212 | )"; | ||
8213 | @@ -503,11 +507,9 @@ void BuiltinNameEmitter::EmitBuiltinTable() { | ||
8214 | << (Overload.first->getValueAsBit("IsPure")) << ", " | ||
8215 | << (Overload.first->getValueAsBit("IsConst")) << ", " | ||
8216 | << (Overload.first->getValueAsBit("IsConv")) << ", " | ||
8217 | - << FunctionExtensionIndex[ExtName] << ", " | ||
8218 | - << Overload.first->getValueAsDef("MinVersion")->getValueAsInt("ID") | ||
8219 | - << ", " | ||
8220 | - << Overload.first->getValueAsDef("MaxVersion")->getValueAsInt("ID") | ||
8221 | - << " },\n"; | ||
8222 | + << EncodeBuiltinVersions( | ||
8223 | + Overload.first->getValueAsListOfDefs("Versions")) | ||
8224 | + << ", " << FunctionExtensionIndex[ExtName] << " },\n"; | ||
8225 | Index++; | ||
8226 | } | ||
8227 | } | ||
8228 | @@ -528,10 +530,8 @@ bool BuiltinNameEmitter::CanReuseSignature( | ||
8229 | if (Rec->getValueAsBit("IsPure") == Rec2->getValueAsBit("IsPure") && | ||
8230 | Rec->getValueAsBit("IsConst") == Rec2->getValueAsBit("IsConst") && | ||
8231 | Rec->getValueAsBit("IsConv") == Rec2->getValueAsBit("IsConv") && | ||
8232 | - Rec->getValueAsDef("MinVersion")->getValueAsInt("ID") == | ||
8233 | - Rec2->getValueAsDef("MinVersion")->getValueAsInt("ID") && | ||
8234 | - Rec->getValueAsDef("MaxVersion")->getValueAsInt("ID") == | ||
8235 | - Rec2->getValueAsDef("MaxVersion")->getValueAsInt("ID") && | ||
8236 | + EncodeBuiltinVersions(Rec->getValueAsListOfDefs("Versions")) == | ||
8237 | + EncodeBuiltinVersions(Rec2->getValueAsListOfDefs("Versions")) && | ||
8238 | Rec->getValueAsDef("Extension")->getName() == | ||
8239 | Rec2->getValueAsDef("Extension")->getName()) { | ||
8240 | return true; | ||
8241 | @@ -806,6 +806,15 @@ static void OCL2Qual(ASTContext &Context, const OpenCLTypeStruct &Ty, | ||
8242 | OS << "\n} // OCL2Qual\n"; | ||
8243 | } | ||
8244 | |||
8245 | +unsigned short BuiltinNameEmitter::EncodeBuiltinVersions( | ||
8246 | + std::vector<Record *> BuiltinVersionsRecords) const { | ||
8247 | + return std::accumulate( | ||
8248 | + BuiltinVersionsRecords.begin(), BuiltinVersionsRecords.end(), | ||
8249 | + (unsigned short)0, [](unsigned short C, Record *R) { | ||
8250 | + return C |= clang::OpenCLOptions::EncodeVersion(R->getValueAsInt("ID")); | ||
8251 | + }); | ||
8252 | +} | ||
8253 | + | ||
8254 | void clang::EmitClangOpenCLBuiltins(RecordKeeper &Records, raw_ostream &OS) { | ||
8255 | BuiltinNameEmitter NameChecker(Records, OS); | ||
8256 | NameChecker.Emit(); | ||
8257 | -- | ||
8258 | 2.17.1 | ||
8259 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-llvm-spirv-skip-building-tests.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-llvm-spirv-skip-building-tests.patch deleted file mode 100644 index 84a4ba19..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-llvm-spirv-skip-building-tests.patch +++ /dev/null | |||
@@ -1,51 +0,0 @@ | |||
1 | From 661021749a168c423d69d0ba7cdfa16fed860836 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Wed, 21 Aug 2019 14:35:31 +0800 | ||
4 | Subject: [PATCH 1/3] llvm-spirv: skip building tests | ||
5 | |||
6 | Some of these need clang to be built and since we're building this in-tree, | ||
7 | that leads to problems when compiling libcxx, compiler-rt which aren't built | ||
8 | in-tree. | ||
9 | |||
10 | Instead of using SPIRV_SKIP_CLANG_BUILD to skip clang build and adding this to | ||
11 | all components, disable the building of tests altogether. | ||
12 | |||
13 | Upstream-Status: Inappropriate | ||
14 | |||
15 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
16 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
17 | --- | ||
18 | CMakeLists.txt | 10 ---------- | ||
19 | 1 file changed, 10 deletions(-) | ||
20 | |||
21 | diff --git a/CMakeLists.txt b/CMakeLists.txt | ||
22 | index 92c50370..80999c98 100644 | ||
23 | --- a/CMakeLists.txt | ||
24 | +++ b/CMakeLists.txt | ||
25 | @@ -25,13 +25,6 @@ if(LLVM_SPIRV_BUILD_EXTERNAL) | ||
26 | set(CMAKE_CXX_STANDARD 14) | ||
27 | set(CMAKE_CXX_STANDARD_REQUIRED ON) | ||
28 | |||
29 | - if(LLVM_SPIRV_INCLUDE_TESTS) | ||
30 | - set(LLVM_TEST_COMPONENTS | ||
31 | - llvm-as | ||
32 | - llvm-dis | ||
33 | - ) | ||
34 | - endif(LLVM_SPIRV_INCLUDE_TESTS) | ||
35 | - | ||
36 | find_package(LLVM 10.0.0 REQUIRED | ||
37 | COMPONENTS | ||
38 | Analysis | ||
39 | @@ -63,9 +56,6 @@ set(LLVM_SPIRV_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) | ||
40 | |||
41 | add_subdirectory(lib/SPIRV) | ||
42 | add_subdirectory(tools/llvm-spirv) | ||
43 | -if(LLVM_SPIRV_INCLUDE_TESTS) | ||
44 | - add_subdirectory(test) | ||
45 | -endif(LLVM_SPIRV_INCLUDE_TESTS) | ||
46 | |||
47 | install( | ||
48 | FILES | ||
49 | -- | ||
50 | 2.17.1 | ||
51 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch deleted file mode 100644 index 3f1b24e7..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch +++ /dev/null | |||
@@ -1,812 +0,0 @@ | |||
1 | From 3f544cfe44ee5f113a3fb554aca2cf5d64996062 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Wed, 7 Apr 2021 16:38:38 +0800 | ||
4 | Subject: [PATCH 2/7] Add cl_khr_extended_subgroup extensions. | ||
5 | |||
6 | Added extensions and their function declarations into | ||
7 | the standard header. | ||
8 | |||
9 | Patch by Piotr Fusik! | ||
10 | |||
11 | Tags: #clang | ||
12 | |||
13 | Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/4a4402f0d72167477a6252e4c3daf5089ebc8f9a] | ||
14 | Signed-off-by: Anastasia Stulova <anastasia.stulova@arm.com> | ||
15 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
16 | --- | ||
17 | .../include/clang/Basic/OpenCLExtensions.def | 7 + | ||
18 | clang/lib/Headers/opencl-c.h | 668 ++++++++++++++++++ | ||
19 | clang/test/SemaOpenCL/extension-version.cl | 83 +++ | ||
20 | 3 files changed, 758 insertions(+) | ||
21 | |||
22 | diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def | ||
23 | index 608f78a13eef..d1574164f9b2 100644 | ||
24 | --- a/clang/include/clang/Basic/OpenCLExtensions.def | ||
25 | +++ b/clang/include/clang/Basic/OpenCLExtensions.def | ||
26 | @@ -74,6 +74,13 @@ OPENCLEXT_INTERNAL(cl_khr_mipmap_image_writes, 200, ~0U) | ||
27 | OPENCLEXT_INTERNAL(cl_khr_srgb_image_writes, 200, ~0U) | ||
28 | OPENCLEXT_INTERNAL(cl_khr_subgroups, 200, ~0U) | ||
29 | OPENCLEXT_INTERNAL(cl_khr_terminate_context, 200, ~0U) | ||
30 | +OPENCLEXT_INTERNAL(cl_khr_subgroup_extended_types, 200, ~0U) | ||
31 | +OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_vote, 200, ~0U) | ||
32 | +OPENCLEXT_INTERNAL(cl_khr_subgroup_ballot, 200, ~0U) | ||
33 | +OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_arithmetic, 200, ~0U) | ||
34 | +OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle, 200, ~0U) | ||
35 | +OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle_relative, 200, ~0U) | ||
36 | +OPENCLEXT_INTERNAL(cl_khr_subgroup_clustered_reduce, 200, ~0U) | ||
37 | |||
38 | // Clang Extensions. | ||
39 | OPENCLEXT_INTERNAL(cl_clang_storage_class_specifiers, 100, ~0U) | ||
40 | diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h | ||
41 | index 93a946cec5b1..67d900eb1c3d 100644 | ||
42 | --- a/clang/lib/Headers/opencl-c.h | ||
43 | +++ b/clang/lib/Headers/opencl-c.h | ||
44 | @@ -17530,6 +17530,674 @@ double __ovld __conv sub_group_scan_inclusive_max(double x); | ||
45 | |||
46 | #endif //cl_khr_subgroups cl_intel_subgroups | ||
47 | |||
48 | +#if defined(cl_khr_subgroup_extended_types) | ||
49 | +char __ovld __conv sub_group_broadcast( char value, uint index ); | ||
50 | +char2 __ovld __conv sub_group_broadcast( char2 value, uint index ); | ||
51 | +char3 __ovld __conv sub_group_broadcast( char3 value, uint index ); | ||
52 | +char4 __ovld __conv sub_group_broadcast( char4 value, uint index ); | ||
53 | +char8 __ovld __conv sub_group_broadcast( char8 value, uint index ); | ||
54 | +char16 __ovld __conv sub_group_broadcast( char16 value, uint index ); | ||
55 | + | ||
56 | +uchar __ovld __conv sub_group_broadcast( uchar value, uint index ); | ||
57 | +uchar2 __ovld __conv sub_group_broadcast( uchar2 value, uint index ); | ||
58 | +uchar3 __ovld __conv sub_group_broadcast( uchar3 value, uint index ); | ||
59 | +uchar4 __ovld __conv sub_group_broadcast( uchar4 value, uint index ); | ||
60 | +uchar8 __ovld __conv sub_group_broadcast( uchar8 value, uint index ); | ||
61 | +uchar16 __ovld __conv sub_group_broadcast( uchar16 value, uint index ); | ||
62 | + | ||
63 | +short __ovld __conv sub_group_broadcast( short value, uint index ); | ||
64 | +short2 __ovld __conv sub_group_broadcast( short2 value, uint index ); | ||
65 | +short3 __ovld __conv sub_group_broadcast( short3 value, uint index ); | ||
66 | +short4 __ovld __conv sub_group_broadcast( short4 value, uint index ); | ||
67 | +short8 __ovld __conv sub_group_broadcast( short8 value, uint index ); | ||
68 | +short16 __ovld __conv sub_group_broadcast( short16 value, uint index ); | ||
69 | + | ||
70 | +ushort __ovld __conv sub_group_broadcast( ushort value, uint index ); | ||
71 | +ushort2 __ovld __conv sub_group_broadcast( ushort2 value, uint index ); | ||
72 | +ushort3 __ovld __conv sub_group_broadcast( ushort3 value, uint index ); | ||
73 | +ushort4 __ovld __conv sub_group_broadcast( ushort4 value, uint index ); | ||
74 | +ushort8 __ovld __conv sub_group_broadcast( ushort8 value, uint index ); | ||
75 | +ushort16 __ovld __conv sub_group_broadcast( ushort16 value, uint index ); | ||
76 | + | ||
77 | +// scalar int broadcast is part of cl_khr_subgroups | ||
78 | +int2 __ovld __conv sub_group_broadcast( int2 value, uint index ); | ||
79 | +int3 __ovld __conv sub_group_broadcast( int3 value, uint index ); | ||
80 | +int4 __ovld __conv sub_group_broadcast( int4 value, uint index ); | ||
81 | +int8 __ovld __conv sub_group_broadcast( int8 value, uint index ); | ||
82 | +int16 __ovld __conv sub_group_broadcast( int16 value, uint index ); | ||
83 | + | ||
84 | +// scalar uint broadcast is part of cl_khr_subgroups | ||
85 | +uint2 __ovld __conv sub_group_broadcast( uint2 value, uint index ); | ||
86 | +uint3 __ovld __conv sub_group_broadcast( uint3 value, uint index ); | ||
87 | +uint4 __ovld __conv sub_group_broadcast( uint4 value, uint index ); | ||
88 | +uint8 __ovld __conv sub_group_broadcast( uint8 value, uint index ); | ||
89 | +uint16 __ovld __conv sub_group_broadcast( uint16 value, uint index ); | ||
90 | + | ||
91 | +// scalar long broadcast is part of cl_khr_subgroups | ||
92 | +long2 __ovld __conv sub_group_broadcast( long2 value, uint index ); | ||
93 | +long3 __ovld __conv sub_group_broadcast( long3 value, uint index ); | ||
94 | +long4 __ovld __conv sub_group_broadcast( long4 value, uint index ); | ||
95 | +long8 __ovld __conv sub_group_broadcast( long8 value, uint index ); | ||
96 | +long16 __ovld __conv sub_group_broadcast( long16 value, uint index ); | ||
97 | + | ||
98 | +// scalar ulong broadcast is part of cl_khr_subgroups | ||
99 | +ulong2 __ovld __conv sub_group_broadcast( ulong2 value, uint index ); | ||
100 | +ulong3 __ovld __conv sub_group_broadcast( ulong3 value, uint index ); | ||
101 | +ulong4 __ovld __conv sub_group_broadcast( ulong4 value, uint index ); | ||
102 | +ulong8 __ovld __conv sub_group_broadcast( ulong8 value, uint index ); | ||
103 | +ulong16 __ovld __conv sub_group_broadcast( ulong16 value, uint index ); | ||
104 | + | ||
105 | +// scalar float broadcast is part of cl_khr_subgroups | ||
106 | +float2 __ovld __conv sub_group_broadcast( float2 value, uint index ); | ||
107 | +float3 __ovld __conv sub_group_broadcast( float3 value, uint index ); | ||
108 | +float4 __ovld __conv sub_group_broadcast( float4 value, uint index ); | ||
109 | +float8 __ovld __conv sub_group_broadcast( float8 value, uint index ); | ||
110 | +float16 __ovld __conv sub_group_broadcast( float16 value, uint index ); | ||
111 | + | ||
112 | +char __ovld __conv sub_group_reduce_add( char value ); | ||
113 | +uchar __ovld __conv sub_group_reduce_add( uchar value ); | ||
114 | +short __ovld __conv sub_group_reduce_add( short value ); | ||
115 | +ushort __ovld __conv sub_group_reduce_add( ushort value ); | ||
116 | + | ||
117 | +char __ovld __conv sub_group_reduce_min( char value ); | ||
118 | +uchar __ovld __conv sub_group_reduce_min( uchar value ); | ||
119 | +short __ovld __conv sub_group_reduce_min( short value ); | ||
120 | +ushort __ovld __conv sub_group_reduce_min( ushort value ); | ||
121 | + | ||
122 | +char __ovld __conv sub_group_reduce_max( char value ); | ||
123 | +uchar __ovld __conv sub_group_reduce_max( uchar value ); | ||
124 | +short __ovld __conv sub_group_reduce_max( short value ); | ||
125 | +ushort __ovld __conv sub_group_reduce_max( ushort value ); | ||
126 | + | ||
127 | +char __ovld __conv sub_group_scan_inclusive_add( char value ); | ||
128 | +uchar __ovld __conv sub_group_scan_inclusive_add( uchar value ); | ||
129 | +short __ovld __conv sub_group_scan_inclusive_add( short value ); | ||
130 | +ushort __ovld __conv sub_group_scan_inclusive_add( ushort value ); | ||
131 | + | ||
132 | +char __ovld __conv sub_group_scan_inclusive_min( char value ); | ||
133 | +uchar __ovld __conv sub_group_scan_inclusive_min( uchar value ); | ||
134 | +short __ovld __conv sub_group_scan_inclusive_min( short value ); | ||
135 | +ushort __ovld __conv sub_group_scan_inclusive_min( ushort value ); | ||
136 | + | ||
137 | +char __ovld __conv sub_group_scan_inclusive_max( char value ); | ||
138 | +uchar __ovld __conv sub_group_scan_inclusive_max( uchar value ); | ||
139 | +short __ovld __conv sub_group_scan_inclusive_max( short value ); | ||
140 | +ushort __ovld __conv sub_group_scan_inclusive_max( ushort value ); | ||
141 | + | ||
142 | +char __ovld __conv sub_group_scan_exclusive_add( char value ); | ||
143 | +uchar __ovld __conv sub_group_scan_exclusive_add( uchar value ); | ||
144 | +short __ovld __conv sub_group_scan_exclusive_add( short value ); | ||
145 | +ushort __ovld __conv sub_group_scan_exclusive_add( ushort value ); | ||
146 | + | ||
147 | +char __ovld __conv sub_group_scan_exclusive_min( char value ); | ||
148 | +uchar __ovld __conv sub_group_scan_exclusive_min( uchar value ); | ||
149 | +short __ovld __conv sub_group_scan_exclusive_min( short value ); | ||
150 | +ushort __ovld __conv sub_group_scan_exclusive_min( ushort value ); | ||
151 | + | ||
152 | +char __ovld __conv sub_group_scan_exclusive_max( char value ); | ||
153 | +uchar __ovld __conv sub_group_scan_exclusive_max( uchar value ); | ||
154 | +short __ovld __conv sub_group_scan_exclusive_max( short value ); | ||
155 | +ushort __ovld __conv sub_group_scan_exclusive_max( ushort value ); | ||
156 | + | ||
157 | +#if defined(cl_khr_fp16) | ||
158 | +// scalar half broadcast is part of cl_khr_subgroups | ||
159 | +half2 __ovld __conv sub_group_broadcast( half2 value, uint index ); | ||
160 | +half3 __ovld __conv sub_group_broadcast( half3 value, uint index ); | ||
161 | +half4 __ovld __conv sub_group_broadcast( half4 value, uint index ); | ||
162 | +half8 __ovld __conv sub_group_broadcast( half8 value, uint index ); | ||
163 | +half16 __ovld __conv sub_group_broadcast( half16 value, uint index ); | ||
164 | +#endif // cl_khr_fp16 | ||
165 | + | ||
166 | +#if defined(cl_khr_fp64) | ||
167 | +// scalar double broadcast is part of cl_khr_subgroups | ||
168 | +double2 __ovld __conv sub_group_broadcast( double2 value, uint index ); | ||
169 | +double3 __ovld __conv sub_group_broadcast( double3 value, uint index ); | ||
170 | +double4 __ovld __conv sub_group_broadcast( double4 value, uint index ); | ||
171 | +double8 __ovld __conv sub_group_broadcast( double8 value, uint index ); | ||
172 | +double16 __ovld __conv sub_group_broadcast( double16 value, uint index ); | ||
173 | +#endif // cl_khr_fp64 | ||
174 | + | ||
175 | +#endif // cl_khr_subgroup_extended_types | ||
176 | + | ||
177 | +#if defined(cl_khr_subgroup_non_uniform_vote) | ||
178 | +int __ovld sub_group_elect(void); | ||
179 | +int __ovld sub_group_non_uniform_all( int predicate ); | ||
180 | +int __ovld sub_group_non_uniform_any( int predicate ); | ||
181 | + | ||
182 | +int __ovld sub_group_non_uniform_all_equal( char value ); | ||
183 | +int __ovld sub_group_non_uniform_all_equal( uchar value ); | ||
184 | +int __ovld sub_group_non_uniform_all_equal( short value ); | ||
185 | +int __ovld sub_group_non_uniform_all_equal( ushort value ); | ||
186 | +int __ovld sub_group_non_uniform_all_equal( int value ); | ||
187 | +int __ovld sub_group_non_uniform_all_equal( uint value ); | ||
188 | +int __ovld sub_group_non_uniform_all_equal( long value ); | ||
189 | +int __ovld sub_group_non_uniform_all_equal( ulong value ); | ||
190 | +int __ovld sub_group_non_uniform_all_equal( float value ); | ||
191 | + | ||
192 | +#if defined(cl_khr_fp16) | ||
193 | +int __ovld sub_group_non_uniform_all_equal( half value ); | ||
194 | +#endif // cl_khr_fp16 | ||
195 | + | ||
196 | +#if defined(cl_khr_fp64) | ||
197 | +int __ovld sub_group_non_uniform_all_equal( double value ); | ||
198 | +#endif // cl_khr_fp64 | ||
199 | + | ||
200 | +#endif // cl_khr_subgroup_non_uniform_vote | ||
201 | + | ||
202 | +#if defined(cl_khr_subgroup_ballot) | ||
203 | +char __ovld sub_group_non_uniform_broadcast( char value, uint index ); | ||
204 | +char2 __ovld sub_group_non_uniform_broadcast( char2 value, uint index ); | ||
205 | +char3 __ovld sub_group_non_uniform_broadcast( char3 value, uint index ); | ||
206 | +char4 __ovld sub_group_non_uniform_broadcast( char4 value, uint index ); | ||
207 | +char8 __ovld sub_group_non_uniform_broadcast( char8 value, uint index ); | ||
208 | +char16 __ovld sub_group_non_uniform_broadcast( char16 value, uint index ); | ||
209 | + | ||
210 | +uchar __ovld sub_group_non_uniform_broadcast( uchar value, uint index ); | ||
211 | +uchar2 __ovld sub_group_non_uniform_broadcast( uchar2 value, uint index ); | ||
212 | +uchar3 __ovld sub_group_non_uniform_broadcast( uchar3 value, uint index ); | ||
213 | +uchar4 __ovld sub_group_non_uniform_broadcast( uchar4 value, uint index ); | ||
214 | +uchar8 __ovld sub_group_non_uniform_broadcast( uchar8 value, uint index ); | ||
215 | +uchar16 __ovld sub_group_non_uniform_broadcast( uchar16 value, uint index ); | ||
216 | + | ||
217 | +short __ovld sub_group_non_uniform_broadcast( short value, uint index ); | ||
218 | +short2 __ovld sub_group_non_uniform_broadcast( short2 value, uint index ); | ||
219 | +short3 __ovld sub_group_non_uniform_broadcast( short3 value, uint index ); | ||
220 | +short4 __ovld sub_group_non_uniform_broadcast( short4 value, uint index ); | ||
221 | +short8 __ovld sub_group_non_uniform_broadcast( short8 value, uint index ); | ||
222 | +short16 __ovld sub_group_non_uniform_broadcast( short16 value, uint index ); | ||
223 | + | ||
224 | +ushort __ovld sub_group_non_uniform_broadcast( ushort value, uint index ); | ||
225 | +ushort2 __ovld sub_group_non_uniform_broadcast( ushort2 value, uint index ); | ||
226 | +ushort3 __ovld sub_group_non_uniform_broadcast( ushort3 value, uint index ); | ||
227 | +ushort4 __ovld sub_group_non_uniform_broadcast( ushort4 value, uint index ); | ||
228 | +ushort8 __ovld sub_group_non_uniform_broadcast( ushort8 value, uint index ); | ||
229 | +ushort16 __ovld sub_group_non_uniform_broadcast( ushort16 value, uint index ); | ||
230 | + | ||
231 | +int __ovld sub_group_non_uniform_broadcast( int value, uint index ); | ||
232 | +int2 __ovld sub_group_non_uniform_broadcast( int2 value, uint index ); | ||
233 | +int3 __ovld sub_group_non_uniform_broadcast( int3 value, uint index ); | ||
234 | +int4 __ovld sub_group_non_uniform_broadcast( int4 value, uint index ); | ||
235 | +int8 __ovld sub_group_non_uniform_broadcast( int8 value, uint index ); | ||
236 | +int16 __ovld sub_group_non_uniform_broadcast( int16 value, uint index ); | ||
237 | + | ||
238 | +uint __ovld sub_group_non_uniform_broadcast( uint value, uint index ); | ||
239 | +uint2 __ovld sub_group_non_uniform_broadcast( uint2 value, uint index ); | ||
240 | +uint3 __ovld sub_group_non_uniform_broadcast( uint3 value, uint index ); | ||
241 | +uint4 __ovld sub_group_non_uniform_broadcast( uint4 value, uint index ); | ||
242 | +uint8 __ovld sub_group_non_uniform_broadcast( uint8 value, uint index ); | ||
243 | +uint16 __ovld sub_group_non_uniform_broadcast( uint16 value, uint index ); | ||
244 | + | ||
245 | +long __ovld sub_group_non_uniform_broadcast( long value, uint index ); | ||
246 | +long2 __ovld sub_group_non_uniform_broadcast( long2 value, uint index ); | ||
247 | +long3 __ovld sub_group_non_uniform_broadcast( long3 value, uint index ); | ||
248 | +long4 __ovld sub_group_non_uniform_broadcast( long4 value, uint index ); | ||
249 | +long8 __ovld sub_group_non_uniform_broadcast( long8 value, uint index ); | ||
250 | +long16 __ovld sub_group_non_uniform_broadcast( long16 value, uint index ); | ||
251 | + | ||
252 | +ulong __ovld sub_group_non_uniform_broadcast( ulong value, uint index ); | ||
253 | +ulong2 __ovld sub_group_non_uniform_broadcast( ulong2 value, uint index ); | ||
254 | +ulong3 __ovld sub_group_non_uniform_broadcast( ulong3 value, uint index ); | ||
255 | +ulong4 __ovld sub_group_non_uniform_broadcast( ulong4 value, uint index ); | ||
256 | +ulong8 __ovld sub_group_non_uniform_broadcast( ulong8 value, uint index ); | ||
257 | +ulong16 __ovld sub_group_non_uniform_broadcast( ulong16 value, uint index ); | ||
258 | + | ||
259 | +float __ovld sub_group_non_uniform_broadcast( float value, uint index ); | ||
260 | +float2 __ovld sub_group_non_uniform_broadcast( float2 value, uint index ); | ||
261 | +float3 __ovld sub_group_non_uniform_broadcast( float3 value, uint index ); | ||
262 | +float4 __ovld sub_group_non_uniform_broadcast( float4 value, uint index ); | ||
263 | +float8 __ovld sub_group_non_uniform_broadcast( float8 value, uint index ); | ||
264 | +float16 __ovld sub_group_non_uniform_broadcast( float16 value, uint index ); | ||
265 | + | ||
266 | +char __ovld sub_group_broadcast_first( char value ); | ||
267 | +uchar __ovld sub_group_broadcast_first( uchar value ); | ||
268 | +short __ovld sub_group_broadcast_first( short value ); | ||
269 | +ushort __ovld sub_group_broadcast_first( ushort value ); | ||
270 | +int __ovld sub_group_broadcast_first( int value ); | ||
271 | +uint __ovld sub_group_broadcast_first( uint value ); | ||
272 | +long __ovld sub_group_broadcast_first( long value ); | ||
273 | +ulong __ovld sub_group_broadcast_first( ulong value ); | ||
274 | +float __ovld sub_group_broadcast_first( float value ); | ||
275 | + | ||
276 | +uint4 __ovld sub_group_ballot( int predicate ); | ||
277 | +int __ovld __cnfn sub_group_inverse_ballot( uint4 value ); | ||
278 | +int __ovld __cnfn sub_group_ballot_bit_extract( uint4 value, uint index ); | ||
279 | +uint __ovld __cnfn sub_group_ballot_bit_count( uint4 value ); | ||
280 | + | ||
281 | +uint __ovld sub_group_ballot_inclusive_scan( uint4 value ); | ||
282 | +uint __ovld sub_group_ballot_exclusive_scan( uint4 value ); | ||
283 | +uint __ovld sub_group_ballot_find_lsb( uint4 value ); | ||
284 | +uint __ovld sub_group_ballot_find_msb( uint4 value ); | ||
285 | + | ||
286 | +uint4 __ovld __cnfn get_sub_group_eq_mask(void); | ||
287 | +uint4 __ovld __cnfn get_sub_group_ge_mask(void); | ||
288 | +uint4 __ovld __cnfn get_sub_group_gt_mask(void); | ||
289 | +uint4 __ovld __cnfn get_sub_group_le_mask(void); | ||
290 | +uint4 __ovld __cnfn get_sub_group_lt_mask(void); | ||
291 | + | ||
292 | +#if defined(cl_khr_fp16) | ||
293 | +half __ovld sub_group_non_uniform_broadcast( half value, uint index ); | ||
294 | +half2 __ovld sub_group_non_uniform_broadcast( half2 value, uint index ); | ||
295 | +half3 __ovld sub_group_non_uniform_broadcast( half3 value, uint index ); | ||
296 | +half4 __ovld sub_group_non_uniform_broadcast( half4 value, uint index ); | ||
297 | +half8 __ovld sub_group_non_uniform_broadcast( half8 value, uint index ); | ||
298 | +half16 __ovld sub_group_non_uniform_broadcast( half16 value, uint index ); | ||
299 | + | ||
300 | +half __ovld sub_group_broadcast_first( half value ); | ||
301 | +#endif // cl_khr_fp16 | ||
302 | + | ||
303 | +#if defined(cl_khr_fp64) | ||
304 | +double __ovld sub_group_non_uniform_broadcast( double value, uint index ); | ||
305 | +double2 __ovld sub_group_non_uniform_broadcast( double2 value, uint index ); | ||
306 | +double3 __ovld sub_group_non_uniform_broadcast( double3 value, uint index ); | ||
307 | +double4 __ovld sub_group_non_uniform_broadcast( double4 value, uint index ); | ||
308 | +double8 __ovld sub_group_non_uniform_broadcast( double8 value, uint index ); | ||
309 | +double16 __ovld sub_group_non_uniform_broadcast( double16 value, uint index ); | ||
310 | + | ||
311 | +double __ovld sub_group_broadcast_first( double value ); | ||
312 | +#endif // cl_khr_fp64 | ||
313 | + | ||
314 | +#endif // cl_khr_subgroup_ballot | ||
315 | + | ||
316 | +#if defined(cl_khr_subgroup_non_uniform_arithmetic) | ||
317 | +char __ovld sub_group_non_uniform_reduce_add( char value ); | ||
318 | +uchar __ovld sub_group_non_uniform_reduce_add( uchar value ); | ||
319 | +short __ovld sub_group_non_uniform_reduce_add( short value ); | ||
320 | +ushort __ovld sub_group_non_uniform_reduce_add( ushort value ); | ||
321 | +int __ovld sub_group_non_uniform_reduce_add( int value ); | ||
322 | +uint __ovld sub_group_non_uniform_reduce_add( uint value ); | ||
323 | +long __ovld sub_group_non_uniform_reduce_add( long value ); | ||
324 | +ulong __ovld sub_group_non_uniform_reduce_add( ulong value ); | ||
325 | +float __ovld sub_group_non_uniform_reduce_add( float value ); | ||
326 | + | ||
327 | +char __ovld sub_group_non_uniform_reduce_mul( char value ); | ||
328 | +uchar __ovld sub_group_non_uniform_reduce_mul( uchar value ); | ||
329 | +short __ovld sub_group_non_uniform_reduce_mul( short value ); | ||
330 | +ushort __ovld sub_group_non_uniform_reduce_mul( ushort value ); | ||
331 | +int __ovld sub_group_non_uniform_reduce_mul( int value ); | ||
332 | +uint __ovld sub_group_non_uniform_reduce_mul( uint value ); | ||
333 | +long __ovld sub_group_non_uniform_reduce_mul( long value ); | ||
334 | +ulong __ovld sub_group_non_uniform_reduce_mul( ulong value ); | ||
335 | +float __ovld sub_group_non_uniform_reduce_mul( float value ); | ||
336 | + | ||
337 | +char __ovld sub_group_non_uniform_reduce_min( char value ); | ||
338 | +uchar __ovld sub_group_non_uniform_reduce_min( uchar value ); | ||
339 | +short __ovld sub_group_non_uniform_reduce_min( short value ); | ||
340 | +ushort __ovld sub_group_non_uniform_reduce_min( ushort value ); | ||
341 | +int __ovld sub_group_non_uniform_reduce_min( int value ); | ||
342 | +uint __ovld sub_group_non_uniform_reduce_min( uint value ); | ||
343 | +long __ovld sub_group_non_uniform_reduce_min( long value ); | ||
344 | +ulong __ovld sub_group_non_uniform_reduce_min( ulong value ); | ||
345 | +float __ovld sub_group_non_uniform_reduce_min( float value ); | ||
346 | + | ||
347 | +char __ovld sub_group_non_uniform_reduce_max( char value ); | ||
348 | +uchar __ovld sub_group_non_uniform_reduce_max( uchar value ); | ||
349 | +short __ovld sub_group_non_uniform_reduce_max( short value ); | ||
350 | +ushort __ovld sub_group_non_uniform_reduce_max( ushort value ); | ||
351 | +int __ovld sub_group_non_uniform_reduce_max( int value ); | ||
352 | +uint __ovld sub_group_non_uniform_reduce_max( uint value ); | ||
353 | +long __ovld sub_group_non_uniform_reduce_max( long value ); | ||
354 | +ulong __ovld sub_group_non_uniform_reduce_max( ulong value ); | ||
355 | +float __ovld sub_group_non_uniform_reduce_max( float value ); | ||
356 | + | ||
357 | +char __ovld sub_group_non_uniform_scan_inclusive_add( char value ); | ||
358 | +uchar __ovld sub_group_non_uniform_scan_inclusive_add( uchar value ); | ||
359 | +short __ovld sub_group_non_uniform_scan_inclusive_add( short value ); | ||
360 | +ushort __ovld sub_group_non_uniform_scan_inclusive_add( ushort value ); | ||
361 | +int __ovld sub_group_non_uniform_scan_inclusive_add( int value ); | ||
362 | +uint __ovld sub_group_non_uniform_scan_inclusive_add( uint value ); | ||
363 | +long __ovld sub_group_non_uniform_scan_inclusive_add( long value ); | ||
364 | +ulong __ovld sub_group_non_uniform_scan_inclusive_add( ulong value ); | ||
365 | +float __ovld sub_group_non_uniform_scan_inclusive_add( float value ); | ||
366 | + | ||
367 | +char __ovld sub_group_non_uniform_scan_inclusive_mul( char value ); | ||
368 | +uchar __ovld sub_group_non_uniform_scan_inclusive_mul( uchar value ); | ||
369 | +short __ovld sub_group_non_uniform_scan_inclusive_mul( short value ); | ||
370 | +ushort __ovld sub_group_non_uniform_scan_inclusive_mul( ushort value ); | ||
371 | +int __ovld sub_group_non_uniform_scan_inclusive_mul( int value ); | ||
372 | +uint __ovld sub_group_non_uniform_scan_inclusive_mul( uint value ); | ||
373 | +long __ovld sub_group_non_uniform_scan_inclusive_mul( long value ); | ||
374 | +ulong __ovld sub_group_non_uniform_scan_inclusive_mul( ulong value ); | ||
375 | +float __ovld sub_group_non_uniform_scan_inclusive_mul( float value ); | ||
376 | + | ||
377 | +char __ovld sub_group_non_uniform_scan_inclusive_min( char value ); | ||
378 | +uchar __ovld sub_group_non_uniform_scan_inclusive_min( uchar value ); | ||
379 | +short __ovld sub_group_non_uniform_scan_inclusive_min( short value ); | ||
380 | +ushort __ovld sub_group_non_uniform_scan_inclusive_min( ushort value ); | ||
381 | +int __ovld sub_group_non_uniform_scan_inclusive_min( int value ); | ||
382 | +uint __ovld sub_group_non_uniform_scan_inclusive_min( uint value ); | ||
383 | +long __ovld sub_group_non_uniform_scan_inclusive_min( long value ); | ||
384 | +ulong __ovld sub_group_non_uniform_scan_inclusive_min( ulong value ); | ||
385 | +float __ovld sub_group_non_uniform_scan_inclusive_min( float value ); | ||
386 | + | ||
387 | +char __ovld sub_group_non_uniform_scan_inclusive_max( char value ); | ||
388 | +uchar __ovld sub_group_non_uniform_scan_inclusive_max( uchar value ); | ||
389 | +short __ovld sub_group_non_uniform_scan_inclusive_max( short value ); | ||
390 | +ushort __ovld sub_group_non_uniform_scan_inclusive_max( ushort value ); | ||
391 | +int __ovld sub_group_non_uniform_scan_inclusive_max( int value ); | ||
392 | +uint __ovld sub_group_non_uniform_scan_inclusive_max( uint value ); | ||
393 | +long __ovld sub_group_non_uniform_scan_inclusive_max( long value ); | ||
394 | +ulong __ovld sub_group_non_uniform_scan_inclusive_max( ulong value ); | ||
395 | +float __ovld sub_group_non_uniform_scan_inclusive_max( float value ); | ||
396 | + | ||
397 | +char __ovld sub_group_non_uniform_scan_exclusive_add( char value ); | ||
398 | +uchar __ovld sub_group_non_uniform_scan_exclusive_add( uchar value ); | ||
399 | +short __ovld sub_group_non_uniform_scan_exclusive_add( short value ); | ||
400 | +ushort __ovld sub_group_non_uniform_scan_exclusive_add( ushort value ); | ||
401 | +int __ovld sub_group_non_uniform_scan_exclusive_add( int value ); | ||
402 | +uint __ovld sub_group_non_uniform_scan_exclusive_add( uint value ); | ||
403 | +long __ovld sub_group_non_uniform_scan_exclusive_add( long value ); | ||
404 | +ulong __ovld sub_group_non_uniform_scan_exclusive_add( ulong value ); | ||
405 | +float __ovld sub_group_non_uniform_scan_exclusive_add( float value ); | ||
406 | + | ||
407 | +char __ovld sub_group_non_uniform_scan_exclusive_mul( char value ); | ||
408 | +uchar __ovld sub_group_non_uniform_scan_exclusive_mul( uchar value ); | ||
409 | +short __ovld sub_group_non_uniform_scan_exclusive_mul( short value ); | ||
410 | +ushort __ovld sub_group_non_uniform_scan_exclusive_mul( ushort value ); | ||
411 | +int __ovld sub_group_non_uniform_scan_exclusive_mul( int value ); | ||
412 | +uint __ovld sub_group_non_uniform_scan_exclusive_mul( uint value ); | ||
413 | +long __ovld sub_group_non_uniform_scan_exclusive_mul( long value ); | ||
414 | +ulong __ovld sub_group_non_uniform_scan_exclusive_mul( ulong value ); | ||
415 | +float __ovld sub_group_non_uniform_scan_exclusive_mul( float value ); | ||
416 | + | ||
417 | +char __ovld sub_group_non_uniform_scan_exclusive_min( char value ); | ||
418 | +uchar __ovld sub_group_non_uniform_scan_exclusive_min( uchar value ); | ||
419 | +short __ovld sub_group_non_uniform_scan_exclusive_min( short value ); | ||
420 | +ushort __ovld sub_group_non_uniform_scan_exclusive_min( ushort value ); | ||
421 | +int __ovld sub_group_non_uniform_scan_exclusive_min( int value ); | ||
422 | +uint __ovld sub_group_non_uniform_scan_exclusive_min( uint value ); | ||
423 | +long __ovld sub_group_non_uniform_scan_exclusive_min( long value ); | ||
424 | +ulong __ovld sub_group_non_uniform_scan_exclusive_min( ulong value ); | ||
425 | +float __ovld sub_group_non_uniform_scan_exclusive_min( float value ); | ||
426 | + | ||
427 | +char __ovld sub_group_non_uniform_scan_exclusive_max( char value ); | ||
428 | +uchar __ovld sub_group_non_uniform_scan_exclusive_max( uchar value ); | ||
429 | +short __ovld sub_group_non_uniform_scan_exclusive_max( short value ); | ||
430 | +ushort __ovld sub_group_non_uniform_scan_exclusive_max( ushort value ); | ||
431 | +int __ovld sub_group_non_uniform_scan_exclusive_max( int value ); | ||
432 | +uint __ovld sub_group_non_uniform_scan_exclusive_max( uint value ); | ||
433 | +long __ovld sub_group_non_uniform_scan_exclusive_max( long value ); | ||
434 | +ulong __ovld sub_group_non_uniform_scan_exclusive_max( ulong value ); | ||
435 | +float __ovld sub_group_non_uniform_scan_exclusive_max( float value ); | ||
436 | + | ||
437 | +char __ovld sub_group_non_uniform_reduce_and( char value ); | ||
438 | +uchar __ovld sub_group_non_uniform_reduce_and( uchar value ); | ||
439 | +short __ovld sub_group_non_uniform_reduce_and( short value ); | ||
440 | +ushort __ovld sub_group_non_uniform_reduce_and( ushort value ); | ||
441 | +int __ovld sub_group_non_uniform_reduce_and( int value ); | ||
442 | +uint __ovld sub_group_non_uniform_reduce_and( uint value ); | ||
443 | +long __ovld sub_group_non_uniform_reduce_and( long value ); | ||
444 | +ulong __ovld sub_group_non_uniform_reduce_and( ulong value ); | ||
445 | + | ||
446 | +char __ovld sub_group_non_uniform_reduce_or( char value ); | ||
447 | +uchar __ovld sub_group_non_uniform_reduce_or( uchar value ); | ||
448 | +short __ovld sub_group_non_uniform_reduce_or( short value ); | ||
449 | +ushort __ovld sub_group_non_uniform_reduce_or( ushort value ); | ||
450 | +int __ovld sub_group_non_uniform_reduce_or( int value ); | ||
451 | +uint __ovld sub_group_non_uniform_reduce_or( uint value ); | ||
452 | +long __ovld sub_group_non_uniform_reduce_or( long value ); | ||
453 | +ulong __ovld sub_group_non_uniform_reduce_or( ulong value ); | ||
454 | + | ||
455 | +char __ovld sub_group_non_uniform_reduce_xor( char value ); | ||
456 | +uchar __ovld sub_group_non_uniform_reduce_xor( uchar value ); | ||
457 | +short __ovld sub_group_non_uniform_reduce_xor( short value ); | ||
458 | +ushort __ovld sub_group_non_uniform_reduce_xor( ushort value ); | ||
459 | +int __ovld sub_group_non_uniform_reduce_xor( int value ); | ||
460 | +uint __ovld sub_group_non_uniform_reduce_xor( uint value ); | ||
461 | +long __ovld sub_group_non_uniform_reduce_xor( long value ); | ||
462 | +ulong __ovld sub_group_non_uniform_reduce_xor( ulong value ); | ||
463 | + | ||
464 | +char __ovld sub_group_non_uniform_scan_inclusive_and( char value ); | ||
465 | +uchar __ovld sub_group_non_uniform_scan_inclusive_and( uchar value ); | ||
466 | +short __ovld sub_group_non_uniform_scan_inclusive_and( short value ); | ||
467 | +ushort __ovld sub_group_non_uniform_scan_inclusive_and( ushort value ); | ||
468 | +int __ovld sub_group_non_uniform_scan_inclusive_and( int value ); | ||
469 | +uint __ovld sub_group_non_uniform_scan_inclusive_and( uint value ); | ||
470 | +long __ovld sub_group_non_uniform_scan_inclusive_and( long value ); | ||
471 | +ulong __ovld sub_group_non_uniform_scan_inclusive_and( ulong value ); | ||
472 | + | ||
473 | +char __ovld sub_group_non_uniform_scan_inclusive_or( char value ); | ||
474 | +uchar __ovld sub_group_non_uniform_scan_inclusive_or( uchar value ); | ||
475 | +short __ovld sub_group_non_uniform_scan_inclusive_or( short value ); | ||
476 | +ushort __ovld sub_group_non_uniform_scan_inclusive_or( ushort value ); | ||
477 | +int __ovld sub_group_non_uniform_scan_inclusive_or( int value ); | ||
478 | +uint __ovld sub_group_non_uniform_scan_inclusive_or( uint value ); | ||
479 | +long __ovld sub_group_non_uniform_scan_inclusive_or( long value ); | ||
480 | +ulong __ovld sub_group_non_uniform_scan_inclusive_or( ulong value ); | ||
481 | + | ||
482 | +char __ovld sub_group_non_uniform_scan_inclusive_xor( char value ); | ||
483 | +uchar __ovld sub_group_non_uniform_scan_inclusive_xor( uchar value ); | ||
484 | +short __ovld sub_group_non_uniform_scan_inclusive_xor( short value ); | ||
485 | +ushort __ovld sub_group_non_uniform_scan_inclusive_xor( ushort value ); | ||
486 | +int __ovld sub_group_non_uniform_scan_inclusive_xor( int value ); | ||
487 | +uint __ovld sub_group_non_uniform_scan_inclusive_xor( uint value ); | ||
488 | +long __ovld sub_group_non_uniform_scan_inclusive_xor( long value ); | ||
489 | +ulong __ovld sub_group_non_uniform_scan_inclusive_xor( ulong value ); | ||
490 | + | ||
491 | +char __ovld sub_group_non_uniform_scan_exclusive_and( char value ); | ||
492 | +uchar __ovld sub_group_non_uniform_scan_exclusive_and( uchar value ); | ||
493 | +short __ovld sub_group_non_uniform_scan_exclusive_and( short value ); | ||
494 | +ushort __ovld sub_group_non_uniform_scan_exclusive_and( ushort value ); | ||
495 | +int __ovld sub_group_non_uniform_scan_exclusive_and( int value ); | ||
496 | +uint __ovld sub_group_non_uniform_scan_exclusive_and( uint value ); | ||
497 | +long __ovld sub_group_non_uniform_scan_exclusive_and( long value ); | ||
498 | +ulong __ovld sub_group_non_uniform_scan_exclusive_and( ulong value ); | ||
499 | + | ||
500 | +char __ovld sub_group_non_uniform_scan_exclusive_or( char value ); | ||
501 | +uchar __ovld sub_group_non_uniform_scan_exclusive_or( uchar value ); | ||
502 | +short __ovld sub_group_non_uniform_scan_exclusive_or( short value ); | ||
503 | +ushort __ovld sub_group_non_uniform_scan_exclusive_or( ushort value ); | ||
504 | +int __ovld sub_group_non_uniform_scan_exclusive_or( int value ); | ||
505 | +uint __ovld sub_group_non_uniform_scan_exclusive_or( uint value ); | ||
506 | +long __ovld sub_group_non_uniform_scan_exclusive_or( long value ); | ||
507 | +ulong __ovld sub_group_non_uniform_scan_exclusive_or( ulong value ); | ||
508 | + | ||
509 | +char __ovld sub_group_non_uniform_scan_exclusive_xor( char value ); | ||
510 | +uchar __ovld sub_group_non_uniform_scan_exclusive_xor( uchar value ); | ||
511 | +short __ovld sub_group_non_uniform_scan_exclusive_xor( short value ); | ||
512 | +ushort __ovld sub_group_non_uniform_scan_exclusive_xor( ushort value ); | ||
513 | +int __ovld sub_group_non_uniform_scan_exclusive_xor( int value ); | ||
514 | +uint __ovld sub_group_non_uniform_scan_exclusive_xor( uint value ); | ||
515 | +long __ovld sub_group_non_uniform_scan_exclusive_xor( long value ); | ||
516 | +ulong __ovld sub_group_non_uniform_scan_exclusive_xor( ulong value ); | ||
517 | + | ||
518 | +int __ovld sub_group_non_uniform_reduce_logical_and( int predicate ); | ||
519 | +int __ovld sub_group_non_uniform_reduce_logical_or( int predicate ); | ||
520 | +int __ovld sub_group_non_uniform_reduce_logical_xor( int predicate ); | ||
521 | + | ||
522 | +int __ovld sub_group_non_uniform_scan_inclusive_logical_and( int predicate ); | ||
523 | +int __ovld sub_group_non_uniform_scan_inclusive_logical_or( int predicate ); | ||
524 | +int __ovld sub_group_non_uniform_scan_inclusive_logical_xor( int predicate ); | ||
525 | + | ||
526 | +int __ovld sub_group_non_uniform_scan_exclusive_logical_and( int predicate ); | ||
527 | +int __ovld sub_group_non_uniform_scan_exclusive_logical_or( int predicate ); | ||
528 | +int __ovld sub_group_non_uniform_scan_exclusive_logical_xor( int predicate ); | ||
529 | + | ||
530 | +#if defined(cl_khr_fp16) | ||
531 | +half __ovld sub_group_non_uniform_reduce_add( half value ); | ||
532 | +half __ovld sub_group_non_uniform_reduce_mul( half value ); | ||
533 | +half __ovld sub_group_non_uniform_reduce_min( half value ); | ||
534 | +half __ovld sub_group_non_uniform_reduce_max( half value ); | ||
535 | +half __ovld sub_group_non_uniform_scan_inclusive_add( half value ); | ||
536 | +half __ovld sub_group_non_uniform_scan_inclusive_mul( half value ); | ||
537 | +half __ovld sub_group_non_uniform_scan_inclusive_min( half value ); | ||
538 | +half __ovld sub_group_non_uniform_scan_inclusive_max( half value ); | ||
539 | +half __ovld sub_group_non_uniform_scan_exclusive_add( half value ); | ||
540 | +half __ovld sub_group_non_uniform_scan_exclusive_mul( half value ); | ||
541 | +half __ovld sub_group_non_uniform_scan_exclusive_min( half value ); | ||
542 | +half __ovld sub_group_non_uniform_scan_exclusive_max( half value ); | ||
543 | +#endif // cl_khr_fp16 | ||
544 | + | ||
545 | +#if defined(cl_khr_fp64) | ||
546 | +double __ovld sub_group_non_uniform_reduce_add( double value ); | ||
547 | +double __ovld sub_group_non_uniform_reduce_mul( double value ); | ||
548 | +double __ovld sub_group_non_uniform_reduce_min( double value ); | ||
549 | +double __ovld sub_group_non_uniform_reduce_max( double value ); | ||
550 | +double __ovld sub_group_non_uniform_scan_inclusive_add( double value ); | ||
551 | +double __ovld sub_group_non_uniform_scan_inclusive_mul( double value ); | ||
552 | +double __ovld sub_group_non_uniform_scan_inclusive_min( double value ); | ||
553 | +double __ovld sub_group_non_uniform_scan_inclusive_max( double value ); | ||
554 | +double __ovld sub_group_non_uniform_scan_exclusive_add( double value ); | ||
555 | +double __ovld sub_group_non_uniform_scan_exclusive_mul( double value ); | ||
556 | +double __ovld sub_group_non_uniform_scan_exclusive_min( double value ); | ||
557 | +double __ovld sub_group_non_uniform_scan_exclusive_max( double value ); | ||
558 | +#endif // cl_khr_fp64 | ||
559 | + | ||
560 | +#endif // cl_khr_subgroup_non_uniform_arithmetic | ||
561 | + | ||
562 | +#if defined(cl_khr_subgroup_shuffle) | ||
563 | +char __ovld sub_group_shuffle( char value, uint index ); | ||
564 | +uchar __ovld sub_group_shuffle( uchar value, uint index ); | ||
565 | +short __ovld sub_group_shuffle( short value, uint index ); | ||
566 | +ushort __ovld sub_group_shuffle( ushort value, uint index ); | ||
567 | +int __ovld sub_group_shuffle( int value, uint index ); | ||
568 | +uint __ovld sub_group_shuffle( uint value, uint index ); | ||
569 | +long __ovld sub_group_shuffle( long value, uint index ); | ||
570 | +ulong __ovld sub_group_shuffle( ulong value, uint index ); | ||
571 | +float __ovld sub_group_shuffle( float value, uint index ); | ||
572 | + | ||
573 | +char __ovld sub_group_shuffle_xor( char value, uint mask ); | ||
574 | +uchar __ovld sub_group_shuffle_xor( uchar value, uint mask ); | ||
575 | +short __ovld sub_group_shuffle_xor( short value, uint mask ); | ||
576 | +ushort __ovld sub_group_shuffle_xor( ushort value, uint mask ); | ||
577 | +int __ovld sub_group_shuffle_xor( int value, uint mask ); | ||
578 | +uint __ovld sub_group_shuffle_xor( uint value, uint mask ); | ||
579 | +long __ovld sub_group_shuffle_xor( long value, uint mask ); | ||
580 | +ulong __ovld sub_group_shuffle_xor( ulong value, uint mask ); | ||
581 | +float __ovld sub_group_shuffle_xor( float value, uint mask ); | ||
582 | + | ||
583 | +#if defined(cl_khr_fp16) | ||
584 | +half __ovld sub_group_shuffle( half value, uint index ); | ||
585 | +half __ovld sub_group_shuffle_xor( half value, uint mask ); | ||
586 | +#endif // cl_khr_fp16 | ||
587 | + | ||
588 | +#if defined(cl_khr_fp64) | ||
589 | +double __ovld sub_group_shuffle( double value, uint index ); | ||
590 | +double __ovld sub_group_shuffle_xor( double value, uint mask ); | ||
591 | +#endif // cl_khr_fp64 | ||
592 | + | ||
593 | +#endif // cl_khr_subgroup_shuffle | ||
594 | + | ||
595 | +#if defined(cl_khr_subgroup_shuffle_relative) | ||
596 | +char __ovld sub_group_shuffle_up( char value, uint delta ); | ||
597 | +uchar __ovld sub_group_shuffle_up( uchar value, uint delta ); | ||
598 | +short __ovld sub_group_shuffle_up( short value, uint delta ); | ||
599 | +ushort __ovld sub_group_shuffle_up( ushort value, uint delta ); | ||
600 | +int __ovld sub_group_shuffle_up( int value, uint delta ); | ||
601 | +uint __ovld sub_group_shuffle_up( uint value, uint delta ); | ||
602 | +long __ovld sub_group_shuffle_up( long value, uint delta ); | ||
603 | +ulong __ovld sub_group_shuffle_up( ulong value, uint delta ); | ||
604 | +float __ovld sub_group_shuffle_up( float value, uint delta ); | ||
605 | + | ||
606 | +char __ovld sub_group_shuffle_down( char value, uint delta ); | ||
607 | +uchar __ovld sub_group_shuffle_down( uchar value, uint delta ); | ||
608 | +short __ovld sub_group_shuffle_down( short value, uint delta ); | ||
609 | +ushort __ovld sub_group_shuffle_down( ushort value, uint delta ); | ||
610 | +int __ovld sub_group_shuffle_down( int value, uint delta ); | ||
611 | +uint __ovld sub_group_shuffle_down( uint value, uint delta ); | ||
612 | +long __ovld sub_group_shuffle_down( long value, uint delta ); | ||
613 | +ulong __ovld sub_group_shuffle_down( ulong value, uint delta ); | ||
614 | +float __ovld sub_group_shuffle_down( float value, uint delta ); | ||
615 | + | ||
616 | +#if defined(cl_khr_fp16) | ||
617 | +half __ovld sub_group_shuffle_up( half value, uint delta ); | ||
618 | +half __ovld sub_group_shuffle_down( half value, uint delta ); | ||
619 | +#endif // cl_khr_fp16 | ||
620 | + | ||
621 | +#if defined(cl_khr_fp64) | ||
622 | +double __ovld sub_group_shuffle_up( double value, uint delta ); | ||
623 | +double __ovld sub_group_shuffle_down( double value, uint delta ); | ||
624 | +#endif // cl_khr_fp64 | ||
625 | + | ||
626 | +#endif // cl_khr_subgroup_shuffle_relative | ||
627 | + | ||
628 | +#if defined(cl_khr_subgroup_clustered_reduce) | ||
629 | +char __ovld sub_group_clustered_reduce_add( char value, uint clustersize ); | ||
630 | +uchar __ovld sub_group_clustered_reduce_add( uchar value, uint clustersize ); | ||
631 | +short __ovld sub_group_clustered_reduce_add( short value, uint clustersize ); | ||
632 | +ushort __ovld sub_group_clustered_reduce_add( ushort value, uint clustersize ); | ||
633 | +int __ovld sub_group_clustered_reduce_add( int value, uint clustersize ); | ||
634 | +uint __ovld sub_group_clustered_reduce_add( uint value, uint clustersize ); | ||
635 | +long __ovld sub_group_clustered_reduce_add( long value, uint clustersize ); | ||
636 | +ulong __ovld sub_group_clustered_reduce_add( ulong value, uint clustersize ); | ||
637 | +float __ovld sub_group_clustered_reduce_add( float value, uint clustersize ); | ||
638 | + | ||
639 | +char __ovld sub_group_clustered_reduce_mul( char value, uint clustersize ); | ||
640 | +uchar __ovld sub_group_clustered_reduce_mul( uchar value, uint clustersize ); | ||
641 | +short __ovld sub_group_clustered_reduce_mul( short value, uint clustersize ); | ||
642 | +ushort __ovld sub_group_clustered_reduce_mul( ushort value, uint clustersize ); | ||
643 | +int __ovld sub_group_clustered_reduce_mul( int value, uint clustersize ); | ||
644 | +uint __ovld sub_group_clustered_reduce_mul( uint value, uint clustersize ); | ||
645 | +long __ovld sub_group_clustered_reduce_mul( long value, uint clustersize ); | ||
646 | +ulong __ovld sub_group_clustered_reduce_mul( ulong value, uint clustersize ); | ||
647 | +float __ovld sub_group_clustered_reduce_mul( float value, uint clustersize ); | ||
648 | + | ||
649 | +char __ovld sub_group_clustered_reduce_min( char value, uint clustersize ); | ||
650 | +uchar __ovld sub_group_clustered_reduce_min( uchar value, uint clustersize ); | ||
651 | +short __ovld sub_group_clustered_reduce_min( short value, uint clustersize ); | ||
652 | +ushort __ovld sub_group_clustered_reduce_min( ushort value, uint clustersize ); | ||
653 | +int __ovld sub_group_clustered_reduce_min( int value, uint clustersize ); | ||
654 | +uint __ovld sub_group_clustered_reduce_min( uint value, uint clustersize ); | ||
655 | +long __ovld sub_group_clustered_reduce_min( long value, uint clustersize ); | ||
656 | +ulong __ovld sub_group_clustered_reduce_min( ulong value, uint clustersize ); | ||
657 | +float __ovld sub_group_clustered_reduce_min( float value, uint clustersize ); | ||
658 | + | ||
659 | +char __ovld sub_group_clustered_reduce_max( char value, uint clustersize ); | ||
660 | +uchar __ovld sub_group_clustered_reduce_max( uchar value, uint clustersize ); | ||
661 | +short __ovld sub_group_clustered_reduce_max( short value, uint clustersize ); | ||
662 | +ushort __ovld sub_group_clustered_reduce_max( ushort value, uint clustersize ); | ||
663 | +int __ovld sub_group_clustered_reduce_max( int value, uint clustersize ); | ||
664 | +uint __ovld sub_group_clustered_reduce_max( uint value, uint clustersize ); | ||
665 | +long __ovld sub_group_clustered_reduce_max( long value, uint clustersize ); | ||
666 | +ulong __ovld sub_group_clustered_reduce_max( ulong value, uint clustersize ); | ||
667 | +float __ovld sub_group_clustered_reduce_max( float value, uint clustersize ); | ||
668 | + | ||
669 | +char __ovld sub_group_clustered_reduce_and( char value, uint clustersize ); | ||
670 | +uchar __ovld sub_group_clustered_reduce_and( uchar value, uint clustersize ); | ||
671 | +short __ovld sub_group_clustered_reduce_and( short value, uint clustersize ); | ||
672 | +ushort __ovld sub_group_clustered_reduce_and( ushort value, uint clustersize ); | ||
673 | +int __ovld sub_group_clustered_reduce_and( int value, uint clustersize ); | ||
674 | +uint __ovld sub_group_clustered_reduce_and( uint value, uint clustersize ); | ||
675 | +long __ovld sub_group_clustered_reduce_and( long value, uint clustersize ); | ||
676 | +ulong __ovld sub_group_clustered_reduce_and( ulong value, uint clustersize ); | ||
677 | + | ||
678 | +char __ovld sub_group_clustered_reduce_or( char value, uint clustersize ); | ||
679 | +uchar __ovld sub_group_clustered_reduce_or( uchar value, uint clustersize ); | ||
680 | +short __ovld sub_group_clustered_reduce_or( short value, uint clustersize ); | ||
681 | +ushort __ovld sub_group_clustered_reduce_or( ushort value, uint clustersize ); | ||
682 | +int __ovld sub_group_clustered_reduce_or( int value, uint clustersize ); | ||
683 | +uint __ovld sub_group_clustered_reduce_or( uint value, uint clustersize ); | ||
684 | +long __ovld sub_group_clustered_reduce_or( long value, uint clustersize ); | ||
685 | +ulong __ovld sub_group_clustered_reduce_or( ulong value, uint clustersize ); | ||
686 | + | ||
687 | +char __ovld sub_group_clustered_reduce_xor( char value, uint clustersize ); | ||
688 | +uchar __ovld sub_group_clustered_reduce_xor( uchar value, uint clustersize ); | ||
689 | +short __ovld sub_group_clustered_reduce_xor( short value, uint clustersize ); | ||
690 | +ushort __ovld sub_group_clustered_reduce_xor( ushort value, uint clustersize ); | ||
691 | +int __ovld sub_group_clustered_reduce_xor( int value, uint clustersize ); | ||
692 | +uint __ovld sub_group_clustered_reduce_xor( uint value, uint clustersize ); | ||
693 | +long __ovld sub_group_clustered_reduce_xor( long value, uint clustersize ); | ||
694 | +ulong __ovld sub_group_clustered_reduce_xor( ulong value, uint clustersize ); | ||
695 | + | ||
696 | +int __ovld sub_group_clustered_reduce_logical_and( int predicate, uint clustersize ); | ||
697 | +int __ovld sub_group_clustered_reduce_logical_or( int predicate, uint clustersize ); | ||
698 | +int __ovld sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize ); | ||
699 | + | ||
700 | +#if defined(cl_khr_fp16) | ||
701 | +half __ovld sub_group_clustered_reduce_add( half value, uint clustersize ); | ||
702 | +half __ovld sub_group_clustered_reduce_mul( half value, uint clustersize ); | ||
703 | +half __ovld sub_group_clustered_reduce_min( half value, uint clustersize ); | ||
704 | +half __ovld sub_group_clustered_reduce_max( half value, uint clustersize ); | ||
705 | +#endif // cl_khr_fp16 | ||
706 | + | ||
707 | +#if defined(cl_khr_fp64) | ||
708 | +double __ovld sub_group_clustered_reduce_add( double value, uint clustersize ); | ||
709 | +double __ovld sub_group_clustered_reduce_mul( double value, uint clustersize ); | ||
710 | +double __ovld sub_group_clustered_reduce_min( double value, uint clustersize ); | ||
711 | +double __ovld sub_group_clustered_reduce_max( double value, uint clustersize ); | ||
712 | +#endif // cl_khr_fp64 | ||
713 | + | ||
714 | +#endif // cl_khr_subgroup_clustered_reduce | ||
715 | + | ||
716 | #if defined(cl_intel_subgroups) | ||
717 | // Intel-Specific Sub Group Functions | ||
718 | float __ovld __conv intel_sub_group_shuffle( float x, uint c ); | ||
719 | diff --git a/clang/test/SemaOpenCL/extension-version.cl b/clang/test/SemaOpenCL/extension-version.cl | ||
720 | index 0e6bbb7d3bcd..86c78143a0eb 100644 | ||
721 | --- a/clang/test/SemaOpenCL/extension-version.cl | ||
722 | +++ b/clang/test/SemaOpenCL/extension-version.cl | ||
723 | @@ -333,3 +333,86 @@ | ||
724 | #endif | ||
725 | #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : enable | ||
726 | |||
727 | +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
728 | +#ifndef cl_khr_subgroup_extended_types | ||
729 | +#error "Missing cl_khr_subgroup_extended_types" | ||
730 | +#endif | ||
731 | +#else | ||
732 | +#ifdef cl_khr_subgroup_extended_types | ||
733 | +#error "Incorrect cl_khr_subgroup_extended_types define" | ||
734 | +#endif | ||
735 | +// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_extended_types' - ignoring}} | ||
736 | +#endif | ||
737 | +#pragma OPENCL EXTENSION cl_khr_subgroup_extended_types : enable | ||
738 | + | ||
739 | +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
740 | +#ifndef cl_khr_subgroup_non_uniform_vote | ||
741 | +#error "Missing cl_khr_subgroup_non_uniform_vote" | ||
742 | +#endif | ||
743 | +#else | ||
744 | +#ifdef cl_khr_subgroup_non_uniform_vote | ||
745 | +#error "Incorrect cl_khr_subgroup_non_uniform_vote define" | ||
746 | +#endif | ||
747 | +// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_vote' - ignoring}} | ||
748 | +#endif | ||
749 | +#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable | ||
750 | + | ||
751 | +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
752 | +#ifndef cl_khr_subgroup_ballot | ||
753 | +#error "Missing cl_khr_subgroup_ballot" | ||
754 | +#endif | ||
755 | +#else | ||
756 | +#ifdef cl_khr_subgroup_ballot | ||
757 | +#error "Incorrect cl_khr_subgroup_ballot define" | ||
758 | +#endif | ||
759 | +// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_ballot' - ignoring}} | ||
760 | +#endif | ||
761 | +#pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable | ||
762 | + | ||
763 | +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
764 | +#ifndef cl_khr_subgroup_non_uniform_arithmetic | ||
765 | +#error "Missing cl_khr_subgroup_non_uniform_arithmetic" | ||
766 | +#endif | ||
767 | +#else | ||
768 | +#ifdef cl_khr_subgroup_non_uniform_arithmetic | ||
769 | +#error "Incorrect cl_khr_subgroup_non_uniform_arithmetic define" | ||
770 | +#endif | ||
771 | +// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_arithmetic' - ignoring}} | ||
772 | +#endif | ||
773 | +#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable | ||
774 | + | ||
775 | +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
776 | +#ifndef cl_khr_subgroup_shuffle | ||
777 | +#error "Missing cl_khr_subgroup_shuffle" | ||
778 | +#endif | ||
779 | +#else | ||
780 | +#ifdef cl_khr_subgroup_shuffle | ||
781 | +#error "Incorrect cl_khr_subgroup_shuffle define" | ||
782 | +#endif | ||
783 | +// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle' - ignoring}} | ||
784 | +#endif | ||
785 | +#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable | ||
786 | + | ||
787 | +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
788 | +#ifndef cl_khr_subgroup_shuffle_relative | ||
789 | +#error "Missing cl_khr_subgroup_shuffle_relative" | ||
790 | +#endif | ||
791 | +#else | ||
792 | +#ifdef cl_khr_subgroup_shuffle_relative | ||
793 | +#error "Incorrect cl_khr_subgroup_shuffle_relative define" | ||
794 | +#endif | ||
795 | +// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle_relative' - ignoring}} | ||
796 | +#endif | ||
797 | +#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable | ||
798 | + | ||
799 | +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
800 | +#ifndef cl_khr_subgroup_clustered_reduce | ||
801 | +#error "Missing cl_khr_subgroup_clustered_reduce" | ||
802 | +#endif | ||
803 | +#else | ||
804 | +#ifdef cl_khr_subgroup_clustered_reduce | ||
805 | +#error "Incorrect cl_khr_subgroup_clustered_reduce define" | ||
806 | +#endif | ||
807 | +// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_clustered_reduce' - ignoring}} | ||
808 | +#endif | ||
809 | +#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable | ||
810 | -- | ||
811 | 2.17.1 | ||
812 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch deleted file mode 100644 index 1aff65e7..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch +++ /dev/null | |||
@@ -1,33 +0,0 @@ | |||
1 | From 331e323ae2633a8999a660314022491d670c442c Mon Sep 17 00:00:00 2001 | ||
2 | From: Andrea Bocci <andrea.bocci@cern.ch> | ||
3 | Date: Sun, 15 Mar 2020 17:35:44 +0100 | ||
4 | Subject: [PATCH 2/3] Fix building in-tree with cmake -DLLVM_LINK_LLVM_DYLIB=ON | ||
5 | |||
6 | Building in-tree with LLVM 11.0 master with the LLVM_LINK_LLVM_DYLIB | ||
7 | cmake flag fails to link with the LLVMSPIRVLib library. | ||
8 | |||
9 | Add an explicit dependency to force the correct build order and linking. | ||
10 | |||
11 | Signed-off-by: Andrea Bocci <andrea.bocci@cern.ch> | ||
12 | Upstream-Status: Backport | ||
13 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
14 | --- | ||
15 | tools/llvm-spirv/CMakeLists.txt | 2 +- | ||
16 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
17 | |||
18 | diff --git a/tools/llvm-spirv/CMakeLists.txt b/tools/llvm-spirv/CMakeLists.txt | ||
19 | index 9aa96d9c..501c0daf 100644 | ||
20 | --- a/tools/llvm-spirv/CMakeLists.txt | ||
21 | +++ b/tools/llvm-spirv/CMakeLists.txt | ||
22 | @@ -14,7 +14,7 @@ add_llvm_tool(llvm-spirv | ||
23 | NO_INSTALL_RPATH | ||
24 | ) | ||
25 | |||
26 | -if (LLVM_SPIRV_BUILD_EXTERNAL) | ||
27 | +if (LLVM_SPIRV_BUILD_EXTERNAL OR LLVM_LINK_LLVM_DYLIB) | ||
28 | target_link_libraries(llvm-spirv PRIVATE LLVMSPIRVLib) | ||
29 | endif() | ||
30 | |||
31 | -- | ||
32 | 2.17.1 | ||
33 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch deleted file mode 100644 index 49edd7e1..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch +++ /dev/null | |||
@@ -1,982 +0,0 @@ | |||
1 | From fbc9996d6490a5d4720b85b47f38335e7fdc99d9 Mon Sep 17 00:00:00 2001 | ||
2 | From: haonanya <haonan.yang@intel.com> | ||
3 | Date: Mon, 19 Jul 2021 10:14:20 +0800 | ||
4 | Subject: [PATCH 3/3] Add support for cl_ext_float_atomics in SPIRVWriter | ||
5 | |||
6 | Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/spirv/0001-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch] | ||
7 | |||
8 | Signed-off-by: haonanya <haonan.yang@intel.com> | ||
9 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
10 | --- | ||
11 | lib/SPIRV/OCL20ToSPIRV.cpp | 79 ++++++++++++++++-- | ||
12 | lib/SPIRV/SPIRVToOCL.h | 3 + | ||
13 | lib/SPIRV/SPIRVToOCL12.cpp | 21 +++++ | ||
14 | lib/SPIRV/SPIRVToOCL20.cpp | 28 ++++++- | ||
15 | lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 1 - | ||
16 | lib/SPIRV/libSPIRV/SPIRVOpCode.h | 8 +- | ||
17 | test/AtomicFAddEXTForOCL.ll | 64 +++++++++++++++ | ||
18 | test/AtomicFAddExt.ll | 111 ++++++++----------------- | ||
19 | test/AtomicFMaxEXT.ll | 113 +++++++------------------- | ||
20 | test/AtomicFMaxEXTForOCL.ll | 64 +++++++++++++++ | ||
21 | test/AtomicFMinEXT.ll | 113 +++++++------------------- | ||
22 | test/AtomicFMinEXTForOCL.ll | 64 +++++++++++++++ | ||
23 | test/InvalidAtomicBuiltins.cl | 8 -- | ||
24 | 13 files changed, 417 insertions(+), 260 deletions(-) | ||
25 | create mode 100644 test/AtomicFAddEXTForOCL.ll | ||
26 | create mode 100644 test/AtomicFMaxEXTForOCL.ll | ||
27 | create mode 100644 test/AtomicFMinEXTForOCL.ll | ||
28 | |||
29 | diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp | ||
30 | index e30aa5be..b676a009 100644 | ||
31 | --- a/lib/SPIRV/OCL20ToSPIRV.cpp | ||
32 | +++ b/lib/SPIRV/OCL20ToSPIRV.cpp | ||
33 | @@ -408,10 +408,63 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) { | ||
34 | if (DemangledName.find(kOCLBuiltinName::AtomicPrefix) == 0 || | ||
35 | DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0) { | ||
36 | |||
37 | - // Compute atomic builtins do not support floating types. | ||
38 | - if (CI.getType()->isFloatingPointTy() && | ||
39 | - isComputeAtomicOCLBuiltin(DemangledName)) | ||
40 | - return; | ||
41 | + // Compute "atom" prefixed builtins do not support floating types. | ||
42 | + if (CI.getType()->isFloatingPointTy()) { | ||
43 | + if (DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0) | ||
44 | + return; | ||
45 | + // handle functions which are "atomic_" prefixed. | ||
46 | + StringRef Stem = DemangledName; | ||
47 | + Stem = Stem.drop_front(strlen("atomic_")); | ||
48 | + // FP-typed atomic_{add, sub, inc, dec, exchange, min, max, or, and, xor, | ||
49 | + // fetch_or, fetch_xor, fetch_and, fetch_or_explicit, fetch_xor_explicit, | ||
50 | + // fetch_and_explicit} should be identified as function call | ||
51 | + bool IsFunctionCall = llvm::StringSwitch<bool>(Stem) | ||
52 | + .Case("add", true) | ||
53 | + .Case("sub", true) | ||
54 | + .Case("inc", true) | ||
55 | + .Case("dec", true) | ||
56 | + .Case("cmpxchg", true) | ||
57 | + .Case("min", true) | ||
58 | + .Case("max", true) | ||
59 | + .Case("or", true) | ||
60 | + .Case("xor", true) | ||
61 | + .Case("and", true) | ||
62 | + .Case("fetch_or", true) | ||
63 | + .Case("fetch_and", true) | ||
64 | + .Case("fetch_xor", true) | ||
65 | + .Case("fetch_or_explicit", true) | ||
66 | + .Case("fetch_xor_explicit", true) | ||
67 | + .Case("fetch_and_explicit", true) | ||
68 | + .Default(false); | ||
69 | + if (IsFunctionCall) | ||
70 | + return; | ||
71 | + if (F->arg_size() != 2) { | ||
72 | + IsFunctionCall = llvm::StringSwitch<bool>(Stem) | ||
73 | + .Case("exchange", true) | ||
74 | + .Case("fetch_add", true) | ||
75 | + .Case("fetch_sub", true) | ||
76 | + .Case("fetch_min", true) | ||
77 | + .Case("fetch_max", true) | ||
78 | + .Case("load", true) | ||
79 | + .Case("store", true) | ||
80 | + .Default(false); | ||
81 | + if (IsFunctionCall) | ||
82 | + return; | ||
83 | + } | ||
84 | + if (F->arg_size() != 3 && F->arg_size() != 4) { | ||
85 | + IsFunctionCall = llvm::StringSwitch<bool>(Stem) | ||
86 | + .Case("exchange_explicit", true) | ||
87 | + .Case("fetch_add_explicit", true) | ||
88 | + .Case("fetch_sub_explicit", true) | ||
89 | + .Case("fetch_min_explicit", true) | ||
90 | + .Case("fetch_max_explicit", true) | ||
91 | + .Case("load_explicit", true) | ||
92 | + .Case("store_explicit", true) | ||
93 | + .Default(false); | ||
94 | + if (IsFunctionCall) | ||
95 | + return; | ||
96 | + } | ||
97 | + } | ||
98 | |||
99 | auto PCI = &CI; | ||
100 | if (DemangledName == kOCLBuiltinName::AtomicInit) { | ||
101 | @@ -819,7 +872,7 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { | ||
102 | AttributeList Attrs = CI->getCalledFunction()->getAttributes(); | ||
103 | mutateCallInstSPIRV( | ||
104 | M, CI, | ||
105 | - [=](CallInst *CI, std::vector<Value *> &Args) { | ||
106 | + [=](CallInst *CI, std::vector<Value *> &Args) -> std::string { | ||
107 | Info.PostProc(Args); | ||
108 | // Order of args in OCL20: | ||
109 | // object, 0-2 other args, 1-2 order, scope | ||
110 | @@ -864,7 +917,21 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) { | ||
111 | std::rotate(Args.begin() + 2, Args.begin() + OrderIdx, | ||
112 | Args.end() - Offset); | ||
113 | } | ||
114 | - return getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName)); | ||
115 | + llvm::Type* AtomicBuiltinsReturnType = | ||
116 | + CI->getCalledFunction()->getReturnType(); | ||
117 | + auto IsFPType = [](llvm::Type *ReturnType) { | ||
118 | + return ReturnType->isHalfTy() || ReturnType->isFloatTy() || | ||
119 | + ReturnType->isDoubleTy(); | ||
120 | + }; | ||
121 | + auto SPIRVFunctionName = | ||
122 | + getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName)); | ||
123 | + if (!IsFPType(AtomicBuiltinsReturnType)) | ||
124 | + return SPIRVFunctionName; | ||
125 | + // Translate FP-typed atomic builtins. | ||
126 | + return llvm::StringSwitch<std::string>(SPIRVFunctionName) | ||
127 | + .Case("__spirv_AtomicIAdd", "__spirv_AtomicFAddEXT") | ||
128 | + .Case("__spirv_AtomicSMax", "__spirv_AtomicFMaxEXT") | ||
129 | + .Case("__spirv_AtomicSMin", "__spirv_AtomicFMinEXT"); | ||
130 | }, | ||
131 | &Attrs); | ||
132 | } | ||
133 | diff --git a/lib/SPIRV/SPIRVToOCL.h b/lib/SPIRV/SPIRVToOCL.h | ||
134 | index ddeec0b6..006fb0b1 100644 | ||
135 | --- a/lib/SPIRV/SPIRVToOCL.h | ||
136 | +++ b/lib/SPIRV/SPIRVToOCL.h | ||
137 | @@ -178,6 +178,9 @@ public: | ||
138 | /// using separate maps for OpenCL 1.2 and OpenCL 2.0 | ||
139 | virtual Instruction *mutateAtomicName(CallInst *CI, Op OC) = 0; | ||
140 | |||
141 | + // Transform FP atomic opcode to corresponding OpenCL function name | ||
142 | + virtual std::string mapFPAtomicName(Op OC) = 0; | ||
143 | + | ||
144 | private: | ||
145 | /// Transform uniform group opcode to corresponding OpenCL function name, | ||
146 | /// example: GroupIAdd(Reduce) => group_iadd => work_group_reduce_add | | ||
147 | diff --git a/lib/SPIRV/SPIRVToOCL12.cpp b/lib/SPIRV/SPIRVToOCL12.cpp | ||
148 | index afddd596..d7f00de3 100644 | ||
149 | --- a/lib/SPIRV/SPIRVToOCL12.cpp | ||
150 | +++ b/lib/SPIRV/SPIRVToOCL12.cpp | ||
151 | @@ -104,6 +104,9 @@ public: | ||
152 | /// cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions. | ||
153 | std::string mapAtomicName(Op OC, Type *Ty); | ||
154 | |||
155 | + // Transform FP atomic opcode to corresponding OpenCL function name | ||
156 | + std::string mapFPAtomicName(Op OC) override; | ||
157 | + | ||
158 | static char ID; | ||
159 | }; | ||
160 | |||
161 | @@ -338,6 +341,21 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicBuiltin(CallInst *CI, Op OC) { | ||
162 | return NewCI; | ||
163 | } | ||
164 | |||
165 | +std::string SPIRVToOCL12::mapFPAtomicName(Op OC) { | ||
166 | + assert(isFPAtomicOpCode(OC) && "Not intended to handle other opcodes than " | ||
167 | + "AtomicF{Add/Min/Max}EXT!"); | ||
168 | + switch (OC) { | ||
169 | + case OpAtomicFAddEXT: | ||
170 | + return "atomic_add"; | ||
171 | + case OpAtomicFMinEXT: | ||
172 | + return "atomic_min"; | ||
173 | + case OpAtomicFMaxEXT: | ||
174 | + return "atomic_max"; | ||
175 | + default: | ||
176 | + llvm_unreachable("Unsupported opcode!"); | ||
177 | + } | ||
178 | +} | ||
179 | + | ||
180 | Instruction *SPIRVToOCL12::mutateAtomicName(CallInst *CI, Op OC) { | ||
181 | AttributeList Attrs = CI->getCalledFunction()->getAttributes(); | ||
182 | return mutateCallInstOCL( | ||
183 | @@ -351,6 +369,9 @@ Instruction *SPIRVToOCL12::mutateAtomicName(CallInst *CI, Op OC) { | ||
184 | std::string SPIRVToOCL12::mapAtomicName(Op OC, Type *Ty) { | ||
185 | std::string Prefix = Ty->isIntegerTy(64) ? kOCLBuiltinName::AtomPrefix | ||
186 | : kOCLBuiltinName::AtomicPrefix; | ||
187 | + // Map fp atomic instructions to regular OpenCL built-ins. | ||
188 | + if (isFPAtomicOpCode(OC)) | ||
189 | + return mapFPAtomicName(OC); | ||
190 | return Prefix += OCL12SPIRVBuiltinMap::rmap(OC); | ||
191 | } | ||
192 | |||
193 | diff --git a/lib/SPIRV/SPIRVToOCL20.cpp b/lib/SPIRV/SPIRVToOCL20.cpp | ||
194 | index d829ff42..01d088e9 100644 | ||
195 | --- a/lib/SPIRV/SPIRVToOCL20.cpp | ||
196 | +++ b/lib/SPIRV/SPIRVToOCL20.cpp | ||
197 | @@ -82,6 +82,9 @@ public: | ||
198 | /// compare_exchange_strong/weak_explicit | ||
199 | Instruction *visitCallSPIRVAtomicCmpExchg(CallInst *CI, Op OC) override; | ||
200 | |||
201 | + // Transform FP atomic opcode to corresponding OpenCL function name | ||
202 | + std::string mapFPAtomicName(Op OC) override; | ||
203 | + | ||
204 | static char ID; | ||
205 | }; | ||
206 | |||
207 | @@ -144,11 +147,29 @@ void SPIRVToOCL20::visitCallSPIRVControlBarrier(CallInst *CI) { | ||
208 | &Attrs); | ||
209 | } | ||
210 | |||
211 | +std::string SPIRVToOCL20::mapFPAtomicName(Op OC) { | ||
212 | + assert(isFPAtomicOpCode(OC) && "Not intended to handle other opcodes than " | ||
213 | + "AtomicF{Add/Min/Max}EXT!"); | ||
214 | + switch (OC) { | ||
215 | + case OpAtomicFAddEXT: | ||
216 | + return "atomic_fetch_add_explicit"; | ||
217 | + case OpAtomicFMinEXT: | ||
218 | + return "atomic_fetch_min_explicit"; | ||
219 | + case OpAtomicFMaxEXT: | ||
220 | + return "atomic_fetch_max_explicit"; | ||
221 | + default: | ||
222 | + llvm_unreachable("Unsupported opcode!"); | ||
223 | + } | ||
224 | +} | ||
225 | + | ||
226 | Instruction *SPIRVToOCL20::mutateAtomicName(CallInst *CI, Op OC) { | ||
227 | AttributeList Attrs = CI->getCalledFunction()->getAttributes(); | ||
228 | return mutateCallInstOCL( | ||
229 | M, CI, | ||
230 | [=](CallInst *, std::vector<Value *> &Args) { | ||
231 | + // Map fp atomic instructions to regular OpenCL built-ins. | ||
232 | + if (isFPAtomicOpCode(OC)) | ||
233 | + return mapFPAtomicName(OC); | ||
234 | return OCLSPIRVBuiltinMap::rmap(OC); | ||
235 | }, | ||
236 | &Attrs); | ||
237 | @@ -215,7 +236,12 @@ CallInst *SPIRVToOCL20::mutateCommonAtomicArguments(CallInst *CI, Op OC) { | ||
238 | } | ||
239 | } | ||
240 | auto Ptr = findFirstPtr(Args); | ||
241 | - auto Name = OCLSPIRVBuiltinMap::rmap(OC); | ||
242 | + std::string Name; | ||
243 | + // Map fp atomic instructions to regular OpenCL built-ins. | ||
244 | + if (isFPAtomicOpCode(OC)) | ||
245 | + Name = mapFPAtomicName(OC); | ||
246 | + else | ||
247 | + Name = OCLSPIRVBuiltinMap::rmap(OC); | ||
248 | auto NumOrder = getSPIRVAtomicBuiltinNumMemoryOrderArgs(OC); | ||
249 | auto ScopeIdx = Ptr + 1; | ||
250 | auto OrderIdx = Ptr + 2; | ||
251 | diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | ||
252 | index 13f93fbe..7b707993 100644 | ||
253 | --- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | ||
254 | +++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | ||
255 | @@ -521,7 +521,6 @@ template <> inline void SPIRVMap<Capability, std::string>::init() { | ||
256 | add(CapabilityAtomicFloat64AddEXT, "AtomicFloat64AddEXT"); | ||
257 | add(CapabilityAtomicFloat32MinMaxEXT, "AtomicFloat32MinMaxEXT"); | ||
258 | add(CapabilityAtomicFloat64MinMaxEXT, "AtomicFloat64MinMaxEXT"); | ||
259 | - add(CapabilityAtomicFloat16MinMaxEXT, "AtomicFloat16MinMaxEXT"); | ||
260 | add(CapabilitySubgroupShuffleINTEL, "SubgroupShuffleINTEL"); | ||
261 | add(CapabilitySubgroupBufferBlockIOINTEL, "SubgroupBufferBlockIOINTEL"); | ||
262 | add(CapabilitySubgroupImageBlockIOINTEL, "SubgroupImageBlockIOINTEL"); | ||
263 | diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h | ||
264 | index feec70f6..8e595e83 100644 | ||
265 | --- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h | ||
266 | +++ b/lib/SPIRV/libSPIRV/SPIRVOpCode.h | ||
267 | @@ -54,11 +54,17 @@ template <> inline void SPIRVMap<Op, std::string>::init() { | ||
268 | } | ||
269 | SPIRV_DEF_NAMEMAP(Op, OpCodeNameMap) | ||
270 | |||
271 | +inline bool isFPAtomicOpCode(Op OpCode) { | ||
272 | + return OpCode == OpAtomicFAddEXT || OpCode == OpAtomicFMinEXT || | ||
273 | + OpCode == OpAtomicFMaxEXT; | ||
274 | +} | ||
275 | + | ||
276 | inline bool isAtomicOpCode(Op OpCode) { | ||
277 | static_assert(OpAtomicLoad < OpAtomicXor, ""); | ||
278 | return ((unsigned)OpCode >= OpAtomicLoad && | ||
279 | (unsigned)OpCode <= OpAtomicXor) || | ||
280 | - OpCode == OpAtomicFlagTestAndSet || OpCode == OpAtomicFlagClear; | ||
281 | + OpCode == OpAtomicFlagTestAndSet || OpCode == OpAtomicFlagClear || | ||
282 | + isFPAtomicOpCode(OpCode); | ||
283 | } | ||
284 | inline bool isBinaryOpCode(Op OpCode) { | ||
285 | return ((unsigned)OpCode >= OpIAdd && (unsigned)OpCode <= OpFMod) || | ||
286 | diff --git a/test/AtomicFAddEXTForOCL.ll b/test/AtomicFAddEXTForOCL.ll | ||
287 | new file mode 100644 | ||
288 | index 00000000..fb146fb9 | ||
289 | --- /dev/null | ||
290 | +++ b/test/AtomicFAddEXTForOCL.ll | ||
291 | @@ -0,0 +1,64 @@ | ||
292 | +; RUN: llvm-as %s -o %t.bc | ||
293 | +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_add -o %t.spv | ||
294 | +; RUN: spirv-val %t.spv | ||
295 | +; RUN: llvm-spirv -to-text %t.spv -o %t.spt | ||
296 | +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV | ||
297 | + | ||
298 | +; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc | ||
299 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20 | ||
300 | + | ||
301 | +; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc | ||
302 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV | ||
303 | + | ||
304 | +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
305 | +target triple = "spir-unknown-unknown" | ||
306 | + | ||
307 | +; CHECK-SPIRV: Capability AtomicFloat32AddEXT | ||
308 | +; CHECK-SPIRV: Capability AtomicFloat64AddEXT | ||
309 | +; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_add" | ||
310 | +; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32 | ||
311 | +; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64 | ||
312 | + | ||
313 | + | ||
314 | +; Function Attrs: convergent norecurse nounwind | ||
315 | +define dso_local spir_func void @test_atomic_float(float addrspace(1)* %a) local_unnamed_addr #0 { | ||
316 | +entry: | ||
317 | + ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_32]] | ||
318 | + ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}}) | ||
319 | + ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+fiif]]({{.*}}) | ||
320 | + %call = tail call spir_func float @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2 | ||
321 | + ret void | ||
322 | +} | ||
323 | + | ||
324 | +; Function Attrs: convergent | ||
325 | +declare spir_func float @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1 | ||
326 | +; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float | ||
327 | + | ||
328 | +; Function Attrs: convergent norecurse nounwind | ||
329 | +define dso_local spir_func void @test_atomic_double(double addrspace(1)* %a) local_unnamed_addr #0 { | ||
330 | +entry: | ||
331 | + ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_64]] | ||
332 | + ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}}) | ||
333 | + ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+diid]]({{.*}}) | ||
334 | + %call = tail call spir_func double @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2 | ||
335 | + ret void | ||
336 | +} | ||
337 | +; Function Attrs: convergent | ||
338 | +declare spir_func double @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1 | ||
339 | +; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double | ||
340 | + | ||
341 | +; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float | ||
342 | +; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double | ||
343 | + | ||
344 | +attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } | ||
345 | +attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } | ||
346 | +attributes #2 = { convergent nounwind } | ||
347 | + | ||
348 | +!llvm.module.flags = !{!0} | ||
349 | +!opencl.ocl.version = !{!1} | ||
350 | +!opencl.spir.version = !{!1} | ||
351 | +!llvm.ident = !{!2} | ||
352 | + | ||
353 | +!0 = !{i32 1, !"wchar_size", i32 4} | ||
354 | +!1 = !{i32 2, i32 0} | ||
355 | +!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"} | ||
356 | diff --git a/test/AtomicFAddExt.ll b/test/AtomicFAddExt.ll | ||
357 | index 011dd8a7..42bdfeea 100644 | ||
358 | --- a/test/AtomicFAddExt.ll | ||
359 | +++ b/test/AtomicFAddExt.ll | ||
360 | @@ -4,20 +4,16 @@ | ||
361 | ; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV | ||
362 | |||
363 | ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc | ||
364 | -; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM | ||
365 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12 | ||
366 | |||
367 | -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" | ||
368 | -target triple = "spir64-unknown-unknown-sycldevice" | ||
369 | - | ||
370 | -%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } | ||
371 | -%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } | ||
372 | -%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } | ||
373 | - | ||
374 | -$_ZTSZZ3addIfEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37 = comdat any | ||
375 | +; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc | ||
376 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20 | ||
377 | |||
378 | -$_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37 = comdat any | ||
379 | +; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc | ||
380 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV | ||
381 | |||
382 | -@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 | ||
383 | +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" | ||
384 | +target triple = "spir64-unknown-unknown-sycldevice" | ||
385 | |||
386 | ; CHECK-SPIRV: Capability AtomicFloat32AddEXT | ||
387 | ; CHECK-SPIRV: Capability AtomicFloat64AddEXT | ||
388 | @@ -25,62 +21,43 @@ $_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_3 | ||
389 | ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32 | ||
390 | ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64 | ||
391 | |||
392 | -; Function Attrs: convergent norecurse mustprogress | ||
393 | -define weak_odr dso_local spir_kernel void @_ZTSZZ3addIfEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { | ||
394 | +; Function Attrs: convergent norecurse nounwind | ||
395 | +define dso_local spir_func float @_Z14AtomicFloatIncRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 { | ||
396 | entry: | ||
397 | - %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 | ||
398 | - %1 = load i64, i64* %0, align 8 | ||
399 | - %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1 | ||
400 | - %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 | ||
401 | - %3 = load i64, i64* %2, align 8 | ||
402 | - %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3 | ||
403 | - %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5 | ||
404 | - %5 = extractelement <3 x i64> %4, i64 0 | ||
405 | + %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)* | ||
406 | ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_32]] | ||
407 | - ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+]]({{.*}}) | ||
408 | - %call3.i.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float 1.000000e+00) #2 | ||
409 | - %add.i.i = fadd float %call3.i.i.i.i, 1.000000e+00 | ||
410 | - %sext.i = shl i64 %5, 32 | ||
411 | - %conv5.i = ashr exact i64 %sext.i, 32 | ||
412 | - %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv5.i | ||
413 | - %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)* | ||
414 | - store float %add.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14 | ||
415 | - ret void | ||
416 | + ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_add[[:alnum:]]+ff]]({{.*}}) | ||
417 | + ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}}) | ||
418 | + ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+fiif]]({{.*}}) | ||
419 | + %call3.i.i = tail call spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2 | ||
420 | + ret float %call3.i.i | ||
421 | } | ||
422 | |||
423 | ; Function Attrs: convergent | ||
424 | -; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float) | ||
425 | declare dso_local spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1 | ||
426 | +; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float | ||
427 | |||
428 | -; Function Attrs: convergent norecurse mustprogress | ||
429 | -define weak_odr dso_local spir_kernel void @_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { | ||
430 | +; Function Attrs: convergent norecurse nounwind | ||
431 | +define dso_local spir_func double @_Z15AtomicDoubleIncRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 { | ||
432 | entry: | ||
433 | - %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 | ||
434 | - %1 = load i64, i64* %0, align 8 | ||
435 | - %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1 | ||
436 | - %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 | ||
437 | - %3 = load i64, i64* %2, align 8 | ||
438 | - %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3 | ||
439 | - %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18 | ||
440 | - %5 = extractelement <3 x i64> %4, i64 0 | ||
441 | + %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)* | ||
442 | ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_64]] | ||
443 | - ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+]]({{.*}}) | ||
444 | - %call3.i.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double 1.000000e+00) #2 | ||
445 | - %add.i.i = fadd double %call3.i.i.i.i, 1.000000e+00 | ||
446 | - %sext.i = shl i64 %5, 32 | ||
447 | - %conv5.i = ashr exact i64 %sext.i, 32 | ||
448 | - %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv5.i | ||
449 | - %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)* | ||
450 | - store double %add.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27 | ||
451 | - ret void | ||
452 | + ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_add[[:alnum:]]+dd]]({{.*}}) | ||
453 | + ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}}) | ||
454 | + ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+diid]]({{.*}}) | ||
455 | + %call3.i.i = tail call spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2 | ||
456 | + ret double %call3.i.i | ||
457 | } | ||
458 | |||
459 | ; Function Attrs: convergent | ||
460 | -; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double) | ||
461 | declare dso_local spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1 | ||
462 | +; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double | ||
463 | |||
464 | -attributes #0 = { convergent norecurse } | ||
465 | -attributes #1 = { convergent } | ||
466 | +; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float | ||
467 | +; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double | ||
468 | + | ||
469 | +attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
470 | +attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
471 | attributes #2 = { convergent nounwind } | ||
472 | |||
473 | !llvm.module.flags = !{!0} | ||
474 | @@ -91,29 +68,5 @@ attributes #2 = { convergent nounwind } | ||
475 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
476 | !1 = !{i32 1, i32 2} | ||
477 | !2 = !{i32 4, i32 100000} | ||
478 | -!3 = !{!"clang version 12.0.0"} | ||
479 | -!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} | ||
480 | -!5 = !{!6, !8, !10, !12} | ||
481 | -!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} | ||
482 | -!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} | ||
483 | -!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} | ||
484 | -!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} | ||
485 | -!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} | ||
486 | -!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} | ||
487 | -!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} | ||
488 | -!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} | ||
489 | -!14 = !{!15, !15, i64 0} | ||
490 | -!15 = !{!"float", !16, i64 0} | ||
491 | -!16 = !{!"omnipotent char", !17, i64 0} | ||
492 | -!17 = !{!"Simple C++ TBAA"} | ||
493 | -!18 = !{!19, !21, !23, !25} | ||
494 | -!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} | ||
495 | -!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} | ||
496 | -!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} | ||
497 | -!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} | ||
498 | -!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} | ||
499 | -!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} | ||
500 | -!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} | ||
501 | -!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} | ||
502 | -!27 = !{!28, !28, i64 0} | ||
503 | -!28 = !{!"double", !16, i64 0} | ||
504 | +!3 = !{!"clang version 13.0.0"} | ||
505 | + | ||
506 | diff --git a/test/AtomicFMaxEXT.ll b/test/AtomicFMaxEXT.ll | ||
507 | index 1b81e53b..1c2eec93 100644 | ||
508 | --- a/test/AtomicFMaxEXT.ll | ||
509 | +++ b/test/AtomicFMaxEXT.ll | ||
510 | @@ -4,20 +4,16 @@ | ||
511 | ; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV | ||
512 | |||
513 | ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc | ||
514 | -; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM | ||
515 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12 | ||
516 | |||
517 | -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" | ||
518 | -target triple = "spir64-unknown-unknown-sycldevice" | ||
519 | - | ||
520 | -%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } | ||
521 | -%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } | ||
522 | -%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } | ||
523 | - | ||
524 | -$_ZTSZZ8max_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any | ||
525 | +; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc | ||
526 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20 | ||
527 | |||
528 | -$_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any | ||
529 | +; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc | ||
530 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV | ||
531 | |||
532 | -@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 | ||
533 | +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" | ||
534 | +target triple = "spir64-unknown-unknown-sycldevice" | ||
535 | |||
536 | ; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT | ||
537 | ; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT | ||
538 | @@ -25,65 +21,42 @@ $_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4item | ||
539 | ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32 | ||
540 | ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64 | ||
541 | |||
542 | -; Function Attrs: convergent norecurse | ||
543 | -define weak_odr dso_local spir_kernel void @_ZTSZZ8max_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { | ||
544 | +; Function Attrs: convergent norecurse nounwind | ||
545 | +define dso_local spir_func float @_Z14AtomicFloatMaxRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 { | ||
546 | entry: | ||
547 | - %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 | ||
548 | - %1 = load i64, i64* %0, align 8 | ||
549 | - %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1 | ||
550 | - %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 | ||
551 | - %3 = load i64, i64* %2, align 8 | ||
552 | - %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3 | ||
553 | - %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5 | ||
554 | - %5 = extractelement <3 x i64> %4, i64 0 | ||
555 | - %conv.i = trunc i64 %5 to i32 | ||
556 | - %conv3.i = sitofp i32 %conv.i to float | ||
557 | - %add.i = fadd float %conv3.i, 1.000000e+00 | ||
558 | + %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)* | ||
559 | ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_32]] | ||
560 | - ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+]]({{.*}}) | ||
561 | - %call3.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float %add.i) #2 | ||
562 | - %sext.i = shl i64 %5, 32 | ||
563 | - %conv6.i = ashr exact i64 %sext.i, 32 | ||
564 | - %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv6.i | ||
565 | - %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)* | ||
566 | - store float %call3.i.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14 | ||
567 | - ret void | ||
568 | + ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_max[[:alnum:]]+ff]]({{.*}}) | ||
569 | + ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}}) | ||
570 | + ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+fiif]]({{.*}}) | ||
571 | + %call.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2 | ||
572 | + ret float %call.i.i.i | ||
573 | } | ||
574 | |||
575 | ; Function Attrs: convergent | ||
576 | -; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float) | ||
577 | declare dso_local spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1 | ||
578 | +; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float | ||
579 | |||
580 | -; Function Attrs: convergent norecurse | ||
581 | -define weak_odr dso_local spir_kernel void @_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { | ||
582 | +; Function Attrs: convergent norecurse nounwind | ||
583 | +define dso_local spir_func double @_Z15AtomicDoubleMaxRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 { | ||
584 | entry: | ||
585 | - %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 | ||
586 | - %1 = load i64, i64* %0, align 8 | ||
587 | - %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1 | ||
588 | - %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 | ||
589 | - %3 = load i64, i64* %2, align 8 | ||
590 | - %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3 | ||
591 | - %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18 | ||
592 | - %5 = extractelement <3 x i64> %4, i64 0 | ||
593 | - %conv.i = trunc i64 %5 to i32 | ||
594 | - %conv3.i = sitofp i32 %conv.i to double | ||
595 | - %add.i = fadd double %conv3.i, 1.000000e+00 | ||
596 | + %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)* | ||
597 | ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_64]] | ||
598 | - ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+]]({{.*}}) | ||
599 | - %call3.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double %add.i) #2 | ||
600 | - %sext.i = shl i64 %5, 32 | ||
601 | - %conv6.i = ashr exact i64 %sext.i, 32 | ||
602 | - %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv6.i | ||
603 | - %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)* | ||
604 | - store double %call3.i.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27 | ||
605 | - ret void | ||
606 | + ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_max[[:alnum:]]+dd]]({{.*}}) | ||
607 | + ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}}) | ||
608 | + ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+diid]]({{.*}}) | ||
609 | + %call.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2 | ||
610 | + ret double %call.i.i.i | ||
611 | } | ||
612 | |||
613 | ; Function Attrs: convergent | ||
614 | -; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double) | ||
615 | declare dso_local spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1 | ||
616 | +; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double | ||
617 | |||
618 | -attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
619 | +; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float | ||
620 | +; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double | ||
621 | + | ||
622 | +attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
623 | attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
624 | attributes #2 = { convergent nounwind } | ||
625 | |||
626 | @@ -95,29 +68,5 @@ attributes #2 = { convergent nounwind } | ||
627 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
628 | !1 = !{i32 1, i32 2} | ||
629 | !2 = !{i32 4, i32 100000} | ||
630 | -!3 = !{!"clang version 12.0.0"} | ||
631 | -!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} | ||
632 | -!5 = !{!6, !8, !10, !12} | ||
633 | -!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} | ||
634 | -!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} | ||
635 | -!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} | ||
636 | -!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} | ||
637 | -!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} | ||
638 | -!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} | ||
639 | -!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} | ||
640 | -!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} | ||
641 | -!14 = !{!15, !15, i64 0} | ||
642 | -!15 = !{!"float", !16, i64 0} | ||
643 | -!16 = !{!"omnipotent char", !17, i64 0} | ||
644 | -!17 = !{!"Simple C++ TBAA"} | ||
645 | -!18 = !{!19, !21, !23, !25} | ||
646 | -!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} | ||
647 | -!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} | ||
648 | -!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} | ||
649 | -!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} | ||
650 | -!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} | ||
651 | -!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} | ||
652 | -!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} | ||
653 | -!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} | ||
654 | -!27 = !{!28, !28, i64 0} | ||
655 | -!28 = !{!"double", !16, i64 0} | ||
656 | +!3 = !{!"clang version 13.0.0"} | ||
657 | + | ||
658 | diff --git a/test/AtomicFMaxEXTForOCL.ll b/test/AtomicFMaxEXTForOCL.ll | ||
659 | new file mode 100644 | ||
660 | index 00000000..1f2530d9 | ||
661 | --- /dev/null | ||
662 | +++ b/test/AtomicFMaxEXTForOCL.ll | ||
663 | @@ -0,0 +1,64 @@ | ||
664 | +; RUN: llvm-as %s -o %t.bc | ||
665 | +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_min_max -o %t.spv | ||
666 | +; RUN: spirv-val %t.spv | ||
667 | +; RUN: llvm-spirv -to-text %t.spv -o %t.spt | ||
668 | +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV | ||
669 | + | ||
670 | +; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc | ||
671 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20 | ||
672 | + | ||
673 | +; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc | ||
674 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV | ||
675 | + | ||
676 | +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
677 | +target triple = "spir-unknown-unknown" | ||
678 | + | ||
679 | +; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT | ||
680 | +; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT | ||
681 | +; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_min_max" | ||
682 | +; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32 | ||
683 | +; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64 | ||
684 | + | ||
685 | +; Function Attrs: convergent norecurse nounwind | ||
686 | +define dso_local spir_func void @test_float(float addrspace(1)* %a) local_unnamed_addr #0 { | ||
687 | +entry: | ||
688 | + ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_32]] | ||
689 | + ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}}) | ||
690 | + ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+fiif]]({{.*}}) | ||
691 | + %call = tail call spir_func float @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2 | ||
692 | + ret void | ||
693 | +} | ||
694 | + | ||
695 | +; Function Attrs: convergent | ||
696 | +declare spir_func float @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1 | ||
697 | +; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float | ||
698 | + | ||
699 | +; Function Attrs: convergent norecurse nounwind | ||
700 | +define dso_local spir_func void @test_double(double addrspace(1)* %a) local_unnamed_addr #0 { | ||
701 | +entry: | ||
702 | + ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_64]] | ||
703 | + ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}}) | ||
704 | + ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+diid]]({{.*}}) | ||
705 | + %call = tail call spir_func double @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2 | ||
706 | + ret void | ||
707 | +} | ||
708 | + | ||
709 | +; Function Attrs: convergent | ||
710 | +declare spir_func double @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1 | ||
711 | +; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double | ||
712 | + | ||
713 | +; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float | ||
714 | +; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double | ||
715 | + | ||
716 | +attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } | ||
717 | +attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } | ||
718 | +attributes #2 = { convergent nounwind } | ||
719 | + | ||
720 | +!llvm.module.flags = !{!0} | ||
721 | +!opencl.ocl.version = !{!1} | ||
722 | +!opencl.spir.version = !{!1} | ||
723 | +!llvm.ident = !{!2} | ||
724 | + | ||
725 | +!0 = !{i32 1, !"wchar_size", i32 4} | ||
726 | +!1 = !{i32 2, i32 0} | ||
727 | +!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"} | ||
728 | diff --git a/test/AtomicFMinEXT.ll b/test/AtomicFMinEXT.ll | ||
729 | index 98c98b8e..9e40a669 100644 | ||
730 | --- a/test/AtomicFMinEXT.ll | ||
731 | +++ b/test/AtomicFMinEXT.ll | ||
732 | @@ -4,20 +4,16 @@ | ||
733 | ; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV | ||
734 | |||
735 | ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc | ||
736 | -; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM | ||
737 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12 | ||
738 | |||
739 | -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" | ||
740 | -target triple = "spir64-unknown-unknown-sycldevice" | ||
741 | - | ||
742 | -%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } | ||
743 | -%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } | ||
744 | -%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } | ||
745 | - | ||
746 | -$_ZTSZZ8min_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any | ||
747 | +; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc | ||
748 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20 | ||
749 | |||
750 | -$_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any | ||
751 | +; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc | ||
752 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV | ||
753 | |||
754 | -@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 | ||
755 | +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" | ||
756 | +target triple = "spir64-unknown-unknown-sycldevice" | ||
757 | |||
758 | ; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT | ||
759 | ; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT | ||
760 | @@ -25,65 +21,42 @@ $_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4item | ||
761 | ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32 | ||
762 | ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64 | ||
763 | |||
764 | -; Function Attrs: convergent norecurse | ||
765 | -define weak_odr dso_local spir_kernel void @_ZTSZZ8min_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { | ||
766 | +; Function Attrs: convergent norecurse nounwind | ||
767 | +define dso_local spir_func float @_Z14AtomicFloatMinRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 { | ||
768 | entry: | ||
769 | - %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 | ||
770 | - %1 = load i64, i64* %0, align 8 | ||
771 | - %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1 | ||
772 | - %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 | ||
773 | - %3 = load i64, i64* %2, align 8 | ||
774 | - %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3 | ||
775 | - %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5 | ||
776 | - %5 = extractelement <3 x i64> %4, i64 0 | ||
777 | - %conv.i = trunc i64 %5 to i32 | ||
778 | - %conv3.i = sitofp i32 %conv.i to float | ||
779 | - %add.i = fadd float %conv3.i, 1.000000e+00 | ||
780 | + %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)* | ||
781 | ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_32]] | ||
782 | - ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+]]({{.*}}) | ||
783 | - %call3.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float %add.i) #2 | ||
784 | - %sext.i = shl i64 %5, 32 | ||
785 | - %conv6.i = ashr exact i64 %sext.i, 32 | ||
786 | - %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv6.i | ||
787 | - %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)* | ||
788 | - store float %call3.i.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14 | ||
789 | - ret void | ||
790 | + ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_min[[:alnum:]]+ff]]({{.*}}) | ||
791 | + ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}}) | ||
792 | + ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+fiif]]({{.*}}) | ||
793 | + %call.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2 | ||
794 | + ret float %call.i.i.i | ||
795 | } | ||
796 | |||
797 | ; Function Attrs: convergent | ||
798 | -; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float) | ||
799 | declare dso_local spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1 | ||
800 | +; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float | ||
801 | |||
802 | -; Function Attrs: convergent norecurse | ||
803 | -define weak_odr dso_local spir_kernel void @_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { | ||
804 | +; Function Attrs: convergent norecurse nounwind | ||
805 | +define dso_local spir_func double @_Z15AtomicDoubleMinRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 { | ||
806 | entry: | ||
807 | - %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 | ||
808 | - %1 = load i64, i64* %0, align 8 | ||
809 | - %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1 | ||
810 | - %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0 | ||
811 | - %3 = load i64, i64* %2, align 8 | ||
812 | - %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3 | ||
813 | - %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18 | ||
814 | - %5 = extractelement <3 x i64> %4, i64 0 | ||
815 | - %conv.i = trunc i64 %5 to i32 | ||
816 | - %conv3.i = sitofp i32 %conv.i to double | ||
817 | - %add.i = fadd double %conv3.i, 1.000000e+00 | ||
818 | + %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)* | ||
819 | ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_64]] | ||
820 | - ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+]]({{.*}}) | ||
821 | - %call3.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double %add.i) #2 | ||
822 | - %sext.i = shl i64 %5, 32 | ||
823 | - %conv6.i = ashr exact i64 %sext.i, 32 | ||
824 | - %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv6.i | ||
825 | - %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)* | ||
826 | - store double %call3.i.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27 | ||
827 | - ret void | ||
828 | + ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_min[[:alnum:]]+dd]]({{.*}}) | ||
829 | + ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}}) | ||
830 | + ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+diid]]({{.*}}) | ||
831 | + %call.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2 | ||
832 | + ret double %call.i.i.i | ||
833 | } | ||
834 | |||
835 | ; Function Attrs: convergent | ||
836 | -; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double) | ||
837 | declare dso_local spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1 | ||
838 | +; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double | ||
839 | |||
840 | -attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
841 | +; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float | ||
842 | +; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double | ||
843 | + | ||
844 | +attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
845 | attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } | ||
846 | attributes #2 = { convergent nounwind } | ||
847 | |||
848 | @@ -95,29 +68,5 @@ attributes #2 = { convergent nounwind } | ||
849 | !0 = !{i32 1, !"wchar_size", i32 4} | ||
850 | !1 = !{i32 1, i32 2} | ||
851 | !2 = !{i32 4, i32 100000} | ||
852 | -!3 = !{!"clang version 12.0.0 (https://github.com/otcshare/llvm.git 67add71766d55d6a8d8d894822f583d6365a3b7d)"} | ||
853 | -!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} | ||
854 | -!5 = !{!6, !8, !10, !12} | ||
855 | -!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} | ||
856 | -!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} | ||
857 | -!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} | ||
858 | -!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} | ||
859 | -!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} | ||
860 | -!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} | ||
861 | -!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} | ||
862 | -!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} | ||
863 | -!14 = !{!15, !15, i64 0} | ||
864 | -!15 = !{!"float", !16, i64 0} | ||
865 | -!16 = !{!"omnipotent char", !17, i64 0} | ||
866 | -!17 = !{!"Simple C++ TBAA"} | ||
867 | -!18 = !{!19, !21, !23, !25} | ||
868 | -!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"} | ||
869 | -!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"} | ||
870 | -!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"} | ||
871 | -!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"} | ||
872 | -!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"} | ||
873 | -!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"} | ||
874 | -!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"} | ||
875 | -!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"} | ||
876 | -!27 = !{!28, !28, i64 0} | ||
877 | -!28 = !{!"double", !16, i64 0} | ||
878 | +!3 = !{!"clang version 13.0.0"} | ||
879 | + | ||
880 | diff --git a/test/AtomicFMinEXTForOCL.ll b/test/AtomicFMinEXTForOCL.ll | ||
881 | new file mode 100644 | ||
882 | index 00000000..6196b0f8 | ||
883 | --- /dev/null | ||
884 | +++ b/test/AtomicFMinEXTForOCL.ll | ||
885 | @@ -0,0 +1,64 @@ | ||
886 | +; RUN: llvm-as %s -o %t.bc | ||
887 | +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_min_max -o %t.spv | ||
888 | +; RUN: spirv-val %t.spv | ||
889 | +; RUN: llvm-spirv -to-text %t.spv -o %t.spt | ||
890 | +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV | ||
891 | + | ||
892 | +; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc | ||
893 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20 | ||
894 | + | ||
895 | +; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc | ||
896 | +; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV | ||
897 | + | ||
898 | +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | ||
899 | +target triple = "spir-unknown-unknown" | ||
900 | + | ||
901 | +; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT | ||
902 | +; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT | ||
903 | +; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_min_max" | ||
904 | +; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32 | ||
905 | +; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64 | ||
906 | + | ||
907 | +; Function Attrs: convergent norecurse nounwind | ||
908 | +define dso_local spir_func void @test_float(float addrspace(1)* %a) local_unnamed_addr #0 { | ||
909 | +entry: | ||
910 | + ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_32]] | ||
911 | + ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}}) | ||
912 | + ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+fiif]]({{.*}}) | ||
913 | + %call = tail call spir_func float @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2 | ||
914 | + ret void | ||
915 | +} | ||
916 | + | ||
917 | +; Function Attrs: convergent | ||
918 | +declare spir_func float @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1 | ||
919 | +; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float | ||
920 | + | ||
921 | +; Function Attrs: convergent norecurse nounwind | ||
922 | +define dso_local spir_func void @test_double(double addrspace(1)* %a) local_unnamed_addr #0 { | ||
923 | +entry: | ||
924 | + ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_64]] | ||
925 | + ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}}) | ||
926 | + ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+diid]]({{.*}}) | ||
927 | + %call = tail call spir_func double @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2 | ||
928 | + ret void | ||
929 | +} | ||
930 | + | ||
931 | +; Function Attrs: convergent | ||
932 | +declare spir_func double @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1 | ||
933 | +; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double | ||
934 | + | ||
935 | +; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float | ||
936 | +; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double | ||
937 | + | ||
938 | +attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } | ||
939 | +attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } | ||
940 | +attributes #2 = { convergent nounwind } | ||
941 | + | ||
942 | +!llvm.module.flags = !{!0} | ||
943 | +!opencl.ocl.version = !{!1} | ||
944 | +!opencl.spir.version = !{!1} | ||
945 | +!llvm.ident = !{!2} | ||
946 | + | ||
947 | +!0 = !{i32 1, !"wchar_size", i32 4} | ||
948 | +!1 = !{i32 2, i32 0} | ||
949 | +!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"} | ||
950 | diff --git a/test/InvalidAtomicBuiltins.cl b/test/InvalidAtomicBuiltins.cl | ||
951 | index b8ec5b89..2182f070 100644 | ||
952 | --- a/test/InvalidAtomicBuiltins.cl | ||
953 | +++ b/test/InvalidAtomicBuiltins.cl | ||
954 | @@ -41,13 +41,9 @@ float __attribute__((overloadable)) atomic_fetch_xor(volatile generic atomic_flo | ||
955 | double __attribute__((overloadable)) atomic_fetch_and(volatile generic atomic_double *object, double operand, memory_order order); | ||
956 | double __attribute__((overloadable)) atomic_fetch_max(volatile generic atomic_double *object, double operand, memory_order order); | ||
957 | double __attribute__((overloadable)) atomic_fetch_min(volatile generic atomic_double *object, double operand, memory_order order); | ||
958 | -float __attribute__((overloadable)) atomic_fetch_add_explicit(volatile generic atomic_float *object, float operand, memory_order order); | ||
959 | -float __attribute__((overloadable)) atomic_fetch_sub_explicit(volatile generic atomic_float *object, float operand, memory_order order); | ||
960 | float __attribute__((overloadable)) atomic_fetch_or_explicit(volatile generic atomic_float *object, float operand, memory_order order); | ||
961 | float __attribute__((overloadable)) atomic_fetch_xor_explicit(volatile generic atomic_float *object, float operand, memory_order order); | ||
962 | double __attribute__((overloadable)) atomic_fetch_and_explicit(volatile generic atomic_double *object, double operand, memory_order order); | ||
963 | -double __attribute__((overloadable)) atomic_fetch_max_explicit(volatile generic atomic_double *object, double operand, memory_order order); | ||
964 | -double __attribute__((overloadable)) atomic_fetch_min_explicit(volatile generic atomic_double *object, double operand, memory_order order); | ||
965 | |||
966 | __kernel void test_atomic_fn(volatile __global float *p, | ||
967 | volatile __global double *pp, | ||
968 | @@ -86,11 +82,7 @@ __kernel void test_atomic_fn(volatile __global float *p, | ||
969 | d = atomic_fetch_and(pp, val, order); | ||
970 | d = atomic_fetch_min(pp, val, order); | ||
971 | d = atomic_fetch_max(pp, val, order); | ||
972 | - f = atomic_fetch_add_explicit(p, val, order); | ||
973 | - f = atomic_fetch_sub_explicit(p, val, order); | ||
974 | f = atomic_fetch_or_explicit(p, val, order); | ||
975 | f = atomic_fetch_xor_explicit(p, val, order); | ||
976 | d = atomic_fetch_and_explicit(pp, val, order); | ||
977 | - d = atomic_fetch_min_explicit(pp, val, order); | ||
978 | - d = atomic_fetch_max_explicit(pp, val, order); | ||
979 | } | ||
980 | -- | ||
981 | 2.17.1 | ||
982 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch deleted file mode 100644 index 3b035f47..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch +++ /dev/null | |||
@@ -1,35 +0,0 @@ | |||
1 | From cfb18b75e8a353bc7486f337541476a36994b063 Mon Sep 17 00:00:00 2001 | ||
2 | From: juanrod2 <> | ||
3 | Date: Tue, 22 Dec 2020 08:33:08 +0800 | ||
4 | Subject: [PATCH 3/7] Memory leak fix for Managed Static Mutex | ||
5 | |||
6 | Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch] | ||
7 | |||
8 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
9 | |||
10 | Cleaning a mutex inside ManagedStatic llvm class. | ||
11 | --- | ||
12 | llvm/lib/Support/ManagedStatic.cpp | 6 +++++- | ||
13 | 1 file changed, 5 insertions(+), 1 deletion(-) | ||
14 | |||
15 | diff --git a/llvm/lib/Support/ManagedStatic.cpp b/llvm/lib/Support/ManagedStatic.cpp | ||
16 | index 053493f72fb5..6571580ccecf 100644 | ||
17 | --- a/llvm/lib/Support/ManagedStatic.cpp | ||
18 | +++ b/llvm/lib/Support/ManagedStatic.cpp | ||
19 | @@ -76,8 +76,12 @@ void ManagedStaticBase::destroy() const { | ||
20 | |||
21 | /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. | ||
22 | void llvm::llvm_shutdown() { | ||
23 | - std::lock_guard<std::recursive_mutex> Lock(*getManagedStaticMutex()); | ||
24 | + getManagedStaticMutex()->lock(); | ||
25 | |||
26 | while (StaticList) | ||
27 | StaticList->destroy(); | ||
28 | + | ||
29 | + getManagedStaticMutex()->unlock(); | ||
30 | + delete ManagedStaticMutex; | ||
31 | + ManagedStaticMutex = nullptr; | ||
32 | } | ||
33 | -- | ||
34 | 2.17.1 | ||
35 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0004-Remove-repo-name-in-LLVM-IR.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0004-Remove-repo-name-in-LLVM-IR.patch deleted file mode 100644 index f8dec996..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0004-Remove-repo-name-in-LLVM-IR.patch +++ /dev/null | |||
@@ -1,49 +0,0 @@ | |||
1 | From b794037bf1f90a93efa4c542855ad569cb13b4c5 Mon Sep 17 00:00:00 2001 | ||
2 | From: Feng Zou <feng.zou@intel.com> | ||
3 | Date: Mon, 19 Oct 2020 14:43:38 +0800 | ||
4 | Subject: [PATCH 4/7] Remove repo name in LLVM IR | ||
5 | |||
6 | Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0003-Remove-repo-name-in-LLVM-IR.patch] | ||
7 | Signed-off-by: Feng Zou <feng.zou@intel.com> | ||
8 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
9 | --- | ||
10 | llvm/cmake/modules/VersionFromVCS.cmake | 23 ++++++++++++----------- | ||
11 | 1 file changed, 12 insertions(+), 11 deletions(-) | ||
12 | |||
13 | diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake | ||
14 | index 1b6519b4b7c4..8fd6b23bb345 100644 | ||
15 | --- a/llvm/cmake/modules/VersionFromVCS.cmake | ||
16 | +++ b/llvm/cmake/modules/VersionFromVCS.cmake | ||
17 | @@ -33,17 +33,18 @@ function(get_source_info path revision repository) | ||
18 | else() | ||
19 | set(remote "origin") | ||
20 | endif() | ||
21 | - execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote} | ||
22 | - WORKING_DIRECTORY ${path} | ||
23 | - RESULT_VARIABLE git_result | ||
24 | - OUTPUT_VARIABLE git_output | ||
25 | - ERROR_QUIET) | ||
26 | - if(git_result EQUAL 0) | ||
27 | - string(STRIP "${git_output}" git_output) | ||
28 | - set(${repository} ${git_output} PARENT_SCOPE) | ||
29 | - else() | ||
30 | - set(${repository} ${path} PARENT_SCOPE) | ||
31 | - endif() | ||
32 | + # Do not show repo name in IR | ||
33 | + # execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote} | ||
34 | + # WORKING_DIRECTORY ${path} | ||
35 | + # RESULT_VARIABLE git_result | ||
36 | + # OUTPUT_VARIABLE git_output | ||
37 | + # ERROR_QUIET) | ||
38 | + # if(git_result EQUAL 0) | ||
39 | + # string(STRIP "${git_output}" git_output) | ||
40 | + # set(${repository} ${git_output} PARENT_SCOPE) | ||
41 | + # else() | ||
42 | + # set(${repository} ${path} PARENT_SCOPE) | ||
43 | + # endif() | ||
44 | endif() | ||
45 | endif() | ||
46 | endfunction() | ||
47 | -- | ||
48 | 2.17.1 | ||
49 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch deleted file mode 100644 index f8f177e5..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch +++ /dev/null | |||
@@ -1,47 +0,0 @@ | |||
1 | From 3dd4766499d25e5978a5d90001f18e657e875da0 Mon Sep 17 00:00:00 2001 | ||
2 | From: haonanya <haonan.yang@intel.com> | ||
3 | Date: Thu, 12 Aug 2021 15:48:34 +0800 | ||
4 | Subject: [PATCH 5/7] Remove __IMAGE_SUPPORT__ macro for SPIR since SPIR | ||
5 | doesn't require image support | ||
6 | |||
7 | Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0003-Remove-__IMAGE_SUPPORT__-macro-for-SPIR.patch] | ||
8 | |||
9 | Signed-off-by: haonanya <haonan.yang@intel.com> | ||
10 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
11 | --- | ||
12 | clang/lib/Frontend/InitPreprocessor.cpp | 3 --- | ||
13 | clang/test/Preprocessor/predefined-macros.c | 4 ---- | ||
14 | 2 files changed, 7 deletions(-) | ||
15 | |||
16 | diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp | ||
17 | index aefd208e6cd3..b4a84636673a 100644 | ||
18 | --- a/clang/lib/Frontend/InitPreprocessor.cpp | ||
19 | +++ b/clang/lib/Frontend/InitPreprocessor.cpp | ||
20 | @@ -1108,9 +1108,6 @@ static void InitializePredefinedMacros(const TargetInfo &TI, | ||
21 | if (TI.getSupportedOpenCLOpts().isSupported(#Ext)) \ | ||
22 | Builder.defineMacro(#Ext); | ||
23 | #include "clang/Basic/OpenCLExtensions.def" | ||
24 | - | ||
25 | - if (TI.getTriple().isSPIR()) | ||
26 | - Builder.defineMacro("__IMAGE_SUPPORT__"); | ||
27 | } | ||
28 | |||
29 | if (TI.hasInt128Type() && LangOpts.CPlusPlus && LangOpts.GNUMode) { | ||
30 | diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c | ||
31 | index b088a37ba665..39a222d02faf 100644 | ||
32 | --- a/clang/test/Preprocessor/predefined-macros.c | ||
33 | +++ b/clang/test/Preprocessor/predefined-macros.c | ||
34 | @@ -184,10 +184,6 @@ | ||
35 | // MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1 | ||
36 | // MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0 | ||
37 | |||
38 | -// RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spir-unknown-unknown \ | ||
39 | -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIR | ||
40 | -// CHECK-SPIR: #define __IMAGE_SUPPORT__ 1 | ||
41 | - | ||
42 | // RUN: %clang_cc1 %s -E -dM -o - -x hip -triple amdgcn-amd-amdhsa \ | ||
43 | // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-HIP | ||
44 | // CHECK-HIP-NOT: #define __CUDA_ARCH__ | ||
45 | -- | ||
46 | 2.17.1 | ||
47 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch deleted file mode 100644 index 0b4ee8c7..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch +++ /dev/null | |||
@@ -1,53 +0,0 @@ | |||
1 | From 2c53abd0008bbecfcfe871c6060f4bbf1c94c74a Mon Sep 17 00:00:00 2001 | ||
2 | From: Raphael Isemann <teemperor@gmail.com> | ||
3 | Date: Thu, 1 Apr 2021 18:41:44 +0200 | ||
4 | Subject: [PATCH 6/7] Avoid calling ParseCommandLineOptions in BackendUtil if | ||
5 | possible | ||
6 | |||
7 | Calling `ParseCommandLineOptions` should only be called from `main` as the | ||
8 | CommandLine setup code isn't thread-safe. As BackendUtil is part of the | ||
9 | generic Clang FrontendAction logic, a process which has several threads executing | ||
10 | Clang FrontendActions will randomly crash in the unsafe setup code. | ||
11 | |||
12 | This patch avoids calling the function unless either the debug-pass option or | ||
13 | limit-float-precision option is set. Without these two options set the | ||
14 | `ParseCommandLineOptions` call doesn't do anything beside parsing | ||
15 | the command line `clang` which doesn't set any options. | ||
16 | |||
17 | See also D99652 where LLDB received a workaround for this crash. | ||
18 | |||
19 | Reviewed By: JDevlieghere | ||
20 | |||
21 | Differential Revision: https://reviews.llvm.org/D99740 | ||
22 | |||
23 | Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0004-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch] | ||
24 | |||
25 | Signed-off-by: Raphael Isemann <teemperor@gmail.com> | ||
26 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
27 | --- | ||
28 | clang/lib/CodeGen/BackendUtil.cpp | 8 ++++++++ | ||
29 | 1 file changed, 8 insertions(+) | ||
30 | |||
31 | diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp | ||
32 | index 0bfcab88a3a9..db8fd4166d7a 100644 | ||
33 | --- a/clang/lib/CodeGen/BackendUtil.cpp | ||
34 | +++ b/clang/lib/CodeGen/BackendUtil.cpp | ||
35 | @@ -743,7 +743,15 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) { | ||
36 | BackendArgs.push_back("-limit-float-precision"); | ||
37 | BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str()); | ||
38 | } | ||
39 | + // Check for the default "clang" invocation that won't set any cl::opt values. | ||
40 | + // Skip trying to parse the command line invocation to avoid the issues | ||
41 | + // described below. | ||
42 | + if (BackendArgs.size() == 1) | ||
43 | + return; | ||
44 | BackendArgs.push_back(nullptr); | ||
45 | + // FIXME: The command line parser below is not thread-safe and shares a global | ||
46 | + // state, so this call might crash or overwrite the options of another Clang | ||
47 | + // instance in the same process. | ||
48 | llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1, | ||
49 | BackendArgs.data()); | ||
50 | } | ||
51 | -- | ||
52 | 2.17.1 | ||
53 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0007-support-cl_ext_float_atomics.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0007-support-cl_ext_float_atomics.patch deleted file mode 100644 index f7d191ff..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0007-support-cl_ext_float_atomics.patch +++ /dev/null | |||
@@ -1,377 +0,0 @@ | |||
1 | From a685de6fc45afcdbe4a7120e9d5b33e175dd71cd Mon Sep 17 00:00:00 2001 | ||
2 | From: haonanya <haonan.yang@intel.com> | ||
3 | Date: Fri, 13 Aug 2021 10:00:02 +0800 | ||
4 | Subject: [PATCH 7/7] support cl_ext_float_atomics | ||
5 | |||
6 | Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch] | ||
7 | |||
8 | Signed-off-by: haonanya <haonan.yang@intel.com> | ||
9 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
10 | --- | ||
11 | clang/lib/Headers/opencl-c-base.h | 25 ++++ | ||
12 | clang/lib/Headers/opencl-c.h | 208 ++++++++++++++++++++++++++ | ||
13 | clang/test/Headers/opencl-c-header.cl | 96 ++++++++++++ | ||
14 | 3 files changed, 329 insertions(+) | ||
15 | |||
16 | diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h | ||
17 | index 2cc688ccc3da..86bbee12fdf8 100644 | ||
18 | --- a/clang/lib/Headers/opencl-c-base.h | ||
19 | +++ b/clang/lib/Headers/opencl-c-base.h | ||
20 | @@ -14,6 +14,31 @@ | ||
21 | #define CL_VERSION_3_0 300 | ||
22 | #endif | ||
23 | |||
24 | +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
25 | +// For SPIR all extensions are supported. | ||
26 | +#if defined(__SPIR__) | ||
27 | +#define cl_ext_float_atomics 1 | ||
28 | +#ifdef cl_khr_fp16 | ||
29 | +#define __opencl_c_ext_fp16_global_atomic_load_store 1 | ||
30 | +#define __opencl_c_ext_fp16_local_atomic_load_store 1 | ||
31 | +#define __opencl_c_ext_fp16_global_atomic_add 1 | ||
32 | +#define __opencl_c_ext_fp16_local_atomic_add 1 | ||
33 | +#define __opencl_c_ext_fp16_global_atomic_min_max 1 | ||
34 | +#define __opencl_c_ext_fp16_local_atomic_min_max 1 | ||
35 | +#endif | ||
36 | +#ifdef __opencl_c_fp64 | ||
37 | +#define __opencl_c_ext_fp64_global_atomic_add 1 | ||
38 | +#define __opencl_c_ext_fp64_local_atomic_add 1 | ||
39 | +#define __opencl_c_ext_fp64_global_atomic_min_max 1 | ||
40 | +#define __opencl_c_ext_fp64_local_atomic_min_max 1 | ||
41 | +#endif | ||
42 | +#define __opencl_c_ext_fp32_global_atomic_add 1 | ||
43 | +#define __opencl_c_ext_fp32_local_atomic_add 1 | ||
44 | +#define __opencl_c_ext_fp32_global_atomic_min_max 1 | ||
45 | +#define __opencl_c_ext_fp32_local_atomic_min_max 1 | ||
46 | +#endif // defined(__SPIR__) | ||
47 | +#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
48 | + | ||
49 | // Define features for 2.0 for header backward compatibility | ||
50 | #ifndef __opencl_c_int64 | ||
51 | #define __opencl_c_int64 1 | ||
52 | diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h | ||
53 | index 67d900eb1c3d..b463e702d95e 100644 | ||
54 | --- a/clang/lib/Headers/opencl-c.h | ||
55 | +++ b/clang/lib/Headers/opencl-c.h | ||
56 | @@ -14354,6 +14354,214 @@ intptr_t __ovld atomic_fetch_max_explicit( | ||
57 | // defined(cl_khr_int64_extended_atomics) | ||
58 | #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0) | ||
59 | |||
60 | +#if defined(cl_ext_float_atomics) | ||
61 | + | ||
62 | +#if defined(__opencl_c_ext_fp32_global_atomic_min_max) | ||
63 | +float __ovld atomic_fetch_min(volatile __global atomic_float *object, | ||
64 | + float operand); | ||
65 | +float __ovld atomic_fetch_max(volatile __global atomic_float *object, | ||
66 | + float operand); | ||
67 | +float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object, | ||
68 | + float operand, memory_order order); | ||
69 | +float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object, | ||
70 | + float operand, memory_order order); | ||
71 | +float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object, | ||
72 | + float operand, memory_order order, | ||
73 | + memory_scope scope); | ||
74 | +float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object, | ||
75 | + float operand, memory_order order, | ||
76 | + memory_scope scope); | ||
77 | +#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) | ||
78 | + | ||
79 | +#if defined(__opencl_c_ext_fp32_local_atomic_min_max) | ||
80 | +float __ovld atomic_fetch_min(volatile __local atomic_float *object, | ||
81 | + float operand); | ||
82 | +float __ovld atomic_fetch_max(volatile __local atomic_float *object, | ||
83 | + float operand); | ||
84 | +float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object, | ||
85 | + float operand, memory_order order); | ||
86 | +float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object, | ||
87 | + float operand, memory_order order); | ||
88 | +float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object, | ||
89 | + float operand, memory_order order, | ||
90 | + memory_scope scope); | ||
91 | +float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object, | ||
92 | + float operand, memory_order order, | ||
93 | + memory_scope scope); | ||
94 | +#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max) | ||
95 | + | ||
96 | +#if defined(__opencl_c_ext_fp32_global_atomic_min_max) || \ | ||
97 | + defined(__opencl_c_ext_fp32_local_atomic_min_max) | ||
98 | +float __ovld atomic_fetch_min(volatile atomic_float *object, float operand); | ||
99 | +float __ovld atomic_fetch_max(volatile atomic_float *object, float operand); | ||
100 | +float __ovld atomic_fetch_min_explicit(volatile atomic_float *object, | ||
101 | + float operand, memory_order order); | ||
102 | +float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, | ||
103 | + float operand, memory_order order); | ||
104 | +float __ovld atomic_fetch_min_explicit(volatile atomic_float *object, | ||
105 | + float operand, memory_order order, | ||
106 | + memory_scope scope); | ||
107 | +float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, | ||
108 | + float operand, memory_order order, | ||
109 | + memory_scope scope); | ||
110 | +#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) || \ | ||
111 | + defined(__opencl_c_ext_fp32_local_atomic_min_max) | ||
112 | + | ||
113 | +#if defined(__opencl_c_ext_fp64_global_atomic_min_max) | ||
114 | +double __ovld atomic_fetch_min(volatile __global atomic_double *object, | ||
115 | + double operand); | ||
116 | +double __ovld atomic_fetch_max(volatile __global atomic_double *object, | ||
117 | + double operand); | ||
118 | +double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object, | ||
119 | + double operand, memory_order order); | ||
120 | +double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object, | ||
121 | + double operand, memory_order order); | ||
122 | +double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object, | ||
123 | + double operand, memory_order order, | ||
124 | + memory_scope scope); | ||
125 | +double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object, | ||
126 | + double operand, memory_order order, | ||
127 | + memory_scope scope); | ||
128 | +#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) | ||
129 | + | ||
130 | +#if defined(__opencl_c_ext_fp64_local_atomic_min_max) | ||
131 | +double __ovld atomic_fetch_min(volatile __local atomic_double *object, | ||
132 | + double operand); | ||
133 | +double __ovld atomic_fetch_max(volatile __local atomic_double *object, | ||
134 | + double operand); | ||
135 | +double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object, | ||
136 | + double operand, memory_order order); | ||
137 | +double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object, | ||
138 | + double operand, memory_order order); | ||
139 | +double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object, | ||
140 | + double operand, memory_order order, | ||
141 | + memory_scope scope); | ||
142 | +double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object, | ||
143 | + double operand, memory_order order, | ||
144 | + memory_scope scope); | ||
145 | +#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max) | ||
146 | + | ||
147 | +#if defined(__opencl_c_ext_fp64_global_atomic_min_max) || \ | ||
148 | + defined(__opencl_c_ext_fp64_local_atomic_min_max) | ||
149 | +double __ovld atomic_fetch_min(volatile atomic_double *object, double operand); | ||
150 | +double __ovld atomic_fetch_max(volatile atomic_double *object, double operand); | ||
151 | +double __ovld atomic_fetch_min_explicit(volatile atomic_double *object, | ||
152 | + double operand, memory_order order); | ||
153 | +double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, | ||
154 | + double operand, memory_order order); | ||
155 | +double __ovld atomic_fetch_min_explicit(volatile atomic_double *object, | ||
156 | + double operand, memory_order order, | ||
157 | + memory_scope scope); | ||
158 | +double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, | ||
159 | + double operand, memory_order order, | ||
160 | + memory_scope scope); | ||
161 | +#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) || \ | ||
162 | + defined(__opencl_c_ext_fp64_local_atomic_min_max) | ||
163 | + | ||
164 | +#if defined(__opencl_c_ext_fp32_global_atomic_add) | ||
165 | +float __ovld atomic_fetch_add(volatile __global atomic_float *object, | ||
166 | + float operand); | ||
167 | +float __ovld atomic_fetch_sub(volatile __global atomic_float *object, | ||
168 | + float operand); | ||
169 | +float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object, | ||
170 | + float operand, memory_order order); | ||
171 | +float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object, | ||
172 | + float operand, memory_order order); | ||
173 | +float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object, | ||
174 | + float operand, memory_order order, | ||
175 | + memory_scope scope); | ||
176 | +float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object, | ||
177 | + float operand, memory_order order, | ||
178 | + memory_scope scope); | ||
179 | +#endif // defined(__opencl_c_ext_fp32_global_atomic_add) | ||
180 | + | ||
181 | +#if defined(__opencl_c_ext_fp32_local_atomic_add) | ||
182 | +float __ovld atomic_fetch_add(volatile __local atomic_float *object, | ||
183 | + float operand); | ||
184 | +float __ovld atomic_fetch_sub(volatile __local atomic_float *object, | ||
185 | + float operand); | ||
186 | +float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object, | ||
187 | + float operand, memory_order order); | ||
188 | +float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object, | ||
189 | + float operand, memory_order order); | ||
190 | +float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object, | ||
191 | + float operand, memory_order order, | ||
192 | + memory_scope scope); | ||
193 | +float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object, | ||
194 | + float operand, memory_order order, | ||
195 | + memory_scope scope); | ||
196 | +#endif // defined(__opencl_c_ext_fp32_local_atomic_add) | ||
197 | + | ||
198 | +#if defined(__opencl_c_ext_fp32_global_atomic_add) || \ | ||
199 | + defined(__opencl_c_ext_fp32_local_atomic_add) | ||
200 | +float __ovld atomic_fetch_add(volatile atomic_float *object, float operand); | ||
201 | +float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand); | ||
202 | +float __ovld atomic_fetch_add_explicit(volatile atomic_float *object, | ||
203 | + float operand, memory_order order); | ||
204 | +float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, | ||
205 | + float operand, memory_order order); | ||
206 | +float __ovld atomic_fetch_add_explicit(volatile atomic_float *object, | ||
207 | + float operand, memory_order order, | ||
208 | + memory_scope scope); | ||
209 | +float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, | ||
210 | + float operand, memory_order order, | ||
211 | + memory_scope scope); | ||
212 | +#endif // defined(__opencl_c_ext_fp32_global_atomic_add) || \ | ||
213 | + defined(__opencl_c_ext_fp32_local_atomic_add) | ||
214 | + | ||
215 | +#if defined(__opencl_c_ext_fp64_global_atomic_add) | ||
216 | +double __ovld atomic_fetch_add(volatile __global atomic_double *object, | ||
217 | + double operand); | ||
218 | +double __ovld atomic_fetch_sub(volatile __global atomic_double *object, | ||
219 | + double operand); | ||
220 | +double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object, | ||
221 | + double operand, memory_order order); | ||
222 | +double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object, | ||
223 | + double operand, memory_order order); | ||
224 | +double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object, | ||
225 | + double operand, memory_order order, | ||
226 | + memory_scope scope); | ||
227 | +double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object, | ||
228 | + double operand, memory_order order, | ||
229 | + memory_scope scope); | ||
230 | +#endif // defined(__opencl_c_ext_fp64_global_atomic_add) | ||
231 | + | ||
232 | +#if defined(__opencl_c_ext_fp64_local_atomic_add) | ||
233 | +double __ovld atomic_fetch_add(volatile __local atomic_double *object, | ||
234 | + double operand); | ||
235 | +double __ovld atomic_fetch_sub(volatile __local atomic_double *object, | ||
236 | + double operand); | ||
237 | +double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object, | ||
238 | + double operand, memory_order order); | ||
239 | +double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object, | ||
240 | + double operand, memory_order order); | ||
241 | +double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object, | ||
242 | + double operand, memory_order order, | ||
243 | + memory_scope scope); | ||
244 | +double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object, | ||
245 | + double operand, memory_order order, | ||
246 | + memory_scope scope); | ||
247 | +#endif // defined(__opencl_c_ext_fp64_local_atomic_add) | ||
248 | + | ||
249 | +#if defined(__opencl_c_ext_fp64_global_atomic_add) || \ | ||
250 | + defined(__opencl_c_ext_fp64_local_atomic_add) | ||
251 | +double __ovld atomic_fetch_add(volatile atomic_double *object, double operand); | ||
252 | +double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand); | ||
253 | +double __ovld atomic_fetch_add_explicit(volatile atomic_double *object, | ||
254 | + double operand, memory_order order); | ||
255 | +double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, | ||
256 | + double operand, memory_order order); | ||
257 | +double __ovld atomic_fetch_add_explicit(volatile atomic_double *object, | ||
258 | + double operand, memory_order order, | ||
259 | + memory_scope scope); | ||
260 | +double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, | ||
261 | + double operand, memory_order order, | ||
262 | + memory_scope scope); | ||
263 | +#endif // defined(__opencl_c_ext_fp64_global_atomic_add) || \ | ||
264 | + defined(__opencl_c_ext_fp64_local_atomic_add) | ||
265 | + | ||
266 | +#endif // cl_ext_float_atomics | ||
267 | + | ||
268 | // atomic_store() | ||
269 | |||
270 | #if defined(__opencl_c_atomic_scope_device) && \ | ||
271 | diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl | ||
272 | index 2716076acdcf..7f720cf28142 100644 | ||
273 | --- a/clang/test/Headers/opencl-c-header.cl | ||
274 | +++ b/clang/test/Headers/opencl-c-header.cl | ||
275 | @@ -98,3 +98,99 @@ global atomic_int z = ATOMIC_VAR_INIT(99); | ||
276 | #pragma OPENCL EXTENSION cl_intel_planar_yuv : enable | ||
277 | |||
278 | // CHECK-MOD: Reading modules | ||
279 | + | ||
280 | +// For SPIR all extensions are supported. | ||
281 | +#if defined(__SPIR__) | ||
282 | + | ||
283 | +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
284 | + | ||
285 | +#if __opencl_c_ext_fp16_global_atomic_load_store != 1 | ||
286 | +#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_load_store" | ||
287 | +#endif | ||
288 | +#if __opencl_c_ext_fp16_local_atomic_load_store != 1 | ||
289 | +#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_load_store" | ||
290 | +#endif | ||
291 | +#if __opencl_c_ext_fp16_global_atomic_add != 1 | ||
292 | +#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_add" | ||
293 | +#endif | ||
294 | +#if __opencl_c_ext_fp32_global_atomic_add != 1 | ||
295 | +#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_add" | ||
296 | +#endif | ||
297 | +#if __opencl_c_ext_fp64_global_atomic_add != 1 | ||
298 | +#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_add" | ||
299 | +#endif | ||
300 | +#if __opencl_c_ext_fp16_local_atomic_add != 1 | ||
301 | +#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_add" | ||
302 | +#endif | ||
303 | +#if __opencl_c_ext_fp32_local_atomic_add != 1 | ||
304 | +#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_add" | ||
305 | +#endif | ||
306 | +#if __opencl_c_ext_fp64_local_atomic_add != 1 | ||
307 | +#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_add" | ||
308 | +#endif | ||
309 | +#if __opencl_c_ext_fp16_global_atomic_min_max != 1 | ||
310 | +#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_min_max" | ||
311 | +#endif | ||
312 | +#if __opencl_c_ext_fp32_global_atomic_min_max != 1 | ||
313 | +#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_min_max" | ||
314 | +#endif | ||
315 | +#if __opencl_c_ext_fp64_global_atomic_min_max != 1 | ||
316 | +#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_min_max" | ||
317 | +#endif | ||
318 | +#if __opencl_c_ext_fp16_local_atomic_min_max != 1 | ||
319 | +#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_min_max" | ||
320 | +#endif | ||
321 | +#if __opencl_c_ext_fp32_local_atomic_min_max != 1 | ||
322 | +#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_min_max" | ||
323 | +#endif | ||
324 | +#if __opencl_c_ext_fp64_local_atomic_min_max != 1 | ||
325 | +#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_min_max" | ||
326 | +#endif | ||
327 | +#else | ||
328 | + | ||
329 | +#ifdef __opencl_c_ext_fp16_global_atomic_load_store | ||
330 | +#error "Incorrectly __opencl_c_ext_fp16_global_atomic_load_store defined" | ||
331 | +#endif | ||
332 | +#ifdef __opencl_c_ext_fp16_local_atomic_load_store | ||
333 | +#error "Incorrectly __opencl_c_ext_fp16_local_atomic_load_store defined" | ||
334 | +#endif | ||
335 | +#ifdef __opencl_c_ext_fp16_global_atomic_add | ||
336 | +#error "Incorrectly __opencl_c_ext_fp16_global_atomic_add defined" | ||
337 | +#endif | ||
338 | +#ifdef __opencl_c_ext_fp32_global_atomic_add | ||
339 | +#error "Incorrectly __opencl_c_ext_fp32_global_atomic_add defined" | ||
340 | +#endif | ||
341 | +#ifdef __opencl_c_ext_fp64_global_atomic_add | ||
342 | +#error "Incorrectly __opencl_c_ext_fp64_global_atomic_add defined" | ||
343 | +#endif | ||
344 | +#ifdef __opencl_c_ext_fp16_local_atomic_add | ||
345 | +#error "Incorrectly __opencl_c_ext_fp16_local_atomic_add defined" | ||
346 | +#endif | ||
347 | +#ifdef __opencl_c_ext_fp32_local_atomic_add | ||
348 | +#error "Incorrectly __opencl_c_ext_fp32_local_atomic_add defined" | ||
349 | +#endif | ||
350 | +#ifdef __opencl_c_ext_fp64_local_atomic_add | ||
351 | +#error "Incorrectly __opencl_c_ext_fp64_local_atomic_add defined" | ||
352 | +#endif | ||
353 | +#ifdef __opencl_c_ext_fp16_global_atomic_min_max | ||
354 | +#error "Incorrectly __opencl_c_ext_fp16_global_atomic_min_max defined" | ||
355 | +#endif | ||
356 | +#ifdef __opencl_c_ext_fp32_global_atomic_min_max | ||
357 | +#error "Incorrectly __opencl_c_ext_fp32_global_atomic_min_max defined" | ||
358 | +#endif | ||
359 | +#ifdef __opencl_c_ext_fp64_global_atomic_min_max | ||
360 | +#error "Incorrectly __opencl_c_ext_fp64_global_atomic_min_max defined" | ||
361 | +#endif | ||
362 | +#ifdef __opencl_c_ext_fp16_local_atomic_min_max | ||
363 | +#error "Incorrectly __opencl_c_ext_fp16_local_atomic_min_max defined" | ||
364 | +#endif | ||
365 | +#ifdef __opencl_c_ext_fp32_local_atomic_min_max | ||
366 | +#error "Incorrectly __opencl_c_ext_fp32_local_atomic_min_max defined" | ||
367 | +#endif | ||
368 | +#ifdef __opencl_c_ext_fp64_local_atomic_min_max | ||
369 | +#error "Incorrectly __opencl_c_ext_fp64_local_atomic_min_max defined" | ||
370 | +#endif | ||
371 | + | ||
372 | +#endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
373 | + | ||
374 | +#endif // defined(__SPIR__) | ||
375 | -- | ||
376 | 2.17.1 | ||
377 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch deleted file mode 100644 index 09be8202..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch +++ /dev/null | |||
@@ -1,96 +0,0 @@ | |||
1 | From 294ca2fd69a077b35acec9d498120d6cb0324dae Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Fri, 27 Aug 2021 11:53:27 +0800 | ||
4 | Subject: [PATCH 1/2] This patch is required to fix the crash referenced to in | ||
5 | #1767 | ||
6 | |||
7 | It is a port of the following llvm 11.0 commit : https://reviews.llvm.org/D76994. | ||
8 | |||
9 | Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/41f13f1f64d2074ae7512fb23656c22585e912bd] | ||
10 | |||
11 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
12 | --- | ||
13 | .../CodeGen/SelectionDAG/LegalizeTypes.cpp | 3 +- | ||
14 | llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 31 ++++++++++++------- | ||
15 | 2 files changed, 21 insertions(+), 13 deletions(-) | ||
16 | |||
17 | diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | ||
18 | index 63ddb59fce68..822da2183269 100644 | ||
19 | --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | ||
20 | +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | ||
21 | @@ -173,7 +173,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { | ||
22 | } | ||
23 | } | ||
24 | } | ||
25 | - | ||
26 | +#ifndef NDEBUG | ||
27 | // Checked that NewNodes are only used by other NewNodes. | ||
28 | for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) { | ||
29 | SDNode *N = NewNodes[i]; | ||
30 | @@ -181,6 +181,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { | ||
31 | UI != UE; ++UI) | ||
32 | assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!"); | ||
33 | } | ||
34 | +#endif | ||
35 | } | ||
36 | |||
37 | /// This is the main entry point for the type legalizer. This does a top-down | ||
38 | diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | ||
39 | index faae14444d51..b908c5c58e9f 100644 | ||
40 | --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | ||
41 | +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | ||
42 | @@ -155,7 +155,9 @@ private: | ||
43 | const SDValue &getSDValue(TableId &Id) { | ||
44 | RemapId(Id); | ||
45 | assert(Id && "TableId should be non-zero"); | ||
46 | - return IdToValueMap[Id]; | ||
47 | + auto I = IdToValueMap.find(Id); | ||
48 | + assert(I != IdToValueMap.end() && "cannot find Id in map"); | ||
49 | + return I->second; | ||
50 | } | ||
51 | |||
52 | public: | ||
53 | @@ -172,24 +174,29 @@ public: | ||
54 | bool run(); | ||
55 | |||
56 | void NoteDeletion(SDNode *Old, SDNode *New) { | ||
57 | + assert(Old != New && "node replaced with self"); | ||
58 | for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { | ||
59 | TableId NewId = getTableId(SDValue(New, i)); | ||
60 | TableId OldId = getTableId(SDValue(Old, i)); | ||
61 | |||
62 | - if (OldId != NewId) | ||
63 | + if (OldId != NewId) { | ||
64 | ReplacedValues[OldId] = NewId; | ||
65 | |||
66 | - // Delete Node from tables. | ||
67 | + // Delete Node from tables. We cannot do this when OldId == NewId, | ||
68 | + // because NewId can still have table references to it in | ||
69 | + // ReplacedValues. | ||
70 | + IdToValueMap.erase(OldId); | ||
71 | + PromotedIntegers.erase(OldId); | ||
72 | + ExpandedIntegers.erase(OldId); | ||
73 | + SoftenedFloats.erase(OldId); | ||
74 | + PromotedFloats.erase(OldId); | ||
75 | + ExpandedFloats.erase(OldId); | ||
76 | + ScalarizedVectors.erase(OldId); | ||
77 | + SplitVectors.erase(OldId); | ||
78 | + WidenedVectors.erase(OldId); | ||
79 | + } | ||
80 | + | ||
81 | ValueToIdMap.erase(SDValue(Old, i)); | ||
82 | - IdToValueMap.erase(OldId); | ||
83 | - PromotedIntegers.erase(OldId); | ||
84 | - ExpandedIntegers.erase(OldId); | ||
85 | - SoftenedFloats.erase(OldId); | ||
86 | - PromotedFloats.erase(OldId); | ||
87 | - ExpandedFloats.erase(OldId); | ||
88 | - ScalarizedVectors.erase(OldId); | ||
89 | - SplitVectors.erase(OldId); | ||
90 | - WidenedVectors.erase(OldId); | ||
91 | } | ||
92 | } | ||
93 | |||
94 | -- | ||
95 | 2.17.1 | ||
96 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch deleted file mode 100644 index 72877d83..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch +++ /dev/null | |||
@@ -1,105 +0,0 @@ | |||
1 | From d266087e8dba9e8fd4984e1cb85c20376e2c8ea3 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Fri, 27 Aug 2021 11:56:01 +0800 | ||
4 | Subject: [PATCH 2/2] This patch is a fix for #1788. | ||
5 | |||
6 | It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D81698 | ||
7 | This also needed part of another llvm 11.0 commit: https://reviews.llvm.org/D72975 | ||
8 | |||
9 | Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/aeb50448019ce1b1002f3781f9647d486320d83c] | ||
10 | |||
11 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
12 | --- | ||
13 | llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++++++--- | ||
14 | .../InstCombine/InstructionCombining.cpp | 27 +++++++++++++++++-- | ||
15 | 2 files changed, 44 insertions(+), 5 deletions(-) | ||
16 | |||
17 | diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h | ||
18 | index 6621fc9f819c..fb7ad93519f6 100644 | ||
19 | --- a/llvm/include/llvm/IR/PatternMatch.h | ||
20 | +++ b/llvm/include/llvm/IR/PatternMatch.h | ||
21 | @@ -152,8 +152,10 @@ inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) { | ||
22 | |||
23 | struct apint_match { | ||
24 | const APInt *&Res; | ||
25 | + bool AllowUndef; | ||
26 | |||
27 | - apint_match(const APInt *&R) : Res(R) {} | ||
28 | + apint_match(const APInt *&Res, bool AllowUndef) | ||
29 | + : Res(Res), AllowUndef(AllowUndef) {} | ||
30 | |||
31 | template <typename ITy> bool match(ITy *V) { | ||
32 | if (auto *CI = dyn_cast<ConstantInt>(V)) { | ||
33 | @@ -162,7 +164,8 @@ struct apint_match { | ||
34 | } | ||
35 | if (V->getType()->isVectorTy()) | ||
36 | if (const auto *C = dyn_cast<Constant>(V)) | ||
37 | - if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) { | ||
38 | + if (auto *CI = dyn_cast_or_null<ConstantInt>( | ||
39 | + C->getSplatValue(AllowUndef))) { | ||
40 | Res = &CI->getValue(); | ||
41 | return true; | ||
42 | } | ||
43 | @@ -192,7 +195,20 @@ struct apfloat_match { | ||
44 | |||
45 | /// Match a ConstantInt or splatted ConstantVector, binding the | ||
46 | /// specified pointer to the contained APInt. | ||
47 | -inline apint_match m_APInt(const APInt *&Res) { return Res; } | ||
48 | +inline apint_match m_APInt(const APInt *&Res) { | ||
49 | + // Forbid undefs by default to maintain previous behavior. | ||
50 | + return apint_match(Res, /* AllowUndef */ false); | ||
51 | +} | ||
52 | + | ||
53 | +/// Match APInt while allowing undefs in splat vector constants. | ||
54 | +inline apint_match m_APIntAllowUndef(const APInt *&Res) { | ||
55 | + return apint_match(Res, /* AllowUndef */ true); | ||
56 | +} | ||
57 | + | ||
58 | +/// Match APInt while forbidding undefs in splat vector constants. | ||
59 | +inline apint_match m_APIntForbidUndef(const APInt *&Res) { | ||
60 | + return apint_match(Res, /* AllowUndef */ false); | ||
61 | +} | ||
62 | |||
63 | /// Match a ConstantFP or splatted ConstantVector, binding the | ||
64 | /// specified pointer to the contained APFloat. | ||
65 | diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | ||
66 | index bf32996d96e2..40a246b9d7a7 100644 | ||
67 | --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | ||
68 | +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | ||
69 | @@ -925,8 +925,31 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { | ||
70 | if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) { | ||
71 | if (CI->hasOneUse()) { | ||
72 | Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); | ||
73 | - if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || | ||
74 | - (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) | ||
75 | + | ||
76 | + // FIXME: This is a hack to avoid infinite looping with min/max patterns. | ||
77 | + // We have to ensure that vector constants that only differ with | ||
78 | + // undef elements are treated as equivalent. | ||
79 | + auto areLooselyEqual = [](Value *A, Value *B) { | ||
80 | + if (A == B) | ||
81 | + return true; | ||
82 | + | ||
83 | + // Test for vector constants. | ||
84 | + Constant *ConstA, *ConstB; | ||
85 | + if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB))) | ||
86 | + return false; | ||
87 | + | ||
88 | + // TODO: Deal with FP constants? | ||
89 | + if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType()) | ||
90 | + return false; | ||
91 | + | ||
92 | + // Compare for equality including undefs as equal. | ||
93 | + auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB); | ||
94 | + const APInt *C; | ||
95 | + return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue(); | ||
96 | + }; | ||
97 | + | ||
98 | + if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) || | ||
99 | + (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1))) | ||
100 | return nullptr; | ||
101 | } | ||
102 | } | ||
103 | -- | ||
104 | 2.17.1 | ||
105 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch deleted file mode 100644 index fc6935a1..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch +++ /dev/null | |||
@@ -1,43 +0,0 @@ | |||
1 | From 8f83e2b7618da7a98a30839a8f41a6dd82dec468 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Fri, 27 Aug 2021 12:00:23 +0800 | ||
4 | Subject: [PATCH 1/2] This patch is required to fix stability problem #1793 | ||
5 | |||
6 | It's backport of the following llvm 11.0 commit: 120c5f1057dc50229f73bc75bbabf4df6ee50fef | ||
7 | |||
8 | Upstream-Status: Backport | ||
9 | |||
10 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
11 | --- | ||
12 | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++-- | ||
13 | 1 file changed, 4 insertions(+), 2 deletions(-) | ||
14 | |||
15 | diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | ||
16 | index 2476fd26f250..2743acc89bca 100644 | ||
17 | --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | ||
18 | +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | ||
19 | @@ -10702,8 +10702,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { | ||
20 | SDValue N0 = N->getOperand(0); | ||
21 | EVT VT = N->getValueType(0); | ||
22 | |||
23 | + // zext_vector_inreg(undef) = 0 because the top bits will be zero. | ||
24 | if (N0.isUndef()) | ||
25 | - return DAG.getUNDEF(VT); | ||
26 | + return DAG.getConstant(0, SDLoc(N), VT); | ||
27 | |||
28 | if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) | ||
29 | return Res; | ||
30 | @@ -10718,8 +10719,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { | ||
31 | SDValue N0 = N->getOperand(0); | ||
32 | EVT VT = N->getValueType(0); | ||
33 | |||
34 | + // sext_vector_inreg(undef) = 0 because the top bit will all be the same. | ||
35 | if (N0.isUndef()) | ||
36 | - return DAG.getUNDEF(VT); | ||
37 | + return DAG.getConstant(0, SDLoc(N), VT); | ||
38 | |||
39 | if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) | ||
40 | return Res; | ||
41 | -- | ||
42 | 2.17.1 | ||
43 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch deleted file mode 100644 index e3e70107..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | From 62b05a69b4a185cd0b7535f19742686e19fcaf22 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Fri, 27 Aug 2021 12:02:37 +0800 | ||
4 | Subject: [PATCH 2/2] Fix for #1844, affects avx512skx-i8x64 and | ||
5 | avx512skx-i16x32. | ||
6 | |||
7 | It's a port of 11.0 commit edcfb47ff6d5562e22207f364c65f84302aa346b | ||
8 | https://reviews.llvm.org/D76312 | ||
9 | |||
10 | Upstream-Status: Backport | ||
11 | |||
12 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
13 | --- | ||
14 | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++- | ||
15 | 1 file changed, 3 insertions(+), 1 deletion(-) | ||
16 | |||
17 | diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | ||
18 | index 2743acc89bca..439a8367dabe 100644 | ||
19 | --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | ||
20 | +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | ||
21 | @@ -10841,7 +10841,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { | ||
22 | |||
23 | // Attempt to pre-truncate BUILD_VECTOR sources. | ||
24 | if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations && | ||
25 | - TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) { | ||
26 | + TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) && | ||
27 | + // Avoid creating illegal types if running after type legalizer. | ||
28 | + (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) { | ||
29 | SDLoc DL(N); | ||
30 | EVT SVT = VT.getScalarType(); | ||
31 | SmallVector<SDValue, 8> TruncOps; | ||
32 | -- | ||
33 | 2.17.1 | ||
34 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch deleted file mode 100644 index 8aca5fbf..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch +++ /dev/null | |||
@@ -1,40 +0,0 @@ | |||
1 | From cc4301f82ca1bde1d438c3708de285b0ab8c72d3 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Fri, 27 Aug 2021 12:07:25 +0800 | ||
4 | Subject: [PATCH 1/2] [X86] createVariablePermute - handle case where recursive | ||
5 | createVariablePermute call fails | ||
6 | |||
7 | Account for the case where a recursive createVariablePermute call with a wider vector type fails. | ||
8 | |||
9 | Original test case from @craig.topper (Craig Topper) | ||
10 | |||
11 | Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/6bdd63dc28208a597542b0c6bc41093f32417804] | ||
12 | |||
13 | Signed-off-by: Simon Pilgrim <llvm-dev@redking.me.uk> | ||
14 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
15 | --- | ||
16 | llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++--- | ||
17 | 1 file changed, 5 insertions(+), 3 deletions(-) | ||
18 | |||
19 | diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
20 | index c8720d9ae3a6..63eb050e9b3a 100644 | ||
21 | --- a/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
22 | +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
23 | @@ -9571,9 +9571,11 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec, | ||
24 | IndicesVT = EVT(VT).changeVectorElementTypeToInteger(); | ||
25 | IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false, | ||
26 | Subtarget, DAG, SDLoc(IndicesVec)); | ||
27 | - return extractSubVector( | ||
28 | - createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0, | ||
29 | - DAG, DL, SizeInBits); | ||
30 | + SDValue NewSrcVec = | ||
31 | + createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget); | ||
32 | + if (NewSrcVec) | ||
33 | + return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits); | ||
34 | + return SDValue(); | ||
35 | } else if (SrcVec.getValueSizeInBits() < SizeInBits) { | ||
36 | // Widen smaller SrcVec to match VT. | ||
37 | SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec)); | ||
38 | -- | ||
39 | 2.17.1 | ||
40 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch deleted file mode 100644 index e03c279f..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch +++ /dev/null | |||
@@ -1,61 +0,0 @@ | |||
1 | From 9cdff0785d5cf9effc8e922d3330311c4d3dda78 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Fri, 27 Aug 2021 12:09:42 +0800 | ||
4 | Subject: [PATCH 2/2] This patch is needed for avx512skx-i8x64 and | ||
5 | avx512skx-i16x32 targets. | ||
6 | |||
7 | This is combination of two commits: | ||
8 | - 0cd6712a7af0fa2702b5d4cc733500eb5e62e7d0 - stability fix. | ||
9 | - d8ad7cc0885f32104a7cd83c77191aec15fd684f - performance follow up. | ||
10 | |||
11 | Upstream-Status: Backport | ||
12 | |||
13 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
14 | --- | ||
15 | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 +++++++++++++++++-- | ||
16 | 1 file changed, 21 insertions(+), 2 deletions(-) | ||
17 | |||
18 | diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | ||
19 | index 439a8367dabe..b1639c7f275d 100644 | ||
20 | --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | ||
21 | +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | ||
22 | @@ -18471,6 +18471,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { | ||
23 | |||
24 | // Allow targets to opt-out. | ||
25 | EVT VT = Extract->getValueType(0); | ||
26 | + | ||
27 | + // We can only create byte sized loads. | ||
28 | + if (!VT.isByteSized()) | ||
29 | + return SDValue(); | ||
30 | + | ||
31 | + unsigned Index = ExtIdx->getZExtValue(); | ||
32 | + unsigned NumElts = VT.getVectorNumElements(); | ||
33 | + | ||
34 | + // If the index is a multiple of the extract element count, we can offset the | ||
35 | + // address by the store size multiplied by the subvector index. Otherwise if | ||
36 | + // the scalar type is byte sized, we can just use the index multiplied by | ||
37 | + // the element size in bytes as the offset. | ||
38 | + unsigned Offset; | ||
39 | + if (Index % NumElts == 0) | ||
40 | + Offset = (Index / NumElts) * VT.getStoreSize(); | ||
41 | + else if (VT.getScalarType().isByteSized()) | ||
42 | + Offset = Index * VT.getScalarType().getStoreSize(); | ||
43 | + else | ||
44 | + return SDValue(); | ||
45 | + | ||
46 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); | ||
47 | if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT)) | ||
48 | return SDValue(); | ||
49 | @@ -18478,8 +18498,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { | ||
50 | // The narrow load will be offset from the base address of the old load if | ||
51 | // we are extracting from something besides index 0 (little-endian). | ||
52 | SDLoc DL(Extract); | ||
53 | - SDValue BaseAddr = Ld->getOperand(1); | ||
54 | - unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); | ||
55 | + SDValue BaseAddr = Ld->getBasePtr(); | ||
56 | |||
57 | // TODO: Use "BaseIndexOffset" to make this more effective. | ||
58 | SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); | ||
59 | -- | ||
60 | 2.17.1 | ||
61 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch deleted file mode 100644 index d1768216..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch +++ /dev/null | |||
@@ -1,97 +0,0 @@ | |||
1 | From c2ebd328979c081dd2c9fd0e359ed99473731d0e Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Fri, 27 Aug 2021 12:13:00 +0800 | ||
4 | Subject: [PATCH 1/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we | ||
5 | store zeros in the rest of the byte | ||
6 | |||
7 | We can't store garbage in the unused bits. It possible that something like zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0. | ||
8 | |||
9 | I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment so I wanted to focus on the correctness issue. | ||
10 | |||
11 | Should fix PR48147. | ||
12 | |||
13 | Reviewed By: RKSimon | ||
14 | |||
15 | Differential Revision: https://reviews.llvm.org/D9129 | ||
16 | |||
17 | Upstream-Status: Backport | ||
18 | |||
19 | Signed-off-by:Craig Topper <craig.topper@sifive.com> | ||
20 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
21 | --- | ||
22 | llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++------ | ||
23 | llvm/lib/Target/X86/X86InstrAVX512.td | 2 -- | ||
24 | 2 files changed, 14 insertions(+), 8 deletions(-) | ||
25 | |||
26 | diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
27 | index 63eb050e9b3a..96b5e2cfbd82 100644 | ||
28 | --- a/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
29 | +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
30 | @@ -22688,17 +22688,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, | ||
31 | // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores. | ||
32 | if (StoredVal.getValueType().isVector() && | ||
33 | StoredVal.getValueType().getVectorElementType() == MVT::i1) { | ||
34 | - assert(StoredVal.getValueType().getVectorNumElements() <= 8 && | ||
35 | - "Unexpected VT"); | ||
36 | + unsigned NumElts = StoredVal.getValueType().getVectorNumElements(); | ||
37 | + assert(NumElts <= 8 && "Unexpected VT"); | ||
38 | assert(!St->isTruncatingStore() && "Expected non-truncating store"); | ||
39 | assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() && | ||
40 | "Expected AVX512F without AVX512DQI"); | ||
41 | |||
42 | + // We must pad with zeros to ensure we store zeroes to any unused bits. | ||
43 | StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, | ||
44 | DAG.getUNDEF(MVT::v16i1), StoredVal, | ||
45 | DAG.getIntPtrConstant(0, dl)); | ||
46 | StoredVal = DAG.getBitcast(MVT::i16, StoredVal); | ||
47 | StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal); | ||
48 | + // Make sure we store zeros in the extra bits. | ||
49 | + if (NumElts < 8) | ||
50 | + StoredVal = DAG.getZeroExtendInReg(StoredVal, dl, | ||
51 | + MVT::getIntegerVT(NumElts)); | ||
52 | |||
53 | return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), | ||
54 | St->getPointerInfo(), St->getAlignment(), | ||
55 | @@ -41585,8 +41590,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, | ||
56 | |||
57 | EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements()); | ||
58 | StoredVal = DAG.getBitcast(NewVT, StoredVal); | ||
59 | - | ||
60 | - return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), | ||
61 | + SDValue Val = StoredVal.getOperand(0); | ||
62 | + // We must store zeros to the unused bits. | ||
63 | + Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1); | ||
64 | + return DAG.getStore(St->getChain(), dl, Val, St->getBasePtr(), | ||
65 | St->getPointerInfo(), St->getAlignment(), | ||
66 | St->getMemOperand()->getFlags()); | ||
67 | } | ||
68 | @@ -41602,10 +41609,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, | ||
69 | } | ||
70 | |||
71 | // Widen v2i1/v4i1 stores to v8i1. | ||
72 | - if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT && | ||
73 | + if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT && | ||
74 | Subtarget.hasAVX512()) { | ||
75 | unsigned NumConcats = 8 / VT.getVectorNumElements(); | ||
76 | - SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT)); | ||
77 | + // We must store zeros to the unused bits. | ||
78 | + SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT)); | ||
79 | Ops[0] = StoredVal; | ||
80 | StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); | ||
81 | return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), | ||
82 | diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td | ||
83 | index 32f012033fb0..d3b92183f87b 100644 | ||
84 | --- a/llvm/lib/Target/X86/X86InstrAVX512.td | ||
85 | +++ b/llvm/lib/Target/X86/X86InstrAVX512.td | ||
86 | @@ -2888,8 +2888,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), | ||
87 | |||
88 | // Load/store kreg | ||
89 | let Predicates = [HasDQI] in { | ||
90 | - def : Pat<(store VK1:$src, addr:$dst), | ||
91 | - (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>; | ||
92 | |||
93 | def : Pat<(v1i1 (load addr:$src)), | ||
94 | (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>; | ||
95 | -- | ||
96 | 2.17.1 | ||
97 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch deleted file mode 100644 index 03b40e9b..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch +++ /dev/null | |||
@@ -1,173 +0,0 @@ | |||
1 | From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Fri, 27 Aug 2021 12:15:09 +0800 | ||
4 | Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via | ||
5 | getRegisterTypeForCallingConv rather than using CCPromoteToType in the td | ||
6 | file | ||
7 | |||
8 | Previously we tried to promote these to xmm/ymm/zmm by promoting | ||
9 | in the X86CallingConv.td file. But this breaks when we run out | ||
10 | of xmm/ymm/zmm registers and need to fall back to memory. We end | ||
11 | up trying to create a non-sensical scalar to vector. This lead | ||
12 | to an assertion. The new tests in avx512-calling-conv.ll all | ||
13 | trigger this assertion. | ||
14 | |||
15 | Since we really want to treat these types like we do on avx2, | ||
16 | it seems better to promote them before the calling convention | ||
17 | code gets involved. Except when the calling convention is one | ||
18 | that passes the vXi1 type in a k register. | ||
19 | |||
20 | The changes in avx512-regcall-Mask.ll are because we indicated | ||
21 | that xmm/ymm/zmm types should be passed indirectly for the | ||
22 | Win64 ABI before we go to the common lines that promoted the | ||
23 | vXi1 types. This caused the promoted types to be picked up by | ||
24 | the default calling convention code. Now we promote them earlier | ||
25 | so they get passed indirectly as though they were xmm/ymm/zmm. | ||
26 | |||
27 | Differential Revision: https://reviews.llvm.org/D75154 | ||
28 | |||
29 | Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/eadea7868f5b7542ee6bdcd9a975697a0c919ffc] | ||
30 | |||
31 | Signed-off-by:Craig Topper <craig.topper@intel.com> | ||
32 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
33 | --- | ||
34 | llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++-------- | ||
35 | 1 file changed, 61 insertions(+), 29 deletions(-) | ||
36 | |||
37 | diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
38 | index 96b5e2cfbd82..d5de94aeb8a2 100644 | ||
39 | --- a/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
40 | +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
41 | @@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const { | ||
42 | return TargetLoweringBase::getPreferredVectorAction(VT); | ||
43 | } | ||
44 | |||
45 | +static std::pair<MVT, unsigned> | ||
46 | +handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, | ||
47 | + const X86Subtarget &Subtarget) { | ||
48 | + // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling | ||
49 | + // convention is one that uses k registers. | ||
50 | + if (NumElts == 2) | ||
51 | + return {MVT::v2i64, 1}; | ||
52 | + if (NumElts == 4) | ||
53 | + return {MVT::v4i32, 1}; | ||
54 | + if (NumElts == 8 && CC != CallingConv::X86_RegCall && | ||
55 | + CC != CallingConv::Intel_OCL_BI) | ||
56 | + return {MVT::v8i16, 1}; | ||
57 | + if (NumElts == 16 && CC != CallingConv::X86_RegCall && | ||
58 | + CC != CallingConv::Intel_OCL_BI) | ||
59 | + return {MVT::v16i8, 1}; | ||
60 | + // v32i1 passes in ymm unless we have BWI and the calling convention is | ||
61 | + // regcall. | ||
62 | + if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall)) | ||
63 | + return {MVT::v32i8, 1}; | ||
64 | + // Split v64i1 vectors if we don't have v64i8 available. | ||
65 | + if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) { | ||
66 | + if (Subtarget.useAVX512Regs()) | ||
67 | + return {MVT::v64i8, 1}; | ||
68 | + return {MVT::v32i8, 2}; | ||
69 | + } | ||
70 | + | ||
71 | + // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. | ||
72 | + if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) || | ||
73 | + NumElts > 64) | ||
74 | + return {MVT::i8, NumElts}; | ||
75 | + | ||
76 | + return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0}; | ||
77 | +} | ||
78 | + | ||
79 | MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, | ||
80 | CallingConv::ID CC, | ||
81 | EVT VT) const { | ||
82 | - // v32i1 vectors should be promoted to v32i8 to match avx2. | ||
83 | - if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) | ||
84 | - return MVT::v32i8; | ||
85 | - // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. | ||
86 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && | ||
87 | - Subtarget.hasAVX512() && | ||
88 | - (!isPowerOf2_32(VT.getVectorNumElements()) || | ||
89 | - (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || | ||
90 | - (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) | ||
91 | - return MVT::i8; | ||
92 | - // Split v64i1 vectors if we don't have v64i8 available. | ||
93 | - if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && | ||
94 | - CC != CallingConv::X86_RegCall) | ||
95 | - return MVT::v32i1; | ||
96 | + Subtarget.hasAVX512()) { | ||
97 | + unsigned NumElts = VT.getVectorNumElements(); | ||
98 | + | ||
99 | + MVT RegisterVT; | ||
100 | + unsigned NumRegisters; | ||
101 | + std::tie(RegisterVT, NumRegisters) = | ||
102 | + handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); | ||
103 | + if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) | ||
104 | + return RegisterVT; | ||
105 | + } | ||
106 | + | ||
107 | // FIXME: Should we just make these types legal and custom split operations? | ||
108 | if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI && | ||
109 | Subtarget.useAVX512Regs() && !Subtarget.hasBWI()) | ||
110 | return MVT::v16i32; | ||
111 | + | ||
112 | return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); | ||
113 | } | ||
114 | |||
115 | unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, | ||
116 | CallingConv::ID CC, | ||
117 | EVT VT) const { | ||
118 | - // v32i1 vectors should be promoted to v32i8 to match avx2. | ||
119 | - if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) | ||
120 | - return 1; | ||
121 | - // Break wide or odd vXi1 vectors into scalars to match avx2 behavior. | ||
122 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && | ||
123 | - Subtarget.hasAVX512() && | ||
124 | - (!isPowerOf2_32(VT.getVectorNumElements()) || | ||
125 | - (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || | ||
126 | - (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) | ||
127 | - return VT.getVectorNumElements(); | ||
128 | - // Split v64i1 vectors if we don't have v64i8 available. | ||
129 | - if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && | ||
130 | - CC != CallingConv::X86_RegCall) | ||
131 | - return 2; | ||
132 | + Subtarget.hasAVX512()) { | ||
133 | + unsigned NumElts = VT.getVectorNumElements(); | ||
134 | + | ||
135 | + MVT RegisterVT; | ||
136 | + unsigned NumRegisters; | ||
137 | + std::tie(RegisterVT, NumRegisters) = | ||
138 | + handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); | ||
139 | + if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) | ||
140 | + return NumRegisters; | ||
141 | + } | ||
142 | + | ||
143 | // FIXME: Should we just make these types legal and custom split operations? | ||
144 | if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI && | ||
145 | Subtarget.useAVX512Regs() && !Subtarget.hasBWI()) | ||
146 | return 1; | ||
147 | + | ||
148 | return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); | ||
149 | } | ||
150 | |||
151 | @@ -2140,8 +2172,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( | ||
152 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && | ||
153 | Subtarget.hasAVX512() && | ||
154 | (!isPowerOf2_32(VT.getVectorNumElements()) || | ||
155 | - (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) || | ||
156 | - (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) { | ||
157 | + (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) || | ||
158 | + VT.getVectorNumElements() > 64)) { | ||
159 | RegisterVT = MVT::i8; | ||
160 | IntermediateVT = MVT::i1; | ||
161 | NumIntermediates = VT.getVectorNumElements(); | ||
162 | @@ -2151,7 +2183,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( | ||
163 | // Split v64i1 vectors if we don't have v64i8 available. | ||
164 | if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && | ||
165 | CC != CallingConv::X86_RegCall) { | ||
166 | - RegisterVT = MVT::v32i1; | ||
167 | + RegisterVT = MVT::v32i8; | ||
168 | IntermediateVT = MVT::v32i1; | ||
169 | NumIntermediates = 2; | ||
170 | return 2; | ||
171 | -- | ||
172 | 2.17.1 | ||
173 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch deleted file mode 100644 index 2e3872dc..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch +++ /dev/null | |||
@@ -1,550 +0,0 @@ | |||
1 | From 447cb2e1b2f0d8bdcfd8a0b39f47d28de50b5d82 Mon Sep 17 00:00:00 2001 | ||
2 | From: Djordje Todorovic <djordje.todorovic@syrmia.com> | ||
3 | Date: Mon, 9 Mar 2020 11:02:35 +0100 | ||
4 | Subject: [PATCH] Enable the call site info only for -g + optimizations | ||
5 | |||
6 | Emit call site info only in the case of '-g' + 'O>0' level. | ||
7 | |||
8 | Differential Revision: https://reviews.llvm.org/D75175 | ||
9 | |||
10 | Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/c15c68abdc6f1afece637bdedba808676191a8e6] | ||
11 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
12 | --- | ||
13 | clang/include/clang/Basic/CodeGenOptions.def | 2 ++ | ||
14 | clang/lib/CodeGen/BackendUtil.cpp | 1 + | ||
15 | clang/lib/Frontend/CompilerInvocation.cpp | 4 +++- | ||
16 | llvm/include/llvm/CodeGen/CommandFlags.inc | 7 +++++++ | ||
17 | llvm/include/llvm/Target/TargetOptions.h | 7 ++++++- | ||
18 | llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 4 ++-- | ||
19 | llvm/lib/CodeGen/MachineFunction.cpp | 2 +- | ||
20 | llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 2 +- | ||
21 | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +- | ||
22 | llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +- | ||
23 | llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- | ||
24 | llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir | 2 +- | ||
25 | llvm/test/CodeGen/X86/call-site-info-output.ll | 4 ++-- | ||
26 | llvm/test/DebugInfo/AArch64/call-site-info-output.ll | 2 +- | ||
27 | llvm/test/DebugInfo/ARM/call-site-info-output.ll | 2 +- | ||
28 | .../MIR/AArch64/dbgcall-site-interpret-movzxi.mir | 2 +- | ||
29 | .../DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir | 2 +- | ||
30 | llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir | 2 +- | ||
31 | .../test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir | 2 +- | ||
32 | .../test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir | 2 +- | ||
33 | .../DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir | 2 +- | ||
34 | llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir | 2 +- | ||
35 | .../MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir | 2 +- | ||
36 | .../MIR/Hexagon/live-debug-values-bundled-entry-values.mir | 2 +- | ||
37 | llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir | 2 +- | ||
38 | llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir | 2 +- | ||
39 | llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir | 2 +- | ||
40 | .../test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir | 2 +- | ||
41 | .../test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir | 2 +- | ||
42 | .../DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir | 2 +- | ||
43 | llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir | 2 +- | ||
44 | .../DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir | 2 +- | ||
45 | llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir | 2 +- | ||
46 | llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir | 4 ++-- | ||
47 | .../DebugInfo/MIR/X86/entry-value-of-modified-param.mir | 2 +- | ||
48 | llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir | 2 +- | ||
49 | .../DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir | 2 +- | ||
50 | .../test/DebugInfo/MIR/X86/unreachable-block-call-site.mir | 2 +- | ||
51 | llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll | 2 +- | ||
52 | llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll | 2 +- | ||
53 | .../tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll | 2 +- | ||
54 | 41 files changed, 58 insertions(+), 41 deletions(-) | ||
55 | |||
56 | diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def | ||
57 | index 1ecae98b13b1..6a6a9465273f 100644 | ||
58 | --- a/clang/include/clang/Basic/CodeGenOptions.def | ||
59 | +++ b/clang/include/clang/Basic/CodeGenOptions.def | ||
60 | @@ -64,6 +64,8 @@ CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new | ||
61 | ///< pass manager. | ||
62 | CODEGENOPT(DisableRedZone , 1, 0) ///< Set when -mno-red-zone is enabled. | ||
63 | CODEGENOPT(EnableDebugEntryValues, 1, 0) ///< Emit call site parameter dbg info | ||
64 | +CODEGENOPT(EmitCallSiteInfo, 1, 0) ///< Emit call site info only in the case of | ||
65 | + ///< '-g' + 'O>0' level. | ||
66 | CODEGENOPT(IndirectTlsSegRefs, 1, 0) ///< Set when -mno-tls-direct-seg-refs | ||
67 | ///< is specified. | ||
68 | CODEGENOPT(DisableTailCalls , 1, 0) ///< Do not emit tail calls. | ||
69 | diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp | ||
70 | index db8fd4166d7a..db09f9b641fe 100644 | ||
71 | --- a/clang/lib/CodeGen/BackendUtil.cpp | ||
72 | +++ b/clang/lib/CodeGen/BackendUtil.cpp | ||
73 | @@ -482,6 +482,7 @@ static void initTargetOptions(llvm::TargetOptions &Options, | ||
74 | Options.EmitAddrsig = CodeGenOpts.Addrsig; | ||
75 | Options.EnableDebugEntryValues = CodeGenOpts.EnableDebugEntryValues; | ||
76 | Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection; | ||
77 | + Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo; | ||
78 | |||
79 | Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; | ||
80 | Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; | ||
81 | diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp | ||
82 | index 18fa06bf3c6d..2e73dcbdebe4 100644 | ||
83 | --- a/clang/lib/Frontend/CompilerInvocation.cpp | ||
84 | +++ b/clang/lib/Frontend/CompilerInvocation.cpp | ||
85 | @@ -789,8 +789,10 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, | ||
86 | |||
87 | llvm::Triple T(TargetOpts.Triple); | ||
88 | if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() && | ||
89 | - llvm::is_contained(DebugEntryValueArchs, T.getArch())) | ||
90 | + llvm::is_contained(DebugEntryValueArchs, T.getArch())) { | ||
91 | Opts.EnableDebugEntryValues = Args.hasArg(OPT_femit_debug_entry_values); | ||
92 | + Opts.EmitCallSiteInfo = true; | ||
93 | + } | ||
94 | |||
95 | Opts.DisableO0ImplyOptNone = Args.hasArg(OPT_disable_O0_optnone); | ||
96 | Opts.DisableRedZone = Args.hasArg(OPT_disable_red_zone); | ||
97 | diff --git a/llvm/include/llvm/CodeGen/CommandFlags.inc b/llvm/include/llvm/CodeGen/CommandFlags.inc | ||
98 | index 6475a5b19edb..36073fe9cc98 100644 | ||
99 | --- a/llvm/include/llvm/CodeGen/CommandFlags.inc | ||
100 | +++ b/llvm/include/llvm/CodeGen/CommandFlags.inc | ||
101 | @@ -286,6 +286,12 @@ static cl::opt<bool> | ||
102 | EnableAddrsig("addrsig", cl::desc("Emit an address-significance table"), | ||
103 | cl::init(false)); | ||
104 | |||
105 | +static cl::opt<bool> EmitCallSiteInfo( | ||
106 | + "emit-call-site-info", | ||
107 | + cl::desc( | ||
108 | + "Emit call site debug information, if debug information is enabled."), | ||
109 | + cl::init(false)); | ||
110 | + | ||
111 | static cl::opt<bool> | ||
112 | EnableDebugEntryValues("debug-entry-values", | ||
113 | cl::desc("Emit debug info about parameter's entry values"), | ||
114 | @@ -349,6 +355,7 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() { | ||
115 | Options.ExceptionModel = ExceptionModel; | ||
116 | Options.EmitStackSizeSection = EnableStackSizeSection; | ||
117 | Options.EmitAddrsig = EnableAddrsig; | ||
118 | + Options.EmitCallSiteInfo = EmitCallSiteInfo; | ||
119 | Options.EnableDebugEntryValues = EnableDebugEntryValues; | ||
120 | Options.ForceDwarfFrameSection = ForceDwarfFrameSection; | ||
121 | |||
122 | diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h | ||
123 | index d27c7b0178f0..9378e290bed1 100644 | ||
124 | --- a/llvm/include/llvm/Target/TargetOptions.h | ||
125 | +++ b/llvm/include/llvm/Target/TargetOptions.h | ||
126 | @@ -134,7 +134,8 @@ namespace llvm { | ||
127 | EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false), | ||
128 | EmitStackSizeSection(false), EnableMachineOutliner(false), | ||
129 | SupportsDefaultOutlining(false), EmitAddrsig(false), | ||
130 | - EnableDebugEntryValues(false), ForceDwarfFrameSection(false) {} | ||
131 | + EmitCallSiteInfo(false), EnableDebugEntryValues(false), | ||
132 | + ForceDwarfFrameSection(false) {} | ||
133 | |||
134 | /// PrintMachineCode - This flag is enabled when the -print-machineinstrs | ||
135 | /// option is specified on the command line, and should enable debugging | ||
136 | @@ -281,6 +282,10 @@ namespace llvm { | ||
137 | /// to selectively generate basic block sections. | ||
138 | std::shared_ptr<MemoryBuffer> BBSectionsFuncListBuf; | ||
139 | |||
140 | + /// The flag enables call site info production. It is used only for debug | ||
141 | + /// info, and it is restricted only to optimized code. This can be used for | ||
142 | + /// something else, so that should be controlled in the frontend. | ||
143 | + unsigned EmitCallSiteInfo : 1; | ||
144 | /// Emit debug info about parameter's entry values. | ||
145 | unsigned EnableDebugEntryValues : 1; | ||
146 | |||
147 | diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | ||
148 | index 10157c746b46..f955bdc6186a 100644 | ||
149 | --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | ||
150 | +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | ||
151 | @@ -381,11 +381,11 @@ bool MIRParserImpl::initializeCallSiteInfo( | ||
152 | CSInfo.emplace_back(Reg, ArgRegPair.ArgNo); | ||
153 | } | ||
154 | |||
155 | - if (TM.Options.EnableDebugEntryValues) | ||
156 | + if (TM.Options.EmitCallSiteInfo) | ||
157 | MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo)); | ||
158 | } | ||
159 | |||
160 | - if (YamlMF.CallSitesInfo.size() && !TM.Options.EnableDebugEntryValues) | ||
161 | + if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo) | ||
162 | return error(Twine("Call site info provided but not used")); | ||
163 | return false; | ||
164 | } | ||
165 | diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp | ||
166 | index 4612690644fe..c3795b7ed314 100644 | ||
167 | --- a/llvm/lib/CodeGen/MachineFunction.cpp | ||
168 | +++ b/llvm/lib/CodeGen/MachineFunction.cpp | ||
169 | @@ -855,7 +855,7 @@ MachineFunction::CallSiteInfoMap::iterator | ||
170 | MachineFunction::getCallSiteInfo(const MachineInstr *MI) { | ||
171 | assert(MI->isCall() && "Call site info refers only to call instructions!"); | ||
172 | |||
173 | - if (!Target.Options.EnableDebugEntryValues) | ||
174 | + if (!Target.Options.EmitCallSiteInfo) | ||
175 | return CallSitesInfo.end(); | ||
176 | return CallSitesInfo.find(MI); | ||
177 | } | ||
178 | diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | ||
179 | index 0e4d783e3505..52099f24aca5 100644 | ||
180 | --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | ||
181 | +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | ||
182 | @@ -863,7 +863,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { | ||
183 | MI = &*std::next(Before); | ||
184 | } | ||
185 | |||
186 | - if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues) | ||
187 | + if (MI->isCall() && DAG->getTarget().Options.EmitCallSiteInfo) | ||
188 | MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node)); | ||
189 | |||
190 | return MI; | ||
191 | diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | ||
192 | index 23f05eaad944..63ff3031a5e8 100644 | ||
193 | --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | ||
194 | +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | ||
195 | @@ -4132,7 +4132,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, | ||
196 | RegsToPass.emplace_back(VA.getLocReg(), Arg); | ||
197 | RegsUsed.insert(VA.getLocReg()); | ||
198 | const TargetOptions &Options = DAG.getTarget().Options; | ||
199 | - if (Options.EnableDebugEntryValues) | ||
200 | + if (Options.EmitCallSiteInfo) | ||
201 | CSInfo.emplace_back(VA.getLocReg(), i); | ||
202 | } | ||
203 | } else { | ||
204 | diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp | ||
205 | index 9f504b1eaa42..5589ba34a2ac 100644 | ||
206 | --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp | ||
207 | +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp | ||
208 | @@ -2222,7 +2222,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | ||
209 | isThisReturn = true; | ||
210 | } | ||
211 | const TargetOptions &Options = DAG.getTarget().Options; | ||
212 | - if (Options.EnableDebugEntryValues) | ||
213 | + if (Options.EmitCallSiteInfo) | ||
214 | CSInfo.emplace_back(VA.getLocReg(), i); | ||
215 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); | ||
216 | } else if (isByVal) { | ||
217 | diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
218 | index d5de94aeb8a2..4808bdf6ddc2 100644 | ||
219 | --- a/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
220 | +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
221 | @@ -4030,7 +4030,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, | ||
222 | } else if (VA.isRegLoc()) { | ||
223 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); | ||
224 | const TargetOptions &Options = DAG.getTarget().Options; | ||
225 | - if (Options.EnableDebugEntryValues) | ||
226 | + if (Options.EmitCallSiteInfo) | ||
227 | CSInfo.emplace_back(VA.getLocReg(), I); | ||
228 | if (isVarArg && IsWin64) { | ||
229 | // Win64 ABI requires argument XMM reg to be copied to the corresponding | ||
230 | diff --git a/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir b/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir | ||
231 | index 5ffa0293a2e1..fec542223fc9 100644 | ||
232 | --- a/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir | ||
233 | +++ b/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir | ||
234 | @@ -1,4 +1,4 @@ | ||
235 | -# RUN: llc -debug-entry-values -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s | ||
236 | +# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s | ||
237 | |||
238 | # Verify that it is possible to read and write MIR where a callSites entry | ||
239 | # points to a call residing in a bundle. The offset should point to the call | ||
240 | diff --git a/llvm/test/CodeGen/X86/call-site-info-output.ll b/llvm/test/CodeGen/X86/call-site-info-output.ll | ||
241 | index 4b1e236aadfe..a0438f0c2b98 100644 | ||
242 | --- a/llvm/test/CodeGen/X86/call-site-info-output.ll | ||
243 | +++ b/llvm/test/CodeGen/X86/call-site-info-output.ll | ||
244 | @@ -1,6 +1,6 @@ | ||
245 | ; Test call site info MIR printer and parser.Parser assertions and machine | ||
246 | ; verifier will check the rest; | ||
247 | -; RUN: llc -debug-entry-values %s -stop-before=finalize-isel -o %t.mir | ||
248 | +; RUN: llc -emit-call-site-info -debug-entry-values %s -stop-before=finalize-isel -o %t.mir | ||
249 | ; RUN: cat %t.mir | FileCheck %s | ||
250 | ; CHECK: name: fn2 | ||
251 | ; CHECK: callSites: | ||
252 | @@ -10,7 +10,7 @@ | ||
253 | ; CHECK-NEXT: arg: 0, reg: '$edi' | ||
254 | ; CHECK-NEXT: arg: 1, reg: '$esi' | ||
255 | ; CHECK-NEXT: arg: 2, reg: '$edx' | ||
256 | -; RUN: llc -debug-entry-values %t.mir -run-pass=finalize-isel -o -| FileCheck %s --check-prefix=PARSER | ||
257 | +; RUN: llc -emit-call-site-info -debug-entry-values %t.mir -run-pass=finalize-isel -o -| FileCheck %s --check-prefix=PARSER | ||
258 | ; Verify that we are able to parse output mir and that we are getting the same result. | ||
259 | ; PARSER: name: fn2 | ||
260 | ; PARSER: callSites: | ||
261 | diff --git a/llvm/test/DebugInfo/AArch64/call-site-info-output.ll b/llvm/test/DebugInfo/AArch64/call-site-info-output.ll | ||
262 | index d52d6962f3c4..17d9f7f18762 100644 | ||
263 | --- a/llvm/test/DebugInfo/AArch64/call-site-info-output.ll | ||
264 | +++ b/llvm/test/DebugInfo/AArch64/call-site-info-output.ll | ||
265 | @@ -1,4 +1,4 @@ | ||
266 | -; RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s | ||
267 | +; RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s | ||
268 | ; Verify that Selection DAG knows how to recognize simple function parameter forwarding registers. | ||
269 | ; Produced from: | ||
270 | ; extern int fn1(int,int,int); | ||
271 | diff --git a/llvm/test/DebugInfo/ARM/call-site-info-output.ll b/llvm/test/DebugInfo/ARM/call-site-info-output.ll | ||
272 | index 9255a7d57dde..ed726dfe753f 100644 | ||
273 | --- a/llvm/test/DebugInfo/ARM/call-site-info-output.ll | ||
274 | +++ b/llvm/test/DebugInfo/ARM/call-site-info-output.ll | ||
275 | @@ -1,4 +1,4 @@ | ||
276 | -; RUN: llc -mtriple arm-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s | ||
277 | +; RUN: llc -emit-call-site-info -mtriple arm-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s | ||
278 | ; Verify that Selection DAG knows how to recognize simple function parameter forwarding registers. | ||
279 | ; Produced from: | ||
280 | ; extern int fn1(int,int,int); | ||
281 | diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir | ||
282 | index dc7561ca6400..057779a90721 100644 | ||
283 | --- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir | ||
284 | +++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir | ||
285 | @@ -1,4 +1,4 @@ | ||
286 | -# RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s | ||
287 | +# RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s | ||
288 | # | ||
289 | # Based on the following C reproducer: | ||
290 | # | ||
291 | diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir | ||
292 | index 0371ccef603e..d925bc395878 100644 | ||
293 | --- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir | ||
294 | +++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir | ||
295 | @@ -1,4 +1,4 @@ | ||
296 | -# RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s | ||
297 | +# RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s | ||
298 | # Following code is used for producing this test case. Note that | ||
299 | # some of argument loading instruction are modified in order to | ||
300 | # cover certain cases. | ||
301 | diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir | ||
302 | index 916a14022ba5..4a87dad3b9b5 100644 | ||
303 | --- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir | ||
304 | +++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir | ||
305 | @@ -1,4 +1,4 @@ | ||
306 | -# RUN: llc -debug-entry-values -start-after=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s | ||
307 | +# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s | ||
308 | |||
309 | # Based on the following C reproducer: | ||
310 | # | ||
311 | diff --git a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir | ||
312 | index fbf9b3454689..d85f2d25391d 100644 | ||
313 | --- a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir | ||
314 | +++ b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir | ||
315 | @@ -1,4 +1,4 @@ | ||
316 | -# RUN: llc -start-after=livedebugvalues -filetype=obj -o - %s \ | ||
317 | +# RUN: llc -emit-call-site-info -start-after=livedebugvalues -filetype=obj -o - %s \ | ||
318 | # RUN: | llvm-dwarfdump -v - | FileCheck %s | ||
319 | |||
320 | # This tests for a crash in DwarfDebug's singular DBG_VALUE range promotion when | ||
321 | diff --git a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir | ||
322 | index ce8dc97f0e72..0ae4e6ec485c 100644 | ||
323 | --- a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir | ||
324 | +++ b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir | ||
325 | @@ -1,4 +1,4 @@ | ||
326 | -# RUN: llc -mtriple=arm-linux-gnueabi -debug-entry-values -filetype=obj -start-after=machineverifier %s -o -| llvm-dwarfdump -| FileCheck %s | ||
327 | +# RUN: llc -emit-call-site-info -mtriple=arm-linux-gnueabi -debug-entry-values -filetype=obj -start-after=machineverifier %s -o -| llvm-dwarfdump -| FileCheck %s | ||
328 | # Following code is used for producing this test case. Note that | ||
329 | # some of argument loading instruction are modified in order to | ||
330 | # cover certain cases. | ||
331 | diff --git a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir | ||
332 | index 9001c8ba8eea..5b84d9e9627f 100644 | ||
333 | --- a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir | ||
334 | +++ b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir | ||
335 | @@ -1,4 +1,4 @@ | ||
336 | -# RUN: llc -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s | ||
337 | +# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s | ||
338 | |||
339 | # Based on the following C reproducer: | ||
340 | # | ||
341 | diff --git a/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir b/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir | ||
342 | index aa7b54c1e5bb..11e9c4c90836 100644 | ||
343 | --- a/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir | ||
344 | +++ b/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir | ||
345 | @@ -1,4 +1,4 @@ | ||
346 | -# RUN: llc -mtriple=arm-linux-gnu -debug-entry-values -run-pass if-converter %s -o -| FileCheck %s | ||
347 | +# RUN: llc -emit-call-site-info -mtriple=arm-linux-gnu -debug-entry-values -run-pass if-converter %s -o -| FileCheck %s | ||
348 | |||
349 | # Vefify that the call site info will be updated after the optimization. | ||
350 | # This test case would previously trigger an assertion when | ||
351 | diff --git a/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir b/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir | ||
352 | index 8ae628af2c09..3ae23d4189bf 100644 | ||
353 | --- a/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir | ||
354 | +++ b/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir | ||
355 | @@ -1,4 +1,4 @@ | ||
356 | -# RUN: llc -mtriple hexagon -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s | ||
357 | +# RUN: llc -mtriple hexagon -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s | ||
358 | |||
359 | # Based on the following C reproducer: | ||
360 | # | ||
361 | diff --git a/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir b/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir | ||
362 | index ff0a539dd15d..8bb0b3202acd 100644 | ||
363 | --- a/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir | ||
364 | +++ b/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir | ||
365 | @@ -1,4 +1,4 @@ | ||
366 | -# RUN: llc -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s | ||
367 | +# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s | ||
368 | |||
369 | # Verify that the entry values for the input parameters are inserted after the | ||
370 | # bundles which contains the registers' clobbering instructions (the calls to | ||
371 | diff --git a/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir b/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir | ||
372 | index 8a4e8b5632c2..3cf41467f7f9 100644 | ||
373 | --- a/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir | ||
374 | +++ b/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir | ||
375 | @@ -1,4 +1,4 @@ | ||
376 | -# RUN: llc -debug-entry-values -start-after=livedebugvalues -o - %s | FileCheck %s | ||
377 | +# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -o - %s | FileCheck %s | ||
378 | |||
379 | # This test would previously trigger an assertion when trying to describe the | ||
380 | # call site value for callee()'s float parameter. | ||
381 | diff --git a/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir b/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir | ||
382 | index e6fe5d2de878..4e5a07321d42 100644 | ||
383 | --- a/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir | ||
384 | +++ b/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir | ||
385 | @@ -1,4 +1,4 @@ | ||
386 | -# RUN: llc -debug-entry-values -start-before=livedebugvalues -mtriple=x86_64-apple-darwin -o %t %s -filetype=obj | ||
387 | +# RUN: llc -emit-call-site-info -debug-entry-values -start-before=livedebugvalues -mtriple=x86_64-apple-darwin -o %t %s -filetype=obj | ||
388 | # RUN: llvm-dwarfdump %t | FileCheck %s | ||
389 | # | ||
390 | # int global; | ||
391 | diff --git a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir | ||
392 | index c32a1155d038..edeef2c7aed4 100644 | ||
393 | --- a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir | ||
394 | +++ b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir | ||
395 | @@ -1,6 +1,6 @@ | ||
396 | # Check that llvm can describe a call site parameter which resides in a spill slot. | ||
397 | # | ||
398 | -# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s | ||
399 | +# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s | ||
400 | # | ||
401 | # Command: | ||
402 | # $ ~/src/builds/llvm-project-master-RA/bin/clang -g -Xclang -femit-debug-entry-values -O2 -c -o spill.o spill.cc -mllvm -stop-before=machineverifier -o spill.mir | ||
403 | diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir | ||
404 | index a2d51a203512..01a2b887a60b 100644 | ||
405 | --- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir | ||
406 | +++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir | ||
407 | @@ -1,4 +1,4 @@ | ||
408 | -# RUN: llc -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s | ||
409 | +# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s | ||
410 | |||
411 | # Based on the following reproducer: | ||
412 | # | ||
413 | diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir | ||
414 | index f9e9459f1abd..104bc0146798 100644 | ||
415 | --- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir | ||
416 | +++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir | ||
417 | @@ -1,4 +1,4 @@ | ||
418 | -# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s | ||
419 | +# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s | ||
420 | # | ||
421 | # CHECK: DW_TAG_GNU_call_site | ||
422 | # CHECK-NEXT: DW_AT_abstract_origin {{.*}} "foo" | ||
423 | diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir | ||
424 | index 1bb70f6d4530..4d88fa9aab74 100644 | ||
425 | --- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir | ||
426 | +++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir | ||
427 | @@ -1,4 +1,4 @@ | ||
428 | -# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s | ||
429 | +# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s | ||
430 | # CHECK: DW_TAG_GNU_call_site | ||
431 | # CHECK-NEXT: DW_AT_abstract_origin {{.*}} "foo") | ||
432 | # CHECK-NEXT: DW_AT_low_pc {{.*}} | ||
433 | diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir | ||
434 | index 235787573f51..81af598ba194 100644 | ||
435 | --- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir | ||
436 | +++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir | ||
437 | @@ -1,4 +1,4 @@ | ||
438 | -# RUN: llc -debug-entry-values -start-before=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s | ||
439 | +# RUN: llc -emit-call-site-info -debug-entry-values -start-before=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s | ||
440 | |||
441 | # Based on the following C++ code: | ||
442 | # struct A { A(A &) {} }; | ||
443 | diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir | ||
444 | index db0934c595c3..46adedd1be44 100644 | ||
445 | --- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir | ||
446 | +++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir | ||
447 | @@ -1,4 +1,4 @@ | ||
448 | -# RUN: llc -O1 -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s | ||
449 | +# RUN: llc -O1 -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s | ||
450 | |||
451 | # Based on the following C reproducer: | ||
452 | # | ||
453 | diff --git a/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir b/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir | ||
454 | index 5d203029936e..1d7b64f169d1 100644 | ||
455 | --- a/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir | ||
456 | +++ b/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir | ||
457 | @@ -1,4 +1,4 @@ | ||
458 | -# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s | ||
459 | +# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s | ||
460 | # | ||
461 | #extern void fn2(int); | ||
462 | # | ||
463 | diff --git a/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir b/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir | ||
464 | index e79be66cd4e3..c39bc4db50be 100644 | ||
465 | --- a/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir | ||
466 | +++ b/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir | ||
467 | @@ -2,8 +2,8 @@ | ||
468 | # When the debugger tuning is set to gdb, use GNU opcodes. | ||
469 | # For lldb, use the standard DWARF5 opcodes. | ||
470 | |||
471 | -# RUN: llc -debug-entry-values -debugger-tune=gdb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-GNU | ||
472 | -# RUN: llc -debug-entry-values -debugger-tune=lldb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-DWARF5 | ||
473 | +# RUN: llc -emit-call-site-info -debug-entry-values -debugger-tune=gdb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-GNU | ||
474 | +# RUN: llc -emit-call-site-info -debug-entry-values -debugger-tune=lldb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-DWARF5 | ||
475 | # | ||
476 | # extern void foo(int *a, int b, int c, int d, int e, int f); | ||
477 | # extern int getVal(); | ||
478 | diff --git a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir | ||
479 | index 8d121c3a30b9..c7f15aaaa562 100644 | ||
480 | --- a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir | ||
481 | +++ b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir | ||
482 | @@ -1,4 +1,4 @@ | ||
483 | -# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s | ||
484 | +# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s | ||
485 | # | ||
486 | #extern void fn1 (int, int, int); | ||
487 | # | ||
488 | diff --git a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir | ||
489 | index 2396daada876..aa8fdd7afd47 100644 | ||
490 | --- a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir | ||
491 | +++ b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir | ||
492 | @@ -1,4 +1,4 @@ | ||
493 | -# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s | ||
494 | +# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s | ||
495 | # | ||
496 | # The test case was artificially adjusted, in order to make proper diamond basic | ||
497 | # block structure relevant to the debug entry values propagation. | ||
498 | diff --git a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir | ||
499 | index 86b1cddaa462..c5af863954bf 100644 | ||
500 | --- a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir | ||
501 | +++ b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir | ||
502 | @@ -1,4 +1,4 @@ | ||
503 | -# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s | ||
504 | +# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s | ||
505 | # | ||
506 | #extern void fn1 (int, int, int); | ||
507 | #__attribute__((noinline)) | ||
508 | diff --git a/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir b/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir | ||
509 | index d282d796f6d7..ea9c12b5a192 100644 | ||
510 | --- a/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir | ||
511 | +++ b/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir | ||
512 | @@ -1,4 +1,4 @@ | ||
513 | -# RUN: llc -mtriple=x86_64-pc-linux -debug-entry-values -run-pass=unreachable-mbb-elimination -o - %s | FileCheck %s | ||
514 | +# RUN: llc -mtriple=x86_64-pc-linux -emit-call-site-info -debug-entry-values -run-pass=unreachable-mbb-elimination -o - %s | FileCheck %s | ||
515 | |||
516 | # Verify that the call site information for the call residing in the eliminated | ||
517 | # block is removed. This test case would previously trigger an assertion when | ||
518 | diff --git a/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll b/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll | ||
519 | index b698f1cdbfe8..b8cd9574cc63 100644 | ||
520 | --- a/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll | ||
521 | +++ b/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll | ||
522 | @@ -1,4 +1,4 @@ | ||
523 | -; RUN: llc -O1 -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s | ||
524 | +; RUN: llc -O1 -emit-call-site-info -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s | ||
525 | |||
526 | ; Verify that the 64-bit call site immediates are not truncated. | ||
527 | ; | ||
528 | diff --git a/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll b/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll | ||
529 | index 9fe67f82a2b4..5d37774f55d6 100644 | ||
530 | --- a/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll | ||
531 | +++ b/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll | ||
532 | @@ -1,4 +1,4 @@ | ||
533 | -; RUN: llc -O3 -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s | ||
534 | +; RUN: llc -O3 -emit-call-site-info -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s | ||
535 | |||
536 | target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
537 | target triple = "x86_64-unknown-linux-gnu" | ||
538 | diff --git a/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll b/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll | ||
539 | index c304e9d768a5..d126757398ff 100644 | ||
540 | --- a/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll | ||
541 | +++ b/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll | ||
542 | @@ -1,4 +1,4 @@ | ||
543 | -; RUN: llc -debug-entry-values %s -o - -filetype=obj \ | ||
544 | +; RUN: llc -emit-call-site-info -debug-entry-values %s -o - -filetype=obj \ | ||
545 | ; RUN: | llvm-dwarfdump -statistics - | FileCheck %s | ||
546 | ; | ||
547 | ; The LLVM IR file was generated on this source code by using | ||
548 | -- | ||
549 | 2.33.1 | ||
550 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch deleted file mode 100644 index 6b2db931..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch +++ /dev/null | |||
@@ -1,2243 +0,0 @@ | |||
1 | From fd7146658e3b4ce045dfb332c2edf216f76c1e1f Mon Sep 17 00:00:00 2001 | ||
2 | From: serge-sans-paille <sguelton@redhat.com> | ||
3 | Date: Wed, 4 Mar 2020 00:47:43 +0100 | ||
4 | Subject: [PATCH] Replace MCTargetOptionsCommandFlags.inc and CommandFlags.inc | ||
5 | by runtime registration | ||
6 | |||
7 | MCTargetOptionsCommandFlags.inc and CommandFlags.inc are headers which contain | ||
8 | cl::opt with static storage. | ||
9 | These headers are meant to be incuded by tools to make it easier to parametrize | ||
10 | codegen/mc. | ||
11 | |||
12 | However, these headers are also included in at least two libraries: lldCommon | ||
13 | and handle-llvm. As a result, when creating DYLIB, clang-cpp holds a reference | ||
14 | to the options, and lldCommon holds another reference. Linking the two in a | ||
15 | single executable, as zig does[0], results in a double registration. | ||
16 | |||
17 | This patch explores an other approach: the .inc files are moved to regular | ||
18 | files, and the registration happens on-demand through static declaration of | ||
19 | options in the constructor of a static object. | ||
20 | |||
21 | [0] https://bugzilla.redhat.com/show_bug.cgi?id=1756977#c5 | ||
22 | |||
23 | Differential Revision: https://reviews.llvm.org/D75579 | ||
24 | |||
25 | Upstream-Status: Backport | ||
26 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
27 | --- | ||
28 | .../clang-fuzzer/handle-llvm/handle_llvm.cpp | 32 +- | ||
29 | lld/Common/TargetOptionsCommandFlags.cpp | 25 +- | ||
30 | llvm/include/llvm/CodeGen/CommandFlags.h | 149 +++++ | ||
31 | .../llvm/MC/MCTargetOptionsCommandFlags.h | 54 ++ | ||
32 | .../llvm/MC/MCTargetOptionsCommandFlags.inc | 65 -- | ||
33 | llvm/include/llvm/module.modulemap | 3 - | ||
34 | llvm/lib/CodeGen/CMakeLists.txt | 1 + | ||
35 | llvm/lib/CodeGen/CommandFlags.cpp | 588 ++++++++++++++++++ | ||
36 | llvm/lib/MC/CMakeLists.txt | 1 + | ||
37 | llvm/lib/MC/MCTargetOptionsCommandFlags.cpp | 105 ++++ | ||
38 | llvm/tools/dsymutil/DwarfStreamer.cpp | 7 +- | ||
39 | llvm/tools/gold/gold-plugin.cpp | 16 +- | ||
40 | llvm/tools/llc/CMakeLists.txt | 1 + | ||
41 | llvm/tools/llc/llc.cpp | 43 +- | ||
42 | llvm/tools/lli/CMakeLists.txt | 1 + | ||
43 | llvm/tools/lli/lli.cpp | 41 +- | ||
44 | llvm/tools/llvm-dwp/llvm-dwp.cpp | 6 +- | ||
45 | .../llvm-isel-fuzzer/llvm-isel-fuzzer.cpp | 17 +- | ||
46 | llvm/tools/llvm-lto/CMakeLists.txt | 5 +- | ||
47 | llvm/tools/llvm-lto/llvm-lto.cpp | 28 +- | ||
48 | llvm/tools/llvm-lto2/CMakeLists.txt | 1 + | ||
49 | llvm/tools/llvm-lto2/llvm-lto2.cpp | 20 +- | ||
50 | .../llvm-mc-assemble-fuzzer/CMakeLists.txt | 1 + | ||
51 | .../llvm-mc-assemble-fuzzer.cpp | 12 +- | ||
52 | llvm/tools/llvm-mc/CMakeLists.txt | 1 + | ||
53 | llvm/tools/llvm-mc/llvm-mc.cpp | 6 +- | ||
54 | llvm/tools/llvm-mca/llvm-mca.cpp | 8 +- | ||
55 | .../tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp | 16 +- | ||
56 | llvm/tools/lto/CMakeLists.txt | 4 +- | ||
57 | llvm/tools/lto/lto.cpp | 35 +- | ||
58 | llvm/tools/opt/opt.cpp | 23 +- | ||
59 | .../DebugInfo/DWARF/DwarfGenerator.cpp | 6 +- | ||
60 | 32 files changed, 1088 insertions(+), 233 deletions(-) | ||
61 | create mode 100644 llvm/include/llvm/CodeGen/CommandFlags.h | ||
62 | create mode 100644 llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h | ||
63 | delete mode 100644 llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc | ||
64 | create mode 100644 llvm/lib/CodeGen/CommandFlags.cpp | ||
65 | create mode 100644 llvm/lib/MC/MCTargetOptionsCommandFlags.cpp | ||
66 | |||
67 | diff --git a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp | ||
68 | index d8ab14594185..aefb761cd70c 100644 | ||
69 | --- a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp | ||
70 | +++ b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp | ||
71 | @@ -19,7 +19,7 @@ | ||
72 | #include "llvm/ADT/Triple.h" | ||
73 | #include "llvm/Analysis/TargetLibraryInfo.h" | ||
74 | #include "llvm/Analysis/TargetTransformInfo.h" | ||
75 | -#include "llvm/CodeGen/CommandFlags.inc" | ||
76 | +#include "llvm/CodeGen/CommandFlags.h" | ||
77 | #include "llvm/CodeGen/MachineModuleInfo.h" | ||
78 | #include "llvm/CodeGen/TargetPassConfig.h" | ||
79 | #include "llvm/ExecutionEngine/JITEventListener.h" | ||
80 | @@ -29,9 +29,9 @@ | ||
81 | #include "llvm/ExecutionEngine/RTDyldMemoryManager.h" | ||
82 | #include "llvm/ExecutionEngine/SectionMemoryManager.h" | ||
83 | #include "llvm/IR/IRPrintingPasses.h" | ||
84 | +#include "llvm/IR/LLVMContext.h" | ||
85 | #include "llvm/IR/LegacyPassManager.h" | ||
86 | #include "llvm/IR/LegacyPassNameParser.h" | ||
87 | -#include "llvm/IR/LLVMContext.h" | ||
88 | #include "llvm/IR/Module.h" | ||
89 | #include "llvm/IR/Verifier.h" | ||
90 | #include "llvm/IRReader/IRReader.h" | ||
91 | @@ -42,12 +42,14 @@ | ||
92 | #include "llvm/Support/TargetRegistry.h" | ||
93 | #include "llvm/Support/TargetSelect.h" | ||
94 | #include "llvm/Target/TargetMachine.h" | ||
95 | -#include "llvm/Transforms/IPO/PassManagerBuilder.h" | ||
96 | #include "llvm/Transforms/IPO.h" | ||
97 | +#include "llvm/Transforms/IPO/PassManagerBuilder.h" | ||
98 | #include "llvm/Transforms/Vectorize.h" | ||
99 | |||
100 | using namespace llvm; | ||
101 | |||
102 | +static codegen::RegisterCodeGenFlags CGF; | ||
103 | + | ||
104 | // Define a type for the functions that are compiled and executed | ||
105 | typedef void (*LLVMFunc)(int*, int*, int*, int); | ||
106 | |||
107 | @@ -100,15 +102,17 @@ static std::string OptLLVM(const std::string &IR, CodeGenOpt::Level OLvl) { | ||
108 | ErrorAndExit("Could not parse IR"); | ||
109 | |||
110 | Triple ModuleTriple(M->getTargetTriple()); | ||
111 | - const TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
112 | + const TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
113 | std::string E; | ||
114 | - const Target *TheTarget = TargetRegistry::lookupTarget(MArch, ModuleTriple, E); | ||
115 | - TargetMachine *Machine = | ||
116 | - TheTarget->createTargetMachine(M->getTargetTriple(), getCPUStr(), | ||
117 | - getFeaturesStr(), Options, getRelocModel(), | ||
118 | - getCodeModel(), OLvl); | ||
119 | + const Target *TheTarget = | ||
120 | + TargetRegistry::lookupTarget(codegen::getMArch(), ModuleTriple, E); | ||
121 | + TargetMachine *Machine = TheTarget->createTargetMachine( | ||
122 | + M->getTargetTriple(), codegen::getCPUStr(), codegen::getFeaturesStr(), | ||
123 | + Options, codegen::getExplicitRelocModel(), | ||
124 | + codegen::getExplicitCodeModel(), OLvl); | ||
125 | std::unique_ptr<TargetMachine> TM(Machine); | ||
126 | - setFunctionAttributes(getCPUStr(), getFeaturesStr(), *M); | ||
127 | + codegen::setFunctionAttributes(codegen::getCPUStr(), | ||
128 | + codegen::getFeaturesStr(), *M); | ||
129 | |||
130 | legacy::PassManager Passes; | ||
131 | |||
132 | @@ -154,14 +158,14 @@ static void CreateAndRunJITFunc(const std::string &IR, CodeGenOpt::Level OLvl) { | ||
133 | |||
134 | std::string ErrorMsg; | ||
135 | EngineBuilder builder(std::move(M)); | ||
136 | - builder.setMArch(MArch); | ||
137 | - builder.setMCPU(getCPUStr()); | ||
138 | - builder.setMAttrs(getFeatureList()); | ||
139 | + builder.setMArch(codegen::getMArch()); | ||
140 | + builder.setMCPU(codegen::getCPUStr()); | ||
141 | + builder.setMAttrs(codegen::getFeatureList()); | ||
142 | builder.setErrorStr(&ErrorMsg); | ||
143 | builder.setEngineKind(EngineKind::JIT); | ||
144 | builder.setMCJITMemoryManager(std::make_unique<SectionMemoryManager>()); | ||
145 | builder.setOptLevel(OLvl); | ||
146 | - builder.setTargetOptions(InitTargetOptionsFromCodeGenFlags()); | ||
147 | + builder.setTargetOptions(codegen::InitTargetOptionsFromCodeGenFlags()); | ||
148 | |||
149 | std::unique_ptr<ExecutionEngine> EE(builder.create()); | ||
150 | if (!EE) | ||
151 | diff --git a/lld/Common/TargetOptionsCommandFlags.cpp b/lld/Common/TargetOptionsCommandFlags.cpp | ||
152 | index 0137feb63f37..9b166a3e130a 100644 | ||
153 | --- a/lld/Common/TargetOptionsCommandFlags.cpp | ||
154 | +++ b/lld/Common/TargetOptionsCommandFlags.cpp | ||
155 | @@ -5,35 +5,26 @@ | ||
156 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
157 | // | ||
158 | //===----------------------------------------------------------------------===// | ||
159 | -// | ||
160 | -// This file exists as a place for global variables defined in LLVM's | ||
161 | -// CodeGen/CommandFlags.inc. By putting the resulting object file in | ||
162 | -// an archive and linking with it, the definitions will automatically be | ||
163 | -// included when needed and skipped when already present. | ||
164 | -// | ||
165 | -//===----------------------------------------------------------------------===// | ||
166 | |||
167 | #include "lld/Common/TargetOptionsCommandFlags.h" | ||
168 | |||
169 | -#include "llvm/CodeGen/CommandFlags.inc" | ||
170 | +#include "llvm/CodeGen/CommandFlags.h" | ||
171 | #include "llvm/Target/TargetOptions.h" | ||
172 | |||
173 | -// Define an externally visible version of | ||
174 | -// initTargetOptionsFromCodeGenFlags, so that its functionality can be | ||
175 | -// used without having to include llvm/CodeGen/CommandFlags.inc, which | ||
176 | -// would lead to multiple definitions of the command line flags. | ||
177 | +static llvm::codegen::RegisterCodeGenFlags CGF; | ||
178 | + | ||
179 | llvm::TargetOptions lld::initTargetOptionsFromCodeGenFlags() { | ||
180 | - return ::InitTargetOptionsFromCodeGenFlags(); | ||
181 | + return llvm::codegen::InitTargetOptionsFromCodeGenFlags(); | ||
182 | } | ||
183 | |||
184 | llvm::Optional<llvm::Reloc::Model> lld::getRelocModelFromCMModel() { | ||
185 | - return getRelocModel(); | ||
186 | + return llvm::codegen::getExplicitRelocModel(); | ||
187 | } | ||
188 | |||
189 | llvm::Optional<llvm::CodeModel::Model> lld::getCodeModelFromCMModel() { | ||
190 | - return getCodeModel(); | ||
191 | + return llvm::codegen::getExplicitCodeModel(); | ||
192 | } | ||
193 | |||
194 | -std::string lld::getCPUStr() { return ::getCPUStr(); } | ||
195 | +std::string lld::getCPUStr() { return llvm::codegen::getCPUStr(); } | ||
196 | |||
197 | -std::vector<std::string> lld::getMAttrs() { return ::MAttrs; } | ||
198 | +std::vector<std::string> lld::getMAttrs() { return llvm::codegen::getMAttrs(); } | ||
199 | diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h | ||
200 | new file mode 100644 | ||
201 | index 000000000000..cdec26879f73 | ||
202 | --- /dev/null | ||
203 | +++ b/llvm/include/llvm/CodeGen/CommandFlags.h | ||
204 | @@ -0,0 +1,149 @@ | ||
205 | +//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===// | ||
206 | +// | ||
207 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
208 | +// See https://llvm.org/LICENSE.txt for license information. | ||
209 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
210 | +// | ||
211 | +//===----------------------------------------------------------------------===// | ||
212 | +// | ||
213 | +// This file contains codegen-specific flags that are shared between different | ||
214 | +// command line tools. The tools "llc" and "opt" both use this file to prevent | ||
215 | +// flag duplication. | ||
216 | +// | ||
217 | +//===----------------------------------------------------------------------===// | ||
218 | + | ||
219 | +#include "llvm/ADT/StringExtras.h" | ||
220 | +#include "llvm/IR/Instructions.h" | ||
221 | +#include "llvm/IR/Intrinsics.h" | ||
222 | +#include "llvm/IR/Module.h" | ||
223 | +#include "llvm/MC/MCTargetOptionsCommandFlags.h" | ||
224 | +#include "llvm/MC/SubtargetFeature.h" | ||
225 | +#include "llvm/Support/CodeGen.h" | ||
226 | +#include "llvm/Support/CommandLine.h" | ||
227 | +#include "llvm/Support/Host.h" | ||
228 | +#include "llvm/Target/TargetMachine.h" | ||
229 | +#include "llvm/Target/TargetOptions.h" | ||
230 | +#include <string> | ||
231 | + | ||
232 | +namespace llvm { | ||
233 | + | ||
234 | +namespace codegen { | ||
235 | + | ||
236 | +std::string getMArch(); | ||
237 | + | ||
238 | +std::string getMCPU(); | ||
239 | + | ||
240 | +std::vector<std::string> getMAttrs(); | ||
241 | + | ||
242 | +Reloc::Model getRelocModel(); | ||
243 | +Optional<Reloc::Model> getExplicitRelocModel(); | ||
244 | + | ||
245 | +ThreadModel::Model getThreadModel(); | ||
246 | + | ||
247 | +CodeModel::Model getCodeModel(); | ||
248 | +Optional<CodeModel::Model> getExplicitCodeModel(); | ||
249 | + | ||
250 | +llvm::ExceptionHandling getExceptionModel(); | ||
251 | + | ||
252 | +CodeGenFileType getFileType(); | ||
253 | +Optional<CodeGenFileType> getExplicitFileType(); | ||
254 | + | ||
255 | +CodeGenFileType getFileType(); | ||
256 | + | ||
257 | +llvm::FramePointer::FP getFramePointerUsage(); | ||
258 | + | ||
259 | +bool getEnableUnsafeFPMath(); | ||
260 | + | ||
261 | +bool getEnableNoInfsFPMath(); | ||
262 | + | ||
263 | +bool getEnableNoNaNsFPMath(); | ||
264 | + | ||
265 | +bool getEnableNoSignedZerosFPMath(); | ||
266 | + | ||
267 | +bool getEnableNoTrappingFPMath(); | ||
268 | + | ||
269 | +llvm::FPDenormal::DenormalMode getDenormalFPMath(); | ||
270 | + | ||
271 | +bool getEnableHonorSignDependentRoundingFPMath(); | ||
272 | + | ||
273 | +llvm::FloatABI::ABIType getFloatABIForCalls(); | ||
274 | + | ||
275 | +llvm::FPOpFusion::FPOpFusionMode getFuseFPOps(); | ||
276 | + | ||
277 | +bool getDontPlaceZerosInBSS(); | ||
278 | + | ||
279 | +bool getEnableGuaranteedTailCallOpt(); | ||
280 | + | ||
281 | +bool getDisableTailCalls(); | ||
282 | + | ||
283 | +bool getStackSymbolOrdering(); | ||
284 | + | ||
285 | +unsigned getOverrideStackAlignment(); | ||
286 | + | ||
287 | +bool getStackRealign(); | ||
288 | + | ||
289 | +std::string getTrapFuncName(); | ||
290 | + | ||
291 | +bool getUseCtors(); | ||
292 | + | ||
293 | +bool getRelaxELFRelocations(); | ||
294 | + | ||
295 | +bool getDataSections(); | ||
296 | +Optional<bool> getExplicitDataSections(); | ||
297 | + | ||
298 | +bool getFunctionSections(); | ||
299 | +Optional<bool> getExplicitFunctionSections(); | ||
300 | + | ||
301 | +std::string getBBSections(); | ||
302 | + | ||
303 | +unsigned getTLSSize(); | ||
304 | + | ||
305 | +bool getEmulatedTLS(); | ||
306 | + | ||
307 | +bool getUniqueSectionNames(); | ||
308 | + | ||
309 | +bool getUniqueBBSectionNames(); | ||
310 | + | ||
311 | +llvm::EABI getEABIVersion(); | ||
312 | + | ||
313 | +llvm::DebuggerKind getDebuggerTuningOpt(); | ||
314 | + | ||
315 | +bool getEnableStackSizeSection(); | ||
316 | + | ||
317 | +bool getEnableAddrsig(); | ||
318 | + | ||
319 | +bool getEmitCallSiteInfo(); | ||
320 | + | ||
321 | +bool getEnableDebugEntryValues(); | ||
322 | + | ||
323 | +bool getForceDwarfFrameSection(); | ||
324 | + | ||
325 | +/// Create this object with static storage to register codegen-related command | ||
326 | +/// line options. | ||
327 | +struct RegisterCodeGenFlags { | ||
328 | + RegisterCodeGenFlags(); | ||
329 | +}; | ||
330 | + | ||
331 | +llvm::BasicBlockSection getBBSectionsMode(llvm::TargetOptions &Options); | ||
332 | + | ||
333 | +// Common utility function tightly tied to the options listed here. Initializes | ||
334 | +// a TargetOptions object with CodeGen flags and returns it. | ||
335 | +TargetOptions InitTargetOptionsFromCodeGenFlags(); | ||
336 | + | ||
337 | +std::string getCPUStr(); | ||
338 | + | ||
339 | +std::string getFeaturesStr(); | ||
340 | + | ||
341 | +std::vector<std::string> getFeatureList(); | ||
342 | + | ||
343 | +void renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val); | ||
344 | + | ||
345 | +/// Set function attributes of function \p F based on CPU, Features, and command | ||
346 | +/// line flags. | ||
347 | +void setFunctionAttributes(StringRef CPU, StringRef Features, Function &F); | ||
348 | + | ||
349 | +/// Set function attributes of functions in Module M based on CPU, | ||
350 | +/// Features, and command line flags. | ||
351 | +void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M); | ||
352 | +} // namespace codegen | ||
353 | +} // namespace llvm | ||
354 | diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h | ||
355 | new file mode 100644 | ||
356 | index 000000000000..7a5edf78fdcc | ||
357 | --- /dev/null | ||
358 | +++ b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h | ||
359 | @@ -0,0 +1,54 @@ | ||
360 | +//===-- MCTargetOptionsCommandFlags.h --------------------------*- C++ -*-===// | ||
361 | +// | ||
362 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
363 | +// See https://llvm.org/LICENSE.txt for license information. | ||
364 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
365 | +// | ||
366 | +//===----------------------------------------------------------------------===// | ||
367 | +// | ||
368 | +// This file contains machine code-specific flags that are shared between | ||
369 | +// different command line tools. | ||
370 | +// | ||
371 | +//===----------------------------------------------------------------------===// | ||
372 | + | ||
373 | +#ifndef LLVM_MC_MCTARGETOPTIONSCOMMANDFLAGS_H | ||
374 | +#define LLVM_MC_MCTARGETOPTIONSCOMMANDFLAGS_H | ||
375 | + | ||
376 | +#include "llvm/ADT/Optional.h" | ||
377 | +#include "llvm/MC/MCTargetOptions.h" | ||
378 | +#include "llvm/Support/CommandLine.h" | ||
379 | + | ||
380 | +namespace llvm { | ||
381 | + | ||
382 | +namespace mc { | ||
383 | + | ||
384 | +bool getRelaxAll(); | ||
385 | +Optional<bool> getExplicitRelaxAll(); | ||
386 | + | ||
387 | +bool getIncrementalLinkerCompatible(); | ||
388 | + | ||
389 | +int getDwarfVersion(); | ||
390 | + | ||
391 | +bool getShowMCInst(); | ||
392 | + | ||
393 | +bool getFatalWarnings(); | ||
394 | + | ||
395 | +bool getNoWarn(); | ||
396 | + | ||
397 | +bool getNoDeprecatedWarn(); | ||
398 | + | ||
399 | +std::string getABIName(); | ||
400 | + | ||
401 | +/// Create this object with static storage to register mc-related command | ||
402 | +/// line options. | ||
403 | +struct RegisterMCTargetOptionsFlags { | ||
404 | + RegisterMCTargetOptionsFlags(); | ||
405 | +}; | ||
406 | + | ||
407 | +MCTargetOptions InitMCTargetOptionsFromFlags(); | ||
408 | + | ||
409 | +} // namespace mc | ||
410 | + | ||
411 | +} // namespace llvm | ||
412 | + | ||
413 | +#endif | ||
414 | diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc | ||
415 | deleted file mode 100644 | ||
416 | index 93e21b626eac..000000000000 | ||
417 | --- a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc | ||
418 | +++ /dev/null | ||
419 | @@ -1,65 +0,0 @@ | ||
420 | -//===-- MCTargetOptionsCommandFlags.h --------------------------*- C++ -*-===// | ||
421 | -// | ||
422 | -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
423 | -// See https://llvm.org/LICENSE.txt for license information. | ||
424 | -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
425 | -// | ||
426 | -//===----------------------------------------------------------------------===// | ||
427 | -// | ||
428 | -// This file contains machine code-specific flags that are shared between | ||
429 | -// different command line tools. | ||
430 | -// | ||
431 | -//===----------------------------------------------------------------------===// | ||
432 | - | ||
433 | -#ifndef LLVM_MC_MCTARGETOPTIONSCOMMANDFLAGS_H | ||
434 | -#define LLVM_MC_MCTARGETOPTIONSCOMMANDFLAGS_H | ||
435 | - | ||
436 | -#include "llvm/MC/MCTargetOptions.h" | ||
437 | -#include "llvm/Support/CommandLine.h" | ||
438 | -using namespace llvm; | ||
439 | - | ||
440 | -static cl::opt<bool> RelaxAll("mc-relax-all", | ||
441 | - cl::desc("When used with filetype=obj, " | ||
442 | - "relax all fixups in the emitted object file")); | ||
443 | - | ||
444 | -static cl::opt<bool> IncrementalLinkerCompatible( | ||
445 | - "incremental-linker-compatible", | ||
446 | - cl::desc( | ||
447 | - "When used with filetype=obj, " | ||
448 | - "emit an object file which can be used with an incremental linker")); | ||
449 | - | ||
450 | -static cl::opt<int> DwarfVersion("dwarf-version", cl::desc("Dwarf version"), | ||
451 | - cl::init(0)); | ||
452 | - | ||
453 | -static cl::opt<bool> ShowMCInst("asm-show-inst", | ||
454 | - cl::desc("Emit internal instruction representation to " | ||
455 | - "assembly file")); | ||
456 | - | ||
457 | -static cl::opt<bool> FatalWarnings("fatal-warnings", | ||
458 | - cl::desc("Treat warnings as errors")); | ||
459 | - | ||
460 | -static cl::opt<bool> NoWarn("no-warn", cl::desc("Suppress all warnings")); | ||
461 | -static cl::alias NoWarnW("W", cl::desc("Alias for --no-warn"), cl::aliasopt(NoWarn)); | ||
462 | - | ||
463 | -static cl::opt<bool> NoDeprecatedWarn("no-deprecated-warn", | ||
464 | - cl::desc("Suppress all deprecated warnings")); | ||
465 | - | ||
466 | -static cl::opt<std::string> | ||
467 | -ABIName("target-abi", cl::Hidden, | ||
468 | - cl::desc("The name of the ABI to be targeted from the backend."), | ||
469 | - cl::init("")); | ||
470 | - | ||
471 | -static MCTargetOptions InitMCTargetOptionsFromFlags() { | ||
472 | - MCTargetOptions Options; | ||
473 | - Options.MCRelaxAll = RelaxAll; | ||
474 | - Options.MCIncrementalLinkerCompatible = IncrementalLinkerCompatible; | ||
475 | - Options.DwarfVersion = DwarfVersion; | ||
476 | - Options.ShowMCInst = ShowMCInst; | ||
477 | - Options.ABIName = ABIName; | ||
478 | - Options.MCFatalWarnings = FatalWarnings; | ||
479 | - Options.MCNoWarn = NoWarn; | ||
480 | - Options.MCNoDeprecatedWarn = NoDeprecatedWarn; | ||
481 | - return Options; | ||
482 | -} | ||
483 | - | ||
484 | -#endif | ||
485 | diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap | ||
486 | index d281682ae003..d176b3dfd4be 100644 | ||
487 | --- a/llvm/include/llvm/module.modulemap | ||
488 | +++ b/llvm/include/llvm/module.modulemap | ||
489 | @@ -29,7 +29,6 @@ module LLVM_Backend { | ||
490 | exclude header "CodeGen/LinkAllCodegenComponents.h" | ||
491 | |||
492 | // These are intended for (repeated) textual inclusion. | ||
493 | - textual header "CodeGen/CommandFlags.inc" | ||
494 | textual header "CodeGen/DIEValue.def" | ||
495 | } | ||
496 | } | ||
497 | @@ -308,8 +307,6 @@ module LLVM_MC { | ||
498 | |||
499 | umbrella "MC" | ||
500 | module * { export * } | ||
501 | - | ||
502 | - textual header "MC/MCTargetOptionsCommandFlags.inc" | ||
503 | } | ||
504 | |||
505 | // Used by llvm-tblgen | ||
506 | diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt | ||
507 | index a3916b7c6242..c6be91247017 100644 | ||
508 | --- a/llvm/lib/CodeGen/CMakeLists.txt | ||
509 | +++ b/llvm/lib/CodeGen/CMakeLists.txt | ||
510 | @@ -14,6 +14,7 @@ add_llvm_component_library(LLVMCodeGen | ||
511 | CFIInstrInserter.cpp | ||
512 | CodeGen.cpp | ||
513 | CodeGenPrepare.cpp | ||
514 | + CommandFlags.cpp | ||
515 | CriticalAntiDepBreaker.cpp | ||
516 | DeadMachineInstructionElim.cpp | ||
517 | DetectDeadLanes.cpp | ||
518 | diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp | ||
519 | new file mode 100644 | ||
520 | index 000000000000..7acb84df582f | ||
521 | --- /dev/null | ||
522 | +++ b/llvm/lib/CodeGen/CommandFlags.cpp | ||
523 | @@ -0,0 +1,588 @@ | ||
524 | +//===-- CommandFlags.cpp - Command Line Flags Interface ---------*- C++ -*-===// | ||
525 | +// | ||
526 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
527 | +// See https://llvm.org/LICENSE.txt for license information. | ||
528 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
529 | +// | ||
530 | +//===----------------------------------------------------------------------===// | ||
531 | +// | ||
532 | +// This file contains codegen-specific flags that are shared between different | ||
533 | +// command line tools. The tools "llc" and "opt" both use this file to prevent | ||
534 | +// flag duplication. | ||
535 | +// | ||
536 | +//===----------------------------------------------------------------------===// | ||
537 | + | ||
538 | +#include "llvm/CodeGen/CommandFlags.h" | ||
539 | + | ||
540 | +using namespace llvm; | ||
541 | + | ||
542 | +#define CGOPT(TY, NAME) \ | ||
543 | + static cl::opt<TY> *NAME##View; \ | ||
544 | + TY codegen::get##NAME() { \ | ||
545 | + assert(NAME##View && "RegisterCodeGenFlags not created."); \ | ||
546 | + return *NAME##View; \ | ||
547 | + } | ||
548 | + | ||
549 | +#define CGLIST(TY, NAME) \ | ||
550 | + static cl::list<TY> *NAME##View; \ | ||
551 | + std::vector<TY> codegen::get##NAME() { \ | ||
552 | + assert(NAME##View && "RegisterCodeGenFlags not created."); \ | ||
553 | + return *NAME##View; \ | ||
554 | + } | ||
555 | + | ||
556 | +#define CGOPT_EXP(TY, NAME) \ | ||
557 | + CGOPT(TY, NAME) \ | ||
558 | + Optional<TY> codegen::getExplicit##NAME() { \ | ||
559 | + if (NAME##View->getNumOccurrences()) { \ | ||
560 | + TY res = *NAME##View; \ | ||
561 | + return res; \ | ||
562 | + } \ | ||
563 | + return None; \ | ||
564 | + } | ||
565 | + | ||
566 | +CGOPT(std::string, MArch) | ||
567 | +CGOPT(std::string, MCPU) | ||
568 | +CGLIST(std::string, MAttrs) | ||
569 | +CGOPT_EXP(Reloc::Model, RelocModel) | ||
570 | +CGOPT(ThreadModel::Model, ThreadModel) | ||
571 | +CGOPT_EXP(CodeModel::Model, CodeModel) | ||
572 | +CGOPT(ExceptionHandling, ExceptionModel) | ||
573 | +CGOPT_EXP(CodeGenFileType, FileType) | ||
574 | +CGOPT(FramePointer::FP, FramePointerUsage) | ||
575 | +CGOPT(bool, EnableUnsafeFPMath) | ||
576 | +CGOPT(bool, EnableNoInfsFPMath) | ||
577 | +CGOPT(bool, EnableNoNaNsFPMath) | ||
578 | +CGOPT(bool, EnableNoSignedZerosFPMath) | ||
579 | +CGOPT(bool, EnableNoTrappingFPMath) | ||
580 | +CGOPT(FPDenormal::DenormalMode, DenormalFPMath) | ||
581 | +CGOPT(bool, EnableHonorSignDependentRoundingFPMath) | ||
582 | +CGOPT(FloatABI::ABIType, FloatABIForCalls) | ||
583 | +CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps) | ||
584 | +CGOPT(bool, DontPlaceZerosInBSS) | ||
585 | +CGOPT(bool, EnableGuaranteedTailCallOpt) | ||
586 | +CGOPT(bool, DisableTailCalls) | ||
587 | +CGOPT(bool, StackSymbolOrdering) | ||
588 | +CGOPT(unsigned, OverrideStackAlignment) | ||
589 | +CGOPT(bool, StackRealign) | ||
590 | +CGOPT(std::string, TrapFuncName) | ||
591 | +CGOPT(bool, UseCtors) | ||
592 | +CGOPT(bool, RelaxELFRelocations) | ||
593 | +CGOPT_EXP(bool, DataSections) | ||
594 | +CGOPT_EXP(bool, FunctionSections) | ||
595 | +CGOPT(std::string, BBSections) | ||
596 | +CGOPT(unsigned, TLSSize) | ||
597 | +CGOPT(bool, EmulatedTLS) | ||
598 | +CGOPT(bool, UniqueSectionNames) | ||
599 | +CGOPT(bool, UniqueBBSectionNames) | ||
600 | +CGOPT(EABI, EABIVersion) | ||
601 | +CGOPT(DebuggerKind, DebuggerTuningOpt) | ||
602 | +CGOPT(bool, EnableStackSizeSection) | ||
603 | +CGOPT(bool, EnableAddrsig) | ||
604 | +CGOPT(bool, EmitCallSiteInfo) | ||
605 | +CGOPT(bool, EnableDebugEntryValues) | ||
606 | +CGOPT(bool, ForceDwarfFrameSection) | ||
607 | + | ||
608 | +codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { | ||
609 | +#define CGBINDOPT(NAME) \ | ||
610 | + do { \ | ||
611 | + NAME##View = std::addressof(NAME); \ | ||
612 | + } while (0) | ||
613 | + | ||
614 | + static cl::opt<std::string> MArch( | ||
615 | + "march", cl::desc("Architecture to generate code for (see --version)")); | ||
616 | + CGBINDOPT(MArch); | ||
617 | + | ||
618 | + static cl::opt<std::string> MCPU( | ||
619 | + "mcpu", cl::desc("Target a specific cpu type (-mcpu=help for details)"), | ||
620 | + cl::value_desc("cpu-name"), cl::init("")); | ||
621 | + CGBINDOPT(MCPU); | ||
622 | + | ||
623 | + static cl::list<std::string> MAttrs( | ||
624 | + "mattr", cl::CommaSeparated, | ||
625 | + cl::desc("Target specific attributes (-mattr=help for details)"), | ||
626 | + cl::value_desc("a1,+a2,-a3,...")); | ||
627 | + CGBINDOPT(MAttrs); | ||
628 | + | ||
629 | + static cl::opt<Reloc::Model> RelocModel( | ||
630 | + "relocation-model", cl::desc("Choose relocation model"), | ||
631 | + cl::values( | ||
632 | + clEnumValN(Reloc::Static, "static", "Non-relocatable code"), | ||
633 | + clEnumValN(Reloc::PIC_, "pic", | ||
634 | + "Fully relocatable, position independent code"), | ||
635 | + clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic", | ||
636 | + "Relocatable external references, non-relocatable code"), | ||
637 | + clEnumValN( | ||
638 | + Reloc::ROPI, "ropi", | ||
639 | + "Code and read-only data relocatable, accessed PC-relative"), | ||
640 | + clEnumValN( | ||
641 | + Reloc::RWPI, "rwpi", | ||
642 | + "Read-write data relocatable, accessed relative to static base"), | ||
643 | + clEnumValN(Reloc::ROPI_RWPI, "ropi-rwpi", | ||
644 | + "Combination of ropi and rwpi"))); | ||
645 | + CGBINDOPT(RelocModel); | ||
646 | + | ||
647 | + static cl::opt<ThreadModel::Model> ThreadModel( | ||
648 | + "thread-model", cl::desc("Choose threading model"), | ||
649 | + cl::init(ThreadModel::POSIX), | ||
650 | + cl::values( | ||
651 | + clEnumValN(ThreadModel::POSIX, "posix", "POSIX thread model"), | ||
652 | + clEnumValN(ThreadModel::Single, "single", "Single thread model"))); | ||
653 | + CGBINDOPT(ThreadModel); | ||
654 | + | ||
655 | + static cl::opt<CodeModel::Model> CodeModel( | ||
656 | + "code-model", cl::desc("Choose code model"), | ||
657 | + cl::values(clEnumValN(CodeModel::Tiny, "tiny", "Tiny code model"), | ||
658 | + clEnumValN(CodeModel::Small, "small", "Small code model"), | ||
659 | + clEnumValN(CodeModel::Kernel, "kernel", "Kernel code model"), | ||
660 | + clEnumValN(CodeModel::Medium, "medium", "Medium code model"), | ||
661 | + clEnumValN(CodeModel::Large, "large", "Large code model"))); | ||
662 | + CGBINDOPT(CodeModel); | ||
663 | + | ||
664 | + static cl::opt<ExceptionHandling> ExceptionModel( | ||
665 | + "exception-model", cl::desc("exception model"), | ||
666 | + cl::init(ExceptionHandling::None), | ||
667 | + cl::values( | ||
668 | + clEnumValN(ExceptionHandling::None, "default", | ||
669 | + "default exception handling model"), | ||
670 | + clEnumValN(ExceptionHandling::DwarfCFI, "dwarf", | ||
671 | + "DWARF-like CFI based exception handling"), | ||
672 | + clEnumValN(ExceptionHandling::SjLj, "sjlj", | ||
673 | + "SjLj exception handling"), | ||
674 | + clEnumValN(ExceptionHandling::ARM, "arm", "ARM EHABI exceptions"), | ||
675 | + clEnumValN(ExceptionHandling::WinEH, "wineh", | ||
676 | + "Windows exception model"), | ||
677 | + clEnumValN(ExceptionHandling::Wasm, "wasm", | ||
678 | + "WebAssembly exception handling"))); | ||
679 | + CGBINDOPT(ExceptionModel); | ||
680 | + | ||
681 | + static cl::opt<CodeGenFileType> FileType( | ||
682 | + "filetype", cl::init(CGFT_AssemblyFile), | ||
683 | + cl::desc( | ||
684 | + "Choose a file type (not all types are supported by all targets):"), | ||
685 | + cl::values( | ||
686 | + clEnumValN(CGFT_AssemblyFile, "asm", "Emit an assembly ('.s') file"), | ||
687 | + clEnumValN(CGFT_ObjectFile, "obj", | ||
688 | + "Emit a native object ('.o') file"), | ||
689 | + clEnumValN(CGFT_Null, "null", | ||
690 | + "Emit nothing, for performance testing"))); | ||
691 | + CGBINDOPT(FileType); | ||
692 | + | ||
693 | + static cl::opt<FramePointer::FP> FramePointerUsage( | ||
694 | + "frame-pointer", | ||
695 | + cl::desc("Specify frame pointer elimination optimization"), | ||
696 | + cl::init(FramePointer::None), | ||
697 | + cl::values( | ||
698 | + clEnumValN(FramePointer::All, "all", | ||
699 | + "Disable frame pointer elimination"), | ||
700 | + clEnumValN(FramePointer::NonLeaf, "non-leaf", | ||
701 | + "Disable frame pointer elimination for non-leaf frame"), | ||
702 | + clEnumValN(FramePointer::None, "none", | ||
703 | + "Enable frame pointer elimination"))); | ||
704 | + CGBINDOPT(FramePointerUsage); | ||
705 | + | ||
706 | + static cl::opt<bool> EnableUnsafeFPMath( | ||
707 | + "enable-unsafe-fp-math", | ||
708 | + cl::desc("Enable optimizations that may decrease FP precision"), | ||
709 | + cl::init(false)); | ||
710 | + CGBINDOPT(EnableUnsafeFPMath); | ||
711 | + | ||
712 | + static cl::opt<bool> EnableNoInfsFPMath( | ||
713 | + "enable-no-infs-fp-math", | ||
714 | + cl::desc("Enable FP math optimizations that assume no +-Infs"), | ||
715 | + cl::init(false)); | ||
716 | + CGBINDOPT(EnableNoInfsFPMath); | ||
717 | + | ||
718 | + static cl::opt<bool> EnableNoNaNsFPMath( | ||
719 | + "enable-no-nans-fp-math", | ||
720 | + cl::desc("Enable FP math optimizations that assume no NaNs"), | ||
721 | + cl::init(false)); | ||
722 | + CGBINDOPT(EnableNoNaNsFPMath); | ||
723 | + | ||
724 | + static cl::opt<bool> EnableNoSignedZerosFPMath( | ||
725 | + "enable-no-signed-zeros-fp-math", | ||
726 | + cl::desc("Enable FP math optimizations that assume " | ||
727 | + "the sign of 0 is insignificant"), | ||
728 | + cl::init(false)); | ||
729 | + CGBINDOPT(EnableNoSignedZerosFPMath); | ||
730 | + | ||
731 | + static cl::opt<bool> EnableNoTrappingFPMath( | ||
732 | + "enable-no-trapping-fp-math", | ||
733 | + cl::desc("Enable setting the FP exceptions build " | ||
734 | + "attribute not to use exceptions"), | ||
735 | + cl::init(false)); | ||
736 | + CGBINDOPT(EnableNoTrappingFPMath); | ||
737 | + | ||
738 | + static cl::opt<FPDenormal::DenormalMode> DenormalFPMath( | ||
739 | + "denormal-fp-math", | ||
740 | + cl::desc( | ||
741 | + "Select which denormal numbers the code is permitted to require"), | ||
742 | + cl::init(FPDenormal::IEEE), | ||
743 | + cl::values( | ||
744 | + clEnumValN(FPDenormal::IEEE, "ieee", "IEEE 754 denormal numbers"), | ||
745 | + clEnumValN(FPDenormal::PreserveSign, "preserve-sign", | ||
746 | + "the sign of a flushed-to-zero number is preserved " | ||
747 | + "in the sign of 0"), | ||
748 | + clEnumValN(FPDenormal::PositiveZero, "positive-zero", | ||
749 | + "denormals are flushed to positive zero"))); | ||
750 | + CGBINDOPT(DenormalFPMath); | ||
751 | + | ||
752 | + static cl::opt<bool> EnableHonorSignDependentRoundingFPMath( | ||
753 | + "enable-sign-dependent-rounding-fp-math", cl::Hidden, | ||
754 | + cl::desc("Force codegen to assume rounding mode can change dynamically"), | ||
755 | + cl::init(false)); | ||
756 | + CGBINDOPT(EnableHonorSignDependentRoundingFPMath); | ||
757 | + | ||
758 | + static cl::opt<FloatABI::ABIType> FloatABIForCalls( | ||
759 | + "float-abi", cl::desc("Choose float ABI type"), | ||
760 | + cl::init(FloatABI::Default), | ||
761 | + cl::values(clEnumValN(FloatABI::Default, "default", | ||
762 | + "Target default float ABI type"), | ||
763 | + clEnumValN(FloatABI::Soft, "soft", | ||
764 | + "Soft float ABI (implied by -soft-float)"), | ||
765 | + clEnumValN(FloatABI::Hard, "hard", | ||
766 | + "Hard float ABI (uses FP registers)"))); | ||
767 | + CGBINDOPT(FloatABIForCalls); | ||
768 | + | ||
769 | + static cl::opt<FPOpFusion::FPOpFusionMode> FuseFPOps( | ||
770 | + "fp-contract", cl::desc("Enable aggressive formation of fused FP ops"), | ||
771 | + cl::init(FPOpFusion::Standard), | ||
772 | + cl::values( | ||
773 | + clEnumValN(FPOpFusion::Fast, "fast", | ||
774 | + "Fuse FP ops whenever profitable"), | ||
775 | + clEnumValN(FPOpFusion::Standard, "on", "Only fuse 'blessed' FP ops."), | ||
776 | + clEnumValN(FPOpFusion::Strict, "off", | ||
777 | + "Only fuse FP ops when the result won't be affected."))); | ||
778 | + CGBINDOPT(FuseFPOps); | ||
779 | + | ||
780 | + static cl::opt<bool> DontPlaceZerosInBSS( | ||
781 | + "nozero-initialized-in-bss", | ||
782 | + cl::desc("Don't place zero-initialized symbols into bss section"), | ||
783 | + cl::init(false)); | ||
784 | + CGBINDOPT(DontPlaceZerosInBSS); | ||
785 | + | ||
786 | + static cl::opt<bool> EnableGuaranteedTailCallOpt( | ||
787 | + "tailcallopt", | ||
788 | + cl::desc( | ||
789 | + "Turn fastcc calls into tail calls by (potentially) changing ABI."), | ||
790 | + cl::init(false)); | ||
791 | + CGBINDOPT(EnableGuaranteedTailCallOpt); | ||
792 | + | ||
793 | + static cl::opt<bool> DisableTailCalls( | ||
794 | + "disable-tail-calls", cl::desc("Never emit tail calls"), cl::init(false)); | ||
795 | + CGBINDOPT(DisableTailCalls); | ||
796 | + | ||
797 | + static cl::opt<bool> StackSymbolOrdering( | ||
798 | + "stack-symbol-ordering", cl::desc("Order local stack symbols."), | ||
799 | + cl::init(true)); | ||
800 | + CGBINDOPT(StackSymbolOrdering); | ||
801 | + | ||
802 | + static cl::opt<unsigned> OverrideStackAlignment( | ||
803 | + "stack-alignment", cl::desc("Override default stack alignment"), | ||
804 | + cl::init(0)); | ||
805 | + CGBINDOPT(OverrideStackAlignment); | ||
806 | + | ||
807 | + static cl::opt<bool> StackRealign( | ||
808 | + "stackrealign", | ||
809 | + cl::desc("Force align the stack to the minimum alignment"), | ||
810 | + cl::init(false)); | ||
811 | + CGBINDOPT(StackRealign); | ||
812 | + | ||
813 | + static cl::opt<std::string> TrapFuncName( | ||
814 | + "trap-func", cl::Hidden, | ||
815 | + cl::desc("Emit a call to trap function rather than a trap instruction"), | ||
816 | + cl::init("")); | ||
817 | + CGBINDOPT(TrapFuncName); | ||
818 | + | ||
819 | + static cl::opt<bool> UseCtors("use-ctors", | ||
820 | + cl::desc("Use .ctors instead of .init_array."), | ||
821 | + cl::init(false)); | ||
822 | + CGBINDOPT(UseCtors); | ||
823 | + | ||
824 | + static cl::opt<bool> RelaxELFRelocations( | ||
825 | + "relax-elf-relocations", | ||
826 | + cl::desc( | ||
827 | + "Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"), | ||
828 | + cl::init(false)); | ||
829 | + CGBINDOPT(RelaxELFRelocations); | ||
830 | + | ||
831 | + static cl::opt<bool> DataSections( | ||
832 | + "data-sections", cl::desc("Emit data into separate sections"), | ||
833 | + cl::init(false)); | ||
834 | + CGBINDOPT(DataSections); | ||
835 | + | ||
836 | + static cl::opt<bool> FunctionSections( | ||
837 | + "function-sections", cl::desc("Emit functions into separate sections"), | ||
838 | + cl::init(false)); | ||
839 | + CGBINDOPT(FunctionSections); | ||
840 | + | ||
841 | + static cl::opt<std::string> BBSections( | ||
842 | + "basicblock-sections", | ||
843 | + cl::desc("Emit basic blocks into separate sections"), | ||
844 | + cl::value_desc("all | <function list (file)> | labels | none"), | ||
845 | + cl::init("none")); | ||
846 | + CGBINDOPT(BBSections); | ||
847 | + | ||
848 | + static cl::opt<unsigned> TLSSize( | ||
849 | + "tls-size", cl::desc("Bit size of immediate TLS offsets"), cl::init(0)); | ||
850 | + CGBINDOPT(TLSSize); | ||
851 | + | ||
852 | + static cl::opt<bool> EmulatedTLS( | ||
853 | + "emulated-tls", cl::desc("Use emulated TLS model"), cl::init(false)); | ||
854 | + CGBINDOPT(EmulatedTLS); | ||
855 | + | ||
856 | + static cl::opt<bool> UniqueSectionNames( | ||
857 | + "unique-section-names", cl::desc("Give unique names to every section"), | ||
858 | + cl::init(true)); | ||
859 | + CGBINDOPT(UniqueSectionNames); | ||
860 | + | ||
861 | + static cl::opt<bool> UniqueBBSectionNames( | ||
862 | + "unique-bb-section-names", | ||
863 | + cl::desc("Give unique names to every basic block section"), | ||
864 | + cl::init(false)); | ||
865 | + CGBINDOPT(UniqueBBSectionNames); | ||
866 | + | ||
867 | + static cl::opt<EABI> EABIVersion( | ||
868 | + "meabi", cl::desc("Set EABI type (default depends on triple):"), | ||
869 | + cl::init(EABI::Default), | ||
870 | + cl::values( | ||
871 | + clEnumValN(EABI::Default, "default", "Triple default EABI version"), | ||
872 | + clEnumValN(EABI::EABI4, "4", "EABI version 4"), | ||
873 | + clEnumValN(EABI::EABI5, "5", "EABI version 5"), | ||
874 | + clEnumValN(EABI::GNU, "gnu", "EABI GNU"))); | ||
875 | + CGBINDOPT(EABIVersion); | ||
876 | + | ||
877 | + static cl::opt<DebuggerKind> DebuggerTuningOpt( | ||
878 | + "debugger-tune", cl::desc("Tune debug info for a particular debugger"), | ||
879 | + cl::init(DebuggerKind::Default), | ||
880 | + cl::values( | ||
881 | + clEnumValN(DebuggerKind::GDB, "gdb", "gdb"), | ||
882 | + clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"), | ||
883 | + clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. PS4)"))); | ||
884 | + CGBINDOPT(DebuggerTuningOpt); | ||
885 | + | ||
886 | + static cl::opt<bool> EnableStackSizeSection( | ||
887 | + "stack-size-section", | ||
888 | + cl::desc("Emit a section containing stack size metadata"), | ||
889 | + cl::init(false)); | ||
890 | + CGBINDOPT(EnableStackSizeSection); | ||
891 | + | ||
892 | + static cl::opt<bool> EnableAddrsig( | ||
893 | + "addrsig", cl::desc("Emit an address-significance table"), | ||
894 | + cl::init(false)); | ||
895 | + CGBINDOPT(EnableAddrsig); | ||
896 | + | ||
897 | + static cl::opt<bool> EmitCallSiteInfo( | ||
898 | + "emit-call-site-info", | ||
899 | + cl::desc( | ||
900 | + "Emit call site debug information, if debug information is enabled."), | ||
901 | + cl::init(false)); | ||
902 | + CGBINDOPT(EmitCallSiteInfo); | ||
903 | + | ||
904 | + static cl::opt<bool> EnableDebugEntryValues( | ||
905 | + "debug-entry-values", | ||
906 | + cl::desc("Emit debug info about parameter's entry values"), | ||
907 | + cl::init(false)); | ||
908 | + CGBINDOPT(EnableDebugEntryValues); | ||
909 | + | ||
910 | + static cl::opt<bool> ForceDwarfFrameSection( | ||
911 | + "force-dwarf-frame-section", | ||
912 | + cl::desc("Always emit a debug frame section."), cl::init(false)); | ||
913 | + CGBINDOPT(ForceDwarfFrameSection); | ||
914 | + | ||
915 | +#undef CGBINDOPT | ||
916 | + | ||
917 | + mc::RegisterMCTargetOptionsFlags(); | ||
918 | +} | ||
919 | + | ||
920 | +llvm::BasicBlockSection | ||
921 | +codegen::getBBSectionsMode(llvm::TargetOptions &Options) { | ||
922 | + if (getBBSections() == "all") | ||
923 | + return BasicBlockSection::All; | ||
924 | + else if (getBBSections() == "labels") | ||
925 | + return BasicBlockSection::Labels; | ||
926 | + else if (getBBSections() == "none") | ||
927 | + return BasicBlockSection::None; | ||
928 | + else { | ||
929 | + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = | ||
930 | + MemoryBuffer::getFile(getBBSections()); | ||
931 | + if (!MBOrErr) { | ||
932 | + errs() << "Error loading basic block sections function list file: " | ||
933 | + << MBOrErr.getError().message() << "\n"; | ||
934 | + } else { | ||
935 | + Options.BBSectionsFuncListBuf = std::move(*MBOrErr); | ||
936 | + } | ||
937 | + return BasicBlockSection::List; | ||
938 | + } | ||
939 | +} | ||
940 | + | ||
941 | +// Common utility function tightly tied to the options listed here. Initializes | ||
942 | +// a TargetOptions object with CodeGen flags and returns it. | ||
943 | +TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() { | ||
944 | + TargetOptions Options; | ||
945 | + Options.AllowFPOpFusion = getFuseFPOps(); | ||
946 | + Options.UnsafeFPMath = getEnableUnsafeFPMath(); | ||
947 | + Options.NoInfsFPMath = getEnableNoInfsFPMath(); | ||
948 | + Options.NoNaNsFPMath = getEnableNoNaNsFPMath(); | ||
949 | + Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath(); | ||
950 | + Options.NoTrappingFPMath = getEnableNoTrappingFPMath(); | ||
951 | + Options.FPDenormalMode = getDenormalFPMath(); | ||
952 | + Options.HonorSignDependentRoundingFPMathOption = | ||
953 | + getEnableHonorSignDependentRoundingFPMath(); | ||
954 | + if (getFloatABIForCalls() != FloatABI::Default) | ||
955 | + Options.FloatABIType = getFloatABIForCalls(); | ||
956 | + Options.NoZerosInBSS = getDontPlaceZerosInBSS(); | ||
957 | + Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt(); | ||
958 | + Options.StackAlignmentOverride = getOverrideStackAlignment(); | ||
959 | + Options.StackSymbolOrdering = getStackSymbolOrdering(); | ||
960 | + Options.UseInitArray = !getUseCtors(); | ||
961 | + Options.RelaxELFRelocations = getRelaxELFRelocations(); | ||
962 | + Options.DataSections = getDataSections(); | ||
963 | + Options.FunctionSections = getFunctionSections(); | ||
964 | + Options.BBSections = getBBSectionsMode(Options); | ||
965 | + Options.UniqueSectionNames = getUniqueSectionNames(); | ||
966 | + Options.UniqueBBSectionNames = getUniqueBBSectionNames(); | ||
967 | + Options.TLSSize = getTLSSize(); | ||
968 | + Options.EmulatedTLS = getEmulatedTLS(); | ||
969 | + Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0; | ||
970 | + Options.ExceptionModel = getExceptionModel(); | ||
971 | + Options.EmitStackSizeSection = getEnableStackSizeSection(); | ||
972 | + Options.EmitAddrsig = getEnableAddrsig(); | ||
973 | + Options.EmitCallSiteInfo = getEmitCallSiteInfo(); | ||
974 | + Options.EnableDebugEntryValues = getEnableDebugEntryValues(); | ||
975 | + Options.ForceDwarfFrameSection = getForceDwarfFrameSection(); | ||
976 | + | ||
977 | + Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); | ||
978 | + | ||
979 | + Options.ThreadModel = getThreadModel(); | ||
980 | + Options.EABIVersion = getEABIVersion(); | ||
981 | + Options.DebuggerTuning = getDebuggerTuningOpt(); | ||
982 | + | ||
983 | + return Options; | ||
984 | +} | ||
985 | + | ||
986 | +std::string codegen::getCPUStr() { | ||
987 | + // If user asked for the 'native' CPU, autodetect here. If autodetection fails, | ||
988 | + // this will set the CPU to an empty string which tells the target to | ||
989 | + // pick a basic default. | ||
990 | + if (getMCPU() == "native") | ||
991 | + return std::string(sys::getHostCPUName()); | ||
992 | + | ||
993 | + return getMCPU(); | ||
994 | +} | ||
995 | + | ||
996 | +std::string codegen::getFeaturesStr() { | ||
997 | + SubtargetFeatures Features; | ||
998 | + | ||
999 | + // If user asked for the 'native' CPU, we need to autodetect features. | ||
1000 | + // This is necessary for x86 where the CPU might not support all the | ||
1001 | + // features the autodetected CPU name lists in the target. For example, | ||
1002 | + // not all Sandybridge processors support AVX. | ||
1003 | + if (getMCPU() == "native") { | ||
1004 | + StringMap<bool> HostFeatures; | ||
1005 | + if (sys::getHostCPUFeatures(HostFeatures)) | ||
1006 | + for (auto &F : HostFeatures) | ||
1007 | + Features.AddFeature(F.first(), F.second); | ||
1008 | + } | ||
1009 | + | ||
1010 | + for (auto const &MAttr : getMAttrs()) | ||
1011 | + Features.AddFeature(MAttr); | ||
1012 | + | ||
1013 | + return Features.getString(); | ||
1014 | +} | ||
1015 | + | ||
1016 | +std::vector<std::string> codegen::getFeatureList() { | ||
1017 | + SubtargetFeatures Features; | ||
1018 | + | ||
1019 | + // If user asked for the 'native' CPU, we need to autodetect features. | ||
1020 | + // This is necessary for x86 where the CPU might not support all the | ||
1021 | + // features the autodetected CPU name lists in the target. For example, | ||
1022 | + // not all Sandybridge processors support AVX. | ||
1023 | + if (getMCPU() == "native") { | ||
1024 | + StringMap<bool> HostFeatures; | ||
1025 | + if (sys::getHostCPUFeatures(HostFeatures)) | ||
1026 | + for (auto &F : HostFeatures) | ||
1027 | + Features.AddFeature(F.first(), F.second); | ||
1028 | + } | ||
1029 | + | ||
1030 | + for (auto const &MAttr : getMAttrs()) | ||
1031 | + Features.AddFeature(MAttr); | ||
1032 | + | ||
1033 | + return Features.getFeatures(); | ||
1034 | +} | ||
1035 | + | ||
1036 | +void codegen::renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val) { | ||
1037 | + B.addAttribute(Name, Val ? "true" : "false"); | ||
1038 | +} | ||
1039 | + | ||
1040 | +#define HANDLE_BOOL_ATTR(CL, AttrName) \ | ||
1041 | + do { \ | ||
1042 | + if (CL->getNumOccurrences() > 0 && !F.hasFnAttribute(AttrName)) \ | ||
1043 | + renderBoolStringAttr(NewAttrs, AttrName, *CL); \ | ||
1044 | + } while (0) | ||
1045 | + | ||
1046 | +/// Set function attributes of function \p F based on CPU, Features, and command | ||
1047 | +/// line flags. | ||
1048 | +void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, | ||
1049 | + Function &F) { | ||
1050 | + auto &Ctx = F.getContext(); | ||
1051 | + AttributeList Attrs = F.getAttributes(); | ||
1052 | + AttrBuilder NewAttrs; | ||
1053 | + | ||
1054 | + if (!CPU.empty() && !F.hasFnAttribute("target-cpu")) | ||
1055 | + NewAttrs.addAttribute("target-cpu", CPU); | ||
1056 | + if (!Features.empty()) { | ||
1057 | + // Append the command line features to any that are already on the function. | ||
1058 | + StringRef OldFeatures = | ||
1059 | + F.getFnAttribute("target-features").getValueAsString(); | ||
1060 | + if (OldFeatures.empty()) | ||
1061 | + NewAttrs.addAttribute("target-features", Features); | ||
1062 | + else { | ||
1063 | + SmallString<256> Appended(OldFeatures); | ||
1064 | + Appended.push_back(','); | ||
1065 | + Appended.append(Features); | ||
1066 | + NewAttrs.addAttribute("target-features", Appended); | ||
1067 | + } | ||
1068 | + } | ||
1069 | + if (FramePointerUsageView->getNumOccurrences() > 0 && | ||
1070 | + !F.hasFnAttribute("frame-pointer")) { | ||
1071 | + if (getFramePointerUsage() == FramePointer::All) | ||
1072 | + NewAttrs.addAttribute("frame-pointer", "all"); | ||
1073 | + else if (getFramePointerUsage() == FramePointer::NonLeaf) | ||
1074 | + NewAttrs.addAttribute("frame-pointer", "non-leaf"); | ||
1075 | + else if (getFramePointerUsage() == FramePointer::None) | ||
1076 | + NewAttrs.addAttribute("frame-pointer", "none"); | ||
1077 | + } | ||
1078 | + if (DisableTailCallsView->getNumOccurrences() > 0) | ||
1079 | + NewAttrs.addAttribute("disable-tail-calls", | ||
1080 | + toStringRef(getDisableTailCalls())); | ||
1081 | + if (getStackRealign()) | ||
1082 | + NewAttrs.addAttribute("stackrealign"); | ||
1083 | + | ||
1084 | + HANDLE_BOOL_ATTR(EnableUnsafeFPMathView, "unsafe-fp-math"); | ||
1085 | + HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math"); | ||
1086 | + HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math"); | ||
1087 | + HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math"); | ||
1088 | + | ||
1089 | + if (TrapFuncNameView->getNumOccurrences() > 0) | ||
1090 | + for (auto &B : F) | ||
1091 | + for (auto &I : B) | ||
1092 | + if (auto *Call = dyn_cast<CallInst>(&I)) | ||
1093 | + if (const auto *F = Call->getCalledFunction()) | ||
1094 | + if (F->getIntrinsicID() == Intrinsic::debugtrap || | ||
1095 | + F->getIntrinsicID() == Intrinsic::trap) | ||
1096 | + Call->addAttribute( | ||
1097 | + AttributeList::FunctionIndex, | ||
1098 | + Attribute::get(Ctx, "trap-func-name", getTrapFuncName())); | ||
1099 | + | ||
1100 | + // Let NewAttrs override Attrs. | ||
1101 | + F.setAttributes( | ||
1102 | + Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); | ||
1103 | +} | ||
1104 | + | ||
1105 | +/// Set function attributes of functions in Module M based on CPU, | ||
1106 | +/// Features, and command line flags. | ||
1107 | +void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, | ||
1108 | + Module &M) { | ||
1109 | + for (Function &F : M) | ||
1110 | + setFunctionAttributes(CPU, Features, F); | ||
1111 | +} | ||
1112 | diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt | ||
1113 | index de2e47d8d9b2..ab809daf5273 100644 | ||
1114 | --- a/llvm/lib/MC/CMakeLists.txt | ||
1115 | +++ b/llvm/lib/MC/CMakeLists.txt | ||
1116 | @@ -44,6 +44,7 @@ add_llvm_component_library(LLVMMC | ||
1117 | MCSymbol.cpp | ||
1118 | MCSymbolELF.cpp | ||
1119 | MCTargetOptions.cpp | ||
1120 | + MCTargetOptionsCommandFlags.cpp | ||
1121 | MCValue.cpp | ||
1122 | MCWasmObjectTargetWriter.cpp | ||
1123 | MCWasmStreamer.cpp | ||
1124 | diff --git a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp | ||
1125 | new file mode 100644 | ||
1126 | index 000000000000..3ca34061241a | ||
1127 | --- /dev/null | ||
1128 | +++ b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp | ||
1129 | @@ -0,0 +1,105 @@ | ||
1130 | +//===-- MCTargetOptionsCommandFlags.cpp -----------------------*- C++ -*-===// | ||
1131 | +// | ||
1132 | +// | ||
1133 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
1134 | +// See https://llvm.org/LICENSE.txt for license information. | ||
1135 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
1136 | +// | ||
1137 | +//===----------------------------------------------------------------------===// | ||
1138 | +// | ||
1139 | +// This file contains machine code-specific flags that are shared between | ||
1140 | +// different command line tools. | ||
1141 | +// | ||
1142 | +//===----------------------------------------------------------------------===// | ||
1143 | + | ||
1144 | +#include "llvm/MC/MCTargetOptionsCommandFlags.h" | ||
1145 | + | ||
1146 | +using namespace llvm; | ||
1147 | + | ||
1148 | +#define MCOPT(TY, NAME) \ | ||
1149 | + static cl::opt<TY> *NAME##View; \ | ||
1150 | + TY llvm::mc::get##NAME() { \ | ||
1151 | + assert(NAME##View && "RegisterMCTargetOptionsFlags not created."); \ | ||
1152 | + return *NAME##View; \ | ||
1153 | + } | ||
1154 | + | ||
1155 | +#define MCOPT_EXP(TY, NAME) \ | ||
1156 | + MCOPT(TY, NAME) \ | ||
1157 | + Optional<TY> llvm::mc::getExplicit##NAME() { \ | ||
1158 | + if (NAME##View->getNumOccurrences()) { \ | ||
1159 | + TY res = *NAME##View; \ | ||
1160 | + return res; \ | ||
1161 | + } \ | ||
1162 | + return None; \ | ||
1163 | + } | ||
1164 | + | ||
1165 | +MCOPT_EXP(bool, RelaxAll) | ||
1166 | +MCOPT(bool, IncrementalLinkerCompatible) | ||
1167 | +MCOPT(int, DwarfVersion) | ||
1168 | +MCOPT(bool, ShowMCInst) | ||
1169 | +MCOPT(bool, FatalWarnings) | ||
1170 | +MCOPT(bool, NoWarn) | ||
1171 | +MCOPT(bool, NoDeprecatedWarn) | ||
1172 | +MCOPT(std::string, ABIName) | ||
1173 | + | ||
1174 | +llvm::mc::RegisterMCTargetOptionsFlags::RegisterMCTargetOptionsFlags() { | ||
1175 | +#define MCBINDOPT(NAME) \ | ||
1176 | + do { \ | ||
1177 | + NAME##View = std::addressof(NAME); \ | ||
1178 | + } while (0) | ||
1179 | + | ||
1180 | + static cl::opt<bool> RelaxAll( | ||
1181 | + "mc-relax-all", cl::desc("When used with filetype=obj, relax all fixups " | ||
1182 | + "in the emitted object file")); | ||
1183 | + MCBINDOPT(RelaxAll); | ||
1184 | + | ||
1185 | + static cl::opt<bool> IncrementalLinkerCompatible( | ||
1186 | + "incremental-linker-compatible", | ||
1187 | + cl::desc( | ||
1188 | + "When used with filetype=obj, " | ||
1189 | + "emit an object file which can be used with an incremental linker")); | ||
1190 | + MCBINDOPT(IncrementalLinkerCompatible); | ||
1191 | + | ||
1192 | + static cl::opt<int> DwarfVersion("dwarf-version", cl::desc("Dwarf version"), | ||
1193 | + cl::init(0)); | ||
1194 | + MCBINDOPT(DwarfVersion); | ||
1195 | + | ||
1196 | + static cl::opt<bool> ShowMCInst( | ||
1197 | + "asm-show-inst", | ||
1198 | + cl::desc("Emit internal instruction representation to assembly file")); | ||
1199 | + MCBINDOPT(ShowMCInst); | ||
1200 | + | ||
1201 | + static cl::opt<bool> FatalWarnings("fatal-warnings", | ||
1202 | + cl::desc("Treat warnings as errors")); | ||
1203 | + MCBINDOPT(FatalWarnings); | ||
1204 | + | ||
1205 | + static cl::opt<bool> NoWarn("no-warn", cl::desc("Suppress all warnings")); | ||
1206 | + static cl::alias NoWarnW("W", cl::desc("Alias for --no-warn"), | ||
1207 | + cl::aliasopt(NoWarn)); | ||
1208 | + MCBINDOPT(NoWarn); | ||
1209 | + | ||
1210 | + static cl::opt<bool> NoDeprecatedWarn( | ||
1211 | + "no-deprecated-warn", cl::desc("Suppress all deprecated warnings")); | ||
1212 | + MCBINDOPT(NoDeprecatedWarn); | ||
1213 | + | ||
1214 | + static cl::opt<std::string> ABIName( | ||
1215 | + "target-abi", cl::Hidden, | ||
1216 | + cl::desc("The name of the ABI to be targeted from the backend."), | ||
1217 | + cl::init("")); | ||
1218 | + MCBINDOPT(ABIName); | ||
1219 | + | ||
1220 | +#undef MCBINDOPT | ||
1221 | +} | ||
1222 | + | ||
1223 | +MCTargetOptions llvm::mc::InitMCTargetOptionsFromFlags() { | ||
1224 | + MCTargetOptions Options; | ||
1225 | + Options.MCRelaxAll = getRelaxAll(); | ||
1226 | + Options.MCIncrementalLinkerCompatible = getIncrementalLinkerCompatible(); | ||
1227 | + Options.DwarfVersion = getDwarfVersion(); | ||
1228 | + Options.ShowMCInst = getShowMCInst(); | ||
1229 | + Options.ABIName = getABIName(); | ||
1230 | + Options.MCFatalWarnings = getFatalWarnings(); | ||
1231 | + Options.MCNoWarn = getNoWarn(); | ||
1232 | + Options.MCNoDeprecatedWarn = getNoDeprecatedWarn(); | ||
1233 | + return Options; | ||
1234 | +} | ||
1235 | diff --git a/llvm/tools/dsymutil/DwarfStreamer.cpp b/llvm/tools/dsymutil/DwarfStreamer.cpp | ||
1236 | index 3e132c29eada..eb068effbc71 100644 | ||
1237 | --- a/llvm/tools/dsymutil/DwarfStreamer.cpp | ||
1238 | +++ b/llvm/tools/dsymutil/DwarfStreamer.cpp | ||
1239 | @@ -13,13 +13,16 @@ | ||
1240 | #include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" | ||
1241 | #include "llvm/DebugInfo/DWARF/DWARFContext.h" | ||
1242 | #include "llvm/MC/MCTargetOptions.h" | ||
1243 | -#include "llvm/MC/MCTargetOptionsCommandFlags.inc" | ||
1244 | +#include "llvm/MC/MCTargetOptionsCommandFlags.h" | ||
1245 | #include "llvm/Support/LEB128.h" | ||
1246 | #include "llvm/Support/TargetRegistry.h" | ||
1247 | #include "llvm/Target/TargetMachine.h" | ||
1248 | #include "llvm/Target/TargetOptions.h" | ||
1249 | |||
1250 | namespace llvm { | ||
1251 | + | ||
1252 | +static mc::RegisterMCTargetOptionsFlags MOF; | ||
1253 | + | ||
1254 | namespace dsymutil { | ||
1255 | |||
1256 | /// Retrieve the section named \a SecName in \a Obj. | ||
1257 | @@ -61,7 +64,7 @@ bool DwarfStreamer::init(Triple TheTriple) { | ||
1258 | if (!MRI) | ||
1259 | return error(Twine("no register info for target ") + TripleName, Context); | ||
1260 | |||
1261 | - MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); | ||
1262 | + MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); | ||
1263 | MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); | ||
1264 | if (!MAI) | ||
1265 | return error("no asm info for target " + TripleName, Context); | ||
1266 | diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp | ||
1267 | index 406079dad307..95e72b861197 100644 | ||
1268 | --- a/llvm/tools/gold/gold-plugin.cpp | ||
1269 | +++ b/llvm/tools/gold/gold-plugin.cpp | ||
1270 | @@ -14,7 +14,7 @@ | ||
1271 | #include "llvm/ADT/Statistic.h" | ||
1272 | #include "llvm/Bitcode/BitcodeReader.h" | ||
1273 | #include "llvm/Bitcode/BitcodeWriter.h" | ||
1274 | -#include "llvm/CodeGen/CommandFlags.inc" | ||
1275 | +#include "llvm/CodeGen/CommandFlags.h" | ||
1276 | #include "llvm/Config/config.h" // plugin-api.h requires HAVE_STDINT_H | ||
1277 | #include "llvm/IR/Constants.h" | ||
1278 | #include "llvm/IR/DiagnosticPrinter.h" | ||
1279 | @@ -50,6 +50,8 @@ | ||
1280 | using namespace llvm; | ||
1281 | using namespace lto; | ||
1282 | |||
1283 | +static codegen::RegisterCodeGenFlags CodeGenFlags; | ||
1284 | + | ||
1285 | // FIXME: Remove when binutils 2.31 (containing gold 1.16) is the minimum | ||
1286 | // required version. | ||
1287 | typedef enum ld_plugin_status (*ld_plugin_get_wrap_symbols)( | ||
1288 | @@ -842,21 +844,21 @@ static std::unique_ptr<LTO> createLTO(IndexWriteCallback OnIndexWrite, | ||
1289 | ThinBackend Backend; | ||
1290 | |||
1291 | Conf.CPU = options::mcpu; | ||
1292 | - Conf.Options = InitTargetOptionsFromCodeGenFlags(); | ||
1293 | + Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
1294 | |||
1295 | // Disable the new X86 relax relocations since gold might not support them. | ||
1296 | // FIXME: Check the gold version or add a new option to enable them. | ||
1297 | Conf.Options.RelaxELFRelocations = false; | ||
1298 | |||
1299 | // Toggle function/data sections. | ||
1300 | - if (FunctionSections.getNumOccurrences() == 0) | ||
1301 | + if (!codegen::getExplicitFunctionSections()) | ||
1302 | Conf.Options.FunctionSections = SplitSections; | ||
1303 | - if (DataSections.getNumOccurrences() == 0) | ||
1304 | + if (!codegen::getExplicitDataSections()) | ||
1305 | Conf.Options.DataSections = SplitSections; | ||
1306 | |||
1307 | - Conf.MAttrs = MAttrs; | ||
1308 | - Conf.RelocModel = RelocationModel; | ||
1309 | - Conf.CodeModel = getCodeModel(); | ||
1310 | + Conf.MAttrs = codegen::getMAttrs(); | ||
1311 | + Conf.RelocModel = codegen::getExplicitRelocModel(); | ||
1312 | + Conf.CodeModel = codegen::getExplicitCodeModel(); | ||
1313 | Conf.CGOptLevel = getCGOptLevel(); | ||
1314 | Conf.DisableVerify = options::DisableVerify; | ||
1315 | Conf.OptLevel = options::OptLevel; | ||
1316 | diff --git a/llvm/tools/llc/CMakeLists.txt b/llvm/tools/llc/CMakeLists.txt | ||
1317 | index 880deefa539c..479bc6b55b27 100644 | ||
1318 | --- a/llvm/tools/llc/CMakeLists.txt | ||
1319 | +++ b/llvm/tools/llc/CMakeLists.txt | ||
1320 | @@ -26,4 +26,5 @@ add_llvm_tool(llc | ||
1321 | intrinsics_gen | ||
1322 | SUPPORT_PLUGINS | ||
1323 | ) | ||
1324 | + | ||
1325 | export_executable_symbols(llc) | ||
1326 | diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp | ||
1327 | index b35f8e853c30..4c41ed292fdc 100644 | ||
1328 | --- a/llvm/tools/llc/llc.cpp | ||
1329 | +++ b/llvm/tools/llc/llc.cpp | ||
1330 | @@ -15,7 +15,7 @@ | ||
1331 | #include "llvm/ADT/STLExtras.h" | ||
1332 | #include "llvm/ADT/Triple.h" | ||
1333 | #include "llvm/Analysis/TargetLibraryInfo.h" | ||
1334 | -#include "llvm/CodeGen/CommandFlags.inc" | ||
1335 | +#include "llvm/CodeGen/CommandFlags.h" | ||
1336 | #include "llvm/CodeGen/LinkAllAsmWriterComponents.h" | ||
1337 | #include "llvm/CodeGen/LinkAllCodegenComponents.h" | ||
1338 | #include "llvm/CodeGen/MIRParser/MIRParser.h" | ||
1339 | @@ -55,6 +55,8 @@ | ||
1340 | #include <memory> | ||
1341 | using namespace llvm; | ||
1342 | |||
1343 | +static codegen::RegisterCodeGenFlags CGF; | ||
1344 | + | ||
1345 | // General options for llc. Other pass-specific options are specified | ||
1346 | // within the corresponding llc passes, and target-specific options | ||
1347 | // and back-end code generation options are specified with the target machine. | ||
1348 | @@ -202,7 +204,7 @@ static std::unique_ptr<ToolOutputFile> GetOutputStream(const char *TargetName, | ||
1349 | else | ||
1350 | OutputFilename = IFN; | ||
1351 | |||
1352 | - switch (FileType) { | ||
1353 | + switch (codegen::getFileType()) { | ||
1354 | case CGFT_AssemblyFile: | ||
1355 | if (TargetName[0] == 'c') { | ||
1356 | if (TargetName[1] == 0) | ||
1357 | @@ -229,7 +231,7 @@ static std::unique_ptr<ToolOutputFile> GetOutputStream(const char *TargetName, | ||
1358 | |||
1359 | // Decide if we need "binary" output. | ||
1360 | bool Binary = false; | ||
1361 | - switch (FileType) { | ||
1362 | + switch (codegen::getFileType()) { | ||
1363 | case CGFT_AssemblyFile: | ||
1364 | break; | ||
1365 | case CGFT_ObjectFile: | ||
1366 | @@ -395,14 +397,16 @@ static int compileModule(char **argv, LLVMContext &Context) { | ||
1367 | std::unique_ptr<Module> M; | ||
1368 | std::unique_ptr<MIRParser> MIR; | ||
1369 | Triple TheTriple; | ||
1370 | - std::string CPUStr = getCPUStr(), FeaturesStr = getFeaturesStr(); | ||
1371 | + std::string CPUStr = codegen::getCPUStr(), | ||
1372 | + FeaturesStr = codegen::getFeaturesStr(); | ||
1373 | |||
1374 | // Set attributes on functions as loaded from MIR from command line arguments. | ||
1375 | auto setMIRFunctionAttributes = [&CPUStr, &FeaturesStr](Function &F) { | ||
1376 | - setFunctionAttributes(CPUStr, FeaturesStr, F); | ||
1377 | + codegen::setFunctionAttributes(CPUStr, FeaturesStr, F); | ||
1378 | }; | ||
1379 | |||
1380 | - bool SkipModule = MCPU == "help" || | ||
1381 | + auto MAttrs = codegen::getMAttrs(); | ||
1382 | + bool SkipModule = codegen::getMCPU() == "help" || | ||
1383 | (!MAttrs.empty() && MAttrs.front() == "help"); | ||
1384 | |||
1385 | // If user just wants to list available options, skip module loading | ||
1386 | @@ -433,8 +437,8 @@ static int compileModule(char **argv, LLVMContext &Context) { | ||
1387 | |||
1388 | // Get the target specific parser. | ||
1389 | std::string Error; | ||
1390 | - const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple, | ||
1391 | - Error); | ||
1392 | + const Target *TheTarget = | ||
1393 | + TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error); | ||
1394 | if (!TheTarget) { | ||
1395 | WithColor::error(errs(), argv[0]) << Error; | ||
1396 | return 1; | ||
1397 | @@ -452,7 +456,7 @@ static int compileModule(char **argv, LLVMContext &Context) { | ||
1398 | case '3': OLvl = CodeGenOpt::Aggressive; break; | ||
1399 | } | ||
1400 | |||
1401 | - TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
1402 | + TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
1403 | Options.DisableIntegratedAS = NoIntegratedAssembler; | ||
1404 | Options.MCOptions.ShowMCEncoding = ShowMCEncoding; | ||
1405 | Options.MCOptions.MCUseDwarfDirectory = EnableDwarfDirectory; | ||
1406 | @@ -462,8 +466,8 @@ static int compileModule(char **argv, LLVMContext &Context) { | ||
1407 | Options.MCOptions.SplitDwarfFile = SplitDwarfFile; | ||
1408 | |||
1409 | std::unique_ptr<TargetMachine> Target(TheTarget->createTargetMachine( | ||
1410 | - TheTriple.getTriple(), CPUStr, FeaturesStr, Options, getRelocModel(), | ||
1411 | - getCodeModel(), OLvl)); | ||
1412 | + TheTriple.getTriple(), CPUStr, FeaturesStr, Options, codegen::getExplicitRelocModel(), | ||
1413 | + codegen::getExplicitCodeModel(), OLvl)); | ||
1414 | |||
1415 | assert(Target && "Could not allocate target machine!"); | ||
1416 | |||
1417 | @@ -474,8 +478,8 @@ static int compileModule(char **argv, LLVMContext &Context) { | ||
1418 | return 0; | ||
1419 | |||
1420 | assert(M && "Should have exited if we didn't have a module!"); | ||
1421 | - if (FloatABIForCalls != FloatABI::Default) | ||
1422 | - Options.FloatABIType = FloatABIForCalls; | ||
1423 | + if (codegen::getFloatABIForCalls() != FloatABI::Default) | ||
1424 | + Options.FloatABIType = codegen::getFloatABIForCalls(); | ||
1425 | |||
1426 | // Figure out where we are going to send the output. | ||
1427 | std::unique_ptr<ToolOutputFile> Out = | ||
1428 | @@ -522,10 +526,9 @@ static int compileModule(char **argv, LLVMContext &Context) { | ||
1429 | |||
1430 | // Override function attributes based on CPUStr, FeaturesStr, and command line | ||
1431 | // flags. | ||
1432 | - setFunctionAttributes(CPUStr, FeaturesStr, *M); | ||
1433 | + codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M); | ||
1434 | |||
1435 | - if (RelaxAll.getNumOccurrences() > 0 && | ||
1436 | - FileType != CGFT_ObjectFile) | ||
1437 | + if (mc::getExplicitRelaxAll() && codegen::getFileType() != CGFT_ObjectFile) | ||
1438 | WithColor::warning(errs(), argv[0]) | ||
1439 | << ": warning: ignoring -mc-relax-all because filetype != obj"; | ||
1440 | |||
1441 | @@ -536,7 +539,7 @@ static int compileModule(char **argv, LLVMContext &Context) { | ||
1442 | // so we can memcmp the contents in CompileTwice mode | ||
1443 | SmallVector<char, 0> Buffer; | ||
1444 | std::unique_ptr<raw_svector_ostream> BOS; | ||
1445 | - if ((FileType != CGFT_AssemblyFile && | ||
1446 | + if ((codegen::getFileType() != CGFT_AssemblyFile && | ||
1447 | !Out->os().supportsSeeking()) || | ||
1448 | CompileTwice) { | ||
1449 | BOS = std::make_unique<raw_svector_ostream>(Buffer); | ||
1450 | @@ -575,9 +578,9 @@ static int compileModule(char **argv, LLVMContext &Context) { | ||
1451 | TPC.setInitialized(); | ||
1452 | PM.add(createPrintMIRPass(*OS)); | ||
1453 | PM.add(createFreeMachineFunctionPass()); | ||
1454 | - } else if (Target->addPassesToEmitFile(PM, *OS, | ||
1455 | - DwoOut ? &DwoOut->os() : nullptr, | ||
1456 | - FileType, NoVerify, MMIWP)) { | ||
1457 | + } else if (Target->addPassesToEmitFile( | ||
1458 | + PM, *OS, DwoOut ? &DwoOut->os() : nullptr, | ||
1459 | + codegen::getFileType(), NoVerify, MMIWP)) { | ||
1460 | WithColor::warning(errs(), argv[0]) | ||
1461 | << "target does not support generation of this" | ||
1462 | << " file type!\n"; | ||
1463 | diff --git a/llvm/tools/lli/CMakeLists.txt b/llvm/tools/lli/CMakeLists.txt | ||
1464 | index db163ad131e8..bc6ef213b8fd 100644 | ||
1465 | --- a/llvm/tools/lli/CMakeLists.txt | ||
1466 | +++ b/llvm/tools/lli/CMakeLists.txt | ||
1467 | @@ -53,4 +53,5 @@ add_llvm_tool(lli | ||
1468 | DEPENDS | ||
1469 | intrinsics_gen | ||
1470 | ) | ||
1471 | + | ||
1472 | export_executable_symbols(lli) | ||
1473 | diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp | ||
1474 | index 0efd0df2c12b..b6a4e3f2833c 100644 | ||
1475 | --- a/llvm/tools/lli/lli.cpp | ||
1476 | +++ b/llvm/tools/lli/lli.cpp | ||
1477 | @@ -16,7 +16,7 @@ | ||
1478 | #include "llvm/ADT/StringExtras.h" | ||
1479 | #include "llvm/ADT/Triple.h" | ||
1480 | #include "llvm/Bitcode/BitcodeReader.h" | ||
1481 | -#include "llvm/CodeGen/CommandFlags.inc" | ||
1482 | +#include "llvm/CodeGen/CommandFlags.h" | ||
1483 | #include "llvm/CodeGen/LinkAllCodegenComponents.h" | ||
1484 | #include "llvm/Config/llvm-config.h" | ||
1485 | #include "llvm/ExecutionEngine/GenericValue.h" | ||
1486 | @@ -67,6 +67,8 @@ | ||
1487 | |||
1488 | using namespace llvm; | ||
1489 | |||
1490 | +static codegen::RegisterCodeGenFlags CGF; | ||
1491 | + | ||
1492 | #define DEBUG_TYPE "lli" | ||
1493 | |||
1494 | namespace { | ||
1495 | @@ -410,13 +412,13 @@ int main(int argc, char **argv, char * const *envp) { | ||
1496 | |||
1497 | std::string ErrorMsg; | ||
1498 | EngineBuilder builder(std::move(Owner)); | ||
1499 | - builder.setMArch(MArch); | ||
1500 | - builder.setMCPU(getCPUStr()); | ||
1501 | - builder.setMAttrs(getFeatureList()); | ||
1502 | - if (RelocModel.getNumOccurrences()) | ||
1503 | - builder.setRelocationModel(RelocModel); | ||
1504 | - if (CMModel.getNumOccurrences()) | ||
1505 | - builder.setCodeModel(CMModel); | ||
1506 | + builder.setMArch(codegen::getMArch()); | ||
1507 | + builder.setMCPU(codegen::getCPUStr()); | ||
1508 | + builder.setMAttrs(codegen::getFeatureList()); | ||
1509 | + if (auto RM = codegen::getExplicitRelocModel()) | ||
1510 | + builder.setRelocationModel(RM.getValue()); | ||
1511 | + if (auto CM = codegen::getExplicitCodeModel()) | ||
1512 | + builder.setCodeModel(CM.getValue()); | ||
1513 | builder.setErrorStr(&ErrorMsg); | ||
1514 | builder.setEngineKind(ForceInterpreter | ||
1515 | ? EngineKind::Interpreter | ||
1516 | @@ -448,9 +450,9 @@ int main(int argc, char **argv, char * const *envp) { | ||
1517 | |||
1518 | builder.setOptLevel(getOptLevel()); | ||
1519 | |||
1520 | - TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
1521 | - if (FloatABIForCalls != FloatABI::Default) | ||
1522 | - Options.FloatABIType = FloatABIForCalls; | ||
1523 | + TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
1524 | + if (codegen::getFloatABIForCalls() != FloatABI::Default) | ||
1525 | + Options.FloatABIType = codegen::getFloatABIForCalls(); | ||
1526 | |||
1527 | builder.setTargetOptions(Options); | ||
1528 | |||
1529 | @@ -762,18 +764,15 @@ int runOrcLazyJIT(const char *ProgName) { | ||
1530 | TT.empty() ? ExitOnErr(orc::JITTargetMachineBuilder::detectHost()) | ||
1531 | : orc::JITTargetMachineBuilder(Triple(TT))); | ||
1532 | |||
1533 | - if (!MArch.empty()) | ||
1534 | - Builder.getJITTargetMachineBuilder()->getTargetTriple().setArchName(MArch); | ||
1535 | + if (!codegen::getMArch().empty()) | ||
1536 | + Builder.getJITTargetMachineBuilder()->getTargetTriple().setArchName( | ||
1537 | + codegen::getMArch()); | ||
1538 | |||
1539 | Builder.getJITTargetMachineBuilder() | ||
1540 | - ->setCPU(getCPUStr()) | ||
1541 | - .addFeatures(getFeatureList()) | ||
1542 | - .setRelocationModel(RelocModel.getNumOccurrences() | ||
1543 | - ? Optional<Reloc::Model>(RelocModel) | ||
1544 | - : None) | ||
1545 | - .setCodeModel(CMModel.getNumOccurrences() | ||
1546 | - ? Optional<CodeModel::Model>(CMModel) | ||
1547 | - : None); | ||
1548 | + ->setCPU(codegen::getCPUStr()) | ||
1549 | + .addFeatures(codegen::getFeatureList()) | ||
1550 | + .setRelocationModel(codegen::getExplicitRelocModel()) | ||
1551 | + .setCodeModel(codegen::getExplicitCodeModel()); | ||
1552 | |||
1553 | Builder.setLazyCompileFailureAddr( | ||
1554 | pointerToJITTargetAddress(exitOnLazyCallThroughFailure)); | ||
1555 | diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp | ||
1556 | index 23513ef8fb4e..8cfd433d5da3 100644 | ||
1557 | --- a/llvm/tools/llvm-dwp/llvm-dwp.cpp | ||
1558 | +++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp | ||
1559 | @@ -27,7 +27,7 @@ | ||
1560 | #include "llvm/MC/MCObjectWriter.h" | ||
1561 | #include "llvm/MC/MCRegisterInfo.h" | ||
1562 | #include "llvm/MC/MCStreamer.h" | ||
1563 | -#include "llvm/MC/MCTargetOptionsCommandFlags.inc" | ||
1564 | +#include "llvm/MC/MCTargetOptionsCommandFlags.h" | ||
1565 | #include "llvm/Object/Decompressor.h" | ||
1566 | #include "llvm/Object/ObjectFile.h" | ||
1567 | #include "llvm/Support/DataExtractor.h" | ||
1568 | @@ -46,6 +46,8 @@ | ||
1569 | using namespace llvm; | ||
1570 | using namespace llvm::object; | ||
1571 | |||
1572 | +static mc::RegisterMCTargetOptionsFlags MCTargetOptionsFlags; | ||
1573 | + | ||
1574 | cl::OptionCategory DwpCategory("Specific Options"); | ||
1575 | static cl::list<std::string> InputFiles(cl::Positional, cl::ZeroOrMore, | ||
1576 | cl::desc("<input files>"), | ||
1577 | @@ -676,7 +678,7 @@ int main(int argc, char **argv) { | ||
1578 | if (!MRI) | ||
1579 | return error(Twine("no register info for target ") + TripleName, Context); | ||
1580 | |||
1581 | - MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); | ||
1582 | + MCTargetOptions MCOptions = llvm::mc::InitMCTargetOptionsFromFlags(); | ||
1583 | std::unique_ptr<MCAsmInfo> MAI( | ||
1584 | TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); | ||
1585 | if (!MAI) | ||
1586 | diff --git a/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp b/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp | ||
1587 | index b71ed4a70566..627e9ab4c03f 100644 | ||
1588 | --- a/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp | ||
1589 | +++ b/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp | ||
1590 | @@ -14,7 +14,7 @@ | ||
1591 | #include "llvm/Analysis/TargetLibraryInfo.h" | ||
1592 | #include "llvm/Bitcode/BitcodeReader.h" | ||
1593 | #include "llvm/Bitcode/BitcodeWriter.h" | ||
1594 | -#include "llvm/CodeGen/CommandFlags.inc" | ||
1595 | +#include "llvm/CodeGen/CommandFlags.h" | ||
1596 | #include "llvm/FuzzMutate/FuzzerCLI.h" | ||
1597 | #include "llvm/FuzzMutate/IRMutator.h" | ||
1598 | #include "llvm/FuzzMutate/Operations.h" | ||
1599 | @@ -35,6 +35,8 @@ | ||
1600 | |||
1601 | using namespace llvm; | ||
1602 | |||
1603 | +static codegen::RegisterCodeGenFlags CGF; | ||
1604 | + | ||
1605 | static cl::opt<char> | ||
1606 | OptLevel("O", | ||
1607 | cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " | ||
1608 | @@ -133,14 +135,15 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, | ||
1609 | // Get the target specific parser. | ||
1610 | std::string Error; | ||
1611 | const Target *TheTarget = | ||
1612 | - TargetRegistry::lookupTarget(MArch, TheTriple, Error); | ||
1613 | + TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error); | ||
1614 | if (!TheTarget) { | ||
1615 | errs() << argv[0] << ": " << Error; | ||
1616 | return 1; | ||
1617 | } | ||
1618 | |||
1619 | // Set up the pipeline like llc does. | ||
1620 | - std::string CPUStr = getCPUStr(), FeaturesStr = getFeaturesStr(); | ||
1621 | + std::string CPUStr = codegen::getCPUStr(), | ||
1622 | + FeaturesStr = codegen::getFeaturesStr(); | ||
1623 | |||
1624 | CodeGenOpt::Level OLvl = CodeGenOpt::Default; | ||
1625 | switch (OptLevel) { | ||
1626 | @@ -154,10 +157,10 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, | ||
1627 | case '3': OLvl = CodeGenOpt::Aggressive; break; | ||
1628 | } | ||
1629 | |||
1630 | - TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
1631 | - TM.reset(TheTarget->createTargetMachine(TheTriple.getTriple(), CPUStr, | ||
1632 | - FeaturesStr, Options, getRelocModel(), | ||
1633 | - getCodeModel(), OLvl)); | ||
1634 | + TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
1635 | + TM.reset(TheTarget->createTargetMachine( | ||
1636 | + TheTriple.getTriple(), CPUStr, FeaturesStr, Options, | ||
1637 | + codegen::getExplicitRelocModel(), codegen::getExplicitCodeModel(), OLvl)); | ||
1638 | assert(TM && "Could not allocate target machine!"); | ||
1639 | |||
1640 | // Make sure we print the summary and the current unit when LLVM errors out. | ||
1641 | diff --git a/llvm/tools/llvm-lto/CMakeLists.txt b/llvm/tools/llvm-lto/CMakeLists.txt | ||
1642 | index 69868fb870c0..5128e713eecf 100644 | ||
1643 | --- a/llvm/tools/llvm-lto/CMakeLists.txt | ||
1644 | +++ b/llvm/tools/llvm-lto/CMakeLists.txt | ||
1645 | @@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS | ||
1646 | AllTargetsInfos | ||
1647 | BitReader | ||
1648 | BitWriter | ||
1649 | + CodeGen | ||
1650 | Core | ||
1651 | IRReader | ||
1652 | LTO | ||
1653 | @@ -17,7 +18,5 @@ set(LLVM_LINK_COMPONENTS | ||
1654 | add_llvm_tool(llvm-lto | ||
1655 | llvm-lto.cpp | ||
1656 | |||
1657 | - DEPENDS | ||
1658 | - intrinsics_gen | ||
1659 | + DEPENDS intrinsics_gen | ||
1660 | ) | ||
1661 | - | ||
1662 | diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp | ||
1663 | index b47e68e82850..7886426e8945 100644 | ||
1664 | --- a/llvm/tools/llvm-lto/llvm-lto.cpp | ||
1665 | +++ b/llvm/tools/llvm-lto/llvm-lto.cpp | ||
1666 | @@ -21,7 +21,7 @@ | ||
1667 | #include "llvm/ADT/Twine.h" | ||
1668 | #include "llvm/Bitcode/BitcodeReader.h" | ||
1669 | #include "llvm/Bitcode/BitcodeWriter.h" | ||
1670 | -#include "llvm/CodeGen/CommandFlags.inc" | ||
1671 | +#include "llvm/CodeGen/CommandFlags.h" | ||
1672 | #include "llvm/IR/DiagnosticInfo.h" | ||
1673 | #include "llvm/IR/DiagnosticPrinter.h" | ||
1674 | #include "llvm/IR/LLVMContext.h" | ||
1675 | @@ -62,6 +62,8 @@ | ||
1676 | |||
1677 | using namespace llvm; | ||
1678 | |||
1679 | +static codegen::RegisterCodeGenFlags CGF; | ||
1680 | + | ||
1681 | static cl::opt<char> | ||
1682 | OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " | ||
1683 | "(default = '-O2')"), | ||
1684 | @@ -521,7 +523,7 @@ public: | ||
1685 | ThinLTOCodeGenerator ThinGenerator; | ||
1686 | |||
1687 | ThinLTOProcessing(const TargetOptions &Options) { | ||
1688 | - ThinGenerator.setCodePICModel(getRelocModel()); | ||
1689 | + ThinGenerator.setCodePICModel(codegen::getExplicitRelocModel()); | ||
1690 | ThinGenerator.setTargetOptions(Options); | ||
1691 | ThinGenerator.setCacheDir(ThinLTOCacheDir); | ||
1692 | ThinGenerator.setCachePruningInterval(ThinLTOCachePruningInterval); | ||
1693 | @@ -873,7 +875,7 @@ int main(int argc, char **argv) { | ||
1694 | InitializeAllAsmParsers(); | ||
1695 | |||
1696 | // set up the TargetOptions for the machine | ||
1697 | - TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
1698 | + TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
1699 | |||
1700 | if (ListSymbolsOnly) { | ||
1701 | listSymbols(Options); | ||
1702 | @@ -929,7 +931,7 @@ int main(int argc, char **argv) { | ||
1703 | if (UseDiagnosticHandler) | ||
1704 | CodeGen.setDiagnosticHandler(handleDiagnostics, nullptr); | ||
1705 | |||
1706 | - CodeGen.setCodePICModel(getRelocModel()); | ||
1707 | + CodeGen.setCodePICModel(codegen::getExplicitRelocModel()); | ||
1708 | CodeGen.setFreestanding(EnableFreestanding); | ||
1709 | |||
1710 | CodeGen.setDebugInfo(LTO_DEBUG_MODEL_DWARF); | ||
1711 | @@ -980,22 +982,18 @@ int main(int argc, char **argv) { | ||
1712 | CodeGen.addMustPreserveSymbol(KeptDSOSyms[i]); | ||
1713 | |||
1714 | // Set cpu and attrs strings for the default target/subtarget. | ||
1715 | - CodeGen.setCpu(MCPU.c_str()); | ||
1716 | + CodeGen.setCpu(codegen::getMCPU().c_str()); | ||
1717 | |||
1718 | CodeGen.setOptLevel(OptLevel - '0'); | ||
1719 | |||
1720 | - std::string attrs; | ||
1721 | - for (unsigned i = 0; i < MAttrs.size(); ++i) { | ||
1722 | - if (i > 0) | ||
1723 | - attrs.append(","); | ||
1724 | - attrs.append(MAttrs[i]); | ||
1725 | - } | ||
1726 | - | ||
1727 | - if (!attrs.empty()) | ||
1728 | + auto MAttrs = codegen::getMAttrs(); | ||
1729 | + if (!MAttrs.empty()) { | ||
1730 | + std::string attrs = join(MAttrs, ","); | ||
1731 | CodeGen.setAttr(attrs); | ||
1732 | + } | ||
1733 | |||
1734 | - if (FileType.getNumOccurrences()) | ||
1735 | - CodeGen.setFileType(FileType); | ||
1736 | + if (auto FT = codegen::getExplicitFileType()) | ||
1737 | + CodeGen.setFileType(FT.getValue()); | ||
1738 | |||
1739 | if (!OutputFilename.empty()) { | ||
1740 | if (!CodeGen.optimize(DisableVerify, DisableInline, DisableGVNLoadPRE, | ||
1741 | diff --git a/llvm/tools/llvm-lto2/CMakeLists.txt b/llvm/tools/llvm-lto2/CMakeLists.txt | ||
1742 | index fa2d8624fd94..4d3364175b04 100644 | ||
1743 | --- a/llvm/tools/llvm-lto2/CMakeLists.txt | ||
1744 | +++ b/llvm/tools/llvm-lto2/CMakeLists.txt | ||
1745 | @@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS | ||
1746 | AllTargetsDescs | ||
1747 | AllTargetsInfos | ||
1748 | BitReader | ||
1749 | + CodeGen | ||
1750 | Core | ||
1751 | Linker | ||
1752 | LTO | ||
1753 | diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp | ||
1754 | index 67a677dd45fb..142ba605aa2a 100644 | ||
1755 | --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp | ||
1756 | +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp | ||
1757 | @@ -16,7 +16,7 @@ | ||
1758 | //===----------------------------------------------------------------------===// | ||
1759 | |||
1760 | #include "llvm/Bitcode/BitcodeReader.h" | ||
1761 | -#include "llvm/CodeGen/CommandFlags.inc" | ||
1762 | +#include "llvm/CodeGen/CommandFlags.h" | ||
1763 | #include "llvm/IR/DiagnosticPrinter.h" | ||
1764 | #include "llvm/LTO/Caching.h" | ||
1765 | #include "llvm/LTO/LTO.h" | ||
1766 | @@ -29,6 +29,8 @@ | ||
1767 | using namespace llvm; | ||
1768 | using namespace lto; | ||
1769 | |||
1770 | +static codegen::RegisterCodeGenFlags CGF; | ||
1771 | + | ||
1772 | static cl::opt<char> | ||
1773 | OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " | ||
1774 | "(default = '-O2')"), | ||
1775 | @@ -217,12 +219,12 @@ static int run(int argc, char **argv) { | ||
1776 | exit(1); | ||
1777 | }; | ||
1778 | |||
1779 | - Conf.CPU = MCPU; | ||
1780 | - Conf.Options = InitTargetOptionsFromCodeGenFlags(); | ||
1781 | - Conf.MAttrs = MAttrs; | ||
1782 | - if (auto RM = getRelocModel()) | ||
1783 | - Conf.RelocModel = *RM; | ||
1784 | - Conf.CodeModel = getCodeModel(); | ||
1785 | + Conf.CPU = codegen::getMCPU(); | ||
1786 | + Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
1787 | + Conf.MAttrs = codegen::getMAttrs(); | ||
1788 | + if (auto RM = codegen::getExplicitRelocModel()) | ||
1789 | + Conf.RelocModel = RM.getValue(); | ||
1790 | + Conf.CodeModel = codegen::getExplicitCodeModel(); | ||
1791 | |||
1792 | Conf.DebugPassManager = DebugPassManager; | ||
1793 | |||
1794 | @@ -264,8 +266,8 @@ static int run(int argc, char **argv) { | ||
1795 | return 1; | ||
1796 | } | ||
1797 | |||
1798 | - if (FileType.getNumOccurrences()) | ||
1799 | - Conf.CGFileType = FileType; | ||
1800 | + if (auto FT = codegen::getExplicitFileType()) | ||
1801 | + Conf.CGFileType = FT.getValue(); | ||
1802 | |||
1803 | Conf.OverrideTriple = OverrideTriple; | ||
1804 | Conf.DefaultTriple = DefaultTriple; | ||
1805 | diff --git a/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt b/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt | ||
1806 | index fb6befd3c54a..6bbc502e2eee 100644 | ||
1807 | --- a/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt | ||
1808 | +++ b/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt | ||
1809 | @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS | ||
1810 | MCParser | ||
1811 | Support | ||
1812 | ) | ||
1813 | + | ||
1814 | add_llvm_fuzzer(llvm-mc-assemble-fuzzer | ||
1815 | llvm-mc-assemble-fuzzer.cpp | ||
1816 | ) | ||
1817 | diff --git a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp | ||
1818 | index 6c5961f7027c..29699c634bfa 100644 | ||
1819 | --- a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp | ||
1820 | +++ b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp | ||
1821 | @@ -9,7 +9,6 @@ | ||
1822 | //===----------------------------------------------------------------------===// | ||
1823 | |||
1824 | #include "llvm-c/Target.h" | ||
1825 | -#include "llvm/MC/SubtargetFeature.h" | ||
1826 | #include "llvm/MC/MCAsmBackend.h" | ||
1827 | #include "llvm/MC/MCAsmInfo.h" | ||
1828 | #include "llvm/MC/MCCodeEmitter.h" | ||
1829 | @@ -24,15 +23,16 @@ | ||
1830 | #include "llvm/MC/MCSectionMachO.h" | ||
1831 | #include "llvm/MC/MCStreamer.h" | ||
1832 | #include "llvm/MC/MCSubtargetInfo.h" | ||
1833 | -#include "llvm/MC/MCTargetOptionsCommandFlags.inc" | ||
1834 | -#include "llvm/Support/MemoryBuffer.h" | ||
1835 | +#include "llvm/MC/MCTargetOptionsCommandFlags.h" | ||
1836 | +#include "llvm/MC/SubtargetFeature.h" | ||
1837 | #include "llvm/Support/CommandLine.h" | ||
1838 | #include "llvm/Support/FileUtilities.h" | ||
1839 | -#include "llvm/Support/raw_ostream.h" | ||
1840 | +#include "llvm/Support/MemoryBuffer.h" | ||
1841 | #include "llvm/Support/SourceMgr.h" | ||
1842 | -#include "llvm/Support/TargetSelect.h" | ||
1843 | #include "llvm/Support/TargetRegistry.h" | ||
1844 | +#include "llvm/Support/TargetSelect.h" | ||
1845 | #include "llvm/Support/ToolOutputFile.h" | ||
1846 | +#include "llvm/Support/raw_ostream.h" | ||
1847 | |||
1848 | using namespace llvm; | ||
1849 | |||
1850 | @@ -161,7 +161,7 @@ int AssembleOneInput(const uint8_t *Data, size_t Size) { | ||
1851 | abort(); | ||
1852 | } | ||
1853 | |||
1854 | - MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); | ||
1855 | + MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); | ||
1856 | std::unique_ptr<MCAsmInfo> MAI( | ||
1857 | TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); | ||
1858 | if (!MAI) { | ||
1859 | diff --git a/llvm/tools/llvm-mc/CMakeLists.txt b/llvm/tools/llvm-mc/CMakeLists.txt | ||
1860 | index 15c6dda1b258..94add70b6943 100644 | ||
1861 | --- a/llvm/tools/llvm-mc/CMakeLists.txt | ||
1862 | +++ b/llvm/tools/llvm-mc/CMakeLists.txt | ||
1863 | @@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS | ||
1864 | AllTargetsDescs | ||
1865 | AllTargetsDisassemblers | ||
1866 | AllTargetsInfos | ||
1867 | + CodeGen | ||
1868 | MC | ||
1869 | MCParser | ||
1870 | Support | ||
1871 | diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp | ||
1872 | index 6aa347d98be2..8c1b3cf2cab0 100644 | ||
1873 | --- a/llvm/tools/llvm-mc/llvm-mc.cpp | ||
1874 | +++ b/llvm/tools/llvm-mc/llvm-mc.cpp | ||
1875 | @@ -25,7 +25,7 @@ | ||
1876 | #include "llvm/MC/MCRegisterInfo.h" | ||
1877 | #include "llvm/MC/MCStreamer.h" | ||
1878 | #include "llvm/MC/MCSubtargetInfo.h" | ||
1879 | -#include "llvm/MC/MCTargetOptionsCommandFlags.inc" | ||
1880 | +#include "llvm/MC/MCTargetOptionsCommandFlags.h" | ||
1881 | #include "llvm/Support/CommandLine.h" | ||
1882 | #include "llvm/Support/Compression.h" | ||
1883 | #include "llvm/Support/FileUtilities.h" | ||
1884 | @@ -41,6 +41,8 @@ | ||
1885 | |||
1886 | using namespace llvm; | ||
1887 | |||
1888 | +static mc::RegisterMCTargetOptionsFlags MOF; | ||
1889 | + | ||
1890 | static cl::opt<std::string> | ||
1891 | InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-")); | ||
1892 | |||
1893 | @@ -317,7 +319,7 @@ int main(int argc, char **argv) { | ||
1894 | cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); | ||
1895 | |||
1896 | cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); | ||
1897 | - const MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); | ||
1898 | + const MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); | ||
1899 | setDwarfDebugFlags(argc, argv); | ||
1900 | |||
1901 | setDwarfDebugProducer(); | ||
1902 | diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp | ||
1903 | index fff5906bb59b..eca86768aa5c 100644 | ||
1904 | --- a/llvm/tools/llvm-mca/llvm-mca.cpp | ||
1905 | +++ b/llvm/tools/llvm-mca/llvm-mca.cpp | ||
1906 | @@ -39,7 +39,7 @@ | ||
1907 | #include "llvm/MC/MCObjectFileInfo.h" | ||
1908 | #include "llvm/MC/MCRegisterInfo.h" | ||
1909 | #include "llvm/MC/MCSubtargetInfo.h" | ||
1910 | -#include "llvm/MC/MCTargetOptionsCommandFlags.inc" | ||
1911 | +#include "llvm/MC/MCTargetOptionsCommandFlags.h" | ||
1912 | #include "llvm/MCA/CodeEmitter.h" | ||
1913 | #include "llvm/MCA/Context.h" | ||
1914 | #include "llvm/MCA/InstrBuilder.h" | ||
1915 | @@ -62,6 +62,8 @@ | ||
1916 | |||
1917 | using namespace llvm; | ||
1918 | |||
1919 | +static mc::RegisterMCTargetOptionsFlags MOF; | ||
1920 | + | ||
1921 | static cl::OptionCategory ToolOptions("Tool Options"); | ||
1922 | static cl::OptionCategory ViewOptions("View Options"); | ||
1923 | |||
1924 | @@ -353,7 +355,7 @@ int main(int argc, char **argv) { | ||
1925 | std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName)); | ||
1926 | assert(MRI && "Unable to create target register info!"); | ||
1927 | |||
1928 | - MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); | ||
1929 | + MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); | ||
1930 | std::unique_ptr<MCAsmInfo> MAI( | ||
1931 | TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); | ||
1932 | assert(MAI && "Unable to create target asm info!"); | ||
1933 | @@ -443,7 +445,7 @@ int main(int argc, char **argv) { | ||
1934 | TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx)); | ||
1935 | |||
1936 | std::unique_ptr<MCAsmBackend> MAB(TheTarget->createMCAsmBackend( | ||
1937 | - *STI, *MRI, InitMCTargetOptionsFromFlags())); | ||
1938 | + *STI, *MRI, mc::InitMCTargetOptionsFromFlags())); | ||
1939 | |||
1940 | for (const std::unique_ptr<mca::CodeRegion> &Region : Regions) { | ||
1941 | // Skip empty code regions. | ||
1942 | diff --git a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp | ||
1943 | index cd6c7d380cc6..e353e333f580 100644 | ||
1944 | --- a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp | ||
1945 | +++ b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp | ||
1946 | @@ -12,7 +12,7 @@ | ||
1947 | |||
1948 | #include "llvm/Bitcode/BitcodeReader.h" | ||
1949 | #include "llvm/Bitcode/BitcodeWriter.h" | ||
1950 | -#include "llvm/CodeGen/CommandFlags.inc" | ||
1951 | +#include "llvm/CodeGen/CommandFlags.h" | ||
1952 | #include "llvm/FuzzMutate/FuzzerCLI.h" | ||
1953 | #include "llvm/FuzzMutate/IRMutator.h" | ||
1954 | #include "llvm/IR/Verifier.h" | ||
1955 | @@ -24,6 +24,8 @@ | ||
1956 | |||
1957 | using namespace llvm; | ||
1958 | |||
1959 | +static codegen::RegisterCodeGenFlags CGF; | ||
1960 | + | ||
1961 | static cl::opt<std::string> | ||
1962 | TargetTripleStr("mtriple", cl::desc("Override target triple for module")); | ||
1963 | |||
1964 | @@ -124,7 +126,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { | ||
1965 | |||
1966 | M->setTargetTriple(TM->getTargetTriple().normalize()); | ||
1967 | M->setDataLayout(TM->createDataLayout()); | ||
1968 | - setFunctionAttributes(TM->getTargetCPU(), TM->getTargetFeatureString(), *M); | ||
1969 | + codegen::setFunctionAttributes(TM->getTargetCPU(), | ||
1970 | + TM->getTargetFeatureString(), *M); | ||
1971 | |||
1972 | // Create pass pipeline | ||
1973 | // | ||
1974 | @@ -214,16 +217,17 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize( | ||
1975 | |||
1976 | std::string Error; | ||
1977 | const Target *TheTarget = | ||
1978 | - TargetRegistry::lookupTarget(MArch, TargetTriple, Error); | ||
1979 | + TargetRegistry::lookupTarget(codegen::getMArch(), TargetTriple, Error); | ||
1980 | if (!TheTarget) { | ||
1981 | errs() << *argv[0] << ": " << Error; | ||
1982 | exit(1); | ||
1983 | } | ||
1984 | |||
1985 | - TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
1986 | + TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
1987 | TM.reset(TheTarget->createTargetMachine( | ||
1988 | - TargetTriple.getTriple(), getCPUStr(), getFeaturesStr(), | ||
1989 | - Options, getRelocModel(), getCodeModel(), CodeGenOpt::Default)); | ||
1990 | + TargetTriple.getTriple(), codegen::getCPUStr(), codegen::getFeaturesStr(), | ||
1991 | + Options, codegen::getExplicitRelocModel(), | ||
1992 | + codegen::getExplicitCodeModel(), CodeGenOpt::Default)); | ||
1993 | assert(TM && "Could not allocate target machine!"); | ||
1994 | |||
1995 | // Check that pass pipeline is specified and correct | ||
1996 | diff --git a/llvm/tools/lto/CMakeLists.txt b/llvm/tools/lto/CMakeLists.txt | ||
1997 | index b86e4abd01a7..2963f97cad88 100644 | ||
1998 | --- a/llvm/tools/lto/CMakeLists.txt | ||
1999 | +++ b/llvm/tools/lto/CMakeLists.txt | ||
2000 | @@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS | ||
2001 | AllTargetsInfos | ||
2002 | BitReader | ||
2003 | Core | ||
2004 | + CodeGen | ||
2005 | LTO | ||
2006 | MC | ||
2007 | MCDisassembler | ||
2008 | @@ -20,7 +21,8 @@ set(SOURCES | ||
2009 | |||
2010 | set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/lto.exports) | ||
2011 | |||
2012 | -add_llvm_library(LTO SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS intrinsics_gen) | ||
2013 | +add_llvm_library(LTO SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS | ||
2014 | + intrinsics_gen) | ||
2015 | |||
2016 | install(FILES ${LLVM_MAIN_INCLUDE_DIR}/llvm-c/lto.h | ||
2017 | DESTINATION include/llvm-c | ||
2018 | diff --git a/llvm/tools/lto/lto.cpp b/llvm/tools/lto/lto.cpp | ||
2019 | index 9933af94de1e..6d207b76685f 100644 | ||
2020 | --- a/llvm/tools/lto/lto.cpp | ||
2021 | +++ b/llvm/tools/lto/lto.cpp | ||
2022 | @@ -15,7 +15,7 @@ | ||
2023 | #include "llvm/ADT/STLExtras.h" | ||
2024 | #include "llvm/ADT/StringExtras.h" | ||
2025 | #include "llvm/Bitcode/BitcodeReader.h" | ||
2026 | -#include "llvm/CodeGen/CommandFlags.inc" | ||
2027 | +#include "llvm/CodeGen/CommandFlags.h" | ||
2028 | #include "llvm/IR/DiagnosticInfo.h" | ||
2029 | #include "llvm/IR/DiagnosticPrinter.h" | ||
2030 | #include "llvm/IR/LLVMContext.h" | ||
2031 | @@ -28,6 +28,10 @@ | ||
2032 | #include "llvm/Support/TargetSelect.h" | ||
2033 | #include "llvm/Support/raw_ostream.h" | ||
2034 | |||
2035 | +using namespace llvm; | ||
2036 | + | ||
2037 | +static codegen::RegisterCodeGenFlags CGF; | ||
2038 | + | ||
2039 | // extra command-line flags needed for LTOCodeGenerator | ||
2040 | static cl::opt<char> | ||
2041 | OptLevel("O", | ||
2042 | @@ -154,14 +158,9 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LTOModule, lto_module_t) | ||
2043 | // Convert the subtarget features into a string to pass to LTOCodeGenerator. | ||
2044 | static void lto_add_attrs(lto_code_gen_t cg) { | ||
2045 | LTOCodeGenerator *CG = unwrap(cg); | ||
2046 | - if (MAttrs.size()) { | ||
2047 | - std::string attrs; | ||
2048 | - for (unsigned i = 0; i < MAttrs.size(); ++i) { | ||
2049 | - if (i > 0) | ||
2050 | - attrs.append(","); | ||
2051 | - attrs.append(MAttrs[i]); | ||
2052 | - } | ||
2053 | - | ||
2054 | + auto MAttrs = codegen::getMAttrs(); | ||
2055 | + if (!MAttrs.empty()) { | ||
2056 | + std::string attrs = join(MAttrs, ","); | ||
2057 | CG->setAttr(attrs); | ||
2058 | } | ||
2059 | |||
2060 | @@ -219,7 +218,7 @@ lto_module_is_object_file_in_memory_for_target(const void* mem, | ||
2061 | |||
2062 | lto_module_t lto_module_create(const char* path) { | ||
2063 | lto_initialize(); | ||
2064 | - llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
2065 | + llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
2066 | ErrorOr<std::unique_ptr<LTOModule>> M = | ||
2067 | LTOModule::createFromFile(*LTOContext, StringRef(path), Options); | ||
2068 | if (!M) | ||
2069 | @@ -229,7 +228,7 @@ lto_module_t lto_module_create(const char* path) { | ||
2070 | |||
2071 | lto_module_t lto_module_create_from_fd(int fd, const char *path, size_t size) { | ||
2072 | lto_initialize(); | ||
2073 | - llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
2074 | + llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
2075 | ErrorOr<std::unique_ptr<LTOModule>> M = LTOModule::createFromOpenFile( | ||
2076 | *LTOContext, fd, StringRef(path), size, Options); | ||
2077 | if (!M) | ||
2078 | @@ -242,7 +241,7 @@ lto_module_t lto_module_create_from_fd_at_offset(int fd, const char *path, | ||
2079 | size_t map_size, | ||
2080 | off_t offset) { | ||
2081 | lto_initialize(); | ||
2082 | - llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
2083 | + llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
2084 | ErrorOr<std::unique_ptr<LTOModule>> M = LTOModule::createFromOpenFileSlice( | ||
2085 | *LTOContext, fd, StringRef(path), map_size, offset, Options); | ||
2086 | if (!M) | ||
2087 | @@ -252,7 +251,7 @@ lto_module_t lto_module_create_from_fd_at_offset(int fd, const char *path, | ||
2088 | |||
2089 | lto_module_t lto_module_create_from_memory(const void* mem, size_t length) { | ||
2090 | lto_initialize(); | ||
2091 | - llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
2092 | + llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
2093 | ErrorOr<std::unique_ptr<LTOModule>> M = | ||
2094 | LTOModule::createFromBuffer(*LTOContext, mem, length, Options); | ||
2095 | if (!M) | ||
2096 | @@ -264,7 +263,7 @@ lto_module_t lto_module_create_from_memory_with_path(const void* mem, | ||
2097 | size_t length, | ||
2098 | const char *path) { | ||
2099 | lto_initialize(); | ||
2100 | - llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
2101 | + llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
2102 | ErrorOr<std::unique_ptr<LTOModule>> M = LTOModule::createFromBuffer( | ||
2103 | *LTOContext, mem, length, Options, StringRef(path)); | ||
2104 | if (!M) | ||
2105 | @@ -275,7 +274,7 @@ lto_module_t lto_module_create_from_memory_with_path(const void* mem, | ||
2106 | lto_module_t lto_module_create_in_local_context(const void *mem, size_t length, | ||
2107 | const char *path) { | ||
2108 | lto_initialize(); | ||
2109 | - llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
2110 | + llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
2111 | |||
2112 | // Create a local context. Ownership will be transferred to LTOModule. | ||
2113 | std::unique_ptr<LLVMContext> Context = std::make_unique<LLVMContext>(); | ||
2114 | @@ -294,7 +293,7 @@ lto_module_t lto_module_create_in_codegen_context(const void *mem, | ||
2115 | const char *path, | ||
2116 | lto_code_gen_t cg) { | ||
2117 | lto_initialize(); | ||
2118 | - llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
2119 | + llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
2120 | ErrorOr<std::unique_ptr<LTOModule>> M = LTOModule::createFromBuffer( | ||
2121 | unwrap(cg)->getContext(), mem, length, Options, StringRef(path)); | ||
2122 | return wrap(M->release()); | ||
2123 | @@ -336,7 +335,7 @@ void lto_codegen_set_diagnostic_handler(lto_code_gen_t cg, | ||
2124 | static lto_code_gen_t createCodeGen(bool InLocalContext) { | ||
2125 | lto_initialize(); | ||
2126 | |||
2127 | - TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
2128 | + TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
2129 | |||
2130 | LibLTOCodeGenerator *CodeGen = | ||
2131 | InLocalContext ? new LibLTOCodeGenerator(std::make_unique<LLVMContext>()) | ||
2132 | @@ -484,7 +483,7 @@ void lto_codegen_set_should_embed_uselists(lto_code_gen_t cg, | ||
2133 | thinlto_code_gen_t thinlto_create_codegen(void) { | ||
2134 | lto_initialize(); | ||
2135 | ThinLTOCodeGenerator *CodeGen = new ThinLTOCodeGenerator(); | ||
2136 | - CodeGen->setTargetOptions(InitTargetOptionsFromCodeGenFlags()); | ||
2137 | + CodeGen->setTargetOptions(codegen::InitTargetOptionsFromCodeGenFlags()); | ||
2138 | CodeGen->setFreestanding(EnableFreestanding); | ||
2139 | |||
2140 | if (OptLevel.getNumOccurrences()) { | ||
2141 | diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp | ||
2142 | index 75a6cdc3892b..8d619ef76b7a 100644 | ||
2143 | --- a/llvm/tools/opt/opt.cpp | ||
2144 | +++ b/llvm/tools/opt/opt.cpp | ||
2145 | @@ -22,7 +22,7 @@ | ||
2146 | #include "llvm/Analysis/TargetLibraryInfo.h" | ||
2147 | #include "llvm/Analysis/TargetTransformInfo.h" | ||
2148 | #include "llvm/Bitcode/BitcodeWriterPass.h" | ||
2149 | -#include "llvm/CodeGen/CommandFlags.inc" | ||
2150 | +#include "llvm/CodeGen/CommandFlags.h" | ||
2151 | #include "llvm/CodeGen/TargetPassConfig.h" | ||
2152 | #include "llvm/Config/llvm-config.h" | ||
2153 | #include "llvm/IR/DataLayout.h" | ||
2154 | @@ -61,6 +61,8 @@ | ||
2155 | using namespace llvm; | ||
2156 | using namespace opt_tool; | ||
2157 | |||
2158 | +static codegen::RegisterCodeGenFlags CFG; | ||
2159 | + | ||
2160 | // The OptimizationList is automatically populated with registered Passes by the | ||
2161 | // PassNameParser. | ||
2162 | // | ||
2163 | @@ -470,16 +472,17 @@ static TargetMachine* GetTargetMachine(Triple TheTriple, StringRef CPUStr, | ||
2164 | StringRef FeaturesStr, | ||
2165 | const TargetOptions &Options) { | ||
2166 | std::string Error; | ||
2167 | - const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple, | ||
2168 | - Error); | ||
2169 | + const Target *TheTarget = | ||
2170 | + TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error); | ||
2171 | // Some modules don't specify a triple, and this is okay. | ||
2172 | if (!TheTarget) { | ||
2173 | return nullptr; | ||
2174 | } | ||
2175 | |||
2176 | - return TheTarget->createTargetMachine(TheTriple.getTriple(), CPUStr, | ||
2177 | - FeaturesStr, Options, getRelocModel(), | ||
2178 | - getCodeModel(), GetCodeGenOptLevel()); | ||
2179 | + return TheTarget->createTargetMachine( | ||
2180 | + TheTriple.getTriple(), codegen::getCPUStr(), codegen::getFeaturesStr(), | ||
2181 | + Options, codegen::getExplicitRelocModel(), | ||
2182 | + codegen::getExplicitCodeModel(), GetCodeGenOptLevel()); | ||
2183 | } | ||
2184 | |||
2185 | #ifdef BUILD_EXAMPLES | ||
2186 | @@ -659,11 +662,11 @@ int main(int argc, char **argv) { | ||
2187 | Triple ModuleTriple(M->getTargetTriple()); | ||
2188 | std::string CPUStr, FeaturesStr; | ||
2189 | TargetMachine *Machine = nullptr; | ||
2190 | - const TargetOptions Options = InitTargetOptionsFromCodeGenFlags(); | ||
2191 | + const TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(); | ||
2192 | |||
2193 | if (ModuleTriple.getArch()) { | ||
2194 | - CPUStr = getCPUStr(); | ||
2195 | - FeaturesStr = getFeaturesStr(); | ||
2196 | + CPUStr = codegen::getCPUStr(); | ||
2197 | + FeaturesStr = codegen::getFeaturesStr(); | ||
2198 | Machine = GetTargetMachine(ModuleTriple, CPUStr, FeaturesStr, Options); | ||
2199 | } else if (ModuleTriple.getArchName() != "unknown" && | ||
2200 | ModuleTriple.getArchName() != "") { | ||
2201 | @@ -676,7 +679,7 @@ int main(int argc, char **argv) { | ||
2202 | |||
2203 | // Override function attributes based on CPUStr, FeaturesStr, and command line | ||
2204 | // flags. | ||
2205 | - setFunctionAttributes(CPUStr, FeaturesStr, *M); | ||
2206 | + codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M); | ||
2207 | |||
2208 | // If the output is set to be emitted to standard out, and standard out is a | ||
2209 | // console, print out a warning message and refuse to do it. We don't | ||
2210 | diff --git a/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp b/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp | ||
2211 | index 472d4dd6ad1e..32d1d3c91ff2 100644 | ||
2212 | --- a/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp | ||
2213 | +++ b/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp | ||
2214 | @@ -25,7 +25,7 @@ | ||
2215 | #include "llvm/MC/MCRegisterInfo.h" | ||
2216 | #include "llvm/MC/MCStreamer.h" | ||
2217 | #include "llvm/MC/MCSubtargetInfo.h" | ||
2218 | -#include "llvm/MC/MCTargetOptionsCommandFlags.inc" | ||
2219 | +#include "llvm/MC/MCTargetOptionsCommandFlags.h" | ||
2220 | #include "llvm/PassAnalysisSupport.h" | ||
2221 | #include "llvm/Support/TargetRegistry.h" | ||
2222 | #include "llvm/Support/raw_ostream.h" | ||
2223 | @@ -36,6 +36,8 @@ | ||
2224 | using namespace llvm; | ||
2225 | using namespace dwarf; | ||
2226 | |||
2227 | +mc::RegisterMCTargetOptionsFlags MOF; | ||
2228 | + | ||
2229 | namespace {} // end anonymous namespace | ||
2230 | |||
2231 | //===----------------------------------------------------------------------===// | ||
2232 | @@ -410,7 +412,7 @@ llvm::Error dwarfgen::Generator::init(Triple TheTriple, uint16_t V) { | ||
2233 | TripleName, | ||
2234 | inconvertibleErrorCode()); | ||
2235 | |||
2236 | - MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags(); | ||
2237 | + MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags(); | ||
2238 | MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions)); | ||
2239 | if (!MAI) | ||
2240 | return make_error<StringError>("no asm info for target " + TripleName, | ||
2241 | -- | ||
2242 | 2.33.1 | ||
2243 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-basic-block-sections-support.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-basic-block-sections-support.patch deleted file mode 100644 index f90a79ae..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-basic-block-sections-support.patch +++ /dev/null | |||
@@ -1,237 +0,0 @@ | |||
1 | From d51fdb9f2986747a56c593fa057d531720b39deb Mon Sep 17 00:00:00 2001 | ||
2 | From: Sriraman Tallam <tmsriram@google.com> | ||
3 | Date: Fri, 13 Mar 2020 15:58:57 -0700 | ||
4 | Subject: [PATCH] Basic Block Sections Support. | ||
5 | |||
6 | This is the first in a series of patches to enable Basic Block Sections | ||
7 | in LLVM. | ||
8 | |||
9 | We introduce a new compiler option, -fbasicblock-sections=, which places every | ||
10 | basic block in a unique ELF text section in the object file along with a | ||
11 | symbol labeling the basic block. The linker can then order the basic block | ||
12 | sections in any arbitrary sequence which when done correctly can encapsulate | ||
13 | block layout, function layout and function splitting optimizations. However, | ||
14 | there are a couple of challenges to be addressed for this to be feasible: | ||
15 | |||
16 | 1) The compiler must not allow any implicit fall-through between any two | ||
17 | adjacent basic blocks as they could be reordered at link time to be | ||
18 | non-adjacent. In other words, the compiler must make a fall-through | ||
19 | between adjacent basic blocks explicit by retaining the direct jump | ||
20 | instruction that jumps to the next basic block. These branches can only | ||
21 | be removed later by the linker after the blocks have been reordered. | ||
22 | 2) All inter-basic block branch targets would now need to be resolved by | ||
23 | the linker as they cannot be calculated during compile time. This is | ||
24 | done using static relocations which bloat the size of the object files. | |
25 | Further, the compiler tries to use short branch instructions on some ISAs | ||
26 | for branch offsets that can be accommodated in one byte. This is not | ||
27 | possible with basic block sections as the offset is not determined at | ||
28 | compile time, and long branch instructions have to be used everywhere. | ||
29 | 3) Each additional section bloats object file sizes by tens of bytes. The | ||
30 | number of basic blocks can be potentially very large compared to the | ||
31 | size of functions and can bloat object sizes significantly. Option | ||
32 | fbasicblock-sections= also takes a file path which can be used to | ||
33 | specify a subset of basic blocks that need unique sections to keep | |
34 | the bloat small. | |
35 | 4) Debug Info and CFI need special handling and will be presented as | ||
36 | separate patches. | ||
37 | |||
38 | Basic Block Labels | ||
39 | |||
40 | With -fbasicblock-sections=labels, or when a basic block is placed in a | ||
41 | unique section, it is labelled with a symbol. This allows easy mapping of | ||
42 | virtual addresses from PMU profiles back to the corresponding basic blocks. | ||
43 | Since the number of basic blocks is large, the labeling bloats the symbol | ||
44 | table sizes and the string table sizes significantly. While the binary size | ||
45 | does increase, it does not affect performance as the symbol table is not | ||
46 | loaded in memory during run-time. The string table size bloat is kept very | ||
47 | minimal using a unary naming scheme that uses string suffix compression. | ||
48 | The basic blocks for function foo are named "a.BB.foo", "aa.BB.foo", ... | ||
49 | This turns out to be very good for string table sizes and the bloat in the | ||
50 | string table size for a very large binary is ~8 %. The naming also allows | ||
51 | using the --symbol-ordering-file option in LLD to arbitrarily reorder the | ||
52 | sections. | ||
53 | |||
54 | Differential Revision: https://reviews.llvm.org/D68063 | ||
55 | |||
56 | Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/4dfe92e46542be46d634a7ec24da2f2f889623d0] | ||
57 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
58 | --- | ||
59 | llvm/include/llvm/CodeGen/CommandFlags.inc | 34 ++++++++++++++++++++++ | ||
60 | llvm/include/llvm/Target/TargetMachine.h | 14 +++++++++ | ||
61 | llvm/include/llvm/Target/TargetOptions.h | 31 ++++++++++++++++++-- | ||
62 | 3 files changed, 76 insertions(+), 3 deletions(-) | ||
63 | |||
64 | diff --git a/llvm/include/llvm/CodeGen/CommandFlags.inc b/llvm/include/llvm/CodeGen/CommandFlags.inc | ||
65 | index 8739b644873d..6475a5b19edb 100644 | ||
66 | --- a/llvm/include/llvm/CodeGen/CommandFlags.inc | ||
67 | +++ b/llvm/include/llvm/CodeGen/CommandFlags.inc | ||
68 | @@ -238,6 +238,12 @@ static cl::opt<bool> | ||
69 | cl::desc("Emit functions into separate sections"), | ||
70 | cl::init(false)); | ||
71 | |||
72 | +static cl::opt<std::string> | ||
73 | + BBSections("basicblock-sections", | ||
74 | + cl::desc("Emit basic blocks into separate sections"), | ||
75 | + cl::value_desc("all | <function list (file)> | labels | none"), | ||
76 | + cl::init("none")); | ||
77 | + | ||
78 | static cl::opt<unsigned> TLSSize("tls-size", | ||
79 | cl::desc("Bit size of immediate TLS offsets"), | ||
80 | cl::init(0)); | ||
81 | @@ -251,6 +257,11 @@ static cl::opt<bool> | ||
82 | cl::desc("Give unique names to every section"), | ||
83 | cl::init(true)); | ||
84 | |||
85 | +static cl::opt<bool> UniqueBBSectionNames( | ||
86 | + "unique-bb-section-names", | ||
87 | + cl::desc("Give unique names to every basic block section"), | ||
88 | + cl::init(false)); | ||
89 | + | ||
90 | static cl::opt<llvm::EABI> | ||
91 | EABIVersion("meabi", cl::desc("Set EABI type (default depends on triple):"), | ||
92 | cl::init(EABI::Default), | ||
93 | @@ -285,6 +296,27 @@ static cl::opt<bool> | ||
94 | cl::desc("Always emit a debug frame section."), | ||
95 | cl::init(false)); | ||
96 | |||
97 | +static llvm::BasicBlockSection | ||
98 | +getBBSectionsMode(llvm::TargetOptions &Options) { | ||
99 | + if (BBSections == "all") | ||
100 | + return BasicBlockSection::All; | ||
101 | + else if (BBSections == "labels") | ||
102 | + return BasicBlockSection::Labels; | ||
103 | + else if (BBSections == "none") | ||
104 | + return BasicBlockSection::None; | ||
105 | + else { | ||
106 | + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = | ||
107 | + MemoryBuffer::getFile(BBSections); | ||
108 | + if (!MBOrErr) { | ||
109 | + errs() << "Error loading basic block sections function list file: " | ||
110 | + << MBOrErr.getError().message() << "\n"; | ||
111 | + } else { | ||
112 | + Options.BBSectionsFuncListBuf = std::move(*MBOrErr); | ||
113 | + } | ||
114 | + return BasicBlockSection::List; | ||
115 | + } | ||
116 | +} | ||
117 | + | ||
118 | // Common utility function tightly tied to the options listed here. Initializes | ||
119 | // a TargetOptions object with CodeGen flags and returns it. | ||
120 | static TargetOptions InitTargetOptionsFromCodeGenFlags() { | ||
121 | @@ -308,7 +340,9 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() { | ||
122 | Options.RelaxELFRelocations = RelaxELFRelocations; | ||
123 | Options.DataSections = DataSections; | ||
124 | Options.FunctionSections = FunctionSections; | ||
125 | + Options.BBSections = getBBSectionsMode(Options); | ||
126 | Options.UniqueSectionNames = UniqueSectionNames; | ||
127 | + Options.UniqueBBSectionNames = UniqueBBSectionNames; | ||
128 | Options.TLSSize = TLSSize; | ||
129 | Options.EmulatedTLS = EmulatedTLS; | ||
130 | Options.ExplicitEmulatedTLS = EmulatedTLS.getNumOccurrences() > 0; | ||
131 | diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h | ||
132 | index 176ae39b17a7..4a1f3377f31d 100644 | ||
133 | --- a/llvm/include/llvm/Target/TargetMachine.h | ||
134 | +++ b/llvm/include/llvm/Target/TargetMachine.h | ||
135 | @@ -242,6 +242,9 @@ public: | ||
136 | |||
137 | bool getUniqueSectionNames() const { return Options.UniqueSectionNames; } | ||
138 | |||
139 | + /// Return true if unique basic block section names must be generated. | ||
140 | + bool getUniqueBBSectionNames() const { return Options.UniqueBBSectionNames; } | ||
141 | + | ||
142 | /// Return true if data objects should be emitted into their own section, | ||
143 | /// corresponds to -fdata-sections. | ||
144 | bool getDataSections() const { | ||
145 | @@ -254,6 +257,17 @@ public: | ||
146 | return Options.FunctionSections; | ||
147 | } | ||
148 | |||
149 | + /// If basic blocks should be emitted into their own section, | ||
150 | + /// corresponding to -fbasicblock-sections. | ||
151 | + llvm::BasicBlockSection getBBSectionsType() const { | ||
152 | + return Options.BBSections; | ||
153 | + } | ||
154 | + | ||
155 | + /// Get the list of functions and basic block ids that need unique sections. | ||
156 | + const MemoryBuffer *getBBSectionsFuncListBuf() const { | ||
157 | + return Options.BBSectionsFuncListBuf.get(); | ||
158 | + } | ||
159 | + | ||
160 | /// Get a \c TargetIRAnalysis appropriate for the target. | ||
161 | /// | ||
162 | /// This is used to construct the new pass manager's target IR analysis pass, | ||
163 | diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h | ||
164 | index 84c6ee2a6387..d27c7b0178f0 100644 | ||
165 | --- a/llvm/include/llvm/Target/TargetOptions.h | ||
166 | +++ b/llvm/include/llvm/Target/TargetOptions.h | ||
167 | @@ -16,8 +16,11 @@ | ||
168 | |||
169 | #include "llvm/MC/MCTargetOptions.h" | ||
170 | |||
171 | +#include <memory> | ||
172 | + | ||
173 | namespace llvm { | ||
174 | class MachineFunction; | ||
175 | + class MemoryBuffer; | ||
176 | class Module; | ||
177 | |||
178 | namespace FloatABI { | ||
179 | @@ -63,6 +66,18 @@ namespace llvm { | ||
180 | }; | ||
181 | } | ||
182 | |||
183 | + enum class BasicBlockSection { | ||
184 | + All, // Use Basic Block Sections for all basic blocks. A section | ||
185 | + // for every basic block can significantly bloat object file sizes. | ||
186 | + List, // Get list of functions & BBs from a file. Selectively enables | ||
187 | + // basic block sections for a subset of basic blocks which can be | ||
188 | + // used to control object size bloats from creating sections. | ||
189 | + Labels, // Do not use Basic Block Sections but label basic blocks. This | ||
190 | + // is useful when associating profile counts from virtual addresses | ||
191 | + // to basic blocks. | ||
192 | + None // Do not use Basic Block Sections. | ||
193 | + }; | ||
194 | + | ||
195 | enum class EABI { | ||
196 | Unknown, | ||
197 | Default, // Default means not specified | ||
198 | @@ -114,9 +129,9 @@ namespace llvm { | ||
199 | EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false), | ||
200 | DisableIntegratedAS(false), RelaxELFRelocations(false), | ||
201 | FunctionSections(false), DataSections(false), | ||
202 | - UniqueSectionNames(true), TrapUnreachable(false), | ||
203 | - NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false), | ||
204 | - ExplicitEmulatedTLS(false), EnableIPRA(false), | ||
205 | + UniqueSectionNames(true), UniqueBBSectionNames(false), | ||
206 | + TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0), | ||
207 | + EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false), | ||
208 | EmitStackSizeSection(false), EnableMachineOutliner(false), | ||
209 | SupportsDefaultOutlining(false), EmitAddrsig(false), | ||
210 | EnableDebugEntryValues(false), ForceDwarfFrameSection(false) {} | ||
211 | @@ -224,6 +239,9 @@ namespace llvm { | ||
212 | |||
213 | unsigned UniqueSectionNames : 1; | ||
214 | |||
215 | + /// Use unique names for basic block sections. | ||
216 | + unsigned UniqueBBSectionNames : 1; | ||
217 | + | ||
218 | /// Emit target-specific trap instruction for 'unreachable' IR instructions. | ||
219 | unsigned TrapUnreachable : 1; | ||
220 | |||
221 | @@ -256,6 +274,13 @@ namespace llvm { | ||
222 | /// Emit address-significance table. | ||
223 | unsigned EmitAddrsig : 1; | ||
224 | |||
225 | + /// Emit basic blocks into separate sections. | ||
226 | + BasicBlockSection BBSections = BasicBlockSection::None; | ||
227 | + | ||
228 | + /// Memory Buffer that contains information on sampled basic blocks and used | ||
229 | + /// to selectively generate basic block sections. | ||
230 | + std::shared_ptr<MemoryBuffer> BBSectionsFuncListBuf; | ||
231 | + | ||
232 | /// Emit debug info about parameter's entry values. | ||
233 | unsigned EnableDebugEntryValues : 1; | ||
234 | |||
235 | -- | ||
236 | 2.33.1 | ||
237 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0001-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0001-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch deleted file mode 100644 index 7e6c4f92..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0001-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch +++ /dev/null | |||
@@ -1,51 +0,0 @@ | |||
1 | From 3632f727dfd786a8eca50bd01219669bbe7b0df9 Mon Sep 17 00:00:00 2001 | ||
2 | From: haonanya <haonan.yang@intel.com> | ||
3 | Date: Tue, 11 May 2021 11:13:02 +0800 | ||
4 | Subject: [PATCH 1/3] Remove __IMAGE_SUPPORT__ macro for SPIR since SPIR | ||
5 | doesn't require image support | ||
6 | |||
7 | Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-120/patches/clang/0001-Remove-__IMAGE_SUPPORT__-macro-for-SPIR.patch] | ||
8 | |||
9 | Signed-off-by: haonanya <haonan.yang@intel.com> | ||
10 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
11 | --- | ||
12 | clang/lib/Frontend/InitPreprocessor.cpp | 3 --- | ||
13 | clang/test/Preprocessor/predefined-macros.c | 2 -- | ||
14 | 2 files changed, 5 deletions(-) | ||
15 | |||
16 | diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp | ||
17 | index c64a912ce919..c60972c96e5d 100644 | ||
18 | --- a/clang/lib/Frontend/InitPreprocessor.cpp | ||
19 | +++ b/clang/lib/Frontend/InitPreprocessor.cpp | ||
20 | @@ -1121,9 +1121,6 @@ static void InitializePredefinedMacros(const TargetInfo &TI, | ||
21 | // OpenCL definitions. | ||
22 | if (LangOpts.OpenCL) { | ||
23 | TI.getOpenCLFeatureDefines(LangOpts, Builder); | ||
24 | - | ||
25 | - if (TI.getTriple().isSPIR()) | ||
26 | - Builder.defineMacro("__IMAGE_SUPPORT__"); | ||
27 | } | ||
28 | |||
29 | if (TI.hasInt128Type() && LangOpts.CPlusPlus && LangOpts.GNUMode) { | ||
30 | diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c | ||
31 | index e406b9a70570..88606518c7de 100644 | ||
32 | --- a/clang/test/Preprocessor/predefined-macros.c | ||
33 | +++ b/clang/test/Preprocessor/predefined-macros.c | ||
34 | @@ -188,14 +188,12 @@ | ||
35 | |||
36 | // RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spir-unknown-unknown \ | ||
37 | // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIR | ||
38 | -// CHECK-SPIR-DAG: #define __IMAGE_SUPPORT__ 1 | ||
39 | // CHECK-SPIR-DAG: #define __SPIR__ 1 | ||
40 | // CHECK-SPIR-DAG: #define __SPIR32__ 1 | ||
41 | // CHECK-SPIR-NOT: #define __SPIR64__ 1 | ||
42 | |||
43 | // RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spir64-unknown-unknown \ | ||
44 | // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIR64 | ||
45 | -// CHECK-SPIR64-DAG: #define __IMAGE_SUPPORT__ 1 | ||
46 | // CHECK-SPIR64-DAG: #define __SPIR__ 1 | ||
47 | // CHECK-SPIR64-DAG: #define __SPIR64__ 1 | ||
48 | // CHECK-SPIR64-NOT: #define __SPIR32__ 1 | ||
49 | -- | ||
50 | 2.17.1 | ||
51 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0002-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0002-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch deleted file mode 100644 index 4f7d3e51..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0002-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch +++ /dev/null | |||
@@ -1,52 +0,0 @@ | |||
1 | From 06cf750d2ef892eaa4f0ff5d0a9e9e5c49697264 Mon Sep 17 00:00:00 2001 | ||
2 | From: Raphael Isemann <teemperor@gmail.com> | ||
3 | Date: Thu, 1 Apr 2021 18:41:44 +0200 | ||
4 | Subject: [PATCH 2/3] Avoid calling ParseCommandLineOptions in BackendUtil if | ||
5 | possible | ||
6 | |||
7 | `ParseCommandLineOptions` should only be called from `main`, as the | ||
8 | CommandLine setup code isn't thread-safe. As BackendUtil is part of the | ||
9 | generic Clang FrontendAction logic, a process which has several threads executing | ||
10 | Clang FrontendActions will randomly crash in the unsafe setup code. | ||
11 | |||
12 | This patch avoids calling the function unless either the debug-pass option or | ||
13 | limit-float-precision option is set. Without these two options set, the | ||
14 | `ParseCommandLineOptions` call doesn't do anything besides parsing | ||
15 | the command line `clang`, which doesn't set any options. | ||
16 | |||
17 | See also D99652 where LLDB received a workaround for this crash. | ||
18 | |||
19 | Reviewed By: JDevlieghere | ||
20 | |||
21 | Differential Revision: https://reviews.llvm.org/D99740 | ||
22 | |||
23 | Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-120/patches/clang/0002-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch] | ||
24 | |||
25 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
26 | --- | ||
27 | clang/lib/CodeGen/BackendUtil.cpp | 8 ++++++++ | ||
28 | 1 file changed, 8 insertions(+) | ||
29 | |||
30 | diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp | ||
31 | index 52bcd971dc8c..f9f891247530 100644 | ||
32 | --- a/clang/lib/CodeGen/BackendUtil.cpp | ||
33 | +++ b/clang/lib/CodeGen/BackendUtil.cpp | ||
34 | @@ -850,7 +850,15 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) { | ||
35 | BackendArgs.push_back("-limit-float-precision"); | ||
36 | BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str()); | ||
37 | } | ||
38 | + // Check for the default "clang" invocation that won't set any cl::opt values. | ||
39 | + // Skip trying to parse the command line invocation to avoid the issues | ||
40 | + // described below. | ||
41 | + if (BackendArgs.size() == 1) | ||
42 | + return; | ||
43 | BackendArgs.push_back(nullptr); | ||
44 | + // FIXME: The command line parser below is not thread-safe and shares a global | ||
45 | + // state, so this call might crash or overwrite the options of another Clang | ||
46 | + // instance in the same process. | ||
47 | llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1, | ||
48 | BackendArgs.data()); | ||
49 | } | ||
50 | -- | ||
51 | 2.17.1 | ||
52 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0003-Support-cl_ext_float_atomics.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0003-Support-cl_ext_float_atomics.patch deleted file mode 100644 index 99dbb81c..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0003-Support-cl_ext_float_atomics.patch +++ /dev/null | |||
@@ -1,344 +0,0 @@ | |||
1 | From f1a24eeb89342186c6c718e02dd394775620799f Mon Sep 17 00:00:00 2001 | ||
2 | From: haonanya <haonan.yang@intel.com> | ||
3 | Date: Wed, 28 Jul 2021 14:20:08 +0800 | ||
4 | Subject: [PATCH 3/3] Support cl_ext_float_atomics | ||
5 | |||
6 | Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-120/patches/clang/0003-OpenCL-Support-cl_ext_float_atomics.patch] | ||
7 | |||
8 | Signed-off-by: haonanya <haonan.yang@intel.com> | ||
9 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
10 | --- | ||
11 | clang/lib/Headers/opencl-c-base.h | 19 +++ | ||
12 | clang/lib/Headers/opencl-c.h | 195 ++++++++++++++++++++++++++ | ||
13 | clang/test/Headers/opencl-c-header.cl | 72 ++++++++++ | ||
14 | 3 files changed, 286 insertions(+) | ||
15 | |||
16 | diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h | ||
17 | index e8dcd70377e5..c8b6d36029ec 100644 | ||
18 | --- a/clang/lib/Headers/opencl-c-base.h | ||
19 | +++ b/clang/lib/Headers/opencl-c-base.h | ||
20 | @@ -21,6 +21,25 @@ | ||
21 | #define cl_khr_subgroup_shuffle 1 | ||
22 | #define cl_khr_subgroup_shuffle_relative 1 | ||
23 | #define cl_khr_subgroup_clustered_reduce 1 | ||
24 | +#define cl_ext_float_atomics | ||
25 | +#ifdef cl_khr_fp16 | ||
26 | +#define __opencl_c_ext_fp16_global_atomic_load_store 1 | ||
27 | +#define __opencl_c_ext_fp16_local_atomic_load_store 1 | ||
28 | +#define __opencl_c_ext_fp16_global_atomic_add 1 | ||
29 | +#define __opencl_c_ext_fp16_local_atomic_add 1 | ||
30 | +#define __opencl_c_ext_fp16_global_atomic_min_max 1 | ||
31 | +#define __opencl_c_ext_fp16_local_atomic_min_max 1 | ||
32 | +#endif | ||
33 | +#ifdef __opencl_c_fp64 | ||
34 | +#define __opencl_c_ext_fp64_global_atomic_add 1 | ||
35 | +#define __opencl_c_ext_fp64_local_atomic_add 1 | ||
36 | +#define __opencl_c_ext_fp64_global_atomic_min_max 1 | ||
37 | +#define __opencl_c_ext_fp64_local_atomic_min_max 1 | ||
38 | +#endif | ||
39 | +#define __opencl_c_ext_fp32_global_atomic_add 1 | ||
40 | +#define __opencl_c_ext_fp32_local_atomic_add 1 | ||
41 | +#define __opencl_c_ext_fp32_global_atomic_min_max 1 | ||
42 | +#define __opencl_c_ext_fp32_local_atomic_min_max 1 | ||
43 | #endif // defined(__SPIR__) | ||
44 | #endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
45 | |||
46 | diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h | ||
47 | index ab665628c8e1..6676da858d2a 100644 | ||
48 | --- a/clang/lib/Headers/opencl-c.h | ||
49 | +++ b/clang/lib/Headers/opencl-c.h | ||
50 | @@ -13531,6 +13531,201 @@ intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uint | ||
51 | intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope); | ||
52 | #endif | ||
53 | |||
54 | +#if defined(cl_ext_float_atomics) | ||
55 | + | ||
56 | +#if defined(__opencl_c_ext_fp32_global_atomic_min_max) | ||
57 | +float __ovld atomic_fetch_min(volatile __global atomic_float *object, | ||
58 | + float operand); | ||
59 | +float __ovld atomic_fetch_max(volatile __global atomic_float *object, | ||
60 | + float operand); | ||
61 | +float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object, | ||
62 | + float operand, memory_order order); | ||
63 | +float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object, | ||
64 | + float operand, memory_order order); | ||
65 | +float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object, | ||
66 | + float operand, memory_order order, | ||
67 | + memory_scope scope); | ||
68 | +float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object, | ||
69 | + float operand, memory_order order, | ||
70 | + memory_scope scope); | ||
71 | +#endif | ||
72 | +#if defined(__opencl_c_ext_fp32_local_atomic_min_max) | ||
73 | +float __ovld atomic_fetch_min(volatile __local atomic_float *object, | ||
74 | + float operand); | ||
75 | +float __ovld atomic_fetch_max(volatile __local atomic_float *object, | ||
76 | + float operand); | ||
77 | +float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object, | ||
78 | + float operand, memory_order order); | ||
79 | +float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object, | ||
80 | + float operand, memory_order order); | ||
81 | +float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object, | ||
82 | + float operand, memory_order order, | ||
83 | + memory_scope scope); | ||
84 | +float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object, | ||
85 | + float operand, memory_order order, | ||
86 | + memory_scope scope); | ||
87 | +#endif | ||
88 | +#if defined(__opencl_c_ext_fp32_global_atomic_min_max) || \ | ||
89 | + defined(__opencl_c_ext_fp32_local_atomic_min_max) | ||
90 | +float __ovld atomic_fetch_min(volatile atomic_float *object, float operand); | ||
91 | +float __ovld atomic_fetch_max(volatile atomic_float *object, float operand); | ||
92 | +float __ovld atomic_fetch_min_explicit(volatile atomic_float *object, | ||
93 | + float operand, memory_order order); | ||
94 | +float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, | ||
95 | + float operand, memory_order order); | ||
96 | +float __ovld atomic_fetch_min_explicit(volatile atomic_float *object, | ||
97 | + float operand, memory_order order, | ||
98 | + memory_scope scope); | ||
99 | +float __ovld atomic_fetch_max_explicit(volatile atomic_float *object, | ||
100 | + float operand, memory_order order, | ||
101 | + memory_scope scope); | ||
102 | +#endif | ||
103 | +#if defined(__opencl_c_ext_fp64_global_atomic_min_max) | ||
104 | +double __ovld atomic_fetch_min(volatile __global atomic_double *object, | ||
105 | + double operand); | ||
106 | +double __ovld atomic_fetch_max(volatile __global atomic_double *object, | ||
107 | + double operand); | ||
108 | +double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object, | ||
109 | + double operand, memory_order order); | ||
110 | +double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object, | ||
111 | + double operand, memory_order order); | ||
112 | +double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object, | ||
113 | + double operand, memory_order order, | ||
114 | + memory_scope scope); | ||
115 | +double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object, | ||
116 | + double operand, memory_order order, | ||
117 | + memory_scope scope); | ||
118 | +#endif | ||
119 | +#if defined(__opencl_c_ext_fp64_local_atomic_min_max) | ||
120 | +double __ovld atomic_fetch_min(volatile __local atomic_double *object, | ||
121 | + double operand); | ||
122 | +double __ovld atomic_fetch_max(volatile __local atomic_double *object, | ||
123 | + double operand); | ||
124 | +double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object, | ||
125 | + double operand, memory_order order); | ||
126 | +double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object, | ||
127 | + double operand, memory_order order); | ||
128 | +double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object, | ||
129 | + double operand, memory_order order, | ||
130 | + memory_scope scope); | ||
131 | +double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object, | ||
132 | + double operand, memory_order order, | ||
133 | + memory_scope scope); | ||
134 | +#endif | ||
135 | +#if defined(__opencl_c_ext_fp64_global_atomic_min_max) || \ | ||
136 | + defined(__opencl_c_ext_fp64_local_atomic_min_max) | ||
137 | +double __ovld atomic_fetch_min(volatile atomic_double *object, double operand); | ||
138 | +double __ovld atomic_fetch_max(volatile atomic_double *object, double operand); | ||
139 | +double __ovld atomic_fetch_min_explicit(volatile atomic_double *object, | ||
140 | + double operand, memory_order order); | ||
141 | +double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, | ||
142 | + double operand, memory_order order); | ||
143 | +double __ovld atomic_fetch_min_explicit(volatile atomic_double *object, | ||
144 | + double operand, memory_order order, | ||
145 | + memory_scope scope); | ||
146 | +double __ovld atomic_fetch_max_explicit(volatile atomic_double *object, | ||
147 | + double operand, memory_order order, | ||
148 | + memory_scope scope); | ||
149 | +#endif | ||
150 | + | ||
151 | +#if defined(__opencl_c_ext_fp32_global_atomic_add) | ||
152 | +float __ovld atomic_fetch_add(volatile __global atomic_float *object, | ||
153 | + float operand); | ||
154 | +float __ovld atomic_fetch_sub(volatile __global atomic_float *object, | ||
155 | + float operand); | ||
156 | +float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object, | ||
157 | + float operand, memory_order order); | ||
158 | +float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object, | ||
159 | + float operand, memory_order order); | ||
160 | +float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object, | ||
161 | + float operand, memory_order order, | ||
162 | + memory_scope scope); | ||
163 | +float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object, | ||
164 | + float operand, memory_order order, | ||
165 | + memory_scope scope); | ||
166 | +#endif | ||
167 | +#if defined(__opencl_c_ext_fp32_local_atomic_add) | ||
168 | +float __ovld atomic_fetch_add(volatile __local atomic_float *object, | ||
169 | + float operand); | ||
170 | +float __ovld atomic_fetch_sub(volatile __local atomic_float *object, | ||
171 | + float operand); | ||
172 | +float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object, | ||
173 | + float operand, memory_order order); | ||
174 | +float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object, | ||
175 | + float operand, memory_order order); | ||
176 | +float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object, | ||
177 | + float operand, memory_order order, | ||
178 | + memory_scope scope); | ||
179 | +float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object, | ||
180 | + float operand, memory_order order, | ||
181 | + memory_scope scope); | ||
182 | +#endif | ||
183 | +#if defined(__opencl_c_ext_fp32_global_atomic_add) || \ | ||
184 | + defined(__opencl_c_ext_fp32_local_atomic_add) | ||
185 | +float __ovld atomic_fetch_add(volatile atomic_float *object, float operand); | ||
186 | +float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand); | ||
187 | +float __ovld atomic_fetch_add_explicit(volatile atomic_float *object, | ||
188 | + float operand, memory_order order); | ||
189 | +float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, | ||
190 | + float operand, memory_order order); | ||
191 | +float __ovld atomic_fetch_add_explicit(volatile atomic_float *object, | ||
192 | + float operand, memory_order order, | ||
193 | + memory_scope scope); | ||
194 | +float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object, | ||
195 | + float operand, memory_order order, | ||
196 | + memory_scope scope); | ||
197 | +#endif | ||
198 | + | ||
199 | +#if defined(__opencl_c_ext_fp64_global_atomic_add) | ||
200 | +double __ovld atomic_fetch_add(volatile __global atomic_double *object, | ||
201 | + double operand); | ||
202 | +double __ovld atomic_fetch_sub(volatile __global atomic_double *object, | ||
203 | + double operand); | ||
204 | +double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object, | ||
205 | + double operand, memory_order order); | ||
206 | +double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object, | ||
207 | + double operand, memory_order order); | ||
208 | +double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object, | ||
209 | + double operand, memory_order order, | ||
210 | + memory_scope scope); | ||
211 | +double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object, | ||
212 | + double operand, memory_order order, | ||
213 | + memory_scope scope); | ||
214 | +#endif | ||
215 | +#if defined(__opencl_c_ext_fp64_local_atomic_add) | ||
216 | +double __ovld atomic_fetch_add(volatile __local atomic_double *object, | ||
217 | + double operand); | ||
218 | +double __ovld atomic_fetch_sub(volatile __local atomic_double *object, | ||
219 | + double operand); | ||
220 | +double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object, | ||
221 | + double operand, memory_order order); | ||
222 | +double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object, | ||
223 | + double operand, memory_order order); | ||
224 | +double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object, | ||
225 | + double operand, memory_order order, | ||
226 | + memory_scope scope); | ||
227 | +double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object, | ||
228 | + double operand, memory_order order, | ||
229 | + memory_scope scope); | ||
230 | +#endif | ||
231 | +#if defined(__opencl_c_ext_fp64_global_atomic_add) || \ | ||
232 | + defined(__opencl_c_ext_fp64_local_atomic_add) | ||
233 | +double __ovld atomic_fetch_add(volatile atomic_double *object, double operand); | ||
234 | +double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand); | ||
235 | +double __ovld atomic_fetch_add_explicit(volatile atomic_double *object, | ||
236 | + double operand, memory_order order); | ||
237 | +double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, | ||
238 | + double operand, memory_order order); | ||
239 | +double __ovld atomic_fetch_add_explicit(volatile atomic_double *object, | ||
240 | + double operand, memory_order order, | ||
241 | + memory_scope scope); | ||
242 | +double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object, | ||
243 | + double operand, memory_order order, | ||
244 | + memory_scope scope); | ||
245 | +#endif | ||
246 | + | ||
247 | +#endif // cl_ext_float_atomics | ||
248 | + | ||
249 | // atomic_store() | ||
250 | |||
251 | void __ovld atomic_store(volatile atomic_int *object, int desired); | ||
252 | diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl | ||
253 | index 13a3b62481ec..2c02d14f25c3 100644 | ||
254 | --- a/clang/test/Headers/opencl-c-header.cl | ||
255 | +++ b/clang/test/Headers/opencl-c-header.cl | ||
256 | @@ -124,6 +124,36 @@ global atomic_int z = ATOMIC_VAR_INIT(99); | ||
257 | #if cl_khr_subgroup_clustered_reduce != 1 | ||
258 | #error "Incorrectly defined cl_khr_subgroup_clustered_reduce" | ||
259 | #endif | ||
260 | +#if __opencl_c_ext_fp16_global_atomic_load_store != 1 | ||
261 | +#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_load_store" | ||
262 | +#endif | ||
263 | +#if __opencl_c_ext_fp16_local_atomic_load_store != 1 | ||
264 | +#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_load_store" | ||
265 | +#endif | ||
266 | +#if __opencl_c_ext_fp16_global_atomic_add != 1 | ||
267 | +#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_add" | ||
268 | +#endif | ||
269 | +#if __opencl_c_ext_fp32_global_atomic_add != 1 | ||
270 | +#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_add" | ||
271 | +#endif | ||
272 | +#if __opencl_c_ext_fp16_local_atomic_add != 1 | ||
273 | +#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_add" | ||
274 | +#endif | ||
275 | +#if __opencl_c_ext_fp32_local_atomic_add != 1 | ||
276 | +#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_add" | ||
277 | +#endif | ||
278 | +#if __opencl_c_ext_fp16_global_atomic_min_max != 1 | ||
279 | +#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_min_max" | ||
280 | +#endif | ||
281 | +#if __opencl_c_ext_fp32_global_atomic_min_max != 1 | ||
282 | +#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_min_max" | ||
283 | +#endif | ||
284 | +#if __opencl_c_ext_fp16_local_atomic_min_max != 1 | ||
285 | +#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_min_max" | ||
286 | +#endif | ||
287 | +#if __opencl_c_ext_fp32_local_atomic_min_max != 1 | ||
288 | +#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_min_max" | ||
289 | +#endif | ||
290 | |||
291 | #else | ||
292 | |||
293 | @@ -148,6 +178,48 @@ global atomic_int z = ATOMIC_VAR_INIT(99); | ||
294 | #ifdef cl_khr_subgroup_clustered_reduce | ||
295 | #error "Incorrect cl_khr_subgroup_clustered_reduce define" | ||
296 | #endif | ||
297 | +#ifdef __opencl_c_ext_fp16_global_atomic_load_store | ||
298 | +#error "Incorrectly __opencl_c_ext_fp16_global_atomic_load_store defined" | ||
299 | +#endif | ||
300 | +#ifdef __opencl_c_ext_fp16_local_atomic_load_store | ||
301 | +#error "Incorrectly __opencl_c_ext_fp16_local_atomic_load_store defined" | ||
302 | +#endif | ||
303 | +#ifdef __opencl_c_ext_fp16_global_atomic_add | ||
304 | +#error "Incorrectly __opencl_c_ext_fp16_global_atomic_add defined" | ||
305 | +#endif | ||
306 | +#ifdef __opencl_c_ext_fp32_global_atomic_add | ||
307 | +#error "Incorrectly __opencl_c_ext_fp32_global_atomic_add defined" | ||
308 | +#endif | ||
309 | +#ifdef __opencl_c_ext_fp64_global_atomic_add | ||
310 | +#error "Incorrectly __opencl_c_ext_fp64_global_atomic_add defined" | ||
311 | +#endif | ||
312 | +#ifdef __opencl_c_ext_fp16_local_atomic_add | ||
313 | +#error "Incorrectly __opencl_c_ext_fp16_local_atomic_add defined" | ||
314 | +#endif | ||
315 | +#ifdef __opencl_c_ext_fp32_local_atomic_add | ||
316 | +#error "Incorrectly __opencl_c_ext_fp32_local_atomic_add defined" | ||
317 | +#endif | ||
318 | +#ifdef __opencl_c_ext_fp64_local_atomic_add | ||
319 | +#error "Incorrectly __opencl_c_ext_fp64_local_atomic_add defined" | ||
320 | +#endif | ||
321 | +#ifdef __opencl_c_ext_fp16_global_atomic_min_max | ||
322 | +#error "Incorrectly __opencl_c_ext_fp16_global_atomic_min_max defined" | ||
323 | +#endif | ||
324 | +#ifdef __opencl_c_ext_fp32_global_atomic_min_max | ||
325 | +#error "Incorrectly __opencl_c_ext_fp32_global_atomic_min_max defined" | ||
326 | +#endif | ||
327 | +#ifdef __opencl_c_ext_fp64_global_atomic_min_max | ||
328 | +#error "Incorrectly __opencl_c_ext_fp64_global_atomic_min_max defined" | ||
329 | +#endif | ||
330 | +#ifdef __opencl_c_ext_fp16_local_atomic_min_max | ||
331 | +#error "Incorrectly __opencl_c_ext_fp16_local_atomic_min_max defined" | ||
332 | +#endif | ||
333 | +#ifdef __opencl_c_ext_fp32_local_atomic_min_max | ||
334 | +#error "Incorrectly __opencl_c_ext_fp32_local_atomic_min_max defined" | ||
335 | +#endif | ||
336 | +#ifdef __opencl_c_ext_fp64_local_atomic_min_max | ||
337 | +#error "Incorrectly __opencl_c_ext_fp64_local_atomic_min_max defined" | ||
338 | +#endif | ||
339 | |||
340 | #endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) | ||
341 | |||
342 | -- | ||
343 | 2.17.1 | ||
344 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0004-ispc-12_0_disable-A-B-A-B-and-BSWAP-in-InstCombine.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0004-ispc-12_0_disable-A-B-A-B-and-BSWAP-in-InstCombine.patch deleted file mode 100644 index fb15d19c..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0004-ispc-12_0_disable-A-B-A-B-and-BSWAP-in-InstCombine.patch +++ /dev/null | |||
@@ -1,67 +0,0 @@ | |||
1 | From 0c4ba4947d1630f2e13fc260399f0892b2c9b323 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Fri, 27 Aug 2021 10:55:13 +0800 | ||
4 | Subject: [PATCH 1/2] This patch is needed for ISPC for Gen only | ||
5 | |||
6 | 1. Transformation of add to or is not safe for the VC backend. | ||
7 | 2. The bswap intrinsic is not supported in the VC backend yet. | ||
8 | |||
9 | Upstream-Status: Backport [Taken from ispc, https://github.com/ispc/ispc/blob/v1.16.1/llvm_patches/12_0_disable-A-B-A-B-and-BSWAP-in-InstCombine.patch] | ||
10 | |||
11 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
12 | --- | ||
13 | llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 10 +++++++--- | ||
14 | .../lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 9 ++++++--- | ||
15 | 2 files changed, 13 insertions(+), 6 deletions(-) | ||
16 | |||
17 | diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | ||
18 | index bacb8689892a..f3d0120db256 100644 | ||
19 | --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | ||
20 | +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | ||
21 | @@ -15,6 +15,7 @@ | ||
22 | #include "llvm/ADT/APInt.h" | ||
23 | #include "llvm/ADT/STLExtras.h" | ||
24 | #include "llvm/ADT/SmallVector.h" | ||
25 | +#include "llvm/ADT/Triple.h" | ||
26 | #include "llvm/Analysis/InstructionSimplify.h" | ||
27 | #include "llvm/Analysis/ValueTracking.h" | ||
28 | #include "llvm/IR/Constant.h" | ||
29 | @@ -1363,9 +1364,12 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) { | ||
30 | } | ||
31 | } | ||
32 | |||
33 | - // A+B --> A|B iff A and B have no bits set in common. | ||
34 | - if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT)) | ||
35 | - return BinaryOperator::CreateOr(LHS, RHS); | ||
36 | + // Disable this transformation for ISPC SPIR-V | ||
37 | + if (!Triple(I.getModule()->getTargetTriple()).isSPIR()) { | ||
38 | + // A+B --> A|B iff A and B have no bits set in common. | ||
39 | + if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT)) | ||
40 | + return BinaryOperator::CreateOr(LHS, RHS); | ||
41 | + } | ||
42 | |||
43 | // add (select X 0 (sub n A)) A --> select X A n | ||
44 | { | ||
45 | diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | ||
46 | index 68c4156af2c4..b145b863ca84 100644 | ||
47 | --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | ||
48 | +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | ||
49 | @@ -2584,9 +2584,12 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { | ||
50 | if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I)) | ||
51 | return FoldedLogic; | ||
52 | |||
53 | - if (Instruction *BSwap = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true, | ||
54 | - /*MatchBitReversals*/ false)) | ||
55 | - return BSwap; | ||
56 | + // Disable this transformation for ISPC SPIR-V | ||
57 | + if (!Triple(I.getModule()->getTargetTriple()).isSPIR()) { | ||
58 | + if (Instruction *BSwap = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true, | ||
59 | + /*MatchBitReversals*/ false)) | ||
60 | + return BSwap; | ||
61 | + } | ||
62 | |||
63 | if (Instruction *Funnel = matchFunnelShift(I, *this)) | ||
64 | return Funnel; | ||
65 | -- | ||
66 | 2.17.1 | ||
67 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0005-ispc-12_0_fix_for_2111.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0005-ispc-12_0_fix_for_2111.patch deleted file mode 100644 index 4951a63d..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm12-0005-ispc-12_0_fix_for_2111.patch +++ /dev/null | |||
@@ -1,35 +0,0 @@ | |||
1 | From 913e07ea5acf2148e3748b45ddfe3fac3b2d051c Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Fri, 27 Aug 2021 10:56:57 +0800 | ||
4 | Subject: [PATCH 2/2] This patch is a fix for #2111 | ||
5 | |||
6 | It ensures that shuffle is lowered for this particular case correctly. | ||
7 | |||
8 | Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/9ab99f773fec7da4183495a3fdc655a797d3bea2] | ||
9 | |||
10 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
11 | --- | ||
12 | llvm/lib/Target/X86/X86ISelLowering.cpp | 7 ++++--- | ||
13 | 1 file changed, 4 insertions(+), 3 deletions(-) | ||
14 | |||
15 | diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
16 | index 6b816c710f98..3121b0e818ac 100644 | ||
17 | --- a/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
18 | +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp | ||
19 | @@ -43192,9 +43192,10 @@ static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG, | ||
20 | ShuffleVectorSDNode::commuteMask(ShuffleMask1); | ||
21 | } | ||
22 | if ((Op00 == Op10) && (Op01 == Op11)) { | ||
23 | - SmallVector<int, 4> ShuffleMask; | ||
24 | - ShuffleMask.append(ShuffleMask0.begin(), ShuffleMask0.end()); | ||
25 | - ShuffleMask.append(ShuffleMask1.begin(), ShuffleMask1.end()); | ||
26 | + const int Map[4] = {0, 2, 1, 3}; | ||
27 | + SmallVector<int, 4> ShuffleMask( | ||
28 | + {Map[ShuffleMask0[0]], Map[ShuffleMask1[0]], Map[ShuffleMask0[1]], | ||
29 | + Map[ShuffleMask1[1]]}); | ||
30 | SDLoc DL(N); | ||
31 | MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64; | ||
32 | SDValue Res = DAG.getNode(Opcode, DL, VT, Op00, Op01); | ||
33 | -- | ||
34 | 2.17.1 | ||
35 | |||
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend deleted file mode 100644 index 9701aca6..00000000 --- a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend +++ /dev/null | |||
@@ -1,45 +0,0 @@ | |||
1 | FILESEXTRAPATHS:prepend:intel-x86-common := "${THISDIR}/files:" | ||
2 | |||
3 | SPIRV_SRCREV = "fe4d6b767363a1995ccbfca27f79efb10dcfe110" | ||
4 | |||
5 | SRC_URI_LLVM10_PATCHES = " \ | ||
6 | file://llvm10-0001-llvm-spirv-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \ | ||
7 | file://llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch;patchdir=llvm/projects/llvm-spirv \ | ||
8 | file://llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch;patchdir=llvm/projects/llvm-spirv \ | ||
9 | file://BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch;patchdir=llvm \ | ||
10 | file://IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch;patchdir=llvm \ | ||
11 | file://llvm10-0001-OpenCL-3.0-support.patch \ | ||
12 | file://llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch \ | ||
13 | file://llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch \ | ||
14 | file://llvm10-0004-Remove-repo-name-in-LLVM-IR.patch \ | ||
15 | file://llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \ | ||
16 | file://llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \ | ||
17 | file://llvm10-0007-support-cl_ext_float_atomics.patch \ | ||
18 | file://llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch \ | ||
19 | file://llvm10-0009-ispc-10_0_fix_for_1788.patch \ | ||
20 | file://llvm10-0010-ispc-10_0_fix_for_1793.patch \ | ||
21 | file://llvm10-0011-ispc-10_0_fix_for_1844.patch \ | ||
22 | file://llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch \ | ||
23 | file://llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch \ | ||
24 | file://llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch \ | ||
25 | file://llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch \ | ||
26 | file://llvm10-basic-block-sections-support.patch \ | ||
27 | file://llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch \ | ||
28 | file://llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch \ | ||
29 | " | ||
30 | |||
31 | SRC_URI_LLVM12_PATCHES = " \ | ||
32 | file://llvm12-0001-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \ | ||
33 | file://llvm12-0002-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \ | ||
34 | file://llvm12-0003-Support-cl_ext_float_atomics.patch \ | ||
35 | file://llvm12-0004-ispc-12_0_disable-A-B-A-B-and-BSWAP-in-InstCombine.patch \ | ||
36 | file://llvm12-0005-ispc-12_0_fix_for_2111.patch \ | ||
37 | " | ||
38 | |||
39 | |||
40 | SPIRV_LLVM10_SRC_URI = "git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_100;destsuffix=git/llvm/projects/llvm-spirv;name=spirv" | ||
41 | |||
42 | SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '10.0.1', ' ${SPIRV_LLVM10_SRC_URI} ${SRC_URI_LLVM10_PATCHES} ', '', d)}" | ||
43 | SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '12.0.0', ' ${SRC_URI_LLVM12_PATCHES} ', '', d)}" | ||
44 | |||
45 | SRCREV_spirv = "${@bb.utils.contains_any('LLVMVERSION', [ '13.0.0', '12.0.0' ], '', '${SPIRV_SRCREV}', d)}" | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/allow-to-find-cpp-generation-tool.patch b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/allow-to-find-cpp-generation-tool.patch index 456dc5cd..265fcfa2 100644 --- a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/allow-to-find-cpp-generation-tool.patch +++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/allow-to-find-cpp-generation-tool.patch | |||
@@ -1,4 +1,4 @@ | |||
1 | From 47ae5d13ad021076f5a79f245e33bcb228b0a0da Mon Sep 17 00:00:00 2001 | 1 | From a6361d635e5f3046853883f3ac06fb175116933c Mon Sep 17 00:00:00 2001 |
2 | From: Dongwon Kim <dongwon.kim@intel.com> | 2 | From: Dongwon Kim <dongwon.kim@intel.com> |
3 | Date: Sat, 21 Aug 2021 16:09:39 -0700 | 3 | Date: Sat, 21 Aug 2021 16:09:39 -0700 |
4 | Subject: [PATCH] Build not able to locate cpp_generation_tool. | 4 | Subject: [PATCH] Build not able to locate cpp_generation_tool. |
@@ -8,26 +8,26 @@ Upstream-Status: Inappropriate [oe specific] | |||
8 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | 8 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> |
9 | Signed-off-by: Dongwon Kim <dongwon.kim@intel.com> | 9 | Signed-off-by: Dongwon Kim <dongwon.kim@intel.com> |
10 | --- | 10 | --- |
11 | shared/source/built_ins/kernels/CMakeLists.txt | 8 ++++---- | 11 | shared/source/built_ins/kernels/CMakeLists.txt | 10 +++++----- |
12 | 1 file changed, 4 insertions(+), 4 deletions(-) | 12 | 1 file changed, 5 insertions(+), 5 deletions(-) |
13 | 13 | ||
14 | diff --git a/shared/source/built_ins/kernels/CMakeLists.txt b/shared/source/built_ins/kernels/CMakeLists.txt | 14 | diff --git a/shared/source/built_ins/kernels/CMakeLists.txt b/shared/source/built_ins/kernels/CMakeLists.txt |
15 | index 929b981fe..57cd3d4b3 100644 | 15 | index ed85a37c52..f7c9e79137 100644 |
16 | --- a/shared/source/built_ins/kernels/CMakeLists.txt | 16 | --- a/shared/source/built_ins/kernels/CMakeLists.txt |
17 | +++ b/shared/source/built_ins/kernels/CMakeLists.txt | 17 | +++ b/shared/source/built_ins/kernels/CMakeLists.txt |
18 | @@ -100,9 +100,9 @@ if(NOT NEO_DISABLE_BUILTINS_COMPILATION) | 18 | @@ -107,9 +107,9 @@ function(compile_builtin core_type platform_it builtin bits builtin_options mode |
19 | ) | 19 | ) |
20 | add_custom_command( | 20 | add_custom_command( |
21 | OUTPUT ${OUTPUT_FILE_CPP} | 21 | OUTPUT ${OUTPUT_FILE_CPP} |
22 | - COMMAND $<TARGET_FILE:cpp_generate_tool> --file ${BINARY_OUTPUT}.gen --output ${OUTPUT_FILE_CPP} --array ${mode}_${BASENAME} --platform ${family_name_with_type} --revision_id ${REVISION_ID} | 22 | - COMMAND $<TARGET_FILE:cpp_generate_tool> --file ${BINARY_OUTPUT}.bin --output ${OUTPUT_FILE_CPP} --array ${mode}_${BASENAME} --device ${RELEASE_FILENAME} |
23 | + COMMAND cpp_generate_tool --file ${BINARY_OUTPUT}.gen --output ${OUTPUT_FILE_CPP} --array ${mode}_${BASENAME} --platform ${family_name_with_type} --revision_id ${REVISION_ID} | 23 | + COMMAND cpp_generate_tool --file ${BINARY_OUTPUT}.bin --output ${OUTPUT_FILE_CPP} --array ${mode}_${BASENAME} --device ${RELEASE_FILENAME} |
24 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} | 24 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} |
25 | - DEPENDS ${OUTPUT_FILES_BINARIES} $<TARGET_FILE:cpp_generate_tool> | 25 | - DEPENDS ${OUTPUT_FILES_BINARIES} $<TARGET_FILE:cpp_generate_tool> |
26 | + DEPENDS ${OUTPUT_FILES_BINARIES} cpp_generate_tool | 26 | + DEPENDS ${OUTPUT_FILES_BINARIES} cpp_generate_tool |
27 | ) | 27 | ) |
28 | endforeach() | 28 | list(APPEND BUILTINS_COMMANDS "${OUTPUT_FILE_CPP}") |
29 | set(BUILTINS_COMMANDS ${BUILTINS_COMMANDS} PARENT_SCOPE) | 29 | else() |
30 | @@ -144,9 +144,9 @@ if(NOT NEO_DISABLE_BUILTINS_COMPILATION) | 30 | @@ -159,9 +159,9 @@ function(generate_cpp_spirv builtin) |
31 | ) | 31 | ) |
32 | add_custom_command( | 32 | add_custom_command( |
33 | OUTPUT ${OUTPUT_FILE_CPP} | 33 | OUTPUT ${OUTPUT_FILE_CPP} |
@@ -37,8 +37,15 @@ index 929b981fe..57cd3d4b3 100644 | |||
37 | - DEPENDS ${GENERATED_SPV_INPUT} $<TARGET_FILE:cpp_generate_tool> | 37 | - DEPENDS ${GENERATED_SPV_INPUT} $<TARGET_FILE:cpp_generate_tool> |
38 | + DEPENDS ${GENERATED_SPV_INPUT} cpp_generate_tool | 38 | + DEPENDS ${GENERATED_SPV_INPUT} cpp_generate_tool |
39 | ) | 39 | ) |
40 | endfunction() | 40 | set(OUTPUT_LIST_CPP_FILES ${OUTPUT_LIST_CPP_FILES} ${OUTPUT_FILE_CPP} PARENT_SCOPE) |
41 | else() | ||
42 | @@ -277,4 +277,4 @@ if(NOT "${OUTPUT_LIST_CPP_FILES}" STREQUAL "") | ||
43 | ) | ||
44 | endif() | ||
41 | 45 | ||
46 | -apply_macro_for_each_core_type("SUPPORTED") | ||
47 | \ No newline at end of file | ||
48 | +apply_macro_for_each_core_type("SUPPORTED") | ||
42 | -- | 49 | -- |
43 | 2.32.0 | 50 | 2.43.2 |
44 | 51 | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/disable-werror.patch b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/disable-werror.patch new file mode 100644 index 00000000..20d9b847 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/disable-werror.patch | |||
@@ -0,0 +1,16 @@ | |||
1 | Upstream-Status: Inappropriate | ||
2 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
3 | |||
4 | diff --git a/CMakeLists.txt b/CMakeLists.txt | ||
5 | index d52e089778..bc0cf35014 100644 | ||
6 | --- a/CMakeLists.txt | ||
7 | +++ b/CMakeLists.txt | ||
8 | @@ -727,7 +727,7 @@ if(NOT MSVC) | ||
9 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-noexcept-type") # Added for gtest | ||
10 | endif() | ||
11 | endif() | ||
12 | - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Werror=vla") | ||
13 | + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=vla") | ||
14 | |||
15 | if(USE_SANITIZE_UB) | ||
16 | check_cxx_compiler_flag(-fsanitize=undefined COMPILER_SUPPORTS_UNDEFINED_BEHAVIOR_SANITIZER) | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/external-ocloc.patch b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/external-ocloc.patch new file mode 100644 index 00000000..5f93b7b6 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/external-ocloc.patch | |||
@@ -0,0 +1,40 @@ | |||
1 | From 1f58c22992ddea4167b01b44448528de427f50d5 Mon Sep 17 00:00:00 2001 | ||
2 | From: Dongwon Kim <dongwon.kim@intel.com> | ||
3 | Date: Wed, 2 Mar 2022 15:52:45 -0800 | ||
4 | Subject: [PATCH] external ocloc | ||
5 | |||
6 | Upstream-Status: Inappropriate | ||
7 | |||
8 | Signed-off-by: Dongwon Kim <dongwon.kim@intel.com> | ||
9 | --- | ||
10 | cmake/ocloc_cmd_prefix.cmake | 14 ++++++++------ | ||
11 | 1 file changed, 8 insertions(+), 6 deletions(-) | ||
12 | |||
13 | diff --git a/cmake/ocloc_cmd_prefix.cmake b/cmake/ocloc_cmd_prefix.cmake | ||
14 | index 2b44330831..03067c9df0 100644 | ||
15 | --- a/cmake/ocloc_cmd_prefix.cmake | ||
16 | +++ b/cmake/ocloc_cmd_prefix.cmake | ||
17 | @@ -4,12 +4,14 @@ | ||
18 | # SPDX-License-Identifier: MIT | ||
19 | # | ||
20 | |||
21 | -if(WIN32) | ||
22 | - set(ocloc_cmd_prefix ocloc) | ||
23 | -else() | ||
24 | - if(DEFINED NEO__IGC_LIBRARY_PATH) | ||
25 | - set(ocloc_cmd_prefix ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${NEO__IGC_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib>" $<TARGET_FILE:ocloc>) | ||
26 | +if(NOT DEFINED ocloc_cmd_prefix) | ||
27 | + if(WIN32) | ||
28 | + set(ocloc_cmd_prefix ocloc) | ||
29 | else() | ||
30 | - set(ocloc_cmd_prefix ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib>" $<TARGET_FILE:ocloc>) | ||
31 | + if(DEFINED NEO__IGC_LIBRARY_PATH) | ||
32 | + set(ocloc_cmd_prefix LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${NEO__IGC_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib> $<TARGET_FILE:ocloc>) | ||
33 | + else() | ||
34 | + set(ocloc_cmd_prefix LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$<TARGET_FILE_DIR:ocloc_lib> $<TARGET_FILE:ocloc>) | ||
35 | + endif() | ||
36 | endif() | ||
37 | endif() | ||
38 | -- | ||
39 | 2.37.3 | ||
40 | |||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/f10439aea214984a060566831f63d3aa198ef1b8.patch b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/f10439aea214984a060566831f63d3aa198ef1b8.patch new file mode 100644 index 00000000..b7fcb3d1 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime/f10439aea214984a060566831f63d3aa198ef1b8.patch | |||
@@ -0,0 +1,54 @@ | |||
1 | From f10439aea214984a060566831f63d3aa198ef1b8 Mon Sep 17 00:00:00 2001 | ||
2 | From: Pawel Cieslak <pawel.cieslak@intel.com> | ||
3 | Date: Tue, 14 May 2024 14:20:24 +0000 | ||
4 | Subject: [PATCH] fix: include <algorithm> where std::find is used | ||
5 | |||
6 | Related-To: NEO-11375 | ||
7 | Signed-off-by: Pawel Cieslak <pawel.cieslak@intel.com> | ||
8 | |||
9 | Upstream-Status: Backport [https://github.com/intel/compute-runtime/commit/f10439aea214984a060566831f63d3aa198ef1b8] | ||
10 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
11 | --- | ||
12 | .../sources/linux/nl_api/test_sysman_iaf_nl_api_prelim.cpp | 3 ++- | ||
13 | shared/test/unit_test/gmm_helper/gmm_resource_info_tests.cpp | 4 +++- | ||
14 | 2 files changed, 5 insertions(+), 2 deletions(-) | ||
15 | |||
16 | diff --git a/level_zero/sysman/test/unit_tests/sources/linux/nl_api/test_sysman_iaf_nl_api_prelim.cpp b/level_zero/sysman/test/unit_tests/sources/linux/nl_api/test_sysman_iaf_nl_api_prelim.cpp | ||
17 | index 6ab1b751d866..8a224752c8fc 100644 | ||
18 | --- a/level_zero/sysman/test/unit_tests/sources/linux/nl_api/test_sysman_iaf_nl_api_prelim.cpp | ||
19 | +++ b/level_zero/sysman/test/unit_tests/sources/linux/nl_api/test_sysman_iaf_nl_api_prelim.cpp | ||
20 | @@ -1,5 +1,5 @@ | ||
21 | /* | ||
22 | - * Copyright (C) 2021-2023 Intel Corporation | ||
23 | + * Copyright (C) 2021-2024 Intel Corporation | ||
24 | * | ||
25 | * SPDX-License-Identifier: MIT | ||
26 | * | ||
27 | @@ -13,6 +13,7 @@ | ||
28 | |||
29 | #include "gtest/gtest.h" | ||
30 | |||
31 | +#include <algorithm> | ||
32 | #include <limits> | ||
33 | #include <netlink/handlers.h> | ||
34 | |||
35 | diff --git a/shared/test/unit_test/gmm_helper/gmm_resource_info_tests.cpp b/shared/test/unit_test/gmm_helper/gmm_resource_info_tests.cpp | ||
36 | index 206c272c5bb2..e961248e73a3 100644 | ||
37 | --- a/shared/test/unit_test/gmm_helper/gmm_resource_info_tests.cpp | ||
38 | +++ b/shared/test/unit_test/gmm_helper/gmm_resource_info_tests.cpp | ||
39 | @@ -1,5 +1,5 @@ | ||
40 | /* | ||
41 | - * Copyright (C) 2021-2023 Intel Corporation | ||
42 | + * Copyright (C) 2021-2024 Intel Corporation | ||
43 | * | ||
44 | * SPDX-License-Identifier: MIT | ||
45 | * | ||
46 | @@ -17,6 +17,8 @@ | ||
47 | |||
48 | #include "gtest/gtest.h" | ||
49 | |||
50 | +#include <algorithm> | ||
51 | + | ||
52 | using namespace NEO; | ||
53 | |||
54 | struct MockGmmHandleAllocator : NEO::GmmHandleAllocator { | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime_21.40.21182.bb b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime_24.13.29138.7.bb index bca3c767..7280ed03 100644 --- a/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime_21.40.21182.bb +++ b/dynamic-layers/clang-layer/recipes-opencl/compute-runtime/intel-compute-runtime_24.13.29138.7.bb | |||
@@ -4,25 +4,26 @@ is an open source project to converge Intel's development efforts \ | |||
4 | on OpenCL(TM) compute stacks supporting the GEN graphics hardware \ | 4 | on OpenCL(TM) compute stacks supporting the GEN graphics hardware \ |
5 | architecture." | 5 | architecture." |
6 | 6 | ||
7 | LICENSE = "MIT" | 7 | LICENSE = "MIT & Apache-2.0" |
8 | LIC_FILES_CHKSUM = "file://LICENSE.md;md5=983b0c493ea3dc3c21a90ff743bf90e4 \ | 8 | LIC_FILES_CHKSUM = "file://LICENSE.md;md5=eca6ec6997e18db166db7109cdbe611c \ |
9 | file://third_party/opencl_headers/LICENSE;md5=dcefc90f4c3c689ec0c2489064e7273b" | 9 | file://third_party/opencl_headers/LICENSE;md5=3b83ef96387f14655fc854ddc3c6bd57" |
10 | 10 | ||
11 | SRC_URI = "git://github.com/intel/compute-runtime.git;protocol=https;branch=master \ | 11 | SRC_URI = "git://github.com/intel/compute-runtime.git;protocol=https;branch=releases/24.13 \ |
12 | " | 12 | file://disable-werror.patch \ |
13 | file://allow-to-find-cpp-generation-tool.patch \ | ||
14 | file://external-ocloc.patch \ | ||
15 | file://f10439aea214984a060566831f63d3aa198ef1b8.patch \ | ||
16 | " | ||
13 | 17 | ||
14 | SRC_URI:append:class-target = "file://allow-to-find-cpp-generation-tool.patch" | 18 | SRCREV = "7131387cdbb02d480a225c70daef913a6c024a6e" |
15 | |||
16 | SRCREV = "3269e719a3ee7bcd97c50ec2cfe78fc8674adec0" | ||
17 | 19 | ||
18 | S = "${WORKDIR}/git" | 20 | S = "${WORKDIR}/git" |
19 | 21 | ||
20 | DEPENDS += " intel-graphics-compiler gmmlib" | 22 | DEPENDS += " intel-graphics-compiler gmmlib libva qemu-native" |
21 | DEPENDS:append:class-target = " intel-compute-runtime-native libva" | ||
22 | 23 | ||
23 | RDEPENDS:${PN} += " intel-graphics-compiler gmmlib" | 24 | RDEPENDS:${PN} += " intel-graphics-compiler gmmlib" |
24 | 25 | ||
25 | inherit cmake pkgconfig | 26 | inherit cmake pkgconfig qemu |
26 | 27 | ||
27 | COMPATIBLE_HOST = '(x86_64).*-linux' | 28 | COMPATIBLE_HOST = '(x86_64).*-linux' |
28 | COMPATIBLE_HOST:libc-musl = "null" | 29 | COMPATIBLE_HOST:libc-musl = "null" |
@@ -35,18 +36,23 @@ EXTRA_OECMAKE = " \ | |||
35 | -DNEO_DISABLE_LD_LLD=ON \ | 36 | -DNEO_DISABLE_LD_LLD=ON \ |
36 | -DNEO_DISABLE_LD_GOLD=ON \ | 37 | -DNEO_DISABLE_LD_GOLD=ON \ |
37 | " | 38 | " |
38 | EXTRA_OECMAKE:append:class-native = " -DNEO_DISABLE_BUILTINS_COMPILATION=ON" | ||
39 | 39 | ||
40 | EXTRA_OECMAKE:append:class-target = " \ | 40 | EXTRA_OECMAKE:append:class-target = " \ |
41 | -Dcloc_cmd_prefix=ocloc \ | 41 | -Docloc_cmd_prefix=ocloc \ |
42 | " | 42 | -DCMAKE_CROSSCOMPILING_EMULATOR=${WORKDIR}/qemuwrapper \ |
43 | " | ||
43 | 44 | ||
44 | PACKAGECONFIG ??= "" | 45 | PACKAGECONFIG ??= "" |
45 | PACKAGECONFIG[levelzero] = "-DBUILD_WITH_L0=ON, -DBUILD_WITH_L0=OFF, level-zero" | 46 | PACKAGECONFIG[levelzero] = "-DBUILD_WITH_L0=ON, -DBUILD_WITH_L0=OFF, level-zero" |
46 | 47 | ||
47 | do_install:append:class-native() { | 48 | do_configure:prepend:class-target () { |
48 | install -d ${D}${bindir} | 49 | # Write out a qemu wrapper that will be used by cmake. |
49 | install ${B}/bin/cpp_generate_tool ${D}${bindir}/ | 50 | qemu_binary="${@qemu_wrapper_cmdline(d, d.getVar('STAGING_DIR_HOST'), [d.expand('${B}/bin'),d.expand('${STAGING_DIR_HOST}${libdir}'),d.expand('${STAGING_DIR_HOST}${base_libdir}')])}" |
51 | cat > ${WORKDIR}/qemuwrapper << EOF | ||
52 | #!/bin/sh | ||
53 | $qemu_binary "\$@" | ||
54 | EOF | ||
55 | chmod +x ${WORKDIR}/qemuwrapper | ||
50 | } | 56 | } |
51 | 57 | ||
52 | FILES:${PN} += " \ | 58 | FILES:${PN} += " \ |
@@ -56,6 +62,4 @@ FILES:${PN} += " \ | |||
56 | 62 | ||
57 | FILES:${PN}-dev = "${includedir}" | 63 | FILES:${PN}-dev = "${includedir}" |
58 | 64 | ||
59 | BBCLASSEXTEND = "native nativesdk" | ||
60 | |||
61 | UPSTREAM_CHECK_GITTAGREGEX = "(?P<pver>\d+(\.\d+)+)" | 65 | UPSTREAM_CHECK_GITTAGREGEX = "(?P<pver>\d+(\.\d+)+)" |
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch index a6af0ef3..377081fd 100644 --- a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch +++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch | |||
@@ -1,4 +1,4 @@ | |||
1 | From 6f6997066040c5f33bf05ffde36a9800dcde1fcf Mon Sep 17 00:00:00 2001 | 1 | From 3d71670f8ad5b54d434c2f5f71713bb1d5433ae4 Mon Sep 17 00:00:00 2001 |
2 | From: Anuj Mittal <anuj.mittal@intel.com> | 2 | From: Anuj Mittal <anuj.mittal@intel.com> |
3 | Date: Tue, 12 Oct 2021 23:46:42 +0800 | 3 | Date: Tue, 12 Oct 2021 23:46:42 +0800 |
4 | Subject: [PATCH] BiF/CMakeLists.txt: remove opt from DEPENDS | 4 | Subject: [PATCH] BiF/CMakeLists.txt: remove opt from DEPENDS |
@@ -14,22 +14,22 @@ Upstream-Status: Inappropriate | |||
14 | 14 | ||
15 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | 15 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> |
16 | --- | 16 | --- |
17 | IGC/VectorCompiler/lib/BiF/CMakeLists.txt | 2 +- | 17 | IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake | 2 +- |
18 | 1 file changed, 1 insertion(+), 1 deletion(-) | 18 | 1 file changed, 1 insertion(+), 1 deletion(-) |
19 | 19 | ||
20 | diff --git a/IGC/VectorCompiler/lib/BiF/CMakeLists.txt b/IGC/VectorCompiler/lib/BiF/CMakeLists.txt | 20 | diff --git a/IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake b/IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake |
21 | index 5d9a901d7..66e59e399 100644 | 21 | index d20d7f887..882e09fea 100644 |
22 | --- a/IGC/VectorCompiler/lib/BiF/CMakeLists.txt | 22 | --- a/IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake |
23 | +++ b/IGC/VectorCompiler/lib/BiF/CMakeLists.txt | 23 | +++ b/IGC/VectorCompiler/lib/BiF/cmake/Functions.cmake |
24 | @@ -88,7 +88,7 @@ function(vc_embed_bif RES_FILE CMCL_SRC_PATH BIF_NAME PTR_BIT_SIZE) | 24 | @@ -109,7 +109,7 @@ function(vc_build_bif TARGET RES_FILE CMCL_SRC_PATH BIF_NAME PTR_BIT_SIZE) |
25 | add_custom_command(OUTPUT ${BIF_OPT_BC_PATH} | 25 | COMMENT "vc_build_bif: Translating CMCL builtins: ${BIF_CLANG_BC_NAME_FINAL} -> ${BIF_OPT_BC_NAME}" |
26 | COMMAND ${LLVM_OPT_EXE} --O2 -o ${BIF_OPT_BC_NAME} ${BIF_CMCL_BC_NAME} | 26 | COMMAND CMCLTranslatorTool -o ${BIF_CMCL_BC_NAME} ${BIF_CLANG_BC_NAME_FINAL} |
27 | COMMENT "vc_embed_bif: running opt with O2: ${BIF_CMCL_BC_NAME} -> ${BIF_OPT_BC_NAME}" | 27 | COMMAND ${LLVM_OPT_EXE} ${IGC_LLVM_DEPENDENT_OPT_FLAGS} --O2 -o ${BIF_OPT_BC_NAME} ${BIF_CMCL_BC_NAME} |
28 | - DEPENDS opt ${BIF_CMCL_BC_PATH}) | 28 | - DEPENDS CMCLTranslatorTool ${LLVM_OPT_EXE} ${BIF_CLANG_BC_PATH_FINAL} |
29 | + DEPENDS ${BIF_CMCL_BC_PATH}) | 29 | + DEPENDS CMCLTranslatorTool ${BIF_CLANG_BC_PATH_FINAL} |
30 | add_custom_command( | 30 | BYPRODUCTS ${BIF_OPT_BC_PATH} |
31 | OUTPUT ${BIF_CPP_PATH} | 31 | SOURCES ${CMCL_SRC_PATH}) |
32 | COMMAND ${PYTHON_EXECUTABLE} ${RESOURCE_EMBEDDER_SCRIPT} ${BIF_OPT_BC_NAME} ${BIF_CPP_NAME} | 32 | set(${RES_FILE} ${BIF_OPT_BC_NAME} PARENT_SCOPE) |
33 | -- | 33 | -- |
34 | 2.32.0 | 34 | 2.43.2 |
35 | 35 | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch new file mode 100644 index 00000000..dca75e22 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch | |||
@@ -0,0 +1,31 @@ | |||
1 | From e69a3181482e5f442756a61c7b683914072884f1 Mon Sep 17 00:00:00 2001 | ||
2 | From: Anuj Mittal <anuj.mittal@intel.com> | ||
3 | Date: Mon, 9 Jan 2023 11:43:05 +0800 | ||
4 | Subject: [PATCH] external/SPIRV-Tools: change path to tools and headers | ||
5 | |||
6 | We clone the SPIRV headers and tools in a different directory to ensure | ||
7 | file path substitutions take place. | ||
8 | |||
9 | Upstream-Status: Inappropriate | ||
10 | |||
11 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
12 | |||
13 | --- | ||
14 | external/SPIRV-Tools/CMakeLists.txt | 4 ++-- | ||
15 | 1 file changed, 2 insertions(+), 2 deletions(-) | ||
16 | |||
17 | diff --git a/external/SPIRV-Tools/CMakeLists.txt b/external/SPIRV-Tools/CMakeLists.txt | ||
18 | index 9afa5746c..7ca24d5dc 100644 | ||
19 | --- a/external/SPIRV-Tools/CMakeLists.txt | ||
20 | +++ b/external/SPIRV-Tools/CMakeLists.txt | ||
21 | @@ -43,8 +43,8 @@ else() #By default use build from sources | ||
22 | message(STATUS "[SPIRV-Tools] : Building from source") | ||
23 | message(STATUS "[SPIRV-Tools] : Current source dir: ${CMAKE_CURRENT_SOURCE_DIR}") | ||
24 | |||
25 | - set(SPIRV-Headers_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../SPIRV-Headers") # used in subdirectory | ||
26 | - set(SPIRV-Tools_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../SPIRV-Tools") | ||
27 | + set(SPIRV-Headers_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../SPIRV-Headers") # used in subdirectory | ||
28 | + set(SPIRV-Tools_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../SPIRV-Tools") | ||
29 | |||
30 | set(SPIRV-Tools_OUTPUT_DIR "${IGC_OPTION__OUTPUT_DIR}/external/SPIRV-Tools/build") | ||
31 | set(IGC_BUILD__SPIRV-Headers_DIR "${SPIRV-Headers_SOURCE_DIR}") | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-fix-tblgen.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-fix-tblgen.patch new file mode 100644 index 00000000..39443931 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-fix-tblgen.patch | |||
@@ -0,0 +1,24 @@ | |||
1 | From 5648568e597acd0fed82aac3e6aef0f95a1b78d1 Mon Sep 17 00:00:00 2001 | ||
2 | From: Anuj Mittal <anuj.mittal@intel.com> | ||
3 | Date: Thu, 19 May 2022 22:50:09 +0800 | ||
4 | Subject: [PATCH] fix tblgen | ||
5 | |||
6 | Upstream-Status: Inappropriate [OE specific] | ||
7 | |||
8 | --- | ||
9 | IGC/cmake/igc_llvm.cmake | 2 +- | ||
10 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/IGC/cmake/igc_llvm.cmake b/IGC/cmake/igc_llvm.cmake | ||
13 | index 10322533c..9020cb3c8 100644 | ||
14 | --- a/IGC/cmake/igc_llvm.cmake | ||
15 | +++ b/IGC/cmake/igc_llvm.cmake | ||
16 | @@ -52,7 +52,7 @@ else() | ||
17 | set(LLVM_OPT_EXE "opt" CACHE STRING "") | ||
18 | |||
19 | set(LLVM_TABLEGEN_EXE "llvm-tblgen") | ||
20 | - if(CMAKE_CROSSCOMPILING) | ||
21 | + if(TRUE) | ||
22 | if(DEFINED LLVM_TABLEGEN) | ||
23 | set(LLVM_TABLEGEN_EXE ${LLVM_TABLEGEN}) | ||
24 | else() | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-llvm_deps.cmake-don-t-copy-header-file-when-building.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-llvm_deps.cmake-don-t-copy-header-file-when-building.patch deleted file mode 100644 index d62f47ca..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0001-llvm_deps.cmake-don-t-copy-header-file-when-building.patch +++ /dev/null | |||
@@ -1,35 +0,0 @@ | |||
1 | From 3d99559779d628704568879a2ee51e968e66d005 Mon Sep 17 00:00:00 2001 | ||
2 | From: Anuj Mittal <anuj.mittal@intel.com> | ||
3 | Date: Tue, 5 Oct 2021 00:11:26 +0800 | ||
4 | Subject: [PATCH] llvm_deps.cmake: don't copy header file when building | ||
5 | |||
6 | We build in pre-built mode and this header shouldn't be copied in | ||
7 | that case. | ||
8 | |||
9 | Upstream-Status: Pending | ||
10 | |||
11 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
12 | --- | ||
13 | external/llvm/llvm_deps.cmake | 6 +++--- | ||
14 | 1 file changed, 3 insertions(+), 3 deletions(-) | ||
15 | |||
16 | diff --git a/external/llvm/llvm_deps.cmake b/external/llvm/llvm_deps.cmake | ||
17 | index 425d3766f..e43804f77 100644 | ||
18 | --- a/external/llvm/llvm_deps.cmake | ||
19 | +++ b/external/llvm/llvm_deps.cmake | ||
20 | @@ -46,9 +46,9 @@ if(IGC_OPTION__LLVM_LLD) | ||
21 | include(llvm_lld_source_hook) | ||
22 | if(NOT EXISTS "${IGC_LLVM_WORKSPACE_SRC}/libunwind/include/mach-o" AND ${IGC_OPTION__LLVM_PREFERRED_VERSION} GREATER_EQUAL "12.0.0") | ||
23 | # Need to copy one header from unwind package for LLD (only for building from sources) | ||
24 | - file(MAKE_DIRECTORY ${IGC_LLVM_WORKSPACE_SRC}/libunwind/include/mach-o) | ||
25 | - file(COPY ${DEFAULT_IGC_LLVM_SOURCES_DIR}/libunwind/include/mach-o/compact_unwind_encoding.h | ||
26 | - DESTINATION ${IGC_LLVM_WORKSPACE_SRC}/libunwind/include/mach-o/) | ||
27 | + #file(MAKE_DIRECTORY ${IGC_LLVM_WORKSPACE_SRC}/libunwind/include/mach-o) | ||
28 | + #file(COPY ${DEFAULT_IGC_LLVM_SOURCES_DIR}/libunwind/include/mach-o/compact_unwind_encoding.h | ||
29 | + # DESTINATION ${IGC_LLVM_WORKSPACE_SRC}/libunwind/include/mach-o/) | ||
30 | endif() | ||
31 | endif() | ||
32 | |||
33 | -- | ||
34 | 2.32.0 | ||
35 | |||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0003-Improve-Reproducibility-for-src-package.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0003-Improve-Reproducibility-for-src-package.patch index 7dd36aec..650130a8 100644 --- a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0003-Improve-Reproducibility-for-src-package.patch +++ b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0003-Improve-Reproducibility-for-src-package.patch | |||
@@ -1,7 +1,7 @@ | |||
1 | From c2b7f30dd56568482b1b7c2f22bafdf68736fc88 Mon Sep 17 00:00:00 2001 | 1 | From 0559332abd04b6c8bc70171d201f43d2e4735336 Mon Sep 17 00:00:00 2001 |
2 | From: Lee Chee Yang <chee.yang.lee@intel.com> | 2 | From: Lee Chee Yang <chee.yang.lee@intel.com> |
3 | Date: Wed, 2 Sep 2020 08:28:35 +0800 | 3 | Date: Wed, 2 Sep 2020 08:28:35 +0800 |
4 | Subject: [PATCH 3/5] Improve Reproducibility for src package | 4 | Subject: [PATCH] Improve Reproducibility for src package |
5 | 5 | ||
6 | Improve reproducibility for intel-graphics-compiler-src package. | 6 | Improve reproducibility for intel-graphics-compiler-src package. |
7 | needs to pass build path as environment variable to the build. | 7 | needs to pass build path as environment variable to the build. |
@@ -9,12 +9,13 @@ this only works on bison 3.7 onward, hence check for bison version | |||
9 | before adding the flags. | 9 | before adding the flags. |
10 | Upstream-Status: Inappropriate [applying --file-prefix-map in such way does not work for upstream] | 10 | Upstream-Status: Inappropriate [applying --file-prefix-map in such way does not work for upstream] |
11 | Signed-off-by: Lee Chee Yang <chee.yang.lee@intel.com> | 11 | Signed-off-by: Lee Chee Yang <chee.yang.lee@intel.com> |
12 | |||
12 | --- | 13 | --- |
13 | visa/CMakeLists.txt | 7 +++++-- | 14 | visa/CMakeLists.txt | 7 +++++-- |
14 | 1 file changed, 5 insertions(+), 2 deletions(-) | 15 | 1 file changed, 5 insertions(+), 2 deletions(-) |
15 | 16 | ||
16 | diff --git a/visa/CMakeLists.txt b/visa/CMakeLists.txt | 17 | diff --git a/visa/CMakeLists.txt b/visa/CMakeLists.txt |
17 | index 65dbb4934..8cd607a69 100644 | 18 | index 6be467587..930c386a6 100644 |
18 | --- a/visa/CMakeLists.txt | 19 | --- a/visa/CMakeLists.txt |
19 | +++ b/visa/CMakeLists.txt | 20 | +++ b/visa/CMakeLists.txt |
20 | @@ -123,8 +123,11 @@ endif() | 21 | @@ -123,8 +123,11 @@ endif() |
@@ -31,6 +32,3 @@ index 65dbb4934..8cd607a69 100644 | |||
31 | ADD_FLEX_BISON_DEPENDENCY(CISAScanner CISAParser) | 32 | ADD_FLEX_BISON_DEPENDENCY(CISAScanner CISAParser) |
32 | set(CISAScanner_dependencies) | 33 | set(CISAScanner_dependencies) |
33 | 34 | ||
34 | -- | ||
35 | 2.20.1 | ||
36 | |||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0004-find-external-llvm-tblgen.patch b/dynamic-layers/clang-layer/recipes-opencl/igc/files/0004-find-external-llvm-tblgen.patch deleted file mode 100644 index 79f79976..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/igc/files/0004-find-external-llvm-tblgen.patch +++ /dev/null | |||
@@ -1,30 +0,0 @@ | |||
1 | From c9fe51ec555fadd098cfc98804ce91b1cf3029d4 Mon Sep 17 00:00:00 2001 | ||
2 | From: Dongwon Kim <dongwon.kim@intel.com> | ||
3 | Date: Thu, 19 Aug 2021 08:28:03 -0700 | ||
4 | Subject: [PATCH 4/5] find external llvm-tblgen | ||
5 | |||
6 | Upstream-Status: Pending | ||
7 | Signed-off-by: Dongwon Kim <dongwon.kim@intel.com> | ||
8 | --- | ||
9 | IGC/cmake/igc_llvm.cmake | 5 ++++- | ||
10 | 1 file changed, 4 insertions(+), 1 deletion(-) | ||
11 | |||
12 | diff --git a/IGC/cmake/igc_llvm.cmake b/IGC/cmake/igc_llvm.cmake | ||
13 | index 541793f21..bc82922b1 100644 | ||
14 | --- a/IGC/cmake/igc_llvm.cmake | ||
15 | +++ b/IGC/cmake/igc_llvm.cmake | ||
16 | @@ -24,7 +24,10 @@ set(CMAKE_MODULE_PATH | ||
17 | ${CMAKE_MODULE_PATH} | ||
18 | ) | ||
19 | |||
20 | -set(LLVM_TABLEGEN_EXE "llvm-tblgen") | ||
21 | +find_program(LLVM_TABLEGEN_EXE "llvm-tblgen") | ||
22 | +if(LLVM_TABLEGEN_EXE-NOTFOUND) | ||
23 | + message(FATAL_ERROR "[VC] llvm-tblgen is not found") | ||
24 | +endif() | ||
25 | |||
26 | include(AddLLVM) | ||
27 | include(TableGen) | ||
28 | -- | ||
29 | 2.20.1 | ||
30 | |||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_1.0.16510.2.bb b/dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_1.0.16510.2.bb new file mode 100644 index 00000000..24eb97bd --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_1.0.16510.2.bb | |||
@@ -0,0 +1,75 @@ | |||
1 | SUMMARY = "The Intel(R) Graphics Compiler for OpenCL(TM)" | ||
2 | DESCRIPTION = "The Intel(R) Graphics Compiler for OpenCL(TM) is an \ | ||
3 | llvm based compiler for OpenCL(TM) targeting Intel Gen graphics \ | ||
4 | hardware architecture." | ||
5 | |||
6 | LICENSE = "MIT & Apache-2.0" | ||
7 | LIC_FILES_CHKSUM = "file://IGC/BiFModule/Implementation/ExternalLibraries/libclc/LICENSE.TXT;md5=311cfc1a5b54bab8ed34a0b5fba4373e \ | ||
8 | file://LICENSE.md;md5=488d74376edf2765f6e78d271543dde3 \ | ||
9 | file://NOTICES.txt;md5=b81a52411c84df3419f20bad4d755880" | ||
10 | |||
11 | SRC_URI = "git://github.com/intel/intel-graphics-compiler.git;protocol=https;name=igc;branch=releases/igc-1.0.16510 \ | ||
12 | git://github.com/intel/vc-intrinsics.git;protocol=https;destsuffix=git/vc-intrinsics;name=vc;nobranch=1 \ | ||
13 | git://github.com/KhronosGroup/SPIRV-Tools.git;protocol=https;destsuffix=git/SPIRV-Tools;name=spirv-tools;branch=main \ | ||
14 | git://github.com/KhronosGroup/SPIRV-Headers.git;protocol=https;destsuffix=git/SPIRV-Headers;name=spirv-headers;branch=main \ | ||
15 | file://0003-Improve-Reproducibility-for-src-package.patch \ | ||
16 | file://0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch \ | ||
17 | file://0001-external-SPIRV-Tools-change-path-to-tools-and-header.patch \ | ||
18 | " | ||
19 | |||
20 | SRC_URI:append:class-native = " file://0001-fix-tblgen.patch" | ||
21 | |||
22 | SRCREV_igc = "c2495d45f37fadd963ad22eb0cc1a8235a306775" | ||
23 | SRCREV_vc = "f9c34404d0ea9abad83875a10bd48d88cea90ebd" | ||
24 | SRCREV_spirv-tools = "f0cc85efdbbe3a46eae90e0f915dc1509836d0fc" | ||
25 | SRCREV_spirv-headers = "1c6bb2743599e6eb6f37b2969acc0aef812e32e3" | ||
26 | |||
27 | SRCREV_FORMAT = "igc_vc_spirv-tools_spirv-headers" | ||
28 | |||
29 | # Used to replace with relative path in reproducibility patch | ||
30 | export B | ||
31 | |||
32 | S = "${WORKDIR}/git" | ||
33 | |||
34 | inherit cmake pkgconfig qemu python3native | ||
35 | |||
36 | CXXFLAGS:append = " -Wno-error=nonnull" | ||
37 | |||
38 | COMPATIBLE_HOST = '(x86_64).*-linux' | ||
39 | COMPATIBLE_HOST:libc-musl = "null" | ||
40 | |||
41 | DEPENDS += " flex-native bison-native clang clang-cross-x86_64 opencl-clang qemu-native python3-mako-native" | ||
42 | |||
43 | RDEPENDS:${PN} += "opencl-clang" | ||
44 | |||
45 | PACKAGECONFIG ??= "vc" | ||
46 | PACKAGECONFIG[vc] = "-DIGC_BUILD__VC_ENABLED=ON -DIGC_OPTION__LINK_KHRONOS_SPIRV_TRANSLATOR=ON -DIGC_OPTION__SPIRV_TRANSLATOR_MODE=Prebuilds,-DIGC_BUILD__VC_ENABLED=OFF," | ||
47 | |||
48 | EXTRA_OECMAKE = " \ | ||
49 | -DIGC_OPTION__LLVM_PREFERRED_VERSION=${LLVMVERSION} \ | ||
50 | -DVC_INTRINSICS_SRC="${S}/vc-intrinsics" \ | ||
51 | -DIGC_OPTION__LLVM_MODE=Prebuilds \ | ||
52 | -DLLVM_TABLEGEN=${STAGING_BINDIR_NATIVE}/llvm-tblgen \ | ||
53 | -DLLVM_LINK_EXE=${STAGING_BINDIR_NATIVE}/llvm-link \ | ||
54 | -DCLANG_EXE=${STAGING_BINDIR_NATIVE}/clang \ | ||
55 | -DCMAKE_CROSSCOMPILING_EMULATOR=${WORKDIR}/qemuwrapper \ | ||
56 | " | ||
57 | |||
58 | do_configure:prepend:class-target () { | ||
59 | # Write out a qemu wrapper that will be used by cmake. | ||
60 | qemu_binary="${@qemu_wrapper_cmdline(d, d.getVar('STAGING_DIR_HOST'), [d.expand('${STAGING_DIR_HOST}${libdir}'),d.expand('${STAGING_DIR_HOST}${base_libdir}')])}" | ||
61 | cat > ${WORKDIR}/qemuwrapper << EOF | ||
62 | #!/bin/sh | ||
63 | $qemu_binary "\$@" | ||
64 | EOF | ||
65 | chmod +x ${WORKDIR}/qemuwrapper | ||
66 | } | ||
67 | |||
68 | UPSTREAM_CHECK_GITTAGREGEX = "^igc-(?P<pver>(?!19\..*)\d+(\.\d+)+)$" | ||
69 | |||
70 | FILES:${PN} += " \ | ||
71 | ${libdir}/igc/NOTICES.txt \ | ||
72 | " | ||
73 | |||
74 | # libigc.so contains buildpaths | ||
75 | INSANE_SKIP:${PN} += "buildpaths" | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_1.0.8744.bb b/dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_1.0.8744.bb deleted file mode 100644 index 8a09934d..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/igc/intel-graphics-compiler_1.0.8744.bb +++ /dev/null | |||
@@ -1,63 +0,0 @@ | |||
1 | SUMMARY = "The Intel(R) Graphics Compiler for OpenCL(TM)" | ||
2 | DESCRIPTION = "The Intel(R) Graphics Compiler for OpenCL(TM) is an \ | ||
3 | llvm based compiler for OpenCL(TM) targeting Intel Gen graphics \ | ||
4 | hardware architecture." | ||
5 | |||
6 | LICENSE = "MIT & BSD-3-Clause" | ||
7 | LIC_FILES_CHKSUM = "file://IGC/BiFModule/Implementation/ExternalLibraries/libclc/LICENSE.TXT;md5=311cfc1a5b54bab8ed34a0b5fba4373e \ | ||
8 | file://IGC/Compiler/LegalizationPass.cpp;beginline=1;endline=23;md5=4a985f2545dd5a846e205b1e60a51cd9 \ | ||
9 | file://NOTICES.txt;md5=db621145dfb627436bc90ad600386801" | ||
10 | |||
11 | SRC_URI = "git://github.com/intel/intel-graphics-compiler.git;protocol=https;name=igc;branch=master \ | ||
12 | git://github.com/intel/vc-intrinsics.git;protocol=https;destsuffix=git/vc-intrinsics;name=vc;branch=master \ | ||
13 | file://0001-llvm_deps.cmake-don-t-copy-header-file-when-building.patch \ | ||
14 | file://0003-Improve-Reproducibility-for-src-package.patch \ | ||
15 | file://0004-find-external-llvm-tblgen.patch \ | ||
16 | file://0001-BiF-CMakeLists.txt-remove-opt-from-DEPENDS.patch \ | ||
17 | " | ||
18 | |||
19 | SRCREV_igc = "3ba8dde8c414a0e47df58b1bba12a64f8ba2089e" | ||
20 | SRCREV_vc = "e5ad7e02aa4aa21a3cd7b3e5d1f3ec9b95f58872" | ||
21 | |||
22 | # B is exported so the reproducibility patch can replace it with a relative path | ||
23 | export B | ||
24 | |||
25 | S = "${WORKDIR}/git" | ||
26 | |||
27 | inherit cmake pkgconfig | ||
28 | |||
29 | CXXFLAGS:append = " -Wno-error=nonnull" | ||
30 | |||
31 | COMPATIBLE_HOST = '(x86_64).*-linux' | ||
32 | COMPATIBLE_HOST:libc-musl = "null" | ||
33 | |||
34 | DEPENDS += " flex-native bison-native clang opencl-clang" | ||
35 | DEPENDS:append:class-target = " clang-cross-x86_64 intel-graphics-compiler-native" | ||
36 | |||
37 | RDEPENDS:${PN} += "opencl-clang" | ||
38 | |||
39 | PACKAGECONFIG ??= "vc" | ||
40 | PACKAGECONFIG[vc] = "-DIGC_BUILD__VC_ENABLED=ON -DIGC_OPTION__LINK_KHRONOS_SPIRV_TRANSLATOR=ON -DIGC_OPTION__USE_KHRONOS_SPIRV_TRANSLATOR_IN_VC=ON -DIGC_OPTION__SPIRV_TRANSLATOR_MODE=Prebuilds,-DIGC_BUILD__VC_ENABLED=OFF," | ||
41 | |||
42 | EXTRA_OECMAKE = " \ | ||
43 | -DIGC_OPTION__LLVM_PREFERRED_VERSION=${LLVMVERSION} \ | ||
44 | -DPYTHON_EXECUTABLE=${HOSTTOOLS_DIR}/python3 \ | ||
45 | -DVC_INTRINSICS_SRC="${S}/vc-intrinsics" \ | ||
46 | -DIGC_OPTION__LLVM_MODE=Prebuilds \ | ||
47 | " | ||
48 | |||
49 | do_install:append:class-native () { | ||
50 | install -d ${D}${bindir} | ||
51 | install ${B}/IGC/Release/elf_packager ${D}${bindir}/ | ||
52 | if ${@bb.utils.contains('PACKAGECONFIG', 'vc', 'true', 'false', d)}; then | ||
53 | install ${B}/IGC/Release/CMCLTranslatorTool ${D}${bindir}/ | ||
54 | fi | ||
55 | } | ||
56 | |||
57 | BBCLASSEXTEND = "native nativesdk" | ||
58 | |||
59 | UPSTREAM_CHECK_GITTAGREGEX = "^igc-(?P<pver>(?!19\..*)\d+(\.\d+)+)$" | ||
60 | |||
61 | FILES:${PN} += " \ | ||
62 | ${libdir}/igc/NOTICES.txt \ | ||
63 | " | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch deleted file mode 100644 index 8ffa853b..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch +++ /dev/null | |||
@@ -1,35 +0,0 @@ | |||
1 | From 7fc05c52dd91902fa324a7aac9b90715cfca4717 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Wed, 15 Apr 2020 17:55:32 +0800 | ||
4 | Subject: [PATCH] Building in-tree with LLVM 10.0 with the LLVM_LINK_LLVM_DYLIB | ||
5 | |||
6 | Failed to link with the LLVMSPIRVLib library. | ||
7 | |||
8 | Add an explicit dependency to force the correct build order and linking. | ||
9 | |||
10 | Reference: | ||
11 | https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/a6d4ccf082858e63e139ca06c02a071c343d2657 | ||
12 | |||
13 | Upstream-Status: Submitted [https://github.com/intel/opencl-clang/pull/118] | ||
14 | |||
15 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
16 | --- | ||
17 | CMakeLists.txt | 2 +- | ||
18 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
19 | |||
20 | diff --git a/CMakeLists.txt b/CMakeLists.txt | ||
21 | index 51c140d..b8b514e 100644 | ||
22 | --- a/CMakeLists.txt | ||
23 | +++ b/CMakeLists.txt | ||
24 | @@ -208,7 +208,7 @@ link_directories( | ||
25 | |||
26 | set(OPENCL_CLANG_LINK_LIBS ${CMAKE_DL_LIBS}) | ||
27 | |||
28 | -if(NOT LLVMSPIRVLib IN_LIST LLVM_AVAILABLE_LIBS) | ||
29 | +if(NOT LLVMSPIRVLib IN_LIST LLVM_AVAILABLE_LIBS OR (USE_PREBUILT_LLVM AND LLVM_LINK_LLVM_DYLIB)) | ||
30 | # SPIRV-LLVM-Translator is not included into LLVM as a component. | ||
31 | # So, we need to list it here explicitly as an external library | ||
32 | list(APPEND OPENCL_CLANG_LINK_LIBS LLVMSPIRVLib) | ||
33 | -- | ||
34 | 2.17.1 | ||
35 | |||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch new file mode 100644 index 00000000..031a77c7 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch | |||
@@ -0,0 +1,49 @@ | |||
1 | From 5aea653e611b59c70e529a1bd71885a509831557 Mon Sep 17 00:00:00 2001 | ||
2 | From: Anuj Mittal <anuj.mittal@intel.com> | ||
3 | Date: Tue, 1 Aug 2023 11:15:31 +0800 | ||
4 | Subject: [PATCH] cl_headers/CMakeLists.txt: use clang from native sysroot | ||
5 | |||
6 | Allow clang to be found in target sysroot for target builds and don't try | ||
7 | to compile cross binaries, we do that ourselves. | ||
8 | |||
9 | Upstream-Status: Inappropriate [oe-specific] | ||
10 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
11 | --- | ||
12 | CMakeLists.txt | 8 ++++---- | ||
13 | cl_headers/CMakeLists.txt | 2 +- | ||
14 | 2 files changed, 5 insertions(+), 5 deletions(-) | ||
15 | |||
16 | diff --git a/CMakeLists.txt b/CMakeLists.txt | ||
17 | index 5864009..60ba39e 100644 | ||
18 | --- a/CMakeLists.txt | ||
19 | +++ b/CMakeLists.txt | ||
20 | @@ -35,10 +35,10 @@ set(CMAKE_MODULE_PATH | ||
21 | |||
22 | include(CMakeFunctions) | ||
23 | |||
24 | -if(CMAKE_CROSSCOMPILING AND OPENCL_CLANG_BUILD_EXTERNAL) | ||
25 | - include(CrossCompile) | ||
26 | - llvm_create_cross_target(${PROJECT_NAME} NATIVE "" Release) | ||
27 | -endif() | ||
28 | +#if(CMAKE_CROSSCOMPILING AND OPENCL_CLANG_BUILD_EXTERNAL) | ||
29 | +# include(CrossCompile) | ||
30 | +# llvm_create_cross_target(${PROJECT_NAME} NATIVE "" Release) | ||
31 | +#endif() | ||
32 | |||
33 | if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) | ||
34 | set(USE_PREBUILT_LLVM ON) | ||
35 | diff --git a/cl_headers/CMakeLists.txt b/cl_headers/CMakeLists.txt | ||
36 | index 16cabb7..4423536 100644 | ||
37 | --- a/cl_headers/CMakeLists.txt | ||
38 | +++ b/cl_headers/CMakeLists.txt | ||
39 | @@ -1,6 +1,6 @@ | ||
40 | set(CL_HEADERS_LIB cl_headers) | ||
41 | if(USE_PREBUILT_LLVM) | ||
42 | - find_program(CLANG_COMMAND clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) | ||
43 | + find_program(CLANG_COMMAND clang PATHS ${LLVM_TOOLS_BINARY_DIR}) | ||
44 | else() | ||
45 | set(CLANG_COMMAND $<TARGET_FILE:clang>) | ||
46 | endif() | ||
47 | -- | ||
48 | 2.37.3 | ||
49 | |||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-don-t-redefine-LLVM_TABLEGEN_EXE.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-don-t-redefine-LLVM_TABLEGEN_EXE.patch deleted file mode 100644 index 8df7e3ab..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-don-t-redefine-LLVM_TABLEGEN_EXE.patch +++ /dev/null | |||
@@ -1,32 +0,0 @@ | |||
1 | From f3ef79a6301bab0b3a447f07ceb94c39a95009df Mon Sep 17 00:00:00 2001 | ||
2 | From: Anuj Mittal <anuj.mittal@intel.com> | ||
3 | Date: Thu, 2 Apr 2020 08:59:20 +0800 | ||
4 | Subject: [PATCH] don't redefine LLVM_TABLEGEN_EXE | ||
5 | |||
6 | Use the value that has been passed by the user. | ||
7 | |||
8 | Upstream-Status: Submitted | ||
9 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
10 | --- | ||
11 | CMakeLists.txt | 5 ++++- | ||
12 | 1 file changed, 4 insertions(+), 1 deletion(-) | ||
13 | |||
14 | diff --git a/CMakeLists.txt b/CMakeLists.txt | ||
15 | index 6893e97..941b0ae 100644 | ||
16 | --- a/CMakeLists.txt | ||
17 | +++ b/CMakeLists.txt | ||
18 | @@ -137,7 +137,10 @@ endif(NOT USE_PREBUILT_LLVM) | ||
19 | set (COMPILE_OPTIONS_TD opencl_clang_options.td) | ||
20 | set (COMPILE_OPTIONS_INC opencl_clang_options.inc) | ||
21 | |||
22 | -set(LLVM_TABLEGEN_EXE "llvm-tblgen") | ||
23 | +if(NOT DEFINED LLVM_TABLEGEN_EXE) | ||
24 | + set(LLVM_TABLEGEN_EXE "llvm-tblgen") | ||
25 | +endif() | ||
26 | + | ||
27 | set(LLVM_TARGET_DEFINITIONS ${COMPILE_OPTIONS_TD}) | ||
28 | if(USE_PREBUILT_LLVM) | ||
29 | set(TABLEGEN_ADDITIONAL -I ${LLVM_INCLUDE_DIRS}) | ||
30 | -- | ||
31 | 2.25.1 | ||
32 | |||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-Request-native-clang-only-when-cross-compiling-464.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-Request-native-clang-only-when-cross-compiling-464.patch new file mode 100644 index 00000000..2f1814f8 --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-Request-native-clang-only-when-cross-compiling-464.patch | |||
@@ -0,0 +1,60 @@ | |||
1 | From 43c806ef321b1f677a49d28c89fb7ffecf539c2d Mon Sep 17 00:00:00 2001 | ||
2 | From: Tim Creech <timothy.m.creech@intel.com> | ||
3 | Date: Wed, 28 Jun 2023 03:45:51 -0400 | ||
4 | Subject: [PATCH 2/2] Request native clang only when cross-compiling (#464) | ||
5 | |||
6 | * Request native clang only when cross-compiling | ||
7 | |||
8 | LLVM_USE_HOST_TOOLS may be set if LLVM is configured with | ||
9 | LLVM_OPTIMIZED_TABLEGEN, which does not necessarily indicate | ||
10 | cross-compilation or that clang will only execute on the target. | ||
11 | |||
12 | By checking that CMAKE_CROSSCOMPILING is set, we ensure that we only | ||
13 | build/use clang again if necessary for host execution. | ||
14 | |||
15 | * fixup: CMAKE_CROSSCOMPILING implies LLVM_USE_HOST_TOOLS | ||
16 | |||
17 | Co-authored-by: Wenju He <wenju.he@intel.com> | ||
18 | |||
19 | * fixup: also use CMAKE_CROSSCOMPILING in top-level CMakeLists.txt | ||
20 | |||
21 | --------- | ||
22 | |||
23 | Co-authored-by: Wenju He <wenju.he@intel.com> | ||
24 | |||
25 | Upstream-Status: Backport [https://github.com/intel/opencl-clang/commit/53843eee13cfb2357919ee02714a43bef1af0f86] | ||
26 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
27 | --- | ||
28 | CMakeLists.txt | 2 +- | ||
29 | cl_headers/CMakeLists.txt | 2 +- | ||
30 | 2 files changed, 2 insertions(+), 2 deletions(-) | ||
31 | |||
32 | diff --git a/CMakeLists.txt b/CMakeLists.txt | ||
33 | index e772de9..5864009 100644 | ||
34 | --- a/CMakeLists.txt | ||
35 | +++ b/CMakeLists.txt | ||
36 | @@ -35,7 +35,7 @@ set(CMAKE_MODULE_PATH | ||
37 | |||
38 | include(CMakeFunctions) | ||
39 | |||
40 | -if(LLVM_USE_HOST_TOOLS AND OPENCL_CLANG_BUILD_EXTERNAL) | ||
41 | +if(CMAKE_CROSSCOMPILING AND OPENCL_CLANG_BUILD_EXTERNAL) | ||
42 | include(CrossCompile) | ||
43 | llvm_create_cross_target(${PROJECT_NAME} NATIVE "" Release) | ||
44 | endif() | ||
45 | diff --git a/cl_headers/CMakeLists.txt b/cl_headers/CMakeLists.txt | ||
46 | index 18296c2..16cabb7 100644 | ||
47 | --- a/cl_headers/CMakeLists.txt | ||
48 | +++ b/cl_headers/CMakeLists.txt | ||
49 | @@ -4,7 +4,7 @@ if(USE_PREBUILT_LLVM) | ||
50 | else() | ||
51 | set(CLANG_COMMAND $<TARGET_FILE:clang>) | ||
52 | endif() | ||
53 | -if(LLVM_USE_HOST_TOOLS AND NOT OPENCL_CLANG_BUILD_EXTERNAL) | ||
54 | +if(CMAKE_CROSSCOMPILING AND NOT OPENCL_CLANG_BUILD_EXTERNAL) | ||
55 | build_native_tool(clang CLANG_COMMAND) | ||
56 | endif() | ||
57 | |||
58 | -- | ||
59 | 2.37.3 | ||
60 | |||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-make-sure-only-static-libraries-linked-for-native-bu.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-make-sure-only-static-libraries-linked-for-native-bu.patch deleted file mode 100644 index 473f4d24..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-make-sure-only-static-libraries-linked-for-native-bu.patch +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | From b29e00e6fe428a031cf577dfb703cf13eff837f6 Mon Sep 17 00:00:00 2001 | ||
2 | From: Naveen Saini <naveen.kumar.saini@intel.com> | ||
3 | Date: Wed, 15 Apr 2020 18:05:14 +0800 | ||
4 | Subject: [PATCH 2/2] make sure only static libraries linked for native build | ||
5 | |||
6 | LINK_COMPONENTS=all isn't working for static libs for out of tree builds. Use | ||
7 | LLVM_AVAILABLE_LIBS instead. Reported: | ||
8 | |||
9 | https://github.com/intel/opencl-clang/issues/114 | ||
10 | |||
11 | Upstream-Status: Pending | ||
12 | |||
13 | Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> | ||
14 | Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com> | ||
15 | --- | ||
16 | CMakeLists.txt | 3 ++- | ||
17 | 1 file changed, 2 insertions(+), 1 deletion(-) | ||
18 | |||
19 | diff --git a/CMakeLists.txt b/CMakeLists.txt | ||
20 | index 8707487..ad2dbda 100644 | ||
21 | --- a/CMakeLists.txt | ||
22 | +++ b/CMakeLists.txt | ||
23 | @@ -218,7 +218,7 @@ add_subdirectory(cl_headers) | ||
24 | |||
25 | set(LLVM_REQUIRES_EH ON) | ||
26 | |||
27 | -if(USE_PREBUILT_LLVM OR CLANG_LINK_CLANG_DYLIB) | ||
28 | +if(false) | ||
29 | list(APPEND OPENCL_CLANG_LINK_LIBS clang-cpp) | ||
30 | else() | ||
31 | list(APPEND OPENCL_CLANG_LINK_LIBS | ||
32 | @@ -266,6 +266,7 @@ add_llvm_library(${TARGET_NAME} SHARED | ||
33 | all | ||
34 | LINK_LIBS | ||
35 | ${OPENCL_CLANG_LINK_LIBS} | ||
36 | + ${LLVM_AVAILABLE_LIBS} | ||
37 | ) | ||
38 | |||
39 | # Configure resource file on Windows | ||
40 | -- | ||
41 | 2.17.1 | ||
42 | |||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang.inc b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang.inc index 1d9b4d51..31a3fb21 100644 --- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang.inc +++ b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang.inc | |||
@@ -6,6 +6,8 @@ LICENSE = "NCSA" | |||
6 | LIC_FILES_CHKSUM = "file://LICENSE;md5=e8a15bf1416762a09ece07e44c79118c" | 6 | LIC_FILES_CHKSUM = "file://LICENSE;md5=e8a15bf1416762a09ece07e44c79118c" |
7 | 7 | ||
8 | SRC_URI = "git://github.com/intel/opencl-clang.git;branch=${BRANCH};protocol=https \ | 8 | SRC_URI = "git://github.com/intel/opencl-clang.git;branch=${BRANCH};protocol=https \ |
9 | file://0002-Request-native-clang-only-when-cross-compiling-464.patch \ | ||
10 | file://0001-cl_headers-CMakeLists.txt-use-clang-from-native-sysr.patch \ | ||
9 | " | 11 | " |
10 | S = "${WORKDIR}/git" | 12 | S = "${WORKDIR}/git" |
11 | 13 | ||
@@ -16,9 +18,17 @@ DEPENDS:append:class-target = " opencl-clang-native" | |||
16 | COMPATIBLE_HOST = '(x86_64).*-linux' | 18 | COMPATIBLE_HOST = '(x86_64).*-linux' |
17 | COMPATIBLE_HOST:libc-musl = "null" | 19 | COMPATIBLE_HOST:libc-musl = "null" |
18 | 20 | ||
21 | DEPENDS += " spirv-llvm-translator" | ||
22 | |||
23 | EXTRA_OECMAKE += "\ | ||
24 | -DLLVM_TABLEGEN_EXE=${STAGING_BINDIR_NATIVE}/llvm-tblgen \ | ||
25 | -DCMAKE_SKIP_RPATH=TRUE \ | ||
26 | -DPREFERRED_LLVM_VERSION=${LLVMVERSION} \ | ||
27 | " | ||
28 | |||
19 | do_install:append:class-native() { | 29 | do_install:append:class-native() { |
20 | install -d ${D}${bindir} | 30 | install -d ${D}${bindir} |
21 | install -m 0755 ${B}/linux_linker/linux_resource_linker ${D}${bindir}/ | 31 | install -m 0755 ${B}/bin/linux_resource_linker ${D}${bindir}/ |
22 | } | 32 | } |
23 | 33 | ||
24 | BBCLASSEXTEND = "native nativesdk" | 34 | BBCLASSEXTEND = "native nativesdk" |
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_10.0.0.bb b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_10.0.0.bb deleted file mode 100644 index e08f2278..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_10.0.0.bb +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | require opencl-clang.inc | ||
2 | |||
3 | SRC_URI:append = " file://0001-don-t-redefine-LLVM_TABLEGEN_EXE.patch \ | ||
4 | file://0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch \ | ||
5 | " | ||
6 | SRC_URI:append:class-native = " file://0002-make-sure-only-static-libraries-linked-for-native-bu.patch" | ||
7 | |||
8 | BRANCH = "ocl-open-100" | ||
9 | |||
10 | SRCREV = "c8cd72e32b6abc18ce6da71c357ea45ba78b52f0" | ||
11 | |||
12 | EXTRA_OECMAKE += "\ | ||
13 | -DLLVM_TABLEGEN_EXE=${STAGING_BINDIR_NATIVE}/llvm-tblgen \ | ||
14 | -DCMAKE_SKIP_RPATH=TRUE \ | ||
15 | " | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_12.0.0.bb b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_12.0.0.bb deleted file mode 100644 index d0b2acfb..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_12.0.0.bb +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | require opencl-clang.inc | ||
2 | |||
3 | SRCREV = "8fc6b059248dc6c9c40c7cbe5fedcc6ebb951983" | ||
4 | |||
5 | DEPENDS += " spirv-llvm-translator" | ||
6 | |||
7 | BRANCH = "ocl-open-120" | ||
8 | |||
9 | EXTRA_OECMAKE += "\ | ||
10 | -DCMAKE_SKIP_RPATH=TRUE \ | ||
11 | -DPREFERRED_LLVM_VERSION="12.0.0" \ | ||
12 | " | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_13.0.0.bb b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_13.0.0.bb deleted file mode 100644 index 7ca7895e..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_13.0.0.bb +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | require opencl-clang.inc | ||
2 | |||
3 | SRC_URI:append = " file://0001-don-t-redefine-LLVM_TABLEGEN_EXE.patch \ | ||
4 | " | ||
5 | SRCREV = "0f36f940b25b8e7661cfaf8a7c11fdbb7d853223" | ||
6 | |||
7 | BRANCH = "ocl-open-130" | ||
8 | |||
9 | DEPENDS += " spirv-llvm-translator" | ||
10 | |||
11 | EXTRA_OECMAKE += "\ | ||
12 | -DLLVM_TABLEGEN_EXE=${STAGING_BINDIR_NATIVE}/llvm-tblgen \ | ||
13 | -DCMAKE_SKIP_RPATH=TRUE \ | ||
14 | -DPREFERRED_LLVM_VERSION=${LLVMVERSION} \ | ||
15 | " | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_15.0.0.bb b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_15.0.0.bb new file mode 100644 index 00000000..e946c31c --- /dev/null +++ b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_15.0.0.bb | |||
@@ -0,0 +1,5 @@ | |||
1 | require opencl-clang.inc | ||
2 | |||
3 | SRCREV = "60fd799cc58755c16d951f9ebfde6d0f9b8554dd" | ||
4 | |||
5 | BRANCH = "ocl-open-150" | ||
diff --git a/dynamic-layers/clang-layer/recipes-opencl/vc-intrinsics/vc-intrinsics_1.0.bb b/dynamic-layers/clang-layer/recipes-opencl/vc-intrinsics/vc-intrinsics_1.0.bb deleted file mode 100644 index d98a9ddf..00000000 --- a/dynamic-layers/clang-layer/recipes-opencl/vc-intrinsics/vc-intrinsics_1.0.bb +++ /dev/null | |||
@@ -1,24 +0,0 @@ | |||
1 | SUMMARY = "VC Intrinsics" | ||
2 | DESCRIPTION = "VC Intrinsics project contains a set of new intrinsics on \ | ||
3 | top of core LLVM IR instructions that represent SIMD semantics of a program \ | ||
4 | targeting GPU" | ||
5 | |||
6 | LICENSE = "MIT" | ||
7 | LIC_FILES_CHKSUM = "file://Readme.md;beginline=1;endline=7;md5=3b2db19c3b0877bb312b7adbcb815adc" | ||
8 | |||
9 | SRC_URI = "git://github.com/intel/vc-intrinsics.git;protocol=https;;branch=master \ | ||
10 | " | ||
11 | SRCREV = "a2f2f10dc61c8161c57cf33ed606c8e3ccf3a921" | ||
12 | |||
13 | S = "${WORKDIR}/git" | ||
14 | |||
15 | inherit cmake | ||
16 | |||
17 | COMPATIBLE_HOST = '(x86_64).*-linux' | ||
18 | COMPATIBLE_HOST:libc-musl = "null" | ||
19 | |||
20 | DEPENDS += " clang" | ||
21 | |||
22 | EXTRA_OECMAKE = "-DLLVM_DIR=${STAGING_LIBDIR}" | ||
23 | |||
24 | BBCLASSEXTEND = "native nativesdk" | ||