summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnuj Mittal <anuj.mittal@intel.com>2022-04-05 10:34:33 +0800
committerAnuj Mittal <anuj.mittal@intel.com>2022-04-05 15:59:12 +0800
commitc993e8e815b34129b75e761ad249a526830973b1 (patch)
tree910059602e3da50ef6fef1b0bbc291dfe9d38a25
parent7bb49b1ae15ab04b352edb58e97927f75846c9b0 (diff)
downloadmeta-intel-c993e8e815b34129b75e761ad249a526830973b1.tar.gz
Remove support for building with LLVM 10
We no longer support building with older branches of OE-Core/meta-clang so remove LLVM 10 specific configurations and patches. Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
-rw-r--r--conf/machine/include/meta-intel.inc6
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch111
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch146
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-OpenCL-3.0-support.patch8259
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-llvm-spirv-skip-building-tests.patch51
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch812
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch33
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch982
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch35
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0004-Remove-repo-name-in-LLVM-IR.patch49
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch47
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch53
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0007-support-cl_ext_float_atomics.patch377
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch96
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch105
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch43
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch34
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch40
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch61
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch97
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch173
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch550
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch2243
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-basic-block-sections-support.patch237
-rw-r--r--dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend34
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch35
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-make-sure-only-static-libraries-linked-for-native-bu.patch42
-rw-r--r--dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_10.0.0.bb15
28 files changed, 2 insertions, 14764 deletions
diff --git a/conf/machine/include/meta-intel.inc b/conf/machine/include/meta-intel.inc
index 452a423d..511179e3 100644
--- a/conf/machine/include/meta-intel.inc
+++ b/conf/machine/include/meta-intel.inc
@@ -21,10 +21,8 @@ PREFERRED_PROVIDER_libva-utils = "libva-intel-utils"
21PREFERRED_PROVIDER_libva-utils-native = "libva-intel-utils-native" 21PREFERRED_PROVIDER_libva-utils-native = "libva-intel-utils-native"
22PREFERRED_PROVIDER_nativesdk-libva-utils = "nativesdk-libva-intel-utils" 22PREFERRED_PROVIDER_nativesdk-libva-utils = "nativesdk-libva-intel-utils"
23 23
24PREFERRED_VERSION_opencl-clang ?= "${@bb.utils.contains('LLVMVERSION', '10.0.1', '10.0.0', \ 24PREFERRED_VERSION_opencl-clang ?= "${@bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d)}"
25 bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d), d)}" 25PREFERRED_VERSION_opencl-clang-native ?= "${@bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d)}"
26PREFERRED_VERSION_opencl-clang-native ?= "${@bb.utils.contains('LLVMVERSION', '10.0.1', '10.0.0', \
27 bb.utils.contains('LLVMVERSION', '12.0.0', '12.0.0', '13.0.0', d), d)}"
28 26
29XSERVER_X86_ASPEED_AST = "xf86-video-ast \ 27XSERVER_X86_ASPEED_AST = "xf86-video-ast \
30 " 28 "
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch
deleted file mode 100644
index cd519971..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch
+++ /dev/null
@@ -1,111 +0,0 @@
1From eeb816d95f0910bd246e37bb2bb3923acf0edf6b Mon Sep 17 00:00:00 2001
2From: Aleksander Us <aleksander.us@intel.com>
3Date: Mon, 26 Aug 2019 15:47:41 +0300
4Subject: [PATCH] [BasicBlockUtils] Add metadata fixing in
5 SplitBlockPredecessors.
6
7In case when BB is header of some loop and predecessor is latch of
8this loop, metadata was not attached to newly created basic block.
9This led to loss of loop metadata for other passes.
10
11Upstream-Status: Submitted [https://reviews.llvm.org/D66892]
12
13https://github.com/intel/llvm-patches/commit/8af4449e2d201707f7f2f832b473a0439e255f32
14
15Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
16---
17 lib/Transforms/Utils/BasicBlockUtils.cpp | 23 ++++++++----
18 test/Transforms/LoopSimplify/loop_metadata.ll | 36 +++++++++++++++++++
19 2 files changed, 52 insertions(+), 7 deletions(-)
20 create mode 100644 test/Transforms/LoopSimplify/loop_metadata.ll
21
22diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
23index 5fa371377c8..3a90ae061fb 100644
24--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
25+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
26@@ -579,24 +579,33 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
27
28 // The new block unconditionally branches to the old block.
29 BranchInst *BI = BranchInst::Create(BB, NewBB);
30+ bool IsBBHeader = LI && LI->isLoopHeader(BB);
31+ Loop *BBLoop = LI ? LI->getLoopFor(BB) : nullptr;
32 // Splitting the predecessors of a loop header creates a preheader block.
33- if (LI && LI->isLoopHeader(BB))
34+ if (IsBBHeader)
35 // Using the loop start line number prevents debuggers stepping into the
36 // loop body for this instruction.
37- BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
38+ BI->setDebugLoc(BBLoop->getStartLoc());
39 else
40 BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
41
42 // Move the edges from Preds to point to NewBB instead of BB.
43- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
44+ for (BasicBlock *Pred : Preds) {
45+ Instruction *PI = Pred->getTerminator();
46 // This is slightly more strict than necessary; the minimum requirement
47 // is that there be no more than one indirectbr branching to BB. And
48 // all BlockAddress uses would need to be updated.
49- assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
50+ assert(!isa<IndirectBrInst>(PI) &&
51 "Cannot split an edge from an IndirectBrInst");
52- assert(!isa<CallBrInst>(Preds[i]->getTerminator()) &&
53- "Cannot split an edge from a CallBrInst");
54- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
55+ assert(!isa<CallBrInst>(PI) && "Cannot split an edge from a CallBrInst");
56+ if (IsBBHeader && BBLoop->contains(Pred) && BBLoop->isLoopLatch(Pred)) {
57+ // Update loop metadata if it exists.
58+ if (MDNode *LoopMD = PI->getMetadata(LLVMContext::MD_loop)) {
59+ BI->setMetadata(LLVMContext::MD_loop, LoopMD);
60+ PI->setMetadata(LLVMContext::MD_loop, nullptr);
61+ }
62+ }
63+ PI->replaceUsesOfWith(BB, NewBB);
64 }
65
66 // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
67diff --git a/test/Transforms/LoopSimplify/loop_metadata.ll b/test/Transforms/LoopSimplify/loop_metadata.ll
68new file mode 100644
69index 00000000000..c15c92fe3ae
70--- /dev/null
71+++ b/test/Transforms/LoopSimplify/loop_metadata.ll
72@@ -0,0 +1,36 @@
73+; RUN: opt -S -loop-simplify < %s | FileCheck %s
74+
75+; CHECK: for.cond.loopexit:
76+; CHECK: br label %for.cond, !llvm.loop !0
77+; CHECK: br i1 %cmp1, label %for.body1, label %for.cond.loopexit
78+
79+define void @foo() {
80+entry:
81+ br label %for.cond
82+
83+for.cond: ; preds = %for.cond1, %entry
84+ %j = phi i32 [ 0, %entry ], [ %add, %for.cond1 ]
85+ %cmp = icmp ult i32 %j, 8
86+ br i1 %cmp, label %for.body, label %for.end
87+
88+for.body: ; preds = %for.cond
89+ %dummy1 = add i32 1, 1
90+ %add = add nuw nsw i32 %j, 1
91+ br label %for.cond1
92+
93+for.cond1: ; preds = %for.body1, %for.body
94+ %i.0 = phi i32 [ 1, %for.body ], [ %inc, %for.body1 ]
95+ %cmp1 = icmp ult i32 %i.0, 8
96+ br i1 %cmp1, label %for.body1, label %for.cond, !llvm.loop !0
97+
98+for.body1: ; preds = %for.cond1
99+ %dummy2 = add i32 1, 1
100+ %inc = add nuw nsw i32 %i.0, 1
101+ br label %for.cond1
102+
103+for.end: ; preds = %for.cond
104+ ret void
105+}
106+
107+!0 = distinct !{!0, !1}
108+!1 = !{!"llvm.loop.unroll.full"}
109--
1102.18.0
111
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch
deleted file mode 100644
index 48307deb..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch
+++ /dev/null
@@ -1,146 +0,0 @@
1From 35e218a886f4c066eabd18685240d55270bd5a6d Mon Sep 17 00:00:00 2001
2From: Aleksander Us <aleksander.us@intel.com>
3Date: Mon, 26 Aug 2019 15:45:47 +0300
4Subject: [PATCH] [IndVarSimplify] Do not use SCEV expander for IVCount in
5 LFTR when possible.
6
7SCEV analysis cannot properly cache instruction with poison flags
8(for example, add nsw outside of loop will not be reused by expander).
9This can lead to generating of additional instructions by SCEV expander.
10
11Example IR:
12
13 ...
14 %maxval = add nuw nsw i32 %a1, %a2
15 ...
16for.body:
17 ...
18 %cmp22 = icmp ult i32 %ivadd, %maxval
19 br i1 %cmp22, label %for.body, label %for.end
20 ...
21
22SCEV expander will generate copy of %maxval in preheader but without
23nuw/nsw flags. This can be avoided by explicit check that iv count
24value gives the same SCEV expressions as calculated by LFTR.
25
26Upstream-Status: Submitted [https://reviews.llvm.org/D66890]
27
28https://github.com/intel/llvm-patches/commit/fd6a6c97341a56fd21bc32bc940afea751312e8f
29
30Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
31---
32 lib/Transforms/Scalar/IndVarSimplify.cpp | 12 +++++++++-
33 test/Transforms/IndVarSimplify/add_nsw.ll | 23 ++++++++++++++++++++
34 test/Transforms/IndVarSimplify/lftr-reuse.ll | 9 +++-----
35 test/Transforms/IndVarSimplify/udiv.ll | 1 +
36 4 files changed, 38 insertions(+), 7 deletions(-)
37 create mode 100644 test/Transforms/IndVarSimplify/add_nsw.ll
38
39diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
40index f9fc698a4a9..5e04dac8aa6 100644
41--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
42+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
43@@ -2375,6 +2375,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
44 if (UsePostInc)
45 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
46
47+ // If computed limit is equal to old limit then do not use SCEV expander
48+ // because it can lost NUW/NSW flags and create extra instructions.
49+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
50+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getOperand(0))) {
51+ Value *Limit = Cmp->getOperand(0);
52+ if (!L->isLoopInvariant(Limit))
53+ Limit = Cmp->getOperand(1);
54+ if (SE->getSCEV(Limit) == IVLimit)
55+ return Limit;
56+ }
57+
58 // Expand the code for the iteration count.
59 assert(SE->isLoopInvariant(IVLimit, L) &&
60 "Computed iteration count is not loop invariant!");
61@@ -2383,7 +2394,6 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
62 // SCEV expression (IVInit) for a pointer type IV value (IndVar).
63 Type *LimitTy = ExitCount->getType()->isPointerTy() ?
64 IndVar->getType() : ExitCount->getType();
65- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
66 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
67 }
68 }
69diff --git a/test/Transforms/IndVarSimplify/add_nsw.ll b/test/Transforms/IndVarSimplify/add_nsw.ll
70new file mode 100644
71index 00000000000..abd1cbb6c51
72--- /dev/null
73+++ b/test/Transforms/IndVarSimplify/add_nsw.ll
74@@ -0,0 +1,23 @@
75+; RUN: opt -indvars -S %s | FileCheck %s
76+
77+target datalayout = "e-p:32:32-i64:64-n8:16:32"
78+
79+; CHECK: for.body.preheader:
80+; CHECK-NOT: add
81+; CHECK: for.body:
82+
83+define void @foo(i32 %a1, i32 %a2) {
84+entry:
85+ %maxval = add nuw nsw i32 %a1, %a2
86+ %cmp = icmp slt i32 %maxval, 1
87+ br i1 %cmp, label %for.end, label %for.body
88+
89+for.body: ; preds = %entry, %for.body
90+ %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ]
91+ %add31 = add nuw nsw i32 %j.02, 1
92+ %cmp22 = icmp slt i32 %add31, %maxval
93+ br i1 %cmp22, label %for.body, label %for.end
94+
95+for.end: ; preds = %for.body
96+ ret void
97+}
98diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
99index 14ae9738696..509d662b767 100644
100--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
101+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
102@@ -67,11 +67,9 @@ define void @expandOuterRecurrence(i32 %arg) nounwind {
103 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
104 ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
105 ; CHECK: outer.preheader:
106-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1
107 ; CHECK-NEXT: br label [[OUTER:%.*]]
108 ; CHECK: outer:
109-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ]
110-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ]
111+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ]
112 ; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
113 ; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
114 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
115@@ -81,14 +79,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind {
116 ; CHECK: inner:
117 ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ]
118 ; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1
119-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]]
120+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]]
121 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
122 ; CHECK: outer.inc.loopexit:
123 ; CHECK-NEXT: br label [[OUTER_INC]]
124 ; CHECK: outer.inc:
125 ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1
126-; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1
127-; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]]
128+; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]]
129 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]]
130 ; CHECK: exit.loopexit:
131 ; CHECK-NEXT: br label [[EXIT]]
132diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll
133index b3f2c2a6a66..3530343ef4a 100644
134--- a/test/Transforms/IndVarSimplify/udiv.ll
135+++ b/test/Transforms/IndVarSimplify/udiv.ll
136@@ -133,6 +133,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
137 ; CHECK-LABEL: @foo(
138 ; CHECK: for.body.preheader:
139 ; CHECK-NOT: udiv
140+; CHECK: for.body:
141
142 define void @foo(double* %p, i64 %n) nounwind {
143 entry:
144--
1452.18.0
146
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-OpenCL-3.0-support.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-OpenCL-3.0-support.patch
deleted file mode 100644
index 1ab00df0..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-OpenCL-3.0-support.patch
+++ /dev/null
@@ -1,8259 +0,0 @@
1From 8dbdb2f26674a938ff43b5bfe5b3bf3d1117f9e4 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Wed, 7 Apr 2021 16:36:10 +0800
4Subject: [PATCH 1/7] OpenCL 3.0 support
5
6Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0001-OpenCL-3.0-support.patch]
7Signed-off-by: Anton Zabaznov <anton.zabaznov@intel.com>
8Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
9---
10 clang/include/clang/Basic/Builtins.def | 65 +-
11 clang/include/clang/Basic/Builtins.h | 13 +-
12 .../clang/Basic/DiagnosticParseKinds.td | 2 +
13 .../clang/Basic/DiagnosticSemaKinds.td | 7 +
14 clang/include/clang/Basic/LangOptions.def | 2 +
15 clang/include/clang/Basic/LangStandards.def | 4 +
16 .../include/clang/Basic/OpenCLExtensions.def | 15 +
17 clang/include/clang/Basic/OpenCLOptions.h | 175 +-
18 clang/include/clang/Driver/Options.td | 2 +-
19 clang/include/clang/Sema/Sema.h | 9 +
20 clang/lib/AST/ASTContext.cpp | 3 +-
21 clang/lib/Basic/Builtins.cpp | 28 +-
22 clang/lib/Basic/TargetInfo.cpp | 11 +
23 clang/lib/Basic/Targets.cpp | 1 -
24 clang/lib/CodeGen/CodeGenFunction.cpp | 6 +-
25 clang/lib/Frontend/CompilerInvocation.cpp | 22 +-
26 clang/lib/Frontend/InitPreprocessor.cpp | 6 +-
27 clang/lib/Headers/opencl-c-base.h | 75 +-
28 clang/lib/Headers/opencl-c.h | 3228 ++++++++++++++---
29 clang/lib/Parse/ParseDecl.cpp | 12 +-
30 clang/lib/Parse/ParsePragma.cpp | 10 +-
31 clang/lib/Sema/OpenCLBuiltins.td | 49 +-
32 clang/lib/Sema/Sema.cpp | 47 +-
33 clang/lib/Sema/SemaChecking.cpp | 38 +-
34 clang/lib/Sema/SemaDecl.cpp | 15 +-
35 clang/lib/Sema/SemaDeclAttr.cpp | 9 +-
36 clang/lib/Sema/SemaDeclCXX.cpp | 10 +
37 clang/lib/Sema/SemaLookup.cpp | 19 +-
38 clang/lib/Sema/SemaType.cpp | 14 +-
39 .../CodeGenOpenCL/addr-space-struct-arg.cl | 7 +-
40 .../address-spaces-conversions.cl | 2 +
41 .../CodeGenOpenCL/address-spaces-mangling.cl | 3 +-
42 clang/test/CodeGenOpenCL/address-spaces.cl | 4 +
43 .../amdgcn-automatic-variable.cl | 1 +
44 .../CodeGenOpenCL/amdgpu-sizeof-alignof.cl | 21 +-
45 .../CodeGenOpenCL/arm-integer-dot-product.cl | 1 +
46 .../test/CodeGenOpenCL/cl-uniform-wg-size.cl | 2 +
47 clang/test/CodeGenOpenCL/fpmath.cl | 2 +
48 .../generic-address-space-feature.cl | 28 +
49 .../intel-subgroups-avc-ext-types.cl | 1 +
50 .../kernels-have-spir-cc-by-default.cl | 3 +
51 clang/test/CodeGenOpenCL/logical-ops.cl | 1 +
52 clang/test/CodeGenOpenCL/no-half.cl | 1 +
53 clang/test/CodeGenOpenCL/pipe_builtin.cl | 3 +
54 clang/test/CodeGenOpenCL/pipe_types.cl | 1 +
55 clang/test/CodeGenOpenCL/printf.cl | 2 +
56 clang/test/CodeGenOpenCL/unroll-hint.cl | 1 +
57 clang/test/Driver/autocomplete.c | 2 +
58 clang/test/Driver/opencl.cl | 2 +
59 clang/test/Driver/unknown-std.cl | 1 +
60 clang/test/Frontend/stdlang.c | 1 +
61 clang/test/Headers/opencl-c-header.cl | 7 +-
62 clang/test/Index/pipe-size.cl | 7 +
63 clang/test/Preprocessor/predefined-macros.c | 13 +
64 .../Sema/feature-extensions-simult-support.cl | 75 +
65 clang/test/Sema/features-ignore-pragma.cl | 24 +
66 clang/test/Sema/opencl-features-pipes.cl | 18 +
67 clang/test/Sema/opencl-features.cl | 128 +
68 clang/test/Sema/pipe_builtins_feature.cl | 21 +
69 .../address-spaces-conversions-cl2.0.cl | 3 +
70 clang/test/SemaOpenCL/address-spaces.cl | 1 +
71 .../SemaOpenCL/cl20-device-side-enqueue.cl | 16 +-
72 .../SemaOpenCL/forget-unsupported-builtins.cl | 22 +
73 clang/test/SemaOpenCL/image-features.cl | 20 +
74 .../SemaOpenCL/invalid-pipe-builtin-cl2.0.cl | 1 +
75 clang/test/SemaOpenCL/storageclass-cl20.cl | 1 +
76 .../TableGen/ClangOpenCLBuiltinEmitter.cpp | 35 +-
77 67 files changed, 3656 insertions(+), 723 deletions(-)
78 create mode 100644 clang/test/CodeGenOpenCL/generic-address-space-feature.cl
79 create mode 100644 clang/test/Sema/feature-extensions-simult-support.cl
80 create mode 100644 clang/test/Sema/features-ignore-pragma.cl
81 create mode 100644 clang/test/Sema/opencl-features-pipes.cl
82 create mode 100644 clang/test/Sema/opencl-features.cl
83 create mode 100644 clang/test/Sema/pipe_builtins_feature.cl
84 create mode 100644 clang/test/SemaOpenCL/forget-unsupported-builtins.cl
85 create mode 100644 clang/test/SemaOpenCL/image-features.cl
86
87diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
88index 1a6c85ce2dd3..b76e277f0337 100644
89--- a/clang/include/clang/Basic/Builtins.def
90+++ b/clang/include/clang/Basic/Builtins.def
91@@ -106,6 +106,10 @@
92 # define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS)
93 #endif
94
95+#if defined(BUILTIN) && !defined(OPENCLBUILTIN)
96+# define OPENCLBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS)
97+#endif
98+
99 // Standard libc/libm functions:
100 BUILTIN(__builtin_atan2 , "ddd" , "Fne")
101 BUILTIN(__builtin_atan2f, "fff" , "Fne")
102@@ -1514,50 +1518,54 @@ BUILTIN(__builtin_coro_param, "bv*v*", "n")
103
104 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Pipe functions.
105 // We need the generic prototype, since the packet type could be anything.
106-LANGBUILTIN(read_pipe, "i.", "tn", OCLC20_LANG)
107-LANGBUILTIN(write_pipe, "i.", "tn", OCLC20_LANG)
108+OPENCLBUILTIN(read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
109+OPENCLBUILTIN(write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
110
111-LANGBUILTIN(reserve_read_pipe, "i.", "tn", OCLC20_LANG)
112-LANGBUILTIN(reserve_write_pipe, "i.", "tn", OCLC20_LANG)
113+OPENCLBUILTIN(reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
114+OPENCLBUILTIN(reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
115
116-LANGBUILTIN(commit_write_pipe, "v.", "tn", OCLC20_LANG)
117-LANGBUILTIN(commit_read_pipe, "v.", "tn", OCLC20_LANG)
118+OPENCLBUILTIN(commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
119+OPENCLBUILTIN(commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
120
121-LANGBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG)
122-LANGBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG)
123+OPENCLBUILTIN(sub_group_reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
124+OPENCLBUILTIN(sub_group_reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
125
126-LANGBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC20_LANG)
127-LANGBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC20_LANG)
128+OPENCLBUILTIN(sub_group_commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
129+OPENCLBUILTIN(sub_group_commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
130
131-LANGBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC20_LANG)
132-LANGBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC20_LANG)
133+OPENCLBUILTIN(work_group_reserve_read_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
134+OPENCLBUILTIN(work_group_reserve_write_pipe, "i.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
135
136-LANGBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC20_LANG)
137-LANGBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC20_LANG)
138+OPENCLBUILTIN(work_group_commit_read_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
139+OPENCLBUILTIN(work_group_commit_write_pipe, "v.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
140
141-LANGBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC20_LANG)
142-LANGBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC20_LANG)
143+OPENCLBUILTIN(get_pipe_num_packets, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
144+OPENCLBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_pipes")
145
146 // OpenCL v2.0 s6.13.17 - Enqueue kernel functions.
147 // Custom builtin check allows to perform special check of passed block arguments.
148-LANGBUILTIN(enqueue_kernel, "i.", "tn", OCLC20_LANG)
149-LANGBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC20_LANG)
150-LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", OCLC20_LANG)
151-LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", OCLC20_LANG)
152-LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC20_LANG)
153+OPENCLBUILTIN(enqueue_kernel, "i.", "tn", OCLC2P_LANG,
154+ "__opencl_c_device_enqueue")
155+OPENCLBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC2P_LANG,
156+ "__opencl_c_device_enqueue")
157+OPENCLBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn",
158+ OCLC2P_LANG, "__opencl_c_device_enqueue")
159+OPENCLBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn",
160+ OCLC2P_LANG, "__opencl_c_device_enqueue")
161+OPENCLBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC2P_LANG, "__opencl_c_device_enqueue")
162
163 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
164 // FIXME: Pointer parameters of OpenCL builtins should have their address space
165 // requirement defined.
166-LANGBUILTIN(to_global, "v*v*", "tn", OCLC20_LANG)
167-LANGBUILTIN(to_local, "v*v*", "tn", OCLC20_LANG)
168-LANGBUILTIN(to_private, "v*v*", "tn", OCLC20_LANG)
169+OPENCLBUILTIN(to_global, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space")
170+OPENCLBUILTIN(to_local, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space")
171+OPENCLBUILTIN(to_private, "v*v*", "tn", OCLC2P_LANG, "__opencl_c_generic_address_space")
172
173 // OpenCL half load/store builtin
174-LANGBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES)
175-LANGBUILTIN(__builtin_store_halff, "vfh*", "n", ALL_OCLC_LANGUAGES)
176-LANGBUILTIN(__builtin_load_half, "dhC*", "nc", ALL_OCLC_LANGUAGES)
177-LANGBUILTIN(__builtin_load_halff, "fhC*", "nc", ALL_OCLC_LANGUAGES)
178+OPENCLBUILTIN(__builtin_store_half, "vdh*", "n", ALL_OCLC_LANGUAGES, "")
179+OPENCLBUILTIN(__builtin_store_halff, "vfh*", "n", ALL_OCLC_LANGUAGES, "")
180+OPENCLBUILTIN(__builtin_load_half, "dhC*", "nc", ALL_OCLC_LANGUAGES, "")
181+OPENCLBUILTIN(__builtin_load_halff, "fhC*", "nc", ALL_OCLC_LANGUAGES, "")
182
183 // Builtins for os_log/os_trace
184 BUILTIN(__builtin_os_log_format_buffer_size, "zcC*.", "p:0:nut")
185@@ -1578,3 +1586,4 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n")
186 #undef BUILTIN
187 #undef LIBBUILTIN
188 #undef LANGBUILTIN
189+#undef OPENCLBUILTIN
190diff --git a/clang/include/clang/Basic/Builtins.h b/clang/include/clang/Basic/Builtins.h
191index e4ed482d9068..713ea4bc267d 100644
192--- a/clang/include/clang/Basic/Builtins.h
193+++ b/clang/include/clang/Basic/Builtins.h
194@@ -33,13 +33,13 @@ enum LanguageID {
195 CXX_LANG = 0x4, // builtin for cplusplus only.
196 OBJC_LANG = 0x8, // builtin for objective-c and objective-c++
197 MS_LANG = 0x10, // builtin requires MS mode.
198- OCLC20_LANG = 0x20, // builtin for OpenCL C 2.0 only.
199+ OCLC2P_LANG = 0x20, // builtin for OpenCL C 2.0+ versions.
200 OCLC1X_LANG = 0x40, // builtin for OpenCL C 1.x only.
201 OMP_LANG = 0x80, // builtin requires OpenMP.
202 ALL_LANGUAGES = C_LANG | CXX_LANG | OBJC_LANG, // builtin for all languages.
203 ALL_GNU_LANGUAGES = ALL_LANGUAGES | GNU_LANG, // builtin requires GNU mode.
204 ALL_MS_LANGUAGES = ALL_LANGUAGES | MS_LANG, // builtin requires MS mode.
205- ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC20_LANG // builtin for OCLC languages.
206+ ALL_OCLC_LANGUAGES = OCLC1X_LANG | OCLC2P_LANG // builtin for OCLC languages.
207 };
208
209 namespace Builtin {
210@@ -228,6 +228,10 @@ public:
211 /// for non-builtins.
212 bool canBeRedeclared(unsigned ID) const;
213
214+ bool requiresFeatures(unsigned ID) const {
215+ return requiresFeatures(getRecord(ID));
216+ }
217+
218 private:
219 const Info &getRecord(unsigned ID) const;
220
221@@ -235,6 +239,11 @@ private:
222 bool builtinIsSupported(const Builtin::Info &BuiltinInfo,
223 const LangOptions &LangOpts);
224
225+ bool OclBuiltinIsSupported(const Builtin::Info &BuiltinInfo,
226+ const LangOptions &LangOpts) const;
227+
228+ bool requiresFeatures(const Builtin::Info &BuiltinInfo) const;
229+
230 /// Helper function for isPrintfLike and isScanfLike.
231 bool isLike(unsigned ID, unsigned &FormatIdx, bool &HasVAListArg,
232 const char *Fmt) const;
233diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
234index 33adf093693f..43ea63586f91 100644
235--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
236+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
237@@ -1199,6 +1199,8 @@ def warn_pragma_unsupported_extension : Warning<
238 "unsupported OpenCL extension %0 - ignoring">, InGroup<IgnoredPragmas>;
239 def warn_pragma_extension_is_core : Warning<
240 "OpenCL extension %0 is core feature or supported optional core feature - ignoring">, InGroup<DiagGroup<"pedantic-core-features">>, DefaultIgnore;
241+def warn_opencl_pragma_feature_ignore : Warning<
242+ "OpenCL feature support can't be controlled via pragma, ignoring">, InGroup<IgnoredPragmas>;
243
244 // OpenCL errors.
245 def err_opencl_taking_function_address_parser : Error<
246diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
247index 917377420505..91648aa27820 100644
248--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
249+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
250@@ -9378,6 +9378,13 @@ def ext_opencl_ext_vector_type_rgba_selector: ExtWarn<
251 def err_openclcxx_placement_new : Error<
252 "use of placement new requires explicit declaration">;
253
254+def err_opencl_pipes_require_feat : Error<
255+ "usage of OpenCL pipes requires feature support">;
256+def err_opencl_memory_scope_require_feat : Error<
257+ "usage of memory scope requires feature support">;
258+def err_opencl_memory_ordering_require_feat : Error<
259+ "usage of memory ordering requires feature support">;
260+
261 // MIG routine annotations.
262 def warn_mig_server_routine_does_not_return_kern_return_t : Warning<
263 "'mig_server_routine' attribute only applies to routines that return a kern_return_t">,
264diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
265index 3319a3123976..406f45c0be69 100644
266--- a/clang/include/clang/Basic/LangOptions.def
267+++ b/clang/include/clang/Basic/LangOptions.def
268@@ -203,6 +203,8 @@ LANGOPT(OpenCL , 1, 0, "OpenCL")
269 LANGOPT(OpenCLVersion , 32, 0, "OpenCL C version")
270 LANGOPT(OpenCLCPlusPlus , 1, 0, "C++ for OpenCL")
271 LANGOPT(OpenCLCPlusPlusVersion , 32, 0, "C++ for OpenCL version")
272+LANGOPT(OpenCLGenericKeyword , 1, 0, "OpenCL generic keyword")
273+LANGOPT(OpenCLPipeKeyword , 1, 0, "OpenCL pipe keyword")
274 LANGOPT(NativeHalfType , 1, 0, "Native half type support")
275 LANGOPT(NativeHalfArgsAndReturns, 1, 0, "Native half args and returns")
276 LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
277diff --git a/clang/include/clang/Basic/LangStandards.def b/clang/include/clang/Basic/LangStandards.def
278index 7f1a24db7e9b..69aaba3ff5a2 100644
279--- a/clang/include/clang/Basic/LangStandards.def
280+++ b/clang/include/clang/Basic/LangStandards.def
281@@ -167,6 +167,9 @@ LANGSTANDARD(opencl12, "cl1.2",
282 LANGSTANDARD(opencl20, "cl2.0",
283 OpenCL, "OpenCL 2.0",
284 LineComment | C99 | Digraphs | HexFloat | OpenCL)
285+LANGSTANDARD(opencl30, "cl3.0",
286+ OpenCL, "OpenCL 3.0",
287+ LineComment | C99 | Digraphs | HexFloat | OpenCL)
288 LANGSTANDARD(openclcpp, "clc++",
289 OpenCL, "C++ for OpenCL",
290 LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
291@@ -176,6 +179,7 @@ LANGSTANDARD_ALIAS_DEPR(opencl10, "CL")
292 LANGSTANDARD_ALIAS_DEPR(opencl11, "CL1.1")
293 LANGSTANDARD_ALIAS_DEPR(opencl12, "CL1.2")
294 LANGSTANDARD_ALIAS_DEPR(opencl20, "CL2.0")
295+LANGSTANDARD_ALIAS_DEPR(opencl30, "CL3.0")
296 LANGSTANDARD_ALIAS_DEPR(openclcpp, "CLC++")
297
298 // CUDA
299diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def
300index 517481584313..608f78a13eef 100644
301--- a/clang/include/clang/Basic/OpenCLExtensions.def
302+++ b/clang/include/clang/Basic/OpenCLExtensions.def
303@@ -93,6 +93,21 @@ OPENCLEXT_INTERNAL(cl_intel_subgroups, 120, ~0U)
304 OPENCLEXT_INTERNAL(cl_intel_subgroups_short, 120, ~0U)
305 OPENCLEXT_INTERNAL(cl_intel_device_side_avc_motion_estimation, 120, ~0U)
306
307+OPENCLEXT_INTERNAL(__opencl_c_pipes, 200, ~0U)
308+OPENCLEXT_INTERNAL(__opencl_c_generic_address_space, 200, ~0U)
309+OPENCLEXT_INTERNAL(__opencl_c_work_group_collective_functions, 200, ~0U)
310+OPENCLEXT_INTERNAL(__opencl_c_atomic_order_acq_rel, 200, ~0U)
311+OPENCLEXT_INTERNAL(__opencl_c_atomic_order_seq_cst, 200, ~0U)
312+OPENCLEXT_INTERNAL(__opencl_c_atomic_scope_device, 200, ~0U)
313+OPENCLEXT_INTERNAL(__opencl_c_atomic_scope_all_devices, 200, ~0U)
314+OPENCLEXT_INTERNAL(__opencl_c_subgroups, 200, ~0U)
315+OPENCLEXT_INTERNAL(__opencl_c_3d_image_writes, 100, ~0U)
316+OPENCLEXT_INTERNAL(__opencl_c_device_enqueue, 200, ~0U)
317+OPENCLEXT_INTERNAL(__opencl_c_read_write_images, 200, ~0U)
318+OPENCLEXT_INTERNAL(__opencl_c_program_scope_global_variables, 200, ~0U)
319+OPENCLEXT_INTERNAL(__opencl_c_fp64, 120, ~0U)
320+OPENCLEXT_INTERNAL(__opencl_c_int64, 100, ~0U)
321+OPENCLEXT_INTERNAL(__opencl_c_images, 100, ~0U)
322 #undef OPENCLEXT_INTERNAL
323
324 #ifdef OPENCLEXT
325diff --git a/clang/include/clang/Basic/OpenCLOptions.h b/clang/include/clang/Basic/OpenCLOptions.h
326index 15661154eab5..9a3a8e33cadd 100644
327--- a/clang/include/clang/Basic/OpenCLOptions.h
328+++ b/clang/include/clang/Basic/OpenCLOptions.h
329@@ -16,11 +16,16 @@
330
331 #include "clang/Basic/LangOptions.h"
332 #include "llvm/ADT/StringMap.h"
333+#include "llvm/ADT/StringSwitch.h"
334
335 namespace clang {
336
337 /// OpenCL supported extensions and optional core features
338 class OpenCLOptions {
339+ // OpenCL Version
340+ unsigned CLVer = 120;
341+ bool IsOpenCLCPlusPlus = false;
342+
343 struct Info {
344 bool Supported; // Is this option supported
345 bool Enabled; // Is this option enabled
346@@ -31,7 +36,38 @@ class OpenCLOptions {
347 :Supported(S), Enabled(E), Avail(A), Core(C){}
348 };
349 llvm::StringMap<Info> OptMap;
350+
351 public:
352+ void setOpenCLVersion(const LangOptions &LO) {
353+ IsOpenCLCPlusPlus = LO.OpenCLCPlusPlus;
354+ CLVer = IsOpenCLCPlusPlus ? 200 : LO.OpenCLVersion;
355+ }
356+
357+ // Get extension which is semantically equivalent to a given feature
358+ // if exists (e.g. __opencl_c_subgroups -> cl_khr_subgroups)
359+ llvm::Optional<StringRef> getEquivalentExtension(StringRef Feature) const {
360+ return llvm::StringSwitch<llvm::Optional<StringRef>>(Feature)
361+ .Case("__opencl_c_3d_image_writes",
362+ Optional<StringRef>("cl_khr_3d_image_writes"))
363+ .Case("__opencl_c_subgroups", Optional<StringRef>("cl_khr_subgroups"))
364+ .Case("__opencl_c_fp64", Optional<StringRef>("cl_khr_fp64"))
365+ .Default(Optional<StringRef>());
366+ }
367+
368+ // Same as above but for extensions
369+ llvm::Optional<StringRef> getEquivalentFeature(StringRef Extension) const {
370+ return llvm::StringSwitch<llvm::Optional<StringRef>>(Extension)
371+ .Case("cl_khr_3d_image_writes",
372+ Optional<StringRef>("__opencl_c_3d_image_writes"))
373+ .Case("cl_khr_subgroups", Optional<StringRef>("__opencl_c_subgroups"))
374+ .Case("cl_khr_fp64", Optional<StringRef>("__opencl_c_fp64"))
375+ .Default(Optional<StringRef>());
376+ }
377+
378+ bool isFeature(llvm::StringRef Ext) const {
379+ return Ext.startswith("__opencl_c");
380+ }
381+
382 bool isKnown(llvm::StringRef Ext) const {
383 return OptMap.find(Ext) != OptMap.end();
384 }
385@@ -42,32 +78,88 @@ public:
386
387 // Is supported as either an extension or an (optional) core feature for
388 // OpenCL version \p CLVer.
389- bool isSupported(llvm::StringRef Ext, const LangOptions &LO) const {
390+ bool isSupported(llvm::StringRef Ext) const {
391 // In C++ mode all extensions should work at least as in v2.0.
392- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion;
393 auto I = OptMap.find(Ext)->getValue();
394 return I.Supported && I.Avail <= CLVer;
395 }
396
397 // Is supported (optional) OpenCL core features for OpenCL version \p CLVer.
398 // For supported extension, return false.
399- bool isSupportedCore(llvm::StringRef Ext, const LangOptions &LO) const {
400+ bool isSupportedCore(llvm::StringRef Ext) const {
401 // In C++ mode all extensions should work at least as in v2.0.
402- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion;
403 auto I = OptMap.find(Ext)->getValue();
404 return I.Supported && I.Avail <= CLVer && I.Core != ~0U && CLVer >= I.Core;
405 }
406
407 // Is supported OpenCL extension for OpenCL version \p CLVer.
408 // For supported (optional) core feature, return false.
409- bool isSupportedExtension(llvm::StringRef Ext, const LangOptions &LO) const {
410+ bool isSupportedExtension(llvm::StringRef Ext) const {
411 // In C++ mode all extensions should work at least as in v2.0.
412- auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion;
413 auto I = OptMap.find(Ext)->getValue();
414 return I.Supported && I.Avail <= CLVer && (I.Core == ~0U || CLVer < I.Core);
415 }
416
417+ // Support features whose support is directly related to the
418+ // specific OpenCL version. For example, OpenCL 2.0 supports
419+ // all features that are optional in 3.0
420+ void adjustFeatures() {
421+ // Support int64 by default (assume compiling for FULL profile)
422+ OptMap["__opencl_c_int64"].Supported = true;
423+
424+ if (CLVer >= 300) {
425+ // Simultaneously support extension and corresponding feature
426+ for (llvm::StringRef F :
427+ {"__opencl_c_subgroups", "__opencl_c_3d_image_writes",
428+ "__opencl_c_fp64"}) {
429+ auto Ext = getEquivalentExtension(F);
430+ OptMap[*Ext].Supported = OptMap[F].Supported;
431+ }
432+
433+ // OpenCL C compilers that define the feature macro __opencl_c_pipes or
434+ // or __opencl_c_device_enqueue must also define the
435+ // feature macro __opencl_c_generic_address_space.
436+ if (OptMap["__opencl_c_pipes"].Supported ||
437+ OptMap["__opencl_c_device_enqueue"].Supported)
438+ OptMap["__opencl_c_generic_address_space"].Supported = true;
439+
440+ // OpenCL C compilers that define the feature macro
441+ // __opencl_c_3d_image_writes or __opencl_c_read_write_images must also
442+ // define the feature macro __opencl_c_images
443+ if (OptMap["__opencl_c_3d_image_writes"].Supported ||
444+ OptMap["__opencl_c_read_write_images"].Supported)
445+ OptMap["__opencl_c_images"].Supported = true;
446+
447+ // All other features are already supported with options
448+ // or in target settings
449+ return;
450+ }
451+
452+ auto FeaturesRange = llvm::make_filter_range(
453+ OptMap, [&](llvm::StringMapEntry<Info> &OptVal) {
454+ auto Opt = OptVal.getKey();
455+ return isFeature(Opt);
456+ });
457+
458+ for (auto &It : FeaturesRange) {
459+ auto &Info = It.getValue();
460+ // For OpenCL version less then 3.0 some
461+ // features should be supported simulateneously
462+ // with specific extension
463+ if (Optional<llvm::StringRef> Ext = getEquivalentExtension(It.getKey()))
464+ Info.Supported = Info.Enabled = OptMap[*Ext].Supported;
465+ else if (Info.Avail <= CLVer)
466+ Info.Supported = Info.Enabled = true;
467+ }
468+ }
469+
470 void enable(llvm::StringRef Ext, bool V = true) {
471+ // Ignore disabling extensions if corresponding features
472+ // already supported for OpenCL version higher then 3.0
473+ if (CLVer >= 300)
474+ if (Optional<llvm::StringRef> F = getEquivalentFeature(Ext))
475+ if (V != OptMap[*F].Enabled)
476+ return;
477 OptMap[Ext].Enabled = V;
478 }
479
480@@ -96,7 +188,7 @@ public:
481 OptMap[Ext].Supported = V;
482 }
483
484- OpenCLOptions(){
485+ OpenCLOptions() {
486 #define OPENCLEXT_INTERNAL(Ext, AvailVer, CoreVer) \
487 OptMap[#Ext].Avail = AvailVer; \
488 OptMap[#Ext].Core = CoreVer;
489@@ -104,35 +196,86 @@ public:
490 }
491
492 void addSupport(const OpenCLOptions &Opts) {
493+ assert(IsOpenCLCPlusPlus == Opts.IsOpenCLCPlusPlus && CLVer == Opts.CLVer);
494 for (auto &I:Opts.OptMap)
495- if (I.second.Supported)
496+ if (I.second.Supported) {
497 OptMap[I.getKey()].Supported = true;
498+ // All features are enabled as they are supported
499+ if (isFeature(I.getKey()))
500+ OptMap[I.getKey()].Enabled = true;
501+ }
502+ if (CLVer >= 300) {
503+ // Enabling extensions with respect to features
504+ for (llvm::StringRef Ext :
505+ {"cl_khr_3d_image_writes", "cl_khr_subgroups", "cl_khr_fp64"}) {
506+ auto Feature = getEquivalentFeature(Ext);
507+ enable(Ext, OptMap[*Feature].Enabled);
508+ }
509+ }
510 }
511
512 void copy(const OpenCLOptions &Opts) {
513+ CLVer = Opts.CLVer;
514+ IsOpenCLCPlusPlus = Opts.IsOpenCLCPlusPlus;
515 OptMap = Opts.OptMap;
516 }
517
518 // Turn on or off support of all options.
519 void supportAll(bool On = true) {
520- for (llvm::StringMap<Info>::iterator I = OptMap.begin(),
521- E = OptMap.end(); I != E; ++I)
522- I->second.Supported = On;
523+ for (llvm::StringMap<Info>::iterator I = OptMap.begin(), E = OptMap.end();
524+ I != E; ++I)
525+ if (!isFeature(I->getKey()))
526+ I->second.Supported = On;
527 }
528
529 void disableAll() {
530- for (llvm::StringMap<Info>::iterator I = OptMap.begin(),
531- E = OptMap.end(); I != E; ++I)
532- I->second.Enabled = false;
533+ for (llvm::StringMap<Info>::iterator I = OptMap.begin(), E = OptMap.end();
534+ I != E; ++I) {
535+ auto Ext = I->getKey();
536+ if (!isFeature(Ext))
537+ enable(Ext, false);
538+ }
539 }
540
541- void enableSupportedCore(LangOptions LO) {
542+ void enableSupportedCore() {
543 for (llvm::StringMap<Info>::iterator I = OptMap.begin(), E = OptMap.end();
544 I != E; ++I)
545- if (isSupportedCore(I->getKey(), LO))
546+ if (isSupportedCore(I->getKey()))
547 I->second.Enabled = true;
548 }
549
550+ // This enum specifies how OpenCL versions map into values
551+ // for encoding. This is used when generating built-ins
552+ // from tablegen
553+ enum OpenCLVersionsEncodings : unsigned short {
554+ OPENCL_C_100_CODE = 0x1,
555+ OPENCL_C_110_CODE = 0x2,
556+ OPENCL_C_120_CODE = 0x4,
557+ OPENCL_C_200_CODE = 0x8,
558+ OPENCL_C_300_CODE = 0x10,
559+ OPENCL_C_ALL_CODE = 0x1f
560+ };
561+
562+ // Encode version into single integer
563+ static unsigned short EncodeVersion(unsigned OpenCLVersion) {
564+ switch (OpenCLVersion) {
565+ default:
566+ llvm_unreachable("Unknown OpenCL version");
567+ case 0:
568+ return OpenCLVersionsEncodings::OPENCL_C_ALL_CODE;
569+ case 100:
570+ return OpenCLVersionsEncodings::OPENCL_C_100_CODE;
571+ case 110:
572+ return OpenCLVersionsEncodings::OPENCL_C_110_CODE;
573+ case 120:
574+ return OpenCLVersionsEncodings::OPENCL_C_120_CODE;
575+ case 200:
576+ return OpenCLVersionsEncodings::OPENCL_C_200_CODE;
577+ case 300:
578+ return OpenCLVersionsEncodings::OPENCL_C_300_CODE;
579+ }
580+ }
581+
582 friend class ASTWriter;
583 friend class ASTReader;
584 };
585diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
586index 391c895a453b..e03c22c749ad 100644
587--- a/clang/include/clang/Driver/Options.td
588+++ b/clang/include/clang/Driver/Options.td
589@@ -522,7 +522,7 @@ def cl_mad_enable : Flag<["-"], "cl-mad-enable">, Group<opencl_Group>, Flags<[CC
590 def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group<opencl_Group>, Flags<[CC1Option]>,
591 HelpText<"OpenCL only. Allow use of less precise no signed zeros computations in the generated binary.">;
592 def cl_std_EQ : Joined<["-"], "cl-std=">, Group<opencl_Group>, Flags<[CC1Option]>,
593- HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,clc++,CLC++">;
594+ HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0,clc++,CLC++">;
595 def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group<opencl_Group>, Flags<[CC1Option]>,
596 HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;
597 def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,
598diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
599index 842e49602274..d8ac91bc8a31 100644
600--- a/clang/include/clang/Sema/Sema.h
601+++ b/clang/include/clang/Sema/Sema.h
602@@ -9632,6 +9632,10 @@ public:
603 /// \return true if type is disabled.
604 bool checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E);
605
606+ bool checkOpenCLSubgroupExtForCallExpr(CallExpr *Call);
607+
608+ bool isSupportedOpenCLOMemoryOrdering(int64_t Ordering) const;
609+
610 //===--------------------------------------------------------------------===//
611 // OpenMP directives and clauses.
612 //
613@@ -11102,6 +11106,11 @@ public:
614 /// that the user intended an assignment used as condition.
615 void DiagnoseEqualityWithExtraParens(ParenExpr *ParenE);
616
617+ template <typename DiagLocT, typename DiagInfoT>
618+ void DiagnoseOpenCLRequiresOption(llvm::StringRef OpenCLOptName,
619+ DiagLocT DiagLoc, DiagInfoT DiagInfo,
620+ unsigned Selector, SourceRange SrcRange);
621+
622 /// CheckCXXBooleanCondition - Returns true if conversion to bool is invalid.
623 ExprResult CheckCXXBooleanCondition(Expr *CondExpr, bool IsConstexpr = false);
624
625diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
626index 1be72efe4de8..2a81fbcde79d 100644
627--- a/clang/lib/AST/ASTContext.cpp
628+++ b/clang/lib/AST/ASTContext.cpp
629@@ -1490,7 +1490,8 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target,
630 ObjCSuperType = QualType();
631
632 // void * type
633- if (LangOpts.OpenCLVersion >= 200) {
634+ if (Target.getSupportedOpenCLOpts().isEnabled(
635+ "__opencl_c_generic_address_space")) {
636 auto Q = VoidTy.getQualifiers();
637 Q.setAddressSpace(LangAS::opencl_generic);
638 VoidPtrTy = getPointerType(getCanonicalType(
639diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp
640index 0cd89df41b67..7a3067345098 100644
641--- a/clang/lib/Basic/Builtins.cpp
642+++ b/clang/lib/Basic/Builtins.cpp
643@@ -23,6 +23,8 @@ static const Builtin::Info BuiltinInfo[] = {
644 { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr },
645 #define LANGBUILTIN(ID, TYPE, ATTRS, LANGS) \
646 { #ID, TYPE, ATTRS, nullptr, LANGS, nullptr },
647+#define OPENCLBUILTIN(ID, TYPE, ATTRS, LANGS, FEATURE) \
648+ {#ID, TYPE, ATTRS, nullptr, LANGS, FEATURE},
649 #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, LANGS) \
650 { #ID, TYPE, ATTRS, HEADER, LANGS, nullptr },
651 #include "clang/Basic/Builtins.def"
652@@ -69,16 +71,17 @@ bool Builtin::Context::builtinIsSupported(const Builtin::Info &BuiltinInfo,
653 bool ObjCUnsupported = !LangOpts.ObjC && BuiltinInfo.Langs == OBJC_LANG;
654 bool OclC1Unsupported = (LangOpts.OpenCLVersion / 100) != 1 &&
655 (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES ) == OCLC1X_LANG;
656- bool OclC2Unsupported =
657- (LangOpts.OpenCLVersion != 200 && !LangOpts.OpenCLCPlusPlus) &&
658- (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC20_LANG;
659+ bool OclC2PUnsupported =
660+ (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES) == OCLC2P_LANG &&
661+ ((LangOpts.OpenCLVersion < 200 && !LangOpts.OpenCLCPlusPlus) ||
662+ !OclBuiltinIsSupported(BuiltinInfo, LangOpts));
663 bool OclCUnsupported = !LangOpts.OpenCL &&
664 (BuiltinInfo.Langs & ALL_OCLC_LANGUAGES);
665 bool OpenMPUnsupported = !LangOpts.OpenMP && BuiltinInfo.Langs == OMP_LANG;
666 bool CPlusPlusUnsupported =
667 !LangOpts.CPlusPlus && BuiltinInfo.Langs == CXX_LANG;
668 return !BuiltinsUnsupported && !MathBuiltinsUnsupported && !OclCUnsupported &&
669- !OclC1Unsupported && !OclC2Unsupported && !OpenMPUnsupported &&
670+ !OclC1Unsupported && !OclC2PUnsupported && !OpenMPUnsupported &&
671 !GnuModeUnsupported && !MSModeUnsupported && !ObjCUnsupported &&
672 !CPlusPlusUnsupported;
673 }
674@@ -191,3 +194,20 @@ bool Builtin::Context::canBeRedeclared(unsigned ID) const {
675 (!hasReferenceArgsOrResult(ID) &&
676 !hasCustomTypechecking(ID));
677 }
678+
679+bool Builtin::Context::OclBuiltinIsSupported(
680+ const Builtin::Info &BuiltinInfo, const LangOptions &LangOpts) const {
681+ if (!requiresFeatures(BuiltinInfo))
682+ return true;
683+
684+ return llvm::StringSwitch<bool>(BuiltinInfo.Features)
685+ .Case("__opencl_c_device_enqueue", LangOpts.Blocks)
686+ .Case("__opencl_c_generic_address_space", LangOpts.OpenCLGenericKeyword)
687+ .Case("__opencl_c_pipes", LangOpts.OpenCLPipeKeyword)
688+ .Default(false);
689+}
690+
691+bool Builtin::Context::requiresFeatures(
692+ const Builtin::Info &BuiltinInfo) const {
693+ return BuiltinInfo.Features && llvm::StringRef(BuiltinInfo.Features) != "";
694+}
695diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
696index 3a21a19e1f19..795311d5934e 100644
697--- a/clang/lib/Basic/TargetInfo.cpp
698+++ b/clang/lib/Basic/TargetInfo.cpp
699@@ -377,6 +377,17 @@ void TargetInfo::adjust(LangOptions &Opts) {
700 HalfFormat = &llvm::APFloat::IEEEhalf();
701 FloatFormat = &llvm::APFloat::IEEEsingle();
702 LongDoubleFormat = &llvm::APFloat::IEEEquad();
703+
704+ auto &SupportedOCLOpts = getTargetOpts().SupportedOpenCLOptions;
705+
706+ SupportedOCLOpts.setOpenCLVersion(Opts);
707+ SupportedOCLOpts.adjustFeatures();
708+
709+ if (!Opts.OpenCLCPlusPlus && Opts.OpenCLVersion >= 200)
710+ Opts.Blocks = SupportedOCLOpts.isSupported("__opencl_c_device_enqueue");
711+ Opts.OpenCLGenericKeyword =
712+ SupportedOCLOpts.isSupported("__opencl_c_generic_address_space");
713+ Opts.OpenCLPipeKeyword = SupportedOCLOpts.isSupported("__opencl_c_pipes");
714 }
715
716 if (Opts.LongDoubleSize) {
717diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
718index c063f8ca4472..b394be18a234 100644
719--- a/clang/lib/Basic/Targets.cpp
720+++ b/clang/lib/Basic/Targets.cpp
721@@ -39,7 +39,6 @@
722 #include "clang/Basic/Diagnostic.h"
723 #include "llvm/ADT/StringExtras.h"
724 #include "llvm/ADT/Triple.h"
725-
726 using namespace clang;
727
728 namespace clang {
729diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
730index 648e6d9c214a..6d839fa61abc 100644
731--- a/clang/lib/CodeGen/CodeGenFunction.cpp
732+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
733@@ -2303,11 +2303,11 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc,
734 std::string MissingFeature;
735 if (BuiltinID) {
736 SmallVector<StringRef, 1> ReqFeatures;
737- const char *FeatureList =
738- CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID);
739 // Return if the builtin doesn't have any required features.
740- if (!FeatureList || StringRef(FeatureList) == "")
741+ if (!CGM.getContext().BuiltinInfo.requiresFeatures(BuiltinID))
742 return;
743+ const char *FeatureList =
744+ CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID);
745 StringRef(FeatureList).split(ReqFeatures, ',');
746 if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature))
747 CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature)
748diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
749index e98a407ac42f..18fa06bf3c6d 100644
750--- a/clang/lib/Frontend/CompilerInvocation.cpp
751+++ b/clang/lib/Frontend/CompilerInvocation.cpp
752@@ -2298,6 +2298,8 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
753 Opts.OpenCLVersion = 120;
754 else if (LangStd == LangStandard::lang_opencl20)
755 Opts.OpenCLVersion = 200;
756+ else if (LangStd == LangStandard::lang_opencl30)
757+ Opts.OpenCLVersion = 300;
758 else if (LangStd == LangStandard::lang_openclcpp)
759 Opts.OpenCLCPlusPlusVersion = 100;
760
761@@ -2498,14 +2500,15 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
762 // -cl-std only applies for OpenCL language standards.
763 // Override the -std option in this case.
764 if (const Arg *A = Args.getLastArg(OPT_cl_std_EQ)) {
765- LangStandard::Kind OpenCLLangStd
766- = llvm::StringSwitch<LangStandard::Kind>(A->getValue())
767- .Cases("cl", "CL", LangStandard::lang_opencl10)
768- .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11)
769- .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12)
770- .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20)
771- .Cases("clc++", "CLC++", LangStandard::lang_openclcpp)
772- .Default(LangStandard::lang_unspecified);
773+ LangStandard::Kind OpenCLLangStd =
774+ llvm::StringSwitch<LangStandard::Kind>(A->getValue())
775+ .Cases("cl", "CL", LangStandard::lang_opencl10)
776+ .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11)
777+ .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12)
778+ .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20)
779+ .Cases("cl3.0", "CL3.0", LangStandard::lang_opencl30)
780+ .Cases("clc++", "CLC++", LangStandard::lang_openclcpp)
781+ .Default(LangStandard::lang_unspecified);
782
783 if (OpenCLLangStd == LangStandard::lang_unspecified) {
784 Diags.Report(diag::err_drv_invalid_value)
785@@ -2787,8 +2790,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
786
787 Opts.RTTI = Opts.CPlusPlus && !Args.hasArg(OPT_fno_rtti);
788 Opts.RTTIData = Opts.RTTI && !Args.hasArg(OPT_fno_rtti_data);
789- Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL
790- && Opts.OpenCLVersion == 200);
791+ Opts.Blocks = Args.hasArg(OPT_fblocks);
792 Opts.BlocksRuntimeOptional = Args.hasArg(OPT_fblocks_runtime_optional);
793 Opts.Coroutines = Opts.CPlusPlus2a || Args.hasArg(OPT_fcoroutines_ts);
794
795diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
796index c273cb96d9b9..aefd208e6cd3 100644
797--- a/clang/lib/Frontend/InitPreprocessor.cpp
798+++ b/clang/lib/Frontend/InitPreprocessor.cpp
799@@ -445,6 +445,9 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
800 case 200:
801 Builder.defineMacro("__OPENCL_C_VERSION__", "200");
802 break;
803+ case 300:
804+ Builder.defineMacro("__OPENCL_C_VERSION__", "300");
805+ break;
806 default:
807 llvm_unreachable("Unsupported OpenCL version");
808 }
809@@ -453,6 +456,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
810 Builder.defineMacro("CL_VERSION_1_1", "110");
811 Builder.defineMacro("CL_VERSION_1_2", "120");
812 Builder.defineMacro("CL_VERSION_2_0", "200");
813+ Builder.defineMacro("CL_VERSION_3_0", "300");
814
815 if (TI.isLittleEndian())
816 Builder.defineMacro("__ENDIAN_LITTLE__");
817@@ -1101,7 +1105,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
818 // OpenCL definitions.
819 if (LangOpts.OpenCL) {
820 #define OPENCLEXT(Ext) \
821- if (TI.getSupportedOpenCLOpts().isSupported(#Ext, LangOpts)) \
822+ if (TI.getSupportedOpenCLOpts().isSupported(#Ext)) \
823 Builder.defineMacro(#Ext);
824 #include "clang/Basic/OpenCLExtensions.def"
825
826diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
827index 430e07d36f62..2cc688ccc3da 100644
828--- a/clang/lib/Headers/opencl-c-base.h
829+++ b/clang/lib/Headers/opencl-c-base.h
830@@ -9,6 +9,59 @@
831 #ifndef _OPENCL_BASE_H_
832 #define _OPENCL_BASE_H_
833
834+// Add predefined macros to build headers with standalone executable
835+#ifndef CL_VERSION_3_0
836+ #define CL_VERSION_3_0 300
837+#endif
838+
839+// Define features for 2.0 for header backward compatibility
840+#ifndef __opencl_c_int64
841+ #define __opencl_c_int64 1
842+#endif
843+#if __OPENCL_C_VERSION__ != CL_VERSION_3_0
844+ #ifndef __opencl_c_images
845+ #define __opencl_c_images 1
846+ #endif
847+#endif
848+#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0)
849+#ifndef __opencl_c_pipes
850+ #define __opencl_c_pipes 1
851+#endif
852+#ifndef __opencl_c_generic_address_space
853+ #define __opencl_c_generic_address_space 1
854+#endif
855+#ifndef __opencl_c_work_group_collective_functions
856+ #define __opencl_c_work_group_collective_functions 1
857+#endif
858+#ifndef __opencl_c_atomic_order_acq_rel
859+ #define __opencl_c_atomic_order_acq_rel 1
860+#endif
861+#ifndef __opencl_c_atomic_order_seq_cst
862+ #define __opencl_c_atomic_order_seq_cst 1
863+#endif
864+#ifndef __opencl_c_atomic_scope_device
865+ #define __opencl_c_atomic_scope_device 1
866+#endif
867+#ifndef __opencl_c_atomic_scope_all_devices
868+ #define __opencl_c_atomic_scope_all_devices 1
869+#endif
870+#ifndef __opencl_c_subgroups
871+ #define __opencl_c_subgroups 1
872+#endif
873+#ifndef __opencl_c_3d_image_writes
874+ #define __opencl_c_3d_image_writes 1
875+#endif
876+#ifndef __opencl_c_device_enqueue
877+ #define __opencl_c_device_enqueue 1
878+#endif
879+#ifndef __opencl_c_read_write_images
880+ #define __opencl_c_read_write_images 1
881+#endif
882+#ifndef __opencl_c_program_scope_global_variables
883+ #define __opencl_c_program_scope_global_variables 1
884+#endif
885+#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0)
886+
887 // built-in scalar data types:
888
889 /**
890@@ -115,7 +168,12 @@ typedef half half4 __attribute__((ext_vector_type(4)));
891 typedef half half8 __attribute__((ext_vector_type(8)));
892 typedef half half16 __attribute__((ext_vector_type(16)));
893 #endif
894-#ifdef cl_khr_fp64
895+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
896+
897+#ifndef __opencl_c_fp64
898+ #define __opencl_c_fp64 1
899+#endif
900+
901 #if __OPENCL_C_VERSION__ < CL_VERSION_1_2
902 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
903 #endif
904@@ -281,9 +339,17 @@ typedef uint cl_mem_fence_flags;
905 typedef enum memory_scope {
906 memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
907 memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
908+#ifdef __opencl_c_atomic_scope_device
909 memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
910+#endif
911+#ifdef __opencl_c_atomic_scope_all_devices
912+ #if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
913+ memory_scope_all_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
914+ #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
915 memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
916-#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
917+#endif
918+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \
919+ defined(__opencl_c_subgroups)
920 memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
921 #endif
922 } memory_scope;
923@@ -301,13 +367,14 @@ typedef enum memory_scope {
924 #define ATOMIC_FLAG_INIT 0
925
926 // enum values aligned with what clang uses in EmitAtomicExpr()
927-typedef enum memory_order
928-{
929+typedef enum memory_order {
930 memory_order_relaxed = __ATOMIC_RELAXED,
931 memory_order_acquire = __ATOMIC_ACQUIRE,
932 memory_order_release = __ATOMIC_RELEASE,
933 memory_order_acq_rel = __ATOMIC_ACQ_REL,
934+#ifdef __opencl_c_atomic_order_seq_cst
935 memory_order_seq_cst = __ATOMIC_SEQ_CST
936+#endif //__opencl_c_atomic_order_seq_cst
937 } memory_order;
938
939 #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
940diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
941index 3210f93cc851..93a946cec5b1 100644
942--- a/clang/lib/Headers/opencl-c.h
943+++ b/clang/lib/Headers/opencl-c.h
944@@ -35,7 +35,6 @@
945 #define __purefn __attribute__((pure))
946 #define __cnfn __attribute__((const))
947
948-
949 // OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions
950
951 char __ovld __cnfn convert_char_rte(char);
952@@ -4632,7 +4631,7 @@ float16 __ovld __cnfn convert_float16(float16);
953
954 // Conversions with double data type parameters or return value.
955
956-#ifdef cl_khr_fp64
957+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
958 char __ovld __cnfn convert_char(double);
959 char __ovld __cnfn convert_char_rte(double);
960 char __ovld __cnfn convert_char_rtn(double);
961@@ -5452,7 +5451,7 @@ double16 __ovld __cnfn convert_double16_rtz(uchar16);
962 double16 __ovld __cnfn convert_double16_rtz(uint16);
963 double16 __ovld __cnfn convert_double16_rtz(ulong16);
964 double16 __ovld __cnfn convert_double16_rtz(ushort16);
965-#endif //cl_khr_fp64
966+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
967
968 #ifdef cl_khr_fp16
969 // Convert half types to non-double types.
970@@ -6270,7 +6269,7 @@ half16 __ovld __cnfn convert_half16_rtz(float16);
971 half16 __ovld __cnfn convert_half16_rtz(half16);
972
973 // Convert half types to double types.
974-#ifdef cl_khr_fp64
975+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
976 double __ovld __cnfn convert_double(half);
977 double __ovld __cnfn convert_double_rte(half);
978 double __ovld __cnfn convert_double_rtp(half);
979@@ -6333,7 +6332,7 @@ half16 __ovld __cnfn convert_half16_rte(double16);
980 half16 __ovld __cnfn convert_half16_rtp(double16);
981 half16 __ovld __cnfn convert_half16_rtn(double16);
982 half16 __ovld __cnfn convert_half16_rtz(double16);
983-#endif //cl_khr_fp64
984+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
985
986 #endif // cl_khr_fp16
987
988@@ -6404,14 +6403,14 @@ half16 __ovld __cnfn convert_half16_rtz(double16);
989 #define as_float8(x) __builtin_astype((x), float8)
990 #define as_float16(x) __builtin_astype((x), float16)
991
992-#ifdef cl_khr_fp64
993+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
994 #define as_double(x) __builtin_astype((x), double)
995 #define as_double2(x) __builtin_astype((x), double2)
996 #define as_double3(x) __builtin_astype((x), double3)
997 #define as_double4(x) __builtin_astype((x), double4)
998 #define as_double8(x) __builtin_astype((x), double8)
999 #define as_double16(x) __builtin_astype((x), double16)
1000-#endif //cl_khr_fp64
1001+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1002
1003 #ifdef cl_khr_fp16
1004 #define as_half(x) __builtin_astype((x), half)
1005@@ -6534,14 +6533,14 @@ float3 __ovld __cnfn acos(float3);
1006 float4 __ovld __cnfn acos(float4);
1007 float8 __ovld __cnfn acos(float8);
1008 float16 __ovld __cnfn acos(float16);
1009-#ifdef cl_khr_fp64
1010+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1011 double __ovld __cnfn acos(double);
1012 double2 __ovld __cnfn acos(double2);
1013 double3 __ovld __cnfn acos(double3);
1014 double4 __ovld __cnfn acos(double4);
1015 double8 __ovld __cnfn acos(double8);
1016 double16 __ovld __cnfn acos(double16);
1017-#endif //cl_khr_fp64
1018+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1019 #ifdef cl_khr_fp16
1020 half __ovld __cnfn acos(half);
1021 half2 __ovld __cnfn acos(half2);
1022@@ -6560,14 +6559,14 @@ float3 __ovld __cnfn acosh(float3);
1023 float4 __ovld __cnfn acosh(float4);
1024 float8 __ovld __cnfn acosh(float8);
1025 float16 __ovld __cnfn acosh(float16);
1026-#ifdef cl_khr_fp64
1027+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1028 double __ovld __cnfn acosh(double);
1029 double2 __ovld __cnfn acosh(double2);
1030 double3 __ovld __cnfn acosh(double3);
1031 double4 __ovld __cnfn acosh(double4);
1032 double8 __ovld __cnfn acosh(double8);
1033 double16 __ovld __cnfn acosh(double16);
1034-#endif //cl_khr_fp64
1035+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1036 #ifdef cl_khr_fp16
1037 half __ovld __cnfn acosh(half);
1038 half2 __ovld __cnfn acosh(half2);
1039@@ -6586,14 +6585,14 @@ float3 __ovld __cnfn acospi(float3 x);
1040 float4 __ovld __cnfn acospi(float4 x);
1041 float8 __ovld __cnfn acospi(float8 x);
1042 float16 __ovld __cnfn acospi(float16 x);
1043-#ifdef cl_khr_fp64
1044+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1045 double __ovld __cnfn acospi(double x);
1046 double2 __ovld __cnfn acospi(double2 x);
1047 double3 __ovld __cnfn acospi(double3 x);
1048 double4 __ovld __cnfn acospi(double4 x);
1049 double8 __ovld __cnfn acospi(double8 x);
1050 double16 __ovld __cnfn acospi(double16 x);
1051-#endif //cl_khr_fp64
1052+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1053 #ifdef cl_khr_fp16
1054 half __ovld __cnfn acospi(half x);
1055 half2 __ovld __cnfn acospi(half2 x);
1056@@ -6612,14 +6611,14 @@ float3 __ovld __cnfn asin(float3);
1057 float4 __ovld __cnfn asin(float4);
1058 float8 __ovld __cnfn asin(float8);
1059 float16 __ovld __cnfn asin(float16);
1060-#ifdef cl_khr_fp64
1061+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1062 double __ovld __cnfn asin(double);
1063 double2 __ovld __cnfn asin(double2);
1064 double3 __ovld __cnfn asin(double3);
1065 double4 __ovld __cnfn asin(double4);
1066 double8 __ovld __cnfn asin(double8);
1067 double16 __ovld __cnfn asin(double16);
1068-#endif //cl_khr_fp64
1069+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1070 #ifdef cl_khr_fp16
1071 half __ovld __cnfn asin(half);
1072 half2 __ovld __cnfn asin(half2);
1073@@ -6638,14 +6637,14 @@ float3 __ovld __cnfn asinh(float3);
1074 float4 __ovld __cnfn asinh(float4);
1075 float8 __ovld __cnfn asinh(float8);
1076 float16 __ovld __cnfn asinh(float16);
1077-#ifdef cl_khr_fp64
1078+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1079 double __ovld __cnfn asinh(double);
1080 double2 __ovld __cnfn asinh(double2);
1081 double3 __ovld __cnfn asinh(double3);
1082 double4 __ovld __cnfn asinh(double4);
1083 double8 __ovld __cnfn asinh(double8);
1084 double16 __ovld __cnfn asinh(double16);
1085-#endif //cl_khr_fp64
1086+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1087 #ifdef cl_khr_fp16
1088 half __ovld __cnfn asinh(half);
1089 half2 __ovld __cnfn asinh(half2);
1090@@ -6664,14 +6663,14 @@ float3 __ovld __cnfn asinpi(float3 x);
1091 float4 __ovld __cnfn asinpi(float4 x);
1092 float8 __ovld __cnfn asinpi(float8 x);
1093 float16 __ovld __cnfn asinpi(float16 x);
1094-#ifdef cl_khr_fp64
1095+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1096 double __ovld __cnfn asinpi(double x);
1097 double2 __ovld __cnfn asinpi(double2 x);
1098 double3 __ovld __cnfn asinpi(double3 x);
1099 double4 __ovld __cnfn asinpi(double4 x);
1100 double8 __ovld __cnfn asinpi(double8 x);
1101 double16 __ovld __cnfn asinpi(double16 x);
1102-#endif //cl_khr_fp64
1103+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1104 #ifdef cl_khr_fp16
1105 half __ovld __cnfn asinpi(half x);
1106 half2 __ovld __cnfn asinpi(half2 x);
1107@@ -6690,14 +6689,14 @@ float3 __ovld __cnfn atan(float3 y_over_x);
1108 float4 __ovld __cnfn atan(float4 y_over_x);
1109 float8 __ovld __cnfn atan(float8 y_over_x);
1110 float16 __ovld __cnfn atan(float16 y_over_x);
1111-#ifdef cl_khr_fp64
1112+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1113 double __ovld __cnfn atan(double y_over_x);
1114 double2 __ovld __cnfn atan(double2 y_over_x);
1115 double3 __ovld __cnfn atan(double3 y_over_x);
1116 double4 __ovld __cnfn atan(double4 y_over_x);
1117 double8 __ovld __cnfn atan(double8 y_over_x);
1118 double16 __ovld __cnfn atan(double16 y_over_x);
1119-#endif //cl_khr_fp64
1120+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1121 #ifdef cl_khr_fp16
1122 half __ovld __cnfn atan(half y_over_x);
1123 half2 __ovld __cnfn atan(half2 y_over_x);
1124@@ -6716,14 +6715,14 @@ float3 __ovld __cnfn atan2(float3 y, float3 x);
1125 float4 __ovld __cnfn atan2(float4 y, float4 x);
1126 float8 __ovld __cnfn atan2(float8 y, float8 x);
1127 float16 __ovld __cnfn atan2(float16 y, float16 x);
1128-#ifdef cl_khr_fp64
1129+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1130 double __ovld __cnfn atan2(double y, double x);
1131 double2 __ovld __cnfn atan2(double2 y, double2 x);
1132 double3 __ovld __cnfn atan2(double3 y, double3 x);
1133 double4 __ovld __cnfn atan2(double4 y, double4 x);
1134 double8 __ovld __cnfn atan2(double8 y, double8 x);
1135 double16 __ovld __cnfn atan2(double16 y, double16 x);
1136-#endif //cl_khr_fp64
1137+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1138 #ifdef cl_khr_fp16
1139 half __ovld __cnfn atan2(half y, half x);
1140 half2 __ovld __cnfn atan2(half2 y, half2 x);
1141@@ -6742,14 +6741,14 @@ float3 __ovld __cnfn atanh(float3);
1142 float4 __ovld __cnfn atanh(float4);
1143 float8 __ovld __cnfn atanh(float8);
1144 float16 __ovld __cnfn atanh(float16);
1145-#ifdef cl_khr_fp64
1146+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1147 double __ovld __cnfn atanh(double);
1148 double2 __ovld __cnfn atanh(double2);
1149 double3 __ovld __cnfn atanh(double3);
1150 double4 __ovld __cnfn atanh(double4);
1151 double8 __ovld __cnfn atanh(double8);
1152 double16 __ovld __cnfn atanh(double16);
1153-#endif //cl_khr_fp64
1154+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1155 #ifdef cl_khr_fp16
1156 half __ovld __cnfn atanh(half);
1157 half2 __ovld __cnfn atanh(half2);
1158@@ -6768,14 +6767,14 @@ float3 __ovld __cnfn atanpi(float3 x);
1159 float4 __ovld __cnfn atanpi(float4 x);
1160 float8 __ovld __cnfn atanpi(float8 x);
1161 float16 __ovld __cnfn atanpi(float16 x);
1162-#ifdef cl_khr_fp64
1163+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1164 double __ovld __cnfn atanpi(double x);
1165 double2 __ovld __cnfn atanpi(double2 x);
1166 double3 __ovld __cnfn atanpi(double3 x);
1167 double4 __ovld __cnfn atanpi(double4 x);
1168 double8 __ovld __cnfn atanpi(double8 x);
1169 double16 __ovld __cnfn atanpi(double16 x);
1170-#endif //cl_khr_fp64
1171+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1172 #ifdef cl_khr_fp16
1173 half __ovld __cnfn atanpi(half x);
1174 half2 __ovld __cnfn atanpi(half2 x);
1175@@ -6794,14 +6793,14 @@ float3 __ovld __cnfn atan2pi(float3 y, float3 x);
1176 float4 __ovld __cnfn atan2pi(float4 y, float4 x);
1177 float8 __ovld __cnfn atan2pi(float8 y, float8 x);
1178 float16 __ovld __cnfn atan2pi(float16 y, float16 x);
1179-#ifdef cl_khr_fp64
1180+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1181 double __ovld __cnfn atan2pi(double y, double x);
1182 double2 __ovld __cnfn atan2pi(double2 y, double2 x);
1183 double3 __ovld __cnfn atan2pi(double3 y, double3 x);
1184 double4 __ovld __cnfn atan2pi(double4 y, double4 x);
1185 double8 __ovld __cnfn atan2pi(double8 y, double8 x);
1186 double16 __ovld __cnfn atan2pi(double16 y, double16 x);
1187-#endif //cl_khr_fp64
1188+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1189 #ifdef cl_khr_fp16
1190 half __ovld __cnfn atan2pi(half y, half x);
1191 half2 __ovld __cnfn atan2pi(half2 y, half2 x);
1192@@ -6820,14 +6819,14 @@ float3 __ovld __cnfn cbrt(float3);
1193 float4 __ovld __cnfn cbrt(float4);
1194 float8 __ovld __cnfn cbrt(float8);
1195 float16 __ovld __cnfn cbrt(float16);
1196-#ifdef cl_khr_fp64
1197+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1198 double __ovld __cnfn cbrt(double);
1199 double2 __ovld __cnfn cbrt(double2);
1200 double3 __ovld __cnfn cbrt(double3);
1201 double4 __ovld __cnfn cbrt(double4);
1202 double8 __ovld __cnfn cbrt(double8);
1203 double16 __ovld __cnfn cbrt(double16);
1204-#endif //cl_khr_fp64
1205+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1206 #ifdef cl_khr_fp16
1207 half __ovld __cnfn cbrt(half);
1208 half2 __ovld __cnfn cbrt(half2);
1209@@ -6847,14 +6846,14 @@ float3 __ovld __cnfn ceil(float3);
1210 float4 __ovld __cnfn ceil(float4);
1211 float8 __ovld __cnfn ceil(float8);
1212 float16 __ovld __cnfn ceil(float16);
1213-#ifdef cl_khr_fp64
1214+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1215 double __ovld __cnfn ceil(double);
1216 double2 __ovld __cnfn ceil(double2);
1217 double3 __ovld __cnfn ceil(double3);
1218 double4 __ovld __cnfn ceil(double4);
1219 double8 __ovld __cnfn ceil(double8);
1220 double16 __ovld __cnfn ceil(double16);
1221-#endif //cl_khr_fp64
1222+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1223 #ifdef cl_khr_fp16
1224 half __ovld __cnfn ceil(half);
1225 half2 __ovld __cnfn ceil(half2);
1226@@ -6873,14 +6872,14 @@ float3 __ovld __cnfn copysign(float3 x, float3 y);
1227 float4 __ovld __cnfn copysign(float4 x, float4 y);
1228 float8 __ovld __cnfn copysign(float8 x, float8 y);
1229 float16 __ovld __cnfn copysign(float16 x, float16 y);
1230-#ifdef cl_khr_fp64
1231+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1232 double __ovld __cnfn copysign(double x, double y);
1233 double2 __ovld __cnfn copysign(double2 x, double2 y);
1234 double3 __ovld __cnfn copysign(double3 x, double3 y);
1235 double4 __ovld __cnfn copysign(double4 x, double4 y);
1236 double8 __ovld __cnfn copysign(double8 x, double8 y);
1237 double16 __ovld __cnfn copysign(double16 x, double16 y);
1238-#endif //cl_khr_fp64
1239+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1240 #ifdef cl_khr_fp16
1241 half __ovld __cnfn copysign(half x, half y);
1242 half2 __ovld __cnfn copysign(half2 x, half2 y);
1243@@ -6899,14 +6898,14 @@ float3 __ovld __cnfn cos(float3);
1244 float4 __ovld __cnfn cos(float4);
1245 float8 __ovld __cnfn cos(float8);
1246 float16 __ovld __cnfn cos(float16);
1247-#ifdef cl_khr_fp64
1248+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1249 double __ovld __cnfn cos(double);
1250 double2 __ovld __cnfn cos(double2);
1251 double3 __ovld __cnfn cos(double3);
1252 double4 __ovld __cnfn cos(double4);
1253 double8 __ovld __cnfn cos(double8);
1254 double16 __ovld __cnfn cos(double16);
1255-#endif //cl_khr_fp64
1256+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1257 #ifdef cl_khr_fp16
1258 half __ovld __cnfn cos(half);
1259 half2 __ovld __cnfn cos(half2);
1260@@ -6925,14 +6924,14 @@ float3 __ovld __cnfn cosh(float3);
1261 float4 __ovld __cnfn cosh(float4);
1262 float8 __ovld __cnfn cosh(float8);
1263 float16 __ovld __cnfn cosh(float16);
1264-#ifdef cl_khr_fp64
1265+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1266 double __ovld __cnfn cosh(double);
1267 double2 __ovld __cnfn cosh(double2);
1268 double3 __ovld __cnfn cosh(double3);
1269 double4 __ovld __cnfn cosh(double4);
1270 double8 __ovld __cnfn cosh(double8);
1271 double16 __ovld __cnfn cosh(double16);
1272-#endif //cl_khr_fp64
1273+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1274 #ifdef cl_khr_fp16
1275 half __ovld __cnfn cosh(half);
1276 half2 __ovld __cnfn cosh(half2);
1277@@ -6951,14 +6950,14 @@ float3 __ovld __cnfn cospi(float3 x);
1278 float4 __ovld __cnfn cospi(float4 x);
1279 float8 __ovld __cnfn cospi(float8 x);
1280 float16 __ovld __cnfn cospi(float16 x);
1281-#ifdef cl_khr_fp64
1282+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1283 double __ovld __cnfn cospi(double x);
1284 double2 __ovld __cnfn cospi(double2 x);
1285 double3 __ovld __cnfn cospi(double3 x);
1286 double4 __ovld __cnfn cospi(double4 x);
1287 double8 __ovld __cnfn cospi(double8 x);
1288 double16 __ovld __cnfn cospi(double16 x);
1289-#endif //cl_khr_fp64
1290+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1291 #ifdef cl_khr_fp16
1292 half __ovld __cnfn cospi(half x);
1293 half2 __ovld __cnfn cospi(half2 x);
1294@@ -6977,14 +6976,14 @@ float3 __ovld __cnfn erfc(float3);
1295 float4 __ovld __cnfn erfc(float4);
1296 float8 __ovld __cnfn erfc(float8);
1297 float16 __ovld __cnfn erfc(float16);
1298-#ifdef cl_khr_fp64
1299+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1300 double __ovld __cnfn erfc(double);
1301 double2 __ovld __cnfn erfc(double2);
1302 double3 __ovld __cnfn erfc(double3);
1303 double4 __ovld __cnfn erfc(double4);
1304 double8 __ovld __cnfn erfc(double8);
1305 double16 __ovld __cnfn erfc(double16);
1306-#endif //cl_khr_fp64
1307+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1308 #ifdef cl_khr_fp16
1309 half __ovld __cnfn erfc(half);
1310 half2 __ovld __cnfn erfc(half2);
1311@@ -7004,14 +7003,14 @@ float3 __ovld __cnfn erf(float3);
1312 float4 __ovld __cnfn erf(float4);
1313 float8 __ovld __cnfn erf(float8);
1314 float16 __ovld __cnfn erf(float16);
1315-#ifdef cl_khr_fp64
1316+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1317 double __ovld __cnfn erf(double);
1318 double2 __ovld __cnfn erf(double2);
1319 double3 __ovld __cnfn erf(double3);
1320 double4 __ovld __cnfn erf(double4);
1321 double8 __ovld __cnfn erf(double8);
1322 double16 __ovld __cnfn erf(double16);
1323-#endif //cl_khr_fp64
1324+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1325 #ifdef cl_khr_fp16
1326 half __ovld __cnfn erf(half);
1327 half2 __ovld __cnfn erf(half2);
1328@@ -7030,14 +7029,14 @@ float3 __ovld __cnfn exp(float3 x);
1329 float4 __ovld __cnfn exp(float4 x);
1330 float8 __ovld __cnfn exp(float8 x);
1331 float16 __ovld __cnfn exp(float16 x);
1332-#ifdef cl_khr_fp64
1333+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1334 double __ovld __cnfn exp(double x);
1335 double2 __ovld __cnfn exp(double2 x);
1336 double3 __ovld __cnfn exp(double3 x);
1337 double4 __ovld __cnfn exp(double4 x);
1338 double8 __ovld __cnfn exp(double8 x);
1339 double16 __ovld __cnfn exp(double16 x);
1340-#endif //cl_khr_fp64
1341+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1342 #ifdef cl_khr_fp16
1343 half __ovld __cnfn exp(half x);
1344 half2 __ovld __cnfn exp(half2 x);
1345@@ -7056,14 +7055,14 @@ float3 __ovld __cnfn exp2(float3);
1346 float4 __ovld __cnfn exp2(float4);
1347 float8 __ovld __cnfn exp2(float8);
1348 float16 __ovld __cnfn exp2(float16);
1349-#ifdef cl_khr_fp64
1350+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1351 double __ovld __cnfn exp2(double);
1352 double2 __ovld __cnfn exp2(double2);
1353 double3 __ovld __cnfn exp2(double3);
1354 double4 __ovld __cnfn exp2(double4);
1355 double8 __ovld __cnfn exp2(double8);
1356 double16 __ovld __cnfn exp2(double16);
1357-#endif //cl_khr_fp64
1358+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1359 #ifdef cl_khr_fp16
1360 half __ovld __cnfn exp2(half);
1361 half2 __ovld __cnfn exp2(half2);
1362@@ -7082,14 +7081,14 @@ float3 __ovld __cnfn exp10(float3);
1363 float4 __ovld __cnfn exp10(float4);
1364 float8 __ovld __cnfn exp10(float8);
1365 float16 __ovld __cnfn exp10(float16);
1366-#ifdef cl_khr_fp64
1367+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1368 double __ovld __cnfn exp10(double);
1369 double2 __ovld __cnfn exp10(double2);
1370 double3 __ovld __cnfn exp10(double3);
1371 double4 __ovld __cnfn exp10(double4);
1372 double8 __ovld __cnfn exp10(double8);
1373 double16 __ovld __cnfn exp10(double16);
1374-#endif //cl_khr_fp64
1375+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1376 #ifdef cl_khr_fp16
1377 half __ovld __cnfn exp10(half);
1378 half2 __ovld __cnfn exp10(half2);
1379@@ -7108,14 +7107,14 @@ float3 __ovld __cnfn expm1(float3 x);
1380 float4 __ovld __cnfn expm1(float4 x);
1381 float8 __ovld __cnfn expm1(float8 x);
1382 float16 __ovld __cnfn expm1(float16 x);
1383-#ifdef cl_khr_fp64
1384+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1385 double __ovld __cnfn expm1(double x);
1386 double2 __ovld __cnfn expm1(double2 x);
1387 double3 __ovld __cnfn expm1(double3 x);
1388 double4 __ovld __cnfn expm1(double4 x);
1389 double8 __ovld __cnfn expm1(double8 x);
1390 double16 __ovld __cnfn expm1(double16 x);
1391-#endif //cl_khr_fp64
1392+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1393 #ifdef cl_khr_fp16
1394 half __ovld __cnfn expm1(half x);
1395 half2 __ovld __cnfn expm1(half2 x);
1396@@ -7134,14 +7133,14 @@ float3 __ovld __cnfn fabs(float3);
1397 float4 __ovld __cnfn fabs(float4);
1398 float8 __ovld __cnfn fabs(float8);
1399 float16 __ovld __cnfn fabs(float16);
1400-#ifdef cl_khr_fp64
1401+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1402 double __ovld __cnfn fabs(double);
1403 double2 __ovld __cnfn fabs(double2);
1404 double3 __ovld __cnfn fabs(double3);
1405 double4 __ovld __cnfn fabs(double4);
1406 double8 __ovld __cnfn fabs(double8);
1407 double16 __ovld __cnfn fabs(double16);
1408-#endif //cl_khr_fp64
1409+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1410 #ifdef cl_khr_fp16
1411 half __ovld __cnfn fabs(half);
1412 half2 __ovld __cnfn fabs(half2);
1413@@ -7160,14 +7159,14 @@ float3 __ovld __cnfn fdim(float3 x, float3 y);
1414 float4 __ovld __cnfn fdim(float4 x, float4 y);
1415 float8 __ovld __cnfn fdim(float8 x, float8 y);
1416 float16 __ovld __cnfn fdim(float16 x, float16 y);
1417-#ifdef cl_khr_fp64
1418+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1419 double __ovld __cnfn fdim(double x, double y);
1420 double2 __ovld __cnfn fdim(double2 x, double2 y);
1421 double3 __ovld __cnfn fdim(double3 x, double3 y);
1422 double4 __ovld __cnfn fdim(double4 x, double4 y);
1423 double8 __ovld __cnfn fdim(double8 x, double8 y);
1424 double16 __ovld __cnfn fdim(double16 x, double16 y);
1425-#endif //cl_khr_fp64
1426+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1427 #ifdef cl_khr_fp16
1428 half __ovld __cnfn fdim(half x, half y);
1429 half2 __ovld __cnfn fdim(half2 x, half2 y);
1430@@ -7187,14 +7186,14 @@ float3 __ovld __cnfn floor(float3);
1431 float4 __ovld __cnfn floor(float4);
1432 float8 __ovld __cnfn floor(float8);
1433 float16 __ovld __cnfn floor(float16);
1434-#ifdef cl_khr_fp64
1435+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1436 double __ovld __cnfn floor(double);
1437 double2 __ovld __cnfn floor(double2);
1438 double3 __ovld __cnfn floor(double3);
1439 double4 __ovld __cnfn floor(double4);
1440 double8 __ovld __cnfn floor(double8);
1441 double16 __ovld __cnfn floor(double16);
1442-#endif //cl_khr_fp64
1443+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1444 #ifdef cl_khr_fp16
1445 half __ovld __cnfn floor(half);
1446 half2 __ovld __cnfn floor(half2);
1447@@ -7217,14 +7216,14 @@ float3 __ovld __cnfn fma(float3 a, float3 b, float3 c);
1448 float4 __ovld __cnfn fma(float4 a, float4 b, float4 c);
1449 float8 __ovld __cnfn fma(float8 a, float8 b, float8 c);
1450 float16 __ovld __cnfn fma(float16 a, float16 b, float16 c);
1451-#ifdef cl_khr_fp64
1452+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1453 double __ovld __cnfn fma(double a, double b, double c);
1454 double2 __ovld __cnfn fma(double2 a, double2 b, double2 c);
1455 double3 __ovld __cnfn fma(double3 a, double3 b, double3 c);
1456 double4 __ovld __cnfn fma(double4 a, double4 b, double4 c);
1457 double8 __ovld __cnfn fma(double8 a, double8 b, double8 c);
1458 double16 __ovld __cnfn fma(double16 a, double16 b, double16 c);
1459-#endif //cl_khr_fp64
1460+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1461 #ifdef cl_khr_fp16
1462 half __ovld __cnfn fma(half a, half b, half c);
1463 half2 __ovld __cnfn fma(half2 a, half2 b, half2 c);
1464@@ -7251,7 +7250,7 @@ float3 __ovld __cnfn fmax(float3 x, float y);
1465 float4 __ovld __cnfn fmax(float4 x, float y);
1466 float8 __ovld __cnfn fmax(float8 x, float y);
1467 float16 __ovld __cnfn fmax(float16 x, float y);
1468-#ifdef cl_khr_fp64
1469+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1470 double __ovld __cnfn fmax(double x, double y);
1471 double2 __ovld __cnfn fmax(double2 x, double2 y);
1472 double3 __ovld __cnfn fmax(double3 x, double3 y);
1473@@ -7263,7 +7262,7 @@ double3 __ovld __cnfn fmax(double3 x, double y);
1474 double4 __ovld __cnfn fmax(double4 x, double y);
1475 double8 __ovld __cnfn fmax(double8 x, double y);
1476 double16 __ovld __cnfn fmax(double16 x, double y);
1477-#endif //cl_khr_fp64
1478+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1479 #ifdef cl_khr_fp16
1480 half __ovld __cnfn fmax(half x, half y);
1481 half2 __ovld __cnfn fmax(half2 x, half2 y);
1482@@ -7295,7 +7294,7 @@ float3 __ovld __cnfn fmin(float3 x, float y);
1483 float4 __ovld __cnfn fmin(float4 x, float y);
1484 float8 __ovld __cnfn fmin(float8 x, float y);
1485 float16 __ovld __cnfn fmin(float16 x, float y);
1486-#ifdef cl_khr_fp64
1487+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1488 double __ovld __cnfn fmin(double x, double y);
1489 double2 __ovld __cnfn fmin(double2 x, double2 y);
1490 double3 __ovld __cnfn fmin(double3 x, double3 y);
1491@@ -7307,7 +7306,7 @@ double3 __ovld __cnfn fmin(double3 x, double y);
1492 double4 __ovld __cnfn fmin(double4 x, double y);
1493 double8 __ovld __cnfn fmin(double8 x, double y);
1494 double16 __ovld __cnfn fmin(double16 x, double y);
1495-#endif //cl_khr_fp64
1496+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1497 #ifdef cl_khr_fp16
1498 half __ovld __cnfn fmin(half x, half y);
1499 half2 __ovld __cnfn fmin(half2 x, half2 y);
1500@@ -7331,14 +7330,14 @@ float3 __ovld __cnfn fmod(float3 x, float3 y);
1501 float4 __ovld __cnfn fmod(float4 x, float4 y);
1502 float8 __ovld __cnfn fmod(float8 x, float8 y);
1503 float16 __ovld __cnfn fmod(float16 x, float16 y);
1504-#ifdef cl_khr_fp64
1505+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1506 double __ovld __cnfn fmod(double x, double y);
1507 double2 __ovld __cnfn fmod(double2 x, double2 y);
1508 double3 __ovld __cnfn fmod(double3 x, double3 y);
1509 double4 __ovld __cnfn fmod(double4 x, double4 y);
1510 double8 __ovld __cnfn fmod(double8 x, double8 y);
1511 double16 __ovld __cnfn fmod(double16 x, double16 y);
1512-#endif //cl_khr_fp64
1513+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1514 #ifdef cl_khr_fp16
1515 half __ovld __cnfn fmod(half x, half y);
1516 half2 __ovld __cnfn fmod(half2 x, half2 y);
1517@@ -7352,21 +7351,21 @@ half16 __ovld __cnfn fmod(half16 x, half16 y);
1518 * Returns fmin(x - floor (x), 0x1.fffffep-1f ).
1519 * floor(x) is returned in iptr.
1520 */
1521-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1522+#ifdef __opencl_c_generic_address_space
1523 float __ovld fract(float x, float *iptr);
1524 float2 __ovld fract(float2 x, float2 *iptr);
1525 float3 __ovld fract(float3 x, float3 *iptr);
1526 float4 __ovld fract(float4 x, float4 *iptr);
1527 float8 __ovld fract(float8 x, float8 *iptr);
1528 float16 __ovld fract(float16 x, float16 *iptr);
1529-#ifdef cl_khr_fp64
1530+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1531 double __ovld fract(double x, double *iptr);
1532 double2 __ovld fract(double2 x, double2 *iptr);
1533 double3 __ovld fract(double3 x, double3 *iptr);
1534 double4 __ovld fract(double4 x, double4 *iptr);
1535 double8 __ovld fract(double8 x, double8 *iptr);
1536 double16 __ovld fract(double16 x, double16 *iptr);
1537-#endif //cl_khr_fp64
1538+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1539 #ifdef cl_khr_fp16
1540 half __ovld fract(half x, half *iptr);
1541 half2 __ovld fract(half2 x, half2 *iptr);
1542@@ -7375,7 +7374,9 @@ half4 __ovld fract(half4 x, half4 *iptr);
1543 half8 __ovld fract(half8 x, half8 *iptr);
1544 half16 __ovld fract(half16 x, half16 *iptr);
1545 #endif //cl_khr_fp16
1546-#else
1547+#endif //__opencl_c_generic_address_space
1548+
1549+#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0)
1550 float __ovld fract(float x, __global float *iptr);
1551 float2 __ovld fract(float2 x, __global float2 *iptr);
1552 float3 __ovld fract(float3 x, __global float3 *iptr);
1553@@ -7394,7 +7395,7 @@ float3 __ovld fract(float3 x, __private float3 *iptr);
1554 float4 __ovld fract(float4 x, __private float4 *iptr);
1555 float8 __ovld fract(float8 x, __private float8 *iptr);
1556 float16 __ovld fract(float16 x, __private float16 *iptr);
1557-#ifdef cl_khr_fp64
1558+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1559 double __ovld fract(double x, __global double *iptr);
1560 double2 __ovld fract(double2 x, __global double2 *iptr);
1561 double3 __ovld fract(double3 x, __global double3 *iptr);
1562@@ -7413,7 +7414,7 @@ double3 __ovld fract(double3 x, __private double3 *iptr);
1563 double4 __ovld fract(double4 x, __private double4 *iptr);
1564 double8 __ovld fract(double8 x, __private double8 *iptr);
1565 double16 __ovld fract(double16 x, __private double16 *iptr);
1566-#endif //cl_khr_fp64
1567+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1568 #ifdef cl_khr_fp16
1569 half __ovld fract(half x, __global half *iptr);
1570 half2 __ovld fract(half2 x, __global half2 *iptr);
1571@@ -7434,29 +7435,29 @@ half4 __ovld fract(half4 x, __private half4 *iptr);
1572 half8 __ovld fract(half8 x, __private half8 *iptr);
1573 half16 __ovld fract(half16 x, __private half16 *iptr);
1574 #endif //cl_khr_fp16
1575-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1576-
1577+#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ !=
1578+ //! CL_VERSION_2_0)
1579 /**
1580 * Extract mantissa and exponent from x. For each
1581 * component the mantissa returned is a float with
1582 * magnitude in the interval [1/2, 1) or 0. Each
1583 * component of x equals mantissa returned * 2^exp.
1584 */
1585-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1586+#ifdef __opencl_c_generic_address_space
1587 float __ovld frexp(float x, int *exp);
1588 float2 __ovld frexp(float2 x, int2 *exp);
1589 float3 __ovld frexp(float3 x, int3 *exp);
1590 float4 __ovld frexp(float4 x, int4 *exp);
1591 float8 __ovld frexp(float8 x, int8 *exp);
1592 float16 __ovld frexp(float16 x, int16 *exp);
1593-#ifdef cl_khr_fp64
1594+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1595 double __ovld frexp(double x, int *exp);
1596 double2 __ovld frexp(double2 x, int2 *exp);
1597 double3 __ovld frexp(double3 x, int3 *exp);
1598 double4 __ovld frexp(double4 x, int4 *exp);
1599 double8 __ovld frexp(double8 x, int8 *exp);
1600 double16 __ovld frexp(double16 x, int16 *exp);
1601-#endif //cl_khr_fp64
1602+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1603 #ifdef cl_khr_fp16
1604 half __ovld frexp(half x, int *exp);
1605 half2 __ovld frexp(half2 x, int2 *exp);
1606@@ -7465,7 +7466,9 @@ half4 __ovld frexp(half4 x, int4 *exp);
1607 half8 __ovld frexp(half8 x, int8 *exp);
1608 half16 __ovld frexp(half16 x, int16 *exp);
1609 #endif //cl_khr_fp16
1610-#else
1611+#endif //__opencl_c_generic_address_space
1612+
1613+#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0)
1614 float __ovld frexp(float x, __global int *exp);
1615 float2 __ovld frexp(float2 x, __global int2 *exp);
1616 float3 __ovld frexp(float3 x, __global int3 *exp);
1617@@ -7484,7 +7487,7 @@ float3 __ovld frexp(float3 x, __private int3 *exp);
1618 float4 __ovld frexp(float4 x, __private int4 *exp);
1619 float8 __ovld frexp(float8 x, __private int8 *exp);
1620 float16 __ovld frexp(float16 x, __private int16 *exp);
1621-#ifdef cl_khr_fp64
1622+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1623 double __ovld frexp(double x, __global int *exp);
1624 double2 __ovld frexp(double2 x, __global int2 *exp);
1625 double3 __ovld frexp(double3 x, __global int3 *exp);
1626@@ -7503,7 +7506,7 @@ double3 __ovld frexp(double3 x, __private int3 *exp);
1627 double4 __ovld frexp(double4 x, __private int4 *exp);
1628 double8 __ovld frexp(double8 x, __private int8 *exp);
1629 double16 __ovld frexp(double16 x, __private int16 *exp);
1630-#endif //cl_khr_fp64
1631+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1632 #ifdef cl_khr_fp16
1633 half __ovld frexp(half x, __global int *exp);
1634 half2 __ovld frexp(half2 x, __global int2 *exp);
1635@@ -7524,7 +7527,8 @@ half4 __ovld frexp(half4 x, __private int4 *exp);
1636 half8 __ovld frexp(half8 x, __private int8 *exp);
1637 half16 __ovld frexp(half16 x, __private int16 *exp);
1638 #endif //cl_khr_fp16
1639-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1640+#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ !=
1641+ //! CL_VERSION_2_0)
1642
1643 /**
1644 * Compute the value of the square root of x^2 + y^2
1645@@ -7536,14 +7540,14 @@ float3 __ovld __cnfn hypot(float3 x, float3 y);
1646 float4 __ovld __cnfn hypot(float4 x, float4 y);
1647 float8 __ovld __cnfn hypot(float8 x, float8 y);
1648 float16 __ovld __cnfn hypot(float16 x, float16 y);
1649-#ifdef cl_khr_fp64
1650+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1651 double __ovld __cnfn hypot(double x, double y);
1652 double2 __ovld __cnfn hypot(double2 x, double2 y);
1653 double3 __ovld __cnfn hypot(double3 x, double3 y);
1654 double4 __ovld __cnfn hypot(double4 x, double4 y);
1655 double8 __ovld __cnfn hypot(double8 x, double8 y);
1656 double16 __ovld __cnfn hypot(double16 x, double16 y);
1657-#endif //cl_khr_fp64
1658+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1659 #ifdef cl_khr_fp16
1660 half __ovld __cnfn hypot(half x, half y);
1661 half2 __ovld __cnfn hypot(half2 x, half2 y);
1662@@ -7562,14 +7566,14 @@ int3 __ovld __cnfn ilogb(float3 x);
1663 int4 __ovld __cnfn ilogb(float4 x);
1664 int8 __ovld __cnfn ilogb(float8 x);
1665 int16 __ovld __cnfn ilogb(float16 x);
1666-#ifdef cl_khr_fp64
1667+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1668 int __ovld __cnfn ilogb(double x);
1669 int2 __ovld __cnfn ilogb(double2 x);
1670 int3 __ovld __cnfn ilogb(double3 x);
1671 int4 __ovld __cnfn ilogb(double4 x);
1672 int8 __ovld __cnfn ilogb(double8 x);
1673 int16 __ovld __cnfn ilogb(double16 x);
1674-#endif //cl_khr_fp64
1675+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1676 #ifdef cl_khr_fp16
1677 int __ovld __cnfn ilogb(half x);
1678 int2 __ovld __cnfn ilogb(half2 x);
1679@@ -7593,7 +7597,7 @@ float3 __ovld __cnfn ldexp(float3 x, int n);
1680 float4 __ovld __cnfn ldexp(float4 x, int n);
1681 float8 __ovld __cnfn ldexp(float8 x, int n);
1682 float16 __ovld __cnfn ldexp(float16 x, int n);
1683-#ifdef cl_khr_fp64
1684+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1685 double __ovld __cnfn ldexp(double x, int n);
1686 double2 __ovld __cnfn ldexp(double2 x, int2 n);
1687 double3 __ovld __cnfn ldexp(double3 x, int3 n);
1688@@ -7605,7 +7609,7 @@ double3 __ovld __cnfn ldexp(double3 x, int n);
1689 double4 __ovld __cnfn ldexp(double4 x, int n);
1690 double8 __ovld __cnfn ldexp(double8 x, int n);
1691 double16 __ovld __cnfn ldexp(double16 x, int n);
1692-#endif //cl_khr_fp64
1693+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1694 #ifdef cl_khr_fp16
1695 half __ovld __cnfn ldexp(half x, int n);
1696 half2 __ovld __cnfn ldexp(half2 x, int2 n);
1697@@ -7632,14 +7636,14 @@ float3 __ovld __cnfn lgamma(float3 x);
1698 float4 __ovld __cnfn lgamma(float4 x);
1699 float8 __ovld __cnfn lgamma(float8 x);
1700 float16 __ovld __cnfn lgamma(float16 x);
1701-#ifdef cl_khr_fp64
1702+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1703 double __ovld __cnfn lgamma(double x);
1704 double2 __ovld __cnfn lgamma(double2 x);
1705 double3 __ovld __cnfn lgamma(double3 x);
1706 double4 __ovld __cnfn lgamma(double4 x);
1707 double8 __ovld __cnfn lgamma(double8 x);
1708 double16 __ovld __cnfn lgamma(double16 x);
1709-#endif //cl_khr_fp64
1710+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1711 #ifdef cl_khr_fp16
1712 half __ovld __cnfn lgamma(half x);
1713 half2 __ovld __cnfn lgamma(half2 x);
1714@@ -7649,21 +7653,21 @@ half8 __ovld __cnfn lgamma(half8 x);
1715 half16 __ovld __cnfn lgamma(half16 x);
1716 #endif //cl_khr_fp16
1717
1718-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1719+#ifdef __opencl_c_generic_address_space
1720 float __ovld lgamma_r(float x, int *signp);
1721 float2 __ovld lgamma_r(float2 x, int2 *signp);
1722 float3 __ovld lgamma_r(float3 x, int3 *signp);
1723 float4 __ovld lgamma_r(float4 x, int4 *signp);
1724 float8 __ovld lgamma_r(float8 x, int8 *signp);
1725 float16 __ovld lgamma_r(float16 x, int16 *signp);
1726-#ifdef cl_khr_fp64
1727+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1728 double __ovld lgamma_r(double x, int *signp);
1729 double2 __ovld lgamma_r(double2 x, int2 *signp);
1730 double3 __ovld lgamma_r(double3 x, int3 *signp);
1731 double4 __ovld lgamma_r(double4 x, int4 *signp);
1732 double8 __ovld lgamma_r(double8 x, int8 *signp);
1733 double16 __ovld lgamma_r(double16 x, int16 *signp);
1734-#endif //cl_khr_fp64
1735+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1736 #ifdef cl_khr_fp16
1737 half __ovld lgamma_r(half x, int *signp);
1738 half2 __ovld lgamma_r(half2 x, int2 *signp);
1739@@ -7672,7 +7676,9 @@ half4 __ovld lgamma_r(half4 x, int4 *signp);
1740 half8 __ovld lgamma_r(half8 x, int8 *signp);
1741 half16 __ovld lgamma_r(half16 x, int16 *signp);
1742 #endif //cl_khr_fp16
1743-#else
1744+#endif //__opencl_c_generic_address_space
1745+
1746+#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0)
1747 float __ovld lgamma_r(float x, __global int *signp);
1748 float2 __ovld lgamma_r(float2 x, __global int2 *signp);
1749 float3 __ovld lgamma_r(float3 x, __global int3 *signp);
1750@@ -7691,7 +7697,7 @@ float3 __ovld lgamma_r(float3 x, __private int3 *signp);
1751 float4 __ovld lgamma_r(float4 x, __private int4 *signp);
1752 float8 __ovld lgamma_r(float8 x, __private int8 *signp);
1753 float16 __ovld lgamma_r(float16 x, __private int16 *signp);
1754-#ifdef cl_khr_fp64
1755+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1756 double __ovld lgamma_r(double x, __global int *signp);
1757 double2 __ovld lgamma_r(double2 x, __global int2 *signp);
1758 double3 __ovld lgamma_r(double3 x, __global int3 *signp);
1759@@ -7710,7 +7716,7 @@ double3 __ovld lgamma_r(double3 x, __private int3 *signp);
1760 double4 __ovld lgamma_r(double4 x, __private int4 *signp);
1761 double8 __ovld lgamma_r(double8 x, __private int8 *signp);
1762 double16 __ovld lgamma_r(double16 x, __private int16 *signp);
1763-#endif //cl_khr_fp64
1764+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1765 #ifdef cl_khr_fp16
1766 half __ovld lgamma_r(half x, __global int *signp);
1767 half2 __ovld lgamma_r(half2 x, __global int2 *signp);
1768@@ -7731,8 +7737,8 @@ half4 __ovld lgamma_r(half4 x, __private int4 *signp);
1769 half8 __ovld lgamma_r(half8 x, __private int8 *signp);
1770 half16 __ovld lgamma_r(half16 x, __private int16 *signp);
1771 #endif //cl_khr_fp16
1772-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1773-
1774+#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ !=
1775+ //! CL_VERSION_2_0)
1776 /**
1777 * Compute natural logarithm.
1778 */
1779@@ -7742,14 +7748,14 @@ float3 __ovld __cnfn log(float3);
1780 float4 __ovld __cnfn log(float4);
1781 float8 __ovld __cnfn log(float8);
1782 float16 __ovld __cnfn log(float16);
1783-#ifdef cl_khr_fp64
1784+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1785 double __ovld __cnfn log(double);
1786 double2 __ovld __cnfn log(double2);
1787 double3 __ovld __cnfn log(double3);
1788 double4 __ovld __cnfn log(double4);
1789 double8 __ovld __cnfn log(double8);
1790 double16 __ovld __cnfn log(double16);
1791-#endif //cl_khr_fp64
1792+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1793 #ifdef cl_khr_fp16
1794 half __ovld __cnfn log(half);
1795 half2 __ovld __cnfn log(half2);
1796@@ -7760,7 +7766,7 @@ half16 __ovld __cnfn log(half16);
1797 #endif //cl_khr_fp16
1798
1799 /**
1800- * Compute a base 2 logarithm.
1801+ * Compute a base 2 logarithm
1802 */
1803 float __ovld __cnfn log2(float);
1804 float2 __ovld __cnfn log2(float2);
1805@@ -7768,14 +7774,14 @@ float3 __ovld __cnfn log2(float3);
1806 float4 __ovld __cnfn log2(float4);
1807 float8 __ovld __cnfn log2(float8);
1808 float16 __ovld __cnfn log2(float16);
1809-#ifdef cl_khr_fp64
1810+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1811 double __ovld __cnfn log2(double);
1812 double2 __ovld __cnfn log2(double2);
1813 double3 __ovld __cnfn log2(double3);
1814 double4 __ovld __cnfn log2(double4);
1815 double8 __ovld __cnfn log2(double8);
1816 double16 __ovld __cnfn log2(double16);
1817-#endif //cl_khr_fp64
1818+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1819 #ifdef cl_khr_fp16
1820 half __ovld __cnfn log2(half);
1821 half2 __ovld __cnfn log2(half2);
1822@@ -7794,14 +7800,14 @@ float3 __ovld __cnfn log10(float3);
1823 float4 __ovld __cnfn log10(float4);
1824 float8 __ovld __cnfn log10(float8);
1825 float16 __ovld __cnfn log10(float16);
1826-#ifdef cl_khr_fp64
1827+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1828 double __ovld __cnfn log10(double);
1829 double2 __ovld __cnfn log10(double2);
1830 double3 __ovld __cnfn log10(double3);
1831 double4 __ovld __cnfn log10(double4);
1832 double8 __ovld __cnfn log10(double8);
1833 double16 __ovld __cnfn log10(double16);
1834-#endif //cl_khr_fp64
1835+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1836 #ifdef cl_khr_fp16
1837 half __ovld __cnfn log10(half);
1838 half2 __ovld __cnfn log10(half2);
1839@@ -7820,14 +7826,14 @@ float3 __ovld __cnfn log1p(float3 x);
1840 float4 __ovld __cnfn log1p(float4 x);
1841 float8 __ovld __cnfn log1p(float8 x);
1842 float16 __ovld __cnfn log1p(float16 x);
1843-#ifdef cl_khr_fp64
1844+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1845 double __ovld __cnfn log1p(double x);
1846 double2 __ovld __cnfn log1p(double2 x);
1847 double3 __ovld __cnfn log1p(double3 x);
1848 double4 __ovld __cnfn log1p(double4 x);
1849 double8 __ovld __cnfn log1p(double8 x);
1850 double16 __ovld __cnfn log1p(double16 x);
1851-#endif //cl_khr_fp64
1852+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1853 #ifdef cl_khr_fp16
1854 half __ovld __cnfn log1p(half x);
1855 half2 __ovld __cnfn log1p(half2 x);
1856@@ -7847,14 +7853,14 @@ float3 __ovld __cnfn logb(float3 x);
1857 float4 __ovld __cnfn logb(float4 x);
1858 float8 __ovld __cnfn logb(float8 x);
1859 float16 __ovld __cnfn logb(float16 x);
1860-#ifdef cl_khr_fp64
1861+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1862 double __ovld __cnfn logb(double x);
1863 double2 __ovld __cnfn logb(double2 x);
1864 double3 __ovld __cnfn logb(double3 x);
1865 double4 __ovld __cnfn logb(double4 x);
1866 double8 __ovld __cnfn logb(double8 x);
1867 double16 __ovld __cnfn logb(double16 x);
1868-#endif //cl_khr_fp64
1869+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1870 #ifdef cl_khr_fp16
1871 half __ovld __cnfn logb(half x);
1872 half2 __ovld __cnfn logb(half2 x);
1873@@ -7877,14 +7883,14 @@ float3 __ovld __cnfn mad(float3 a, float3 b, float3 c);
1874 float4 __ovld __cnfn mad(float4 a, float4 b, float4 c);
1875 float8 __ovld __cnfn mad(float8 a, float8 b, float8 c);
1876 float16 __ovld __cnfn mad(float16 a, float16 b, float16 c);
1877-#ifdef cl_khr_fp64
1878+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1879 double __ovld __cnfn mad(double a, double b, double c);
1880 double2 __ovld __cnfn mad(double2 a, double2 b, double2 c);
1881 double3 __ovld __cnfn mad(double3 a, double3 b, double3 c);
1882 double4 __ovld __cnfn mad(double4 a, double4 b, double4 c);
1883 double8 __ovld __cnfn mad(double8 a, double8 b, double8 c);
1884 double16 __ovld __cnfn mad(double16 a, double16 b, double16 c);
1885-#endif //cl_khr_fp64
1886+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1887 #ifdef cl_khr_fp16
1888 half __ovld __cnfn mad(half a, half b, half c);
1889 half2 __ovld __cnfn mad(half2 a, half2 b, half2 c);
1890@@ -7904,14 +7910,14 @@ float3 __ovld __cnfn maxmag(float3 x, float3 y);
1891 float4 __ovld __cnfn maxmag(float4 x, float4 y);
1892 float8 __ovld __cnfn maxmag(float8 x, float8 y);
1893 float16 __ovld __cnfn maxmag(float16 x, float16 y);
1894-#ifdef cl_khr_fp64
1895+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1896 double __ovld __cnfn maxmag(double x, double y);
1897 double2 __ovld __cnfn maxmag(double2 x, double2 y);
1898 double3 __ovld __cnfn maxmag(double3 x, double3 y);
1899 double4 __ovld __cnfn maxmag(double4 x, double4 y);
1900 double8 __ovld __cnfn maxmag(double8 x, double8 y);
1901 double16 __ovld __cnfn maxmag(double16 x, double16 y);
1902-#endif //cl_khr_fp64
1903+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1904 #ifdef cl_khr_fp16
1905 half __ovld __cnfn maxmag(half x, half y);
1906 half2 __ovld __cnfn maxmag(half2 x, half2 y);
1907@@ -7931,14 +7937,14 @@ float3 __ovld __cnfn minmag(float3 x, float3 y);
1908 float4 __ovld __cnfn minmag(float4 x, float4 y);
1909 float8 __ovld __cnfn minmag(float8 x, float8 y);
1910 float16 __ovld __cnfn minmag(float16 x, float16 y);
1911-#ifdef cl_khr_fp64
1912+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1913 double __ovld __cnfn minmag(double x, double y);
1914 double2 __ovld __cnfn minmag(double2 x, double2 y);
1915 double3 __ovld __cnfn minmag(double3 x, double3 y);
1916 double4 __ovld __cnfn minmag(double4 x, double4 y);
1917 double8 __ovld __cnfn minmag(double8 x, double8 y);
1918 double16 __ovld __cnfn minmag(double16 x, double16 y);
1919-#endif //cl_khr_fp64
1920+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1921 #ifdef cl_khr_fp16
1922 half __ovld __cnfn minmag(half x, half y);
1923 half2 __ovld __cnfn minmag(half2 x, half2 y);
1924@@ -7955,21 +7961,21 @@ half16 __ovld __cnfn minmag(half16 x, half16 y);
1925 * the argument. It stores the integral part in the object
1926 * pointed to by iptr.
1927 */
1928-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1929+#ifdef __opencl_c_generic_address_space
1930 float __ovld modf(float x, float *iptr);
1931 float2 __ovld modf(float2 x, float2 *iptr);
1932 float3 __ovld modf(float3 x, float3 *iptr);
1933 float4 __ovld modf(float4 x, float4 *iptr);
1934 float8 __ovld modf(float8 x, float8 *iptr);
1935 float16 __ovld modf(float16 x, float16 *iptr);
1936-#ifdef cl_khr_fp64
1937+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1938 double __ovld modf(double x, double *iptr);
1939 double2 __ovld modf(double2 x, double2 *iptr);
1940 double3 __ovld modf(double3 x, double3 *iptr);
1941 double4 __ovld modf(double4 x, double4 *iptr);
1942 double8 __ovld modf(double8 x, double8 *iptr);
1943 double16 __ovld modf(double16 x, double16 *iptr);
1944-#endif //cl_khr_fp64
1945+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1946 #ifdef cl_khr_fp16
1947 half __ovld modf(half x, half *iptr);
1948 half2 __ovld modf(half2 x, half2 *iptr);
1949@@ -7978,7 +7984,9 @@ half4 __ovld modf(half4 x, half4 *iptr);
1950 half8 __ovld modf(half8 x, half8 *iptr);
1951 half16 __ovld modf(half16 x, half16 *iptr);
1952 #endif //cl_khr_fp16
1953-#else
1954+#endif //__opencl_c_generic_address_space
1955+
1956+#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0)
1957 float __ovld modf(float x, __global float *iptr);
1958 float2 __ovld modf(float2 x, __global float2 *iptr);
1959 float3 __ovld modf(float3 x, __global float3 *iptr);
1960@@ -7997,7 +8005,7 @@ float3 __ovld modf(float3 x, __private float3 *iptr);
1961 float4 __ovld modf(float4 x, __private float4 *iptr);
1962 float8 __ovld modf(float8 x, __private float8 *iptr);
1963 float16 __ovld modf(float16 x, __private float16 *iptr);
1964-#ifdef cl_khr_fp64
1965+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1966 double __ovld modf(double x, __global double *iptr);
1967 double2 __ovld modf(double2 x, __global double2 *iptr);
1968 double3 __ovld modf(double3 x, __global double3 *iptr);
1969@@ -8016,7 +8024,7 @@ double3 __ovld modf(double3 x, __private double3 *iptr);
1970 double4 __ovld modf(double4 x, __private double4 *iptr);
1971 double8 __ovld modf(double8 x, __private double8 *iptr);
1972 double16 __ovld modf(double16 x, __private double16 *iptr);
1973-#endif //cl_khr_fp64
1974+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1975 #ifdef cl_khr_fp16
1976 half __ovld modf(half x, __global half *iptr);
1977 half2 __ovld modf(half2 x, __global half2 *iptr);
1978@@ -8037,7 +8045,8 @@ half4 __ovld modf(half4 x, __private half4 *iptr);
1979 half8 __ovld modf(half8 x, __private half8 *iptr);
1980 half16 __ovld modf(half16 x, __private half16 *iptr);
1981 #endif //cl_khr_fp16
1982-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
1983+#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ !=
1984+ //! CL_VERSION_2_0)
1985
1986 /**
1987 * Returns a quiet NaN. The nancode may be placed
1988@@ -8049,14 +8058,14 @@ float3 __ovld __cnfn nan(uint3 nancode);
1989 float4 __ovld __cnfn nan(uint4 nancode);
1990 float8 __ovld __cnfn nan(uint8 nancode);
1991 float16 __ovld __cnfn nan(uint16 nancode);
1992-#ifdef cl_khr_fp64
1993+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
1994 double __ovld __cnfn nan(ulong nancode);
1995 double2 __ovld __cnfn nan(ulong2 nancode);
1996 double3 __ovld __cnfn nan(ulong3 nancode);
1997 double4 __ovld __cnfn nan(ulong4 nancode);
1998 double8 __ovld __cnfn nan(ulong8 nancode);
1999 double16 __ovld __cnfn nan(ulong16 nancode);
2000-#endif //cl_khr_fp64
2001+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2002 #ifdef cl_khr_fp16
2003 half __ovld __cnfn nan(ushort nancode);
2004 half2 __ovld __cnfn nan(ushort2 nancode);
2005@@ -8079,14 +8088,14 @@ float3 __ovld __cnfn nextafter(float3 x, float3 y);
2006 float4 __ovld __cnfn nextafter(float4 x, float4 y);
2007 float8 __ovld __cnfn nextafter(float8 x, float8 y);
2008 float16 __ovld __cnfn nextafter(float16 x, float16 y);
2009-#ifdef cl_khr_fp64
2010+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2011 double __ovld __cnfn nextafter(double x, double y);
2012 double2 __ovld __cnfn nextafter(double2 x, double2 y);
2013 double3 __ovld __cnfn nextafter(double3 x, double3 y);
2014 double4 __ovld __cnfn nextafter(double4 x, double4 y);
2015 double8 __ovld __cnfn nextafter(double8 x, double8 y);
2016 double16 __ovld __cnfn nextafter(double16 x, double16 y);
2017-#endif //cl_khr_fp64
2018+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2019 #ifdef cl_khr_fp16
2020 half __ovld __cnfn nextafter(half x, half y);
2021 half2 __ovld __cnfn nextafter(half2 x, half2 y);
2022@@ -8105,14 +8114,14 @@ float3 __ovld __cnfn pow(float3 x, float3 y);
2023 float4 __ovld __cnfn pow(float4 x, float4 y);
2024 float8 __ovld __cnfn pow(float8 x, float8 y);
2025 float16 __ovld __cnfn pow(float16 x, float16 y);
2026-#ifdef cl_khr_fp64
2027+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2028 double __ovld __cnfn pow(double x, double y);
2029 double2 __ovld __cnfn pow(double2 x, double2 y);
2030 double3 __ovld __cnfn pow(double3 x, double3 y);
2031 double4 __ovld __cnfn pow(double4 x, double4 y);
2032 double8 __ovld __cnfn pow(double8 x, double8 y);
2033 double16 __ovld __cnfn pow(double16 x, double16 y);
2034-#endif //cl_khr_fp64
2035+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2036 #ifdef cl_khr_fp16
2037 half __ovld __cnfn pow(half x, half y);
2038 half2 __ovld __cnfn pow(half2 x, half2 y);
2039@@ -8131,14 +8140,14 @@ float3 __ovld __cnfn pown(float3 x, int3 y);
2040 float4 __ovld __cnfn pown(float4 x, int4 y);
2041 float8 __ovld __cnfn pown(float8 x, int8 y);
2042 float16 __ovld __cnfn pown(float16 x, int16 y);
2043-#ifdef cl_khr_fp64
2044+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2045 double __ovld __cnfn pown(double x, int y);
2046 double2 __ovld __cnfn pown(double2 x, int2 y);
2047 double3 __ovld __cnfn pown(double3 x, int3 y);
2048 double4 __ovld __cnfn pown(double4 x, int4 y);
2049 double8 __ovld __cnfn pown(double8 x, int8 y);
2050 double16 __ovld __cnfn pown(double16 x, int16 y);
2051-#endif //cl_khr_fp64
2052+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2053 #ifdef cl_khr_fp16
2054 half __ovld __cnfn pown(half x, int y);
2055 half2 __ovld __cnfn pown(half2 x, int2 y);
2056@@ -8157,14 +8166,14 @@ float3 __ovld __cnfn powr(float3 x, float3 y);
2057 float4 __ovld __cnfn powr(float4 x, float4 y);
2058 float8 __ovld __cnfn powr(float8 x, float8 y);
2059 float16 __ovld __cnfn powr(float16 x, float16 y);
2060-#ifdef cl_khr_fp64
2061+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2062 double __ovld __cnfn powr(double x, double y);
2063 double2 __ovld __cnfn powr(double2 x, double2 y);
2064 double3 __ovld __cnfn powr(double3 x, double3 y);
2065 double4 __ovld __cnfn powr(double4 x, double4 y);
2066 double8 __ovld __cnfn powr(double8 x, double8 y);
2067 double16 __ovld __cnfn powr(double16 x, double16 y);
2068-#endif //cl_khr_fp64
2069+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2070 #ifdef cl_khr_fp16
2071 half __ovld __cnfn powr(half x, half y);
2072 half2 __ovld __cnfn powr(half2 x, half2 y);
2073@@ -8186,14 +8195,14 @@ float3 __ovld __cnfn remainder(float3 x, float3 y);
2074 float4 __ovld __cnfn remainder(float4 x, float4 y);
2075 float8 __ovld __cnfn remainder(float8 x, float8 y);
2076 float16 __ovld __cnfn remainder(float16 x, float16 y);
2077-#ifdef cl_khr_fp64
2078+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2079 double __ovld __cnfn remainder(double x, double y);
2080 double2 __ovld __cnfn remainder(double2 x, double2 y);
2081 double3 __ovld __cnfn remainder(double3 x, double3 y);
2082 double4 __ovld __cnfn remainder(double4 x, double4 y);
2083 double8 __ovld __cnfn remainder(double8 x, double8 y);
2084 double16 __ovld __cnfn remainder(double16 x, double16 y);
2085-#endif //cl_khr_fp64
2086+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2087 #ifdef cl_khr_fp16
2088 half __ovld __cnfn remainder(half x, half y);
2089 half2 __ovld __cnfn remainder(half2 x, half2 y);
2090@@ -8215,21 +8224,21 @@ half16 __ovld __cnfn remainder(half16 x, half16 y);
2091 * sign as x/y. It stores this signed value in the object
2092 * pointed to by quo.
2093 */
2094-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
2095+#ifdef __opencl_c_generic_address_space
2096 float __ovld remquo(float x, float y, int *quo);
2097 float2 __ovld remquo(float2 x, float2 y, int2 *quo);
2098 float3 __ovld remquo(float3 x, float3 y, int3 *quo);
2099 float4 __ovld remquo(float4 x, float4 y, int4 *quo);
2100 float8 __ovld remquo(float8 x, float8 y, int8 *quo);
2101 float16 __ovld remquo(float16 x, float16 y, int16 *quo);
2102-#ifdef cl_khr_fp64
2103+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2104 double __ovld remquo(double x, double y, int *quo);
2105 double2 __ovld remquo(double2 x, double2 y, int2 *quo);
2106 double3 __ovld remquo(double3 x, double3 y, int3 *quo);
2107 double4 __ovld remquo(double4 x, double4 y, int4 *quo);
2108 double8 __ovld remquo(double8 x, double8 y, int8 *quo);
2109 double16 __ovld remquo(double16 x, double16 y, int16 *quo);
2110-#endif //cl_khr_fp64
2111+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2112 #ifdef cl_khr_fp16
2113 half __ovld remquo(half x, half y, int *quo);
2114 half2 __ovld remquo(half2 x, half2 y, int2 *quo);
2115@@ -8237,9 +8246,10 @@ half3 __ovld remquo(half3 x, half3 y, int3 *quo);
2116 half4 __ovld remquo(half4 x, half4 y, int4 *quo);
2117 half8 __ovld remquo(half8 x, half8 y, int8 *quo);
2118 half16 __ovld remquo(half16 x, half16 y, int16 *quo);
2119-
2120 #endif //cl_khr_fp16
2121-#else
2122+#endif //__opencl_c_generic_address_space
2123+
2124+#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0)
2125 float __ovld remquo(float x, float y, __global int *quo);
2126 float2 __ovld remquo(float2 x, float2 y, __global int2 *quo);
2127 float3 __ovld remquo(float3 x, float3 y, __global int3 *quo);
2128@@ -8258,7 +8268,7 @@ float3 __ovld remquo(float3 x, float3 y, __private int3 *quo);
2129 float4 __ovld remquo(float4 x, float4 y, __private int4 *quo);
2130 float8 __ovld remquo(float8 x, float8 y, __private int8 *quo);
2131 float16 __ovld remquo(float16 x, float16 y, __private int16 *quo);
2132-#ifdef cl_khr_fp64
2133+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2134 double __ovld remquo(double x, double y, __global int *quo);
2135 double2 __ovld remquo(double2 x, double2 y, __global int2 *quo);
2136 double3 __ovld remquo(double3 x, double3 y, __global int3 *quo);
2137@@ -8277,7 +8287,7 @@ double3 __ovld remquo(double3 x, double3 y, __private int3 *quo);
2138 double4 __ovld remquo(double4 x, double4 y, __private int4 *quo);
2139 double8 __ovld remquo(double8 x, double8 y, __private int8 *quo);
2140 double16 __ovld remquo(double16 x, double16 y, __private int16 *quo);
2141-#endif //cl_khr_fp64
2142+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2143 #ifdef cl_khr_fp16
2144 half __ovld remquo(half x, half y, __global int *quo);
2145 half2 __ovld remquo(half2 x, half2 y, __global int2 *quo);
2146@@ -8298,7 +8308,8 @@ half4 __ovld remquo(half4 x, half4 y, __private int4 *quo);
2147 half8 __ovld remquo(half8 x, half8 y, __private int8 *quo);
2148 half16 __ovld remquo(half16 x, half16 y, __private int16 *quo);
2149 #endif //cl_khr_fp16
2150-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
2151+#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ !=
2152+ //! CL_VERSION_2_0)
2153 /**
2154 * Round to integral value (using round to nearest
2155 * even rounding mode) in floating-point format.
2156@@ -8311,14 +8322,14 @@ float3 __ovld __cnfn rint(float3);
2157 float4 __ovld __cnfn rint(float4);
2158 float8 __ovld __cnfn rint(float8);
2159 float16 __ovld __cnfn rint(float16);
2160-#ifdef cl_khr_fp64
2161+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2162 double __ovld __cnfn rint(double);
2163 double2 __ovld __cnfn rint(double2);
2164 double3 __ovld __cnfn rint(double3);
2165 double4 __ovld __cnfn rint(double4);
2166 double8 __ovld __cnfn rint(double8);
2167 double16 __ovld __cnfn rint(double16);
2168-#endif //cl_khr_fp64
2169+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2170 #ifdef cl_khr_fp16
2171 half __ovld __cnfn rint(half);
2172 half2 __ovld __cnfn rint(half2);
2173@@ -8337,14 +8348,14 @@ float3 __ovld __cnfn rootn(float3 x, int3 y);
2174 float4 __ovld __cnfn rootn(float4 x, int4 y);
2175 float8 __ovld __cnfn rootn(float8 x, int8 y);
2176 float16 __ovld __cnfn rootn(float16 x, int16 y);
2177-#ifdef cl_khr_fp64
2178+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2179 double __ovld __cnfn rootn(double x, int y);
2180 double2 __ovld __cnfn rootn(double2 x, int2 y);
2181 double3 __ovld __cnfn rootn(double3 x, int3 y);
2182 double4 __ovld __cnfn rootn(double4 x, int4 y);
2183 double8 __ovld __cnfn rootn(double8 x, int8 y);
2184 double16 __ovld __cnfn rootn(double16 x, int16 y);
2185-#endif //cl_khr_fp64
2186+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2187 #ifdef cl_khr_fp16
2188 half __ovld __cnfn rootn(half x, int y);
2189 half2 __ovld __cnfn rootn(half2 x, int2 y);
2190@@ -8365,14 +8376,14 @@ float3 __ovld __cnfn round(float3 x);
2191 float4 __ovld __cnfn round(float4 x);
2192 float8 __ovld __cnfn round(float8 x);
2193 float16 __ovld __cnfn round(float16 x);
2194-#ifdef cl_khr_fp64
2195+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2196 double __ovld __cnfn round(double x);
2197 double2 __ovld __cnfn round(double2 x);
2198 double3 __ovld __cnfn round(double3 x);
2199 double4 __ovld __cnfn round(double4 x);
2200 double8 __ovld __cnfn round(double8 x);
2201 double16 __ovld __cnfn round(double16 x);
2202-#endif //cl_khr_fp64
2203+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2204 #ifdef cl_khr_fp16
2205 half __ovld __cnfn round(half x);
2206 half2 __ovld __cnfn round(half2 x);
2207@@ -8391,14 +8402,14 @@ float3 __ovld __cnfn rsqrt(float3);
2208 float4 __ovld __cnfn rsqrt(float4);
2209 float8 __ovld __cnfn rsqrt(float8);
2210 float16 __ovld __cnfn rsqrt(float16);
2211-#ifdef cl_khr_fp64
2212+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2213 double __ovld __cnfn rsqrt(double);
2214 double2 __ovld __cnfn rsqrt(double2);
2215 double3 __ovld __cnfn rsqrt(double3);
2216 double4 __ovld __cnfn rsqrt(double4);
2217 double8 __ovld __cnfn rsqrt(double8);
2218 double16 __ovld __cnfn rsqrt(double16);
2219-#endif //cl_khr_fp64
2220+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2221 #ifdef cl_khr_fp16
2222 half __ovld __cnfn rsqrt(half);
2223 half2 __ovld __cnfn rsqrt(half2);
2224@@ -8417,14 +8428,14 @@ float3 __ovld __cnfn sin(float3);
2225 float4 __ovld __cnfn sin(float4);
2226 float8 __ovld __cnfn sin(float8);
2227 float16 __ovld __cnfn sin(float16);
2228-#ifdef cl_khr_fp64
2229+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2230 double __ovld __cnfn sin(double);
2231 double2 __ovld __cnfn sin(double2);
2232 double3 __ovld __cnfn sin(double3);
2233 double4 __ovld __cnfn sin(double4);
2234 double8 __ovld __cnfn sin(double8);
2235 double16 __ovld __cnfn sin(double16);
2236-#endif //cl_khr_fp64
2237+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2238 #ifdef cl_khr_fp16
2239 half __ovld __cnfn sin(half);
2240 half2 __ovld __cnfn sin(half2);
2241@@ -8439,21 +8450,21 @@ half16 __ovld __cnfn sin(half16);
2242 * is the return value and computed cosine is returned
2243 * in cosval.
2244 */
2245-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
2246+#ifdef __opencl_c_generic_address_space
2247 float __ovld sincos(float x, float *cosval);
2248 float2 __ovld sincos(float2 x, float2 *cosval);
2249 float3 __ovld sincos(float3 x, float3 *cosval);
2250 float4 __ovld sincos(float4 x, float4 *cosval);
2251 float8 __ovld sincos(float8 x, float8 *cosval);
2252 float16 __ovld sincos(float16 x, float16 *cosval);
2253-#ifdef cl_khr_fp64
2254+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2255 double __ovld sincos(double x, double *cosval);
2256 double2 __ovld sincos(double2 x, double2 *cosval);
2257 double3 __ovld sincos(double3 x, double3 *cosval);
2258 double4 __ovld sincos(double4 x, double4 *cosval);
2259 double8 __ovld sincos(double8 x, double8 *cosval);
2260 double16 __ovld sincos(double16 x, double16 *cosval);
2261-#endif //cl_khr_fp64
2262+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2263 #ifdef cl_khr_fp16
2264 half __ovld sincos(half x, half *cosval);
2265 half2 __ovld sincos(half2 x, half2 *cosval);
2266@@ -8462,7 +8473,9 @@ half4 __ovld sincos(half4 x, half4 *cosval);
2267 half8 __ovld sincos(half8 x, half8 *cosval);
2268 half16 __ovld sincos(half16 x, half16 *cosval);
2269 #endif //cl_khr_fp16
2270-#else
2271+#endif //__opencl_c_generic_address_space
2272+
2273+#if !defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ != CL_VERSION_2_0)
2274 float __ovld sincos(float x, __global float *cosval);
2275 float2 __ovld sincos(float2 x, __global float2 *cosval);
2276 float3 __ovld sincos(float3 x, __global float3 *cosval);
2277@@ -8481,7 +8494,7 @@ float3 __ovld sincos(float3 x, __private float3 *cosval);
2278 float4 __ovld sincos(float4 x, __private float4 *cosval);
2279 float8 __ovld sincos(float8 x, __private float8 *cosval);
2280 float16 __ovld sincos(float16 x, __private float16 *cosval);
2281-#ifdef cl_khr_fp64
2282+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2283 double __ovld sincos(double x, __global double *cosval);
2284 double2 __ovld sincos(double2 x, __global double2 *cosval);
2285 double3 __ovld sincos(double3 x, __global double3 *cosval);
2286@@ -8500,7 +8513,7 @@ double3 __ovld sincos(double3 x, __private double3 *cosval);
2287 double4 __ovld sincos(double4 x, __private double4 *cosval);
2288 double8 __ovld sincos(double8 x, __private double8 *cosval);
2289 double16 __ovld sincos(double16 x, __private double16 *cosval);
2290-#endif //cl_khr_fp64
2291+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2292 #ifdef cl_khr_fp16
2293 half __ovld sincos(half x, __global half *cosval);
2294 half2 __ovld sincos(half2 x, __global half2 *cosval);
2295@@ -8521,8 +8534,8 @@ half4 __ovld sincos(half4 x, __private half4 *cosval);
2296 half8 __ovld sincos(half8 x, __private half8 *cosval);
2297 half16 __ovld sincos(half16 x, __private half16 *cosval);
2298 #endif //cl_khr_fp16
2299-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
2300-
2301+#endif //! defined(__OPENCL_CPP_VERSION__) && (__OPENCL_C_VERSION__ !=
2302+ //! CL_VERSION_2_0)
2303 /**
2304 * Compute hyperbolic sine.
2305 */
2306@@ -8532,14 +8545,14 @@ float3 __ovld __cnfn sinh(float3);
2307 float4 __ovld __cnfn sinh(float4);
2308 float8 __ovld __cnfn sinh(float8);
2309 float16 __ovld __cnfn sinh(float16);
2310-#ifdef cl_khr_fp64
2311+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2312 double __ovld __cnfn sinh(double);
2313 double2 __ovld __cnfn sinh(double2);
2314 double3 __ovld __cnfn sinh(double3);
2315 double4 __ovld __cnfn sinh(double4);
2316 double8 __ovld __cnfn sinh(double8);
2317 double16 __ovld __cnfn sinh(double16);
2318-#endif //cl_khr_fp64
2319+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2320 #ifdef cl_khr_fp16
2321 half __ovld __cnfn sinh(half);
2322 half2 __ovld __cnfn sinh(half2);
2323@@ -8558,14 +8571,14 @@ float3 __ovld __cnfn sinpi(float3 x);
2324 float4 __ovld __cnfn sinpi(float4 x);
2325 float8 __ovld __cnfn sinpi(float8 x);
2326 float16 __ovld __cnfn sinpi(float16 x);
2327-#ifdef cl_khr_fp64
2328+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2329 double __ovld __cnfn sinpi(double x);
2330 double2 __ovld __cnfn sinpi(double2 x);
2331 double3 __ovld __cnfn sinpi(double3 x);
2332 double4 __ovld __cnfn sinpi(double4 x);
2333 double8 __ovld __cnfn sinpi(double8 x);
2334 double16 __ovld __cnfn sinpi(double16 x);
2335-#endif //cl_khr_fp64
2336+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2337 #ifdef cl_khr_fp16
2338 half __ovld __cnfn sinpi(half x);
2339 half2 __ovld __cnfn sinpi(half2 x);
2340@@ -8584,14 +8597,14 @@ float3 __ovld __cnfn sqrt(float3);
2341 float4 __ovld __cnfn sqrt(float4);
2342 float8 __ovld __cnfn sqrt(float8);
2343 float16 __ovld __cnfn sqrt(float16);
2344-#ifdef cl_khr_fp64
2345+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2346 double __ovld __cnfn sqrt(double);
2347 double2 __ovld __cnfn sqrt(double2);
2348 double3 __ovld __cnfn sqrt(double3);
2349 double4 __ovld __cnfn sqrt(double4);
2350 double8 __ovld __cnfn sqrt(double8);
2351 double16 __ovld __cnfn sqrt(double16);
2352-#endif //cl_khr_fp64
2353+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2354 #ifdef cl_khr_fp16
2355 half __ovld __cnfn sqrt(half);
2356 half2 __ovld __cnfn sqrt(half2);
2357@@ -8610,14 +8623,14 @@ float3 __ovld __cnfn tan(float3);
2358 float4 __ovld __cnfn tan(float4);
2359 float8 __ovld __cnfn tan(float8);
2360 float16 __ovld __cnfn tan(float16);
2361-#ifdef cl_khr_fp64
2362+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2363 double __ovld __cnfn tan(double);
2364 double2 __ovld __cnfn tan(double2);
2365 double3 __ovld __cnfn tan(double3);
2366 double4 __ovld __cnfn tan(double4);
2367 double8 __ovld __cnfn tan(double8);
2368 double16 __ovld __cnfn tan(double16);
2369-#endif //cl_khr_fp64
2370+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2371 #ifdef cl_khr_fp16
2372 half __ovld __cnfn tan(half);
2373 half2 __ovld __cnfn tan(half2);
2374@@ -8636,14 +8649,14 @@ float3 __ovld __cnfn tanh(float3);
2375 float4 __ovld __cnfn tanh(float4);
2376 float8 __ovld __cnfn tanh(float8);
2377 float16 __ovld __cnfn tanh(float16);
2378-#ifdef cl_khr_fp64
2379+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2380 double __ovld __cnfn tanh(double);
2381 double2 __ovld __cnfn tanh(double2);
2382 double3 __ovld __cnfn tanh(double3);
2383 double4 __ovld __cnfn tanh(double4);
2384 double8 __ovld __cnfn tanh(double8);
2385 double16 __ovld __cnfn tanh(double16);
2386-#endif //cl_khr_fp64
2387+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2388 #ifdef cl_khr_fp16
2389 half __ovld __cnfn tanh(half);
2390 half2 __ovld __cnfn tanh(half2);
2391@@ -8662,14 +8675,14 @@ float3 __ovld __cnfn tanpi(float3 x);
2392 float4 __ovld __cnfn tanpi(float4 x);
2393 float8 __ovld __cnfn tanpi(float8 x);
2394 float16 __ovld __cnfn tanpi(float16 x);
2395-#ifdef cl_khr_fp64
2396+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2397 double __ovld __cnfn tanpi(double x);
2398 double2 __ovld __cnfn tanpi(double2 x);
2399 double3 __ovld __cnfn tanpi(double3 x);
2400 double4 __ovld __cnfn tanpi(double4 x);
2401 double8 __ovld __cnfn tanpi(double8 x);
2402 double16 __ovld __cnfn tanpi(double16 x);
2403-#endif //cl_khr_fp64
2404+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2405 #ifdef cl_khr_fp16
2406 half __ovld __cnfn tanpi(half x);
2407 half2 __ovld __cnfn tanpi(half2 x);
2408@@ -8688,14 +8701,14 @@ float3 __ovld __cnfn tgamma(float3);
2409 float4 __ovld __cnfn tgamma(float4);
2410 float8 __ovld __cnfn tgamma(float8);
2411 float16 __ovld __cnfn tgamma(float16);
2412-#ifdef cl_khr_fp64
2413+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2414 double __ovld __cnfn tgamma(double);
2415 double2 __ovld __cnfn tgamma(double2);
2416 double3 __ovld __cnfn tgamma(double3);
2417 double4 __ovld __cnfn tgamma(double4);
2418 double8 __ovld __cnfn tgamma(double8);
2419 double16 __ovld __cnfn tgamma(double16);
2420-#endif //cl_khr_fp64
2421+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2422 #ifdef cl_khr_fp16
2423 half __ovld __cnfn tgamma(half);
2424 half2 __ovld __cnfn tgamma(half2);
2425@@ -8715,14 +8728,14 @@ float3 __ovld __cnfn trunc(float3);
2426 float4 __ovld __cnfn trunc(float4);
2427 float8 __ovld __cnfn trunc(float8);
2428 float16 __ovld __cnfn trunc(float16);
2429-#ifdef cl_khr_fp64
2430+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2431 double __ovld __cnfn trunc(double);
2432 double2 __ovld __cnfn trunc(double2);
2433 double3 __ovld __cnfn trunc(double3);
2434 double4 __ovld __cnfn trunc(double4);
2435 double8 __ovld __cnfn trunc(double8);
2436 double16 __ovld __cnfn trunc(double16);
2437-#endif //cl_khr_fp64
2438+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2439 #ifdef cl_khr_fp16
2440 half __ovld __cnfn trunc(half);
2441 half2 __ovld __cnfn trunc(half2);
2442@@ -10108,7 +10121,7 @@ float3 __ovld __cnfn clamp(float3 x, float minval, float maxval);
2443 float4 __ovld __cnfn clamp(float4 x, float minval, float maxval);
2444 float8 __ovld __cnfn clamp(float8 x, float minval, float maxval);
2445 float16 __ovld __cnfn clamp(float16 x, float minval, float maxval);
2446-#ifdef cl_khr_fp64
2447+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2448 double __ovld __cnfn clamp(double x, double minval, double maxval);
2449 double2 __ovld __cnfn clamp(double2 x, double2 minval, double2 maxval);
2450 double3 __ovld __cnfn clamp(double3 x, double3 minval, double3 maxval);
2451@@ -10120,7 +10133,7 @@ double3 __ovld __cnfn clamp(double3 x, double minval, double maxval);
2452 double4 __ovld __cnfn clamp(double4 x, double minval, double maxval);
2453 double8 __ovld __cnfn clamp(double8 x, double minval, double maxval);
2454 double16 __ovld __cnfn clamp(double16 x, double minval, double maxval);
2455-#endif //cl_khr_fp64
2456+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2457 #ifdef cl_khr_fp16
2458 half __ovld __cnfn clamp(half x, half minval, half maxval);
2459 half2 __ovld __cnfn clamp(half2 x, half2 minval, half2 maxval);
2460@@ -10145,14 +10158,14 @@ float3 __ovld __cnfn degrees(float3 radians);
2461 float4 __ovld __cnfn degrees(float4 radians);
2462 float8 __ovld __cnfn degrees(float8 radians);
2463 float16 __ovld __cnfn degrees(float16 radians);
2464-#ifdef cl_khr_fp64
2465+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2466 double __ovld __cnfn degrees(double radians);
2467 double2 __ovld __cnfn degrees(double2 radians);
2468 double3 __ovld __cnfn degrees(double3 radians);
2469 double4 __ovld __cnfn degrees(double4 radians);
2470 double8 __ovld __cnfn degrees(double8 radians);
2471 double16 __ovld __cnfn degrees(double16 radians);
2472-#endif //cl_khr_fp64
2473+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2474 #ifdef cl_khr_fp16
2475 half __ovld __cnfn degrees(half radians);
2476 half2 __ovld __cnfn degrees(half2 radians);
2477@@ -10177,7 +10190,7 @@ float3 __ovld __cnfn max(float3 x, float y);
2478 float4 __ovld __cnfn max(float4 x, float y);
2479 float8 __ovld __cnfn max(float8 x, float y);
2480 float16 __ovld __cnfn max(float16 x, float y);
2481-#ifdef cl_khr_fp64
2482+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2483 double __ovld __cnfn max(double x, double y);
2484 double2 __ovld __cnfn max(double2 x, double2 y);
2485 double3 __ovld __cnfn max(double3 x, double3 y);
2486@@ -10189,7 +10202,7 @@ double3 __ovld __cnfn max(double3 x, double y);
2487 double4 __ovld __cnfn max(double4 x, double y);
2488 double8 __ovld __cnfn max(double8 x, double y);
2489 double16 __ovld __cnfn max(double16 x, double y);
2490-#endif //cl_khr_fp64
2491+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2492 #ifdef cl_khr_fp16
2493 half __ovld __cnfn max(half x, half y);
2494 half2 __ovld __cnfn max(half2 x, half2 y);
2495@@ -10219,7 +10232,7 @@ float3 __ovld __cnfn min(float3 x, float y);
2496 float4 __ovld __cnfn min(float4 x, float y);
2497 float8 __ovld __cnfn min(float8 x, float y);
2498 float16 __ovld __cnfn min(float16 x, float y);
2499-#ifdef cl_khr_fp64
2500+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2501 double __ovld __cnfn min(double x, double y);
2502 double2 __ovld __cnfn min(double2 x, double2 y);
2503 double3 __ovld __cnfn min(double3 x, double3 y);
2504@@ -10231,7 +10244,7 @@ double3 __ovld __cnfn min(double3 x, double y);
2505 double4 __ovld __cnfn min(double4 x, double y);
2506 double8 __ovld __cnfn min(double8 x, double y);
2507 double16 __ovld __cnfn min(double16 x, double y);
2508-#endif //cl_khr_fp64
2509+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2510 #ifdef cl_khr_fp16
2511 half __ovld __cnfn min(half x, half y);
2512 half2 __ovld __cnfn min(half2 x, half2 y);
2513@@ -10264,7 +10277,7 @@ float3 __ovld __cnfn mix(float3 x, float3 y, float a);
2514 float4 __ovld __cnfn mix(float4 x, float4 y, float a);
2515 float8 __ovld __cnfn mix(float8 x, float8 y, float a);
2516 float16 __ovld __cnfn mix(float16 x, float16 y, float a);
2517-#ifdef cl_khr_fp64
2518+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2519 double __ovld __cnfn mix(double x, double y, double a);
2520 double2 __ovld __cnfn mix(double2 x, double2 y, double2 a);
2521 double3 __ovld __cnfn mix(double3 x, double3 y, double3 a);
2522@@ -10276,7 +10289,7 @@ double3 __ovld __cnfn mix(double3 x, double3 y, double a);
2523 double4 __ovld __cnfn mix(double4 x, double4 y, double a);
2524 double8 __ovld __cnfn mix(double8 x, double8 y, double a);
2525 double16 __ovld __cnfn mix(double16 x, double16 y, double a);
2526-#endif //cl_khr_fp64
2527+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2528 #ifdef cl_khr_fp16
2529 half __ovld __cnfn mix(half x, half y, half a);
2530 half2 __ovld __cnfn mix(half2 x, half2 y, half2 a);
2531@@ -10301,14 +10314,14 @@ float3 __ovld __cnfn radians(float3 degrees);
2532 float4 __ovld __cnfn radians(float4 degrees);
2533 float8 __ovld __cnfn radians(float8 degrees);
2534 float16 __ovld __cnfn radians(float16 degrees);
2535-#ifdef cl_khr_fp64
2536+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2537 double __ovld __cnfn radians(double degrees);
2538 double2 __ovld __cnfn radians(double2 degrees);
2539 double3 __ovld __cnfn radians(double3 degrees);
2540 double4 __ovld __cnfn radians(double4 degrees);
2541 double8 __ovld __cnfn radians(double8 degrees);
2542 double16 __ovld __cnfn radians(double16 degrees);
2543-#endif //cl_khr_fp64
2544+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2545 #ifdef cl_khr_fp16
2546 half __ovld __cnfn radians(half degrees);
2547 half2 __ovld __cnfn radians(half2 degrees);
2548@@ -10332,7 +10345,7 @@ float3 __ovld __cnfn step(float edge, float3 x);
2549 float4 __ovld __cnfn step(float edge, float4 x);
2550 float8 __ovld __cnfn step(float edge, float8 x);
2551 float16 __ovld __cnfn step(float edge, float16 x);
2552-#ifdef cl_khr_fp64
2553+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2554 double __ovld __cnfn step(double edge, double x);
2555 double2 __ovld __cnfn step(double2 edge, double2 x);
2556 double3 __ovld __cnfn step(double3 edge, double3 x);
2557@@ -10344,7 +10357,7 @@ double3 __ovld __cnfn step(double edge, double3 x);
2558 double4 __ovld __cnfn step(double edge, double4 x);
2559 double8 __ovld __cnfn step(double edge, double8 x);
2560 double16 __ovld __cnfn step(double edge, double16 x);
2561-#endif //cl_khr_fp64
2562+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2563 #ifdef cl_khr_fp16
2564 half __ovld __cnfn step(half edge, half x);
2565 half2 __ovld __cnfn step(half2 edge, half2 x);
2566@@ -10383,7 +10396,7 @@ float3 __ovld __cnfn smoothstep(float edge0, float edge1, float3 x);
2567 float4 __ovld __cnfn smoothstep(float edge0, float edge1, float4 x);
2568 float8 __ovld __cnfn smoothstep(float edge0, float edge1, float8 x);
2569 float16 __ovld __cnfn smoothstep(float edge0, float edge1, float16 x);
2570-#ifdef cl_khr_fp64
2571+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2572 double __ovld __cnfn smoothstep(double edge0, double edge1, double x);
2573 double2 __ovld __cnfn smoothstep(double2 edge0, double2 edge1, double2 x);
2574 double3 __ovld __cnfn smoothstep(double3 edge0, double3 edge1, double3 x);
2575@@ -10395,7 +10408,7 @@ double3 __ovld __cnfn smoothstep(double edge0, double edge1, double3 x);
2576 double4 __ovld __cnfn smoothstep(double edge0, double edge1, double4 x);
2577 double8 __ovld __cnfn smoothstep(double edge0, double edge1, double8 x);
2578 double16 __ovld __cnfn smoothstep(double edge0, double edge1, double16 x);
2579-#endif //cl_khr_fp64
2580+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2581 #ifdef cl_khr_fp16
2582 half __ovld __cnfn smoothstep(half edge0, half edge1, half x);
2583 half2 __ovld __cnfn smoothstep(half2 edge0, half2 edge1, half2 x);
2584@@ -10420,14 +10433,14 @@ float3 __ovld __cnfn sign(float3 x);
2585 float4 __ovld __cnfn sign(float4 x);
2586 float8 __ovld __cnfn sign(float8 x);
2587 float16 __ovld __cnfn sign(float16 x);
2588-#ifdef cl_khr_fp64
2589+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2590 double __ovld __cnfn sign(double x);
2591 double2 __ovld __cnfn sign(double2 x);
2592 double3 __ovld __cnfn sign(double3 x);
2593 double4 __ovld __cnfn sign(double4 x);
2594 double8 __ovld __cnfn sign(double8 x);
2595 double16 __ovld __cnfn sign(double16 x);
2596-#endif //cl_khr_fp64
2597+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2598 #ifdef cl_khr_fp16
2599 half __ovld __cnfn sign(half x);
2600 half2 __ovld __cnfn sign(half2 x);
2601@@ -10445,10 +10458,10 @@ half16 __ovld __cnfn sign(half16 x);
2602 */
2603 float4 __ovld __cnfn cross(float4 p0, float4 p1);
2604 float3 __ovld __cnfn cross(float3 p0, float3 p1);
2605-#ifdef cl_khr_fp64
2606+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2607 double4 __ovld __cnfn cross(double4 p0, double4 p1);
2608 double3 __ovld __cnfn cross(double3 p0, double3 p1);
2609-#endif //cl_khr_fp64
2610+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2611 #ifdef cl_khr_fp16
2612 half4 __ovld __cnfn cross(half4 p0, half4 p1);
2613 half3 __ovld __cnfn cross(half3 p0, half3 p1);
2614@@ -10461,12 +10474,12 @@ float __ovld __cnfn dot(float p0, float p1);
2615 float __ovld __cnfn dot(float2 p0, float2 p1);
2616 float __ovld __cnfn dot(float3 p0, float3 p1);
2617 float __ovld __cnfn dot(float4 p0, float4 p1);
2618-#ifdef cl_khr_fp64
2619+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2620 double __ovld __cnfn dot(double p0, double p1);
2621 double __ovld __cnfn dot(double2 p0, double2 p1);
2622 double __ovld __cnfn dot(double3 p0, double3 p1);
2623 double __ovld __cnfn dot(double4 p0, double4 p1);
2624-#endif //cl_khr_fp64
2625+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2626 #ifdef cl_khr_fp16
2627 half __ovld __cnfn dot(half p0, half p1);
2628 half __ovld __cnfn dot(half2 p0, half2 p1);
2629@@ -10482,12 +10495,12 @@ float __ovld __cnfn distance(float p0, float p1);
2630 float __ovld __cnfn distance(float2 p0, float2 p1);
2631 float __ovld __cnfn distance(float3 p0, float3 p1);
2632 float __ovld __cnfn distance(float4 p0, float4 p1);
2633-#ifdef cl_khr_fp64
2634+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2635 double __ovld __cnfn distance(double p0, double p1);
2636 double __ovld __cnfn distance(double2 p0, double2 p1);
2637 double __ovld __cnfn distance(double3 p0, double3 p1);
2638 double __ovld __cnfn distance(double4 p0, double4 p1);
2639-#endif //cl_khr_fp64
2640+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2641 #ifdef cl_khr_fp16
2642 half __ovld __cnfn distance(half p0, half p1);
2643 half __ovld __cnfn distance(half2 p0, half2 p1);
2644@@ -10503,12 +10516,12 @@ float __ovld __cnfn length(float p);
2645 float __ovld __cnfn length(float2 p);
2646 float __ovld __cnfn length(float3 p);
2647 float __ovld __cnfn length(float4 p);
2648-#ifdef cl_khr_fp64
2649+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2650 double __ovld __cnfn length(double p);
2651 double __ovld __cnfn length(double2 p);
2652 double __ovld __cnfn length(double3 p);
2653 double __ovld __cnfn length(double4 p);
2654-#endif //cl_khr_fp64
2655+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2656 #ifdef cl_khr_fp16
2657 half __ovld __cnfn length(half p);
2658 half __ovld __cnfn length(half2 p);
2659@@ -10524,12 +10537,12 @@ float __ovld __cnfn normalize(float p);
2660 float2 __ovld __cnfn normalize(float2 p);
2661 float3 __ovld __cnfn normalize(float3 p);
2662 float4 __ovld __cnfn normalize(float4 p);
2663-#ifdef cl_khr_fp64
2664+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2665 double __ovld __cnfn normalize(double p);
2666 double2 __ovld __cnfn normalize(double2 p);
2667 double3 __ovld __cnfn normalize(double3 p);
2668 double4 __ovld __cnfn normalize(double4 p);
2669-#endif //cl_khr_fp64
2670+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2671 #ifdef cl_khr_fp16
2672 half __ovld __cnfn normalize(half p);
2673 half2 __ovld __cnfn normalize(half2 p);
2674@@ -10610,14 +10623,14 @@ int3 __ovld __cnfn isequal(float3 x, float3 y);
2675 int4 __ovld __cnfn isequal(float4 x, float4 y);
2676 int8 __ovld __cnfn isequal(float8 x, float8 y);
2677 int16 __ovld __cnfn isequal(float16 x, float16 y);
2678-#ifdef cl_khr_fp64
2679+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2680 int __ovld __cnfn isequal(double x, double y);
2681 long2 __ovld __cnfn isequal(double2 x, double2 y);
2682 long3 __ovld __cnfn isequal(double3 x, double3 y);
2683 long4 __ovld __cnfn isequal(double4 x, double4 y);
2684 long8 __ovld __cnfn isequal(double8 x, double8 y);
2685 long16 __ovld __cnfn isequal(double16 x, double16 y);
2686-#endif //cl_khr_fp64
2687+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2688 #ifdef cl_khr_fp16
2689 int __ovld __cnfn isequal(half x, half y);
2690 short2 __ovld __cnfn isequal(half2 x, half2 y);
2691@@ -10636,14 +10649,14 @@ int3 __ovld __cnfn isnotequal(float3 x, float3 y);
2692 int4 __ovld __cnfn isnotequal(float4 x, float4 y);
2693 int8 __ovld __cnfn isnotequal(float8 x, float8 y);
2694 int16 __ovld __cnfn isnotequal(float16 x, float16 y);
2695-#ifdef cl_khr_fp64
2696+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2697 int __ovld __cnfn isnotequal(double x, double y);
2698 long2 __ovld __cnfn isnotequal(double2 x, double2 y);
2699 long3 __ovld __cnfn isnotequal(double3 x, double3 y);
2700 long4 __ovld __cnfn isnotequal(double4 x, double4 y);
2701 long8 __ovld __cnfn isnotequal(double8 x, double8 y);
2702 long16 __ovld __cnfn isnotequal(double16 x, double16 y);
2703-#endif //cl_khr_fp64
2704+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2705 #ifdef cl_khr_fp16
2706 int __ovld __cnfn isnotequal(half x, half y);
2707 short2 __ovld __cnfn isnotequal(half2 x, half2 y);
2708@@ -10662,14 +10675,14 @@ int3 __ovld __cnfn isgreater(float3 x, float3 y);
2709 int4 __ovld __cnfn isgreater(float4 x, float4 y);
2710 int8 __ovld __cnfn isgreater(float8 x, float8 y);
2711 int16 __ovld __cnfn isgreater(float16 x, float16 y);
2712-#ifdef cl_khr_fp64
2713+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2714 int __ovld __cnfn isgreater(double x, double y);
2715 long2 __ovld __cnfn isgreater(double2 x, double2 y);
2716 long3 __ovld __cnfn isgreater(double3 x, double3 y);
2717 long4 __ovld __cnfn isgreater(double4 x, double4 y);
2718 long8 __ovld __cnfn isgreater(double8 x, double8 y);
2719 long16 __ovld __cnfn isgreater(double16 x, double16 y);
2720-#endif //cl_khr_fp64
2721+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2722 #ifdef cl_khr_fp16
2723 int __ovld __cnfn isgreater(half x, half y);
2724 short2 __ovld __cnfn isgreater(half2 x, half2 y);
2725@@ -10688,14 +10701,14 @@ int3 __ovld __cnfn isgreaterequal(float3 x, float3 y);
2726 int4 __ovld __cnfn isgreaterequal(float4 x, float4 y);
2727 int8 __ovld __cnfn isgreaterequal(float8 x, float8 y);
2728 int16 __ovld __cnfn isgreaterequal(float16 x, float16 y);
2729-#ifdef cl_khr_fp64
2730+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2731 int __ovld __cnfn isgreaterequal(double x, double y);
2732 long2 __ovld __cnfn isgreaterequal(double2 x, double2 y);
2733 long3 __ovld __cnfn isgreaterequal(double3 x, double3 y);
2734 long4 __ovld __cnfn isgreaterequal(double4 x, double4 y);
2735 long8 __ovld __cnfn isgreaterequal(double8 x, double8 y);
2736 long16 __ovld __cnfn isgreaterequal(double16 x, double16 y);
2737-#endif //cl_khr_fp64
2738+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2739 #ifdef cl_khr_fp16
2740 int __ovld __cnfn isgreaterequal(half x, half y);
2741 short2 __ovld __cnfn isgreaterequal(half2 x, half2 y);
2742@@ -10714,14 +10727,14 @@ int3 __ovld __cnfn isless(float3 x, float3 y);
2743 int4 __ovld __cnfn isless(float4 x, float4 y);
2744 int8 __ovld __cnfn isless(float8 x, float8 y);
2745 int16 __ovld __cnfn isless(float16 x, float16 y);
2746-#ifdef cl_khr_fp64
2747+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2748 int __ovld __cnfn isless(double x, double y);
2749 long2 __ovld __cnfn isless(double2 x, double2 y);
2750 long3 __ovld __cnfn isless(double3 x, double3 y);
2751 long4 __ovld __cnfn isless(double4 x, double4 y);
2752 long8 __ovld __cnfn isless(double8 x, double8 y);
2753 long16 __ovld __cnfn isless(double16 x, double16 y);
2754-#endif //cl_khr_fp64
2755+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2756 #ifdef cl_khr_fp16
2757 int __ovld __cnfn isless(half x, half y);
2758 short2 __ovld __cnfn isless(half2 x, half2 y);
2759@@ -10740,14 +10753,14 @@ int3 __ovld __cnfn islessequal(float3 x, float3 y);
2760 int4 __ovld __cnfn islessequal(float4 x, float4 y);
2761 int8 __ovld __cnfn islessequal(float8 x, float8 y);
2762 int16 __ovld __cnfn islessequal(float16 x, float16 y);
2763-#ifdef cl_khr_fp64
2764+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2765 int __ovld __cnfn islessequal(double x, double y);
2766 long2 __ovld __cnfn islessequal(double2 x, double2 y);
2767 long3 __ovld __cnfn islessequal(double3 x, double3 y);
2768 long4 __ovld __cnfn islessequal(double4 x, double4 y);
2769 long8 __ovld __cnfn islessequal(double8 x, double8 y);
2770 long16 __ovld __cnfn islessequal(double16 x, double16 y);
2771-#endif //cl_khr_fp64
2772+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2773 #ifdef cl_khr_fp16
2774 int __ovld __cnfn islessequal(half x, half y);
2775 short2 __ovld __cnfn islessequal(half2 x, half2 y);
2776@@ -10767,14 +10780,14 @@ int3 __ovld __cnfn islessgreater(float3 x, float3 y);
2777 int4 __ovld __cnfn islessgreater(float4 x, float4 y);
2778 int8 __ovld __cnfn islessgreater(float8 x, float8 y);
2779 int16 __ovld __cnfn islessgreater(float16 x, float16 y);
2780-#ifdef cl_khr_fp64
2781+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2782 int __ovld __cnfn islessgreater(double x, double y);
2783 long2 __ovld __cnfn islessgreater(double2 x, double2 y);
2784 long3 __ovld __cnfn islessgreater(double3 x, double3 y);
2785 long4 __ovld __cnfn islessgreater(double4 x, double4 y);
2786 long8 __ovld __cnfn islessgreater(double8 x, double8 y);
2787 long16 __ovld __cnfn islessgreater(double16 x, double16 y);
2788-#endif //cl_khr_fp64
2789+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2790 #ifdef cl_khr_fp16
2791 int __ovld __cnfn islessgreater(half x, half y);
2792 short2 __ovld __cnfn islessgreater(half2 x, half2 y);
2793@@ -10793,14 +10806,14 @@ int3 __ovld __cnfn isfinite(float3);
2794 int4 __ovld __cnfn isfinite(float4);
2795 int8 __ovld __cnfn isfinite(float8);
2796 int16 __ovld __cnfn isfinite(float16);
2797-#ifdef cl_khr_fp64
2798+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2799 int __ovld __cnfn isfinite(double);
2800 long2 __ovld __cnfn isfinite(double2);
2801 long3 __ovld __cnfn isfinite(double3);
2802 long4 __ovld __cnfn isfinite(double4);
2803 long8 __ovld __cnfn isfinite(double8);
2804 long16 __ovld __cnfn isfinite(double16);
2805-#endif //cl_khr_fp64
2806+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2807 #ifdef cl_khr_fp16
2808 int __ovld __cnfn isfinite(half);
2809 short2 __ovld __cnfn isfinite(half2);
2810@@ -10819,14 +10832,14 @@ int3 __ovld __cnfn isinf(float3);
2811 int4 __ovld __cnfn isinf(float4);
2812 int8 __ovld __cnfn isinf(float8);
2813 int16 __ovld __cnfn isinf(float16);
2814-#ifdef cl_khr_fp64
2815+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2816 int __ovld __cnfn isinf(double);
2817 long2 __ovld __cnfn isinf(double2);
2818 long3 __ovld __cnfn isinf(double3);
2819 long4 __ovld __cnfn isinf(double4);
2820 long8 __ovld __cnfn isinf(double8);
2821 long16 __ovld __cnfn isinf(double16);
2822-#endif //cl_khr_fp64
2823+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2824 #ifdef cl_khr_fp16
2825 int __ovld __cnfn isinf(half);
2826 short2 __ovld __cnfn isinf(half2);
2827@@ -10845,14 +10858,14 @@ int3 __ovld __cnfn isnan(float3);
2828 int4 __ovld __cnfn isnan(float4);
2829 int8 __ovld __cnfn isnan(float8);
2830 int16 __ovld __cnfn isnan(float16);
2831-#ifdef cl_khr_fp64
2832+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2833 int __ovld __cnfn isnan(double);
2834 long2 __ovld __cnfn isnan(double2);
2835 long3 __ovld __cnfn isnan(double3);
2836 long4 __ovld __cnfn isnan(double4);
2837 long8 __ovld __cnfn isnan(double8);
2838 long16 __ovld __cnfn isnan(double16);
2839-#endif //cl_khr_fp64
2840+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2841 #ifdef cl_khr_fp16
2842 int __ovld __cnfn isnan(half);
2843 short2 __ovld __cnfn isnan(half2);
2844@@ -10871,14 +10884,14 @@ int3 __ovld __cnfn isnormal(float3);
2845 int4 __ovld __cnfn isnormal(float4);
2846 int8 __ovld __cnfn isnormal(float8);
2847 int16 __ovld __cnfn isnormal(float16);
2848-#ifdef cl_khr_fp64
2849+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2850 int __ovld __cnfn isnormal(double);
2851 long2 __ovld __cnfn isnormal(double2);
2852 long3 __ovld __cnfn isnormal(double3);
2853 long4 __ovld __cnfn isnormal(double4);
2854 long8 __ovld __cnfn isnormal(double8);
2855 long16 __ovld __cnfn isnormal(double16);
2856-#endif //cl_khr_fp64
2857+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2858 #ifdef cl_khr_fp16
2859 int __ovld __cnfn isnormal(half);
2860 short2 __ovld __cnfn isnormal(half2);
2861@@ -10899,14 +10912,14 @@ int3 __ovld __cnfn isordered(float3 x, float3 y);
2862 int4 __ovld __cnfn isordered(float4 x, float4 y);
2863 int8 __ovld __cnfn isordered(float8 x, float8 y);
2864 int16 __ovld __cnfn isordered(float16 x, float16 y);
2865-#ifdef cl_khr_fp64
2866+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2867 int __ovld __cnfn isordered(double x, double y);
2868 long2 __ovld __cnfn isordered(double2 x, double2 y);
2869 long3 __ovld __cnfn isordered(double3 x, double3 y);
2870 long4 __ovld __cnfn isordered(double4 x, double4 y);
2871 long8 __ovld __cnfn isordered(double8 x, double8 y);
2872 long16 __ovld __cnfn isordered(double16 x, double16 y);
2873-#endif //cl_khr_fp64
2874+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2875 #ifdef cl_khr_fp16
2876 int __ovld __cnfn isordered(half x, half y);
2877 short2 __ovld __cnfn isordered(half2 x, half2 y);
2878@@ -10927,14 +10940,14 @@ int3 __ovld __cnfn isunordered(float3 x, float3 y);
2879 int4 __ovld __cnfn isunordered(float4 x, float4 y);
2880 int8 __ovld __cnfn isunordered(float8 x, float8 y);
2881 int16 __ovld __cnfn isunordered(float16 x, float16 y);
2882-#ifdef cl_khr_fp64
2883+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2884 int __ovld __cnfn isunordered(double x, double y);
2885 long2 __ovld __cnfn isunordered(double2 x, double2 y);
2886 long3 __ovld __cnfn isunordered(double3 x, double3 y);
2887 long4 __ovld __cnfn isunordered(double4 x, double4 y);
2888 long8 __ovld __cnfn isunordered(double8 x, double8 y);
2889 long16 __ovld __cnfn isunordered(double16 x, double16 y);
2890-#endif //cl_khr_fp64
2891+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2892 #ifdef cl_khr_fp16
2893 int __ovld __cnfn isunordered(half x, half y);
2894 short2 __ovld __cnfn isunordered(half2 x, half2 y);
2895@@ -10957,14 +10970,14 @@ int3 __ovld __cnfn signbit(float3);
2896 int4 __ovld __cnfn signbit(float4);
2897 int8 __ovld __cnfn signbit(float8);
2898 int16 __ovld __cnfn signbit(float16);
2899-#ifdef cl_khr_fp64
2900+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2901 int __ovld __cnfn signbit(double);
2902 long2 __ovld __cnfn signbit(double2);
2903 long3 __ovld __cnfn signbit(double3);
2904 long4 __ovld __cnfn signbit(double4);
2905 long8 __ovld __cnfn signbit(double8);
2906 long16 __ovld __cnfn signbit(double16);
2907-#endif //cl_khr_fp64
2908+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2909 #ifdef cl_khr_fp16
2910 int __ovld __cnfn signbit(half);
2911 short2 __ovld __cnfn signbit(half2);
2912@@ -11091,14 +11104,14 @@ float3 __ovld __cnfn bitselect(float3 a, float3 b, float3 c);
2913 float4 __ovld __cnfn bitselect(float4 a, float4 b, float4 c);
2914 float8 __ovld __cnfn bitselect(float8 a, float8 b, float8 c);
2915 float16 __ovld __cnfn bitselect(float16 a, float16 b, float16 c);
2916-#ifdef cl_khr_fp64
2917+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2918 double __ovld __cnfn bitselect(double a, double b, double c);
2919 double2 __ovld __cnfn bitselect(double2 a, double2 b, double2 c);
2920 double3 __ovld __cnfn bitselect(double3 a, double3 b, double3 c);
2921 double4 __ovld __cnfn bitselect(double4 a, double4 b, double4 c);
2922 double8 __ovld __cnfn bitselect(double8 a, double8 b, double8 c);
2923 double16 __ovld __cnfn bitselect(double16 a, double16 b, double16 c);
2924-#endif //cl_khr_fp64
2925+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2926 #ifdef cl_khr_fp16
2927 half __ovld __cnfn bitselect(half a, half b, half c);
2928 half2 __ovld __cnfn bitselect(half2 a, half2 b, half2 c);
2929@@ -11231,7 +11244,7 @@ ulong8 __ovld __cnfn select(ulong8 a, ulong8 b, ulong8 c);
2930 long16 __ovld __cnfn select(long16 a, long16 b, ulong16 c);
2931 ulong16 __ovld __cnfn select(ulong16 a, ulong16 b, ulong16 c);
2932
2933-#ifdef cl_khr_fp64
2934+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2935 double __ovld __cnfn select(double a, double b, long c);
2936 double2 __ovld __cnfn select(double2 a, double2 b, long2 c);
2937 double3 __ovld __cnfn select(double3 a, double3 b, long3 c);
2938@@ -11244,7 +11257,7 @@ double3 __ovld __cnfn select(double3 a, double3 b, ulong3 c);
2939 double4 __ovld __cnfn select(double4 a, double4 b, ulong4 c);
2940 double8 __ovld __cnfn select(double8 a, double8 b, ulong8 c);
2941 double16 __ovld __cnfn select(double16 a, double16 b, ulong16 c);
2942-#endif //cl_khr_fp64
2943+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2944 #ifdef cl_khr_fp16
2945 half __ovld __cnfn select(half a, half b, short c);
2946 half2 __ovld __cnfn select(half2 a, half2 b, short2 c);
2947@@ -11323,13 +11336,13 @@ uint16 __ovld vload16(size_t offset, const __constant uint *p);
2948 long16 __ovld vload16(size_t offset, const __constant long *p);
2949 ulong16 __ovld vload16(size_t offset, const __constant ulong *p);
2950 float16 __ovld vload16(size_t offset, const __constant float *p);
2951-#ifdef cl_khr_fp64
2952+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2953 double2 __ovld vload2(size_t offset, const __constant double *p);
2954 double3 __ovld vload3(size_t offset, const __constant double *p);
2955 double4 __ovld vload4(size_t offset, const __constant double *p);
2956 double8 __ovld vload8(size_t offset, const __constant double *p);
2957 double16 __ovld vload16(size_t offset, const __constant double *p);
2958-#endif //cl_khr_fp64
2959+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2960
2961 #ifdef cl_khr_fp16
2962 half __ovld vload(size_t offset, const __constant half *p);
2963@@ -11340,7 +11353,7 @@ half8 __ovld vload8(size_t offset, const __constant half *p);
2964 half16 __ovld vload16(size_t offset, const __constant half *p);
2965 #endif //cl_khr_fp16
2966
2967-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
2968+#ifdef __opencl_c_generic_address_space
2969 char2 __ovld vload2(size_t offset, const char *p);
2970 uchar2 __ovld vload2(size_t offset, const uchar *p);
2971 short2 __ovld vload2(size_t offset, const short *p);
2972@@ -11387,13 +11400,13 @@ long16 __ovld vload16(size_t offset, const long *p);
2973 ulong16 __ovld vload16(size_t offset, const ulong *p);
2974 float16 __ovld vload16(size_t offset, const float *p);
2975
2976-#ifdef cl_khr_fp64
2977+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2978 double2 __ovld vload2(size_t offset, const double *p);
2979 double3 __ovld vload3(size_t offset, const double *p);
2980 double4 __ovld vload4(size_t offset, const double *p);
2981 double8 __ovld vload8(size_t offset, const double *p);
2982 double16 __ovld vload16(size_t offset, const double *p);
2983-#endif //cl_khr_fp64
2984+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
2985
2986 #ifdef cl_khr_fp16
2987 half __ovld vload(size_t offset, const half *p);
2988@@ -11403,7 +11416,7 @@ half4 __ovld vload4(size_t offset, const half *p);
2989 half8 __ovld vload8(size_t offset, const half *p);
2990 half16 __ovld vload16(size_t offset, const half *p);
2991 #endif //cl_khr_fp16
2992-#else
2993+#endif //__opencl_c_generic_address_space
2994 char2 __ovld vload2(size_t offset, const __global char *p);
2995 uchar2 __ovld vload2(size_t offset, const __global uchar *p);
2996 short2 __ovld vload2(size_t offset, const __global short *p);
2997@@ -11540,7 +11553,7 @@ long16 __ovld vload16(size_t offset, const __private long *p);
2998 ulong16 __ovld vload16(size_t offset, const __private ulong *p);
2999 float16 __ovld vload16(size_t offset, const __private float *p);
3000
3001-#ifdef cl_khr_fp64
3002+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3003 double2 __ovld vload2(size_t offset, const __global double *p);
3004 double3 __ovld vload3(size_t offset, const __global double *p);
3005 double4 __ovld vload4(size_t offset, const __global double *p);
3006@@ -11556,7 +11569,7 @@ double3 __ovld vload3(size_t offset, const __private double *p);
3007 double4 __ovld vload4(size_t offset, const __private double *p);
3008 double8 __ovld vload8(size_t offset, const __private double *p);
3009 double16 __ovld vload16(size_t offset, const __private double *p);
3010-#endif //cl_khr_fp64
3011+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3012
3013 #ifdef cl_khr_fp16
3014 half __ovld vload(size_t offset, const __global half *p);
3015@@ -11578,9 +11591,8 @@ half4 __ovld vload4(size_t offset, const __private half *p);
3016 half8 __ovld vload8(size_t offset, const __private half *p);
3017 half16 __ovld vload16(size_t offset, const __private half *p);
3018 #endif //cl_khr_fp16
3019-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3020
3021-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3022+#ifdef __opencl_c_generic_address_space
3023 void __ovld vstore2(char2 data, size_t offset, char *p);
3024 void __ovld vstore2(uchar2 data, size_t offset, uchar *p);
3025 void __ovld vstore2(short2 data, size_t offset, short *p);
3026@@ -11626,13 +11638,13 @@ void __ovld vstore16(uint16 data, size_t offset, uint *p);
3027 void __ovld vstore16(long16 data, size_t offset, long *p);
3028 void __ovld vstore16(ulong16 data, size_t offset, ulong *p);
3029 void __ovld vstore16(float16 data, size_t offset, float *p);
3030-#ifdef cl_khr_fp64
3031+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3032 void __ovld vstore2(double2 data, size_t offset, double *p);
3033 void __ovld vstore3(double3 data, size_t offset, double *p);
3034 void __ovld vstore4(double4 data, size_t offset, double *p);
3035 void __ovld vstore8(double8 data, size_t offset, double *p);
3036 void __ovld vstore16(double16 data, size_t offset, double *p);
3037-#endif //cl_khr_fp64
3038+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3039 #ifdef cl_khr_fp16
3040 void __ovld vstore(half data, size_t offset, half *p);
3041 void __ovld vstore2(half2 data, size_t offset, half *p);
3042@@ -11641,7 +11653,7 @@ void __ovld vstore4(half4 data, size_t offset, half *p);
3043 void __ovld vstore8(half8 data, size_t offset, half *p);
3044 void __ovld vstore16(half16 data, size_t offset, half *p);
3045 #endif //cl_khr_fp16
3046-#else
3047+#endif //__opencl_c_generic_address_space
3048 void __ovld vstore2(char2 data, size_t offset, __global char *p);
3049 void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p);
3050 void __ovld vstore2(short2 data, size_t offset, __global short *p);
3051@@ -11777,7 +11789,7 @@ void __ovld vstore16(uint16 data, size_t offset, __private uint *p);
3052 void __ovld vstore16(long16 data, size_t offset, __private long *p);
3053 void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p);
3054 void __ovld vstore16(float16 data, size_t offset, __private float *p);
3055-#ifdef cl_khr_fp64
3056+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3057 void __ovld vstore2(double2 data, size_t offset, __global double *p);
3058 void __ovld vstore3(double3 data, size_t offset, __global double *p);
3059 void __ovld vstore4(double4 data, size_t offset, __global double *p);
3060@@ -11793,7 +11805,7 @@ void __ovld vstore3(double3 data, size_t offset, __private double *p);
3061 void __ovld vstore4(double4 data, size_t offset, __private double *p);
3062 void __ovld vstore8(double8 data, size_t offset, __private double *p);
3063 void __ovld vstore16(double16 data, size_t offset, __private double *p);
3064-#endif //cl_khr_fp64
3065+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3066 #ifdef cl_khr_fp16
3067 void __ovld vstore(half data, size_t offset, __global half *p);
3068 void __ovld vstore2(half2 data, size_t offset, __global half *p);
3069@@ -11814,7 +11826,6 @@ void __ovld vstore4(half4 data, size_t offset, __private half *p);
3070 void __ovld vstore8(half8 data, size_t offset, __private half *p);
3071 void __ovld vstore16(half16 data, size_t offset, __private half *p);
3072 #endif //cl_khr_fp16
3073-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3074
3075 /**
3076 * Read sizeof (half) bytes of data from address
3077@@ -11825,13 +11836,12 @@ void __ovld vstore16(half16 data, size_t offset, __private half *p);
3078 * must be 16-bit aligned.
3079 */
3080 float __ovld vload_half(size_t offset, const __constant half *p);
3081-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3082+#ifdef __opencl_c_generic_address_space
3083 float __ovld vload_half(size_t offset, const half *p);
3084-#else
3085+#endif //__opencl_c_generic_address_space
3086 float __ovld vload_half(size_t offset, const __global half *p);
3087 float __ovld vload_half(size_t offset, const __local half *p);
3088 float __ovld vload_half(size_t offset, const __private half *p);
3089-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3090
3091 /**
3092 * Read sizeof (halfn) bytes of data from address
3093@@ -11846,13 +11856,13 @@ float3 __ovld vload_half3(size_t offset, const __constant half *p);
3094 float4 __ovld vload_half4(size_t offset, const __constant half *p);
3095 float8 __ovld vload_half8(size_t offset, const __constant half *p);
3096 float16 __ovld vload_half16(size_t offset, const __constant half *p);
3097-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3098+#ifdef __opencl_c_generic_address_space
3099 float2 __ovld vload_half2(size_t offset, const half *p);
3100 float3 __ovld vload_half3(size_t offset, const half *p);
3101 float4 __ovld vload_half4(size_t offset, const half *p);
3102 float8 __ovld vload_half8(size_t offset, const half *p);
3103 float16 __ovld vload_half16(size_t offset, const half *p);
3104-#else
3105+#endif //__opencl_c_generic_address_space
3106 float2 __ovld vload_half2(size_t offset, const __global half *p);
3107 float3 __ovld vload_half3(size_t offset, const __global half *p);
3108 float4 __ovld vload_half4(size_t offset, const __global half *p);
3109@@ -11868,7 +11878,6 @@ float3 __ovld vload_half3(size_t offset, const __private half *p);
3110 float4 __ovld vload_half4(size_t offset, const __private half *p);
3111 float8 __ovld vload_half8(size_t offset, const __private half *p);
3112 float16 __ovld vload_half16(size_t offset, const __private half *p);
3113-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3114
3115 /**
3116 * The float value given by data is first
3117@@ -11881,20 +11890,20 @@ float16 __ovld vload_half16(size_t offset, const __private half *p);
3118 * The default current rounding mode is round to
3119 * nearest even.
3120 */
3121-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3122+#ifdef __opencl_c_generic_address_space
3123 void __ovld vstore_half(float data, size_t offset, half *p);
3124 void __ovld vstore_half_rte(float data, size_t offset, half *p);
3125 void __ovld vstore_half_rtz(float data, size_t offset, half *p);
3126 void __ovld vstore_half_rtp(float data, size_t offset, half *p);
3127 void __ovld vstore_half_rtn(float data, size_t offset, half *p);
3128-#ifdef cl_khr_fp64
3129+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3130 void __ovld vstore_half(double data, size_t offset, half *p);
3131 void __ovld vstore_half_rte(double data, size_t offset, half *p);
3132 void __ovld vstore_half_rtz(double data, size_t offset, half *p);
3133 void __ovld vstore_half_rtp(double data, size_t offset, half *p);
3134 void __ovld vstore_half_rtn(double data, size_t offset, half *p);
3135-#endif //cl_khr_fp64
3136-#else
3137+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3138+#endif //__opencl_c_generic_address_space
3139 void __ovld vstore_half(float data, size_t offset, __global half *p);
3140 void __ovld vstore_half_rte(float data, size_t offset, __global half *p);
3141 void __ovld vstore_half_rtz(float data, size_t offset, __global half *p);
3142@@ -11910,7 +11919,7 @@ void __ovld vstore_half_rte(float data, size_t offset, __private half *p);
3143 void __ovld vstore_half_rtz(float data, size_t offset, __private half *p);
3144 void __ovld vstore_half_rtp(float data, size_t offset, __private half *p);
3145 void __ovld vstore_half_rtn(float data, size_t offset, __private half *p);
3146-#ifdef cl_khr_fp64
3147+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3148 void __ovld vstore_half(double data, size_t offset, __global half *p);
3149 void __ovld vstore_half_rte(double data, size_t offset, __global half *p);
3150 void __ovld vstore_half_rtz(double data, size_t offset, __global half *p);
3151@@ -11926,8 +11935,7 @@ void __ovld vstore_half_rte(double data, size_t offset, __private half *p);
3152 void __ovld vstore_half_rtz(double data, size_t offset, __private half *p);
3153 void __ovld vstore_half_rtp(double data, size_t offset, __private half *p);
3154 void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);
3155-#endif //cl_khr_fp64
3156-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3157+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3158
3159 /**
3160 * The floatn value given by data is converted to
3161@@ -11940,7 +11948,7 @@ void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);
3162 * The default current rounding mode is round to
3163 * nearest even.
3164 */
3165-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3166+#ifdef __opencl_c_generic_address_space
3167 void __ovld vstore_half2(float2 data, size_t offset, half *p);
3168 void __ovld vstore_half3(float3 data, size_t offset, half *p);
3169 void __ovld vstore_half4(float4 data, size_t offset, half *p);
3170@@ -11966,7 +11974,7 @@ void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p);
3171 void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p);
3172 void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p);
3173 void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p);
3174-#ifdef cl_khr_fp64
3175+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3176 void __ovld vstore_half2(double2 data, size_t offset, half *p);
3177 void __ovld vstore_half3(double3 data, size_t offset, half *p);
3178 void __ovld vstore_half4(double4 data, size_t offset, half *p);
3179@@ -11992,8 +12000,8 @@ void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p);
3180 void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p);
3181 void __ovld vstore_half8_rtn(double8 data, size_t offset, half *p);
3182 void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p);
3183-#endif //cl_khr_fp64
3184-#else
3185+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3186+#endif //__opencl_c_generic_address_space
3187 void __ovld vstore_half2(float2 data, size_t offset, __global half *p);
3188 void __ovld vstore_half3(float3 data, size_t offset, __global half *p);
3189 void __ovld vstore_half4(float4 data, size_t offset, __global half *p);
3190@@ -12069,7 +12077,7 @@ void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p);
3191 void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p);
3192 void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p);
3193 void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p);
3194-#ifdef cl_khr_fp64
3195+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3196 void __ovld vstore_half2(double2 data, size_t offset, __global half *p);
3197 void __ovld vstore_half3(double3 data, size_t offset, __global half *p);
3198 void __ovld vstore_half4(double4 data, size_t offset, __global half *p);
3199@@ -12145,8 +12153,7 @@ void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p);
3200 void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p);
3201 void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p);
3202 void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p);
3203-#endif //cl_khr_fp64
3204-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3205+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3206
3207 /**
3208 * For n = 1, 2, 4, 8 and 16 read sizeof (halfn)
3209@@ -12167,14 +12174,14 @@ float3 __ovld vloada_half3(size_t offset, const __constant half *p);
3210 float4 __ovld vloada_half4(size_t offset, const __constant half *p);
3211 float8 __ovld vloada_half8(size_t offset, const __constant half *p);
3212 float16 __ovld vloada_half16(size_t offset, const __constant half *p);
3213-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3214+#ifdef __opencl_c_generic_address_space
3215 float __ovld vloada_half(size_t offset, const half *p);
3216 float2 __ovld vloada_half2(size_t offset, const half *p);
3217 float3 __ovld vloada_half3(size_t offset, const half *p);
3218 float4 __ovld vloada_half4(size_t offset, const half *p);
3219 float8 __ovld vloada_half8(size_t offset, const half *p);
3220 float16 __ovld vloada_half16(size_t offset, const half *p);
3221-#else
3222+#endif //__opencl_c_generic_address_space
3223 float __ovld vloada_half(size_t offset, const __global half *p);
3224 float2 __ovld vloada_half2(size_t offset, const __global half *p);
3225 float3 __ovld vloada_half3(size_t offset, const __global half *p);
3226@@ -12193,7 +12200,6 @@ float3 __ovld vloada_half3(size_t offset, const __private half *p);
3227 float4 __ovld vloada_half4(size_t offset, const __private half *p);
3228 float8 __ovld vloada_half8(size_t offset, const __private half *p);
3229 float16 __ovld vloada_half16(size_t offset, const __private half *p);
3230-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3231
3232 /**
3233 * The floatn value given by data is converted to
3234@@ -12211,7 +12217,7 @@ float16 __ovld vloada_half16(size_t offset, const __private half *p);
3235 * mode. The default current rounding mode is
3236 * round to nearest even.
3237 */
3238-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3239+#ifdef __opencl_c_generic_address_space
3240 void __ovld vstorea_half(float data, size_t offset, half *p);
3241 void __ovld vstorea_half2(float2 data, size_t offset, half *p);
3242 void __ovld vstorea_half3(float3 data, size_t offset, half *p);
3243@@ -12247,7 +12253,7 @@ void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p);
3244 void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p);
3245 void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p);
3246
3247-#ifdef cl_khr_fp64
3248+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3249 void __ovld vstorea_half(double data, size_t offset, half *p);
3250 void __ovld vstorea_half2(double2 data, size_t offset, half *p);
3251 void __ovld vstorea_half3(double3 data, size_t offset, half *p);
3252@@ -12282,9 +12288,9 @@ void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p);
3253 void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p);
3254 void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p);
3255 void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p);
3256-#endif //cl_khr_fp64
3257+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3258+#endif //__opencl_c_generic_address_space
3259
3260-#else
3261 void __ovld vstorea_half(float data, size_t offset, __global half *p);
3262 void __ovld vstorea_half2(float2 data, size_t offset, __global half *p);
3263 void __ovld vstorea_half3(float3 data, size_t offset, __global half *p);
3264@@ -12390,7 +12396,7 @@ void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p);
3265 void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p);
3266 void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p);
3267
3268-#ifdef cl_khr_fp64
3269+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3270 void __ovld vstorea_half(double data, size_t offset, __global half *p);
3271 void __ovld vstorea_half2(double2 data, size_t offset, __global half *p);
3272 void __ovld vstorea_half3(double3 data, size_t offset, __global half *p);
3273@@ -12495,8 +12501,7 @@ void __ovld vstorea_half3_rtn(double3 data,size_t offset, __private half *p);
3274 void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p);
3275 void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p);
3276 void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p);
3277-#endif //cl_khr_fp64
3278-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3279+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3280
3281 // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions
3282
3283@@ -12580,7 +12585,7 @@ void __ovld write_mem_fence(cl_mem_fence_flags flags);
3284
3285 // OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions
3286
3287-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3288+#ifdef __opencl_c_generic_address_space
3289 cl_mem_fence_flags __ovld get_fence(const void *ptr);
3290 cl_mem_fence_flags __ovld get_fence(void *ptr);
3291
3292@@ -12591,7 +12596,7 @@ cl_mem_fence_flags __ovld get_fence(void *ptr);
3293 * where gentype is builtin type or user defined type.
3294 */
3295
3296-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
3297+#endif //__opencl_c_generic_address_space
3298
3299 // OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch
3300
3301@@ -12730,7 +12735,7 @@ event_t __ovld async_work_group_copy(__global uint16 *dst, const __local uint16
3302 event_t __ovld async_work_group_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, event_t event);
3303 event_t __ovld async_work_group_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, event_t event);
3304 event_t __ovld async_work_group_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, event_t event);
3305-#ifdef cl_khr_fp64
3306+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3307 event_t __ovld async_work_group_copy(__local double *dst, const __global double *src, size_t num_elements, event_t event);
3308 event_t __ovld async_work_group_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, event_t event);
3309 event_t __ovld async_work_group_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, event_t event);
3310@@ -12743,7 +12748,7 @@ event_t __ovld async_work_group_copy(__global double3 *dst, const __local double
3311 event_t __ovld async_work_group_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, event_t event);
3312 event_t __ovld async_work_group_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, event_t event);
3313 event_t __ovld async_work_group_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, event_t event);
3314-#endif //cl_khr_fp64
3315+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3316 #ifdef cl_khr_fp16
3317 event_t __ovld async_work_group_copy(__local half *dst, const __global half *src, size_t num_elements, event_t event);
3318 event_t __ovld async_work_group_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, event_t event);
3319@@ -12893,7 +12898,7 @@ event_t __ovld async_work_group_strided_copy(__global uint16 *dst, const __local
3320 event_t __ovld async_work_group_strided_copy(__global long16 *dst, const __local long16 *src, size_t num_elements, size_t dst_stride, event_t event);
3321 event_t __ovld async_work_group_strided_copy(__global ulong16 *dst, const __local ulong16 *src, size_t num_elements, size_t dst_stride, event_t event);
3322 event_t __ovld async_work_group_strided_copy(__global float16 *dst, const __local float16 *src, size_t num_elements, size_t dst_stride, event_t event);
3323-#ifdef cl_khr_fp64
3324+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3325 event_t __ovld async_work_group_strided_copy(__local double *dst, const __global double *src, size_t num_elements, size_t src_stride, event_t event);
3326 event_t __ovld async_work_group_strided_copy(__local double2 *dst, const __global double2 *src, size_t num_elements, size_t src_stride, event_t event);
3327 event_t __ovld async_work_group_strided_copy(__local double3 *dst, const __global double3 *src, size_t num_elements, size_t src_stride, event_t event);
3328@@ -12906,7 +12911,7 @@ event_t __ovld async_work_group_strided_copy(__global double3 *dst, const __loca
3329 event_t __ovld async_work_group_strided_copy(__global double4 *dst, const __local double4 *src, size_t num_elements, size_t dst_stride, event_t event);
3330 event_t __ovld async_work_group_strided_copy(__global double8 *dst, const __local double8 *src, size_t num_elements, size_t dst_stride, event_t event);
3331 event_t __ovld async_work_group_strided_copy(__global double16 *dst, const __local double16 *src, size_t num_elements, size_t dst_stride, event_t event);
3332-#endif //cl_khr_fp64
3333+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3334 #ifdef cl_khr_fp16
3335 event_t __ovld async_work_group_strided_copy(__local half *dst, const __global half *src, size_t num_elements, size_t src_stride, event_t event);
3336 event_t __ovld async_work_group_strided_copy(__local half2 *dst, const __global half2 *src, size_t num_elements, size_t src_stride, event_t event);
3337@@ -12996,14 +13001,14 @@ void __ovld prefetch(const __global uint16 *p, size_t num_elements);
3338 void __ovld prefetch(const __global long16 *p, size_t num_elements);
3339 void __ovld prefetch(const __global ulong16 *p, size_t num_elements);
3340 void __ovld prefetch(const __global float16 *p, size_t num_elements);
3341-#ifdef cl_khr_fp64
3342+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3343 void __ovld prefetch(const __global double *p, size_t num_elements);
3344 void __ovld prefetch(const __global double2 *p, size_t num_elements);
3345 void __ovld prefetch(const __global double3 *p, size_t num_elements);
3346 void __ovld prefetch(const __global double4 *p, size_t num_elements);
3347 void __ovld prefetch(const __global double8 *p, size_t num_elements);
3348 void __ovld prefetch(const __global double16 *p, size_t num_elements);
3349-#endif //cl_khr_fp64
3350+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3351 #ifdef cl_khr_fp16
3352 void __ovld prefetch(const __global half *p, size_t num_elements);
3353 void __ovld prefetch(const __global half2 *p, size_t num_elements);
3354@@ -13026,9 +13031,11 @@ void __ovld prefetch(const __global half16 *p, size_t num_elements);
3355 * pointed by p. The function returns old.
3356 */
3357 int __ovld atomic_add(volatile __global int *p, int val);
3358-unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val);
3359+unsigned int __ovld atomic_add(volatile __global unsigned int *p,
3360+ unsigned int val);
3361 int __ovld atomic_add(volatile __local int *p, int val);
3362-unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val);
3363+unsigned int __ovld atomic_add(volatile __local unsigned int *p,
3364+ unsigned int val);
3365 #ifdef __OPENCL_CPP_VERSION__
3366 int __ovld atomic_add(volatile int *p, int val);
3367 unsigned int __ovld atomic_add(volatile unsigned int *p, unsigned int val);
3368@@ -13056,9 +13063,11 @@ unsigned long __ovld atom_add(volatile __local unsigned long *p, unsigned long v
3369 * returns old.
3370 */
3371 int __ovld atomic_sub(volatile __global int *p, int val);
3372-unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val);
3373+unsigned int __ovld atomic_sub(volatile __global unsigned int *p,
3374+ unsigned int val);
3375 int __ovld atomic_sub(volatile __local int *p, int val);
3376-unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val);
3377+unsigned int __ovld atomic_sub(volatile __local unsigned int *p,
3378+ unsigned int val);
3379 #ifdef __OPENCL_CPP_VERSION__
3380 int __ovld atomic_sub(volatile int *p, int val);
3381 unsigned int __ovld atomic_sub(volatile unsigned int *p, unsigned int val);
3382@@ -13086,9 +13095,11 @@ unsigned long __ovld atom_sub(volatile __local unsigned long *p, unsigned long v
3383 * value.
3384 */
3385 int __ovld atomic_xchg(volatile __global int *p, int val);
3386-unsigned int __ovld atomic_xchg(volatile __global unsigned int *p, unsigned int val);
3387+unsigned int __ovld atomic_xchg(volatile __global unsigned int *p,
3388+ unsigned int val);
3389 int __ovld atomic_xchg(volatile __local int *p, int val);
3390-unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val);
3391+unsigned int __ovld atomic_xchg(volatile __local unsigned int *p,
3392+ unsigned int val);
3393 float __ovld atomic_xchg(volatile __global float *p, float val);
3394 float __ovld atomic_xchg(volatile __local float *p, float val);
3395 #ifdef __OPENCL_CPP_VERSION__
3396@@ -13183,12 +13194,15 @@ unsigned long __ovld atom_dec(volatile __local unsigned long *p);
3397 * returns old.
3398 */
3399 int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val);
3400-unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val);
3401+unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p,
3402+ unsigned int cmp, unsigned int val);
3403 int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val);
3404-unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val);
3405+unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p,
3406+ unsigned int cmp, unsigned int val);
3407 #ifdef __OPENCL_CPP_VERSION__
3408 int __ovld atomic_cmpxchg(volatile int *p, int cmp, int val);
3409-unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp, unsigned int val);
3410+unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp,
3411+ unsigned int val);
3412 #endif
3413
3414 #if defined(cl_khr_global_int32_base_atomics)
3415@@ -13215,9 +13229,11 @@ unsigned long __ovld atom_cmpxchg(volatile __local unsigned long *p, unsigned lo
3416 * returns old.
3417 */
3418 int __ovld atomic_min(volatile __global int *p, int val);
3419-unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val);
3420+unsigned int __ovld atomic_min(volatile __global unsigned int *p,
3421+ unsigned int val);
3422 int __ovld atomic_min(volatile __local int *p, int val);
3423-unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val);
3424+unsigned int __ovld atomic_min(volatile __local unsigned int *p,
3425+ unsigned int val);
3426 #ifdef __OPENCL_CPP_VERSION__
3427 int __ovld atomic_min(volatile int *p, int val);
3428 unsigned int __ovld atomic_min(volatile unsigned int *p, unsigned int val);
3429@@ -13247,9 +13263,11 @@ unsigned long __ovld atom_min(volatile __local unsigned long *p, unsigned long v
3430 * returns old.
3431 */
3432 int __ovld atomic_max(volatile __global int *p, int val);
3433-unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val);
3434+unsigned int __ovld atomic_max(volatile __global unsigned int *p,
3435+ unsigned int val);
3436 int __ovld atomic_max(volatile __local int *p, int val);
3437-unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val);
3438+unsigned int __ovld atomic_max(volatile __local unsigned int *p,
3439+ unsigned int val);
3440 #ifdef __OPENCL_CPP_VERSION__
3441 int __ovld atomic_max(volatile int *p, int val);
3442 unsigned int __ovld atomic_max(volatile unsigned int *p, unsigned int val);
3443@@ -13278,9 +13296,11 @@ unsigned long __ovld atom_max(volatile __local unsigned long *p, unsigned long v
3444 * pointed by p. The function returns old.
3445 */
3446 int __ovld atomic_and(volatile __global int *p, int val);
3447-unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val);
3448+unsigned int __ovld atomic_and(volatile __global unsigned int *p,
3449+ unsigned int val);
3450 int __ovld atomic_and(volatile __local int *p, int val);
3451-unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val);
3452+unsigned int __ovld atomic_and(volatile __local unsigned int *p,
3453+ unsigned int val);
3454 #ifdef __OPENCL_CPP_VERSION__
3455 int __ovld atomic_and(volatile int *p, int val);
3456 unsigned int __ovld atomic_and(volatile unsigned int *p, unsigned int val);
3457@@ -13309,9 +13329,11 @@ unsigned long __ovld atom_and(volatile __local unsigned long *p, unsigned long v
3458 * pointed by p. The function returns old.
3459 */
3460 int __ovld atomic_or(volatile __global int *p, int val);
3461-unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val);
3462+unsigned int __ovld atomic_or(volatile __global unsigned int *p,
3463+ unsigned int val);
3464 int __ovld atomic_or(volatile __local int *p, int val);
3465-unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val);
3466+unsigned int __ovld atomic_or(volatile __local unsigned int *p,
3467+ unsigned int val);
3468 #ifdef __OPENCL_CPP_VERSION__
3469 int __ovld atomic_or(volatile int *p, int val);
3470 unsigned int __ovld atomic_or(volatile unsigned int *p, unsigned int val);
3471@@ -13340,9 +13362,11 @@ unsigned long __ovld atom_or(volatile __local unsigned long *p, unsigned long va
3472 * pointed by p. The function returns old.
3473 */
3474 int __ovld atomic_xor(volatile __global int *p, int val);
3475-unsigned int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val);
3476+unsigned int __ovld atomic_xor(volatile __global unsigned int *p,
3477+ unsigned int val);
3478 int __ovld atomic_xor(volatile __local int *p, int val);
3479-unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val);
3480+unsigned int __ovld atomic_xor(volatile __local unsigned int *p,
3481+ unsigned int val);
3482 #ifdef __OPENCL_CPP_VERSION__
3483 int __ovld atomic_xor(volatile int *p, int val);
3484 unsigned int __ovld atomic_xor(volatile unsigned int *p, unsigned int val);
3485@@ -13380,120 +13404,78 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v
3486 #endif
3487
3488 // atomic_init()
3489+#ifdef __opencl_c_generic_address_space
3490 void __ovld atomic_init(volatile atomic_int *object, int value);
3491 void __ovld atomic_init(volatile atomic_uint *object, uint value);
3492 void __ovld atomic_init(volatile atomic_float *object, float value);
3493 #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
3494 void __ovld atomic_init(volatile atomic_long *object, long value);
3495 void __ovld atomic_init(volatile atomic_ulong *object, ulong value);
3496-#ifdef cl_khr_fp64
3497+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3498 void __ovld atomic_init(volatile atomic_double *object, double value);
3499-#endif //cl_khr_fp64
3500-#endif
3501+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3502+#endif // defined(cl_khr_int64_base_atomics) &&
3503+ // defined(cl_khr_int64_extended_atomics)
3504+#endif // __opencl_c_generic_address_space
3505+
3506+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
3507+void __ovld atomic_init(volatile atomic_int __global *object, int value);
3508+void __ovld atomic_init(volatile atomic_int __local *object, int value);
3509+void __ovld atomic_init(volatile atomic_uint __global *object, uint value);
3510+void __ovld atomic_init(volatile atomic_uint __local *object, uint value);
3511+void __ovld atomic_init(volatile atomic_float __global *object, float value);
3512+void __ovld atomic_init(volatile atomic_float __local *object, float value);
3513+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
3514+void __ovld atomic_init(volatile atomic_long __global *object, long value);
3515+void __ovld atomic_init(volatile atomic_long __local *object, long value);
3516+void __ovld atomic_init(volatile atomic_ulong __global *object, ulong value);
3517+void __ovld atomic_init(volatile atomic_ulong __local *object, ulong value);
3518+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3519+void __ovld atomic_init(volatile atomic_double __global *object, double value);
3520+void __ovld atomic_init(volatile atomic_double __local *object, double value);
3521+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
3522+#endif // defined(cl_khr_int64_base_atomics) &&
3523+ // defined(cl_khr_int64_extended_atomics)
3524+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
3525
3526 // atomic_work_item_fence()
3527-void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order, memory_scope scope);
3528+void __ovld atomic_work_item_fence(cl_mem_fence_flags flags, memory_order order,
3529+ memory_scope scope);
3530
3531 // atomic_fetch()
3532-
3533+#if defined(__opencl_c_atomic_scope_device) && \
3534+ defined(__opencl_c_atomic_order_seq_cst)
3535+#ifdef __opencl_c_generic_address_space
3536 int __ovld atomic_fetch_add(volatile atomic_int *object, int operand);
3537-int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order);
3538-int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
3539 uint __ovld atomic_fetch_add(volatile atomic_uint *object, uint operand);
3540-uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order);
3541-uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
3542 int __ovld atomic_fetch_sub(volatile atomic_int *object, int operand);
3543-int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order);
3544-int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
3545 uint __ovld atomic_fetch_sub(volatile atomic_uint *object, uint operand);
3546-uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order);
3547-uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
3548 int __ovld atomic_fetch_or(volatile atomic_int *object, int operand);
3549-int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order);
3550-int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
3551 uint __ovld atomic_fetch_or(volatile atomic_uint *object, uint operand);
3552-uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order);
3553-uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
3554 int __ovld atomic_fetch_xor(volatile atomic_int *object, int operand);
3555-int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order);
3556-int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
3557 uint __ovld atomic_fetch_xor(volatile atomic_uint *object, uint operand);
3558-uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order);
3559-uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
3560 int __ovld atomic_fetch_and(volatile atomic_int *object, int operand);
3561-int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order);
3562-int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
3563 uint __ovld atomic_fetch_and(volatile atomic_uint *object, uint operand);
3564-uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order);
3565-uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
3566 int __ovld atomic_fetch_min(volatile atomic_int *object, int operand);
3567-int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order);
3568-int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
3569 uint __ovld atomic_fetch_min(volatile atomic_uint *object, uint operand);
3570-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order);
3571-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
3572-uint __ovld atomic_fetch_min(volatile atomic_uint *object, int operand);
3573-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order);
3574-uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);
3575 int __ovld atomic_fetch_max(volatile atomic_int *object, int operand);
3576-int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order);
3577-int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand, memory_order order, memory_scope scope);
3578 uint __ovld atomic_fetch_max(volatile atomic_uint *object, uint operand);
3579-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order);
3580-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, uint operand, memory_order order, memory_scope scope);
3581-uint __ovld atomic_fetch_max(volatile atomic_uint *object, int operand);
3582-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order);
3583-uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object, int operand, memory_order order, memory_scope scope);
3584
3585 #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
3586 long __ovld atomic_fetch_add(volatile atomic_long *object, long operand);
3587-long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order);
3588-long __ovld atomic_fetch_add_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
3589 ulong __ovld atomic_fetch_add(volatile atomic_ulong *object, ulong operand);
3590-ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
3591-ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
3592 long __ovld atomic_fetch_sub(volatile atomic_long *object, long operand);
3593-long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order);
3594-long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
3595 ulong __ovld atomic_fetch_sub(volatile atomic_ulong *object, ulong operand);
3596-ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
3597-ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
3598 long __ovld atomic_fetch_or(volatile atomic_long *object, long operand);
3599-long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order);
3600-long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
3601 ulong __ovld atomic_fetch_or(volatile atomic_ulong *object, ulong operand);
3602-ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
3603-ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
3604 long __ovld atomic_fetch_xor(volatile atomic_long *object, long operand);
3605-long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order);
3606-long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
3607 ulong __ovld atomic_fetch_xor(volatile atomic_ulong *object, ulong operand);
3608-ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
3609-ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
3610 long __ovld atomic_fetch_and(volatile atomic_long *object, long operand);
3611-long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order);
3612-long __ovld atomic_fetch_and_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
3613 ulong __ovld atomic_fetch_and(volatile atomic_ulong *object, ulong operand);
3614-ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
3615-ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
3616 long __ovld atomic_fetch_min(volatile atomic_long *object, long operand);
3617-long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order);
3618-long __ovld atomic_fetch_min_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
3619 ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, ulong operand);
3620-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
3621-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
3622-ulong __ovld atomic_fetch_min(volatile atomic_ulong *object, long operand);
3623-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order);
3624-ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);
3625 long __ovld atomic_fetch_max(volatile atomic_long *object, long operand);
3626-long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order);
3627-long __ovld atomic_fetch_max_explicit(volatile atomic_long *object, long operand, memory_order order, memory_scope scope);
3628 ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, ulong operand);
3629-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order);
3630-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, ulong operand, memory_order order, memory_scope scope);
3631-ulong __ovld atomic_fetch_max(volatile atomic_ulong *object, long operand);
3632-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order);
3633-ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long operand, memory_order order, memory_scope scope);
3634 #endif //defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
3635
3636 // OpenCL v2.0 s6.13.11.7.5:
3637@@ -13501,196 +13483,2239 @@ ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object, long opera
3638 // or/xor/and/min/max: atomic type argument can be intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.
3639
3640 #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
3641-uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object, ptrdiff_t operand);
3642-uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
3643-uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
3644-uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object, ptrdiff_t operand);
3645-uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order);
3646-uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object, ptrdiff_t operand, memory_order order, memory_scope scope);
3647-
3648-uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object, intptr_t operand);
3649-uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);
3650-uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
3651-uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object, intptr_t operand);
3652-uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);
3653-uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
3654-uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object, intptr_t operand);
3655-uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order);
3656-uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object, intptr_t operand, memory_order order, memory_scope scope);
3657-uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object, intptr_t opermax);
3658-uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder);
3659-uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope);
3660-uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object, intptr_t opermax);
3661-uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder);
3662-uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object, intptr_t opermax, memory_order minder, memory_scope scope);
3663-
3664-intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object, uintptr_t operand);
3665-intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);
3666-intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
3667-intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object, uintptr_t operand);
3668-intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);
3669-intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
3670-intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object, uintptr_t operand);
3671-intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order);
3672-intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object, uintptr_t operand, memory_order order, memory_scope scope);
3673-intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object, uintptr_t opermax);
3674-intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder);
3675-intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);
3676-intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object, uintptr_t opermax);
3677-intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder);
3678-intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object, uintptr_t opermax, memory_order minder, memory_scope scope);
3679+uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t *object,
3680+ ptrdiff_t operand);
3681+uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t *object,
3682+ ptrdiff_t operand);
3683+
3684+uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t *object,
3685+ intptr_t operand);
3686+uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t *object,
3687+ intptr_t operand);
3688+uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t *object,
3689+ intptr_t operand);
3690+uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t *object,
3691+ intptr_t operand);
3692+uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t *object,
3693+ intptr_t operand);
3694+
3695+intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t *object,
3696+ uintptr_t operand);
3697+intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t *object,
3698+ uintptr_t operand);
3699+intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t *object,
3700+ uintptr_t operand);
3701+intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t *object,
3702+ uintptr_t operand);
3703+intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t *object,
3704+ uintptr_t operand);
3705+#endif // defined(cl_khr_int64_base_atomics) &&
3706+ // defined(cl_khr_int64_extended_atomics)
3707+#endif // __opencl_c_generic_address_space
3708+
3709+#if(__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
3710+int __ovld atomic_fetch_add(volatile atomic_int __global *object, int operand);
3711+uint __ovld atomic_fetch_add(volatile atomic_uint __local *object,
3712+ uint operand);
3713+int __ovld atomic_fetch_sub(volatile atomic_int __global *object, int operand);
3714+int __ovld atomic_fetch_sub(volatile atomic_int __local *object, int operand);
3715+uint __ovld atomic_fetch_sub(volatile atomic_uint __local *object,
3716+ uint operand);
3717+uint __ovld atomic_fetch_sub(volatile atomic_uint __global *object,
3718+ uint operand);
3719+int __ovld atomic_fetch_or(volatile atomic_int __global *object, int operand);
3720+int __ovld atomic_fetch_or(volatile atomic_int __local *object,
3721+ int operand);
3722+uint __ovld atomic_fetch_or(volatile atomic_uint __global *object,
3723+ uint operand);
3724+uint __ovld atomic_fetch_or(volatile atomic_uint __local *object, uint operand);
3725+int __ovld atomic_fetch_xor(volatile atomic_int __global *object, int operand);
3726+int __ovld atomic_fetch_xor(volatile atomic_int __local *object, int operand);
3727+uint __ovld atomic_fetch_xor(volatile atomic_uint __global *object,
3728+ uint operand);
3729+uint __ovld atomic_fetch_xor(volatile atomic_uint __local *object,
3730+ uint operand);
3731+int __ovld atomic_fetch_and(volatile atomic_int __global *object, int operand);
3732+int __ovld atomic_fetch_and(volatile atomic_int __local *object, int operand);
3733+uint __ovld atomic_fetch_and(volatile atomic_uint __global *object,
3734+ uint operand);
3735+uint __ovld atomic_fetch_and(volatile atomic_uint __local *object,
3736+ uint operand);
3737+int __ovld atomic_fetch_min(volatile atomic_int __global *object, int operand);
3738+int __ovld atomic_fetch_min(volatile atomic_int __local *object, int operand);
3739+uint __ovld atomic_fetch_min(volatile atomic_uint __global *object,
3740+ uint operand);
3741+uint __ovld atomic_fetch_min(volatile atomic_uint __local *object,
3742+ uint operand);
3743+int __ovld atomic_fetch_max(volatile atomic_int __global *object, int operand);
3744+int __ovld atomic_fetch_max(volatile atomic_int __local *object, int operand);
3745+uint __ovld atomic_fetch_max(volatile atomic_uint __global *object,
3746+ uint operand);
3747+uint __ovld atomic_fetch_max(volatile atomic_uint __local *object,
3748+ uint operand);
3749+
3750+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
3751+long __ovld atomic_fetch_add(volatile atomic_long __global *object,
3752+ long operand);
3753+long __ovld atomic_fetch_add(volatile atomic_long __local *object,
3754+ long operand);
3755+ulong __ovld atomic_fetch_add(volatile atomic_ulong __global *object,
3756+ ulong operand);
3757+ulong __ovld atomic_fetch_add(volatile atomic_ulong __local *object,
3758+ ulong operand);
3759+long __ovld atomic_fetch_sub(volatile atomic_long __global *object,
3760+ long operand);
3761+long __ovld atomic_fetch_sub(volatile atomic_long __local *object,
3762+ long operand);
3763+ulong __ovld atomic_fetch_sub(volatile atomic_ulong __global *object,
3764+ ulong operand);
3765+ulong __ovld atomic_fetch_sub(volatile atomic_ulong __local *object,
3766+ ulong operand);
3767+long __ovld atomic_fetch_or(volatile atomic_long __global *object,
3768+ long operand);
3769+long __ovld atomic_fetch_or(volatile atomic_long __local *object, long operand);
3770+ulong __ovld atomic_fetch_or(volatile atomic_ulong __global *object,
3771+ ulong operand);
3772+ulong __ovld atomic_fetch_or(volatile atomic_ulong __local *object,
3773+ ulong operand);
3774+long __ovld atomic_fetch_xor(volatile atomic_long __global *object,
3775+ long operand);
3776+long __ovld atomic_fetch_xor(volatile atomic_long __local *object,
3777+ long operand);
3778+ulong __ovld atomic_fetch_xor(volatile atomic_ulong __global *object,
3779+ ulong operand);
3780+ulong __ovld atomic_fetch_xor(volatile atomic_ulong __local *object,
3781+ ulong operand);
3782+long __ovld atomic_fetch_and(volatile atomic_long __global *object,
3783+ long operand);
3784+long __ovld atomic_fetch_and(volatile atomic_long __local *object,
3785+ long operand);
3786+ulong __ovld atomic_fetch_and(volatile atomic_ulong __global *object,
3787+ ulong operand);
3788+ulong __ovld atomic_fetch_and(volatile atomic_ulong __local *object,
3789+ ulong operand);
3790+long __ovld atomic_fetch_min(volatile atomic_long __global *object,
3791+ long operand);
3792+long __ovld atomic_fetch_min(volatile atomic_long __local *object,
3793+ long operand);
3794+ulong __ovld atomic_fetch_min(volatile atomic_ulong __global *object,
3795+ ulong operand);
3796+ulong __ovld atomic_fetch_min(volatile atomic_ulong __local *object,
3797+ ulong operand);
3798+long __ovld atomic_fetch_max(volatile atomic_long __global *object,
3799+ long operand);
3800+long __ovld atomic_fetch_max(volatile atomic_long __local *object,
3801+ long operand);
3802+ulong __ovld atomic_fetch_max(volatile atomic_ulong __global *object,
3803+ ulong operand);
3804+ulong __ovld atomic_fetch_max(volatile atomic_ulong __local *object,
3805+ ulong operand);
3806+#endif // defined(cl_khr_int64_base_atomics) &&
3807+ // defined(cl_khr_int64_extended_atomics)
3808+
3809+// OpenCL v2.0 s6.13.11.7.5:
3810+// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument
3811+// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be
3812+// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.
3813+
3814+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
3815+uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t __global *object,
3816+ ptrdiff_t operand);
3817+uintptr_t __ovld atomic_fetch_add(volatile atomic_uintptr_t __local *object,
3818+ ptrdiff_t operand);
3819+uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t __global *object,
3820+ ptrdiff_t operand);
3821+uintptr_t __ovld atomic_fetch_sub(volatile atomic_uintptr_t __local *object,
3822+ ptrdiff_t operand);
3823+
3824+uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t __global *object,
3825+ intptr_t operand);
3826+uintptr_t __ovld atomic_fetch_or(volatile atomic_uintptr_t __local *object,
3827+ intptr_t operand);
3828+uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t __global *object,
3829+ intptr_t operand);
3830+uintptr_t __ovld atomic_fetch_xor(volatile atomic_uintptr_t __local *object,
3831+ intptr_t operand);
3832+uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t __global *object,
3833+ intptr_t operand);
3834+uintptr_t __ovld atomic_fetch_and(volatile atomic_uintptr_t __local *object,
3835+ intptr_t operand);
3836+uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t __global *object,
3837+ intptr_t operand);
3838+uintptr_t __ovld atomic_fetch_min(volatile atomic_uintptr_t __local *object,
3839+ intptr_t operand);
3840+uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t __global *object,
3841+ intptr_t operand);
3842+uintptr_t __ovld atomic_fetch_max(volatile atomic_uintptr_t __local *object,
3843+ intptr_t operand);
3844+
3845+intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t __global *object,
3846+ uintptr_t operand);
3847+intptr_t __ovld atomic_fetch_or(volatile atomic_intptr_t __local *object,
3848+ uintptr_t operand);
3849+intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t __global *object,
3850+ uintptr_t operand);
3851+intptr_t __ovld atomic_fetch_xor(volatile atomic_intptr_t __local *object,
3852+ uintptr_t operand);
3853+intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t __global *object,
3854+ uintptr_t operand);
3855+intptr_t __ovld atomic_fetch_and(volatile atomic_intptr_t __local *object,
3856+ uintptr_t operand);
3857+intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t __global *object,
3858+ uintptr_t operand);
3859+intptr_t __ovld atomic_fetch_min(volatile atomic_intptr_t __local *object,
3860+ uintptr_t operand);
3861+intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t __global *object,
3862+ uintptr_t operand);
3863+intptr_t __ovld atomic_fetch_max(volatile atomic_intptr_t __local *object,
3864+ uintptr_t operand);
3865+#endif // defined(cl_khr_int64_base_atomics) &&
3866+ // defined(cl_khr_int64_extended_atomics)
3867+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
3868+
3869+#endif // defined(__opencl_c_atomic_scope_device) &&
3870+ // defined(__opencl_c_atomic_order_seq_cst)
3871+
3872+#ifdef __opencl_c_generic_address_space
3873+#ifdef __opencl_c_atomic_scope_device
3874+int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand,
3875+ memory_order order);
3876+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object,
3877+ uint operand, memory_order order);
3878+int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand,
3879+ memory_order order);
3880+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object,
3881+ uint operand, memory_order order);
3882+int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand,
3883+ memory_order order);
3884+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand,
3885+ memory_order order);
3886+int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand,
3887+ memory_order order);
3888+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object,
3889+ uint operand, memory_order order);
3890+int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand,
3891+ memory_order order);
3892+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object,
3893+ uint operand, memory_order order);
3894+int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand,
3895+ memory_order order);
3896+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object,
3897+ uint operand, memory_order order);
3898+int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand,
3899+ memory_order order);
3900+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object,
3901+ uint operand, memory_order order);
3902+#endif // __opencl_c_atomic_scope_device
3903+int __ovld atomic_fetch_add_explicit(volatile atomic_int *object, int operand,
3904+ memory_order order, memory_scope scope);
3905+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint *object,
3906+ uint operand, memory_order order,
3907+ memory_scope scope);
3908+int __ovld atomic_fetch_sub_explicit(volatile atomic_int *object, int operand,
3909+ memory_order order, memory_scope scope);
3910+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint *object,
3911+ uint operand, memory_order order,
3912+ memory_scope scope);
3913+int __ovld atomic_fetch_or_explicit(volatile atomic_int *object, int operand,
3914+ memory_order order, memory_scope scope);
3915+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint *object, uint operand,
3916+ memory_order order, memory_scope scope);
3917+int __ovld atomic_fetch_xor_explicit(volatile atomic_int *object, int operand,
3918+ memory_order order, memory_scope scope);
3919+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint *object,
3920+ uint operand, memory_order order,
3921+ memory_scope scope);
3922+int __ovld atomic_fetch_and_explicit(volatile atomic_int *object, int operand,
3923+ memory_order order, memory_scope scope);
3924+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint *object,
3925+ uint operand, memory_order order,
3926+ memory_scope scope);
3927+int __ovld atomic_fetch_min_explicit(volatile atomic_int *object, int operand,
3928+ memory_order order, memory_scope scope);
3929+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint *object,
3930+ uint operand, memory_order order,
3931+ memory_scope scope);
3932+int __ovld atomic_fetch_max_explicit(volatile atomic_int *object, int operand,
3933+ memory_order order, memory_scope scope);
3934+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint *object,
3935+ uint operand, memory_order order,
3936+ memory_scope scope);
3937+
3938+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
3939+#ifdef __opencl_c_atomic_scope_device
3940+long __ovld atomic_fetch_add_explicit(volatile atomic_long *object,
3941+ long operand, memory_order order);
3942+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object,
3943+ ulong operand, memory_order order);
3944+long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object,
3945+ long operand, memory_order order);
3946+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object,
3947+ ulong operand, memory_order order);
3948+long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand,
3949+ memory_order order);
3950+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object,
3951+ ulong operand, memory_order order);
3952+long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object,
3953+ long operand, memory_order order);
3954+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object,
3955+ ulong operand, memory_order order);
3956+long __ovld atomic_fetch_and_explicit(volatile atomic_long *object,
3957+ long operand, memory_order order);
3958+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object,
3959+ ulong operand, memory_order order);
3960+long __ovld atomic_fetch_min_explicit(volatile atomic_long *object,
3961+ long operand, memory_order order);
3962+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object,
3963+ ulong operand, memory_order order);
3964+long __ovld atomic_fetch_max_explicit(volatile atomic_long *object,
3965+ long operand, memory_order order);
3966+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object,
3967+ ulong operand, memory_order order);
3968+#endif // __opencl_c_atomic_scope_device
3969+long __ovld atomic_fetch_add_explicit(volatile atomic_long *object,
3970+ long operand, memory_order order,
3971+ memory_scope scope);
3972+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong *object,
3973+ ulong operand, memory_order order,
3974+ memory_scope scope);
3975+long __ovld atomic_fetch_sub_explicit(volatile atomic_long *object,
3976+ long operand, memory_order order,
3977+ memory_scope scope);
3978+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong *object,
3979+ ulong operand, memory_order order,
3980+ memory_scope scope);
3981+long __ovld atomic_fetch_or_explicit(volatile atomic_long *object, long operand,
3982+ memory_order order, memory_scope scope);
3983+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong *object,
3984+ ulong operand, memory_order order,
3985+ memory_scope scope);
3986+long __ovld atomic_fetch_xor_explicit(volatile atomic_long *object,
3987+ long operand, memory_order order,
3988+ memory_scope scope);
3989+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong *object,
3990+ ulong operand, memory_order order,
3991+ memory_scope scope);
3992+long __ovld atomic_fetch_and_explicit(volatile atomic_long *object,
3993+ long operand, memory_order order,
3994+ memory_scope scope);
3995+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong *object,
3996+ ulong operand, memory_order order,
3997+ memory_scope scope);
3998+long __ovld atomic_fetch_min_explicit(volatile atomic_long *object,
3999+ long operand, memory_order order,
4000+ memory_scope scope);
4001+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong *object,
4002+ ulong operand, memory_order order,
4003+ memory_scope scope);
4004+long __ovld atomic_fetch_max_explicit(volatile atomic_long *object,
4005+ long operand, memory_order order,
4006+ memory_scope scope);
4007+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong *object,
4008+ ulong operand, memory_order order,
4009+ memory_scope scope);
4010+#endif // defined(cl_khr_int64_base_atomics) &&
4011+ // defined(cl_khr_int64_extended_atomics)
4012+
4013+// OpenCL v2.0 s6.13.11.7.5:
4014+// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument
4015+// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be
4016+// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.
4017+
4018+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4019+#ifdef __opencl_c_atomic_scope_device
4020+uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object,
4021+ ptrdiff_t operand,
4022+ memory_order order);
4023+uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object,
4024+ ptrdiff_t operand,
4025+ memory_order order);
4026+uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object,
4027+ intptr_t operand, memory_order order);
4028+uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object,
4029+ intptr_t operand,
4030+ memory_order order);
4031+uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object,
4032+ intptr_t operand,
4033+ memory_order order);
4034+uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object,
4035+ intptr_t opermax,
4036+ memory_order minder);
4037+uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object,
4038+ intptr_t opermax,
4039+ memory_order minder);
4040+intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object,
4041+ uintptr_t operand, memory_order order);
4042+intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object,
4043+ uintptr_t operand,
4044+ memory_order order);
4045+intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object,
4046+ uintptr_t operand,
4047+ memory_order order);
4048+intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object,
4049+ uintptr_t opermax,
4050+ memory_order minder);
4051+intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object,
4052+ uintptr_t opermax,
4053+ memory_order minder);
4054+#endif // __opencl_c_atomic_scope_device
4055+uintptr_t __ovld atomic_fetch_add_explicit(volatile atomic_uintptr_t *object,
4056+ ptrdiff_t operand,
4057+ memory_order order,
4058+ memory_scope scope);
4059+uintptr_t __ovld atomic_fetch_sub_explicit(volatile atomic_uintptr_t *object,
4060+ ptrdiff_t operand,
4061+ memory_order order,
4062+ memory_scope scope);
4063+
4064+uintptr_t __ovld atomic_fetch_or_explicit(volatile atomic_uintptr_t *object,
4065+ intptr_t operand, memory_order order,
4066+ memory_scope scope);
4067+uintptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_uintptr_t *object,
4068+ intptr_t operand, memory_order order,
4069+ memory_scope scope);
4070+uintptr_t __ovld atomic_fetch_and_explicit(volatile atomic_uintptr_t *object,
4071+ intptr_t operand, memory_order order,
4072+ memory_scope scope);
4073+uintptr_t __ovld atomic_fetch_min_explicit(volatile atomic_uintptr_t *object,
4074+ intptr_t opermax,
4075+ memory_order minder,
4076+ memory_scope scope);
4077+uintptr_t __ovld atomic_fetch_max_explicit(volatile atomic_uintptr_t *object,
4078+ intptr_t opermax,
4079+ memory_order minder,
4080+ memory_scope scope);
4081+
4082+intptr_t __ovld atomic_fetch_or_explicit(volatile atomic_intptr_t *object,
4083+ uintptr_t operand, memory_order order,
4084+ memory_scope scope);
4085+intptr_t __ovld atomic_fetch_xor_explicit(volatile atomic_intptr_t *object,
4086+ uintptr_t operand, memory_order order,
4087+ memory_scope scope);
4088+intptr_t __ovld atomic_fetch_and_explicit(volatile atomic_intptr_t *object,
4089+ uintptr_t operand, memory_order order,
4090+ memory_scope scope);
4091+intptr_t __ovld atomic_fetch_min_explicit(volatile atomic_intptr_t *object,
4092+ uintptr_t opermax,
4093+ memory_order minder,
4094+ memory_scope scope);
4095+intptr_t __ovld atomic_fetch_max_explicit(volatile atomic_intptr_t *object,
4096+ uintptr_t opermax,
4097+ memory_order minder,
4098+ memory_scope scope);
4099 #endif
4100+#endif // __opencl_c_generic_address_space
4101+
4102+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4103+#ifdef __opencl_c_atomic_scope_device
4104+int __ovld atomic_fetch_add_explicit(volatile atomic_int __global *object,
4105+ int operand, memory_order order);
4106+int __ovld atomic_fetch_add_explicit(volatile atomic_int __local *object,
4107+ int operand, memory_order order);
4108+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __global *object,
4109+ uint operand, memory_order order);
4110+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __local *object,
4111+ uint operand, memory_order order);
4112+int __ovld atomic_fetch_sub_explicit(volatile atomic_int __global *object,
4113+ int operand, memory_order order);
4114+int __ovld atomic_fetch_sub_explicit(volatile atomic_int __local *object,
4115+ int operand, memory_order order);
4116+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __global *object,
4117+ uint operand, memory_order order);
4118+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __local *object,
4119+ uint operand, memory_order order);
4120+int __ovld atomic_fetch_or_explicit(volatile atomic_int __global *object,
4121+ int operand, memory_order order);
4122+int __ovld atomic_fetch_or_explicit(volatile atomic_int __local *object,
4123+ int operand, memory_order order);
4124+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __global *object,
4125+ uint operand, memory_order order);
4126+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __local *object,
4127+ uint operand, memory_order order);
4128+int __ovld atomic_fetch_xor_explicit(volatile atomic_int __global *object,
4129+ int operand, memory_order order);
4130+int __ovld atomic_fetch_xor_explicit(volatile atomic_int __local *object,
4131+ int operand, memory_order order);
4132+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __global *object,
4133+ uint operand, memory_order order);
4134+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __local *object,
4135+ uint operand, memory_order order);
4136+int __ovld atomic_fetch_and_explicit(volatile atomic_int __global *object,
4137+ int operand, memory_order order);
4138+int __ovld atomic_fetch_and_explicit(volatile atomic_int __local *object,
4139+ int operand, memory_order order);
4140+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __global *object,
4141+ uint operand, memory_order order);
4142+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __local *object,
4143+ uint operand, memory_order order);
4144+int __ovld atomic_fetch_min_explicit(volatile atomic_int __global *object,
4145+ int operand, memory_order order);
4146+int __ovld atomic_fetch_min_explicit(volatile atomic_int __local *object,
4147+ int operand, memory_order order);
4148+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __global *object,
4149+ uint operand, memory_order order);
4150+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __local *object,
4151+ uint operand, memory_order order);
4152+int __ovld atomic_fetch_max_explicit(volatile atomic_int __global *object,
4153+ int operand, memory_order order);
4154+int __ovld atomic_fetch_max_explicit(volatile atomic_int __local *object,
4155+ int operand, memory_order order);
4156+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __global *object,
4157+ uint operand, memory_order order);
4158+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __local *object,
4159+ uint operand, memory_order order);
4160+#endif // __opencl_c_atomic_scope_device
4161+int __ovld atomic_fetch_add_explicit(volatile atomic_int __global *object,
4162+ int operand, memory_order order,
4163+ memory_scope scope);
4164+int __ovld atomic_fetch_add_explicit(volatile atomic_int __local *object,
4165+ int operand, memory_order order,
4166+ memory_scope scope);
4167+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __global *object,
4168+ uint operand, memory_order order,
4169+ memory_scope scope);
4170+uint __ovld atomic_fetch_add_explicit(volatile atomic_uint __local *object,
4171+ uint operand, memory_order order,
4172+ memory_scope scope);
4173+int __ovld atomic_fetch_sub_explicit(volatile atomic_int __global *object,
4174+ int operand, memory_order order,
4175+ memory_scope scope);
4176+int __ovld atomic_fetch_sub_explicit(volatile atomic_int __local *object,
4177+ int operand, memory_order order,
4178+ memory_scope scope);
4179+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __global *object,
4180+ uint operand, memory_order order,
4181+ memory_scope scope);
4182+uint __ovld atomic_fetch_sub_explicit(volatile atomic_uint __local *object,
4183+ uint operand, memory_order order,
4184+ memory_scope scope);
4185+int __ovld atomic_fetch_or_explicit(volatile atomic_int __global *object,
4186+ int operand, memory_order order,
4187+ memory_scope scope);
4188+int __ovld atomic_fetch_or_explicit(volatile atomic_int __local *object,
4189+ int operand, memory_order order,
4190+ memory_scope scope);
4191+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __global *object,
4192+ uint operand, memory_order order,
4193+ memory_scope scope);
4194+uint __ovld atomic_fetch_or_explicit(volatile atomic_uint __local *object,
4195+ uint operand, memory_order order,
4196+ memory_scope scope);
4197+int __ovld atomic_fetch_xor_explicit(volatile atomic_int __global *object,
4198+ int operand, memory_order order,
4199+ memory_scope scope);
4200+int __ovld atomic_fetch_xor_explicit(volatile atomic_int __local *object,
4201+ int operand, memory_order order,
4202+ memory_scope scope);
4203+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __global *object,
4204+ uint operand, memory_order order,
4205+ memory_scope scope);
4206+uint __ovld atomic_fetch_xor_explicit(volatile atomic_uint __local *object,
4207+ uint operand, memory_order order,
4208+ memory_scope scope);
4209+int __ovld atomic_fetch_and_explicit(volatile atomic_int __global *object,
4210+ int operand, memory_order order,
4211+ memory_scope scope);
4212+int __ovld atomic_fetch_and_explicit(volatile atomic_int __local *object,
4213+ int operand, memory_order order,
4214+ memory_scope scope);
4215+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __global *object,
4216+ uint operand, memory_order order,
4217+ memory_scope scope);
4218+uint __ovld atomic_fetch_and_explicit(volatile atomic_uint __local *object,
4219+ uint operand, memory_order order,
4220+ memory_scope scope);
4221+int __ovld atomic_fetch_min_explicit(volatile atomic_int __global *object,
4222+ int operand, memory_order order,
4223+ memory_scope scope);
4224+int __ovld atomic_fetch_min_explicit(volatile atomic_int __local *object,
4225+ int operand, memory_order order,
4226+ memory_scope scope);
4227+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __global *object,
4228+ uint operand, memory_order order,
4229+ memory_scope scope);
4230+uint __ovld atomic_fetch_min_explicit(volatile atomic_uint __local *object,
4231+ uint operand, memory_order order,
4232+ memory_scope scope);
4233+int __ovld atomic_fetch_max_explicit(volatile atomic_int __global *object,
4234+ int operand, memory_order order,
4235+ memory_scope scope);
4236+int __ovld atomic_fetch_max_explicit(volatile atomic_int __local *object,
4237+ int operand, memory_order order,
4238+ memory_scope scope);
4239+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __global *object,
4240+ uint operand, memory_order order,
4241+ memory_scope scope);
4242+uint __ovld atomic_fetch_max_explicit(volatile atomic_uint __local *object,
4243+ uint operand, memory_order order,
4244+ memory_scope scope);
4245+
4246+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4247+#ifdef __opencl_c_atomic_scope_device
4248+long __ovld atomic_fetch_add_explicit(volatile atomic_long __global *object,
4249+ long operand, memory_order order);
4250+long __ovld atomic_fetch_add_explicit(volatile atomic_long __local *object,
4251+ long operand, memory_order order);
4252+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __global *object,
4253+ ulong operand, memory_order order);
4254+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __local *object,
4255+ ulong operand, memory_order order);
4256+long __ovld atomic_fetch_sub_explicit(volatile atomic_long __global *object,
4257+ long operand, memory_order order);
4258+long __ovld atomic_fetch_sub_explicit(volatile atomic_long __local *object,
4259+ long operand, memory_order order);
4260+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __global *object,
4261+ ulong operand, memory_order order);
4262+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __local *object,
4263+ ulong operand, memory_order order);
4264+long __ovld atomic_fetch_or_explicit(volatile atomic_long __global *object,
4265+ long operand, memory_order order);
4266+long __ovld atomic_fetch_or_explicit(volatile atomic_long __local *object,
4267+ long operand, memory_order order);
4268+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __global *object,
4269+ ulong operand, memory_order order);
4270+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __local *object,
4271+ ulong operand, memory_order order);
4272+long __ovld atomic_fetch_xor_explicit(volatile atomic_long __global *object,
4273+ long operand, memory_order order);
4274+long __ovld atomic_fetch_xor_explicit(volatile atomic_long __local *object,
4275+ long operand, memory_order order);
4276+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __global *object,
4277+ ulong operand, memory_order order);
4278+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __local *object,
4279+ ulong operand, memory_order order);
4280+long __ovld atomic_fetch_and_explicit(volatile atomic_long __global *object,
4281+ long operand, memory_order order);
4282+long __ovld atomic_fetch_and_explicit(volatile atomic_long __local *object,
4283+ long operand, memory_order order);
4284+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __global *object,
4285+ ulong operand, memory_order order);
4286+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __local *object,
4287+ ulong operand, memory_order order);
4288+long __ovld atomic_fetch_min_explicit(volatile atomic_long __global *object,
4289+ long operand, memory_order order);
4290+long __ovld atomic_fetch_min_explicit(volatile atomic_long __local *object,
4291+ long operand, memory_order order);
4292+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __global *object,
4293+ ulong operand, memory_order order);
4294+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __local *object,
4295+ ulong operand, memory_order order);
4296+long __ovld atomic_fetch_max_explicit(volatile atomic_long __global *object,
4297+ long operand, memory_order order);
4298+long __ovld atomic_fetch_max_explicit(volatile atomic_long __local *object,
4299+ long operand, memory_order order);
4300+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __global *object,
4301+ ulong operand, memory_order order);
4302+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __local *object,
4303+ ulong operand, memory_order order);
4304+#endif // __opencl_c_atomic_scope_device
4305+long __ovld atomic_fetch_add_explicit(volatile atomic_long __global *object,
4306+ long operand, memory_order order,
4307+ memory_scope scope);
4308+long __ovld atomic_fetch_add_explicit(volatile atomic_long __local *object,
4309+ long operand, memory_order order,
4310+ memory_scope scope);
4311+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __global *object,
4312+ ulong operand, memory_order order,
4313+ memory_scope scope);
4314+ulong __ovld atomic_fetch_add_explicit(volatile atomic_ulong __local *object,
4315+ ulong operand, memory_order order,
4316+ memory_scope scope);
4317+long __ovld atomic_fetch_sub_explicit(volatile atomic_long __global *object,
4318+ long operand, memory_order order,
4319+ memory_scope scope);
4320+long __ovld atomic_fetch_sub_explicit(volatile atomic_long __local *object,
4321+ long operand, memory_order order,
4322+ memory_scope scope);
4323+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __global *object,
4324+ ulong operand, memory_order order,
4325+ memory_scope scope);
4326+ulong __ovld atomic_fetch_sub_explicit(volatile atomic_ulong __local *object,
4327+ ulong operand, memory_order order,
4328+ memory_scope scope);
4329+long __ovld atomic_fetch_or_explicit(volatile atomic_long __global *object,
4330+ long operand, memory_order order,
4331+ memory_scope scope);
4332+long __ovld atomic_fetch_or_explicit(volatile atomic_long __local *object,
4333+ long operand, memory_order order,
4334+ memory_scope scope);
4335+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __global *object,
4336+ ulong operand, memory_order order,
4337+ memory_scope scope);
4338+ulong __ovld atomic_fetch_or_explicit(volatile atomic_ulong __local *object,
4339+ ulong operand, memory_order order,
4340+ memory_scope scope);
4341+long __ovld atomic_fetch_xor_explicit(volatile atomic_long __global *object,
4342+ long operand, memory_order order,
4343+ memory_scope scope);
4344+long __ovld atomic_fetch_xor_explicit(volatile atomic_long __local *object,
4345+ long operand, memory_order order,
4346+ memory_scope scope);
4347+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __global *object,
4348+ ulong operand, memory_order order,
4349+ memory_scope scope);
4350+ulong __ovld atomic_fetch_xor_explicit(volatile atomic_ulong __local *object,
4351+ ulong operand, memory_order order,
4352+ memory_scope scope);
4353+long __ovld atomic_fetch_and_explicit(volatile atomic_long __global *object,
4354+ long operand, memory_order order,
4355+ memory_scope scope);
4356+long __ovld atomic_fetch_and_explicit(volatile atomic_long __local *object,
4357+ long operand, memory_order order,
4358+ memory_scope scope);
4359+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __global *object,
4360+ ulong operand, memory_order order,
4361+ memory_scope scope);
4362+ulong __ovld atomic_fetch_and_explicit(volatile atomic_ulong __local *object,
4363+ ulong operand, memory_order order,
4364+ memory_scope scope);
4365+long __ovld atomic_fetch_min_explicit(volatile atomic_long __global *object,
4366+ long operand, memory_order order,
4367+ memory_scope scope);
4368+long __ovld atomic_fetch_min_explicit(volatile atomic_long __local *object,
4369+ long operand, memory_order order,
4370+ memory_scope scope);
4371+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __global *object,
4372+ ulong operand, memory_order order,
4373+ memory_scope scope);
4374+ulong __ovld atomic_fetch_min_explicit(volatile atomic_ulong __local *object,
4375+ ulong operand, memory_order order,
4376+ memory_scope scope);
4377+long __ovld atomic_fetch_max_explicit(volatile atomic_long __global *object,
4378+ long operand, memory_order order,
4379+ memory_scope scope);
4380+long __ovld atomic_fetch_max_explicit(volatile atomic_long __local *object,
4381+ long operand, memory_order order,
4382+ memory_scope scope);
4383+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __global *object,
4384+ ulong operand, memory_order order,
4385+ memory_scope scope);
4386+ulong __ovld atomic_fetch_max_explicit(volatile atomic_ulong __local *object,
4387+ ulong operand, memory_order order,
4388+ memory_scope scope);
4389+#endif // defined(cl_khr_int64_base_atomics) &&
4390+ // defined(cl_khr_int64_extended_atomics)
4391+
4392+// OpenCL v2.0 s6.13.11.7.5:
4393+// add/sub: atomic type argument can be uintptr_t/intptr_t, value type argument
4394+// can be ptrdiff_t. or/xor/and/min/max: atomic type argument can be
4395+// intptr_t/uintptr_t, value type argument can be intptr_t/uintptr_t.
4396+
4397+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4398+#ifdef __opencl_c_atomic_scope_device
4399+uintptr_t __ovld
4400+atomic_fetch_add_explicit(volatile atomic_uintptr_t __global *object,
4401+ ptrdiff_t operand, memory_order order);
4402+uintptr_t __ovld
4403+atomic_fetch_add_explicit(volatile atomic_uintptr_t __local *object,
4404+ ptrdiff_t operand, memory_order order);
4405+uintptr_t __ovld
4406+atomic_fetch_sub_explicit(volatile atomic_uintptr_t __global *object,
4407+ ptrdiff_t operand, memory_order order);
4408+uintptr_t __ovld
4409+atomic_fetch_sub_explicit(volatile atomic_uintptr_t __local *object,
4410+ ptrdiff_t operand, memory_order order);
4411+uintptr_t __ovld
4412+atomic_fetch_or_explicit(volatile atomic_uintptr_t __global *object,
4413+ intptr_t operand, memory_order order);
4414+uintptr_t __ovld
4415+atomic_fetch_or_explicit(volatile atomic_uintptr_t __local *object,
4416+ intptr_t operand, memory_order order);
4417+uintptr_t __ovld
4418+atomic_fetch_xor_explicit(volatile atomic_uintptr_t __global *object,
4419+ intptr_t operand, memory_order order);
4420+uintptr_t __ovld
4421+atomic_fetch_xor_explicit(volatile atomic_uintptr_t __local *object,
4422+ intptr_t operand, memory_order order);
4423+uintptr_t __ovld
4424+atomic_fetch_and_explicit(volatile atomic_uintptr_t __global *object,
4425+ intptr_t operand, memory_order order);
4426+uintptr_t __ovld
4427+atomic_fetch_and_explicit(volatile atomic_uintptr_t __local *object,
4428+ intptr_t operand, memory_order order);
4429+uintptr_t __ovld
4430+atomic_fetch_min_explicit(volatile atomic_uintptr_t __global *object,
4431+ intptr_t opermax, memory_order minder);
4432+uintptr_t __ovld
4433+atomic_fetch_min_explicit(volatile atomic_uintptr_t __local *object,
4434+ intptr_t opermax, memory_order minder);
4435+uintptr_t __ovld
4436+atomic_fetch_max_explicit(volatile atomic_uintptr_t __global *object,
4437+ intptr_t opermax, memory_order minder);
4438+uintptr_t __ovld
4439+atomic_fetch_max_explicit(volatile atomic_uintptr_t __local *object,
4440+ intptr_t opermax, memory_order minder);
4441+intptr_t __ovld
4442+atomic_fetch_or_explicit(volatile atomic_intptr_t __global *object,
4443+ uintptr_t operand, memory_order order);
4444+intptr_t __ovld
4445+atomic_fetch_or_explicit(volatile atomic_intptr_t __local *object,
4446+ uintptr_t operand, memory_order order);
4447+intptr_t __ovld
4448+atomic_fetch_xor_explicit(volatile atomic_intptr_t __global *object,
4449+ uintptr_t operand, memory_order order);
4450+intptr_t __ovld
4451+atomic_fetch_xor_explicit(volatile atomic_intptr_t __local *object,
4452+ uintptr_t operand, memory_order order);
4453+intptr_t __ovld
4454+atomic_fetch_and_explicit(volatile atomic_intptr_t __global *object,
4455+ uintptr_t operand, memory_order order);
4456+intptr_t __ovld
4457+atomic_fetch_and_explicit(volatile atomic_intptr_t __local *object,
4458+ uintptr_t operand, memory_order order);
4459+intptr_t __ovld
4460+atomic_fetch_min_explicit(volatile atomic_intptr_t __global *object,
4461+ uintptr_t opermax, memory_order minder);
4462+intptr_t __ovld
4463+atomic_fetch_min_explicit(volatile atomic_intptr_t __local *object,
4464+ uintptr_t opermax, memory_order minder);
4465+intptr_t __ovld
4466+atomic_fetch_max_explicit(volatile atomic_intptr_t __global *object,
4467+ uintptr_t opermax, memory_order minder);
4468+intptr_t __ovld
4469+atomic_fetch_max_explicit(volatile atomic_intptr_t __local *object,
4470+ uintptr_t opermax, memory_order minder);
4471+#endif // __opencl_c_atomic_scope_device
4472+uintptr_t __ovld atomic_fetch_add_explicit(
4473+ volatile atomic_uintptr_t __global *object, ptrdiff_t operand,
4474+ memory_order order, memory_scope scope);
4475+uintptr_t __ovld atomic_fetch_add_explicit(
4476+ volatile atomic_uintptr_t __local *object, ptrdiff_t operand,
4477+ memory_order order, memory_scope scope);
4478+uintptr_t __ovld atomic_fetch_sub_explicit(
4479+ volatile atomic_uintptr_t __global *object, ptrdiff_t operand,
4480+ memory_order order, memory_scope scope);
4481+uintptr_t __ovld atomic_fetch_sub_explicit(
4482+ volatile atomic_uintptr_t __local *object, ptrdiff_t operand,
4483+ memory_order order, memory_scope scope);
4484+
4485+uintptr_t __ovld atomic_fetch_or_explicit(
4486+ volatile atomic_uintptr_t __global *object, intptr_t operand,
4487+ memory_order order, memory_scope scope);
4488+uintptr_t __ovld atomic_fetch_or_explicit(
4489+ volatile atomic_uintptr_t __local *object, intptr_t operand,
4490+ memory_order order, memory_scope scope);
4491+uintptr_t __ovld atomic_fetch_xor_explicit(
4492+ volatile atomic_uintptr_t __global *object, intptr_t operand,
4493+ memory_order order, memory_scope scope);
4494+uintptr_t __ovld atomic_fetch_xor_explicit(
4495+ volatile atomic_uintptr_t __local *object, intptr_t operand,
4496+ memory_order order, memory_scope scope);
4497+uintptr_t __ovld atomic_fetch_and_explicit(
4498+ volatile atomic_uintptr_t __global *object, intptr_t operand,
4499+ memory_order order, memory_scope scope);
4500+uintptr_t __ovld atomic_fetch_and_explicit(
4501+ volatile atomic_uintptr_t __local *object, intptr_t operand,
4502+ memory_order order, memory_scope scope);
4503+uintptr_t __ovld atomic_fetch_min_explicit(
4504+ volatile atomic_uintptr_t __global *object, intptr_t opermax,
4505+ memory_order minder, memory_scope scope);
4506+uintptr_t __ovld atomic_fetch_min_explicit(
4507+ volatile atomic_uintptr_t __local *object, intptr_t opermax,
4508+ memory_order minder, memory_scope scope);
4509+uintptr_t __ovld atomic_fetch_max_explicit(
4510+ volatile atomic_uintptr_t __global *object, intptr_t opermax,
4511+ memory_order minder, memory_scope scope);
4512+uintptr_t __ovld atomic_fetch_max_explicit(
4513+ volatile atomic_uintptr_t __local *object, intptr_t opermax,
4514+ memory_order minder, memory_scope scope);
4515+
4516+intptr_t __ovld atomic_fetch_or_explicit(
4517+ volatile atomic_intptr_t __global *object, uintptr_t operand,
4518+ memory_order order, memory_scope scope);
4519+intptr_t __ovld atomic_fetch_or_explicit(
4520+ volatile atomic_intptr_t __local *object, uintptr_t operand,
4521+ memory_order order, memory_scope scope);
4522+intptr_t __ovld atomic_fetch_xor_explicit(
4523+ volatile atomic_intptr_t __global *object, uintptr_t operand,
4524+ memory_order order, memory_scope scope);
4525+intptr_t __ovld atomic_fetch_xor_explicit(
4526+ volatile atomic_intptr_t __local *object, uintptr_t operand,
4527+ memory_order order, memory_scope scope);
4528+intptr_t __ovld atomic_fetch_and_explicit(
4529+ volatile atomic_intptr_t __global *object, uintptr_t operand,
4530+ memory_order order, memory_scope scope);
4531+intptr_t __ovld atomic_fetch_and_explicit(
4532+ volatile atomic_intptr_t __local *object, uintptr_t operand,
4533+ memory_order order, memory_scope scope);
4534+intptr_t __ovld atomic_fetch_min_explicit(
4535+ volatile atomic_intptr_t __global *object, uintptr_t opermax,
4536+ memory_order minder, memory_scope scope);
4537+intptr_t __ovld atomic_fetch_min_explicit(
4538+ volatile atomic_intptr_t __local *object, uintptr_t opermax,
4539+ memory_order minder, memory_scope scope);
4540+intptr_t __ovld atomic_fetch_max_explicit(
4541+ volatile atomic_intptr_t __global *object, uintptr_t opermax,
4542+ memory_order minder, memory_scope scope);
4543+intptr_t __ovld atomic_fetch_max_explicit(
4544+ volatile atomic_intptr_t __local *object, uintptr_t opermax,
4545+ memory_order minder, memory_scope scope);
4546+#endif // defined(cl_khr_int64_base_atomics) &&
4547+ // defined(cl_khr_int64_extended_atomics)
4548+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4549
4550 // atomic_store()
4551
4552+#if defined(__opencl_c_atomic_scope_device) && \
4553+ defined(__opencl_c_atomic_order_seq_cst)
4554+
4555+#ifdef __opencl_c_generic_address_space
4556 void __ovld atomic_store(volatile atomic_int *object, int desired);
4557-void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order);
4558-void __ovld atomic_store_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);
4559 void __ovld atomic_store(volatile atomic_uint *object, uint desired);
4560-void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order);
4561-void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);
4562 void __ovld atomic_store(volatile atomic_float *object, float desired);
4563-void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order);
4564-void __ovld atomic_store_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);
4565 #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4566-#ifdef cl_khr_fp64
4567+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4568 void __ovld atomic_store(volatile atomic_double *object, double desired);
4569-void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order);
4570-void __ovld atomic_store_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);
4571-#endif //cl_khr_fp64
4572+#endif
4573 void __ovld atomic_store(volatile atomic_long *object, long desired);
4574-void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order);
4575-void __ovld atomic_store_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);
4576 void __ovld atomic_store(volatile atomic_ulong *object, ulong desired);
4577-void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);
4578-void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);
4579+#endif // defined(cl_khr_int64_base_atomics) &&
4580+ // defined(cl_khr_int64_extended_atomics)
4581+#endif // __opencl_c_generic_address_space
4582+
4583+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4584+void __ovld atomic_store(volatile atomic_int __global *object, int desired);
4585+void __ovld atomic_store(volatile atomic_int __local *object, int desired);
4586+void __ovld atomic_store(volatile atomic_uint __global *object, uint desired);
4587+void __ovld atomic_store(volatile atomic_uint __local *object, uint desired);
4588+void __ovld atomic_store(volatile atomic_float __global *object, float desired);
4589+void __ovld atomic_store(volatile atomic_float __local *object, float desired);
4590+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4591+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4592+void __ovld atomic_store(volatile atomic_double __global *object,
4593+ double desired);
4594+void __ovld atomic_store(volatile atomic_double __local *object,
4595+ double desired);
4596 #endif
4597+void __ovld atomic_store(volatile atomic_long __global *object, long desired);
4598+void __ovld atomic_store(volatile atomic_long __local *object, long desired);
4599+void __ovld atomic_store(volatile atomic_ulong __global *object, ulong desired);
4600+void __ovld atomic_store(volatile atomic_ulong __local *object, ulong desired);
4601+#endif // defined(cl_khr_int64_base_atomics) &&
4602+ // defined(cl_khr_int64_extended_atomics)
4603+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4604+
4605+#endif // defined(__opencl_c_atomic_scope_device) &&
4606+ // defined(__opencl_c_atomic_order_seq_cst)
4607+
4608+#ifdef __opencl_c_generic_address_space
4609+#ifdef __opencl_c_atomic_scope_device
4610+void __ovld atomic_store_explicit(volatile atomic_int *object, int desired,
4611+ memory_order order);
4612+void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired,
4613+ memory_order order);
4614+void __ovld atomic_store_explicit(volatile atomic_float *object, float desired,
4615+ memory_order order);
4616+#endif // __opencl_c_atomic_scope_device
4617+void __ovld atomic_store_explicit(volatile atomic_int *object, int desired,
4618+ memory_order order, memory_scope scope);
4619+void __ovld atomic_store_explicit(volatile atomic_uint *object, uint desired,
4620+ memory_order order, memory_scope scope);
4621+void __ovld atomic_store_explicit(volatile atomic_float *object, float desired,
4622+ memory_order order, memory_scope scope);
4623+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4624+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4625+#ifdef __opencl_c_atomic_scope_device
4626+void __ovld atomic_store_explicit(volatile atomic_double *object,
4627+ double desired, memory_order order);
4628+#endif // __opencl_c_atomic_scope_device
4629+void __ovld atomic_store_explicit(volatile atomic_double *object,
4630+ double desired, memory_order order,
4631+ memory_scope scope);
4632+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4633+#ifdef __opencl_c_atomic_scope_device
4634+void __ovld atomic_store_explicit(volatile atomic_long *object, long desired,
4635+ memory_order order);
4636+void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired,
4637+ memory_order order);
4638+#endif // __opencl_c_atomic_scope_device
4639+void __ovld atomic_store_explicit(volatile atomic_long *object, long desired,
4640+ memory_order order, memory_scope scope);
4641+void __ovld atomic_store_explicit(volatile atomic_ulong *object, ulong desired,
4642+ memory_order order, memory_scope scope);
4643+#endif // defined(cl_khr_int64_base_atomics) &&
4644+ // defined(cl_khr_int64_extended_atomics)
4645+#endif // __opencl_c_generic_address_space
4646+
4647+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4648+#ifdef __opencl_c_atomic_scope_device
4649+void __ovld atomic_store_explicit(volatile atomic_int __global *object,
4650+ int desired, memory_order order);
4651+void __ovld atomic_store_explicit(volatile atomic_int __local *object,
4652+ int desired, memory_order order);
4653+void __ovld atomic_store_explicit(volatile atomic_uint __global *object,
4654+ uint desired, memory_order order);
4655+void __ovld atomic_store_explicit(volatile atomic_uint __local *object,
4656+ uint desired, memory_order order);
4657+void __ovld atomic_store_explicit(volatile atomic_float __global *object,
4658+ float desired, memory_order order);
4659+void __ovld atomic_store_explicit(volatile atomic_float __local *object,
4660+ float desired, memory_order order);
4661+#endif // __opencl_c_atomic_scope_device
4662+void __ovld atomic_store_explicit(volatile atomic_int __global *object,
4663+ int desired, memory_order order,
4664+ memory_scope scope);
4665+void __ovld atomic_store_explicit(volatile atomic_int __local *object,
4666+ int desired, memory_order order,
4667+ memory_scope scope);
4668+void __ovld atomic_store_explicit(volatile atomic_uint __global *object,
4669+ uint desired, memory_order order,
4670+ memory_scope scope);
4671+void __ovld atomic_store_explicit(volatile atomic_uint __local *object,
4672+ uint desired, memory_order order,
4673+ memory_scope scope);
4674+void __ovld atomic_store_explicit(volatile atomic_float __global *object,
4675+ float desired, memory_order order,
4676+ memory_scope scope);
4677+void __ovld atomic_store_explicit(volatile atomic_float __local *object,
4678+ float desired, memory_order order,
4679+ memory_scope scope);
4680+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4681+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4682+#ifdef __opencl_c_atomic_scope_device
4683+void __ovld atomic_store_explicit(volatile atomic_double __global *object,
4684+ double desired, memory_order order);
4685+void __ovld atomic_store_explicit(volatile atomic_double __local *object,
4686+ double desired, memory_order order);
4687+#endif // __opencl_c_atomic_scope_device
4688+void __ovld atomic_store_explicit(volatile atomic_double __global *object,
4689+ double desired, memory_order order,
4690+ memory_scope scope);
4691+void __ovld atomic_store_explicit(volatile atomic_double __local *object,
4692+ double desired, memory_order order,
4693+ memory_scope scope);
4694+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4695+#ifdef __opencl_c_atomic_scope_device
4696+void __ovld atomic_store_explicit(volatile atomic_long __global *object,
4697+ long desired, memory_order order);
4698+void __ovld atomic_store_explicit(volatile atomic_long __local *object,
4699+ long desired, memory_order order);
4700+void __ovld atomic_store_explicit(volatile atomic_ulong __global *object,
4701+ ulong desired, memory_order order);
4702+void __ovld atomic_store_explicit(volatile atomic_ulong __local *object,
4703+ ulong desired, memory_order order);
4704+#endif // __opencl_c_atomic_scope_device
4705+void __ovld atomic_store_explicit(volatile atomic_long __global *object,
4706+ long desired, memory_order order,
4707+ memory_scope scope);
4708+void __ovld atomic_store_explicit(volatile atomic_long __local *object,
4709+ long desired, memory_order order,
4710+ memory_scope scope);
4711+void __ovld atomic_store_explicit(volatile atomic_ulong __global *object,
4712+ ulong desired, memory_order order,
4713+ memory_scope scope);
4714+void __ovld atomic_store_explicit(volatile atomic_ulong __local *object,
4715+ ulong desired, memory_order order,
4716+ memory_scope scope);
4717+#endif // defined(cl_khr_int64_base_atomics) &&
4718+ // defined(cl_khr_int64_extended_atomics)
4719+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4720
4721 // atomic_load()
4722-
4723+#ifdef __opencl_c_generic_address_space
4724+#if defined(__opencl_c_atomic_scope_device) && \
4725+ defined(__opencl_c_atomic_order_seq_cst)
4726 int __ovld atomic_load(volatile atomic_int *object);
4727-int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order);
4728-int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order, memory_scope scope);
4729 uint __ovld atomic_load(volatile atomic_uint *object);
4730-uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order);
4731-uint __ovld atomic_load_explicit(volatile atomic_uint *object, memory_order order, memory_scope scope);
4732 float __ovld atomic_load(volatile atomic_float *object);
4733-float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order);
4734-float __ovld atomic_load_explicit(volatile atomic_float *object, memory_order order, memory_scope scope);
4735 #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4736-#ifdef cl_khr_fp64
4737+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4738 double __ovld atomic_load(volatile atomic_double *object);
4739-double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order);
4740-double __ovld atomic_load_explicit(volatile atomic_double *object, memory_order order, memory_scope scope);
4741-#endif //cl_khr_fp64
4742+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4743 long __ovld atomic_load(volatile atomic_long *object);
4744-long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order);
4745-long __ovld atomic_load_explicit(volatile atomic_long *object, memory_order order, memory_scope scope);
4746 ulong __ovld atomic_load(volatile atomic_ulong *object);
4747-ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order);
4748-ulong __ovld atomic_load_explicit(volatile atomic_ulong *object, memory_order order, memory_scope scope);
4749-#endif
4750+#endif // defined(cl_khr_int64_base_atomics) &&
4751+ // defined(cl_khr_int64_extended_atomics)
4752+#endif // defined(__opencl_c_atomic_scope_device) &&
4753+ // defined(__opencl_c_atomic_order_seq_cst)
4754+#endif // __opencl_c_generic_address_space
4755+
4756+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4757+#if defined(__opencl_c_atomic_scope_device) && \
4758+ defined(__opencl_c_atomic_order_seq_cst)
4759+int __ovld atomic_load(volatile atomic_int __global *object);
4760+int __ovld atomic_load(volatile atomic_int __local *object);
4761+uint __ovld atomic_load(volatile atomic_uint __global *object);
4762+uint __ovld atomic_load(volatile atomic_uint __local *object);
4763+float __ovld atomic_load(volatile atomic_float __global *object);
4764+float __ovld atomic_load(volatile atomic_float __local *object);
4765+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4766+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4767+double __ovld atomic_load(volatile atomic_double __global *object);
4768+double __ovld atomic_load(volatile atomic_double __local *object);
4769+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4770+long __ovld atomic_load(volatile atomic_long __global *object);
4771+long __ovld atomic_load(volatile atomic_long __local *object);
4772+ulong __ovld atomic_load(volatile atomic_ulong __global *object);
4773+ulong __ovld atomic_load(volatile atomic_ulong __local *object);
4774+#endif // defined(cl_khr_int64_base_atomics) &&
4775+ // defined(cl_khr_int64_extended_atomics)
4776+#endif // defined(__opencl_c_atomic_scope_device) &&
4777+ // defined(__opencl_c_atomic_order_seq_cst)
4778+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4779+
4780+#ifdef __opencl_c_generic_address_space
4781+#ifdef __opencl_c_atomic_scope_device
4782+int __ovld atomic_load_explicit(volatile atomic_int *object,
4783+ memory_order order);
4784+uint __ovld atomic_load_explicit(volatile atomic_uint *object,
4785+ memory_order order);
4786+float __ovld atomic_load_explicit(volatile atomic_float *object,
4787+ memory_order order);
4788+#endif // __opencl_c_atomic_scope_device
4789+int __ovld atomic_load_explicit(volatile atomic_int *object, memory_order order,
4790+ memory_scope scope);
4791+uint __ovld atomic_load_explicit(volatile atomic_uint *object,
4792+ memory_order order, memory_scope scope);
4793+float __ovld atomic_load_explicit(volatile atomic_float *object,
4794+ memory_order order, memory_scope scope);
4795+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4796+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4797+#ifdef __opencl_c_atomic_scope_device
4798+double __ovld atomic_load_explicit(volatile atomic_double *object,
4799+ memory_order order);
4800+#endif // __opencl_c_atomic_scope_device
4801+double __ovld atomic_load_explicit(volatile atomic_double *object,
4802+ memory_order order, memory_scope scope);
4803+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4804+#ifdef __opencl_c_atomic_scope_device
4805+long __ovld atomic_load_explicit(volatile atomic_long *object,
4806+ memory_order order);
4807+ulong __ovld atomic_load_explicit(volatile atomic_ulong *object,
4808+ memory_order order);
4809+#endif // __opencl_c_atomic_scope_device
4810+long __ovld atomic_load_explicit(volatile atomic_long *object,
4811+ memory_order order, memory_scope scope);
4812+ulong __ovld atomic_load_explicit(volatile atomic_ulong *object,
4813+ memory_order order, memory_scope scope);
4814+#endif // defined(cl_khr_int64_base_atomics) &&
4815+ // defined(cl_khr_int64_extended_atomics)
4816+#endif // __opencl_c_generic_address_space
4817+
4818+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4819+#ifdef __opencl_c_atomic_scope_device
4820+int __ovld atomic_load_explicit(volatile atomic_int __global *object,
4821+ memory_order order);
4822+int __ovld atomic_load_explicit(volatile atomic_int __local *object,
4823+ memory_order order);
4824+uint __ovld atomic_load_explicit(volatile atomic_uint __global *object,
4825+ memory_order order);
4826+uint __ovld atomic_load_explicit(volatile atomic_uint __local *object,
4827+ memory_order order);
4828+float __ovld atomic_load_explicit(volatile atomic_float __global *object,
4829+ memory_order order);
4830+float __ovld atomic_load_explicit(volatile atomic_float __local *object,
4831+ memory_order order);
4832+#endif // __opencl_c_atomic_scope_device
4833+int __ovld atomic_load_explicit(volatile atomic_int __global *object,
4834+ memory_order order, memory_scope scope);
4835+int __ovld atomic_load_explicit(volatile atomic_int __local *object,
4836+ memory_order order, memory_scope scope);
4837+uint __ovld atomic_load_explicit(volatile atomic_uint __global *object,
4838+ memory_order order, memory_scope scope);
4839+uint __ovld atomic_load_explicit(volatile atomic_uint __local *object,
4840+ memory_order order, memory_scope scope);
4841+float __ovld atomic_load_explicit(volatile atomic_float __global *object,
4842+ memory_order order, memory_scope scope);
4843+float __ovld atomic_load_explicit(volatile atomic_float __local *object,
4844+ memory_order order, memory_scope scope);
4845+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4846+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4847+#ifdef __opencl_c_atomic_scope_device
4848+double __ovld atomic_load_explicit(volatile atomic_double __global *object,
4849+ memory_order order);
4850+double __ovld atomic_load_explicit(volatile atomic_double __local *object,
4851+ memory_order order);
4852+#endif // __opencl_c_atomic_scope_device
4853+double __ovld atomic_load_explicit(volatile atomic_double __global *object,
4854+ memory_order order, memory_scope scope);
4855+double __ovld atomic_load_explicit(volatile atomic_double __local *object,
4856+ memory_order order, memory_scope scope);
4857+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4858+#ifdef __opencl_c_atomic_scope_device
4859+long __ovld atomic_load_explicit(volatile atomic_long __global *object,
4860+ memory_order order);
4861+long __ovld atomic_load_explicit(volatile atomic_long __local *object,
4862+ memory_order order);
4863+ulong __ovld atomic_load_explicit(volatile atomic_ulong __global *object,
4864+ memory_order order);
4865+ulong __ovld atomic_load_explicit(volatile atomic_ulong __local *object,
4866+ memory_order order);
4867+#endif // __opencl_c_atomic_scope_device
4868+long __ovld atomic_load_explicit(volatile atomic_long __global *object,
4869+ memory_order order, memory_scope scope);
4870+long __ovld atomic_load_explicit(volatile atomic_long __local *object,
4871+ memory_order order, memory_scope scope);
4872+ulong __ovld atomic_load_explicit(volatile atomic_ulong __global *object,
4873+ memory_order order, memory_scope scope);
4874+ulong __ovld atomic_load_explicit(volatile atomic_ulong __local *object,
4875+ memory_order order, memory_scope scope);
4876+#endif // defined(cl_khr_int64_base_atomics) &&
4877+ // defined(cl_khr_int64_extended_atomics)
4878+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4879
4880 // atomic_exchange()
4881-
4882+#ifdef __opencl_c_generic_address_space
4883+#if defined(__opencl_c_atomic_scope_device) && \
4884+ defined(__opencl_c_atomic_order_seq_cst)
4885 int __ovld atomic_exchange(volatile atomic_int *object, int desired);
4886-int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order);
4887-int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired, memory_order order, memory_scope scope);
4888 uint __ovld atomic_exchange(volatile atomic_uint *object, uint desired);
4889-uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order);
4890-uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired, memory_order order, memory_scope scope);
4891 float __ovld atomic_exchange(volatile atomic_float *object, float desired);
4892-float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order);
4893-float __ovld atomic_exchange_explicit(volatile atomic_float *object, float desired, memory_order order, memory_scope scope);
4894 #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4895-#ifdef cl_khr_fp64
4896+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4897 double __ovld atomic_exchange(volatile atomic_double *object, double desired);
4898-double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order);
4899-double __ovld atomic_exchange_explicit(volatile atomic_double *object, double desired, memory_order order, memory_scope scope);
4900-#endif //cl_khr_fp64
4901+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4902 long __ovld atomic_exchange(volatile atomic_long *object, long desired);
4903-long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order);
4904-long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired, memory_order order, memory_scope scope);
4905 ulong __ovld atomic_exchange(volatile atomic_ulong *object, ulong desired);
4906-ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order);
4907-ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object, ulong desired, memory_order order, memory_scope scope);
4908+#endif // defined(cl_khr_int64_base_atomics) &&
4909+ // defined(cl_khr_int64_extended_atomics)
4910+#endif // defined(__opencl_c_atomic_scope_device) &&
4911+ // defined(__opencl_c_atomic_order_seq_cst)
4912+#endif // __opencl_c_generic_address_space
4913+
4914+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4915+#if defined(__opencl_c_atomic_scope_device) && \
4916+ defined(__opencl_c_atomic_order_seq_cst)
4917+int __ovld atomic_exchange(volatile atomic_int __global *object, int desired);
4918+int __ovld atomic_exchange(volatile atomic_int __local *object, int desired);
4919+uint __ovld atomic_exchange(volatile atomic_uint __global *object,
4920+ uint desired);
4921+uint __ovld atomic_exchange(volatile atomic_uint __local *object, uint desired);
4922+float __ovld atomic_exchange(volatile atomic_float __global *object,
4923+ float desired);
4924+float __ovld atomic_exchange(volatile atomic_float __local *object,
4925+ float desired);
4926+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4927+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4928+double __ovld atomic_exchange(volatile atomic_double __global *object,
4929+ double desired);
4930+double __ovld atomic_exchange(volatile atomic_double __local *object,
4931+ double desired);
4932 #endif
4933+long __ovld atomic_exchange(volatile atomic_long __global *object,
4934+ long desired);
4935+long __ovld atomic_exchange(volatile atomic_long __local *object, long desired);
4936+ulong __ovld atomic_exchange(volatile atomic_ulong __global *object,
4937+ ulong desired);
4938+ulong __ovld atomic_exchange(volatile atomic_ulong __local *object,
4939+ ulong desired);
4940+#endif // defined(cl_khr_int64_base_atomics) &&
4941+ // defined(cl_khr_int64_extended_atomics)
4942+#endif // defined(__opencl_c_atomic_scope_device) &&
4943+ // defined(__opencl_c_atomic_order_seq_cst)
4944+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4945+
4946+#ifdef __opencl_c_generic_address_space
4947+#ifdef __opencl_c_atomic_scope_device
4948+int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired,
4949+ memory_order order);
4950+uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired,
4951+ memory_order order);
4952+float __ovld atomic_exchange_explicit(volatile atomic_float *object,
4953+ float desired, memory_order order);
4954+#endif // __opencl_c_atomic_scope_device
4955+int __ovld atomic_exchange_explicit(volatile atomic_int *object, int desired,
4956+ memory_order order, memory_scope scope);
4957+uint __ovld atomic_exchange_explicit(volatile atomic_uint *object, uint desired,
4958+ memory_order order, memory_scope scope);
4959+float __ovld atomic_exchange_explicit(volatile atomic_float *object,
4960+ float desired, memory_order order,
4961+ memory_scope scope);
4962+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
4963+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4964+#ifdef __opencl_c_atomic_scope_device
4965+double __ovld atomic_exchange_explicit(volatile atomic_double *object,
4966+ double desired, memory_order order);
4967+#endif // __opencl_c_atomic_scope_device
4968+double __ovld atomic_exchange_explicit(volatile atomic_double *object,
4969+ double desired, memory_order order,
4970+ memory_scope scope);
4971+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
4972+#ifdef __opencl_c_atomic_scope_device
4973+long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired,
4974+ memory_order order);
4975+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object,
4976+ ulong desired, memory_order order);
4977+#endif // __opencl_c_atomic_scope_device
4978+long __ovld atomic_exchange_explicit(volatile atomic_long *object, long desired,
4979+ memory_order order, memory_scope scope);
4980+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong *object,
4981+ ulong desired, memory_order order,
4982+ memory_scope scope);
4983+#endif // defined(cl_khr_int64_base_atomics) &&
4984+ // defined(cl_khr_int64_extended_atomics)
4985+#endif // __opencl_c_generic_address_space
4986+
4987+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
4988+#ifdef __opencl_c_atomic_scope_device
4989+int __ovld atomic_exchange_explicit(volatile atomic_int __global *object,
4990+ int desired, memory_order order);
4991+int __ovld atomic_exchange_explicit(volatile atomic_int __local *object,
4992+ int desired, memory_order order);
4993+uint __ovld atomic_exchange_explicit(volatile atomic_uint __global *object,
4994+ uint desired, memory_order order);
4995+uint __ovld atomic_exchange_explicit(volatile atomic_uint __local *object,
4996+ uint desired, memory_order order);
4997+float __ovld atomic_exchange_explicit(volatile atomic_float __global *object,
4998+ float desired, memory_order order);
4999+float __ovld atomic_exchange_explicit(volatile atomic_float __local *object,
5000+ float desired, memory_order order);
5001+#endif // __opencl_c_atomic_scope_device
5002+int __ovld atomic_exchange_explicit(volatile atomic_int __global *object,
5003+ int desired, memory_order order,
5004+ memory_scope scope);
5005+int __ovld atomic_exchange_explicit(volatile atomic_int __local *object,
5006+ int desired, memory_order order,
5007+ memory_scope scope);
5008+uint __ovld atomic_exchange_explicit(volatile atomic_uint __global *object,
5009+ uint desired, memory_order order,
5010+ memory_scope scope);
5011+uint __ovld atomic_exchange_explicit(volatile atomic_uint __local *object,
5012+ uint desired, memory_order order,
5013+ memory_scope scope);
5014+float __ovld atomic_exchange_explicit(volatile atomic_float __global *object,
5015+ float desired, memory_order order,
5016+ memory_scope scope);
5017+float __ovld atomic_exchange_explicit(volatile atomic_float __local *object,
5018+ float desired, memory_order order,
5019+ memory_scope scope);
5020+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
5021+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
5022+#ifdef __opencl_c_atomic_scope_device
5023+double __ovld atomic_exchange_explicit(volatile atomic_double __global *object,
5024+ double desired, memory_order order);
5025+double __ovld atomic_exchange_explicit(volatile atomic_double __local *object,
5026+ double desired, memory_order order);
5027+#endif // __opencl_c_atomic_scope_device
5028+double __ovld atomic_exchange_explicit(volatile atomic_double __global *object,
5029+ double desired, memory_order order,
5030+ memory_scope scope);
5031+double __ovld atomic_exchange_explicit(volatile atomic_double __local *object,
5032+ double desired, memory_order order,
5033+ memory_scope scope);
5034+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
5035+#ifdef __opencl_c_atomic_scope_device
5036+long __ovld atomic_exchange_explicit(volatile atomic_long __global *object,
5037+ long desired, memory_order order);
5038+long __ovld atomic_exchange_explicit(volatile atomic_long __local *object,
5039+ long desired, memory_order order);
5040+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __global *object,
5041+ ulong desired, memory_order order);
5042+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __local *object,
5043+ ulong desired, memory_order order);
5044+#endif // __opencl_c_atomic_scope_device
5045+long __ovld atomic_exchange_explicit(volatile atomic_long __global *object,
5046+ long desired, memory_order order,
5047+ memory_scope scope);
5048+long __ovld atomic_exchange_explicit(volatile atomic_long __local *object,
5049+ long desired, memory_order order,
5050+ memory_scope scope);
5051+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __global *object,
5052+ ulong desired, memory_order order,
5053+ memory_scope scope);
5054+ulong __ovld atomic_exchange_explicit(volatile atomic_ulong __local *object,
5055+ ulong desired, memory_order order,
5056+ memory_scope scope);
5057+#endif // defined(cl_khr_int64_base_atomics) &&
5058+ // defined(cl_khr_int64_extended_atomics)
5059+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
5060
5061 // atomic_compare_exchange_strong() and atomic_compare_exchange_weak()
5062-
5063-bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object, int *expected, int desired);
5064-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,
5065- int desired, memory_order success, memory_order failure);
5066-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object, int *expected,
5067- int desired, memory_order success, memory_order failure, memory_scope scope);
5068-bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object, uint *expected, uint desired);
5069-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,
5070- uint desired, memory_order success, memory_order failure);
5071-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_uint *object, uint *expected,
5072- uint desired, memory_order success, memory_order failure, memory_scope scope);
5073-bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object, int *expected, int desired);
5074-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,
5075- int desired, memory_order success, memory_order failure);
5076-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object, int *expected,
5077- int desired, memory_order success, memory_order failure, memory_scope scope);
5078-bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object, uint *expected, uint desired);
5079-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,
5080- uint desired, memory_order success, memory_order failure);
5081-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object, uint *expected,
5082- uint desired, memory_order success, memory_order failure, memory_scope scope);
5083-bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object, float *expected, float desired);
5084-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,
5085- float desired, memory_order success, memory_order failure);
5086-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_float *object, float *expected,
5087- float desired, memory_order success, memory_order failure, memory_scope scope);
5088-bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object, float *expected, float desired);
5089-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,
5090- float desired, memory_order success, memory_order failure);
5091-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object, float *expected,
5092- float desired, memory_order success, memory_order failure, memory_scope scope);
5093+#ifdef __opencl_c_generic_address_space
5094+#if defined(__opencl_c_atomic_scope_device) && \
5095+ defined(__opencl_c_atomic_order_seq_cst)
5096+bool __ovld atomic_compare_exchange_strong(volatile atomic_int *object,
5097+ int *expected, int desired);
5098+bool __ovld atomic_compare_exchange_strong(volatile atomic_uint *object,
5099+ uint *expected, uint desired);
5100+bool __ovld atomic_compare_exchange_weak(volatile atomic_int *object,
5101+ int *expected, int desired);
5102+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint *object,
5103+ uint *expected, uint desired);
5104+bool __ovld atomic_compare_exchange_strong(volatile atomic_float *object,
5105+ float *expected, float desired);
5106+bool __ovld atomic_compare_exchange_weak(volatile atomic_float *object,
5107+ float *expected, float desired);
5108+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
5109+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
5110+bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object,
5111+ double *expected, double desired);
5112+bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object,
5113+ double *expected, double desired);
5114+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
5115+bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object,
5116+ long *expected, long desired);
5117+bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object,
5118+ long *expected, long desired);
5119+bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object,
5120+ ulong *expected, ulong desired);
5121+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object,
5122+ ulong *expected, ulong desired);
5123+#endif // defined(cl_khr_int64_base_atomics) &&
5124+ // defined(cl_khr_int64_extended_atomics)
5125+#endif // defined(__opencl_c_atomic_scope_device) &&
5126+ // defined(__opencl_c_atomic_order_seq_cst)
5127+#endif // __opencl_c_generic_address_space
5128+
5129+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
5130+#if defined(__opencl_c_atomic_scope_device) && \
5131+ defined(__opencl_c_atomic_order_seq_cst)
5132+bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object,
5133+ int __global *expected, int desired);
5134+bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object,
5135+ int __global *expected, int desired);
5136+bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object,
5137+ int __local *expected, int desired);
5138+bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object,
5139+ int __local *expected, int desired);
5140+bool __ovld atomic_compare_exchange_strong(volatile atomic_int __global *object,
5141+ int __private *expected,
5142+ int desired);
5143+bool __ovld atomic_compare_exchange_strong(volatile atomic_int __local *object,
5144+ int __private *expected,
5145+ int desired);
5146+bool __ovld
5147+atomic_compare_exchange_strong(volatile atomic_uint __global *object,
5148+ uint __global *expected, uint desired);
5149+bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object,
5150+ uint __global *expected,
5151+ uint desired);
5152+bool __ovld
5153+atomic_compare_exchange_strong(volatile atomic_uint __global *object,
5154+ uint __local *expected, uint desired);
5155+bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object,
5156+ uint __local *expected,
5157+ uint desired);
5158+bool __ovld
5159+atomic_compare_exchange_strong(volatile atomic_uint __global *object,
5160+ uint __private *expected, uint desired);
5161+bool __ovld atomic_compare_exchange_strong(volatile atomic_uint __local *object,
5162+ uint __private *expected,
5163+ uint desired);
5164+bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object,
5165+ int __global *expected, int desired);
5166+bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object,
5167+ int __global *expected, int desired);
5168+bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object,
5169+ int __local *expected, int desired);
5170+bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object,
5171+ int __local *expected, int desired);
5172+bool __ovld atomic_compare_exchange_weak(volatile atomic_int __global *object,
5173+ int __private *expected, int desired);
5174+bool __ovld atomic_compare_exchange_weak(volatile atomic_int __local *object,
5175+ int __private *expected, int desired);
5176+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object,
5177+ uint __global *expected, uint desired);
5178+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object,
5179+ uint __global *expected, uint desired);
5180+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object,
5181+ uint __local *expected, uint desired);
5182+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object,
5183+ uint __local *expected, uint desired);
5184+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __global *object,
5185+ uint __private *expected,
5186+ uint desired);
5187+bool __ovld atomic_compare_exchange_weak(volatile atomic_uint __local *object,
5188+ uint __private *expected,
5189+ uint desired);
5190+bool __ovld
5191+atomic_compare_exchange_strong(volatile atomic_float __global *object,
5192+ float __global *expected, float desired);
5193+bool __ovld
5194+atomic_compare_exchange_strong(volatile atomic_float __local *object,
5195+ float __global *expected, float desired);
5196+bool __ovld
5197+atomic_compare_exchange_strong(volatile atomic_float __global *object,
5198+ float __local *expected, float desired);
5199+bool __ovld
5200+atomic_compare_exchange_strong(volatile atomic_float __local *object,
5201+ float __local *expected, float desired);
5202+bool __ovld
5203+atomic_compare_exchange_strong(volatile atomic_float __global *object,
5204+ float __private *expected, float desired);
5205+bool __ovld
5206+atomic_compare_exchange_strong(volatile atomic_float __local *object,
5207+ float __private *expected, float desired);
5208+bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object,
5209+ float __global *expected,
5210+ float desired);
5211+bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object,
5212+ float __global *expected,
5213+ float desired);
5214+bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object,
5215+ float __local *expected,
5216+ float desired);
5217+bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object,
5218+ float __local *expected,
5219+ float desired);
5220+bool __ovld atomic_compare_exchange_weak(volatile atomic_float __global *object,
5221+ float __private *expected,
5222+ float desired);
5223+bool __ovld atomic_compare_exchange_weak(volatile atomic_float __local *object,
5224+ float __private *expected,
5225+ float desired);
5226 #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
5227-#ifdef cl_khr_fp64
5228-bool __ovld atomic_compare_exchange_strong(volatile atomic_double *object, double *expected, double desired);
5229-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,
5230- double desired, memory_order success, memory_order failure);
5231-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_double *object, double *expected,
5232- double desired, memory_order success, memory_order failure, memory_scope scope);
5233-bool __ovld atomic_compare_exchange_weak(volatile atomic_double *object, double *expected, double desired);
5234-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,
5235- double desired, memory_order success, memory_order failure);
5236-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_double *object, double *expected,
5237- double desired, memory_order success, memory_order failure, memory_scope scope);
5238-#endif //cl_khr_fp64
5239-bool __ovld atomic_compare_exchange_strong(volatile atomic_long *object, long *expected, long desired);
5240-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,
5241- long desired, memory_order success, memory_order failure);
5242-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_long *object, long *expected,
5243- long desired, memory_order success, memory_order failure, memory_scope scope);
5244-bool __ovld atomic_compare_exchange_weak(volatile atomic_long *object, long *expected, long desired);
5245-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,
5246- long desired, memory_order success, memory_order failure);
5247-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object, long *expected,
5248- long desired, memory_order success, memory_order failure, memory_scope scope);
5249-bool __ovld atomic_compare_exchange_strong(volatile atomic_ulong *object, ulong *expected, ulong desired);
5250-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,
5251- ulong desired, memory_order success, memory_order failure);
5252-bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_ulong *object, ulong *expected,
5253- ulong desired, memory_order success, memory_order failure, memory_scope scope);
5254-bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong *object, ulong *expected, ulong desired);
5255-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,
5256- ulong desired, memory_order success, memory_order failure);
5257-bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object, ulong *expected,
5258- ulong desired, memory_order success, memory_order failure, memory_scope scope);
5259+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
5260+bool __ovld
5261+atomic_compare_exchange_strong(volatile atomic_double __global *object,
5262+ double __global *expected, double desired);
5263+bool __ovld
5264+atomic_compare_exchange_strong(volatile atomic_double __local *object,
5265+ double __global *expected, double desired);
5266+bool __ovld
5267+atomic_compare_exchange_strong(volatile atomic_double __global *object,
5268+ double __local *expected, double desired);
5269+bool __ovld
5270+atomic_compare_exchange_strong(volatile atomic_double __local *object,
5271+ double __local *expected, double desired);
5272+bool __ovld
5273+atomic_compare_exchange_strong(volatile atomic_double __global *object,
5274+ double __private *expected, double desired);
5275+bool __ovld
5276+atomic_compare_exchange_strong(volatile atomic_double __local *object,
5277+ double __private *expected, double desired);
5278+bool __ovld
5279+atomic_compare_exchange_weak(volatile atomic_double __global *object,
5280+ double __global *expected, double desired);
5281+bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object,
5282+ double __global *expected,
5283+ double desired);
5284+bool __ovld
5285+atomic_compare_exchange_weak(volatile atomic_double __global *object,
5286+ double __local *expected, double desired);
5287+bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object,
5288+ double __local *expected,
5289+ double desired);
5290+bool __ovld
5291+atomic_compare_exchange_weak(volatile atomic_double __global *object,
5292+ double __private *expected, double desired);
5293+bool __ovld atomic_compare_exchange_weak(volatile atomic_double __local *object,
5294+ double __private *expected,
5295+ double desired);
5296 #endif
5297+bool __ovld
5298+atomic_compare_exchange_strong(volatile atomic_long __global *object,
5299+ long __global *expected, long desired);
5300+bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object,
5301+ long __global *expected,
5302+ long desired);
5303+bool __ovld
5304+atomic_compare_exchange_strong(volatile atomic_long __global *object,
5305+ long __local *expected, long desired);
5306+bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object,
5307+ long __local *expected,
5308+ long desired);
5309+bool __ovld
5310+atomic_compare_exchange_strong(volatile atomic_long __global *object,
5311+ long __private *expected, long desired);
5312+bool __ovld atomic_compare_exchange_strong(volatile atomic_long __local *object,
5313+ long __private *expected,
5314+ long desired);
5315+bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object,
5316+ long __global *expected, long desired);
5317+bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object,
5318+ long __global *expected, long desired);
5319+bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object,
5320+ long __local *expected, long desired);
5321+bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object,
5322+ long __local *expected, long desired);
5323+bool __ovld atomic_compare_exchange_weak(volatile atomic_long __global *object,
5324+ long __private *expected,
5325+ long desired);
5326+bool __ovld atomic_compare_exchange_weak(volatile atomic_long __local *object,
5327+ long __private *expected,
5328+ long desired);
5329+bool __ovld
5330+atomic_compare_exchange_strong(volatile atomic_ulong __global *object,
5331+ ulong __global *expected, ulong desired);
5332+bool __ovld
5333+atomic_compare_exchange_strong(volatile atomic_ulong __local *object,
5334+ ulong __global *expected, ulong desired);
5335+bool __ovld
5336+atomic_compare_exchange_strong(volatile atomic_ulong __global *object,
5337+ ulong __local *expected, ulong desired);
5338+bool __ovld
5339+atomic_compare_exchange_strong(volatile atomic_ulong __local *object,
5340+ ulong __local *expected, ulong desired);
5341+bool __ovld
5342+atomic_compare_exchange_strong(volatile atomic_ulong __global *object,
5343+ ulong __private *expected, ulong desired);
5344+bool __ovld
5345+atomic_compare_exchange_strong(volatile atomic_ulong __local *object,
5346+ ulong __private *expected, ulong desired);
5347+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object,
5348+ ulong __global *expected,
5349+ ulong desired);
5350+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object,
5351+ ulong __global *expected,
5352+ ulong desired);
5353+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object,
5354+ ulong __local *expected,
5355+ ulong desired);
5356+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object,
5357+ ulong __local *expected,
5358+ ulong desired);
5359+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __global *object,
5360+ ulong __private *expected,
5361+ ulong desired);
5362+bool __ovld atomic_compare_exchange_weak(volatile atomic_ulong __local *object,
5363+ ulong __private *expected,
5364+ ulong desired);
5365+#endif // defined(cl_khr_int64_base_atomics) &&
5366+ // defined(cl_khr_int64_extended_atomics)
5367+#endif // defined(__opencl_c_atomic_scope_device) &&
5368+ // defined(__opencl_c_atomic_order_seq_cst)
5369+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
5370+
5371+#ifdef __opencl_c_generic_address_space
5372+#ifdef __opencl_c_atomic_scope_device
5373+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object,
5374+ int *expected, int desired,
5375+ memory_order success,
5376+ memory_order failure);
5377+bool __ovld atomic_compare_exchange_strong_explicit(
5378+ volatile atomic_uint *object, uint *expected, uint desired,
5379+ memory_order success, memory_order failure);
5380+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object,
5381+ int *expected, int desired,
5382+ memory_order success,
5383+ memory_order failure);
5384+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object,
5385+ uint *expected, uint desired,
5386+ memory_order success,
5387+ memory_order failure);
5388+bool __ovld atomic_compare_exchange_strong_explicit(
5389+ volatile atomic_float *object, float *expected, float desired,
5390+ memory_order success, memory_order failure);
5391+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_float *object,
5392+ float *expected,
5393+ float desired,
5394+ memory_order success,
5395+ memory_order failure);
5396+#endif // __opencl_c_atomic_scope_device
5397+bool __ovld atomic_compare_exchange_strong_explicit(volatile atomic_int *object,
5398+ int *expected, int desired,
5399+ memory_order success,
5400+ memory_order failure,
5401+ memory_scope scope);
5402+bool __ovld atomic_compare_exchange_strong_explicit(
5403+ volatile atomic_uint *object, uint *expected, uint desired,
5404+ memory_order success, memory_order failure, memory_scope scope);
5405+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_int *object,
5406+ int *expected, int desired,
5407+ memory_order success,
5408+ memory_order failure,
5409+ memory_scope scope);
5410+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_uint *object,
5411+ uint *expected, uint desired,
5412+ memory_order success,
5413+ memory_order failure,
5414+ memory_scope scope);
5415+bool __ovld atomic_compare_exchange_strong_explicit(
5416+ volatile atomic_float *object, float *expected, float desired,
5417+ memory_order success, memory_order failure, memory_scope scope);
5418+bool __ovld atomic_compare_exchange_weak_explicit(
5419+ volatile atomic_float *object, float *expected, float desired,
5420+ memory_order success, memory_order failure, memory_scope scope);
5421+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
5422+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
5423+#ifdef __opencl_c_atomic_scope_device
5424+bool __ovld atomic_compare_exchange_strong_explicit(
5425+ volatile atomic_double *object, double *expected, double desired,
5426+ memory_order success, memory_order failure);
5427+bool __ovld atomic_compare_exchange_weak_explicit(
5428+ volatile atomic_double *object, double *expected, double desired,
5429+ memory_order success, memory_order failure);
5430+#endif // __opencl_c_atomic_scope_device
5431+bool __ovld atomic_compare_exchange_strong_explicit(
5432+ volatile atomic_double *object, double *expected, double desired,
5433+ memory_order success, memory_order failure, memory_scope scope);
5434+bool __ovld atomic_compare_exchange_weak_explicit(
5435+ volatile atomic_double *object, double *expected, double desired,
5436+ memory_order success, memory_order failure, memory_scope scope);
5437+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
5438+#ifdef __opencl_c_atomic_scope_device
5439+bool __ovld atomic_compare_exchange_strong_explicit(
5440+ volatile atomic_long *object, long *expected, long desired,
5441+ memory_order success, memory_order failure);
5442+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object,
5443+ long *expected, long desired,
5444+ memory_order success,
5445+ memory_order failure);
5446+bool __ovld atomic_compare_exchange_strong_explicit(
5447+ volatile atomic_ulong *object, ulong *expected, ulong desired,
5448+ memory_order success, memory_order failure);
5449+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_ulong *object,
5450+ ulong *expected,
5451+ ulong desired,
5452+ memory_order success,
5453+ memory_order failure);
5454+#endif // __opencl_c_atomic_scope_device
5455+bool __ovld atomic_compare_exchange_strong_explicit(
5456+ volatile atomic_long *object, long *expected, long desired,
5457+ memory_order success, memory_order failure, memory_scope scope);
5458+bool __ovld atomic_compare_exchange_weak_explicit(volatile atomic_long *object,
5459+ long *expected, long desired,
5460+ memory_order success,
5461+ memory_order failure,
5462+ memory_scope scope);
5463+bool __ovld atomic_compare_exchange_strong_explicit(
5464+ volatile atomic_ulong *object, ulong *expected, ulong desired,
5465+ memory_order success, memory_order failure, memory_scope scope);
5466+bool __ovld atomic_compare_exchange_weak_explicit(
5467+ volatile atomic_ulong *object, ulong *expected, ulong desired,
5468+ memory_order success, memory_order failure, memory_scope scope);
5469+#endif // defined(cl_khr_int64_base_atomics) &&
5470+ // defined(cl_khr_int64_extended_atomics)
5471+#endif // __opencl_c_generic_address_space
5472+
5473+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
5474+#ifdef __opencl_c_atomic_scope_device
5475+bool __ovld atomic_compare_exchange_strong_explicit(
5476+ volatile atomic_int __global *object, int __global *expected, int desired,
5477+ memory_order success, memory_order failure);
5478+bool __ovld atomic_compare_exchange_strong_explicit(
5479+ volatile atomic_int __local *object, int __global *expected, int desired,
5480+ memory_order success, memory_order failure);
5481+bool __ovld atomic_compare_exchange_strong_explicit(
5482+ volatile atomic_int __global *object, int __local *expected, int desired,
5483+ memory_order success, memory_order failure);
5484+bool __ovld atomic_compare_exchange_strong_explicit(
5485+ volatile atomic_int __local *object, int __local *expected, int desired,
5486+ memory_order success, memory_order failure);
5487+bool __ovld atomic_compare_exchange_strong_explicit(
5488+ volatile atomic_int __global *object, int __private *expected, int desired,
5489+ memory_order success, memory_order failure);
5490+bool __ovld atomic_compare_exchange_strong_explicit(
5491+ volatile atomic_int __local *object, int __private *expected, int desired,
5492+ memory_order success, memory_order failure);
5493+bool __ovld atomic_compare_exchange_strong_explicit(
5494+ volatile atomic_uint __global *object, uint __global *expected,
5495+ uint desired, memory_order success, memory_order failure);
5496+bool __ovld atomic_compare_exchange_strong_explicit(
5497+ volatile atomic_uint __local *object, uint __global *expected, uint desired,
5498+ memory_order success, memory_order failure);
5499+bool __ovld atomic_compare_exchange_strong_explicit(
5500+ volatile atomic_uint __global *object, uint __local *expected, uint desired,
5501+ memory_order success, memory_order failure);
5502+bool __ovld atomic_compare_exchange_strong_explicit(
5503+ volatile atomic_uint __local *object, uint __local *expected, uint desired,
5504+ memory_order success, memory_order failure);
5505+bool __ovld atomic_compare_exchange_strong_explicit(
5506+ volatile atomic_uint __global *object, uint __private *expected,
5507+ uint desired, memory_order success, memory_order failure);
5508+bool __ovld atomic_compare_exchange_strong_explicit(
5509+ volatile atomic_uint __local *object, uint __private *expected,
5510+ uint desired, memory_order success, memory_order failure);
5511+bool __ovld atomic_compare_exchange_weak_explicit(
5512+ volatile atomic_int __global *object, int __global *expected, int desired,
5513+ memory_order success, memory_order failure);
5514+bool __ovld atomic_compare_exchange_weak_explicit(
5515+ volatile atomic_int __local *object, int __global *expected, int desired,
5516+ memory_order success, memory_order failure);
5517+bool __ovld atomic_compare_exchange_weak_explicit(
5518+ volatile atomic_int __global *object, int __local *expected, int desired,
5519+ memory_order success, memory_order failure);
5520+bool __ovld atomic_compare_exchange_weak_explicit(
5521+ volatile atomic_int __local *object, int __local *expected, int desired,
5522+ memory_order success, memory_order failure);
5523+bool __ovld atomic_compare_exchange_weak_explicit(
5524+ volatile atomic_int __global *object, int __private *expected, int desired,
5525+ memory_order success, memory_order failure);
5526+bool __ovld atomic_compare_exchange_weak_explicit(
5527+ volatile atomic_int __local *object, int __private *expected, int desired,
5528+ memory_order success, memory_order failure);
5529+bool __ovld atomic_compare_exchange_weak_explicit(
5530+ volatile atomic_uint __global *object, uint __global *expected,
5531+ uint desired, memory_order success, memory_order failure);
5532+bool __ovld atomic_compare_exchange_weak_explicit(
5533+ volatile atomic_uint __local *object, uint __global *expected, uint desired,
5534+ memory_order success, memory_order failure);
5535+bool __ovld atomic_compare_exchange_weak_explicit(
5536+ volatile atomic_uint __global *object, uint __local *expected, uint desired,
5537+ memory_order success, memory_order failure);
5538+bool __ovld atomic_compare_exchange_weak_explicit(
5539+ volatile atomic_uint __local *object, uint __local *expected, uint desired,
5540+ memory_order success, memory_order failure);
5541+bool __ovld atomic_compare_exchange_weak_explicit(
5542+ volatile atomic_uint __global *object, uint __private *expected,
5543+ uint desired, memory_order success, memory_order failure);
5544+bool __ovld atomic_compare_exchange_weak_explicit(
5545+ volatile atomic_uint __local *object, uint __private *expected,
5546+ uint desired, memory_order success, memory_order failure);
5547+bool __ovld atomic_compare_exchange_strong_explicit(
5548+ volatile atomic_float __global *object, float __global *expected,
5549+ float desired, memory_order success, memory_order failure);
5550+bool __ovld atomic_compare_exchange_strong_explicit(
5551+ volatile atomic_float __local *object, float __global *expected,
5552+ float desired, memory_order success, memory_order failure);
5553+bool __ovld atomic_compare_exchange_strong_explicit(
5554+ volatile atomic_float __global *object, float __local *expected,
5555+ float desired, memory_order success, memory_order failure);
5556+bool __ovld atomic_compare_exchange_strong_explicit(
5557+ volatile atomic_float __local *object, float __local *expected,
5558+ float desired, memory_order success, memory_order failure);
5559+bool __ovld atomic_compare_exchange_strong_explicit(
5560+ volatile atomic_float __global *object, float __private *expected,
5561+ float desired, memory_order success, memory_order failure);
5562+bool __ovld atomic_compare_exchange_strong_explicit(
5563+ volatile atomic_float __local *object, float __private *expected,
5564+ float desired, memory_order success, memory_order failure);
5565+bool __ovld atomic_compare_exchange_weak_explicit(
5566+ volatile atomic_float __global *object, float __global *expected,
5567+ float desired, memory_order success, memory_order failure);
5568+bool __ovld atomic_compare_exchange_weak_explicit(
5569+ volatile atomic_float __local *object, float __global *expected,
5570+ float desired, memory_order success, memory_order failure);
5571+bool __ovld atomic_compare_exchange_weak_explicit(
5572+ volatile atomic_float __global *object, float __local *expected,
5573+ float desired, memory_order success, memory_order failure);
5574+bool __ovld atomic_compare_exchange_weak_explicit(
5575+ volatile atomic_float __local *object, float __local *expected,
5576+ float desired, memory_order success, memory_order failure);
5577+bool __ovld atomic_compare_exchange_weak_explicit(
5578+ volatile atomic_float __global *object, float __private *expected,
5579+ float desired, memory_order success, memory_order failure);
5580+bool __ovld atomic_compare_exchange_weak_explicit(
5581+ volatile atomic_float __local *object, float __private *expected,
5582+ float desired, memory_order success, memory_order failure);
5583+#endif // __opencl_c_atomic_scope_device
5584+bool __ovld atomic_compare_exchange_strong_explicit(
5585+ volatile atomic_int __global *object, int __global *expected, int desired,
5586+ memory_order success, memory_order failure, memory_scope scope);
5587+bool __ovld atomic_compare_exchange_strong_explicit(
5588+ volatile atomic_int __local *object, int __global *expected, int desired,
5589+ memory_order success, memory_order failure, memory_scope scope);
5590+bool __ovld atomic_compare_exchange_strong_explicit(
5591+ volatile atomic_int __global *object, int __local *expected, int desired,
5592+ memory_order success, memory_order failure, memory_scope scope);
5593+bool __ovld atomic_compare_exchange_strong_explicit(
5594+ volatile atomic_int __local *object, int __local *expected, int desired,
5595+ memory_order success, memory_order failure, memory_scope scope);
5596+bool __ovld atomic_compare_exchange_strong_explicit(
5597+ volatile atomic_int __global *object, int __private *expected, int desired,
5598+ memory_order success, memory_order failure, memory_scope scope);
5599+bool __ovld atomic_compare_exchange_strong_explicit(
5600+ volatile atomic_int __local *object, int __private *expected, int desired,
5601+ memory_order success, memory_order failure, memory_scope scope);
5602+bool __ovld atomic_compare_exchange_strong_explicit(
5603+ volatile atomic_uint __global *object, uint __global *expected,
5604+ uint desired, memory_order success, memory_order failure,
5605+ memory_scope scope);
5606+bool __ovld atomic_compare_exchange_strong_explicit(
5607+ volatile atomic_uint __local *object, uint __global *expected, uint desired,
5608+ memory_order success, memory_order failure, memory_scope scope);
5609+bool __ovld atomic_compare_exchange_strong_explicit(
5610+ volatile atomic_uint __global *object, uint __local *expected, uint desired,
5611+ memory_order success, memory_order failure, memory_scope scope);
5612+bool __ovld atomic_compare_exchange_strong_explicit(
5613+ volatile atomic_uint __local *object, uint __local *expected, uint desired,
5614+ memory_order success, memory_order failure, memory_scope scope);
5615+bool __ovld atomic_compare_exchange_strong_explicit(
5616+ volatile atomic_uint __global *object, uint __private *expected,
5617+ uint desired, memory_order success, memory_order failure,
5618+ memory_scope scope);
5619+bool __ovld atomic_compare_exchange_strong_explicit(
5620+ volatile atomic_uint __local *object, uint __private *expected,
5621+ uint desired, memory_order success, memory_order failure,
5622+ memory_scope scope);
5623+bool __ovld atomic_compare_exchange_weak_explicit(
5624+ volatile atomic_int __global *object, int __global *expected, int desired,
5625+ memory_order success, memory_order failure, memory_scope scope);
5626+bool __ovld atomic_compare_exchange_weak_explicit(
5627+ volatile atomic_int __local *object, int __global *expected, int desired,
5628+ memory_order success, memory_order failure, memory_scope scope);
5629+bool __ovld atomic_compare_exchange_weak_explicit(
5630+ volatile atomic_int __global *object, int __local *expected, int desired,
5631+ memory_order success, memory_order failure, memory_scope scope);
5632+bool __ovld atomic_compare_exchange_weak_explicit(
5633+ volatile atomic_int __local *object, int __local *expected, int desired,
5634+ memory_order success, memory_order failure, memory_scope scope);
5635+bool __ovld atomic_compare_exchange_weak_explicit(
5636+ volatile atomic_int __global *object, int __private *expected, int desired,
5637+ memory_order success, memory_order failure, memory_scope scope);
5638+bool __ovld atomic_compare_exchange_weak_explicit(
5639+ volatile atomic_int __local *object, int __private *expected, int desired,
5640+ memory_order success, memory_order failure, memory_scope scope);
5641+bool __ovld atomic_compare_exchange_weak_explicit(
5642+ volatile atomic_uint __global *object, uint __global *expected,
5643+ uint desired, memory_order success, memory_order failure,
5644+ memory_scope scope);
5645+bool __ovld atomic_compare_exchange_weak_explicit(
5646+ volatile atomic_uint __local *object, uint __global *expected, uint desired,
5647+ memory_order success, memory_order failure, memory_scope scope);
5648+bool __ovld atomic_compare_exchange_weak_explicit(
5649+ volatile atomic_uint __global *object, uint __local *expected, uint desired,
5650+ memory_order success, memory_order failure, memory_scope scope);
5651+bool __ovld atomic_compare_exchange_weak_explicit(
5652+ volatile atomic_uint __local *object, uint __local *expected, uint desired,
5653+ memory_order success, memory_order failure, memory_scope scope);
5654+bool __ovld atomic_compare_exchange_weak_explicit(
5655+ volatile atomic_uint __global *object, uint __private *expected,
5656+ uint desired, memory_order success, memory_order failure,
5657+ memory_scope scope);
5658+bool __ovld atomic_compare_exchange_weak_explicit(
5659+ volatile atomic_uint __local *object, uint __private *expected,
5660+ uint desired, memory_order success, memory_order failure,
5661+ memory_scope scope);
5662+bool __ovld atomic_compare_exchange_strong_explicit(
5663+ volatile atomic_float __global *object, float __global *expected,
5664+ float desired, memory_order success, memory_order failure,
5665+ memory_scope scope);
5666+bool __ovld atomic_compare_exchange_strong_explicit(
5667+ volatile atomic_float __local *object, float __global *expected,
5668+ float desired, memory_order success, memory_order failure,
5669+ memory_scope scope);
5670+bool __ovld atomic_compare_exchange_strong_explicit(
5671+ volatile atomic_float __global *object, float __local *expected,
5672+ float desired, memory_order success, memory_order failure,
5673+ memory_scope scope);
5674+bool __ovld atomic_compare_exchange_strong_explicit(
5675+ volatile atomic_float __local *object, float __local *expected,
5676+ float desired, memory_order success, memory_order failure,
5677+ memory_scope scope);
5678+bool __ovld atomic_compare_exchange_strong_explicit(
5679+ volatile atomic_float __global *object, float __private *expected,
5680+ float desired, memory_order success, memory_order failure,
5681+ memory_scope scope);
5682+bool __ovld atomic_compare_exchange_strong_explicit(
5683+ volatile atomic_float __local *object, float __private *expected,
5684+ float desired, memory_order success, memory_order failure,
5685+ memory_scope scope);
5686+bool __ovld atomic_compare_exchange_weak_explicit(
5687+ volatile atomic_float __global *object, float __global *expected,
5688+ float desired, memory_order success, memory_order failure,
5689+ memory_scope scope);
5690+bool __ovld atomic_compare_exchange_weak_explicit(
5691+ volatile atomic_float __local *object, float __global *expected,
5692+ float desired, memory_order success, memory_order failure,
5693+ memory_scope scope);
5694+bool __ovld atomic_compare_exchange_weak_explicit(
5695+ volatile atomic_float __global *object, float __local *expected,
5696+ float desired, memory_order success, memory_order failure,
5697+ memory_scope scope);
5698+bool __ovld atomic_compare_exchange_weak_explicit(
5699+ volatile atomic_float __local *object, float __local *expected,
5700+ float desired, memory_order success, memory_order failure,
5701+ memory_scope scope);
5702+bool __ovld atomic_compare_exchange_weak_explicit(
5703+ volatile atomic_float __global *object, float __private *expected,
5704+ float desired, memory_order success, memory_order failure,
5705+ memory_scope scope);
5706+bool __ovld atomic_compare_exchange_weak_explicit(
5707+ volatile atomic_float __local *object, float __private *expected,
5708+ float desired, memory_order success, memory_order failure,
5709+ memory_scope scope);
5710+#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
5711+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
5712+#ifdef __opencl_c_atomic_scope_device
5713+bool __ovld atomic_compare_exchange_strong_explicit(
5714+ volatile atomic_double __global *object, double __global *expected,
5715+ double desired, memory_order success, memory_order failure);
5716+bool __ovld atomic_compare_exchange_strong_explicit(
5717+ volatile atomic_double __local *object, double __global *expected,
5718+ double desired, memory_order success, memory_order failure);
5719+bool __ovld atomic_compare_exchange_strong_explicit(
5720+ volatile atomic_double __global *object, double __local *expected,
5721+ double desired, memory_order success, memory_order failure);
5722+bool __ovld atomic_compare_exchange_strong_explicit(
5723+ volatile atomic_double __local *object, double __local *expected,
5724+ double desired, memory_order success, memory_order failure);
5725+bool __ovld atomic_compare_exchange_strong_explicit(
5726+ volatile atomic_double __global *object, double __private *expected,
5727+ double desired, memory_order success, memory_order failure);
5728+bool __ovld atomic_compare_exchange_strong_explicit(
5729+ volatile atomic_double __local *object, double __private *expected,
5730+ double desired, memory_order success, memory_order failure);
5731+bool __ovld atomic_compare_exchange_weak_explicit(
5732+ volatile atomic_double __global *object, double __global *expected,
5733+ double desired, memory_order success, memory_order failure);
5734+bool __ovld atomic_compare_exchange_weak_explicit(
5735+ volatile atomic_double __local *object, double __global *expected,
5736+ double desired, memory_order success, memory_order failure);
5737+bool __ovld atomic_compare_exchange_weak_explicit(
5738+ volatile atomic_double __global *object, double __local *expected,
5739+ double desired, memory_order success, memory_order failure);
5740+bool __ovld atomic_compare_exchange_weak_explicit(
5741+ volatile atomic_double __local *object, double __local *expected,
5742+ double desired, memory_order success, memory_order failure);
5743+bool __ovld atomic_compare_exchange_weak_explicit(
5744+ volatile atomic_double __global *object, double __private *expected,
5745+ double desired, memory_order success, memory_order failure);
5746+bool __ovld atomic_compare_exchange_weak_explicit(
5747+ volatile atomic_double __local *object, double __private *expected,
5748+ double desired, memory_order success, memory_order failure);
5749+#endif // __opencl_c_atomic_scope_device
5750+bool __ovld atomic_compare_exchange_strong_explicit(
5751+ volatile atomic_double __global *object, double __global *expected,
5752+ double desired, memory_order success, memory_order failure,
5753+ memory_scope scope);
5754+bool __ovld atomic_compare_exchange_strong_explicit(
5755+ volatile atomic_double __local *object, double __global *expected,
5756+ double desired, memory_order success, memory_order failure,
5757+ memory_scope scope);
5758+bool __ovld atomic_compare_exchange_strong_explicit(
5759+ volatile atomic_double __global *object, double __local *expected,
5760+ double desired, memory_order success, memory_order failure,
5761+ memory_scope scope);
5762+bool __ovld atomic_compare_exchange_strong_explicit(
5763+ volatile atomic_double __local *object, double __local *expected,
5764+ double desired, memory_order success, memory_order failure,
5765+ memory_scope scope);
5766+bool __ovld atomic_compare_exchange_strong_explicit(
5767+ volatile atomic_double __global *object, double __private *expected,
5768+ double desired, memory_order success, memory_order failure,
5769+ memory_scope scope);
5770+bool __ovld atomic_compare_exchange_strong_explicit(
5771+ volatile atomic_double __local *object, double __private *expected,
5772+ double desired, memory_order success, memory_order failure,
5773+ memory_scope scope);
5774+bool __ovld atomic_compare_exchange_weak_explicit(
5775+ volatile atomic_double __global *object, double __global *expected,
5776+ double desired, memory_order success, memory_order failure,
5777+ memory_scope scope);
5778+bool __ovld atomic_compare_exchange_weak_explicit(
5779+ volatile atomic_double __local *object, double __global *expected,
5780+ double desired, memory_order success, memory_order failure,
5781+ memory_scope scope);
5782+bool __ovld atomic_compare_exchange_weak_explicit(
5783+ volatile atomic_double __global *object, double __local *expected,
5784+ double desired, memory_order success, memory_order failure,
5785+ memory_scope scope);
5786+bool __ovld atomic_compare_exchange_weak_explicit(
5787+ volatile atomic_double __local *object, double __local *expected,
5788+ double desired, memory_order success, memory_order failure,
5789+ memory_scope scope);
5790+bool __ovld atomic_compare_exchange_weak_explicit(
5791+ volatile atomic_double __global *object, double __private *expected,
5792+ double desired, memory_order success, memory_order failure,
5793+ memory_scope scope);
5794+bool __ovld atomic_compare_exchange_weak_explicit(
5795+ volatile atomic_double __local *object, double __private *expected,
5796+ double desired, memory_order success, memory_order failure,
5797+ memory_scope scope);
5798+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
5799+#ifdef __opencl_c_atomic_scope_device
5800+bool __ovld atomic_compare_exchange_strong_explicit(
5801+ volatile atomic_long __global *object, long __global *expected,
5802+ long desired, memory_order success, memory_order failure);
5803+bool __ovld atomic_compare_exchange_strong_explicit(
5804+ volatile atomic_long __local *object, long __global *expected, long desired,
5805+ memory_order success, memory_order failure);
5806+bool __ovld atomic_compare_exchange_strong_explicit(
5807+ volatile atomic_long __global *object, long __local *expected, long desired,
5808+ memory_order success, memory_order failure);
5809+bool __ovld atomic_compare_exchange_strong_explicit(
5810+ volatile atomic_long __local *object, long __local *expected, long desired,
5811+ memory_order success, memory_order failure);
5812+bool __ovld atomic_compare_exchange_strong_explicit(
5813+ volatile atomic_long __global *object, long __private *expected,
5814+ long desired, memory_order success, memory_order failure);
5815+bool __ovld atomic_compare_exchange_strong_explicit(
5816+ volatile atomic_long __local *object, long __private *expected,
5817+ long desired, memory_order success, memory_order failure);
5818+bool __ovld atomic_compare_exchange_weak_explicit(
5819+ volatile atomic_long __global *object, long __global *expected,
5820+ long desired, memory_order success, memory_order failure);
5821+bool __ovld atomic_compare_exchange_weak_explicit(
5822+ volatile atomic_long __local *object, long __global *expected, long desired,
5823+ memory_order success, memory_order failure);
5824+bool __ovld atomic_compare_exchange_weak_explicit(
5825+ volatile atomic_long __global *object, long __local *expected, long desired,
5826+ memory_order success, memory_order failure);
5827+bool __ovld atomic_compare_exchange_weak_explicit(
5828+ volatile atomic_long __local *object, long __local *expected, long desired,
5829+ memory_order success, memory_order failure);
5830+bool __ovld atomic_compare_exchange_weak_explicit(
5831+ volatile atomic_long __global *object, long __private *expected,
5832+ long desired, memory_order success, memory_order failure);
5833+bool __ovld atomic_compare_exchange_weak_explicit(
5834+ volatile atomic_long __local *object, long __private *expected,
5835+ long desired, memory_order success, memory_order failure);
5836+bool __ovld atomic_compare_exchange_strong_explicit(
5837+ volatile atomic_ulong __global *object, ulong __global *expected,
5838+ ulong desired, memory_order success, memory_order failure);
5839+bool __ovld atomic_compare_exchange_strong_explicit(
5840+ volatile atomic_ulong __local *object, ulong __global *expected,
5841+ ulong desired, memory_order success, memory_order failure);
5842+bool __ovld atomic_compare_exchange_strong_explicit(
5843+ volatile atomic_ulong __global *object, ulong __local *expected,
5844+ ulong desired, memory_order success, memory_order failure);
5845+bool __ovld atomic_compare_exchange_strong_explicit(
5846+ volatile atomic_ulong __local *object, ulong __local *expected,
5847+ ulong desired, memory_order success, memory_order failure);
5848+bool __ovld atomic_compare_exchange_strong_explicit(
5849+ volatile atomic_ulong __global *object, ulong __private *expected,
5850+ ulong desired, memory_order success, memory_order failure);
5851+bool __ovld atomic_compare_exchange_strong_explicit(
5852+ volatile atomic_ulong __local *object, ulong __private *expected,
5853+ ulong desired, memory_order success, memory_order failure);
5854+bool __ovld atomic_compare_exchange_weak_explicit(
5855+ volatile atomic_ulong __global *object, ulong __global *expected,
5856+ ulong desired, memory_order success, memory_order failure);
5857+bool __ovld atomic_compare_exchange_weak_explicit(
5858+ volatile atomic_ulong __local *object, ulong __global *expected,
5859+ ulong desired, memory_order success, memory_order failure);
5860+bool __ovld atomic_compare_exchange_weak_explicit(
5861+ volatile atomic_ulong __global *object, ulong __local *expected,
5862+ ulong desired, memory_order success, memory_order failure);
5863+bool __ovld atomic_compare_exchange_weak_explicit(
5864+ volatile atomic_ulong __local *object, ulong __local *expected,
5865+ ulong desired, memory_order success, memory_order failure);
5866+bool __ovld atomic_compare_exchange_weak_explicit(
5867+ volatile atomic_ulong __global *object, ulong __private *expected,
5868+ ulong desired, memory_order success, memory_order failure);
5869+bool __ovld atomic_compare_exchange_weak_explicit(
5870+ volatile atomic_ulong __local *object, ulong __private *expected,
5871+ ulong desired, memory_order success, memory_order failure);
5872+#endif // __opencl_c_atomic_scope_device
5873+bool __ovld atomic_compare_exchange_strong_explicit(
5874+ volatile atomic_long __global *object, long __global *expected,
5875+ long desired, memory_order success, memory_order failure,
5876+ memory_scope scope);
5877+bool __ovld atomic_compare_exchange_strong_explicit(
5878+ volatile atomic_long __local *object, long __global *expected, long desired,
5879+ memory_order success, memory_order failure, memory_scope scope);
5880+bool __ovld atomic_compare_exchange_strong_explicit(
5881+ volatile atomic_long __global *object, long __local *expected, long desired,
5882+ memory_order success, memory_order failure, memory_scope scope);
5883+bool __ovld atomic_compare_exchange_strong_explicit(
5884+ volatile atomic_long __local *object, long __local *expected, long desired,
5885+ memory_order success, memory_order failure, memory_scope scope);
5886+bool __ovld atomic_compare_exchange_strong_explicit(
5887+ volatile atomic_long __global *object, long __private *expected,
5888+ long desired, memory_order success, memory_order failure,
5889+ memory_scope scope);
5890+bool __ovld atomic_compare_exchange_strong_explicit(
5891+ volatile atomic_long __local *object, long __private *expected,
5892+ long desired, memory_order success, memory_order failure,
5893+ memory_scope scope);
5894+bool __ovld atomic_compare_exchange_weak_explicit(
5895+ volatile atomic_long __global *object, long __global *expected,
5896+ long desired, memory_order success, memory_order failure,
5897+ memory_scope scope);
5898+bool __ovld atomic_compare_exchange_weak_explicit(
5899+ volatile atomic_long __local *object, long __global *expected, long desired,
5900+ memory_order success, memory_order failure, memory_scope scope);
5901+bool __ovld atomic_compare_exchange_weak_explicit(
5902+ volatile atomic_long __global *object, long __local *expected, long desired,
5903+ memory_order success, memory_order failure, memory_scope scope);
5904+bool __ovld atomic_compare_exchange_weak_explicit(
5905+ volatile atomic_long __local *object, long __local *expected, long desired,
5906+ memory_order success, memory_order failure, memory_scope scope);
5907+bool __ovld atomic_compare_exchange_weak_explicit(
5908+ volatile atomic_long __global *object, long __private *expected,
5909+ long desired, memory_order success, memory_order failure,
5910+ memory_scope scope);
5911+bool __ovld atomic_compare_exchange_weak_explicit(
5912+ volatile atomic_long __local *object, long __private *expected,
5913+ long desired, memory_order success, memory_order failure,
5914+ memory_scope scope);
5915+bool __ovld atomic_compare_exchange_strong_explicit(
5916+ volatile atomic_ulong __global *object, ulong __global *expected,
5917+ ulong desired, memory_order success, memory_order failure,
5918+ memory_scope scope);
5919+bool __ovld atomic_compare_exchange_strong_explicit(
5920+ volatile atomic_ulong __local *object, ulong __global *expected,
5921+ ulong desired, memory_order success, memory_order failure,
5922+ memory_scope scope);
5923+bool __ovld atomic_compare_exchange_strong_explicit(
5924+ volatile atomic_ulong __global *object, ulong __local *expected,
5925+ ulong desired, memory_order success, memory_order failure,
5926+ memory_scope scope);
5927+bool __ovld atomic_compare_exchange_strong_explicit(
5928+ volatile atomic_ulong __local *object, ulong __local *expected,
5929+ ulong desired, memory_order success, memory_order failure,
5930+ memory_scope scope);
5931+bool __ovld atomic_compare_exchange_strong_explicit(
5932+ volatile atomic_ulong __global *object, ulong __private *expected,
5933+ ulong desired, memory_order success, memory_order failure,
5934+ memory_scope scope);
5935+bool __ovld atomic_compare_exchange_strong_explicit(
5936+ volatile atomic_ulong __local *object, ulong __private *expected,
5937+ ulong desired, memory_order success, memory_order failure,
5938+ memory_scope scope);
5939+bool __ovld atomic_compare_exchange_weak_explicit(
5940+ volatile atomic_ulong __global *object, ulong __global *expected,
5941+ ulong desired, memory_order success, memory_order failure,
5942+ memory_scope scope);
5943+bool __ovld atomic_compare_exchange_weak_explicit(
5944+ volatile atomic_ulong __local *object, ulong __global *expected,
5945+ ulong desired, memory_order success, memory_order failure,
5946+ memory_scope scope);
5947+bool __ovld atomic_compare_exchange_weak_explicit(
5948+ volatile atomic_ulong __global *object, ulong __local *expected,
5949+ ulong desired, memory_order success, memory_order failure,
5950+ memory_scope scope);
5951+bool __ovld atomic_compare_exchange_weak_explicit(
5952+ volatile atomic_ulong __local *object, ulong __local *expected,
5953+ ulong desired, memory_order success, memory_order failure,
5954+ memory_scope scope);
5955+bool __ovld atomic_compare_exchange_weak_explicit(
5956+ volatile atomic_ulong __global *object, ulong __private *expected,
5957+ ulong desired, memory_order success, memory_order failure,
5958+ memory_scope scope);
5959+bool __ovld atomic_compare_exchange_weak_explicit(
5960+ volatile atomic_ulong __local *object, ulong __private *expected,
5961+ ulong desired, memory_order success, memory_order failure,
5962+ memory_scope scope);
5963+#endif // defined(cl_khr_int64_base_atomics) &&
5964+ // defined(cl_khr_int64_extended_atomics)
5965+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
5966
5967 // atomic_flag_test_and_set() and atomic_flag_clear()
5968-
5969+#if defined(__opencl_c_atomic_scope_device) && \
5970+ defined(__opencl_c_atomic_order_seq_cst)
5971+#ifdef __opencl_c_generic_address_space
5972 bool __ovld atomic_flag_test_and_set(volatile atomic_flag *object);
5973-bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order);
5974-bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);
5975 void __ovld atomic_flag_clear(volatile atomic_flag *object);
5976-void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order);
5977-void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);
5978+#endif // __opencl_c_generic_address_space
5979+
5980+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
5981+bool __ovld atomic_flag_test_and_set(volatile atomic_flag __global *object);
5982+bool __ovld atomic_flag_test_and_set(volatile atomic_flag __local *object);
5983+void __ovld atomic_flag_clear(volatile atomic_flag __global *object);
5984+void __ovld atomic_flag_clear(volatile atomic_flag __local *object);
5985+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
5986+#endif // defined(__opencl_c_atomic_scope_device) &&
5987+ // defined(__opencl_c_atomic_order_seq_cst)
5988+
5989+#ifdef __opencl_c_generic_address_space
5990+#ifdef __opencl_c_atomic_scope_device
5991+bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object,
5992+ memory_order order);
5993+void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object,
5994+ memory_order order);
5995+#endif // __opencl_c_atomic_scope_device
5996+bool __ovld atomic_flag_test_and_set_explicit(volatile atomic_flag *object,
5997+ memory_order order,
5998+ memory_scope scope);
5999+void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object,
6000+ memory_order order, memory_scope scope);
6001+#endif // __opencl_c_generic_address_space
6002+
6003+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
6004+#ifdef __opencl_c_atomic_scope_device
6005+bool __ovld atomic_flag_test_and_set_explicit(
6006+ volatile atomic_flag __global *object, memory_order order);
6007+bool __ovld atomic_flag_test_and_set_explicit(
6008+ volatile atomic_flag __local *object, memory_order order);
6009+void __ovld atomic_flag_clear_explicit(volatile atomic_flag __global *object,
6010+ memory_order order);
6011+void __ovld atomic_flag_clear_explicit(volatile atomic_flag __local *object,
6012+ memory_order order);
6013+#endif // __opencl_c_atomic_scope_device
6014+bool __ovld
6015+atomic_flag_test_and_set_explicit(volatile atomic_flag __global *object,
6016+ memory_order order, memory_scope scope);
6017+bool __ovld
6018+atomic_flag_test_and_set_explicit(volatile atomic_flag __local *object,
6019+ memory_order order, memory_scope scope);
6020+void __ovld atomic_flag_clear_explicit(volatile atomic_flag __global *object,
6021+ memory_order order, memory_scope scope);
6022+void __ovld atomic_flag_clear_explicit(volatile atomic_flag __local *object,
6023+ memory_order order, memory_scope scope);
6024+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
6025
6026 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6027
6028@@ -13918,7 +15943,7 @@ float16 __ovld __cnfn shuffle(float4 x, uint16 mask);
6029 float16 __ovld __cnfn shuffle(float8 x, uint16 mask);
6030 float16 __ovld __cnfn shuffle(float16 x, uint16 mask);
6031
6032-#ifdef cl_khr_fp64
6033+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
6034 double2 __ovld __cnfn shuffle(double2 x, ulong2 mask);
6035 double2 __ovld __cnfn shuffle(double4 x, ulong2 mask);
6036 double2 __ovld __cnfn shuffle(double8 x, ulong2 mask);
6037@@ -13938,7 +15963,7 @@ double16 __ovld __cnfn shuffle(double2 x, ulong16 mask);
6038 double16 __ovld __cnfn shuffle(double4 x, ulong16 mask);
6039 double16 __ovld __cnfn shuffle(double8 x, ulong16 mask);
6040 double16 __ovld __cnfn shuffle(double16 x, ulong16 mask);
6041-#endif //cl_khr_fp64
6042+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
6043
6044 #ifdef cl_khr_fp16
6045 half2 __ovld __cnfn shuffle(half2 x, ushort2 mask);
6046@@ -14142,7 +16167,7 @@ float16 __ovld __cnfn shuffle2(float4 x, float4 y, uint16 mask);
6047 float16 __ovld __cnfn shuffle2(float8 x, float8 y, uint16 mask);
6048 float16 __ovld __cnfn shuffle2(float16 x, float16 y, uint16 mask);
6049
6050-#ifdef cl_khr_fp64
6051+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
6052 double2 __ovld __cnfn shuffle2(double2 x, double2 y, ulong2 mask);
6053 double2 __ovld __cnfn shuffle2(double4 x, double4 y, ulong2 mask);
6054 double2 __ovld __cnfn shuffle2(double8 x, double8 y, ulong2 mask);
6055@@ -14162,7 +16187,7 @@ double16 __ovld __cnfn shuffle2(double2 x, double2 y, ulong16 mask);
6056 double16 __ovld __cnfn shuffle2(double4 x, double4 y, ulong16 mask);
6057 double16 __ovld __cnfn shuffle2(double8 x, double8 y, ulong16 mask);
6058 double16 __ovld __cnfn shuffle2(double16 x, double16 y, ulong16 mask);
6059-#endif //cl_khr_fp64
6060+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
6061
6062 #ifdef cl_khr_fp16
6063 half2 __ovld __cnfn shuffle2(half2 x, half2 y, ushort2 mask);
6064@@ -14198,6 +16223,7 @@ int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)))
6065 #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable
6066 #endif //cl_khr_gl_msaa_sharing
6067
6068+#ifdef __opencl_c_images
6069 /**
6070 * Use the coordinate (coord.xy) to do an element lookup in
6071 * the 2D image object specified by image.
6072@@ -14476,6 +16502,7 @@ half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);
6073
6074 // Image read functions for read_write images
6075 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6076+#ifdef __opencl_c_read_write_images
6077 float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord);
6078 int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord);
6079 uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord);
6080@@ -14519,6 +16546,7 @@ float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, i
6081 #endif //cl_khr_gl_msaa_sharing
6082
6083 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6084+#ifdef __opencl_c_read_write_images
6085 #ifdef cl_khr_mipmap_image
6086 float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);
6087 int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);
6088@@ -14569,6 +16597,7 @@ int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler,
6089 uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
6090
6091 #endif //cl_khr_mipmap_image
6092+#endif //__opencl_c_read_write_images
6093 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6094
6095 // Image read functions returning half4 type
6096@@ -14580,6 +16609,7 @@ half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord);
6097 half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord);
6098 half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord);
6099 #endif //cl_khr_fp16
6100+#endif //__opencl_c_read_write_images
6101 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6102
6103 /**
6104@@ -14669,7 +16699,7 @@ void __ovld write_imagef(write_only image1d_array_t image_array, int2 coord, flo
6105 void __ovld write_imagei(write_only image1d_array_t image_array, int2 coord, int4 color);
6106 void __ovld write_imageui(write_only image1d_array_t image_array, int2 coord, uint4 color);
6107
6108-#ifdef cl_khr_3d_image_writes
6109+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6110 void __ovld write_imagef(write_only image3d_t image, int4 coord, float4 color);
6111 void __ovld write_imagei(write_only image3d_t image, int4 coord, int4 color);
6112 void __ovld write_imageui(write_only image3d_t image, int4 coord, uint4 color);
6113@@ -14702,7 +16732,7 @@ void __ovld write_imageui(write_only image2d_array_t image_array, int4 coord, in
6114 void __ovld write_imagef(write_only image2d_depth_t image, int2 coord, int lod, float depth);
6115 void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, int lod, float depth);
6116
6117-#ifdef cl_khr_3d_image_writes
6118+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6119 void __ovld write_imagef(write_only image3d_t image, int4 coord, int lod, float4 color);
6120 void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 color);
6121 void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color);
6122@@ -14715,7 +16745,7 @@ void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4
6123 #ifdef cl_khr_fp16
6124 void __ovld write_imageh(write_only image1d_t image, int coord, half4 color);
6125 void __ovld write_imageh(write_only image2d_t image, int2 coord, half4 color);
6126-#ifdef cl_khr_3d_image_writes
6127+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6128 void __ovld write_imageh(write_only image3d_t image, int4 coord, half4 color);
6129 #endif
6130 void __ovld write_imageh(write_only image1d_array_t image, int2 coord, half4 color);
6131@@ -14725,6 +16755,7 @@ void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 col
6132
6133 // Image write functions for read_write images
6134 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6135+#ifdef __opencl_c_read_write_images
6136 void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color);
6137 void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color);
6138 void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color);
6139@@ -14745,7 +16776,7 @@ void __ovld write_imagef(read_write image1d_array_t image_array, int2 coord, flo
6140 void __ovld write_imagei(read_write image1d_array_t image_array, int2 coord, int4 color);
6141 void __ovld write_imageui(read_write image1d_array_t image_array, int2 coord, uint4 color);
6142
6143-#ifdef cl_khr_3d_image_writes
6144+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6145 void __ovld write_imagef(read_write image3d_t image, int4 coord, float4 color);
6146 void __ovld write_imagei(read_write image3d_t image, int4 coord, int4 color);
6147 void __ovld write_imageui(read_write image3d_t image, int4 coord, uint4 color);
6148@@ -14777,7 +16808,7 @@ void __ovld write_imageui(read_write image2d_array_t image_array, int4 coord, in
6149 void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, int lod, float color);
6150 void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, int lod, float color);
6151
6152-#ifdef cl_khr_3d_image_writes
6153+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6154 void __ovld write_imagef(read_write image3d_t image, int4 coord, int lod, float4 color);
6155 void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 color);
6156 void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color);
6157@@ -14790,13 +16821,14 @@ void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4
6158 #ifdef cl_khr_fp16
6159 void __ovld write_imageh(read_write image1d_t image, int coord, half4 color);
6160 void __ovld write_imageh(read_write image2d_t image, int2 coord, half4 color);
6161-#ifdef cl_khr_3d_image_writes
6162+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6163 void __ovld write_imageh(read_write image3d_t image, int4 coord, half4 color);
6164 #endif
6165 void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 color);
6166 void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color);
6167 void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color);
6168 #endif //cl_khr_fp16
6169+#endif //__opencl_c_read_write_images
6170 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6171
6172 // Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have
6173@@ -14810,7 +16842,7 @@ void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 col
6174 int __ovld __cnfn get_image_width(read_only image1d_t image);
6175 int __ovld __cnfn get_image_width(read_only image1d_buffer_t image);
6176 int __ovld __cnfn get_image_width(read_only image2d_t image);
6177-#ifdef cl_khr_3d_image_writes
6178+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6179 int __ovld __cnfn get_image_width(read_only image3d_t image);
6180 #endif
6181 int __ovld __cnfn get_image_width(read_only image1d_array_t image);
6182@@ -14829,7 +16861,7 @@ int __ovld __cnfn get_image_width(read_only image2d_array_msaa_depth_t image);
6183 int __ovld __cnfn get_image_width(write_only image1d_t image);
6184 int __ovld __cnfn get_image_width(write_only image1d_buffer_t image);
6185 int __ovld __cnfn get_image_width(write_only image2d_t image);
6186-#ifdef cl_khr_3d_image_writes
6187+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6188 int __ovld __cnfn get_image_width(write_only image3d_t image);
6189 #endif
6190 int __ovld __cnfn get_image_width(write_only image1d_array_t image);
6191@@ -14846,6 +16878,7 @@ int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image);
6192 #endif //cl_khr_gl_msaa_sharing
6193
6194 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6195+#ifdef __opencl_c_read_write_images
6196 int __ovld __cnfn get_image_width(read_write image1d_t image);
6197 int __ovld __cnfn get_image_width(read_write image1d_buffer_t image);
6198 int __ovld __cnfn get_image_width(read_write image2d_t image);
6199@@ -14862,6 +16895,7 @@ int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image);
6200 int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image);
6201 int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image);
6202 #endif //cl_khr_gl_msaa_sharing
6203+#endif //__opencl_c_read_write_images
6204 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6205
6206 /**
6207@@ -14882,7 +16916,7 @@ int __ovld __cnfn get_image_height(read_only image2d_array_msaa_depth_t image);
6208 #endif //cl_khr_gl_msaa_sharing
6209
6210 int __ovld __cnfn get_image_height(write_only image2d_t image);
6211-#ifdef cl_khr_3d_image_writes
6212+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6213 int __ovld __cnfn get_image_height(write_only image3d_t image);
6214 #endif
6215 int __ovld __cnfn get_image_height(write_only image2d_array_t image);
6216@@ -14898,6 +16932,7 @@ int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image);
6217 #endif //cl_khr_gl_msaa_sharing
6218
6219 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6220+#ifdef __opencl_c_read_write_images
6221 int __ovld __cnfn get_image_height(read_write image2d_t image);
6222 int __ovld __cnfn get_image_height(read_write image3d_t image);
6223 int __ovld __cnfn get_image_height(read_write image2d_array_t image);
6224@@ -14911,6 +16946,7 @@ int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image);
6225 int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image);
6226 int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image);
6227 #endif //cl_khr_gl_msaa_sharing
6228+#endif //__opencl_c_read_write_images
6229 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6230
6231 /**
6232@@ -14918,12 +16954,14 @@ int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image);
6233 */
6234 int __ovld __cnfn get_image_depth(read_only image3d_t image);
6235
6236-#ifdef cl_khr_3d_image_writes
6237+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6238 int __ovld __cnfn get_image_depth(write_only image3d_t image);
6239 #endif
6240
6241 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6242+#ifdef __opencl_c_read_write_images
6243 int __ovld __cnfn get_image_depth(read_write image3d_t image);
6244+#endif //__opencl_c_read_write_images
6245 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6246
6247 // OpenCL Extension v2.0 s9.18 - Mipmaps
6248@@ -14939,13 +16977,15 @@ int __ovld get_image_num_mip_levels(read_only image3d_t image);
6249
6250 int __ovld get_image_num_mip_levels(write_only image1d_t image);
6251 int __ovld get_image_num_mip_levels(write_only image2d_t image);
6252-#ifdef cl_khr_3d_image_writes
6253+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6254 int __ovld get_image_num_mip_levels(write_only image3d_t image);
6255 #endif
6256
6257+#ifdef __opencl_c_read_write_images
6258 int __ovld get_image_num_mip_levels(read_write image1d_t image);
6259 int __ovld get_image_num_mip_levels(read_write image2d_t image);
6260 int __ovld get_image_num_mip_levels(read_write image3d_t image);
6261+#endif //__opencl_c_read_write_images
6262
6263 int __ovld get_image_num_mip_levels(read_only image1d_array_t image);
6264 int __ovld get_image_num_mip_levels(read_only image2d_array_t image);
6265@@ -14957,10 +16997,12 @@ int __ovld get_image_num_mip_levels(write_only image2d_array_t image);
6266 int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image);
6267 int __ovld get_image_num_mip_levels(write_only image2d_depth_t image);
6268
6269+#ifdef __opencl_c_read_write_images
6270 int __ovld get_image_num_mip_levels(read_write image1d_array_t image);
6271 int __ovld get_image_num_mip_levels(read_write image2d_array_t image);
6272 int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image);
6273 int __ovld get_image_num_mip_levels(read_write image2d_depth_t image);
6274+#endif //__opencl_c_read_write_images
6275
6276 #endif //cl_khr_mipmap_image
6277 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6278@@ -15004,7 +17046,7 @@ int __ovld __cnfn get_image_channel_data_type(read_only image2d_array_msaa_depth
6279 int __ovld __cnfn get_image_channel_data_type(write_only image1d_t image);
6280 int __ovld __cnfn get_image_channel_data_type(write_only image1d_buffer_t image);
6281 int __ovld __cnfn get_image_channel_data_type(write_only image2d_t image);
6282-#ifdef cl_khr_3d_image_writes
6283+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6284 int __ovld __cnfn get_image_channel_data_type(write_only image3d_t image);
6285 #endif
6286 int __ovld __cnfn get_image_channel_data_type(write_only image1d_array_t image);
6287@@ -15021,6 +17063,7 @@ int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_dept
6288 #endif //cl_khr_gl_msaa_sharing
6289
6290 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6291+#ifdef __opencl_c_read_write_images
6292 int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image);
6293 int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image);
6294 int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image);
6295@@ -15037,6 +17080,7 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t im
6296 int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image);
6297 int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image);
6298 #endif //cl_khr_gl_msaa_sharing
6299+#endif //__opencl_c_read_write_images
6300 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6301
6302 /**
6303@@ -15076,7 +17120,7 @@ int __ovld __cnfn get_image_channel_order(read_only image2d_array_msaa_depth_t i
6304 int __ovld __cnfn get_image_channel_order(write_only image1d_t image);
6305 int __ovld __cnfn get_image_channel_order(write_only image1d_buffer_t image);
6306 int __ovld __cnfn get_image_channel_order(write_only image2d_t image);
6307-#ifdef cl_khr_3d_image_writes
6308+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6309 int __ovld __cnfn get_image_channel_order(write_only image3d_t image);
6310 #endif
6311 int __ovld __cnfn get_image_channel_order(write_only image1d_array_t image);
6312@@ -15093,6 +17137,7 @@ int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t
6313 #endif //cl_khr_gl_msaa_sharing
6314
6315 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6316+#ifdef __opencl_c_read_write_images
6317 int __ovld __cnfn get_image_channel_order(read_write image1d_t image);
6318 int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image);
6319 int __ovld __cnfn get_image_channel_order(read_write image2d_t image);
6320@@ -15109,6 +17154,7 @@ int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image)
6321 int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image);
6322 int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image);
6323 #endif //cl_khr_gl_msaa_sharing
6324+#endif //__opencl_c_read_write_images
6325 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6326
6327 /**
6328@@ -15143,6 +17189,7 @@ int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image);
6329 #endif //cl_khr_gl_msaa_sharing
6330
6331 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6332+#ifdef __opencl_c_read_write_images
6333 int2 __ovld __cnfn get_image_dim(read_write image2d_t image);
6334 int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image);
6335 #ifdef cl_khr_depth_images
6336@@ -15155,6 +17202,7 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image);
6337 int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image);
6338 int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image);
6339 #endif //cl_khr_gl_msaa_sharing
6340+#endif //__opencl_c_read_write_images
6341 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6342
6343 /**
6344@@ -15164,11 +17212,13 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image);
6345 * component and the w component is 0.
6346 */
6347 int4 __ovld __cnfn get_image_dim(read_only image3d_t image);
6348-#ifdef cl_khr_3d_image_writes
6349+#if defined(cl_khr_3d_image_writes) || defined(__opencl_c_3d_image_writes)
6350 int4 __ovld __cnfn get_image_dim(write_only image3d_t image);
6351 #endif
6352 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6353+#ifdef __opencl_c_read_write_images
6354 int4 __ovld __cnfn get_image_dim(read_write image3d_t image);
6355+#endif //__opencl_c_read_write_images
6356 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6357
6358 /**
6359@@ -15196,6 +17246,7 @@ size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t
6360 #endif //cl_khr_gl_msaa_sharing
6361
6362 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6363+#ifdef __opencl_c_read_write_images
6364 size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array);
6365 size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array);
6366 #ifdef cl_khr_depth_images
6367@@ -15205,6 +17256,7 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image
6368 size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array);
6369 size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array);
6370 #endif //cl_khr_gl_msaa_sharing
6371+#endif //__opencl_c_read_write_images
6372 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6373
6374 /**
6375@@ -15222,16 +17274,21 @@ int __ovld get_image_num_samples(write_only image2d_array_msaa_t image);
6376 int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);
6377
6378 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6379+#ifdef __opencl_c_read_write_images
6380 int __ovld get_image_num_samples(read_write image2d_msaa_t image);
6381 int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image);
6382 int __ovld get_image_num_samples(read_write image2d_array_msaa_t image);
6383 int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);
6384+#endif //__opencl_c_read_write_images
6385 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6386 #endif
6387
6388+#endif //__opencl_c_images
6389+
6390 // OpenCL v2.0 s6.13.15 - Work-group Functions
6391
6392 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6393+#ifdef __opencl_c_work_group_collective_functions
6394 int __ovld __conv work_group_all(int predicate);
6395 int __ovld __conv work_group_any(int predicate);
6396
6397@@ -15255,11 +17312,11 @@ ulong __ovld __conv work_group_broadcast(ulong a, size_t x, size_t y, size_t z);
6398 float __ovld __conv work_group_broadcast(float a, size_t local_id);
6399 float __ovld __conv work_group_broadcast(float a, size_t x, size_t y);
6400 float __ovld __conv work_group_broadcast(float a, size_t x, size_t y, size_t z);
6401-#ifdef cl_khr_fp64
6402+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
6403 double __ovld __conv work_group_broadcast(double a, size_t local_id);
6404 double __ovld __conv work_group_broadcast(double a, size_t x, size_t y);
6405 double __ovld __conv work_group_broadcast(double a, size_t x, size_t y, size_t z);
6406-#endif //cl_khr_fp64
6407+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
6408
6409 #ifdef cl_khr_fp16
6410 half __ovld __conv work_group_reduce_add(half x);
6411@@ -15317,7 +17374,7 @@ float __ovld __conv work_group_scan_exclusive_max(float x);
6412 float __ovld __conv work_group_scan_inclusive_add(float x);
6413 float __ovld __conv work_group_scan_inclusive_min(float x);
6414 float __ovld __conv work_group_scan_inclusive_max(float x);
6415-#ifdef cl_khr_fp64
6416+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
6417 double __ovld __conv work_group_reduce_add(double x);
6418 double __ovld __conv work_group_reduce_min(double x);
6419 double __ovld __conv work_group_reduce_max(double x);
6420@@ -15327,19 +17384,18 @@ double __ovld __conv work_group_scan_exclusive_max(double x);
6421 double __ovld __conv work_group_scan_inclusive_add(double x);
6422 double __ovld __conv work_group_scan_inclusive_min(double x);
6423 double __ovld __conv work_group_scan_inclusive_max(double x);
6424-#endif //cl_khr_fp64
6425-
6426+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
6427+#endif //__opencl_c_work_group_collective_functions
6428 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6429
6430 // OpenCL v2.0 s6.13.16 - Pipe Functions
6431-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6432+#ifdef __opencl_c_pipes
6433 bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);
6434-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6435-
6436+#endif //__opencl_c_pipes
6437
6438 // OpenCL v2.0 s6.13.17 - Enqueue Kernels
6439 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6440-
6441+#ifdef __opencl_c_device_enqueue
6442 ndrange_t __ovld ndrange_1D(size_t);
6443 ndrange_t __ovld ndrange_1D(size_t, size_t);
6444 ndrange_t __ovld ndrange_1D(size_t, size_t, size_t);
6445@@ -15367,11 +17423,13 @@ bool __ovld is_valid_event (clk_event_t event);
6446 void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value);
6447
6448 queue_t __ovld get_default_queue(void);
6449+#endif //__opencl_c_device_enqueue
6450 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6451
6452 // OpenCL Extension v2.0 s9.17 - Sub-groups
6453
6454-#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
6455+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) || \
6456+ defined(__opencl_c_subgroups)
6457 // Shared Sub Group Functions
6458 uint __ovld get_sub_group_size(void);
6459 uint __ovld get_max_sub_group_size(void);
6460@@ -15457,7 +17515,7 @@ half __ovld __conv sub_group_scan_inclusive_min(half x);
6461 half __ovld __conv sub_group_scan_inclusive_max(half x);
6462 #endif //cl_khr_fp16
6463
6464-#ifdef cl_khr_fp64
6465+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
6466 double __ovld __conv sub_group_broadcast(double x, uint sub_group_local_id);
6467 double __ovld __conv sub_group_reduce_add(double x);
6468 double __ovld __conv sub_group_reduce_min(double x);
6469@@ -15468,7 +17526,7 @@ double __ovld __conv sub_group_scan_exclusive_max(double x);
6470 double __ovld __conv sub_group_scan_inclusive_add(double x);
6471 double __ovld __conv sub_group_scan_inclusive_min(double x);
6472 double __ovld __conv sub_group_scan_inclusive_max(double x);
6473-#endif //cl_khr_fp64
6474+#endif // defined(cl_khr_fp64) || defined(__opencl_c_fp64)
6475
6476 #endif //cl_khr_subgroups cl_intel_subgroups
6477
6478@@ -15570,16 +17628,22 @@ uint16 __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );
6479 long __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );
6480 ulong __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );
6481
6482+#ifdef __opencl_c_images
6483 uint __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );
6484 uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );
6485 uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );
6486 uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );
6487+#endif //__opencl_c_images
6488
6489 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6490+#ifdef __opencl_c_images
6491+#ifdef __opencl_c_read_write_images
6492 uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);
6493 uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);
6494 uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);
6495 uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);
6496+#endif //__opencl_c_read_write_images
6497+#endif //__opencl_c_images
6498 #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6499
6500 uint __ovld __conv intel_sub_group_block_read( const __global uint* p );
6501@@ -15587,16 +17651,22 @@ uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p );
6502 uint4 __ovld __conv intel_sub_group_block_read4( const __global uint* p );
6503 uint8 __ovld __conv intel_sub_group_block_read8( const __global uint* p );
6504
6505+#ifdef __opencl_c_images
6506 void __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);
6507 void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);
6508 void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);
6509 void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);
6510+#endif //__opencl_c_images
6511
6512 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6513+#ifdef __opencl_c_images
6514+#ifdef __opencl_c_read_write_images
6515 void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);
6516 void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);
6517 void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);
6518 void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);
6519+#endif // __opencl_c_read_write_images
6520+#endif // __opencl_c_images
6521 #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6522
6523 void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );
6524@@ -15611,7 +17681,7 @@ half __ovld __conv intel_sub_group_shuffle_up( half prev, half cur, uint c );
6525 half __ovld __conv intel_sub_group_shuffle_xor( half x, uint c );
6526 #endif
6527
6528-#if defined(cl_khr_fp64)
6529+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
6530 double __ovld __conv intel_sub_group_shuffle( double x, uint c );
6531 double __ovld __conv intel_sub_group_shuffle_down( double prev, double cur, uint c );
6532 double __ovld __conv intel_sub_group_shuffle_up( double prev, double cur, uint c );
6533@@ -15710,16 +17780,22 @@ ushort __ovld __conv intel_sub_group_scan_inclusive_min( ushort x );
6534 short __ovld __conv intel_sub_group_scan_inclusive_max( short x );
6535 ushort __ovld __conv intel_sub_group_scan_inclusive_max( ushort x );
6536
6537+#ifdef __opencl_c_images
6538 uint __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );
6539 uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );
6540 uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );
6541 uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );
6542+#endif //__opencl_c_images
6543
6544 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6545+#ifdef __opencl_c_images
6546+#ifdef __opencl_c_read_write_images
6547 uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );
6548 uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );
6549 uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );
6550 uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );
6551+#endif //__opencl_c_read_write_images
6552+#endif //__opencl_c_images
6553 #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6554
6555 uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );
6556@@ -15727,16 +17803,22 @@ uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p
6557 uint4 __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );
6558 uint8 __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );
6559
6560+#ifdef __opencl_c_images
6561 void __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );
6562 void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );
6563 void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );
6564 void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );
6565+#endif //__opencl_c_images
6566
6567 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6568+#ifdef __opencl_c_images
6569+#ifdef __opencl_c_read_write_images
6570 void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );
6571 void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );
6572 void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );
6573 void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );
6574+#endif //__opencl_c_read_write_images
6575+#endif //__opencl_c_images
6576 #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6577
6578 void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );
6579@@ -15744,16 +17826,22 @@ void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint
6580 void __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );
6581 void __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );
6582
6583+#ifdef __opencl_c_images
6584 ushort __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );
6585 ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );
6586 ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );
6587 ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );
6588+#endif //__opencl_c_images
6589
6590 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6591+#ifdef __opencl_c_images
6592+#ifdef __opencl_c_read_write_images
6593 ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);
6594 ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);
6595 ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);
6596 ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);
6597+#endif //__opencl_c_read_write_images
6598+#endif //__opencl_c_images
6599 #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6600
6601 ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p );
6602@@ -15761,16 +17849,22 @@ ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort*
6603 ushort4 __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );
6604 ushort8 __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );
6605
6606+#ifdef __opencl_c_images
6607 void __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort data);
6608 void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);
6609 void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);
6610 void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);
6611+#endif //__opencl_c_images
6612
6613 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6614+#ifdef __opencl_c_images
6615+#ifdef __opencl_c_read_write_images
6616 void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data);
6617 void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);
6618 void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);
6619 void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);
6620+#endif //__opencl_c_read_write_images
6621+#endif //__opencl_c_images
6622 #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
6623
6624 void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data );
6625@@ -15889,6 +17983,7 @@ short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset(
6626 short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size,
6627 ushort2 image_size);
6628
6629+#ifdef __opencl_c_images
6630 intel_sub_group_avc_ime_result_t __ovld
6631 intel_sub_group_avc_ime_evaluate_with_single_reference(
6632 read_only image2d_t src_image, read_only image2d_t ref_image,
6633@@ -15929,6 +18024,7 @@ intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout(
6634 read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
6635 intel_sub_group_avc_ime_payload_t payload,
6636 intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);
6637+#endif //__opencl_c_images
6638
6639 intel_sub_group_avc_ime_single_reference_streamin_t __ovld
6640 intel_sub_group_avc_ime_get_single_reference_streamin(
6641@@ -15993,6 +18089,7 @@ intel_sub_group_avc_ref_payload_t __ovld
6642 intel_sub_group_avc_ref_set_bilinear_filter_enable(
6643 intel_sub_group_avc_ref_payload_t payload);
6644
6645+#ifdef __opencl_c_images
6646 intel_sub_group_avc_ref_result_t __ovld
6647 intel_sub_group_avc_ref_evaluate_with_single_reference(
6648 read_only image2d_t src_image, read_only image2d_t ref_image,
6649@@ -16011,6 +18108,7 @@ intel_sub_group_avc_ref_evaluate_with_multi_reference(
6650 read_only image2d_t src_image, uint packed_reference_ids,
6651 uchar packed_reference_field_polarities, sampler_t vme_media_sampler,
6652 intel_sub_group_avc_ref_payload_t payload);
6653+#endif //__opencl_c_images
6654
6655 // SIC built-in functions
6656 intel_sub_group_avc_sic_payload_t __ovld
6657@@ -16061,6 +18159,7 @@ intel_sub_group_avc_sic_set_block_based_raw_skip_sad(
6658 uchar block_based_skip_type,
6659 intel_sub_group_avc_sic_payload_t payload);
6660
6661+#ifdef __opencl_c_images
6662 intel_sub_group_avc_sic_result_t __ovld
6663 intel_sub_group_avc_sic_evaluate_ipe(
6664 read_only image2d_t src_image, sampler_t vme_media_sampler,
6665@@ -16083,6 +18182,7 @@ intel_sub_group_avc_sic_evaluate_with_multi_reference(
6666 read_only image2d_t src_image, uint packed_reference_ids,
6667 uchar packed_reference_field_polarities, sampler_t vme_media_sampler,
6668 intel_sub_group_avc_sic_payload_t payload);
6669+#endif //__opencl_c_images
6670
6671 uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape(
6672 intel_sub_group_avc_sic_result_t result);
6673diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
6674index 6353e14bc41a..a8c216de6e04 100644
6675--- a/clang/lib/Parse/ParseDecl.cpp
6676+++ b/clang/lib/Parse/ParseDecl.cpp
6677@@ -3904,8 +3904,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
6678 isInvalid = DS.SetTypeAltiVecBool(true, Loc, PrevSpec, DiagID, Policy);
6679 break;
6680 case tok::kw_pipe:
6681- if (!getLangOpts().OpenCL || (getLangOpts().OpenCLVersion < 200 &&
6682- !getLangOpts().OpenCLCPlusPlus)) {
6683+ if (!getLangOpts().OpenCLPipeKeyword) {
6684 // OpenCL 2.0 defined this keyword. OpenCL 1.2 and earlier should
6685 // support the "pipe" word as identifier.
6686 Tok.getIdentifierInfo()->revertTokenIDToIdentifier();
6687@@ -4027,8 +4026,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
6688 case tok::kw___generic:
6689 // generic address space is introduced only in OpenCL v2.0
6690 // see OpenCL C Spec v2.0 s6.5.5
6691- if (Actions.getLangOpts().OpenCLVersion < 200 &&
6692- !Actions.getLangOpts().OpenCLCPlusPlus) {
6693+ if (!Actions.getLangOpts().OpenCLGenericKeyword) {
6694 DiagID = diag::err_opencl_unknown_type_specifier;
6695 PrevSpec = Tok.getIdentifierInfo()->getNameStart();
6696 isInvalid = true;
6697@@ -5050,8 +5048,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
6698 default: return false;
6699
6700 case tok::kw_pipe:
6701- return (getLangOpts().OpenCL && getLangOpts().OpenCLVersion >= 200) ||
6702- getLangOpts().OpenCLCPlusPlus;
6703+ return getLangOpts().OpenCLPipeKeyword;
6704
6705 case tok::identifier: // foo::bar
6706 // Unfortunate hack to support "Class.factoryMethod" notation.
6707@@ -5557,8 +5554,7 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang,
6708 if (Kind == tok::star || Kind == tok::caret)
6709 return true;
6710
6711- if (Kind == tok::kw_pipe &&
6712- ((Lang.OpenCL && Lang.OpenCLVersion >= 200) || Lang.OpenCLCPlusPlus))
6713+ if (Kind == tok::kw_pipe && Lang.OpenCLPipeKeyword)
6714 return true;
6715
6716 if (!Lang.CPlusPlus)
6717diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp
6718index df411e1928d6..9a40ce888695 100644
6719--- a/clang/lib/Parse/ParsePragma.cpp
6720+++ b/clang/lib/Parse/ParsePragma.cpp
6721@@ -697,12 +697,14 @@ void Parser::HandlePragmaOpenCLExtension() {
6722 if (Name == "all") {
6723 if (State == Disable) {
6724 Opt.disableAll();
6725- Opt.enableSupportedCore(getLangOpts());
6726+ Opt.enableSupportedCore();
6727 } else {
6728 PP.Diag(NameLoc, diag::warn_pragma_expected_predicate) << 1;
6729 }
6730+ } else if (Opt.isFeature(Name)) {
6731+ PP.Diag(NameLoc, diag::warn_opencl_pragma_feature_ignore) << Ident;
6732 } else if (State == Begin) {
6733- if (!Opt.isKnown(Name) || !Opt.isSupported(Name, getLangOpts())) {
6734+ if (!Opt.isKnown(Name) || !Opt.isSupported(Name)) {
6735 Opt.support(Name);
6736 }
6737 Actions.setCurrentOpenCLExtension(Name);
6738@@ -712,9 +714,9 @@ void Parser::HandlePragmaOpenCLExtension() {
6739 Actions.setCurrentOpenCLExtension("");
6740 } else if (!Opt.isKnown(Name))
6741 PP.Diag(NameLoc, diag::warn_pragma_unknown_extension) << Ident;
6742- else if (Opt.isSupportedExtension(Name, getLangOpts()))
6743+ else if (Opt.isSupportedExtension(Name))
6744 Opt.enable(Name, State == Enable);
6745- else if (Opt.isSupportedCore(Name, getLangOpts()))
6746+ else if (Opt.isSupportedCore(Name))
6747 PP.Diag(NameLoc, diag::warn_pragma_extension_is_core) << Ident;
6748 else
6749 PP.Diag(NameLoc, diag::warn_pragma_unsupported_extension) << Ident;
6750diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td
6751index 9d6bb411eff8..d352d35f1e46 100644
6752--- a/clang/lib/Sema/OpenCLBuiltins.td
6753+++ b/clang/lib/Sema/OpenCLBuiltins.td
6754@@ -22,11 +22,13 @@
6755 class Version<int _Version> {
6756 int ID = _Version;
6757 }
6758+
6759 def CLAll : Version< 0>;
6760 def CL10 : Version<100>;
6761 def CL11 : Version<110>;
6762 def CL12 : Version<120>;
6763 def CL20 : Version<200>;
6764+def CL30 : Version<300>;
6765
6766 // Address spaces
6767 // Pointer types need to be assigned an address space.
6768@@ -65,6 +67,14 @@ def FuncExtKhrGlMsaaSharing : FunctionExtension<"cl_khr_gl_msaa_sha
6769 // Multiple extensions
6770 def FuncExtKhrMipmapAndWrite3d : FunctionExtension<"cl_khr_mipmap_image cl_khr_3d_image_writes">;
6771
6772+// Features
6773+def FuncExtGenericAddressSpace : FunctionExtension<"__opencl_c_generic_address_space">;
6774+def FuncExtWorkGroupCollective : FunctionExtension<"__opencl_c_work_group_collective_functions">;
6775+def FuncExtPipes : FunctionExtension<"__opencl_c_pipes">;
6776+def FuncExtDeviceSidEenqueue : FunctionExtension<"__opencl_c_device_enqueue">;
6777+def FuncNonExplicitAtomicFeatures : FunctionExtension<"__opencl_c_atomic_order_seq_cst __opencl_c_atomic_scope_device">;
6778+def FuncNonExplicitAtomicFeaturesAndGenericAS : FunctionExtension<"__opencl_c_atomic_order_seq_cst __opencl_c_atomic_scope_device __opencl_c_generic_address_space">;
6779+
6780 // Qualified Type. These map to ASTContext::QualType.
6781 class QualType<string _Name, bit _IsAbstract=0> {
6782 // Name of the field or function in a clang::ASTContext
6783@@ -230,13 +240,9 @@ class Builtin<string _Name, list<Type> _Signature, list<bit> _Attributes = Attr.
6784 bit IsConv = _Attributes[2];
6785 // OpenCL extensions to which the function belongs.
6786 FunctionExtension Extension = FuncExtNone;
6787- // Version of OpenCL from which the function is available (e.g.: CL10).
6788- // MinVersion is inclusive.
6789- Version MinVersion = CL10;
6790- // Version of OpenCL from which the function is not supported anymore.
6791- // MaxVersion is exclusive.
6792+ // List of OpenCL versions in which this function is available.
6793 // CLAll makes the function available for all versions.
6794- Version MaxVersion = CLAll;
6795+ list<Version> Versions = [CLAll];
6796 }
6797
6798 //===----------------------------------------------------------------------===//
6799@@ -398,7 +404,7 @@ foreach name = ["get_global_size", "get_global_id", "get_local_size",
6800 def : Builtin<name, [Size, UInt], Attr.Const>;
6801 }
6802
6803-let MinVersion = CL20 in {
6804+let Versions = [CL20, CL30] in {
6805 def : Builtin<"get_enqueued_local_size", [Size, UInt]>;
6806 foreach name = ["get_global_linear_id", "get_local_linear_id"] in {
6807 def : Builtin<name, [Size]>;
6808@@ -471,7 +477,7 @@ foreach name = ["fma", "mad"] in {
6809 }
6810
6811 // --- Version dependent ---
6812-let MaxVersion = CL20 in {
6813+let Versions = [CL10, CL11, CL12, CL30] in {
6814 foreach AS = [GlobalAS, LocalAS, PrivateAS] in {
6815 foreach name = ["fract", "modf", "sincos"] in {
6816 def : Builtin<name, [FGenTypeN, FGenTypeN, PointerType<FGenTypeN, AS>]>;
6817@@ -488,7 +494,9 @@ let MaxVersion = CL20 in {
6818 }
6819 }
6820 }
6821-let MinVersion = CL20 in {
6822+
6823+let Versions = [CL20, CL30] in {
6824+ let Extension = FuncExtGenericAddressSpace in {
6825 foreach name = ["fract", "modf", "sincos"] in {
6826 def : Builtin<name, [FGenTypeN, FGenTypeN, PointerType<FGenTypeN, GenericAS>]>;
6827 }
6828@@ -501,6 +509,7 @@ let MinVersion = CL20 in {
6829 def : Builtin<name, [Type, Type, Type, PointerType<GenTypeIntVecAndScalar, GenericAS>]>;
6830 }
6831 }
6832+ }
6833 }
6834
6835 // --- Table 9 ---
6836@@ -531,7 +540,7 @@ foreach name = ["abs"] in {
6837 foreach name = ["clz", "popcount"] in {
6838 def : Builtin<name, [AIGenTypeN, AIGenTypeN], Attr.Const>;
6839 }
6840-let MinVersion = CL20 in {
6841+let Versions = [CL20, CL30] in {
6842 foreach name = ["ctz"] in {
6843 def : Builtin<name, [AIGenTypeN, AIGenTypeN]>;
6844 }
6845@@ -705,7 +714,7 @@ foreach name = ["select"] in {
6846 // --- Table 15 ---
6847 // Variants for OpenCL versions below 2.0, using pointers to the global, local
6848 // and private address spaces.
6849-let MaxVersion = CL20 in {
6850+let Versions = [CL10, CL11, CL12, CL30] in {
6851 foreach AS = [GlobalAS, LocalAS, PrivateAS] in {
6852 foreach VSize = [2, 3, 4, 8, 16] in {
6853 foreach name = ["vload" # VSize] in {
6854@@ -748,7 +757,8 @@ let MaxVersion = CL20 in {
6855 }
6856 // Variants for OpenCL versions above 2.0, using pointers to the generic
6857 // address space.
6858-let MinVersion = CL20 in {
6859+let Versions = [CL20, CL30] in {
6860+ let Extension = FuncExtGenericAddressSpace in {
6861 foreach VSize = [2, 3, 4, 8, 16] in {
6862 foreach name = ["vload" # VSize] in {
6863 def : Builtin<name, [VectorType<Char, VSize>, Size, PointerType<ConstType<Char>, GenericAS>]>;
6864@@ -786,6 +796,7 @@ let MinVersion = CL20 in {
6865 }
6866 }
6867 }
6868+ }
6869 }
6870 // Variants using pointers to the constant address space.
6871 foreach VSize = [2, 3, 4, 8, 16] in {
6872@@ -812,7 +823,7 @@ foreach VSize = [2, 3, 4, 8, 16] in {
6873 }
6874 }
6875 }
6876-let MaxVersion = CL20 in {
6877+let Versions = [CL10, CL11, CL12, CL30] in {
6878 foreach AS = [GlobalAS, LocalAS, PrivateAS] in {
6879 def : Builtin<"vload_half", [Float, Size, PointerType<ConstType<Half>, AS>]>;
6880 foreach VSize = [2, 3, 4, 8, 16] in {
6881@@ -832,7 +843,8 @@ let MaxVersion = CL20 in {
6882 }
6883 }
6884 }
6885-let MinVersion = CL20 in {
6886+let Versions = [CL20, CL30] in {
6887+ let Extension = FuncExtGenericAddressSpace in {
6888 foreach AS = [GenericAS] in {
6889 def : Builtin<"vload_half", [Float, Size, PointerType<ConstType<Half>, AS>]>;
6890 foreach VSize = [2, 3, 4, 8, 16] in {
6891@@ -851,6 +863,7 @@ let MinVersion = CL20 in {
6892 }
6893 }
6894 }
6895+ }
6896 }
6897
6898 foreach AS = [ConstantAS] in {
6899@@ -875,7 +888,9 @@ foreach name = ["async_work_group_strided_copy"] in {
6900 def : Builtin<name, [Event, PointerType<AGenTypeN, GlobalAS>, PointerType<ConstType<AGenTypeN>, LocalAS>, Size, Size, Event]>;
6901 }
6902 foreach name = ["wait_group_events"] in {
6903+ let Extension = FuncExtGenericAddressSpace in {
6904 def : Builtin<name, [Void, Int, PointerType<Event, GenericAS>]>;
6905+ }
6906 }
6907 foreach name = ["prefetch"] in {
6908 def : Builtin<name, [Void, PointerType<ConstType<AGenTypeN>, GlobalAS>, Size]>;
6909@@ -1154,7 +1169,8 @@ foreach aQual = ["WO", "RW"] in {
6910 //--------------------------------------------------------------------
6911 // OpenCL v2.0 s6.13.15 - Work-group Functions
6912 // --- Table 26 ---
6913-let MinVersion = CL20 in {
6914+let Versions = [CL20, CL30] in {
6915+ let Extension = FuncExtWorkGroupCollective in {
6916 foreach name = ["work_group_all", "work_group_any"] in {
6917 def : Builtin<name, [Int, Int], Attr.Convergent>;
6918 }
6919@@ -1169,11 +1185,12 @@ let MinVersion = CL20 in {
6920 def : Builtin<name # op, [IntLongFloatGenType1, IntLongFloatGenType1], Attr.Convergent>;
6921 }
6922 }
6923+ }
6924 }
6925
6926
6927 // OpenCL v2.0 s9.17.3: Additions to section 6.13.1: Work-Item Functions
6928-let MinVersion = CL20 in {
6929+let Versions = [CL20] in {
6930 let Extension = FuncExtKhrSubgroups in {
6931 def get_sub_group_size : Builtin<"get_sub_group_size", [UInt]>;
6932 def get_max_sub_group_size : Builtin<"get_max_sub_group_size", [UInt]>;
6933diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
6934index 9cfce5a63b1d..c26f45f62668 100644
6935--- a/clang/lib/Sema/Sema.cpp
6936+++ b/clang/lib/Sema/Sema.cpp
6937@@ -286,9 +286,10 @@ void Sema::Initialize() {
6938 // Initialize predefined OpenCL types and supported extensions and (optional)
6939 // core features.
6940 if (getLangOpts().OpenCL) {
6941+ getOpenCLOptions().setOpenCLVersion(getLangOpts());
6942 getOpenCLOptions().addSupport(
6943 Context.getTargetInfo().getSupportedOpenCLOpts());
6944- getOpenCLOptions().enableSupportedCore(getLangOpts());
6945+ getOpenCLOptions().enableSupportedCore();
6946 addImplicitTypedef("sampler_t", Context.OCLSamplerTy);
6947 addImplicitTypedef("event_t", Context.OCLEventTy);
6948 if (getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) {
6949@@ -344,12 +345,18 @@ void Sema::Initialize() {
6950 "cl_khr_int64_base_atomics cl_khr_int64_extended_atomics");
6951
6952 setOpenCLExtensionForType(AtomicDoubleT, "cl_khr_fp64");
6953+ setOpenCLExtensionForType(Context.OCLReserveIDTy, "__opencl_c_pipes");
6954+ setOpenCLExtensionForType(Context.OCLClkEventTy,
6955+ "__opencl_c_device_enqueue");
6956+ setOpenCLExtensionForType(Context.OCLQueueTy,
6957+ "__opencl_c_device_enqueue");
6958 }
6959
6960 setOpenCLExtensionForType(Context.DoubleTy, "cl_khr_fp64");
6961
6962-#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \
6963- setOpenCLExtensionForType(Context.Id, Ext);
6964+#define GENERIC_IMAGE_TYPE_EXT(Type, Id, Ext) \
6965+ setOpenCLExtensionForType(Context.Id, Ext); \
6966+ setOpenCLExtensionForType(Context.Id, "__opencl_c_images");
6967 #include "clang/Basic/OpenCLImageTypes.def"
6968 #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
6969 addImplicitTypedef(#ExtType, Context.Id##Ty); \
6970@@ -2293,6 +2300,27 @@ bool Sema::isOpenCLDisabledDecl(Decl *FD) {
6971 return false;
6972 }
6973
6974+template <typename DiagLocT, typename DiagInfoT>
6975+void Sema::DiagnoseOpenCLRequiresOption(llvm::StringRef OpenCLOptName,
6976+ DiagLocT DiagLoc, DiagInfoT DiagInfo,
6977+ unsigned Selector,
6978+ SourceRange SrcRange) {
6979+ const auto &LO = getLangOpts();
6980+ auto CLVer = LO.OpenCLCPlusPlus ? 200 : LO.OpenCLVersion;
6981+ // For version 3.0 and higher, diagnose the equivalent feature name.
6982+ if (CLVer >= 300) {
6983+ OpenCLOptName =
6984+ llvm::StringSwitch<llvm::StringRef>(OpenCLOptName)
6985+ .Case("cl_khr_3d_image_writes", "__opencl_c_3d_image_writes")
6986+ .Case("cl_khr_subgroups", "__opencl_c_subgroups")
6987+ .Case("cl_khr_fp64", "__opencl_c_fp64")
6988+ .Default(OpenCLOptName);
6989+ }
6990+
6991+ Diag(DiagLoc, diag::err_opencl_requires_extension)
6992+ << Selector << DiagInfo << OpenCLOptName << SrcRange;
6993+}
6994+
6995 template <typename T, typename DiagLocT, typename DiagInfoT, typename MapT>
6996 bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc,
6997 DiagInfoT DiagInfo, MapT &Map,
6998@@ -2304,8 +2332,7 @@ bool Sema::checkOpenCLDisabledTypeOrDecl(T D, DiagLocT DiagLoc,
6999 bool Disabled = false;
7000 for (auto &I : Loc->second) {
7001 if (I != CurrOpenCLExtension && !getOpenCLOptions().isEnabled(I)) {
7002- Diag(DiagLoc, diag::err_opencl_requires_extension) << Selector << DiagInfo
7003- << I << SrcRange;
7004+ DiagnoseOpenCLRequiresOption(I, DiagLoc, DiagInfo, Selector, SrcRange);
7005 Disabled = true;
7006 }
7007 }
7008@@ -2341,3 +2368,13 @@ bool Sema::checkOpenCLDisabledDecl(const NamedDecl &D, const Expr &E) {
7009 return checkOpenCLDisabledTypeOrDecl(&D, E.getBeginLoc(), FnName,
7010 OpenCLDeclExtMap, 1, D.getSourceRange());
7011 }
7012+
7013+bool Sema::checkOpenCLSubgroupExtForCallExpr(CallExpr *Call) {
7014+ if (!getOpenCLOptions().isEnabled("cl_khr_subgroups")) {
7015+ DiagnoseOpenCLRequiresOption("cl_khr_subgroups", Call->getBeginLoc(),
7016+ Call->getDirectCallee(), 1,
7017+ Call->getSourceRange());
7018+ return true;
7019+ }
7020+ return false;
7021+}
7022diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
7023index 74742023d1b3..efa3b6ab0eb6 100644
7024--- a/clang/lib/Sema/SemaChecking.cpp
7025+++ b/clang/lib/Sema/SemaChecking.cpp
7026@@ -597,20 +597,11 @@ static bool checkOpenCLBlockArgs(Sema &S, Expr *BlockArg) {
7027 return IllegalParams;
7028 }
7029
7030-static bool checkOpenCLSubgroupExt(Sema &S, CallExpr *Call) {
7031- if (!S.getOpenCLOptions().isEnabled("cl_khr_subgroups")) {
7032- S.Diag(Call->getBeginLoc(), diag::err_opencl_requires_extension)
7033- << 1 << Call->getDirectCallee() << "cl_khr_subgroups";
7034- return true;
7035- }
7036- return false;
7037-}
7038-
7039 static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) {
7040 if (checkArgCount(S, TheCall, 2))
7041 return true;
7042
7043- if (checkOpenCLSubgroupExt(S, TheCall))
7044+ if (S.checkOpenCLSubgroupExtForCallExpr(TheCall))
7045 return true;
7046
7047 // First argument is an ndrange_t type.
7048@@ -1564,7 +1555,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
7049 break;
7050 case Builtin::BIsub_group_reserve_read_pipe:
7051 case Builtin::BIsub_group_reserve_write_pipe:
7052- if (checkOpenCLSubgroupExt(*this, TheCall) ||
7053+ if (checkOpenCLSubgroupExtForCallExpr(TheCall) ||
7054 SemaBuiltinReserveRWPipe(*this, TheCall))
7055 return ExprError();
7056 break;
7057@@ -1577,7 +1568,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
7058 break;
7059 case Builtin::BIsub_group_commit_read_pipe:
7060 case Builtin::BIsub_group_commit_write_pipe:
7061- if (checkOpenCLSubgroupExt(*this, TheCall) ||
7062+ if (checkOpenCLSubgroupExtForCallExpr(TheCall) ||
7063 SemaBuiltinCommitRWPipe(*this, TheCall))
7064 return ExprError();
7065 break;
7066@@ -4314,6 +4305,20 @@ DiagnoseCStringFormatDirectiveInCFAPI(Sema &S,
7067 }
7068 }
7069
7070+bool Sema::isSupportedOpenCLOMemoryOrdering(int64_t Ordering) const {
7071+ assert(llvm::isValidAtomicOrderingCABI(Ordering));
7072+ auto OrderingCABI = (llvm::AtomicOrderingCABI)Ordering;
7073+ switch (OrderingCABI) {
7074+ case llvm::AtomicOrderingCABI::acquire:
7075+ case llvm::AtomicOrderingCABI::release:
7076+ case llvm::AtomicOrderingCABI::acq_rel:
7077+ return OpenCLFeatures.isEnabled("__opencl_c_atomic_order_acq_rel");
7078+
7079+ default:
7080+ return true;
7081+ }
7082+}
7083+
7084 /// Determine whether the given type has a non-null nullability annotation.
7085 static bool isNonNullType(ASTContext &ctx, QualType type) {
7086 if (auto nullability = type->getNullability(ctx))
7087@@ -5067,10 +5072,17 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
7088 if (SubExprs.size() >= 2 && Form != Init) {
7089 llvm::APSInt Result(32);
7090 if (SubExprs[1]->isIntegerConstantExpr(Result, Context) &&
7091- !isValidOrderingForOp(Result.getSExtValue(), Op))
7092+ !isValidOrderingForOp(Result.getSExtValue(), Op)) {
7093 Diag(SubExprs[1]->getBeginLoc(),
7094 diag::warn_atomic_op_has_invalid_memory_order)
7095 << SubExprs[1]->getSourceRange();
7096+ } else if (IsOpenCL &&
7097+ !isSupportedOpenCLOMemoryOrdering(Result.getSExtValue())) {
7098+ Diag(SubExprs[1]->getBeginLoc(),
7099+ diag::err_opencl_memory_ordering_require_feat)
7100+ << SubExprs[1]->getSourceRange();
7101+ return ExprError();
7102+ }
7103 }
7104
7105 if (auto ScopeModel = AtomicExpr::getScopeModel(Op)) {
7106diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
7107index 64146f4a912f..c1e629b5dc76 100644
7108--- a/clang/lib/Sema/SemaDecl.cpp
7109+++ b/clang/lib/Sema/SemaDecl.cpp
7110@@ -6152,7 +6152,9 @@ void Sema::deduceOpenCLAddressSpace(ValueDecl *Decl) {
7111 if (Type->isSamplerT() || Type->isVoidType())
7112 return;
7113 LangAS ImplAS = LangAS::opencl_private;
7114- if ((getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) &&
7115+ if ((getLangOpts().OpenCLCPlusPlus ||
7116+ getOpenCLOptions().isEnabled(
7117+ "__opencl_c_program_scope_global_variables")) &&
7118 Var->hasGlobalStorage())
7119 ImplAS = LangAS::opencl_global;
7120 // If the original type from a decayed type is an array type and that array
7121@@ -7682,6 +7684,10 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
7122 // OpenCL C v2.0 s6.5.1 - Variables defined at program scope and static
7123 // variables inside a function can also be declared in the global
7124 // address space.
7125+ // OpenCL C v3.0 s5.5 - For OpenCL C 2.0, or with the
7126+ // __opencl_c_program_scope_global_variables feature macro, the
7127+ // address space for a variable at program scope or a static variable
7128+ // may also be __global.
7129 // C++ for OpenCL inherits rule from OpenCL C v2.0.
7130 // FIXME: Adding local AS in C++ for OpenCL might make sense.
7131 if (NewVD->isFileVarDecl() || NewVD->isStaticLocal() ||
7132@@ -7689,10 +7695,11 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
7133 if (!T->isSamplerT() &&
7134 !(T.getAddressSpace() == LangAS::opencl_constant ||
7135 (T.getAddressSpace() == LangAS::opencl_global &&
7136- (getLangOpts().OpenCLVersion == 200 ||
7137- getLangOpts().OpenCLCPlusPlus)))) {
7138+ (OpenCLFeatures.isEnabled(
7139+ "__opencl_c_program_scope_global_variables"))))) {
7140 int Scope = NewVD->isStaticLocal() | NewVD->hasExternalStorage() << 1;
7141- if (getLangOpts().OpenCLVersion == 200 || getLangOpts().OpenCLCPlusPlus)
7142+ if (OpenCLFeatures.isEnabled(
7143+ "__opencl_c_program_scope_global_variables"))
7144 Diag(NewVD->getLocation(), diag::err_opencl_global_invalid_addr_space)
7145 << Scope << "global or constant";
7146 else
7147diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
7148index 849bc09063b3..c963b95b131a 100644
7149--- a/clang/lib/Sema/SemaDeclAttr.cpp
7150+++ b/clang/lib/Sema/SemaDeclAttr.cpp
7151@@ -6362,7 +6362,7 @@ static void handleInternalLinkageAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
7152 }
7153
7154 static void handleOpenCLNoSVMAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
7155- if (S.LangOpts.OpenCLVersion != 200)
7156+ if (S.LangOpts.OpenCLVersion < 200)
7157 S.Diag(AL.getLoc(), diag::err_attribute_requires_opencl_version)
7158 << AL << "2.0" << 0;
7159 else
7160@@ -6446,6 +6446,13 @@ static void handleOpenCLAccessAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
7161 << AL << PDecl->getType() << DeclTy->isImageType();
7162 D->setInvalidDecl(true);
7163 return;
7164+ } else if ((!S.getLangOpts().OpenCLCPlusPlus &&
7165+ S.getLangOpts().OpenCLVersion >= 200) &&
7166+ !S.getOpenCLOptions().isEnabled(
7167+ "__opencl_c_read_write_images")) {
7168+ S.Diag(AL.getLoc(), diag::err_opencl_requires_extension)
7169+ << 0 << PDecl->getType() << "__opencl_c_read_write_images";
7170+ return;
7171 }
7172 }
7173 }
7174diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
7175index 831e55046e80..4481a59b4517 100644
7176--- a/clang/lib/Sema/SemaDeclCXX.cpp
7177+++ b/clang/lib/Sema/SemaDeclCXX.cpp
7178@@ -14906,6 +14906,11 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl,
7179 if (auto *PtrTy = ResultType->getAs<PointerType>()) {
7180 ResultType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy);
7181 }
7182+ if (CanQual<PointerType> ExpectedPtrTy =
7183+ ExpectedResultType->getAs<PointerType>()) {
7184+ ExpectedResultType = SemaRef.Context.getCanonicalType(
7185+ RemoveAddressSpaceFromPtr(SemaRef, ExpectedPtrTy->getTypePtr()));
7186+ }
7187 }
7188
7189 // Check that the result type is what we expect.
7190@@ -14939,6 +14944,11 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl,
7191 FnDecl->getParamDecl(0)->getType()->getAs<PointerType>()) {
7192 FirstParamType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy);
7193 }
7194+ if (CanQual<PointerType> ExpectedPtrTy =
7195+ ExpectedFirstParamType->getAs<PointerType>()) {
7196+ ExpectedFirstParamType = SemaRef.Context.getCanonicalType(
7197+ RemoveAddressSpaceFromPtr(SemaRef, ExpectedPtrTy->getTypePtr()));
7198+ }
7199 }
7200 if (SemaRef.Context.getCanonicalType(FirstParamType).getUnqualifiedType() !=
7201 ExpectedFirstParamType)
7202diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
7203index 8d96404a5c27..8f21203b9508 100644
7204--- a/clang/lib/Sema/SemaLookup.cpp
7205+++ b/clang/lib/Sema/SemaLookup.cpp
7206@@ -771,19 +771,20 @@ static void InsertOCLBuiltinDeclarationsFromTable(Sema &S, LookupResult &LR,
7207 // as argument. Only meaningful for generic types, otherwise equals 1.
7208 unsigned GenTypeMaxCnt;
7209
7210+ ASTContext &Context = S.Context;
7211+
7212+ // Ignore this BIF if its version does not match the language options.
7213+ unsigned OpenCLVersion = Context.getLangOpts().OpenCLVersion;
7214+ if (Context.getLangOpts().OpenCLCPlusPlus)
7215+ OpenCLVersion = 200;
7216+
7217+ unsigned short VersionCode = OpenCLOptions::EncodeVersion(OpenCLVersion);
7218+
7219 for (unsigned SignatureIndex = 0; SignatureIndex < Len; SignatureIndex++) {
7220 const OpenCLBuiltinStruct &OpenCLBuiltin =
7221 BuiltinTable[FctIndex + SignatureIndex];
7222- ASTContext &Context = S.Context;
7223
7224- // Ignore this BIF if its version does not match the language options.
7225- unsigned OpenCLVersion = Context.getLangOpts().OpenCLVersion;
7226- if (Context.getLangOpts().OpenCLCPlusPlus)
7227- OpenCLVersion = 200;
7228- if (OpenCLVersion < OpenCLBuiltin.MinVersion)
7229- continue;
7230- if ((OpenCLBuiltin.MaxVersion != 0) &&
7231- (OpenCLVersion >= OpenCLBuiltin.MaxVersion))
7232+ if (!(OpenCLBuiltin.AllVersions & VersionCode))
7233 continue;
7234
7235 SmallVector<QualType, 1> RetTypes;
7236diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
7237index 93ddd047e09b..93211b801f8d 100644
7238--- a/clang/lib/Sema/SemaType.cpp
7239+++ b/clang/lib/Sema/SemaType.cpp
7240@@ -2016,7 +2016,7 @@ static QualType deduceOpenCLPointeeAddrSpace(Sema &S, QualType PointeeType) {
7241 !PointeeType.hasAddressSpace())
7242 PointeeType = S.getASTContext().getAddrSpaceQualType(
7243 PointeeType,
7244- S.getLangOpts().OpenCLCPlusPlus || S.getLangOpts().OpenCLVersion == 200
7245+ S.getOpenCLOptions().isEnabled("__opencl_c_generic_address_space")
7246 ? LangAS::opencl_generic
7247 : LangAS::opencl_private);
7248 return PointeeType;
7249@@ -5160,9 +5160,15 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
7250 }
7251
7252 case DeclaratorChunk::Pipe: {
7253- T = S.BuildReadPipeType(T, DeclType.Loc);
7254- processTypeAttrs(state, T, TAL_DeclSpec,
7255- D.getMutableDeclSpec().getAttributes());
7256+ if (S.getOpenCLOptions().isEnabled("__opencl_c_pipes")) {
7257+ T = S.BuildReadPipeType(T, DeclType.Loc);
7258+ processTypeAttrs(state, T, TAL_DeclSpec,
7259+ D.getMutableDeclSpec().getAttributes());
7260+ } else {
7261+ D.setInvalidType(true);
7262+ T = Context.IntTy;
7263+ S.Diag(D.getIdentifierLoc(), diag::err_opencl_pipes_require_feat);
7264+ }
7265 break;
7266 }
7267 }
7268diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
7269index cdbf28bbcad8..0bedff5ef0f3 100644
7270--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
7271+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
7272@@ -2,7 +2,8 @@
7273 // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s
7274 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s
7275 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s
7276-
7277+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s
7278+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -cl-ext=__opencl_c_program_scope_global_variables -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s
7279 typedef int int2 __attribute__((ext_vector_type(2)));
7280
7281 typedef struct {
7282@@ -39,7 +40,7 @@ struct LargeStructTwoMember {
7283 int2 y[20];
7284 };
7285
7286-#if __OPENCL_C_VERSION__ >= 200
7287+#ifdef __opencl_c_program_scope_global_variables
7288 struct LargeStructOneMember g_s;
7289 #endif
7290
7291@@ -98,7 +99,7 @@ void FuncOneLargeMember(struct LargeStructOneMember u) {
7292 // AMDGCN20: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
7293 // AMDGCN20: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false)
7294 // AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
7295-#if __OPENCL_C_VERSION__ >= 200
7296+#ifdef __opencl_c_program_scope_global_variables
7297 void test_indirect_arg_globl(void) {
7298 FuncOneLargeMember(g_s);
7299 }
7300diff --git a/clang/test/CodeGenOpenCL/address-spaces-conversions.cl b/clang/test/CodeGenOpenCL/address-spaces-conversions.cl
7301index 52feccc540bb..9ecffcca5ee9 100644
7302--- a/clang/test/CodeGenOpenCL/address-spaces-conversions.cl
7303+++ b/clang/test/CodeGenOpenCL/address-spaces-conversions.cl
7304@@ -1,5 +1,7 @@
7305 // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s
7306+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -ffake-address-space-map -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -emit-llvm -o - | FileCheck %s
7307 // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s
7308+// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=CHECK-NOFAKE %s
7309 // When -ffake-address-space-map is not used, all addr space mapped to 0 for x86_64.
7310
7311 // test that we generate address space casts everywhere we need conversions of
7312diff --git a/clang/test/CodeGenOpenCL/address-spaces-mangling.cl b/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
7313index 50622f099143..e19ec8451d0d 100644
7314--- a/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
7315+++ b/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
7316@@ -6,6 +6,7 @@
7317 // We check that the address spaces are mangled the same in both version of OpenCL
7318 // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s
7319 // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s
7320+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s
7321
7322 // We can't name this f as private is equivalent to default
7323 // no specifier given address space so we get multiple definition
7324@@ -47,7 +48,7 @@ void f(constant int *arg) { }
7325 // OCL-20-DAG: @_Z1fPU3AS2i
7326 // OCL-12-DAG: @_Z1fPU3AS2i
7327
7328-#if __OPENCL_C_VERSION__ >= 200
7329+#if __OPENCL_C_VERSION__ == 200
7330 __attribute__((overloadable))
7331 void f(generic int *arg) { }
7332 // ASMANG20: @_Z1fPU3AS4i
7333diff --git a/clang/test/CodeGenOpenCL/address-spaces.cl b/clang/test/CodeGenOpenCL/address-spaces.cl
7334index 3c8fea2a80bc..26a741338b3a 100644
7335--- a/clang/test/CodeGenOpenCL/address-spaces.cl
7336+++ b/clang/test/CodeGenOpenCL/address-spaces.cl
7337@@ -1,9 +1,13 @@
7338 // RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR
7339+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR
7340 // RUN: %clang_cc1 %s -O0 -DCL20 -cl-std=CL2.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR
7341 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
7342+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
7343 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20AMDGCN
7344 // RUN: %clang_cc1 %s -O0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
7345+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
7346 // RUN: %clang_cc1 %s -O0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
7347+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
7348
7349 // SPIR: %struct.S = type { i32, i32, i32* }
7350 // CL20SPIR: %struct.S = type { i32, i32, i32 addrspace(4)* }
7351diff --git a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
7352index 7216cb517495..8d3b30fe8074 100644
7353--- a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
7354+++ b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
7355@@ -1,4 +1,5 @@
7356 // RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s
7357+// RUN: %clang_cc1 -O0 -cl-std=CL3.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s
7358 // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL20 %s
7359
7360 // CL12-LABEL: define void @func1(i32 addrspace(5)* %x)
7361diff --git a/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl b/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl
7362index a5d438933fa4..8073c7756eb6 100644
7363--- a/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl
7364+++ b/clang/test/CodeGenOpenCL/amdgpu-sizeof-alignof.cl
7365@@ -4,6 +4,17 @@
7366 // RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s
7367 // RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL1.2 %s -emit-llvm -o - | FileCheck %s
7368 // RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL2.0 %s -emit-llvm -o - | FileCheck %s
7369+// RUN: %clang_cc1 -triple r600 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s
7370+// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s
7371+// RUN: %clang_cc1 -triple amdgcn---opencl -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s
7372+// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s
7373+// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s
7374+// RUN: %clang_cc1 -triple amdgcn---opencl -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s
7375+// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-ext=__opencl_c_generic_address_space -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s
7376+// RUN: %clang_cc1 -triple r600 -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s
7377+// RUN: %clang_cc1 -triple amdgcn-mesa-mesa3d -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s
7378+// RUN: %clang_cc1 -triple amdgcn---opencl -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s
7379+// RUN: %clang_cc1 -triple amdgcn---amdgizcl -cl-ext=__opencl_c_fp64 -cl-std=CL3.0 %s -emit-llvm -o - | FileCheck %s
7380
7381 #ifdef __AMDGCN__
7382 #define PTSIZE 8
7383@@ -11,7 +22,7 @@
7384 #define PTSIZE 4
7385 #endif
7386
7387-#ifdef cl_khr_fp64
7388+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
7389 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
7390 #endif
7391 #ifdef cl_khr_fp16
7392@@ -59,8 +70,12 @@ void test() {
7393 check(__alignof__(double) == 8);
7394 #endif
7395
7396- check(sizeof(void*) == (__OPENCL_C_VERSION__ >= 200 ? 8 : 4));
7397- check(__alignof__(void*) == (__OPENCL_C_VERSION__ >= 200 ? 8 : 4));
7398+ check(sizeof(private void*) == 4);
7399+ check(__alignof__(private void*) == 4);
7400+#ifdef __opencl_c_generic_address_space
7401+ check(sizeof(generic void*) == 8);
7402+ check(__alignof__(generic void*) == 8);
7403+#endif
7404 check(sizeof(global_ptr_t) == PTSIZE);
7405 check(__alignof__(global_ptr_t) == PTSIZE);
7406 check(sizeof(constant_ptr_t) == PTSIZE);
7407diff --git a/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl b/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl
7408index d1ab6aceac5c..70c5bace023b 100644
7409--- a/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl
7410+++ b/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl
7411@@ -1,4 +1,5 @@
7412 // RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL1.2 -emit-llvm -o - -O0 | FileCheck %s
7413+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL3.0 -emit-llvm -o - -O0 | FileCheck %s
7414
7415 #pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
7416 void test_int8(uchar4 ua, uchar4 ub, char4 sa, char4 sb) {
7417diff --git a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
7418index 76ace5dca21e..5dc43e222f75 100644
7419--- a/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
7420+++ b/clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl
7421@@ -1,6 +1,8 @@
7422 // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
7423 // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
7424 // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
7425+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
7426+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
7427
7428 kernel void ker() {};
7429 // CHECK: define{{.*}}@ker() #0
7430diff --git a/clang/test/CodeGenOpenCL/fpmath.cl b/clang/test/CodeGenOpenCL/fpmath.cl
7431index 0108d909c94e..b28392739c71 100644
7432--- a/clang/test/CodeGenOpenCL/fpmath.cl
7433+++ b/clang/test/CodeGenOpenCL/fpmath.cl
7434@@ -2,6 +2,8 @@
7435 // RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s
7436 // RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s
7437 // RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL1.2 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s
7438+// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL3.0 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s
7439+// RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s
7440
7441 typedef __attribute__(( ext_vector_type(4) )) float float4;
7442
7443diff --git a/clang/test/CodeGenOpenCL/generic-address-space-feature.cl b/clang/test/CodeGenOpenCL/generic-address-space-feature.cl
7444new file mode 100644
7445index 000000000000..890860abe4d9
7446--- /dev/null
7447+++ b/clang/test/CodeGenOpenCL/generic-address-space-feature.cl
7448@@ -0,0 +1,28 @@
7449+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL12
7450+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL12
7451+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL20
7452+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL2.0 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL20
7453+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL30
7454+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,__opencl_c_generic_address_space -finclude-default-header -emit-llvm -o - | FileCheck %s --check-prefixes=CL30-GENERIC
7455+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64 -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL30
7456+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -cl-std=CL3.0 -cl-ext=-cl_khr_fp64,__opencl_c_generic_address_space -fdeclare-opencl-builtins -emit-llvm -o - | FileCheck %s --check-prefixes=CL30-GENERIC
7457+
7458+void test(global float* src1, local float *src2, private float *src3, float *src4, float tmp) {
7459+ // CL20: %{{.+}} = addrspacecast float addrspace(1)* %{{.+}} to float addrspace(4)*
7460+ // CL12-NOT: addrspacecast
7461+ // CL30-NOT: addrspacecast
7462+ // CL30-GENERIC-NOT: addrspacecast
7463+ tmp = sincos(tmp, src1);
7464+ // CL20: %{{.+}} = addrspacecast float addrspace(3)* %{{.+}} to float addrspace(4)*
7465+ // CL12-NOT: addrspacecast
7466+ // CL30-NOT: addrspacecast
7467+ // CL30-GENERIC-NOT: addrspacecast
7468+ tmp = sincos(tmp, src2);
7469+
7470+ // CL12: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float* {{.+}})
7471+ // CL20: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float addrspace(4)* {{.+}})
7472+ // CL30: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float* {{.+}})
7473+ // CL30-GENERIC: %{{.+}} = call {{(spir_func )?}}float [[FUNCNAME:@.*sincos.*]](float %{{.+}}, float addrspace(4)* {{.+}})
7474+ // CHECK: addrspacecast
7475+ tmp = sincos(tmp, src4);
7476+}
7477diff --git a/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl b/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl
7478index 515f13f6e768..5aa31ac6f345 100644
7479--- a/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl
7480+++ b/clang/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl
7481@@ -1,4 +1,5 @@
7482 // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s
7483+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s
7484
7485 // CHECK: %opencl.intel_sub_group_avc_mce_payload_t = type opaque
7486 // CHECK: %opencl.intel_sub_group_avc_ime_payload_t = type opaque
7487diff --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
7488index e89237623478..3d6708ac361f 100644
7489--- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
7490+++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
7491@@ -1,5 +1,8 @@
7492 // RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s
7493 // RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s
7494+// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s
7495+// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s
7496+
7497 // Test that the kernels always use the SPIR calling convention
7498 // to have unambiguous mapping of arguments to feasibly implement
7499 // clSetKernelArg().
7500diff --git a/clang/test/CodeGenOpenCL/logical-ops.cl b/clang/test/CodeGenOpenCL/logical-ops.cl
7501index f083a8580ee7..499eab65039b 100644
7502--- a/clang/test/CodeGenOpenCL/logical-ops.cl
7503+++ b/clang/test/CodeGenOpenCL/logical-ops.cl
7504@@ -1,4 +1,5 @@
7505 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s
7506+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -cl-ext=__opencl_c_fp64 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s
7507 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=clc++ -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s
7508
7509 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
7510diff --git a/clang/test/CodeGenOpenCL/no-half.cl b/clang/test/CodeGenOpenCL/no-half.cl
7511index aee8f678f01a..46da7fa339e8 100644
7512--- a/clang/test/CodeGenOpenCL/no-half.cl
7513+++ b/clang/test/CodeGenOpenCL/no-half.cl
7514@@ -1,6 +1,7 @@
7515 // RUN: %clang_cc1 %s -cl-std=cl2.0 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
7516 // RUN: %clang_cc1 %s -cl-std=cl1.2 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
7517 // RUN: %clang_cc1 %s -cl-std=cl1.1 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
7518+// RUN: %clang_cc1 %s -cl-std=cl3.0 -cl-ext=__opencl_c_fp64 -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
7519
7520 #pragma OPENCL EXTENSION cl_khr_fp64:enable
7521
7522diff --git a/clang/test/CodeGenOpenCL/pipe_builtin.cl b/clang/test/CodeGenOpenCL/pipe_builtin.cl
7523index 02b9669b7ab1..0bf35c336199 100644
7524--- a/clang/test/CodeGenOpenCL/pipe_builtin.cl
7525+++ b/clang/test/CodeGenOpenCL/pipe_builtin.cl
7526@@ -1,4 +1,7 @@
7527 // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=clc++ -o - %s | FileCheck %s
7528+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=cl2.0 -o - %s | FileCheck %s
7529+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=cl3.0 -cl-ext=__opencl_c_pipes,__opencl_c_subgroups -o - %s | FileCheck %s
7530+
7531 // FIXME: Add MS ABI manglings of OpenCL things and remove %itanium_abi_triple
7532 // above to support OpenCL in the MS C++ ABI.
7533
7534diff --git a/clang/test/CodeGenOpenCL/pipe_types.cl b/clang/test/CodeGenOpenCL/pipe_types.cl
7535index ba064c6d7557..b7a523d4f084 100644
7536--- a/clang/test/CodeGenOpenCL/pipe_types.cl
7537+++ b/clang/test/CodeGenOpenCL/pipe_types.cl
7538@@ -1,4 +1,5 @@
7539 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
7540+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -o - %s | FileCheck %s
7541
7542 // CHECK: %opencl.pipe_ro_t = type opaque
7543 // CHECK: %opencl.pipe_wo_t = type opaque
7544diff --git a/clang/test/CodeGenOpenCL/printf.cl b/clang/test/CodeGenOpenCL/printf.cl
7545index fc139d776db6..0133c5595d81 100644
7546--- a/clang/test/CodeGenOpenCL/printf.cl
7547+++ b/clang/test/CodeGenOpenCL/printf.cl
7548@@ -1,5 +1,7 @@
7549 // RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-+cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s
7550 // RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s
7551+// RUN: %clang_cc1 -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s
7552+// RUN: %clang_cc1 -cl-std=CL3.0 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s
7553
7554 typedef __attribute__((ext_vector_type(2))) float float2;
7555 typedef __attribute__((ext_vector_type(2))) half half2;
7556diff --git a/clang/test/CodeGenOpenCL/unroll-hint.cl b/clang/test/CodeGenOpenCL/unroll-hint.cl
7557index 0f84450a1ae6..9347c935869b 100644
7558--- a/clang/test/CodeGenOpenCL/unroll-hint.cl
7559+++ b/clang/test/CodeGenOpenCL/unroll-hint.cl
7560@@ -1,5 +1,6 @@
7561 // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
7562 // RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s | FileCheck %s
7563+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL3.0 -o - %s | FileCheck %s
7564
7565 /*** for ***/
7566 void for_count()
7567diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c
7568index 5c0bfb69f9a3..eabdf0b5938d 100644
7569--- a/clang/test/Driver/autocomplete.c
7570+++ b/clang/test/Driver/autocomplete.c
7571@@ -43,6 +43,8 @@
7572 // CLSTDALL-NEXT: CL1.2
7573 // CLSTDALL-NEXT: cl2.0
7574 // CLSTDALL-NEXT: CL2.0
7575+// CLSTDALL-NEXT: cl3.0
7576+// CLSTDALL-NEXT: CL3.0
7577 // CLSTDALL-NEXT: clc++
7578 // CLSTDALL-NEXT: CLC++
7579 // RUN: %clang --autocomplete=-fno-sanitize-coverage=,f | FileCheck %s -check-prefix=FNOSANICOVER
7580diff --git a/clang/test/Driver/opencl.cl b/clang/test/Driver/opencl.cl
7581index 63b04bc1af41..ffdd4f7d65bb 100644
7582--- a/clang/test/Driver/opencl.cl
7583+++ b/clang/test/Driver/opencl.cl
7584@@ -2,6 +2,7 @@
7585 // RUN: %clang -S -### -cl-std=CL1.1 %s 2>&1 | FileCheck --check-prefix=CHECK-CL11 %s
7586 // RUN: %clang -S -### -cl-std=CL1.2 %s 2>&1 | FileCheck --check-prefix=CHECK-CL12 %s
7587 // RUN: %clang -S -### -cl-std=CL2.0 %s 2>&1 | FileCheck --check-prefix=CHECK-CL20 %s
7588+// RUN: %clang -S -### -cl-std=CL3.0 %s 2>&1 | FileCheck --check-prefix=CHECK-CL30 %s
7589 // RUN: %clang -S -### -cl-std=clc++ %s 2>&1 | FileCheck --check-prefix=CHECK-CLCPP %s
7590 // RUN: %clang -S -### -cl-opt-disable %s 2>&1 | FileCheck --check-prefix=CHECK-OPT-DISABLE %s
7591 // RUN: %clang -S -### -cl-strict-aliasing %s 2>&1 | FileCheck --check-prefix=CHECK-STRICT-ALIASING %s
7592@@ -22,6 +23,7 @@
7593 // CHECK-CL11: "-cc1" {{.*}} "-cl-std=CL1.1"
7594 // CHECK-CL12: "-cc1" {{.*}} "-cl-std=CL1.2"
7595 // CHECK-CL20: "-cc1" {{.*}} "-cl-std=CL2.0"
7596+// CHECK-CL30: "-cc1" {{.*}} "-cl-std=CL3.0"
7597 // CHECK-CLCPP: "-cc1" {{.*}} "-cl-std=clc++"
7598 // CHECK-OPT-DISABLE: "-cc1" {{.*}} "-cl-opt-disable"
7599 // CHECK-STRICT-ALIASING: "-cc1" {{.*}} "-cl-strict-aliasing"
7600diff --git a/clang/test/Driver/unknown-std.cl b/clang/test/Driver/unknown-std.cl
7601index 6f371bac13ac..00209fb62556 100644
7602--- a/clang/test/Driver/unknown-std.cl
7603+++ b/clang/test/Driver/unknown-std.cl
7604@@ -10,6 +10,7 @@
7605 // CHECK-NEXT: note: use 'cl1.1' for 'OpenCL 1.1' standard
7606 // CHECK-NEXT: note: use 'cl1.2' for 'OpenCL 1.2' standard
7607 // CHECK-NEXT: note: use 'cl2.0' for 'OpenCL 2.0' standard
7608+// CHECK-NEXT: note: use 'cl3.0' for 'OpenCL 3.0' standard
7609 // CHECK-NEXT: note: use 'clc++' for 'C++ for OpenCL' standard
7610
7611 // Make sure that no other output is present.
7612diff --git a/clang/test/Frontend/stdlang.c b/clang/test/Frontend/stdlang.c
7613index 51484999e37a..eac4632fbdd6 100644
7614--- a/clang/test/Frontend/stdlang.c
7615+++ b/clang/test/Frontend/stdlang.c
7616@@ -9,6 +9,7 @@
7617 // RUN: %clang_cc1 -x cl -cl-std=CL1.1 -DOPENCL %s
7618 // RUN: %clang_cc1 -x cl -cl-std=CL1.2 -DOPENCL %s
7619 // RUN: %clang_cc1 -x cl -cl-std=CL2.0 -DOPENCL %s
7620+// RUN: %clang_cc1 -x cl -cl-std=CL3.0 -DOPENCL %s
7621 // RUN: %clang_cc1 -x cl -cl-std=CLC++ -DOPENCL %s
7622 // RUN: not %clang_cc1 -x cl -std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s
7623 // RUN: not %clang_cc1 -x cl -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s
7624diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl
7625index 1b151ffdd16a..2716076acdcf 100644
7626--- a/clang/test/Headers/opencl-c-header.cl
7627+++ b/clang/test/Headers/opencl-c-header.cl
7628@@ -1,6 +1,7 @@
7629 // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify | FileCheck %s
7630 // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.1 | FileCheck %s
7631 // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL1.2 | FileCheck %s
7632+// RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL3.0 | FileCheck %s
7633 // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=clc++ | FileCheck %s --check-prefix=CHECK20
7634
7635 // Test including the default header as a module.
7636@@ -39,9 +40,11 @@
7637 // RUN: rm -rf %t
7638 // RUN: mkdir -p %t
7639 // RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL1.2 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s
7640+// RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL3.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s
7641 // RUN: %clang_cc1 -triple amdgcn--amdhsa -O0 -emit-llvm -o - -cl-std=CL2.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK20 --check-prefix=CHECK-MOD %s
7642 // RUN: chmod u-w %t
7643 // RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL1.2 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s
7644+// RUN: %clang_cc1 -triple spir64-unknown-unknown -emit-llvm -o - -cl-std=CL3.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MOD %s
7645 // RUN: %clang_cc1 -triple amdgcn--amdhsa -O0 -emit-llvm -o - -cl-std=CL2.0 -finclude-default-header -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -ftime-report %s 2>&1 | FileCheck --check-prefix=CHECK20 --check-prefix=CHECK-MOD %s
7646 // RUN: chmod u+w %t
7647
7648@@ -67,7 +70,7 @@ char f(char x) {
7649 // from OpenCL 2.0 onwards.
7650
7651 // CHECK20: _Z12write_imagef14ocl_image3d_wo
7652-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
7653+#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0)
7654 void test_image3dwo(write_only image3d_t img) {
7655 write_imagef(img, (0), (0.0f));
7656 }
7657@@ -81,7 +84,7 @@ void test_atomics(__generic volatile unsigned int* a) {
7658 #endif
7659
7660 // Verify that ATOMIC_VAR_INIT is defined.
7661-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
7662+#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0)
7663 global atomic_int z = ATOMIC_VAR_INIT(99);
7664 #endif //__OPENCL_C_VERSION__
7665
7666diff --git a/clang/test/Index/pipe-size.cl b/clang/test/Index/pipe-size.cl
7667index 94a1255f0a48..59b76051eda1 100644
7668--- a/clang/test/Index/pipe-size.cl
7669+++ b/clang/test/Index/pipe-size.cl
7670@@ -2,6 +2,13 @@
7671 // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR
7672 // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64
7673 // RUN: %clang_cc1 -x cl -O0 -cl-std=CL2.0 -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN
7674+// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple x86_64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefix=X86
7675+// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple spir-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR
7676+// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck %s --check-prefix=SPIR64
7677+// RUN: %clang_cc1 -x cl -O0 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -emit-llvm -triple amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=AMDGCN
7678+
7679+
7680+
7681 __kernel void testPipe( pipe int test )
7682 {
7683 int s = sizeof(test);
7684diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c
7685index def105f4c52e..b088a37ba665 100644
7686--- a/clang/test/Preprocessor/predefined-macros.c
7687+++ b/clang/test/Preprocessor/predefined-macros.c
7688@@ -129,6 +129,8 @@
7689 // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL12
7690 // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=CL2.0 \
7691 // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL20
7692+// RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=CL3.0 \
7693+// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-CL30
7694 // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-fast-relaxed-math \
7695 // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FRM
7696 // RUN: %clang_cc1 %s -E -dM -o - -x cl -cl-std=clc++ \
7697@@ -137,26 +139,37 @@
7698 // CHECK-CL10: #define CL_VERSION_1_1 110
7699 // CHECK-CL10: #define CL_VERSION_1_2 120
7700 // CHECK-CL10: #define CL_VERSION_2_0 200
7701+// CHECK-CL10: #define CL_VERSION_3_0 300
7702 // CHECK-CL10: #define __OPENCL_C_VERSION__ 100
7703 // CHECK-CL10-NOT: #define __FAST_RELAXED_MATH__ 1
7704 // CHECK-CL11: #define CL_VERSION_1_0 100
7705 // CHECK-CL11: #define CL_VERSION_1_1 110
7706 // CHECK-CL11: #define CL_VERSION_1_2 120
7707 // CHECK-CL11: #define CL_VERSION_2_0 200
7708+// CHECK-CL11: #define CL_VERSION_3_0 300
7709 // CHECK-CL11: #define __OPENCL_C_VERSION__ 110
7710 // CHECK-CL11-NOT: #define __FAST_RELAXED_MATH__ 1
7711 // CHECK-CL12: #define CL_VERSION_1_0 100
7712 // CHECK-CL12: #define CL_VERSION_1_1 110
7713 // CHECK-CL12: #define CL_VERSION_1_2 120
7714 // CHECK-CL12: #define CL_VERSION_2_0 200
7715+// CHECK-CL12: #define CL_VERSION_3_0 300
7716 // CHECK-CL12: #define __OPENCL_C_VERSION__ 120
7717 // CHECK-CL12-NOT: #define __FAST_RELAXED_MATH__ 1
7718 // CHECK-CL20: #define CL_VERSION_1_0 100
7719 // CHECK-CL20: #define CL_VERSION_1_1 110
7720 // CHECK-CL20: #define CL_VERSION_1_2 120
7721 // CHECK-CL20: #define CL_VERSION_2_0 200
7722+// CHECK-CL20: #define CL_VERSION_3_0 300
7723 // CHECK-CL20: #define __OPENCL_C_VERSION__ 200
7724 // CHECK-CL20-NOT: #define __FAST_RELAXED_MATH__ 1
7725+// CHECK-CL30: #define CL_VERSION_1_0 100
7726+// CHECK-CL30: #define CL_VERSION_1_1 110
7727+// CHECK-CL30: #define CL_VERSION_1_2 120
7728+// CHECK-CL30: #define CL_VERSION_2_0 200
7729+// CHECK-CL30: #define CL_VERSION_3_0 300
7730+// CHECK-CL30: #define __OPENCL_C_VERSION__ 300
7731+// CHECK-CL30-NOT: #define __FAST_RELAXED_MATH__ 1
7732 // CHECK-FRM: #define __FAST_RELAXED_MATH__ 1
7733 // CHECK-CLCPP10: #define __CL_CPP_VERSION_1_0__ 100
7734 // CHECK-CLCPP10: #define __OPENCL_CPP_VERSION__ 100
7735diff --git a/clang/test/Sema/feature-extensions-simult-support.cl b/clang/test/Sema/feature-extensions-simult-support.cl
7736new file mode 100644
7737index 000000000000..0789105002b2
7738--- /dev/null
7739+++ b/clang/test/Sema/feature-extensions-simult-support.cl
7740@@ -0,0 +1,75 @@
7741+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_fp64
7742+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_fp64
7743+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_3d_image_writes
7744+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_3d_image_writes
7745+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=-cl_khr_subgroups
7746+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2 -cl-ext=+cl_khr_subgroups
7747+
7748+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_fp64
7749+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_fp64
7750+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_3d_image_writes
7751+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_3d_image_writes
7752+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-cl_khr_subgroups
7753+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_subgroups
7754+
7755+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64
7756+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_3d_image_writes
7757+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_subgroups
7758+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes
7759+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_device_enqueue
7760+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_read_write_images
7761+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64,-cl_khr_fp64
7762+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_3d_image_writes,-cl_khr_3d_image_writes
7763+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_subgroups,-cl_khr_subgroups
7764+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_fp64
7765+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_3d_image_writes
7766+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+cl_khr_subgroups
7767+
7768+// expected-no-diagnostics
7769+
7770+#ifdef cl_khr_fp64
7771+ #ifndef __opencl_c_fp64
7772+ #error macros were not properly set up
7773+ #endif
7774+#endif
7775+#ifdef __opencl_c_fp64
7776+ #ifndef cl_khr_fp64
7777+ #error macros were not properly set up
7778+ #endif
7779+#endif
7780+
7781+#ifdef cl_khr_3d_image_writes
7782+ #ifndef __opencl_c_3d_image_writes
7783+ #error macros were not properly set up
7784+ #endif
7785+#endif
7786+#ifdef __opencl_c_3d_image_writes
7787+ #ifndef cl_khr_3d_image_writes
7788+ #error macros were not properly set up
7789+ #endif
7790+#endif
7791+
7792+#ifdef cl_khr_subgroups
7793+ #ifndef __opencl_c_subgroups
7794+ #error macros were not properly set up
7795+ #endif
7796+#endif
7797+#ifdef __opencl_c_subgroups
7798+ #ifndef cl_khr_subgroups
7799+ #error macros were not properly set up
7800+ #endif
7801+#endif
7802+
7803+#if defined(__opencl_c_pipes) || defined(__opencl_c_device_enqueue)
7804+ #ifndef __opencl_c_generic_address_space
7805+ #error macros were not properly set up
7806+ #endif
7807+#endif
7808+
7809+#if defined(__opencl_c_3d_image_writes) || defined(__opencl_c_read_write_images)
7810+ #ifndef __opencl_c_images
7811+ #error macros were not properly set up
7812+ #endif
7813+#endif
7814+
7815+kernel void test(){}
7816diff --git a/clang/test/Sema/features-ignore-pragma.cl b/clang/test/Sema/features-ignore-pragma.cl
7817new file mode 100644
7818index 000000000000..046ce5390754
7819--- /dev/null
7820+++ b/clang/test/Sema/features-ignore-pragma.cl
7821@@ -0,0 +1,24 @@
7822+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0
7823+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_fp64
7824+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_subgroups
7825+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_3d_image_writes
7826+
7827+#pragma OPENCL EXTENSION __opencl_c_fp64 : enable
7828+// expected-warning@-1 {{OpenCL feature support can't be controlled via pragma, ignoring}}
7829+
7830+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
7831+#ifndef __opencl_c_fp64
7832+// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_fp64' - ignoring}}
7833+#endif
7834+
7835+#pragma OPENCL EXTENSION cl_khr_subgroups : enable
7836+#ifndef __opencl_c_subgroups
7837+// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_subgroups' - ignoring}}
7838+#endif
7839+
7840+#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
7841+#ifndef __opencl_c_3d_image_writes
7842+// expected-warning@-2{{unsupported OpenCL extension 'cl_khr_3d_image_writes' - ignoring}}
7843+#endif
7844+
7845+kernel void foo() {}
7846diff --git a/clang/test/Sema/opencl-features-pipes.cl b/clang/test/Sema/opencl-features-pipes.cl
7847new file mode 100644
7848index 000000000000..c0ac778f24a6
7849--- /dev/null
7850+++ b/clang/test/Sema/opencl-features-pipes.cl
7851@@ -0,0 +1,18 @@
7852+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.1
7853+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2
7854+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -DHAS
7855+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0
7856+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -DHAS
7857+// expected-no-diagnostics
7858+
7859+#ifdef HAS
7860+ #ifndef __opencl_c_pipes
7861+ #error Feature should be defined
7862+ #endif
7863+#else
7864+ #ifdef __opencl_c_pipes
7865+ #error Feature should not be defined
7866+ #endif
7867+#endif
7868+
7869+kernel void foo() {}
7870diff --git a/clang/test/Sema/opencl-features.cl b/clang/test/Sema/opencl-features.cl
7871new file mode 100644
7872index 000000000000..aa432f6b60bf
7873--- /dev/null
7874+++ b/clang/test/Sema/opencl-features.cl
7875@@ -0,0 +1,128 @@
7876+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0
7877+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=-__opencl_c_device_enqueue,-__opencl_c_pipes,-__opencl_c_read_write_images
7878+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CLC++
7879+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0
7880+// expected-no-diagnostics
7881+
7882+#ifndef __opencl_c_int64
7883+ #error Feature __opencl_c_int64 shouldn't be defined
7884+#endif
7885+
7886+#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ == CL_VERSION_2_0)
7887+ #ifndef __opencl_c_3d_image_writes
7888+ #error Feature __opencl_c_3d_image_writes should be defined
7889+ #endif
7890+
7891+ #ifndef __opencl_c_atomic_order_acq_rel
7892+ #error Feature __opencl_c_atomic_order_acq_rel should be defined
7893+ #endif
7894+
7895+ #ifndef __opencl_c_atomic_order_seq_cst
7896+ #error Feature __opencl_c_atomic_order_seq_cst should be defined
7897+ #endif
7898+
7899+ #ifndef __opencl_c_atomic_scope_device
7900+ #error Feature __opencl_c_atomic_scope_device should be defined
7901+ #endif
7902+
7903+ #ifndef __opencl_c_atomic_scope_all_devices
7904+ #error Feature __opencl_c_atomic_scope_all_devices should be defined
7905+ #endif
7906+
7907+ #ifndef __opencl_c_device_enqueue
7908+ #error Feature __opencl_c_device_enqueue should be defined
7909+ #endif
7910+
7911+ #ifndef __opencl_c_generic_address_space
7912+ #error Feature __opencl_c_generic_address_space should be defined
7913+ #endif
7914+
7915+ #ifndef __opencl_c_pipes
7916+ #error Feature __opencl_c_pipes should be defined
7917+ #endif
7918+
7919+ #ifndef __opencl_c_program_scope_global_variables
7920+ #error Feature __opencl_c_program_scope_global_variables should be defined
7921+ #endif
7922+
7923+ #ifndef __opencl_c_read_write_images
7924+ #error Feature __opencl_c_read_write_images should be defined
7925+ #endif
7926+
7927+ #ifndef __opencl_c_subgroups
7928+ #error Feature __opencl_c_subgroups should be defined
7929+ #endif
7930+
7931+ #ifndef __opencl_c_work_group_collective_functions
7932+ #error Feature __opencl_c_work_group_collective_functions should be defined
7933+ #endif
7934+
7935+ #ifndef __opencl_c_fp64
7936+ #error Feature __opencl_c_fp64 should be defined
7937+ #endif
7938+
7939+ #ifndef __opencl_c_images
7940+ #error Feature __opencl_c_images should be defined
7941+ #endif
7942+#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
7943+
7944+
7945+#if __OPENCL_C_VERSION__ == CL_VERSION_3_0
7946+ #ifdef __opencl_c_3d_image_writes
7947+ #error Feature __opencl_c_3d_image_writes shouldn't be defined
7948+ #endif
7949+
7950+ #ifdef __opencl_c_atomic_order_acq_rel
7951+ #error Feature __opencl_c_atomic_order_acq_rel shouldn't be defined
7952+ #endif
7953+
7954+ #ifdef __opencl_c_atomic_order_seq_cst
7955+ #error Feature __opencl_c_atomic_order_seq_cst shouldn't be defined
7956+ #endif
7957+
7958+ #ifdef __opencl_c_atomic_scope_device
7959+ #error Feature __opencl_c_atomic_scope_device shouldn't be defined
7960+ #endif
7961+
7962+ #ifdef __opencl_c_atomic_scope_all_devices
7963+ #error Feature __opencl_c_atomic_scope_all_devices shouldn't be defined
7964+ #endif
7965+
7966+ #ifdef __opencl_c_device_enqueue
7967+ #error Feature __opencl_c_device_enqueue shouldn't be defined
7968+ #endif
7969+
7970+ #ifdef __opencl_c_generic_address_space
7971+ #error Feature __opencl_c_generic_address_space shouldn't be defined
7972+ #endif
7973+
7974+ #ifdef __opencl_c_pipes
7975+ #error Feature __opencl_c_pipes shouldn't be defined
7976+ #endif
7977+
7978+ #ifdef __opencl_c_program_scope_global_variables
7979+ #error Feature __opencl_c_program_scope_global_variables shouldn't be defined
7980+ #endif
7981+
7982+ #ifdef __opencl_c_read_write_images
7983+ #error Feature __opencl_c_read_write_images shouldn't be defined
7984+ #endif
7985+
7986+ #ifdef __opencl_c_subgroups
7987+ #error Feature __opencl_c_subgroups shouldn't be defined
7988+ #endif
7989+
7990+ #ifdef __opencl_c_work_group_collective_functions
7991+ #error Feature __opencl_c_work_group_collective_functions shouldn't be defined
7992+ #endif
7993+
7994+ #ifdef __opencl_c_fp64
7995+ #error Feature __opencl_c_fp64 shouldn't be defined
7996+ #endif
7997+
7998+ #ifdef __opencl_c_images
7999+ #error Feature __opencl_c_images shouldn't be defined
8000+ #endif
8001+#endif // __OPENCL_C_VERSION__ == CL_VERSION_3_0
8002+
8003+kernel void foo() {}
8004diff --git a/clang/test/Sema/pipe_builtins_feature.cl b/clang/test/Sema/pipe_builtins_feature.cl
8005new file mode 100644
8006index 000000000000..56fa94fc7705
8007--- /dev/null
8008+++ b/clang/test/Sema/pipe_builtins_feature.cl
8009@@ -0,0 +1,21 @@
8010+// RUN: %clang_cc1 -cl-std=CL2.0 -fsyntax-only -verify %s
8011+// RUN: %clang_cc1 -cl-std=CL3.0 -fsyntax-only -verify %s
8012+// RUN: %clang_cc1 -cl-std=CL3.0 -cl-ext=__opencl_c_pipes -fsyntax-only -verify %s
8013+
8014+#ifdef __opencl_c_pipes
8015+ #ifndef __opencl_c_generic_address_space
8016+ #error Generic address space feature must also be defined
8017+ #endif
8018+// CHECK: expected-no-diagnostics
8019+// check that pragma disable all doesn't touch feature support
8020+ #pragma OPENCL EXTENSION all : disable
8021+#endif
8022+
8023+void test(read_only pipe int p, global int *ptr) {
8024+ reserve_id_t rid;
8025+}
8026+
8027+#ifndef __opencl_c_pipes
8028+// expected-error@-5 {{expected parameter declarator}} expected-error@-5 {{expected ')'}} expected-note@-5 {{to match this '('}}
8029+// expected-error@-5 {{use of type 'reserve_id_t' requires __opencl_c_pipes extension to be enabled}}
8030+#endif
8031diff --git a/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl b/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl
8032index 5efea216346a..09aea27ae6de 100644
8033--- a/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl
8034+++ b/clang/test/SemaOpenCL/address-spaces-conversions-cl2.0.cl
8035@@ -1,6 +1,9 @@
8036 // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL2.0
8037 // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL2.0
8038 // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL2.0
8039+// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space
8040+// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space
8041+// RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space
8042 // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DCONSTANT -cl-std=clc++
8043 // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGLOBAL -cl-std=clc++
8044 // RUN: %clang_cc1 %s -ffake-address-space-map -verify -pedantic -fsyntax-only -DGENERIC -cl-std=clc++
8045diff --git a/clang/test/SemaOpenCL/address-spaces.cl b/clang/test/SemaOpenCL/address-spaces.cl
8046index 07547ea19680..e367a7c57292 100644
8047--- a/clang/test/SemaOpenCL/address-spaces.cl
8048+++ b/clang/test/SemaOpenCL/address-spaces.cl
8049@@ -1,5 +1,6 @@
8050 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only
8051 // RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -pedantic -fsyntax-only
8052+// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=__opencl_c_generic_address_space -verify -pedantic -fsyntax-only
8053 // RUN: %clang_cc1 %s -cl-std=clc++ -verify -pedantic -fsyntax-only
8054
8055 __constant int ci = 1;
8056diff --git a/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl b/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl
8057index f63e2913c749..727141190a0b 100644
8058--- a/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl
8059+++ b/clang/test/SemaOpenCL/cl20-device-side-enqueue.cl
8060@@ -2,6 +2,12 @@
8061 // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir-unknown-unknown" -verify -pedantic -fsyntax-only -DB32 -DQUALS="const volatile"
8062 // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS=
8063 // RUN: %clang_cc1 %s -cl-std=CL2.0 -triple "spir64-unknown-unknown" -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS="const volatile"
8064+// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -DB32 -DQUALS=
8065+// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -DB32 -DQUALS="const volatile"
8066+// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir64-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS=
8067+// RUN: %clang_cc1 %s -cl-std=CL3.0 -triple "spir64-unknown-unknown" -cl-ext=__opencl_c_device_enqueue,__opencl_c_subgroups -verify -pedantic -fsyntax-only -Wconversion -DWCONV -DQUALS="const volatile"
8068+
8069+
8070
8071 typedef struct {int a;} ndrange_t;
8072 // Diagnostic tests for different overloads of enqueue_kernel from Table 6.13.17.1 of OpenCL 2.0 Spec.
8073@@ -235,11 +241,17 @@ kernel void bar(global unsigned int *buf)
8074 kernel void foo1(global unsigned int *buf)
8075 {
8076 ndrange_t n;
8077- buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}}
8078+ buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){});
8079+#if __OPENCL_C_VERSION__ < 300
8080+// expected-error@-2 {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}}
8081+#endif
8082 }
8083
8084 kernel void bar1(global unsigned int *buf)
8085 {
8086 ndrange_t n;
8087- buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}}
8088+ buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){});
8089+#if __OPENCL_C_VERSION__ < 300
8090+// expected-error@-2 {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}}
8091+#endif
8092 }
8093diff --git a/clang/test/SemaOpenCL/forget-unsupported-builtins.cl b/clang/test/SemaOpenCL/forget-unsupported-builtins.cl
8094new file mode 100644
8095index 000000000000..14dd03e2c7db
8096--- /dev/null
8097+++ b/clang/test/SemaOpenCL/forget-unsupported-builtins.cl
8098@@ -0,0 +1,22 @@
8099+// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -verify %s -triple spir-unknown-unknown
8100+// RUN: %clang_cc1 -cl-std=cl3.0 -fsyntax-only -cl-ext=__opencl_c_pipes,__opencl_c_generic_address_space,__opencl_c_device_enqueue -verify %s -triple spir-unknown-unknown -DFEATURES
8101+
8102+#ifndef FEATURES
8103+ // expected-no-diagnostics
8104+#else
8105+ // expected-error@+10 {{cannot redeclare builtin function 'get_pipe_max_packets'}}
8106+ // expected-note@+9 {{'get_pipe_max_packets' is a builtin with type 'unsigned int ()'}}
8107+ // expected-error@+9 {{cannot redeclare builtin function 'to_local'}}
8108+ // expected-note@+8 {{'to_local' is a builtin with type 'void *(void *)'}}
8109+ // expected-error@+8 {{cannot redeclare builtin function 'to_global'}}
8110+ // expected-note@+7 {{'to_global' is a builtin with type 'void *(void *)'}}
8111+ // expected-error@+7 {{cannot redeclare builtin function 'get_kernel_work_group_size'}}
8112+ // expected-note@+6 {{'get_kernel_work_group_size' is a builtin with type 'unsigned int ()'}}
8113+#endif
8114+
8115+int get_pipe_max_packets(int);
8116+int to_local(int);
8117+int to_global(int);
8118+int get_kernel_work_group_size(int);
8119+
8120+kernel void test(global int *dst) {}
8121diff --git a/clang/test/SemaOpenCL/image-features.cl b/clang/test/SemaOpenCL/image-features.cl
8122new file mode 100644
8123index 000000000000..ace6913bb31e
8124--- /dev/null
8125+++ b/clang/test/SemaOpenCL/image-features.cl
8126@@ -0,0 +1,20 @@
8127+// RUN: %clang_cc1 -cl-std=cl2.0 -fsyntax-only -verify %s -triple spir-unknown-unknown
8128+// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_images -fsyntax-only -verify %s -triple spir-unknown-unknown
8129+// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_3d_image_writes -fsyntax-only -verify %s -triple spir-unknown-unknown
8130+// RUN: %clang_cc1 -cl-std=cl3.0 -cl-ext=__opencl_c_read_write_images -fsyntax-only -verify %s -triple spir-unknown-unknown
8131+
8132+#if defined(__opencl_c_read_write_images) && defined(__opencl_c_3d_image_writes)
8133+ // expected-no-diagnostics
8134+#endif
8135+
8136+__kernel void write_3d_image(__write_only image3d_t i) {}
8137+
8138+#ifndef __opencl_c_3d_image_writes
8139+ // expected-error@-3 {{use of type '__write_only image3d_t' requires __opencl_c_3d_image_writes extension to be enabled}}
8140+#endif
8141+
8142+__kernel void read_write_3d_image(__read_write image3d_t i) { }
8143+
8144+#ifndef __opencl_c_read_write_images
8145+ // expected-error@-3 {{use of type '__read_write image3d_t' requires __opencl_c_read_write_images extension to be enabled}}
8146+#endif
8147diff --git a/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl b/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl
8148index 36e76621d24a..38b0a04726e3 100644
8149--- a/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl
8150+++ b/clang/test/SemaOpenCL/invalid-pipe-builtin-cl2.0.cl
8151@@ -1,4 +1,5 @@
8152 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0 -cl-ext=+cl_khr_subgroups
8153+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_subgroups,__opencl_c_pipes
8154
8155 #pragma OPENCL EXTENSION cl_khr_subgroups : enable
8156
8157diff --git a/clang/test/SemaOpenCL/storageclass-cl20.cl b/clang/test/SemaOpenCL/storageclass-cl20.cl
8158index 581701d2a6a5..469c526ebc30 100644
8159--- a/clang/test/SemaOpenCL/storageclass-cl20.cl
8160+++ b/clang/test/SemaOpenCL/storageclass-cl20.cl
8161@@ -1,4 +1,5 @@
8162 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0
8163+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=__opencl_c_program_scope_global_variables,__opencl_c_generic_address_space
8164
8165 int G2 = 0;
8166 global int G3 = 0;
8167diff --git a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
8168index 41d33b550680..f50652493e5e 100644
8169--- a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
8170+++ b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
8171@@ -56,6 +56,7 @@
8172 //===----------------------------------------------------------------------===//
8173
8174 #include "TableGenBackends.h"
8175+#include "clang/Basic/OpenCLOptions.h"
8176 #include "llvm/ADT/MapVector.h"
8177 #include "llvm/ADT/STLExtras.h"
8178 #include "llvm/ADT/SmallString.h"
8179@@ -69,6 +70,7 @@
8180 #include "llvm/TableGen/Record.h"
8181 #include "llvm/TableGen/StringMatcher.h"
8182 #include "llvm/TableGen/TableGenBackend.h"
8183+#include <numeric>
8184 #include <set>
8185
8186 using namespace llvm;
8187@@ -228,6 +230,10 @@ private:
8188 // The function "tan", having the same signatures, would be mapped to the
8189 // same entry (<I1, I2, I3>).
8190 MapVector<BuiltinIndexListTy *, BuiltinTableEntries> SignatureListMap;
8191+
8192+ // Encode all versions
8193+ unsigned short
8194+ EncodeBuiltinVersions(std::vector<Record *> BuiltinVersionsRecords) const;
8195 };
8196 } // namespace
8197
8198@@ -338,12 +344,10 @@ struct OpenCLBuiltinStruct {
8199 const bool IsConst;
8200 // Function attribute __attribute__((convergent))
8201 const bool IsConv;
8202+ // All opencl versions encoded
8203+ const unsigned char AllVersions : 5;
8204 // OpenCL extension(s) required for this overload.
8205 const unsigned short Extension;
8206- // First OpenCL version in which this overload was introduced (e.g. CL20).
8207- const unsigned short MinVersion;
8208- // First OpenCL version in which this overload was removed (e.g. CL20).
8209- const unsigned short MaxVersion;
8210 };
8211
8212 )";
8213@@ -503,11 +507,9 @@ void BuiltinNameEmitter::EmitBuiltinTable() {
8214 << (Overload.first->getValueAsBit("IsPure")) << ", "
8215 << (Overload.first->getValueAsBit("IsConst")) << ", "
8216 << (Overload.first->getValueAsBit("IsConv")) << ", "
8217- << FunctionExtensionIndex[ExtName] << ", "
8218- << Overload.first->getValueAsDef("MinVersion")->getValueAsInt("ID")
8219- << ", "
8220- << Overload.first->getValueAsDef("MaxVersion")->getValueAsInt("ID")
8221- << " },\n";
8222+ << EncodeBuiltinVersions(
8223+ Overload.first->getValueAsListOfDefs("Versions"))
8224+ << ", " << FunctionExtensionIndex[ExtName] << " },\n";
8225 Index++;
8226 }
8227 }
8228@@ -528,10 +530,8 @@ bool BuiltinNameEmitter::CanReuseSignature(
8229 if (Rec->getValueAsBit("IsPure") == Rec2->getValueAsBit("IsPure") &&
8230 Rec->getValueAsBit("IsConst") == Rec2->getValueAsBit("IsConst") &&
8231 Rec->getValueAsBit("IsConv") == Rec2->getValueAsBit("IsConv") &&
8232- Rec->getValueAsDef("MinVersion")->getValueAsInt("ID") ==
8233- Rec2->getValueAsDef("MinVersion")->getValueAsInt("ID") &&
8234- Rec->getValueAsDef("MaxVersion")->getValueAsInt("ID") ==
8235- Rec2->getValueAsDef("MaxVersion")->getValueAsInt("ID") &&
8236+ EncodeBuiltinVersions(Rec->getValueAsListOfDefs("Versions")) ==
8237+ EncodeBuiltinVersions(Rec2->getValueAsListOfDefs("Versions")) &&
8238 Rec->getValueAsDef("Extension")->getName() ==
8239 Rec2->getValueAsDef("Extension")->getName()) {
8240 return true;
8241@@ -806,6 +806,15 @@ static void OCL2Qual(ASTContext &Context, const OpenCLTypeStruct &Ty,
8242 OS << "\n} // OCL2Qual\n";
8243 }
8244
8245+unsigned short BuiltinNameEmitter::EncodeBuiltinVersions(
8246+ std::vector<Record *> BuiltinVersionsRecords) const {
8247+ return std::accumulate(
8248+ BuiltinVersionsRecords.begin(), BuiltinVersionsRecords.end(),
8249+ (unsigned short)0, [](unsigned short C, Record *R) {
8250+ return C |= clang::OpenCLOptions::EncodeVersion(R->getValueAsInt("ID"));
8251+ });
8252+}
8253+
8254 void clang::EmitClangOpenCLBuiltins(RecordKeeper &Records, raw_ostream &OS) {
8255 BuiltinNameEmitter NameChecker(Records, OS);
8256 NameChecker.Emit();
8257--
82582.17.1
8259
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-llvm-spirv-skip-building-tests.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-llvm-spirv-skip-building-tests.patch
deleted file mode 100644
index 84a4ba19..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0001-llvm-spirv-skip-building-tests.patch
+++ /dev/null
@@ -1,51 +0,0 @@
1From 661021749a168c423d69d0ba7cdfa16fed860836 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Wed, 21 Aug 2019 14:35:31 +0800
4Subject: [PATCH 1/3] llvm-spirv: skip building tests
5
6Some of these need clang to be built and since we're building this in-tree,
7that leads to problems when compiling libcxx, compiler-rt which aren't built
8in-tree.
9
10Instead of using SPIRV_SKIP_CLANG_BUILD to skip clang build and adding this to
11all components, disable the building of tests altogether.
12
13Upstream-Status: Inappropriate
14
15Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
16Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
17---
18 CMakeLists.txt | 10 ----------
19 1 file changed, 10 deletions(-)
20
21diff --git a/CMakeLists.txt b/CMakeLists.txt
22index 92c50370..80999c98 100644
23--- a/CMakeLists.txt
24+++ b/CMakeLists.txt
25@@ -25,13 +25,6 @@ if(LLVM_SPIRV_BUILD_EXTERNAL)
26 set(CMAKE_CXX_STANDARD 14)
27 set(CMAKE_CXX_STANDARD_REQUIRED ON)
28
29- if(LLVM_SPIRV_INCLUDE_TESTS)
30- set(LLVM_TEST_COMPONENTS
31- llvm-as
32- llvm-dis
33- )
34- endif(LLVM_SPIRV_INCLUDE_TESTS)
35-
36 find_package(LLVM 10.0.0 REQUIRED
37 COMPONENTS
38 Analysis
39@@ -63,9 +56,6 @@ set(LLVM_SPIRV_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include)
40
41 add_subdirectory(lib/SPIRV)
42 add_subdirectory(tools/llvm-spirv)
43-if(LLVM_SPIRV_INCLUDE_TESTS)
44- add_subdirectory(test)
45-endif(LLVM_SPIRV_INCLUDE_TESTS)
46
47 install(
48 FILES
49--
502.17.1
51
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch
deleted file mode 100644
index 3f1b24e7..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch
+++ /dev/null
@@ -1,812 +0,0 @@
1From 3f544cfe44ee5f113a3fb554aca2cf5d64996062 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Wed, 7 Apr 2021 16:38:38 +0800
4Subject: [PATCH 2/7] Add cl_khr_extended_subgroup extensions.
5
6Added extensions and their function declarations into
7the standard header.
8
9Patch by Piotr Fusik!
10
11Tags: #clang
12
13Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/4a4402f0d72167477a6252e4c3daf5089ebc8f9a]
14Signed-off-by: Anastasia Stulova <anastasia.stulova@arm.com>
15Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
16---
17 .../include/clang/Basic/OpenCLExtensions.def | 7 +
18 clang/lib/Headers/opencl-c.h | 668 ++++++++++++++++++
19 clang/test/SemaOpenCL/extension-version.cl | 83 +++
20 3 files changed, 758 insertions(+)
21
22diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def
23index 608f78a13eef..d1574164f9b2 100644
24--- a/clang/include/clang/Basic/OpenCLExtensions.def
25+++ b/clang/include/clang/Basic/OpenCLExtensions.def
26@@ -74,6 +74,13 @@ OPENCLEXT_INTERNAL(cl_khr_mipmap_image_writes, 200, ~0U)
27 OPENCLEXT_INTERNAL(cl_khr_srgb_image_writes, 200, ~0U)
28 OPENCLEXT_INTERNAL(cl_khr_subgroups, 200, ~0U)
29 OPENCLEXT_INTERNAL(cl_khr_terminate_context, 200, ~0U)
30+OPENCLEXT_INTERNAL(cl_khr_subgroup_extended_types, 200, ~0U)
31+OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_vote, 200, ~0U)
32+OPENCLEXT_INTERNAL(cl_khr_subgroup_ballot, 200, ~0U)
33+OPENCLEXT_INTERNAL(cl_khr_subgroup_non_uniform_arithmetic, 200, ~0U)
34+OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle, 200, ~0U)
35+OPENCLEXT_INTERNAL(cl_khr_subgroup_shuffle_relative, 200, ~0U)
36+OPENCLEXT_INTERNAL(cl_khr_subgroup_clustered_reduce, 200, ~0U)
37
38 // Clang Extensions.
39 OPENCLEXT_INTERNAL(cl_clang_storage_class_specifiers, 100, ~0U)
40diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
41index 93a946cec5b1..67d900eb1c3d 100644
42--- a/clang/lib/Headers/opencl-c.h
43+++ b/clang/lib/Headers/opencl-c.h
44@@ -17530,6 +17530,674 @@ double __ovld __conv sub_group_scan_inclusive_max(double x);
45
46 #endif //cl_khr_subgroups cl_intel_subgroups
47
48+#if defined(cl_khr_subgroup_extended_types)
49+char __ovld __conv sub_group_broadcast( char value, uint index );
50+char2 __ovld __conv sub_group_broadcast( char2 value, uint index );
51+char3 __ovld __conv sub_group_broadcast( char3 value, uint index );
52+char4 __ovld __conv sub_group_broadcast( char4 value, uint index );
53+char8 __ovld __conv sub_group_broadcast( char8 value, uint index );
54+char16 __ovld __conv sub_group_broadcast( char16 value, uint index );
55+
56+uchar __ovld __conv sub_group_broadcast( uchar value, uint index );
57+uchar2 __ovld __conv sub_group_broadcast( uchar2 value, uint index );
58+uchar3 __ovld __conv sub_group_broadcast( uchar3 value, uint index );
59+uchar4 __ovld __conv sub_group_broadcast( uchar4 value, uint index );
60+uchar8 __ovld __conv sub_group_broadcast( uchar8 value, uint index );
61+uchar16 __ovld __conv sub_group_broadcast( uchar16 value, uint index );
62+
63+short __ovld __conv sub_group_broadcast( short value, uint index );
64+short2 __ovld __conv sub_group_broadcast( short2 value, uint index );
65+short3 __ovld __conv sub_group_broadcast( short3 value, uint index );
66+short4 __ovld __conv sub_group_broadcast( short4 value, uint index );
67+short8 __ovld __conv sub_group_broadcast( short8 value, uint index );
68+short16 __ovld __conv sub_group_broadcast( short16 value, uint index );
69+
70+ushort __ovld __conv sub_group_broadcast( ushort value, uint index );
71+ushort2 __ovld __conv sub_group_broadcast( ushort2 value, uint index );
72+ushort3 __ovld __conv sub_group_broadcast( ushort3 value, uint index );
73+ushort4 __ovld __conv sub_group_broadcast( ushort4 value, uint index );
74+ushort8 __ovld __conv sub_group_broadcast( ushort8 value, uint index );
75+ushort16 __ovld __conv sub_group_broadcast( ushort16 value, uint index );
76+
77+// scalar int broadcast is part of cl_khr_subgroups
78+int2 __ovld __conv sub_group_broadcast( int2 value, uint index );
79+int3 __ovld __conv sub_group_broadcast( int3 value, uint index );
80+int4 __ovld __conv sub_group_broadcast( int4 value, uint index );
81+int8 __ovld __conv sub_group_broadcast( int8 value, uint index );
82+int16 __ovld __conv sub_group_broadcast( int16 value, uint index );
83+
84+// scalar uint broadcast is part of cl_khr_subgroups
85+uint2 __ovld __conv sub_group_broadcast( uint2 value, uint index );
86+uint3 __ovld __conv sub_group_broadcast( uint3 value, uint index );
87+uint4 __ovld __conv sub_group_broadcast( uint4 value, uint index );
88+uint8 __ovld __conv sub_group_broadcast( uint8 value, uint index );
89+uint16 __ovld __conv sub_group_broadcast( uint16 value, uint index );
90+
91+// scalar long broadcast is part of cl_khr_subgroups
92+long2 __ovld __conv sub_group_broadcast( long2 value, uint index );
93+long3 __ovld __conv sub_group_broadcast( long3 value, uint index );
94+long4 __ovld __conv sub_group_broadcast( long4 value, uint index );
95+long8 __ovld __conv sub_group_broadcast( long8 value, uint index );
96+long16 __ovld __conv sub_group_broadcast( long16 value, uint index );
97+
98+// scalar ulong broadcast is part of cl_khr_subgroups
99+ulong2 __ovld __conv sub_group_broadcast( ulong2 value, uint index );
100+ulong3 __ovld __conv sub_group_broadcast( ulong3 value, uint index );
101+ulong4 __ovld __conv sub_group_broadcast( ulong4 value, uint index );
102+ulong8 __ovld __conv sub_group_broadcast( ulong8 value, uint index );
103+ulong16 __ovld __conv sub_group_broadcast( ulong16 value, uint index );
104+
105+// scalar float broadcast is part of cl_khr_subgroups
106+float2 __ovld __conv sub_group_broadcast( float2 value, uint index );
107+float3 __ovld __conv sub_group_broadcast( float3 value, uint index );
108+float4 __ovld __conv sub_group_broadcast( float4 value, uint index );
109+float8 __ovld __conv sub_group_broadcast( float8 value, uint index );
110+float16 __ovld __conv sub_group_broadcast( float16 value, uint index );
111+
112+char __ovld __conv sub_group_reduce_add( char value );
113+uchar __ovld __conv sub_group_reduce_add( uchar value );
114+short __ovld __conv sub_group_reduce_add( short value );
115+ushort __ovld __conv sub_group_reduce_add( ushort value );
116+
117+char __ovld __conv sub_group_reduce_min( char value );
118+uchar __ovld __conv sub_group_reduce_min( uchar value );
119+short __ovld __conv sub_group_reduce_min( short value );
120+ushort __ovld __conv sub_group_reduce_min( ushort value );
121+
122+char __ovld __conv sub_group_reduce_max( char value );
123+uchar __ovld __conv sub_group_reduce_max( uchar value );
124+short __ovld __conv sub_group_reduce_max( short value );
125+ushort __ovld __conv sub_group_reduce_max( ushort value );
126+
127+char __ovld __conv sub_group_scan_inclusive_add( char value );
128+uchar __ovld __conv sub_group_scan_inclusive_add( uchar value );
129+short __ovld __conv sub_group_scan_inclusive_add( short value );
130+ushort __ovld __conv sub_group_scan_inclusive_add( ushort value );
131+
132+char __ovld __conv sub_group_scan_inclusive_min( char value );
133+uchar __ovld __conv sub_group_scan_inclusive_min( uchar value );
134+short __ovld __conv sub_group_scan_inclusive_min( short value );
135+ushort __ovld __conv sub_group_scan_inclusive_min( ushort value );
136+
137+char __ovld __conv sub_group_scan_inclusive_max( char value );
138+uchar __ovld __conv sub_group_scan_inclusive_max( uchar value );
139+short __ovld __conv sub_group_scan_inclusive_max( short value );
140+ushort __ovld __conv sub_group_scan_inclusive_max( ushort value );
141+
142+char __ovld __conv sub_group_scan_exclusive_add( char value );
143+uchar __ovld __conv sub_group_scan_exclusive_add( uchar value );
144+short __ovld __conv sub_group_scan_exclusive_add( short value );
145+ushort __ovld __conv sub_group_scan_exclusive_add( ushort value );
146+
147+char __ovld __conv sub_group_scan_exclusive_min( char value );
148+uchar __ovld __conv sub_group_scan_exclusive_min( uchar value );
149+short __ovld __conv sub_group_scan_exclusive_min( short value );
150+ushort __ovld __conv sub_group_scan_exclusive_min( ushort value );
151+
152+char __ovld __conv sub_group_scan_exclusive_max( char value );
153+uchar __ovld __conv sub_group_scan_exclusive_max( uchar value );
154+short __ovld __conv sub_group_scan_exclusive_max( short value );
155+ushort __ovld __conv sub_group_scan_exclusive_max( ushort value );
156+
157+#if defined(cl_khr_fp16)
158+// scalar half broadcast is part of cl_khr_subgroups
159+half2 __ovld __conv sub_group_broadcast( half2 value, uint index );
160+half3 __ovld __conv sub_group_broadcast( half3 value, uint index );
161+half4 __ovld __conv sub_group_broadcast( half4 value, uint index );
162+half8 __ovld __conv sub_group_broadcast( half8 value, uint index );
163+half16 __ovld __conv sub_group_broadcast( half16 value, uint index );
164+#endif // cl_khr_fp16
165+
166+#if defined(cl_khr_fp64)
167+// scalar double broadcast is part of cl_khr_subgroups
168+double2 __ovld __conv sub_group_broadcast( double2 value, uint index );
169+double3 __ovld __conv sub_group_broadcast( double3 value, uint index );
170+double4 __ovld __conv sub_group_broadcast( double4 value, uint index );
171+double8 __ovld __conv sub_group_broadcast( double8 value, uint index );
172+double16 __ovld __conv sub_group_broadcast( double16 value, uint index );
173+#endif // cl_khr_fp64
174+
175+#endif // cl_khr_subgroup_extended_types
176+
177+#if defined(cl_khr_subgroup_non_uniform_vote)
178+int __ovld sub_group_elect(void);
179+int __ovld sub_group_non_uniform_all( int predicate );
180+int __ovld sub_group_non_uniform_any( int predicate );
181+
182+int __ovld sub_group_non_uniform_all_equal( char value );
183+int __ovld sub_group_non_uniform_all_equal( uchar value );
184+int __ovld sub_group_non_uniform_all_equal( short value );
185+int __ovld sub_group_non_uniform_all_equal( ushort value );
186+int __ovld sub_group_non_uniform_all_equal( int value );
187+int __ovld sub_group_non_uniform_all_equal( uint value );
188+int __ovld sub_group_non_uniform_all_equal( long value );
189+int __ovld sub_group_non_uniform_all_equal( ulong value );
190+int __ovld sub_group_non_uniform_all_equal( float value );
191+
192+#if defined(cl_khr_fp16)
193+int __ovld sub_group_non_uniform_all_equal( half value );
194+#endif // cl_khr_fp16
195+
196+#if defined(cl_khr_fp64)
197+int __ovld sub_group_non_uniform_all_equal( double value );
198+#endif // cl_khr_fp64
199+
200+#endif // cl_khr_subgroup_non_uniform_vote
201+
202+#if defined(cl_khr_subgroup_ballot)
203+char __ovld sub_group_non_uniform_broadcast( char value, uint index );
204+char2 __ovld sub_group_non_uniform_broadcast( char2 value, uint index );
205+char3 __ovld sub_group_non_uniform_broadcast( char3 value, uint index );
206+char4 __ovld sub_group_non_uniform_broadcast( char4 value, uint index );
207+char8 __ovld sub_group_non_uniform_broadcast( char8 value, uint index );
208+char16 __ovld sub_group_non_uniform_broadcast( char16 value, uint index );
209+
210+uchar __ovld sub_group_non_uniform_broadcast( uchar value, uint index );
211+uchar2 __ovld sub_group_non_uniform_broadcast( uchar2 value, uint index );
212+uchar3 __ovld sub_group_non_uniform_broadcast( uchar3 value, uint index );
213+uchar4 __ovld sub_group_non_uniform_broadcast( uchar4 value, uint index );
214+uchar8 __ovld sub_group_non_uniform_broadcast( uchar8 value, uint index );
215+uchar16 __ovld sub_group_non_uniform_broadcast( uchar16 value, uint index );
216+
217+short __ovld sub_group_non_uniform_broadcast( short value, uint index );
218+short2 __ovld sub_group_non_uniform_broadcast( short2 value, uint index );
219+short3 __ovld sub_group_non_uniform_broadcast( short3 value, uint index );
220+short4 __ovld sub_group_non_uniform_broadcast( short4 value, uint index );
221+short8 __ovld sub_group_non_uniform_broadcast( short8 value, uint index );
222+short16 __ovld sub_group_non_uniform_broadcast( short16 value, uint index );
223+
224+ushort __ovld sub_group_non_uniform_broadcast( ushort value, uint index );
225+ushort2 __ovld sub_group_non_uniform_broadcast( ushort2 value, uint index );
226+ushort3 __ovld sub_group_non_uniform_broadcast( ushort3 value, uint index );
227+ushort4 __ovld sub_group_non_uniform_broadcast( ushort4 value, uint index );
228+ushort8 __ovld sub_group_non_uniform_broadcast( ushort8 value, uint index );
229+ushort16 __ovld sub_group_non_uniform_broadcast( ushort16 value, uint index );
230+
231+int __ovld sub_group_non_uniform_broadcast( int value, uint index );
232+int2 __ovld sub_group_non_uniform_broadcast( int2 value, uint index );
233+int3 __ovld sub_group_non_uniform_broadcast( int3 value, uint index );
234+int4 __ovld sub_group_non_uniform_broadcast( int4 value, uint index );
235+int8 __ovld sub_group_non_uniform_broadcast( int8 value, uint index );
236+int16 __ovld sub_group_non_uniform_broadcast( int16 value, uint index );
237+
238+uint __ovld sub_group_non_uniform_broadcast( uint value, uint index );
239+uint2 __ovld sub_group_non_uniform_broadcast( uint2 value, uint index );
240+uint3 __ovld sub_group_non_uniform_broadcast( uint3 value, uint index );
241+uint4 __ovld sub_group_non_uniform_broadcast( uint4 value, uint index );
242+uint8 __ovld sub_group_non_uniform_broadcast( uint8 value, uint index );
243+uint16 __ovld sub_group_non_uniform_broadcast( uint16 value, uint index );
244+
245+long __ovld sub_group_non_uniform_broadcast( long value, uint index );
246+long2 __ovld sub_group_non_uniform_broadcast( long2 value, uint index );
247+long3 __ovld sub_group_non_uniform_broadcast( long3 value, uint index );
248+long4 __ovld sub_group_non_uniform_broadcast( long4 value, uint index );
249+long8 __ovld sub_group_non_uniform_broadcast( long8 value, uint index );
250+long16 __ovld sub_group_non_uniform_broadcast( long16 value, uint index );
251+
252+ulong __ovld sub_group_non_uniform_broadcast( ulong value, uint index );
253+ulong2 __ovld sub_group_non_uniform_broadcast( ulong2 value, uint index );
254+ulong3 __ovld sub_group_non_uniform_broadcast( ulong3 value, uint index );
255+ulong4 __ovld sub_group_non_uniform_broadcast( ulong4 value, uint index );
256+ulong8 __ovld sub_group_non_uniform_broadcast( ulong8 value, uint index );
257+ulong16 __ovld sub_group_non_uniform_broadcast( ulong16 value, uint index );
258+
259+float __ovld sub_group_non_uniform_broadcast( float value, uint index );
260+float2 __ovld sub_group_non_uniform_broadcast( float2 value, uint index );
261+float3 __ovld sub_group_non_uniform_broadcast( float3 value, uint index );
262+float4 __ovld sub_group_non_uniform_broadcast( float4 value, uint index );
263+float8 __ovld sub_group_non_uniform_broadcast( float8 value, uint index );
264+float16 __ovld sub_group_non_uniform_broadcast( float16 value, uint index );
265+
266+char __ovld sub_group_broadcast_first( char value );
267+uchar __ovld sub_group_broadcast_first( uchar value );
268+short __ovld sub_group_broadcast_first( short value );
269+ushort __ovld sub_group_broadcast_first( ushort value );
270+int __ovld sub_group_broadcast_first( int value );
271+uint __ovld sub_group_broadcast_first( uint value );
272+long __ovld sub_group_broadcast_first( long value );
273+ulong __ovld sub_group_broadcast_first( ulong value );
274+float __ovld sub_group_broadcast_first( float value );
275+
276+uint4 __ovld sub_group_ballot( int predicate );
277+int __ovld __cnfn sub_group_inverse_ballot( uint4 value );
278+int __ovld __cnfn sub_group_ballot_bit_extract( uint4 value, uint index );
279+uint __ovld __cnfn sub_group_ballot_bit_count( uint4 value );
280+
281+uint __ovld sub_group_ballot_inclusive_scan( uint4 value );
282+uint __ovld sub_group_ballot_exclusive_scan( uint4 value );
283+uint __ovld sub_group_ballot_find_lsb( uint4 value );
284+uint __ovld sub_group_ballot_find_msb( uint4 value );
285+
286+uint4 __ovld __cnfn get_sub_group_eq_mask(void);
287+uint4 __ovld __cnfn get_sub_group_ge_mask(void);
288+uint4 __ovld __cnfn get_sub_group_gt_mask(void);
289+uint4 __ovld __cnfn get_sub_group_le_mask(void);
290+uint4 __ovld __cnfn get_sub_group_lt_mask(void);
291+
292+#if defined(cl_khr_fp16)
293+half __ovld sub_group_non_uniform_broadcast( half value, uint index );
294+half2 __ovld sub_group_non_uniform_broadcast( half2 value, uint index );
295+half3 __ovld sub_group_non_uniform_broadcast( half3 value, uint index );
296+half4 __ovld sub_group_non_uniform_broadcast( half4 value, uint index );
297+half8 __ovld sub_group_non_uniform_broadcast( half8 value, uint index );
298+half16 __ovld sub_group_non_uniform_broadcast( half16 value, uint index );
299+
300+half __ovld sub_group_broadcast_first( half value );
301+#endif // cl_khr_fp16
302+
303+#if defined(cl_khr_fp64)
304+double __ovld sub_group_non_uniform_broadcast( double value, uint index );
305+double2 __ovld sub_group_non_uniform_broadcast( double2 value, uint index );
306+double3 __ovld sub_group_non_uniform_broadcast( double3 value, uint index );
307+double4 __ovld sub_group_non_uniform_broadcast( double4 value, uint index );
308+double8 __ovld sub_group_non_uniform_broadcast( double8 value, uint index );
309+double16 __ovld sub_group_non_uniform_broadcast( double16 value, uint index );
310+
311+double __ovld sub_group_broadcast_first( double value );
312+#endif // cl_khr_fp64
313+
314+#endif // cl_khr_subgroup_ballot
315+
316+#if defined(cl_khr_subgroup_non_uniform_arithmetic)
317+char __ovld sub_group_non_uniform_reduce_add( char value );
318+uchar __ovld sub_group_non_uniform_reduce_add( uchar value );
319+short __ovld sub_group_non_uniform_reduce_add( short value );
320+ushort __ovld sub_group_non_uniform_reduce_add( ushort value );
321+int __ovld sub_group_non_uniform_reduce_add( int value );
322+uint __ovld sub_group_non_uniform_reduce_add( uint value );
323+long __ovld sub_group_non_uniform_reduce_add( long value );
324+ulong __ovld sub_group_non_uniform_reduce_add( ulong value );
325+float __ovld sub_group_non_uniform_reduce_add( float value );
326+
327+char __ovld sub_group_non_uniform_reduce_mul( char value );
328+uchar __ovld sub_group_non_uniform_reduce_mul( uchar value );
329+short __ovld sub_group_non_uniform_reduce_mul( short value );
330+ushort __ovld sub_group_non_uniform_reduce_mul( ushort value );
331+int __ovld sub_group_non_uniform_reduce_mul( int value );
332+uint __ovld sub_group_non_uniform_reduce_mul( uint value );
333+long __ovld sub_group_non_uniform_reduce_mul( long value );
334+ulong __ovld sub_group_non_uniform_reduce_mul( ulong value );
335+float __ovld sub_group_non_uniform_reduce_mul( float value );
336+
337+char __ovld sub_group_non_uniform_reduce_min( char value );
338+uchar __ovld sub_group_non_uniform_reduce_min( uchar value );
339+short __ovld sub_group_non_uniform_reduce_min( short value );
340+ushort __ovld sub_group_non_uniform_reduce_min( ushort value );
341+int __ovld sub_group_non_uniform_reduce_min( int value );
342+uint __ovld sub_group_non_uniform_reduce_min( uint value );
343+long __ovld sub_group_non_uniform_reduce_min( long value );
344+ulong __ovld sub_group_non_uniform_reduce_min( ulong value );
345+float __ovld sub_group_non_uniform_reduce_min( float value );
346+
347+char __ovld sub_group_non_uniform_reduce_max( char value );
348+uchar __ovld sub_group_non_uniform_reduce_max( uchar value );
349+short __ovld sub_group_non_uniform_reduce_max( short value );
350+ushort __ovld sub_group_non_uniform_reduce_max( ushort value );
351+int __ovld sub_group_non_uniform_reduce_max( int value );
352+uint __ovld sub_group_non_uniform_reduce_max( uint value );
353+long __ovld sub_group_non_uniform_reduce_max( long value );
354+ulong __ovld sub_group_non_uniform_reduce_max( ulong value );
355+float __ovld sub_group_non_uniform_reduce_max( float value );
356+
357+char __ovld sub_group_non_uniform_scan_inclusive_add( char value );
358+uchar __ovld sub_group_non_uniform_scan_inclusive_add( uchar value );
359+short __ovld sub_group_non_uniform_scan_inclusive_add( short value );
360+ushort __ovld sub_group_non_uniform_scan_inclusive_add( ushort value );
361+int __ovld sub_group_non_uniform_scan_inclusive_add( int value );
362+uint __ovld sub_group_non_uniform_scan_inclusive_add( uint value );
363+long __ovld sub_group_non_uniform_scan_inclusive_add( long value );
364+ulong __ovld sub_group_non_uniform_scan_inclusive_add( ulong value );
365+float __ovld sub_group_non_uniform_scan_inclusive_add( float value );
366+
367+char __ovld sub_group_non_uniform_scan_inclusive_mul( char value );
368+uchar __ovld sub_group_non_uniform_scan_inclusive_mul( uchar value );
369+short __ovld sub_group_non_uniform_scan_inclusive_mul( short value );
370+ushort __ovld sub_group_non_uniform_scan_inclusive_mul( ushort value );
371+int __ovld sub_group_non_uniform_scan_inclusive_mul( int value );
372+uint __ovld sub_group_non_uniform_scan_inclusive_mul( uint value );
373+long __ovld sub_group_non_uniform_scan_inclusive_mul( long value );
374+ulong __ovld sub_group_non_uniform_scan_inclusive_mul( ulong value );
375+float __ovld sub_group_non_uniform_scan_inclusive_mul( float value );
376+
377+char __ovld sub_group_non_uniform_scan_inclusive_min( char value );
378+uchar __ovld sub_group_non_uniform_scan_inclusive_min( uchar value );
379+short __ovld sub_group_non_uniform_scan_inclusive_min( short value );
380+ushort __ovld sub_group_non_uniform_scan_inclusive_min( ushort value );
381+int __ovld sub_group_non_uniform_scan_inclusive_min( int value );
382+uint __ovld sub_group_non_uniform_scan_inclusive_min( uint value );
383+long __ovld sub_group_non_uniform_scan_inclusive_min( long value );
384+ulong __ovld sub_group_non_uniform_scan_inclusive_min( ulong value );
385+float __ovld sub_group_non_uniform_scan_inclusive_min( float value );
386+
387+char __ovld sub_group_non_uniform_scan_inclusive_max( char value );
388+uchar __ovld sub_group_non_uniform_scan_inclusive_max( uchar value );
389+short __ovld sub_group_non_uniform_scan_inclusive_max( short value );
390+ushort __ovld sub_group_non_uniform_scan_inclusive_max( ushort value );
391+int __ovld sub_group_non_uniform_scan_inclusive_max( int value );
392+uint __ovld sub_group_non_uniform_scan_inclusive_max( uint value );
393+long __ovld sub_group_non_uniform_scan_inclusive_max( long value );
394+ulong __ovld sub_group_non_uniform_scan_inclusive_max( ulong value );
395+float __ovld sub_group_non_uniform_scan_inclusive_max( float value );
396+
397+char __ovld sub_group_non_uniform_scan_exclusive_add( char value );
398+uchar __ovld sub_group_non_uniform_scan_exclusive_add( uchar value );
399+short __ovld sub_group_non_uniform_scan_exclusive_add( short value );
400+ushort __ovld sub_group_non_uniform_scan_exclusive_add( ushort value );
401+int __ovld sub_group_non_uniform_scan_exclusive_add( int value );
402+uint __ovld sub_group_non_uniform_scan_exclusive_add( uint value );
403+long __ovld sub_group_non_uniform_scan_exclusive_add( long value );
404+ulong __ovld sub_group_non_uniform_scan_exclusive_add( ulong value );
405+float __ovld sub_group_non_uniform_scan_exclusive_add( float value );
406+
407+char __ovld sub_group_non_uniform_scan_exclusive_mul( char value );
408+uchar __ovld sub_group_non_uniform_scan_exclusive_mul( uchar value );
409+short __ovld sub_group_non_uniform_scan_exclusive_mul( short value );
410+ushort __ovld sub_group_non_uniform_scan_exclusive_mul( ushort value );
411+int __ovld sub_group_non_uniform_scan_exclusive_mul( int value );
412+uint __ovld sub_group_non_uniform_scan_exclusive_mul( uint value );
413+long __ovld sub_group_non_uniform_scan_exclusive_mul( long value );
414+ulong __ovld sub_group_non_uniform_scan_exclusive_mul( ulong value );
415+float __ovld sub_group_non_uniform_scan_exclusive_mul( float value );
416+
417+char __ovld sub_group_non_uniform_scan_exclusive_min( char value );
418+uchar __ovld sub_group_non_uniform_scan_exclusive_min( uchar value );
419+short __ovld sub_group_non_uniform_scan_exclusive_min( short value );
420+ushort __ovld sub_group_non_uniform_scan_exclusive_min( ushort value );
421+int __ovld sub_group_non_uniform_scan_exclusive_min( int value );
422+uint __ovld sub_group_non_uniform_scan_exclusive_min( uint value );
423+long __ovld sub_group_non_uniform_scan_exclusive_min( long value );
424+ulong __ovld sub_group_non_uniform_scan_exclusive_min( ulong value );
425+float __ovld sub_group_non_uniform_scan_exclusive_min( float value );
426+
427+char __ovld sub_group_non_uniform_scan_exclusive_max( char value );
428+uchar __ovld sub_group_non_uniform_scan_exclusive_max( uchar value );
429+short __ovld sub_group_non_uniform_scan_exclusive_max( short value );
430+ushort __ovld sub_group_non_uniform_scan_exclusive_max( ushort value );
431+int __ovld sub_group_non_uniform_scan_exclusive_max( int value );
432+uint __ovld sub_group_non_uniform_scan_exclusive_max( uint value );
433+long __ovld sub_group_non_uniform_scan_exclusive_max( long value );
434+ulong __ovld sub_group_non_uniform_scan_exclusive_max( ulong value );
435+float __ovld sub_group_non_uniform_scan_exclusive_max( float value );
436+
437+char __ovld sub_group_non_uniform_reduce_and( char value );
438+uchar __ovld sub_group_non_uniform_reduce_and( uchar value );
439+short __ovld sub_group_non_uniform_reduce_and( short value );
440+ushort __ovld sub_group_non_uniform_reduce_and( ushort value );
441+int __ovld sub_group_non_uniform_reduce_and( int value );
442+uint __ovld sub_group_non_uniform_reduce_and( uint value );
443+long __ovld sub_group_non_uniform_reduce_and( long value );
444+ulong __ovld sub_group_non_uniform_reduce_and( ulong value );
445+
446+char __ovld sub_group_non_uniform_reduce_or( char value );
447+uchar __ovld sub_group_non_uniform_reduce_or( uchar value );
448+short __ovld sub_group_non_uniform_reduce_or( short value );
449+ushort __ovld sub_group_non_uniform_reduce_or( ushort value );
450+int __ovld sub_group_non_uniform_reduce_or( int value );
451+uint __ovld sub_group_non_uniform_reduce_or( uint value );
452+long __ovld sub_group_non_uniform_reduce_or( long value );
453+ulong __ovld sub_group_non_uniform_reduce_or( ulong value );
454+
455+char __ovld sub_group_non_uniform_reduce_xor( char value );
456+uchar __ovld sub_group_non_uniform_reduce_xor( uchar value );
457+short __ovld sub_group_non_uniform_reduce_xor( short value );
458+ushort __ovld sub_group_non_uniform_reduce_xor( ushort value );
459+int __ovld sub_group_non_uniform_reduce_xor( int value );
460+uint __ovld sub_group_non_uniform_reduce_xor( uint value );
461+long __ovld sub_group_non_uniform_reduce_xor( long value );
462+ulong __ovld sub_group_non_uniform_reduce_xor( ulong value );
463+
464+char __ovld sub_group_non_uniform_scan_inclusive_and( char value );
465+uchar __ovld sub_group_non_uniform_scan_inclusive_and( uchar value );
466+short __ovld sub_group_non_uniform_scan_inclusive_and( short value );
467+ushort __ovld sub_group_non_uniform_scan_inclusive_and( ushort value );
468+int __ovld sub_group_non_uniform_scan_inclusive_and( int value );
469+uint __ovld sub_group_non_uniform_scan_inclusive_and( uint value );
470+long __ovld sub_group_non_uniform_scan_inclusive_and( long value );
471+ulong __ovld sub_group_non_uniform_scan_inclusive_and( ulong value );
472+
473+char __ovld sub_group_non_uniform_scan_inclusive_or( char value );
474+uchar __ovld sub_group_non_uniform_scan_inclusive_or( uchar value );
475+short __ovld sub_group_non_uniform_scan_inclusive_or( short value );
476+ushort __ovld sub_group_non_uniform_scan_inclusive_or( ushort value );
477+int __ovld sub_group_non_uniform_scan_inclusive_or( int value );
478+uint __ovld sub_group_non_uniform_scan_inclusive_or( uint value );
479+long __ovld sub_group_non_uniform_scan_inclusive_or( long value );
480+ulong __ovld sub_group_non_uniform_scan_inclusive_or( ulong value );
481+
482+char __ovld sub_group_non_uniform_scan_inclusive_xor( char value );
483+uchar __ovld sub_group_non_uniform_scan_inclusive_xor( uchar value );
484+short __ovld sub_group_non_uniform_scan_inclusive_xor( short value );
485+ushort __ovld sub_group_non_uniform_scan_inclusive_xor( ushort value );
486+int __ovld sub_group_non_uniform_scan_inclusive_xor( int value );
487+uint __ovld sub_group_non_uniform_scan_inclusive_xor( uint value );
488+long __ovld sub_group_non_uniform_scan_inclusive_xor( long value );
489+ulong __ovld sub_group_non_uniform_scan_inclusive_xor( ulong value );
490+
491+char __ovld sub_group_non_uniform_scan_exclusive_and( char value );
492+uchar __ovld sub_group_non_uniform_scan_exclusive_and( uchar value );
493+short __ovld sub_group_non_uniform_scan_exclusive_and( short value );
494+ushort __ovld sub_group_non_uniform_scan_exclusive_and( ushort value );
495+int __ovld sub_group_non_uniform_scan_exclusive_and( int value );
496+uint __ovld sub_group_non_uniform_scan_exclusive_and( uint value );
497+long __ovld sub_group_non_uniform_scan_exclusive_and( long value );
498+ulong __ovld sub_group_non_uniform_scan_exclusive_and( ulong value );
499+
500+char __ovld sub_group_non_uniform_scan_exclusive_or( char value );
501+uchar __ovld sub_group_non_uniform_scan_exclusive_or( uchar value );
502+short __ovld sub_group_non_uniform_scan_exclusive_or( short value );
503+ushort __ovld sub_group_non_uniform_scan_exclusive_or( ushort value );
504+int __ovld sub_group_non_uniform_scan_exclusive_or( int value );
505+uint __ovld sub_group_non_uniform_scan_exclusive_or( uint value );
506+long __ovld sub_group_non_uniform_scan_exclusive_or( long value );
507+ulong __ovld sub_group_non_uniform_scan_exclusive_or( ulong value );
508+
509+char __ovld sub_group_non_uniform_scan_exclusive_xor( char value );
510+uchar __ovld sub_group_non_uniform_scan_exclusive_xor( uchar value );
511+short __ovld sub_group_non_uniform_scan_exclusive_xor( short value );
512+ushort __ovld sub_group_non_uniform_scan_exclusive_xor( ushort value );
513+int __ovld sub_group_non_uniform_scan_exclusive_xor( int value );
514+uint __ovld sub_group_non_uniform_scan_exclusive_xor( uint value );
515+long __ovld sub_group_non_uniform_scan_exclusive_xor( long value );
516+ulong __ovld sub_group_non_uniform_scan_exclusive_xor( ulong value );
517+
518+int __ovld sub_group_non_uniform_reduce_logical_and( int predicate );
519+int __ovld sub_group_non_uniform_reduce_logical_or( int predicate );
520+int __ovld sub_group_non_uniform_reduce_logical_xor( int predicate );
521+
522+int __ovld sub_group_non_uniform_scan_inclusive_logical_and( int predicate );
523+int __ovld sub_group_non_uniform_scan_inclusive_logical_or( int predicate );
524+int __ovld sub_group_non_uniform_scan_inclusive_logical_xor( int predicate );
525+
526+int __ovld sub_group_non_uniform_scan_exclusive_logical_and( int predicate );
527+int __ovld sub_group_non_uniform_scan_exclusive_logical_or( int predicate );
528+int __ovld sub_group_non_uniform_scan_exclusive_logical_xor( int predicate );
529+
530+#if defined(cl_khr_fp16)
531+half __ovld sub_group_non_uniform_reduce_add( half value );
532+half __ovld sub_group_non_uniform_reduce_mul( half value );
533+half __ovld sub_group_non_uniform_reduce_min( half value );
534+half __ovld sub_group_non_uniform_reduce_max( half value );
535+half __ovld sub_group_non_uniform_scan_inclusive_add( half value );
536+half __ovld sub_group_non_uniform_scan_inclusive_mul( half value );
537+half __ovld sub_group_non_uniform_scan_inclusive_min( half value );
538+half __ovld sub_group_non_uniform_scan_inclusive_max( half value );
539+half __ovld sub_group_non_uniform_scan_exclusive_add( half value );
540+half __ovld sub_group_non_uniform_scan_exclusive_mul( half value );
541+half __ovld sub_group_non_uniform_scan_exclusive_min( half value );
542+half __ovld sub_group_non_uniform_scan_exclusive_max( half value );
543+#endif // cl_khr_fp16
544+
545+#if defined(cl_khr_fp64)
546+double __ovld sub_group_non_uniform_reduce_add( double value );
547+double __ovld sub_group_non_uniform_reduce_mul( double value );
548+double __ovld sub_group_non_uniform_reduce_min( double value );
549+double __ovld sub_group_non_uniform_reduce_max( double value );
550+double __ovld sub_group_non_uniform_scan_inclusive_add( double value );
551+double __ovld sub_group_non_uniform_scan_inclusive_mul( double value );
552+double __ovld sub_group_non_uniform_scan_inclusive_min( double value );
553+double __ovld sub_group_non_uniform_scan_inclusive_max( double value );
554+double __ovld sub_group_non_uniform_scan_exclusive_add( double value );
555+double __ovld sub_group_non_uniform_scan_exclusive_mul( double value );
556+double __ovld sub_group_non_uniform_scan_exclusive_min( double value );
557+double __ovld sub_group_non_uniform_scan_exclusive_max( double value );
558+#endif // cl_khr_fp64
559+
560+#endif // cl_khr_subgroup_non_uniform_arithmetic
561+
562+#if defined(cl_khr_subgroup_shuffle)
563+char __ovld sub_group_shuffle( char value, uint index );
564+uchar __ovld sub_group_shuffle( uchar value, uint index );
565+short __ovld sub_group_shuffle( short value, uint index );
566+ushort __ovld sub_group_shuffle( ushort value, uint index );
567+int __ovld sub_group_shuffle( int value, uint index );
568+uint __ovld sub_group_shuffle( uint value, uint index );
569+long __ovld sub_group_shuffle( long value, uint index );
570+ulong __ovld sub_group_shuffle( ulong value, uint index );
571+float __ovld sub_group_shuffle( float value, uint index );
572+
573+char __ovld sub_group_shuffle_xor( char value, uint mask );
574+uchar __ovld sub_group_shuffle_xor( uchar value, uint mask );
575+short __ovld sub_group_shuffle_xor( short value, uint mask );
576+ushort __ovld sub_group_shuffle_xor( ushort value, uint mask );
577+int __ovld sub_group_shuffle_xor( int value, uint mask );
578+uint __ovld sub_group_shuffle_xor( uint value, uint mask );
579+long __ovld sub_group_shuffle_xor( long value, uint mask );
580+ulong __ovld sub_group_shuffle_xor( ulong value, uint mask );
581+float __ovld sub_group_shuffle_xor( float value, uint mask );
582+
583+#if defined(cl_khr_fp16)
584+half __ovld sub_group_shuffle( half value, uint index );
585+half __ovld sub_group_shuffle_xor( half value, uint mask );
586+#endif // cl_khr_fp16
587+
588+#if defined(cl_khr_fp64)
589+double __ovld sub_group_shuffle( double value, uint index );
590+double __ovld sub_group_shuffle_xor( double value, uint mask );
591+#endif // cl_khr_fp64
592+
593+#endif // cl_khr_subgroup_shuffle
594+
595+#if defined(cl_khr_subgroup_shuffle_relative)
596+char __ovld sub_group_shuffle_up( char value, uint delta );
597+uchar __ovld sub_group_shuffle_up( uchar value, uint delta );
598+short __ovld sub_group_shuffle_up( short value, uint delta );
599+ushort __ovld sub_group_shuffle_up( ushort value, uint delta );
600+int __ovld sub_group_shuffle_up( int value, uint delta );
601+uint __ovld sub_group_shuffle_up( uint value, uint delta );
602+long __ovld sub_group_shuffle_up( long value, uint delta );
603+ulong __ovld sub_group_shuffle_up( ulong value, uint delta );
604+float __ovld sub_group_shuffle_up( float value, uint delta );
605+
606+char __ovld sub_group_shuffle_down( char value, uint delta );
607+uchar __ovld sub_group_shuffle_down( uchar value, uint delta );
608+short __ovld sub_group_shuffle_down( short value, uint delta );
609+ushort __ovld sub_group_shuffle_down( ushort value, uint delta );
610+int __ovld sub_group_shuffle_down( int value, uint delta );
611+uint __ovld sub_group_shuffle_down( uint value, uint delta );
612+long __ovld sub_group_shuffle_down( long value, uint delta );
613+ulong __ovld sub_group_shuffle_down( ulong value, uint delta );
614+float __ovld sub_group_shuffle_down( float value, uint delta );
615+
616+#if defined(cl_khr_fp16)
617+half __ovld sub_group_shuffle_up( half value, uint delta );
618+half __ovld sub_group_shuffle_down( half value, uint delta );
619+#endif // cl_khr_fp16
620+
621+#if defined(cl_khr_fp64)
622+double __ovld sub_group_shuffle_up( double value, uint delta );
623+double __ovld sub_group_shuffle_down( double value, uint delta );
624+#endif // cl_khr_fp64
625+
626+#endif // cl_khr_subgroup_shuffle_relative
627+
628+#if defined(cl_khr_subgroup_clustered_reduce)
629+char __ovld sub_group_clustered_reduce_add( char value, uint clustersize );
630+uchar __ovld sub_group_clustered_reduce_add( uchar value, uint clustersize );
631+short __ovld sub_group_clustered_reduce_add( short value, uint clustersize );
632+ushort __ovld sub_group_clustered_reduce_add( ushort value, uint clustersize );
633+int __ovld sub_group_clustered_reduce_add( int value, uint clustersize );
634+uint __ovld sub_group_clustered_reduce_add( uint value, uint clustersize );
635+long __ovld sub_group_clustered_reduce_add( long value, uint clustersize );
636+ulong __ovld sub_group_clustered_reduce_add( ulong value, uint clustersize );
637+float __ovld sub_group_clustered_reduce_add( float value, uint clustersize );
638+
639+char __ovld sub_group_clustered_reduce_mul( char value, uint clustersize );
640+uchar __ovld sub_group_clustered_reduce_mul( uchar value, uint clustersize );
641+short __ovld sub_group_clustered_reduce_mul( short value, uint clustersize );
642+ushort __ovld sub_group_clustered_reduce_mul( ushort value, uint clustersize );
643+int __ovld sub_group_clustered_reduce_mul( int value, uint clustersize );
644+uint __ovld sub_group_clustered_reduce_mul( uint value, uint clustersize );
645+long __ovld sub_group_clustered_reduce_mul( long value, uint clustersize );
646+ulong __ovld sub_group_clustered_reduce_mul( ulong value, uint clustersize );
647+float __ovld sub_group_clustered_reduce_mul( float value, uint clustersize );
648+
649+char __ovld sub_group_clustered_reduce_min( char value, uint clustersize );
650+uchar __ovld sub_group_clustered_reduce_min( uchar value, uint clustersize );
651+short __ovld sub_group_clustered_reduce_min( short value, uint clustersize );
652+ushort __ovld sub_group_clustered_reduce_min( ushort value, uint clustersize );
653+int __ovld sub_group_clustered_reduce_min( int value, uint clustersize );
654+uint __ovld sub_group_clustered_reduce_min( uint value, uint clustersize );
655+long __ovld sub_group_clustered_reduce_min( long value, uint clustersize );
656+ulong __ovld sub_group_clustered_reduce_min( ulong value, uint clustersize );
657+float __ovld sub_group_clustered_reduce_min( float value, uint clustersize );
658+
659+char __ovld sub_group_clustered_reduce_max( char value, uint clustersize );
660+uchar __ovld sub_group_clustered_reduce_max( uchar value, uint clustersize );
661+short __ovld sub_group_clustered_reduce_max( short value, uint clustersize );
662+ushort __ovld sub_group_clustered_reduce_max( ushort value, uint clustersize );
663+int __ovld sub_group_clustered_reduce_max( int value, uint clustersize );
664+uint __ovld sub_group_clustered_reduce_max( uint value, uint clustersize );
665+long __ovld sub_group_clustered_reduce_max( long value, uint clustersize );
666+ulong __ovld sub_group_clustered_reduce_max( ulong value, uint clustersize );
667+float __ovld sub_group_clustered_reduce_max( float value, uint clustersize );
668+
669+char __ovld sub_group_clustered_reduce_and( char value, uint clustersize );
670+uchar __ovld sub_group_clustered_reduce_and( uchar value, uint clustersize );
671+short __ovld sub_group_clustered_reduce_and( short value, uint clustersize );
672+ushort __ovld sub_group_clustered_reduce_and( ushort value, uint clustersize );
673+int __ovld sub_group_clustered_reduce_and( int value, uint clustersize );
674+uint __ovld sub_group_clustered_reduce_and( uint value, uint clustersize );
675+long __ovld sub_group_clustered_reduce_and( long value, uint clustersize );
676+ulong __ovld sub_group_clustered_reduce_and( ulong value, uint clustersize );
677+
678+char __ovld sub_group_clustered_reduce_or( char value, uint clustersize );
679+uchar __ovld sub_group_clustered_reduce_or( uchar value, uint clustersize );
680+short __ovld sub_group_clustered_reduce_or( short value, uint clustersize );
681+ushort __ovld sub_group_clustered_reduce_or( ushort value, uint clustersize );
682+int __ovld sub_group_clustered_reduce_or( int value, uint clustersize );
683+uint __ovld sub_group_clustered_reduce_or( uint value, uint clustersize );
684+long __ovld sub_group_clustered_reduce_or( long value, uint clustersize );
685+ulong __ovld sub_group_clustered_reduce_or( ulong value, uint clustersize );
686+
687+char __ovld sub_group_clustered_reduce_xor( char value, uint clustersize );
688+uchar __ovld sub_group_clustered_reduce_xor( uchar value, uint clustersize );
689+short __ovld sub_group_clustered_reduce_xor( short value, uint clustersize );
690+ushort __ovld sub_group_clustered_reduce_xor( ushort value, uint clustersize );
691+int __ovld sub_group_clustered_reduce_xor( int value, uint clustersize );
692+uint __ovld sub_group_clustered_reduce_xor( uint value, uint clustersize );
693+long __ovld sub_group_clustered_reduce_xor( long value, uint clustersize );
694+ulong __ovld sub_group_clustered_reduce_xor( ulong value, uint clustersize );
695+
696+int __ovld sub_group_clustered_reduce_logical_and( int predicate, uint clustersize );
697+int __ovld sub_group_clustered_reduce_logical_or( int predicate, uint clustersize );
698+int __ovld sub_group_clustered_reduce_logical_xor( int predicate, uint clustersize );
699+
700+#if defined(cl_khr_fp16)
701+half __ovld sub_group_clustered_reduce_add( half value, uint clustersize );
702+half __ovld sub_group_clustered_reduce_mul( half value, uint clustersize );
703+half __ovld sub_group_clustered_reduce_min( half value, uint clustersize );
704+half __ovld sub_group_clustered_reduce_max( half value, uint clustersize );
705+#endif // cl_khr_fp16
706+
707+#if defined(cl_khr_fp64)
708+double __ovld sub_group_clustered_reduce_add( double value, uint clustersize );
709+double __ovld sub_group_clustered_reduce_mul( double value, uint clustersize );
710+double __ovld sub_group_clustered_reduce_min( double value, uint clustersize );
711+double __ovld sub_group_clustered_reduce_max( double value, uint clustersize );
712+#endif // cl_khr_fp64
713+
714+#endif // cl_khr_subgroup_clustered_reduce
715+
716 #if defined(cl_intel_subgroups)
717 // Intel-Specific Sub Group Functions
718 float __ovld __conv intel_sub_group_shuffle( float x, uint c );
719diff --git a/clang/test/SemaOpenCL/extension-version.cl b/clang/test/SemaOpenCL/extension-version.cl
720index 0e6bbb7d3bcd..86c78143a0eb 100644
721--- a/clang/test/SemaOpenCL/extension-version.cl
722+++ b/clang/test/SemaOpenCL/extension-version.cl
723@@ -333,3 +333,86 @@
724 #endif
725 #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : enable
726
727+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
728+#ifndef cl_khr_subgroup_extended_types
729+#error "Missing cl_khr_subgroup_extended_types"
730+#endif
731+#else
732+#ifdef cl_khr_subgroup_extended_types
733+#error "Incorrect cl_khr_subgroup_extended_types define"
734+#endif
735+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_extended_types' - ignoring}}
736+#endif
737+#pragma OPENCL EXTENSION cl_khr_subgroup_extended_types : enable
738+
739+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
740+#ifndef cl_khr_subgroup_non_uniform_vote
741+#error "Missing cl_khr_subgroup_non_uniform_vote"
742+#endif
743+#else
744+#ifdef cl_khr_subgroup_non_uniform_vote
745+#error "Incorrect cl_khr_subgroup_non_uniform_vote define"
746+#endif
747+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_vote' - ignoring}}
748+#endif
749+#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable
750+
751+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
752+#ifndef cl_khr_subgroup_ballot
753+#error "Missing cl_khr_subgroup_ballot"
754+#endif
755+#else
756+#ifdef cl_khr_subgroup_ballot
757+#error "Incorrect cl_khr_subgroup_ballot define"
758+#endif
759+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_ballot' - ignoring}}
760+#endif
761+#pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable
762+
763+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
764+#ifndef cl_khr_subgroup_non_uniform_arithmetic
765+#error "Missing cl_khr_subgroup_non_uniform_arithmetic"
766+#endif
767+#else
768+#ifdef cl_khr_subgroup_non_uniform_arithmetic
769+#error "Incorrect cl_khr_subgroup_non_uniform_arithmetic define"
770+#endif
771+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_non_uniform_arithmetic' - ignoring}}
772+#endif
773+#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable
774+
775+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
776+#ifndef cl_khr_subgroup_shuffle
777+#error "Missing cl_khr_subgroup_shuffle"
778+#endif
779+#else
780+#ifdef cl_khr_subgroup_shuffle
781+#error "Incorrect cl_khr_subgroup_shuffle define"
782+#endif
783+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle' - ignoring}}
784+#endif
785+#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable
786+
787+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
788+#ifndef cl_khr_subgroup_shuffle_relative
789+#error "Missing cl_khr_subgroup_shuffle_relative"
790+#endif
791+#else
792+#ifdef cl_khr_subgroup_shuffle_relative
793+#error "Incorrect cl_khr_subgroup_shuffle_relative define"
794+#endif
795+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_shuffle_relative' - ignoring}}
796+#endif
797+#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable
798+
799+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
800+#ifndef cl_khr_subgroup_clustered_reduce
801+#error "Missing cl_khr_subgroup_clustered_reduce"
802+#endif
803+#else
804+#ifdef cl_khr_subgroup_clustered_reduce
805+#error "Incorrect cl_khr_subgroup_clustered_reduce define"
806+#endif
807+// expected-warning@+2{{unsupported OpenCL extension 'cl_khr_subgroup_clustered_reduce' - ignoring}}
808+#endif
809+#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable
810--
8112.17.1
812
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch
deleted file mode 100644
index 1aff65e7..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch
+++ /dev/null
@@ -1,33 +0,0 @@
1From 331e323ae2633a8999a660314022491d670c442c Mon Sep 17 00:00:00 2001
2From: Andrea Bocci <andrea.bocci@cern.ch>
3Date: Sun, 15 Mar 2020 17:35:44 +0100
4Subject: [PATCH 2/3] Fix building in-tree with cmake -DLLVM_LINK_LLVM_DYLIB=ON
5
6Building in-tree with LLVM 11.0 master with the LLVM_LINK_LLVM_DYLIB
7cmake flag fails to link with the LLVMSPIRVLib library.
8
9Add an explicit dependency to force the correct build order and linking.
10
11Signed-off-by: Andrea Bocci <andrea.bocci@cern.ch>
12Upstream-Status: Backport
13Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
14---
15 tools/llvm-spirv/CMakeLists.txt | 2 +-
16 1 file changed, 1 insertion(+), 1 deletion(-)
17
18diff --git a/tools/llvm-spirv/CMakeLists.txt b/tools/llvm-spirv/CMakeLists.txt
19index 9aa96d9c..501c0daf 100644
20--- a/tools/llvm-spirv/CMakeLists.txt
21+++ b/tools/llvm-spirv/CMakeLists.txt
22@@ -14,7 +14,7 @@ add_llvm_tool(llvm-spirv
23 NO_INSTALL_RPATH
24 )
25
26-if (LLVM_SPIRV_BUILD_EXTERNAL)
27+if (LLVM_SPIRV_BUILD_EXTERNAL OR LLVM_LINK_LLVM_DYLIB)
28 target_link_libraries(llvm-spirv PRIVATE LLVMSPIRVLib)
29 endif()
30
31--
322.17.1
33
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch
deleted file mode 100644
index 49edd7e1..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch
+++ /dev/null
@@ -1,982 +0,0 @@
1From fbc9996d6490a5d4720b85b47f38335e7fdc99d9 Mon Sep 17 00:00:00 2001
2From: haonanya <haonan.yang@intel.com>
3Date: Mon, 19 Jul 2021 10:14:20 +0800
4Subject: [PATCH 3/3] Add support for cl_ext_float_atomics in SPIRVWriter
5
6Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/spirv/0001-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch]
7
8Signed-off-by: haonanya <haonan.yang@intel.com>
9Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
10---
11 lib/SPIRV/OCL20ToSPIRV.cpp | 79 ++++++++++++++++--
12 lib/SPIRV/SPIRVToOCL.h | 3 +
13 lib/SPIRV/SPIRVToOCL12.cpp | 21 +++++
14 lib/SPIRV/SPIRVToOCL20.cpp | 28 ++++++-
15 lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 1 -
16 lib/SPIRV/libSPIRV/SPIRVOpCode.h | 8 +-
17 test/AtomicFAddEXTForOCL.ll | 64 +++++++++++++++
18 test/AtomicFAddExt.ll | 111 ++++++++-----------------
19 test/AtomicFMaxEXT.ll | 113 +++++++-------------------
20 test/AtomicFMaxEXTForOCL.ll | 64 +++++++++++++++
21 test/AtomicFMinEXT.ll | 113 +++++++-------------------
22 test/AtomicFMinEXTForOCL.ll | 64 +++++++++++++++
23 test/InvalidAtomicBuiltins.cl | 8 --
24 13 files changed, 417 insertions(+), 260 deletions(-)
25 create mode 100644 test/AtomicFAddEXTForOCL.ll
26 create mode 100644 test/AtomicFMaxEXTForOCL.ll
27 create mode 100644 test/AtomicFMinEXTForOCL.ll
28
29diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp
30index e30aa5be..b676a009 100644
31--- a/lib/SPIRV/OCL20ToSPIRV.cpp
32+++ b/lib/SPIRV/OCL20ToSPIRV.cpp
33@@ -408,10 +408,63 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) {
34 if (DemangledName.find(kOCLBuiltinName::AtomicPrefix) == 0 ||
35 DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0) {
36
37- // Compute atomic builtins do not support floating types.
38- if (CI.getType()->isFloatingPointTy() &&
39- isComputeAtomicOCLBuiltin(DemangledName))
40- return;
41+ // Compute "atom" prefixed builtins do not support floating types.
42+ if (CI.getType()->isFloatingPointTy()) {
43+ if (DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0)
44+ return;
45+ // handle functions which are "atomic_" prefixed.
46+ StringRef Stem = DemangledName;
47+ Stem = Stem.drop_front(strlen("atomic_"));
48+ // FP-typed atomic_{add, sub, inc, dec, exchange, min, max, or, and, xor,
49+ // fetch_or, fetch_xor, fetch_and, fetch_or_explicit, fetch_xor_explicit,
50+ // fetch_and_explicit} should be identified as function call
51+ bool IsFunctionCall = llvm::StringSwitch<bool>(Stem)
52+ .Case("add", true)
53+ .Case("sub", true)
54+ .Case("inc", true)
55+ .Case("dec", true)
56+ .Case("cmpxchg", true)
57+ .Case("min", true)
58+ .Case("max", true)
59+ .Case("or", true)
60+ .Case("xor", true)
61+ .Case("and", true)
62+ .Case("fetch_or", true)
63+ .Case("fetch_and", true)
64+ .Case("fetch_xor", true)
65+ .Case("fetch_or_explicit", true)
66+ .Case("fetch_xor_explicit", true)
67+ .Case("fetch_and_explicit", true)
68+ .Default(false);
69+ if (IsFunctionCall)
70+ return;
71+ if (F->arg_size() != 2) {
72+ IsFunctionCall = llvm::StringSwitch<bool>(Stem)
73+ .Case("exchange", true)
74+ .Case("fetch_add", true)
75+ .Case("fetch_sub", true)
76+ .Case("fetch_min", true)
77+ .Case("fetch_max", true)
78+ .Case("load", true)
79+ .Case("store", true)
80+ .Default(false);
81+ if (IsFunctionCall)
82+ return;
83+ }
84+ if (F->arg_size() != 3 && F->arg_size() != 4) {
85+ IsFunctionCall = llvm::StringSwitch<bool>(Stem)
86+ .Case("exchange_explicit", true)
87+ .Case("fetch_add_explicit", true)
88+ .Case("fetch_sub_explicit", true)
89+ .Case("fetch_min_explicit", true)
90+ .Case("fetch_max_explicit", true)
91+ .Case("load_explicit", true)
92+ .Case("store_explicit", true)
93+ .Default(false);
94+ if (IsFunctionCall)
95+ return;
96+ }
97+ }
98
99 auto PCI = &CI;
100 if (DemangledName == kOCLBuiltinName::AtomicInit) {
101@@ -819,7 +872,7 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
102 AttributeList Attrs = CI->getCalledFunction()->getAttributes();
103 mutateCallInstSPIRV(
104 M, CI,
105- [=](CallInst *CI, std::vector<Value *> &Args) {
106+ [=](CallInst *CI, std::vector<Value *> &Args) -> std::string {
107 Info.PostProc(Args);
108 // Order of args in OCL20:
109 // object, 0-2 other args, 1-2 order, scope
110@@ -864,7 +917,21 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
111 std::rotate(Args.begin() + 2, Args.begin() + OrderIdx,
112 Args.end() - Offset);
113 }
114- return getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName));
115+ llvm::Type* AtomicBuiltinsReturnType =
116+ CI->getCalledFunction()->getReturnType();
117+ auto IsFPType = [](llvm::Type *ReturnType) {
118+ return ReturnType->isHalfTy() || ReturnType->isFloatTy() ||
119+ ReturnType->isDoubleTy();
120+ };
121+ auto SPIRVFunctionName =
122+ getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName));
123+ if (!IsFPType(AtomicBuiltinsReturnType))
124+ return SPIRVFunctionName;
125+ // Translate FP-typed atomic builtins.
126+ return llvm::StringSwitch<std::string>(SPIRVFunctionName)
127+ .Case("__spirv_AtomicIAdd", "__spirv_AtomicFAddEXT")
128+ .Case("__spirv_AtomicSMax", "__spirv_AtomicFMaxEXT")
129+ .Case("__spirv_AtomicSMin", "__spirv_AtomicFMinEXT");
130 },
131 &Attrs);
132 }
133diff --git a/lib/SPIRV/SPIRVToOCL.h b/lib/SPIRV/SPIRVToOCL.h
134index ddeec0b6..006fb0b1 100644
135--- a/lib/SPIRV/SPIRVToOCL.h
136+++ b/lib/SPIRV/SPIRVToOCL.h
137@@ -178,6 +178,9 @@ public:
138 /// using separate maps for OpenCL 1.2 and OpenCL 2.0
139 virtual Instruction *mutateAtomicName(CallInst *CI, Op OC) = 0;
140
141+ // Transform FP atomic opcode to corresponding OpenCL function name
142+ virtual std::string mapFPAtomicName(Op OC) = 0;
143+
144 private:
145 /// Transform uniform group opcode to corresponding OpenCL function name,
146 /// example: GroupIAdd(Reduce) => group_iadd => work_group_reduce_add |
147diff --git a/lib/SPIRV/SPIRVToOCL12.cpp b/lib/SPIRV/SPIRVToOCL12.cpp
148index afddd596..d7f00de3 100644
149--- a/lib/SPIRV/SPIRVToOCL12.cpp
150+++ b/lib/SPIRV/SPIRVToOCL12.cpp
151@@ -104,6 +104,9 @@ public:
152 /// cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions.
153 std::string mapAtomicName(Op OC, Type *Ty);
154
155+ // Transform FP atomic opcode to corresponding OpenCL function name
156+ std::string mapFPAtomicName(Op OC) override;
157+
158 static char ID;
159 };
160
161@@ -338,6 +341,21 @@ Instruction *SPIRVToOCL12::visitCallSPIRVAtomicBuiltin(CallInst *CI, Op OC) {
162 return NewCI;
163 }
164
165+std::string SPIRVToOCL12::mapFPAtomicName(Op OC) {
166+ assert(isFPAtomicOpCode(OC) && "Not intended to handle other opcodes than "
167+ "AtomicF{Add/Min/Max}EXT!");
168+ switch (OC) {
169+ case OpAtomicFAddEXT:
170+ return "atomic_add";
171+ case OpAtomicFMinEXT:
172+ return "atomic_min";
173+ case OpAtomicFMaxEXT:
174+ return "atomic_max";
175+ default:
176+ llvm_unreachable("Unsupported opcode!");
177+ }
178+}
179+
180 Instruction *SPIRVToOCL12::mutateAtomicName(CallInst *CI, Op OC) {
181 AttributeList Attrs = CI->getCalledFunction()->getAttributes();
182 return mutateCallInstOCL(
183@@ -351,6 +369,9 @@ Instruction *SPIRVToOCL12::mutateAtomicName(CallInst *CI, Op OC) {
184 std::string SPIRVToOCL12::mapAtomicName(Op OC, Type *Ty) {
185 std::string Prefix = Ty->isIntegerTy(64) ? kOCLBuiltinName::AtomPrefix
186 : kOCLBuiltinName::AtomicPrefix;
187+ // Map fp atomic instructions to regular OpenCL built-ins.
188+ if (isFPAtomicOpCode(OC))
189+ return mapFPAtomicName(OC);
190 return Prefix += OCL12SPIRVBuiltinMap::rmap(OC);
191 }
192
193diff --git a/lib/SPIRV/SPIRVToOCL20.cpp b/lib/SPIRV/SPIRVToOCL20.cpp
194index d829ff42..01d088e9 100644
195--- a/lib/SPIRV/SPIRVToOCL20.cpp
196+++ b/lib/SPIRV/SPIRVToOCL20.cpp
197@@ -82,6 +82,9 @@ public:
198 /// compare_exchange_strong/weak_explicit
199 Instruction *visitCallSPIRVAtomicCmpExchg(CallInst *CI, Op OC) override;
200
201+ // Transform FP atomic opcode to corresponding OpenCL function name
202+ std::string mapFPAtomicName(Op OC) override;
203+
204 static char ID;
205 };
206
207@@ -144,11 +147,29 @@ void SPIRVToOCL20::visitCallSPIRVControlBarrier(CallInst *CI) {
208 &Attrs);
209 }
210
211+std::string SPIRVToOCL20::mapFPAtomicName(Op OC) {
212+ assert(isFPAtomicOpCode(OC) && "Not intended to handle other opcodes than "
213+ "AtomicF{Add/Min/Max}EXT!");
214+ switch (OC) {
215+ case OpAtomicFAddEXT:
216+ return "atomic_fetch_add_explicit";
217+ case OpAtomicFMinEXT:
218+ return "atomic_fetch_min_explicit";
219+ case OpAtomicFMaxEXT:
220+ return "atomic_fetch_max_explicit";
221+ default:
222+ llvm_unreachable("Unsupported opcode!");
223+ }
224+}
225+
226 Instruction *SPIRVToOCL20::mutateAtomicName(CallInst *CI, Op OC) {
227 AttributeList Attrs = CI->getCalledFunction()->getAttributes();
228 return mutateCallInstOCL(
229 M, CI,
230 [=](CallInst *, std::vector<Value *> &Args) {
231+ // Map fp atomic instructions to regular OpenCL built-ins.
232+ if (isFPAtomicOpCode(OC))
233+ return mapFPAtomicName(OC);
234 return OCLSPIRVBuiltinMap::rmap(OC);
235 },
236 &Attrs);
237@@ -215,7 +236,12 @@ CallInst *SPIRVToOCL20::mutateCommonAtomicArguments(CallInst *CI, Op OC) {
238 }
239 }
240 auto Ptr = findFirstPtr(Args);
241- auto Name = OCLSPIRVBuiltinMap::rmap(OC);
242+ std::string Name;
243+ // Map fp atomic instructions to regular OpenCL built-ins.
244+ if (isFPAtomicOpCode(OC))
245+ Name = mapFPAtomicName(OC);
246+ else
247+ Name = OCLSPIRVBuiltinMap::rmap(OC);
248 auto NumOrder = getSPIRVAtomicBuiltinNumMemoryOrderArgs(OC);
249 auto ScopeIdx = Ptr + 1;
250 auto OrderIdx = Ptr + 2;
251diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
252index 13f93fbe..7b707993 100644
253--- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
254+++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
255@@ -521,7 +521,6 @@ template <> inline void SPIRVMap<Capability, std::string>::init() {
256 add(CapabilityAtomicFloat64AddEXT, "AtomicFloat64AddEXT");
257 add(CapabilityAtomicFloat32MinMaxEXT, "AtomicFloat32MinMaxEXT");
258 add(CapabilityAtomicFloat64MinMaxEXT, "AtomicFloat64MinMaxEXT");
259- add(CapabilityAtomicFloat16MinMaxEXT, "AtomicFloat16MinMaxEXT");
260 add(CapabilitySubgroupShuffleINTEL, "SubgroupShuffleINTEL");
261 add(CapabilitySubgroupBufferBlockIOINTEL, "SubgroupBufferBlockIOINTEL");
262 add(CapabilitySubgroupImageBlockIOINTEL, "SubgroupImageBlockIOINTEL");
263diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h
264index feec70f6..8e595e83 100644
265--- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h
266+++ b/lib/SPIRV/libSPIRV/SPIRVOpCode.h
267@@ -54,11 +54,17 @@ template <> inline void SPIRVMap<Op, std::string>::init() {
268 }
269 SPIRV_DEF_NAMEMAP(Op, OpCodeNameMap)
270
271+inline bool isFPAtomicOpCode(Op OpCode) {
272+ return OpCode == OpAtomicFAddEXT || OpCode == OpAtomicFMinEXT ||
273+ OpCode == OpAtomicFMaxEXT;
274+}
275+
276 inline bool isAtomicOpCode(Op OpCode) {
277 static_assert(OpAtomicLoad < OpAtomicXor, "");
278 return ((unsigned)OpCode >= OpAtomicLoad &&
279 (unsigned)OpCode <= OpAtomicXor) ||
280- OpCode == OpAtomicFlagTestAndSet || OpCode == OpAtomicFlagClear;
281+ OpCode == OpAtomicFlagTestAndSet || OpCode == OpAtomicFlagClear ||
282+ isFPAtomicOpCode(OpCode);
283 }
284 inline bool isBinaryOpCode(Op OpCode) {
285 return ((unsigned)OpCode >= OpIAdd && (unsigned)OpCode <= OpFMod) ||
286diff --git a/test/AtomicFAddEXTForOCL.ll b/test/AtomicFAddEXTForOCL.ll
287new file mode 100644
288index 00000000..fb146fb9
289--- /dev/null
290+++ b/test/AtomicFAddEXTForOCL.ll
291@@ -0,0 +1,64 @@
292+; RUN: llvm-as %s -o %t.bc
293+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_add -o %t.spv
294+; RUN: spirv-val %t.spv
295+; RUN: llvm-spirv -to-text %t.spv -o %t.spt
296+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
297+
298+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
299+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
300+
301+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
302+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
303+
304+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
305+target triple = "spir-unknown-unknown"
306+
307+; CHECK-SPIRV: Capability AtomicFloat32AddEXT
308+; CHECK-SPIRV: Capability AtomicFloat64AddEXT
309+; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_add"
310+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
311+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
312+
313+
314+; Function Attrs: convergent norecurse nounwind
315+define dso_local spir_func void @test_atomic_float(float addrspace(1)* %a) local_unnamed_addr #0 {
316+entry:
317+ ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_32]]
318+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
319+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+fiif]]({{.*}})
320+ %call = tail call spir_func float @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2
321+ ret void
322+}
323+
324+; Function Attrs: convergent
325+declare spir_func float @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1
326+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
327+
328+; Function Attrs: convergent norecurse nounwind
329+define dso_local spir_func void @test_atomic_double(double addrspace(1)* %a) local_unnamed_addr #0 {
330+entry:
331+ ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_64]]
332+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
333+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+diid]]({{.*}})
334+ %call = tail call spir_func double @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2
335+ ret void
336+}
337+; Function Attrs: convergent
338+declare spir_func double @_Z25atomic_fetch_add_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1
339+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
340+
341+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
342+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
343+
344+attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
345+attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
346+attributes #2 = { convergent nounwind }
347+
348+!llvm.module.flags = !{!0}
349+!opencl.ocl.version = !{!1}
350+!opencl.spir.version = !{!1}
351+!llvm.ident = !{!2}
352+
353+!0 = !{i32 1, !"wchar_size", i32 4}
354+!1 = !{i32 2, i32 0}
355+!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"}
356diff --git a/test/AtomicFAddExt.ll b/test/AtomicFAddExt.ll
357index 011dd8a7..42bdfeea 100644
358--- a/test/AtomicFAddExt.ll
359+++ b/test/AtomicFAddExt.ll
360@@ -4,20 +4,16 @@
361 ; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
362
363 ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
364-; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
365+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12
366
367-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
368-target triple = "spir64-unknown-unknown-sycldevice"
369-
370-%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
371-%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
372-%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
373-
374-$_ZTSZZ3addIfEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37 = comdat any
375+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
376+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
377
378-$_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37 = comdat any
379+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
380+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
381
382-@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
383+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
384+target triple = "spir64-unknown-unknown-sycldevice"
385
386 ; CHECK-SPIRV: Capability AtomicFloat32AddEXT
387 ; CHECK-SPIRV: Capability AtomicFloat64AddEXT
388@@ -25,62 +21,43 @@ $_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_3
389 ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
390 ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
391
392-; Function Attrs: convergent norecurse mustprogress
393-define weak_odr dso_local spir_kernel void @_ZTSZZ3addIfEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
394+; Function Attrs: convergent norecurse nounwind
395+define dso_local spir_func float @_Z14AtomicFloatIncRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 {
396 entry:
397- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
398- %1 = load i64, i64* %0, align 8
399- %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1
400- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
401- %3 = load i64, i64* %2, align 8
402- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3
403- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5
404- %5 = extractelement <3 x i64> %4, i64 0
405+ %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)*
406 ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_32]]
407- ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+]]({{.*}})
408- %call3.i.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float 1.000000e+00) #2
409- %add.i.i = fadd float %call3.i.i.i.i, 1.000000e+00
410- %sext.i = shl i64 %5, 32
411- %conv5.i = ashr exact i64 %sext.i, 32
412- %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv5.i
413- %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)*
414- store float %add.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14
415- ret void
416+ ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_add[[:alnum:]]+ff]]({{.*}})
417+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
418+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+fiif]]({{.*}})
419+ %call3.i.i = tail call spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2
420+ ret float %call3.i.i
421 }
422
423 ; Function Attrs: convergent
424-; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float)
425 declare dso_local spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1
426+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
427
428-; Function Attrs: convergent norecurse mustprogress
429-define weak_odr dso_local spir_kernel void @_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
430+; Function Attrs: convergent norecurse nounwind
431+define dso_local spir_func double @_Z15AtomicDoubleIncRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 {
432 entry:
433- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
434- %1 = load i64, i64* %0, align 8
435- %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1
436- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
437- %3 = load i64, i64* %2, align 8
438- %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3
439- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18
440- %5 = extractelement <3 x i64> %4, i64 0
441+ %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)*
442 ; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_64]]
443- ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+]]({{.*}})
444- %call3.i.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double 1.000000e+00) #2
445- %add.i.i = fadd double %call3.i.i.i.i, 1.000000e+00
446- %sext.i = shl i64 %5, 32
447- %conv5.i = ashr exact i64 %sext.i, 32
448- %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv5.i
449- %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)*
450- store double %add.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27
451- ret void
452+ ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_add[[:alnum:]]+dd]]({{.*}})
453+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_add_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
454+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+diid]]({{.*}})
455+ %call3.i.i = tail call spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2
456+ ret double %call3.i.i
457 }
458
459 ; Function Attrs: convergent
460-; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double)
461 declare dso_local spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1
462+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
463
464-attributes #0 = { convergent norecurse }
465-attributes #1 = { convergent }
466+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
467+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
468+
469+attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
470+attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
471 attributes #2 = { convergent nounwind }
472
473 !llvm.module.flags = !{!0}
474@@ -91,29 +68,5 @@ attributes #2 = { convergent nounwind }
475 !0 = !{i32 1, !"wchar_size", i32 4}
476 !1 = !{i32 1, i32 2}
477 !2 = !{i32 4, i32 100000}
478-!3 = !{!"clang version 12.0.0"}
479-!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
480-!5 = !{!6, !8, !10, !12}
481-!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
482-!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
483-!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
484-!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
485-!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
486-!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
487-!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
488-!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
489-!14 = !{!15, !15, i64 0}
490-!15 = !{!"float", !16, i64 0}
491-!16 = !{!"omnipotent char", !17, i64 0}
492-!17 = !{!"Simple C++ TBAA"}
493-!18 = !{!19, !21, !23, !25}
494-!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
495-!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
496-!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
497-!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
498-!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
499-!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
500-!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
501-!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
502-!27 = !{!28, !28, i64 0}
503-!28 = !{!"double", !16, i64 0}
504+!3 = !{!"clang version 13.0.0"}
505+
506diff --git a/test/AtomicFMaxEXT.ll b/test/AtomicFMaxEXT.ll
507index 1b81e53b..1c2eec93 100644
508--- a/test/AtomicFMaxEXT.ll
509+++ b/test/AtomicFMaxEXT.ll
510@@ -4,20 +4,16 @@
511 ; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
512
513 ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
514-; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
515+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12
516
517-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
518-target triple = "spir64-unknown-unknown-sycldevice"
519-
520-%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
521-%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
522-%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
523-
524-$_ZTSZZ8max_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any
525+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
526+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
527
528-$_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any
529+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
530+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
531
532-@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
533+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
534+target triple = "spir64-unknown-unknown-sycldevice"
535
536 ; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT
537 ; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT
538@@ -25,65 +21,42 @@ $_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4item
539 ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
540 ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
541
542-; Function Attrs: convergent norecurse
543-define weak_odr dso_local spir_kernel void @_ZTSZZ8max_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
544+; Function Attrs: convergent norecurse nounwind
545+define dso_local spir_func float @_Z14AtomicFloatMaxRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 {
546 entry:
547- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
548- %1 = load i64, i64* %0, align 8
549- %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1
550- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
551- %3 = load i64, i64* %2, align 8
552- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3
553- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5
554- %5 = extractelement <3 x i64> %4, i64 0
555- %conv.i = trunc i64 %5 to i32
556- %conv3.i = sitofp i32 %conv.i to float
557- %add.i = fadd float %conv3.i, 1.000000e+00
558+ %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)*
559 ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_32]]
560- ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+]]({{.*}})
561- %call3.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float %add.i) #2
562- %sext.i = shl i64 %5, 32
563- %conv6.i = ashr exact i64 %sext.i, 32
564- %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv6.i
565- %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)*
566- store float %call3.i.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14
567- ret void
568+ ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_max[[:alnum:]]+ff]]({{.*}})
569+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
570+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+fiif]]({{.*}})
571+ %call.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2
572+ ret float %call.i.i.i
573 }
574
575 ; Function Attrs: convergent
576-; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float)
577 declare dso_local spir_func float @_Z21__spirv_AtomicFMaxEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1
578+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
579
580-; Function Attrs: convergent norecurse
581-define weak_odr dso_local spir_kernel void @_ZTSZZ8max_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
582+; Function Attrs: convergent norecurse nounwind
583+define dso_local spir_func double @_Z15AtomicDoubleMaxRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 {
584 entry:
585- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
586- %1 = load i64, i64* %0, align 8
587- %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1
588- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
589- %3 = load i64, i64* %2, align 8
590- %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3
591- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18
592- %5 = extractelement <3 x i64> %4, i64 0
593- %conv.i = trunc i64 %5 to i32
594- %conv3.i = sitofp i32 %conv.i to double
595- %add.i = fadd double %conv3.i, 1.000000e+00
596+ %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)*
597 ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_64]]
598- ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+]]({{.*}})
599- %call3.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double %add.i) #2
600- %sext.i = shl i64 %5, 32
601- %conv6.i = ashr exact i64 %sext.i, 32
602- %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv6.i
603- %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)*
604- store double %call3.i.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27
605- ret void
606+ ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_max[[:alnum:]]+dd]]({{.*}})
607+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
608+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+diid]]({{.*}})
609+ %call.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2
610+ ret double %call.i.i.i
611 }
612
613 ; Function Attrs: convergent
614-; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double)
615 declare dso_local spir_func double @_Z21__spirv_AtomicFMaxEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1
616+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
617
618-attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
619+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
620+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
621+
622+attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
623 attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
624 attributes #2 = { convergent nounwind }
625
626@@ -95,29 +68,5 @@ attributes #2 = { convergent nounwind }
627 !0 = !{i32 1, !"wchar_size", i32 4}
628 !1 = !{i32 1, i32 2}
629 !2 = !{i32 4, i32 100000}
630-!3 = !{!"clang version 12.0.0"}
631-!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
632-!5 = !{!6, !8, !10, !12}
633-!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
634-!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
635-!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
636-!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
637-!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
638-!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
639-!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
640-!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
641-!14 = !{!15, !15, i64 0}
642-!15 = !{!"float", !16, i64 0}
643-!16 = !{!"omnipotent char", !17, i64 0}
644-!17 = !{!"Simple C++ TBAA"}
645-!18 = !{!19, !21, !23, !25}
646-!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
647-!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
648-!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
649-!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
650-!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
651-!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
652-!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
653-!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
654-!27 = !{!28, !28, i64 0}
655-!28 = !{!"double", !16, i64 0}
656+!3 = !{!"clang version 13.0.0"}
657+
658diff --git a/test/AtomicFMaxEXTForOCL.ll b/test/AtomicFMaxEXTForOCL.ll
659new file mode 100644
660index 00000000..1f2530d9
661--- /dev/null
662+++ b/test/AtomicFMaxEXTForOCL.ll
663@@ -0,0 +1,64 @@
664+; RUN: llvm-as %s -o %t.bc
665+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_min_max -o %t.spv
666+; RUN: spirv-val %t.spv
667+; RUN: llvm-spirv -to-text %t.spv -o %t.spt
668+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
669+
670+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
671+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
672+
673+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
674+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
675+
676+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
677+target triple = "spir-unknown-unknown"
678+
679+; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT
680+; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT
681+; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_min_max"
682+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
683+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
684+
685+; Function Attrs: convergent norecurse nounwind
686+define dso_local spir_func void @test_float(float addrspace(1)* %a) local_unnamed_addr #0 {
687+entry:
688+ ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_32]]
689+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
690+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+fiif]]({{.*}})
691+ %call = tail call spir_func float @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2
692+ ret void
693+}
694+
695+; Function Attrs: convergent
696+declare spir_func float @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1
697+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
698+
699+; Function Attrs: convergent norecurse nounwind
700+define dso_local spir_func void @test_double(double addrspace(1)* %a) local_unnamed_addr #0 {
701+entry:
702+ ; CHECK-SPIRV: 7 AtomicFMaxEXT [[TYPE_FLOAT_64]]
703+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_max_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
704+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMaxEXT[[:alnum:]]+diid]]({{.*}})
705+ %call = tail call spir_func double @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2
706+ ret void
707+}
708+
709+; Function Attrs: convergent
710+declare spir_func double @_Z25atomic_fetch_max_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1
711+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
712+
713+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
714+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
715+
716+attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
717+attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
718+attributes #2 = { convergent nounwind }
719+
720+!llvm.module.flags = !{!0}
721+!opencl.ocl.version = !{!1}
722+!opencl.spir.version = !{!1}
723+!llvm.ident = !{!2}
724+
725+!0 = !{i32 1, !"wchar_size", i32 4}
726+!1 = !{i32 2, i32 0}
727+!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"}
728diff --git a/test/AtomicFMinEXT.ll b/test/AtomicFMinEXT.ll
729index 98c98b8e..9e40a669 100644
730--- a/test/AtomicFMinEXT.ll
731+++ b/test/AtomicFMinEXT.ll
732@@ -4,20 +4,16 @@
733 ; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
734
735 ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
736-; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
737+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL12
738
739-target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
740-target triple = "spir64-unknown-unknown-sycldevice"
741-
742-%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
743-%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
744-%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
745-
746-$_ZTSZZ8min_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any
747+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
748+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
749
750-$_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37 = comdat any
751+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
752+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
753
754-@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
755+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
756+target triple = "spir64-unknown-unknown-sycldevice"
757
758 ; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT
759 ; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT
760@@ -25,65 +21,42 @@ $_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4item
761 ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
762 ; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
763
764-; Function Attrs: convergent norecurse
765-define weak_odr dso_local spir_kernel void @_ZTSZZ8min_testIfEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
766+; Function Attrs: convergent norecurse nounwind
767+define dso_local spir_func float @_Z14AtomicFloatMinRf(float addrspace(4)* align 4 dereferenceable(4) %Arg) local_unnamed_addr #0 {
768 entry:
769- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
770- %1 = load i64, i64* %0, align 8
771- %add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1
772- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
773- %3 = load i64, i64* %2, align 8
774- %add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3
775- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5
776- %5 = extractelement <3 x i64> %4, i64 0
777- %conv.i = trunc i64 %5 to i32
778- %conv3.i = sitofp i32 %conv.i to float
779- %add.i = fadd float %conv3.i, 1.000000e+00
780+ %0 = addrspacecast float addrspace(4)* %Arg to float addrspace(1)*
781 ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_32]]
782- ; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+]]({{.*}})
783- %call3.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float %add.i) #2
784- %sext.i = shl i64 %5, 32
785- %conv6.i = ashr exact i64 %sext.i, 32
786- %ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv6.i
787- %ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)*
788- store float %call3.i.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14
789- ret void
790+ ; CHECK-LLVM-CL12: call spir_func float @[[FLOAT_FUNC_NAME:_Z10atomic_min[[:alnum:]]+ff]]({{.*}})
791+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
792+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+fiif]]({{.*}})
793+ %call.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %0, i32 1, i32 896, float 1.000000e+00) #2
794+ ret float %call.i.i.i
795 }
796
797 ; Function Attrs: convergent
798-; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float)
799 declare dso_local spir_func float @_Z21__spirv_AtomicFMinEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1
800+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
801
802-; Function Attrs: convergent norecurse
803-define weak_odr dso_local spir_kernel void @_ZTSZZ8min_testIdEvN2cl4sycl5queueEmENKUlRNS1_7handlerEE16_14clES4_EUlNS1_4itemILi1ELb1EEEE19_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
804+; Function Attrs: convergent norecurse nounwind
805+define dso_local spir_func double @_Z15AtomicDoubleMinRd(double addrspace(4)* align 8 dereferenceable(8) %Arg) local_unnamed_addr #0 {
806 entry:
807- %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
808- %1 = load i64, i64* %0, align 8
809- %add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1
810- %2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
811- %3 = load i64, i64* %2, align 8
812- %add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3
813- %4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18
814- %5 = extractelement <3 x i64> %4, i64 0
815- %conv.i = trunc i64 %5 to i32
816- %conv3.i = sitofp i32 %conv.i to double
817- %add.i = fadd double %conv3.i, 1.000000e+00
818+ %0 = addrspacecast double addrspace(4)* %Arg to double addrspace(1)*
819 ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_64]]
820- ; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+]]({{.*}})
821- %call3.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double %add.i) #2
822- %sext.i = shl i64 %5, 32
823- %conv6.i = ashr exact i64 %sext.i, 32
824- %ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv6.i
825- %ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)*
826- store double %call3.i.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27
827- ret void
828+ ; CHECK-LLVM-CL12: call spir_func double @[[DOUBLE_FUNC_NAME:_Z10atomic_min[[:alnum:]]+dd]]({{.*}})
829+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
830+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+diid]]({{.*}})
831+ %call.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %0, i32 1, i32 896, double 1.000000e+00) #2
832+ ret double %call.i.i.i
833 }
834
835 ; Function Attrs: convergent
836-; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double)
837 declare dso_local spir_func double @_Z21__spirv_AtomicFMinEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1
838+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
839
840-attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
841+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
842+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
843+
844+attributes #0 = { convergent norecurse nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
845 attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
846 attributes #2 = { convergent nounwind }
847
848@@ -95,29 +68,5 @@ attributes #2 = { convergent nounwind }
849 !0 = !{i32 1, !"wchar_size", i32 4}
850 !1 = !{i32 1, i32 2}
851 !2 = !{i32 4, i32 100000}
852-!3 = !{!"clang version 12.0.0 (https://github.com/otcshare/llvm.git 67add71766d55d6a8d8d894822f583d6365a3b7d)"}
853-!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
854-!5 = !{!6, !8, !10, !12}
855-!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
856-!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
857-!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
858-!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
859-!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
860-!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
861-!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
862-!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
863-!14 = !{!15, !15, i64 0}
864-!15 = !{!"float", !16, i64 0}
865-!16 = !{!"omnipotent char", !17, i64 0}
866-!17 = !{!"Simple C++ TBAA"}
867-!18 = !{!19, !21, !23, !25}
868-!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
869-!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
870-!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
871-!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
872-!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
873-!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
874-!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
875-!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
876-!27 = !{!28, !28, i64 0}
877-!28 = !{!"double", !16, i64 0}
878+!3 = !{!"clang version 13.0.0"}
879+
880diff --git a/test/AtomicFMinEXTForOCL.ll b/test/AtomicFMinEXTForOCL.ll
881new file mode 100644
882index 00000000..6196b0f8
883--- /dev/null
884+++ b/test/AtomicFMinEXTForOCL.ll
885@@ -0,0 +1,64 @@
886+; RUN: llvm-as %s -o %t.bc
887+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_min_max -o %t.spv
888+; RUN: spirv-val %t.spv
889+; RUN: llvm-spirv -to-text %t.spv -o %t.spt
890+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
891+
892+; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
893+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL,CHECK-LLVM-CL20
894+
895+; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
896+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
897+
898+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
899+target triple = "spir-unknown-unknown"
900+
901+; CHECK-SPIRV: Capability AtomicFloat32MinMaxEXT
902+; CHECK-SPIRV: Capability AtomicFloat64MinMaxEXT
903+; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_min_max"
904+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
905+; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
906+
907+; Function Attrs: convergent norecurse nounwind
908+define dso_local spir_func void @test_float(float addrspace(1)* %a) local_unnamed_addr #0 {
909+entry:
910+ ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_32]]
911+ ; CHECK-LLVM-CL20: call spir_func float @[[FLOAT_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicff[a-zA-Z0-9_]+]]({{.*}})
912+ ; CHECK-LLVM-SPV: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+fiif]]({{.*}})
913+ %call = tail call spir_func float @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)* %a, float 0.000000e+00, i32 0) #2
914+ ret void
915+}
916+
917+; Function Attrs: convergent
918+declare spir_func float @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicff12memory_order(float addrspace(1)*, float, i32) local_unnamed_addr #1
919+; CHECK-LLVM-SPV: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
920+
921+; Function Attrs: convergent norecurse nounwind
922+define dso_local spir_func void @test_double(double addrspace(1)* %a) local_unnamed_addr #0 {
923+entry:
924+ ; CHECK-SPIRV: 7 AtomicFMinEXT [[TYPE_FLOAT_64]]
925+ ; CHECK-LLVM-CL20: call spir_func double @[[DOUBLE_FUNC_NAME:_Z25atomic_fetch_min_explicit[[:alnum:]]+_Atomicdd[a-zA-Z0-9_]+]]({{.*}})
926+ ; CHECK-LLVM-SPV: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFMinEXT[[:alnum:]]+diid]]({{.*}})
927+ %call = tail call spir_func double @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)* %a, double 0.000000e+00, i32 0) #2
928+ ret void
929+}
930+
931+; Function Attrs: convergent
932+declare spir_func double @_Z25atomic_fetch_min_explicitPU3AS1VU7_Atomicdd12memory_order(double addrspace(1)*, double, i32) local_unnamed_addr #1
933+; CHECK-LLVM-SPV: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
934+
935+; CHECK-LLVM-CL: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float
936+; CHECK-LLVM-CL: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double
937+
938+attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
939+attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
940+attributes #2 = { convergent nounwind }
941+
942+!llvm.module.flags = !{!0}
943+!opencl.ocl.version = !{!1}
944+!opencl.spir.version = !{!1}
945+!llvm.ident = !{!2}
946+
947+!0 = !{i32 1, !"wchar_size", i32 4}
948+!1 = !{i32 2, i32 0}
949+!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 94aa388f0ce0723bb15503cf41c2c15b288375b9)"}
950diff --git a/test/InvalidAtomicBuiltins.cl b/test/InvalidAtomicBuiltins.cl
951index b8ec5b89..2182f070 100644
952--- a/test/InvalidAtomicBuiltins.cl
953+++ b/test/InvalidAtomicBuiltins.cl
954@@ -41,13 +41,9 @@ float __attribute__((overloadable)) atomic_fetch_xor(volatile generic atomic_flo
955 double __attribute__((overloadable)) atomic_fetch_and(volatile generic atomic_double *object, double operand, memory_order order);
956 double __attribute__((overloadable)) atomic_fetch_max(volatile generic atomic_double *object, double operand, memory_order order);
957 double __attribute__((overloadable)) atomic_fetch_min(volatile generic atomic_double *object, double operand, memory_order order);
958-float __attribute__((overloadable)) atomic_fetch_add_explicit(volatile generic atomic_float *object, float operand, memory_order order);
959-float __attribute__((overloadable)) atomic_fetch_sub_explicit(volatile generic atomic_float *object, float operand, memory_order order);
960 float __attribute__((overloadable)) atomic_fetch_or_explicit(volatile generic atomic_float *object, float operand, memory_order order);
961 float __attribute__((overloadable)) atomic_fetch_xor_explicit(volatile generic atomic_float *object, float operand, memory_order order);
962 double __attribute__((overloadable)) atomic_fetch_and_explicit(volatile generic atomic_double *object, double operand, memory_order order);
963-double __attribute__((overloadable)) atomic_fetch_max_explicit(volatile generic atomic_double *object, double operand, memory_order order);
964-double __attribute__((overloadable)) atomic_fetch_min_explicit(volatile generic atomic_double *object, double operand, memory_order order);
965
966 __kernel void test_atomic_fn(volatile __global float *p,
967 volatile __global double *pp,
968@@ -86,11 +82,7 @@ __kernel void test_atomic_fn(volatile __global float *p,
969 d = atomic_fetch_and(pp, val, order);
970 d = atomic_fetch_min(pp, val, order);
971 d = atomic_fetch_max(pp, val, order);
972- f = atomic_fetch_add_explicit(p, val, order);
973- f = atomic_fetch_sub_explicit(p, val, order);
974 f = atomic_fetch_or_explicit(p, val, order);
975 f = atomic_fetch_xor_explicit(p, val, order);
976 d = atomic_fetch_and_explicit(pp, val, order);
977- d = atomic_fetch_min_explicit(pp, val, order);
978- d = atomic_fetch_max_explicit(pp, val, order);
979 }
980--
9812.17.1
982
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch
deleted file mode 100644
index 3b035f47..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch
+++ /dev/null
@@ -1,35 +0,0 @@
1From cfb18b75e8a353bc7486f337541476a36994b063 Mon Sep 17 00:00:00 2001
2From: juanrod2 <>
3Date: Tue, 22 Dec 2020 08:33:08 +0800
4Subject: [PATCH 3/7] Memory leak fix for Managed Static Mutex
5
6Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0001-Memory-leak-fix-for-Managed-Static-Mutex.patch]
7
8Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
9
10Cleaning a mutex inside ManagedStatic llvm class.
11---
12 llvm/lib/Support/ManagedStatic.cpp | 6 +++++-
13 1 file changed, 5 insertions(+), 1 deletion(-)
14
15diff --git a/llvm/lib/Support/ManagedStatic.cpp b/llvm/lib/Support/ManagedStatic.cpp
16index 053493f72fb5..6571580ccecf 100644
17--- a/llvm/lib/Support/ManagedStatic.cpp
18+++ b/llvm/lib/Support/ManagedStatic.cpp
19@@ -76,8 +76,12 @@ void ManagedStaticBase::destroy() const {
20
21 /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
22 void llvm::llvm_shutdown() {
23- std::lock_guard<std::recursive_mutex> Lock(*getManagedStaticMutex());
24+ getManagedStaticMutex()->lock();
25
26 while (StaticList)
27 StaticList->destroy();
28+
29+ getManagedStaticMutex()->unlock();
30+ delete ManagedStaticMutex;
31+ ManagedStaticMutex = nullptr;
32 }
33--
342.17.1
35
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0004-Remove-repo-name-in-LLVM-IR.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0004-Remove-repo-name-in-LLVM-IR.patch
deleted file mode 100644
index f8dec996..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0004-Remove-repo-name-in-LLVM-IR.patch
+++ /dev/null
@@ -1,49 +0,0 @@
1From b794037bf1f90a93efa4c542855ad569cb13b4c5 Mon Sep 17 00:00:00 2001
2From: Feng Zou <feng.zou@intel.com>
3Date: Mon, 19 Oct 2020 14:43:38 +0800
4Subject: [PATCH 4/7] Remove repo name in LLVM IR
5
6Upstream-Status: Backport [Taken from opencl-clang patches, https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/llvm/0003-Remove-repo-name-in-LLVM-IR.patch]
7Signed-off-by: Feng Zou <feng.zou@intel.com>
8Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
9---
10 llvm/cmake/modules/VersionFromVCS.cmake | 23 ++++++++++++-----------
11 1 file changed, 12 insertions(+), 11 deletions(-)
12
13diff --git a/llvm/cmake/modules/VersionFromVCS.cmake b/llvm/cmake/modules/VersionFromVCS.cmake
14index 1b6519b4b7c4..8fd6b23bb345 100644
15--- a/llvm/cmake/modules/VersionFromVCS.cmake
16+++ b/llvm/cmake/modules/VersionFromVCS.cmake
17@@ -33,17 +33,18 @@ function(get_source_info path revision repository)
18 else()
19 set(remote "origin")
20 endif()
21- execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote}
22- WORKING_DIRECTORY ${path}
23- RESULT_VARIABLE git_result
24- OUTPUT_VARIABLE git_output
25- ERROR_QUIET)
26- if(git_result EQUAL 0)
27- string(STRIP "${git_output}" git_output)
28- set(${repository} ${git_output} PARENT_SCOPE)
29- else()
30- set(${repository} ${path} PARENT_SCOPE)
31- endif()
32+ # Do not show repo name in IR
33+ # execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote}
34+ # WORKING_DIRECTORY ${path}
35+ # RESULT_VARIABLE git_result
36+ # OUTPUT_VARIABLE git_output
37+ # ERROR_QUIET)
38+ # if(git_result EQUAL 0)
39+ # string(STRIP "${git_output}" git_output)
40+ # set(${repository} ${git_output} PARENT_SCOPE)
41+ # else()
42+ # set(${repository} ${path} PARENT_SCOPE)
43+ # endif()
44 endif()
45 endif()
46 endfunction()
47--
482.17.1
49
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch
deleted file mode 100644
index f8f177e5..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch
+++ /dev/null
@@ -1,47 +0,0 @@
1From 3dd4766499d25e5978a5d90001f18e657e875da0 Mon Sep 17 00:00:00 2001
2From: haonanya <haonan.yang@intel.com>
3Date: Thu, 12 Aug 2021 15:48:34 +0800
4Subject: [PATCH 5/7] Remove __IMAGE_SUPPORT__ macro for SPIR since SPIR
5 doesn't require image support
6
7Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0003-Remove-__IMAGE_SUPPORT__-macro-for-SPIR.patch]
8
9Signed-off-by: haonanya <haonan.yang@intel.com>
10Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
11---
12 clang/lib/Frontend/InitPreprocessor.cpp | 3 ---
13 clang/test/Preprocessor/predefined-macros.c | 4 ----
14 2 files changed, 7 deletions(-)
15
16diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
17index aefd208e6cd3..b4a84636673a 100644
18--- a/clang/lib/Frontend/InitPreprocessor.cpp
19+++ b/clang/lib/Frontend/InitPreprocessor.cpp
20@@ -1108,9 +1108,6 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
21 if (TI.getSupportedOpenCLOpts().isSupported(#Ext)) \
22 Builder.defineMacro(#Ext);
23 #include "clang/Basic/OpenCLExtensions.def"
24-
25- if (TI.getTriple().isSPIR())
26- Builder.defineMacro("__IMAGE_SUPPORT__");
27 }
28
29 if (TI.hasInt128Type() && LangOpts.CPlusPlus && LangOpts.GNUMode) {
30diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c
31index b088a37ba665..39a222d02faf 100644
32--- a/clang/test/Preprocessor/predefined-macros.c
33+++ b/clang/test/Preprocessor/predefined-macros.c
34@@ -184,10 +184,6 @@
35 // MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_GROUP 1
36 // MSCOPE:#define __OPENCL_MEMORY_SCOPE_WORK_ITEM 0
37
38-// RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spir-unknown-unknown \
39-// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIR
40-// CHECK-SPIR: #define __IMAGE_SUPPORT__ 1
41-
42 // RUN: %clang_cc1 %s -E -dM -o - -x hip -triple amdgcn-amd-amdhsa \
43 // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-HIP
44 // CHECK-HIP-NOT: #define __CUDA_ARCH__
45--
462.17.1
47
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch
deleted file mode 100644
index 0b4ee8c7..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch
+++ /dev/null
@@ -1,53 +0,0 @@
1From 2c53abd0008bbecfcfe871c6060f4bbf1c94c74a Mon Sep 17 00:00:00 2001
2From: Raphael Isemann <teemperor@gmail.com>
3Date: Thu, 1 Apr 2021 18:41:44 +0200
4Subject: [PATCH 6/7] Avoid calling ParseCommandLineOptions in BackendUtil if
5 possible
6
7Calling `ParseCommandLineOptions` should only be called from `main` as the
8CommandLine setup code isn't thread-safe. As BackendUtil is part of the
9generic Clang FrontendAction logic, a process which has several threads executing
10Clang FrontendActions will randomly crash in the unsafe setup code.
11
12This patch avoids calling the function unless either the debug-pass option or
13limit-float-precision option is set. Without these two options set the
14`ParseCommandLineOptions` call doesn't do anything beside parsing
15the command line `clang` which doesn't set any options.
16
17See also D99652 where LLDB received a workaround for this crash.
18
19Reviewed By: JDevlieghere
20
21Differential Revision: https://reviews.llvm.org/D99740
22
23Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0004-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch]
24
25Signed-off-by: Raphael Isemann <teemperor@gmail.com>
26Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
27---
28 clang/lib/CodeGen/BackendUtil.cpp | 8 ++++++++
29 1 file changed, 8 insertions(+)
30
31diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
32index 0bfcab88a3a9..db8fd4166d7a 100644
33--- a/clang/lib/CodeGen/BackendUtil.cpp
34+++ b/clang/lib/CodeGen/BackendUtil.cpp
35@@ -743,7 +743,15 @@ static void setCommandLineOpts(const CodeGenOptions &CodeGenOpts) {
36 BackendArgs.push_back("-limit-float-precision");
37 BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str());
38 }
39+ // Check for the default "clang" invocation that won't set any cl::opt values.
40+ // Skip trying to parse the command line invocation to avoid the issues
41+ // described below.
42+ if (BackendArgs.size() == 1)
43+ return;
44 BackendArgs.push_back(nullptr);
45+ // FIXME: The command line parser below is not thread-safe and shares a global
46+ // state, so this call might crash or overwrite the options of another Clang
47+ // instance in the same process.
48 llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1,
49 BackendArgs.data());
50 }
51--
522.17.1
53
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0007-support-cl_ext_float_atomics.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0007-support-cl_ext_float_atomics.patch
deleted file mode 100644
index f7d191ff..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0007-support-cl_ext_float_atomics.patch
+++ /dev/null
@@ -1,377 +0,0 @@
1From a685de6fc45afcdbe4a7120e9d5b33e175dd71cd Mon Sep 17 00:00:00 2001
2From: haonanya <haonan.yang@intel.com>
3Date: Fri, 13 Aug 2021 10:00:02 +0800
4Subject: [PATCH 7/7] support cl_ext_float_atomics
5
6Upstream-Status: Backport [Taken from opencl-clang patches; https://github.com/intel/opencl-clang/blob/ocl-open-100/patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch]
7
8Signed-off-by: haonanya <haonan.yang@intel.com>
9Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
10---
11 clang/lib/Headers/opencl-c-base.h | 25 ++++
12 clang/lib/Headers/opencl-c.h | 208 ++++++++++++++++++++++++++
13 clang/test/Headers/opencl-c-header.cl | 96 ++++++++++++
14 3 files changed, 329 insertions(+)
15
16diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
17index 2cc688ccc3da..86bbee12fdf8 100644
18--- a/clang/lib/Headers/opencl-c-base.h
19+++ b/clang/lib/Headers/opencl-c-base.h
20@@ -14,6 +14,31 @@
21 #define CL_VERSION_3_0 300
22 #endif
23
24+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
25+// For SPIR all extensions are supported.
26+#if defined(__SPIR__)
27+#define cl_ext_float_atomics 1
28+#ifdef cl_khr_fp16
29+#define __opencl_c_ext_fp16_global_atomic_load_store 1
30+#define __opencl_c_ext_fp16_local_atomic_load_store 1
31+#define __opencl_c_ext_fp16_global_atomic_add 1
32+#define __opencl_c_ext_fp16_local_atomic_add 1
33+#define __opencl_c_ext_fp16_global_atomic_min_max 1
34+#define __opencl_c_ext_fp16_local_atomic_min_max 1
35+#endif
36+#ifdef __opencl_c_fp64
37+#define __opencl_c_ext_fp64_global_atomic_add 1
38+#define __opencl_c_ext_fp64_local_atomic_add 1
39+#define __opencl_c_ext_fp64_global_atomic_min_max 1
40+#define __opencl_c_ext_fp64_local_atomic_min_max 1
41+#endif
42+#define __opencl_c_ext_fp32_global_atomic_add 1
43+#define __opencl_c_ext_fp32_local_atomic_add 1
44+#define __opencl_c_ext_fp32_global_atomic_min_max 1
45+#define __opencl_c_ext_fp32_local_atomic_min_max 1
46+#endif // defined(__SPIR__)
47+#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
48+
49 // Define features for 2.0 for header backward compatibility
50 #ifndef __opencl_c_int64
51 #define __opencl_c_int64 1
52diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
53index 67d900eb1c3d..b463e702d95e 100644
54--- a/clang/lib/Headers/opencl-c.h
55+++ b/clang/lib/Headers/opencl-c.h
56@@ -14354,6 +14354,214 @@ intptr_t __ovld atomic_fetch_max_explicit(
57 // defined(cl_khr_int64_extended_atomics)
58 #endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0)
59
60+#if defined(cl_ext_float_atomics)
61+
62+#if defined(__opencl_c_ext_fp32_global_atomic_min_max)
63+float __ovld atomic_fetch_min(volatile __global atomic_float *object,
64+ float operand);
65+float __ovld atomic_fetch_max(volatile __global atomic_float *object,
66+ float operand);
67+float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object,
68+ float operand, memory_order order);
69+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object,
70+ float operand, memory_order order);
71+float __ovld atomic_fetch_min_explicit(volatile __global atomic_float *object,
72+ float operand, memory_order order,
73+ memory_scope scope);
74+float __ovld atomic_fetch_max_explicit(volatile __global atomic_float *object,
75+ float operand, memory_order order,
76+ memory_scope scope);
77+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max)
78+
79+#if defined(__opencl_c_ext_fp32_local_atomic_min_max)
80+float __ovld atomic_fetch_min(volatile __local atomic_float *object,
81+ float operand);
82+float __ovld atomic_fetch_max(volatile __local atomic_float *object,
83+ float operand);
84+float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object,
85+ float operand, memory_order order);
86+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object,
87+ float operand, memory_order order);
88+float __ovld atomic_fetch_min_explicit(volatile __local atomic_float *object,
89+ float operand, memory_order order,
90+ memory_scope scope);
91+float __ovld atomic_fetch_max_explicit(volatile __local atomic_float *object,
92+ float operand, memory_order order,
93+ memory_scope scope);
94+#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max)
95+
96+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) || \
97+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
98+float __ovld atomic_fetch_min(volatile atomic_float *object, float operand);
99+float __ovld atomic_fetch_max(volatile atomic_float *object, float operand);
100+float __ovld atomic_fetch_min_explicit(volatile atomic_float *object,
101+ float operand, memory_order order);
102+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
103+ float operand, memory_order order);
104+float __ovld atomic_fetch_min_explicit(volatile atomic_float *object,
105+ float operand, memory_order order,
106+ memory_scope scope);
107+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
108+ float operand, memory_order order,
109+ memory_scope scope);
110+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) || \
111+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
112+
113+#if defined(__opencl_c_ext_fp64_global_atomic_min_max)
114+double __ovld atomic_fetch_min(volatile __global atomic_double *object,
115+ double operand);
116+double __ovld atomic_fetch_max(volatile __global atomic_double *object,
117+ double operand);
118+double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object,
119+ double operand, memory_order order);
120+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object,
121+ double operand, memory_order order);
122+double __ovld atomic_fetch_min_explicit(volatile __global atomic_double *object,
123+ double operand, memory_order order,
124+ memory_scope scope);
125+double __ovld atomic_fetch_max_explicit(volatile __global atomic_double *object,
126+ double operand, memory_order order,
127+ memory_scope scope);
128+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max)
129+
130+#if defined(__opencl_c_ext_fp64_local_atomic_min_max)
131+double __ovld atomic_fetch_min(volatile __local atomic_double *object,
132+ double operand);
133+double __ovld atomic_fetch_max(volatile __local atomic_double *object,
134+ double operand);
135+double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object,
136+ double operand, memory_order order);
137+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object,
138+ double operand, memory_order order);
139+double __ovld atomic_fetch_min_explicit(volatile __local atomic_double *object,
140+ double operand, memory_order order,
141+ memory_scope scope);
142+double __ovld atomic_fetch_max_explicit(volatile __local atomic_double *object,
143+ double operand, memory_order order,
144+ memory_scope scope);
145+#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max)
146+
147+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) || \
148+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
149+double __ovld atomic_fetch_min(volatile atomic_double *object, double operand);
150+double __ovld atomic_fetch_max(volatile atomic_double *object, double operand);
151+double __ovld atomic_fetch_min_explicit(volatile atomic_double *object,
152+ double operand, memory_order order);
153+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
154+ double operand, memory_order order);
155+double __ovld atomic_fetch_min_explicit(volatile atomic_double *object,
156+ double operand, memory_order order,
157+ memory_scope scope);
158+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
159+ double operand, memory_order order,
160+ memory_scope scope);
161+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) || \
162+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
163+
164+#if defined(__opencl_c_ext_fp32_global_atomic_add)
165+float __ovld atomic_fetch_add(volatile __global atomic_float *object,
166+ float operand);
167+float __ovld atomic_fetch_sub(volatile __global atomic_float *object,
168+ float operand);
169+float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object,
170+ float operand, memory_order order);
171+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object,
172+ float operand, memory_order order);
173+float __ovld atomic_fetch_add_explicit(volatile __global atomic_float *object,
174+ float operand, memory_order order,
175+ memory_scope scope);
176+float __ovld atomic_fetch_sub_explicit(volatile __global atomic_float *object,
177+ float operand, memory_order order,
178+ memory_scope scope);
179+#endif // defined(__opencl_c_ext_fp32_global_atomic_add)
180+
181+#if defined(__opencl_c_ext_fp32_local_atomic_add)
182+float __ovld atomic_fetch_add(volatile __local atomic_float *object,
183+ float operand);
184+float __ovld atomic_fetch_sub(volatile __local atomic_float *object,
185+ float operand);
186+float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object,
187+ float operand, memory_order order);
188+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object,
189+ float operand, memory_order order);
190+float __ovld atomic_fetch_add_explicit(volatile __local atomic_float *object,
191+ float operand, memory_order order,
192+ memory_scope scope);
193+float __ovld atomic_fetch_sub_explicit(volatile __local atomic_float *object,
194+ float operand, memory_order order,
195+ memory_scope scope);
196+#endif // defined(__opencl_c_ext_fp32_local_atomic_add)
197+
198+#if defined(__opencl_c_ext_fp32_global_atomic_add) || \
199+ defined(__opencl_c_ext_fp32_local_atomic_add)
200+float __ovld atomic_fetch_add(volatile atomic_float *object, float operand);
201+float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand);
202+float __ovld atomic_fetch_add_explicit(volatile atomic_float *object,
203+ float operand, memory_order order);
204+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
205+ float operand, memory_order order);
206+float __ovld atomic_fetch_add_explicit(volatile atomic_float *object,
207+ float operand, memory_order order,
208+ memory_scope scope);
209+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
210+ float operand, memory_order order,
211+ memory_scope scope);
212+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) || \
213+ defined(__opencl_c_ext_fp32_local_atomic_add)
214+
215+#if defined(__opencl_c_ext_fp64_global_atomic_add)
216+double __ovld atomic_fetch_add(volatile __global atomic_double *object,
217+ double operand);
218+double __ovld atomic_fetch_sub(volatile __global atomic_double *object,
219+ double operand);
220+double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object,
221+ double operand, memory_order order);
222+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object,
223+ double operand, memory_order order);
224+double __ovld atomic_fetch_add_explicit(volatile __global atomic_double *object,
225+ double operand, memory_order order,
226+ memory_scope scope);
227+double __ovld atomic_fetch_sub_explicit(volatile __global atomic_double *object,
228+ double operand, memory_order order,
229+ memory_scope scope);
230+#endif // defined(__opencl_c_ext_fp64_global_atomic_add)
231+
232+#if defined(__opencl_c_ext_fp64_local_atomic_add)
233+double __ovld atomic_fetch_add(volatile __local atomic_double *object,
234+ double operand);
235+double __ovld atomic_fetch_sub(volatile __local atomic_double *object,
236+ double operand);
237+double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object,
238+ double operand, memory_order order);
239+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object,
240+ double operand, memory_order order);
241+double __ovld atomic_fetch_add_explicit(volatile __local atomic_double *object,
242+ double operand, memory_order order,
243+ memory_scope scope);
244+double __ovld atomic_fetch_sub_explicit(volatile __local atomic_double *object,
245+ double operand, memory_order order,
246+ memory_scope scope);
247+#endif // defined(__opencl_c_ext_fp64_local_atomic_add)
248+
249+#if defined(__opencl_c_ext_fp64_global_atomic_add) || \
250+ defined(__opencl_c_ext_fp64_local_atomic_add)
251+double __ovld atomic_fetch_add(volatile atomic_double *object, double operand);
252+double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand);
253+double __ovld atomic_fetch_add_explicit(volatile atomic_double *object,
254+ double operand, memory_order order);
255+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
256+ double operand, memory_order order);
257+double __ovld atomic_fetch_add_explicit(volatile atomic_double *object,
258+ double operand, memory_order order,
259+ memory_scope scope);
260+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
261+ double operand, memory_order order,
262+ memory_scope scope);
263+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) || \
264+ defined(__opencl_c_ext_fp64_local_atomic_add)
265+
266+#endif // cl_ext_float_atomics
267+
268 // atomic_store()
269
270 #if defined(__opencl_c_atomic_scope_device) && \
271diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl
272index 2716076acdcf..7f720cf28142 100644
273--- a/clang/test/Headers/opencl-c-header.cl
274+++ b/clang/test/Headers/opencl-c-header.cl
275@@ -98,3 +98,99 @@ global atomic_int z = ATOMIC_VAR_INIT(99);
276 #pragma OPENCL EXTENSION cl_intel_planar_yuv : enable
277
278 // CHECK-MOD: Reading modules
279+
280+// For SPIR all extensions are supported.
281+#if defined(__SPIR__)
282+
283+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
284+
285+#if __opencl_c_ext_fp16_global_atomic_load_store != 1
286+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_load_store"
287+#endif
288+#if __opencl_c_ext_fp16_local_atomic_load_store != 1
289+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_load_store"
290+#endif
291+#if __opencl_c_ext_fp16_global_atomic_add != 1
292+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_add"
293+#endif
294+#if __opencl_c_ext_fp32_global_atomic_add != 1
295+#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_add"
296+#endif
297+#if __opencl_c_ext_fp64_global_atomic_add != 1
298+#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_add"
299+#endif
300+#if __opencl_c_ext_fp16_local_atomic_add != 1
301+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_add"
302+#endif
303+#if __opencl_c_ext_fp32_local_atomic_add != 1
304+#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_add"
305+#endif
306+#if __opencl_c_ext_fp64_local_atomic_add != 1
307+#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_add"
308+#endif
309+#if __opencl_c_ext_fp16_global_atomic_min_max != 1
310+#error "Incorrectly defined __opencl_c_ext_fp16_global_atomic_min_max"
311+#endif
312+#if __opencl_c_ext_fp32_global_atomic_min_max != 1
313+#error "Incorrectly defined __opencl_c_ext_fp32_global_atomic_min_max"
314+#endif
315+#if __opencl_c_ext_fp64_global_atomic_min_max != 1
316+#error "Incorrectly defined __opencl_c_ext_fp64_global_atomic_min_max"
317+#endif
318+#if __opencl_c_ext_fp16_local_atomic_min_max != 1
319+#error "Incorrectly defined __opencl_c_ext_fp16_local_atomic_min_max"
320+#endif
321+#if __opencl_c_ext_fp32_local_atomic_min_max != 1
322+#error "Incorrectly defined __opencl_c_ext_fp32_local_atomic_min_max"
323+#endif
324+#if __opencl_c_ext_fp64_local_atomic_min_max != 1
325+#error "Incorrectly defined __opencl_c_ext_fp64_local_atomic_min_max"
326+#endif
327+#else
328+
329+#ifdef __opencl_c_ext_fp16_global_atomic_load_store
330+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_load_store defined"
331+#endif
332+#ifdef __opencl_c_ext_fp16_local_atomic_load_store
333+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_load_store defined"
334+#endif
335+#ifdef __opencl_c_ext_fp16_global_atomic_add
336+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_add defined"
337+#endif
338+#ifdef __opencl_c_ext_fp32_global_atomic_add
339+#error "Incorrectly __opencl_c_ext_fp32_global_atomic_add defined"
340+#endif
341+#ifdef __opencl_c_ext_fp64_global_atomic_add
342+#error "Incorrectly __opencl_c_ext_fp64_global_atomic_add defined"
343+#endif
344+#ifdef __opencl_c_ext_fp16_local_atomic_add
345+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_add defined"
346+#endif
347+#ifdef __opencl_c_ext_fp32_local_atomic_add
348+#error "Incorrectly __opencl_c_ext_fp32_local_atomic_add defined"
349+#endif
350+#ifdef __opencl_c_ext_fp64_local_atomic_add
351+#error "Incorrectly __opencl_c_ext_fp64_local_atomic_add defined"
352+#endif
353+#ifdef __opencl_c_ext_fp16_global_atomic_min_max
354+#error "Incorrectly __opencl_c_ext_fp16_global_atomic_min_max defined"
355+#endif
356+#ifdef __opencl_c_ext_fp32_global_atomic_min_max
357+#error "Incorrectly __opencl_c_ext_fp32_global_atomic_min_max defined"
358+#endif
359+#ifdef __opencl_c_ext_fp64_global_atomic_min_max
360+#error "Incorrectly __opencl_c_ext_fp64_global_atomic_min_max defined"
361+#endif
362+#ifdef __opencl_c_ext_fp16_local_atomic_min_max
363+#error "Incorrectly __opencl_c_ext_fp16_local_atomic_min_max defined"
364+#endif
365+#ifdef __opencl_c_ext_fp32_local_atomic_min_max
366+#error "Incorrectly __opencl_c_ext_fp32_local_atomic_min_max defined"
367+#endif
368+#ifdef __opencl_c_ext_fp64_local_atomic_min_max
369+#error "Incorrectly __opencl_c_ext_fp64_local_atomic_min_max defined"
370+#endif
371+
372+#endif //(defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
373+
374+#endif // defined(__SPIR__)
375--
3762.17.1
377
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch
deleted file mode 100644
index 09be8202..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch
+++ /dev/null
@@ -1,96 +0,0 @@
1From 294ca2fd69a077b35acec9d498120d6cb0324dae Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 11:53:27 +0800
4Subject: [PATCH 1/2] This patch is required to fix the crash referenced to in
5 #1767
6
7It is a port of the following llvm 11.0 commit : https://reviews.llvm.org/D76994.
8
9Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/41f13f1f64d2074ae7512fb23656c22585e912bd]
10
11Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
12---
13 .../CodeGen/SelectionDAG/LegalizeTypes.cpp | 3 +-
14 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 31 ++++++++++++-------
15 2 files changed, 21 insertions(+), 13 deletions(-)
16
17diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
18index 63ddb59fce68..822da2183269 100644
19--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
20+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
21@@ -173,7 +173,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
22 }
23 }
24 }
25-
26+#ifndef NDEBUG
27 // Checked that NewNodes are only used by other NewNodes.
28 for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
29 SDNode *N = NewNodes[i];
30@@ -181,6 +181,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
31 UI != UE; ++UI)
32 assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
33 }
34+#endif
35 }
36
37 /// This is the main entry point for the type legalizer. This does a top-down
38diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
39index faae14444d51..b908c5c58e9f 100644
40--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
41+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
42@@ -155,7 +155,9 @@ private:
43 const SDValue &getSDValue(TableId &Id) {
44 RemapId(Id);
45 assert(Id && "TableId should be non-zero");
46- return IdToValueMap[Id];
47+ auto I = IdToValueMap.find(Id);
48+ assert(I != IdToValueMap.end() && "cannot find Id in map");
49+ return I->second;
50 }
51
52 public:
53@@ -172,24 +174,29 @@ public:
54 bool run();
55
56 void NoteDeletion(SDNode *Old, SDNode *New) {
57+ assert(Old != New && "node replaced with self");
58 for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
59 TableId NewId = getTableId(SDValue(New, i));
60 TableId OldId = getTableId(SDValue(Old, i));
61
62- if (OldId != NewId)
63+ if (OldId != NewId) {
64 ReplacedValues[OldId] = NewId;
65
66- // Delete Node from tables.
67+ // Delete Node from tables. We cannot do this when OldId == NewId,
68+ // because NewId can still have table references to it in
69+ // ReplacedValues.
70+ IdToValueMap.erase(OldId);
71+ PromotedIntegers.erase(OldId);
72+ ExpandedIntegers.erase(OldId);
73+ SoftenedFloats.erase(OldId);
74+ PromotedFloats.erase(OldId);
75+ ExpandedFloats.erase(OldId);
76+ ScalarizedVectors.erase(OldId);
77+ SplitVectors.erase(OldId);
78+ WidenedVectors.erase(OldId);
79+ }
80+
81 ValueToIdMap.erase(SDValue(Old, i));
82- IdToValueMap.erase(OldId);
83- PromotedIntegers.erase(OldId);
84- ExpandedIntegers.erase(OldId);
85- SoftenedFloats.erase(OldId);
86- PromotedFloats.erase(OldId);
87- ExpandedFloats.erase(OldId);
88- ScalarizedVectors.erase(OldId);
89- SplitVectors.erase(OldId);
90- WidenedVectors.erase(OldId);
91 }
92 }
93
94--
952.17.1
96
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch
deleted file mode 100644
index 72877d83..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0009-ispc-10_0_fix_for_1788.patch
+++ /dev/null
@@ -1,105 +0,0 @@
1From d266087e8dba9e8fd4984e1cb85c20376e2c8ea3 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 11:56:01 +0800
4Subject: [PATCH 2/2] This patch is a fix for #1788.
5
6It is a port of the following llvm 11.0 commit: https://reviews.llvm.org/D81698
7This also needed part of another llvm 11.0 commit: https://reviews.llvm.org/D72975
8
9Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/aeb50448019ce1b1002f3781f9647d486320d83c]
10
11Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
12---
13 llvm/include/llvm/IR/PatternMatch.h | 22 ++++++++++++---
14 .../InstCombine/InstructionCombining.cpp | 27 +++++++++++++++++--
15 2 files changed, 44 insertions(+), 5 deletions(-)
16
17diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
18index 6621fc9f819c..fb7ad93519f6 100644
19--- a/llvm/include/llvm/IR/PatternMatch.h
20+++ b/llvm/include/llvm/IR/PatternMatch.h
21@@ -152,8 +152,10 @@ inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
22
23 struct apint_match {
24 const APInt *&Res;
25+ bool AllowUndef;
26
27- apint_match(const APInt *&R) : Res(R) {}
28+ apint_match(const APInt *&Res, bool AllowUndef)
29+ : Res(Res), AllowUndef(AllowUndef) {}
30
31 template <typename ITy> bool match(ITy *V) {
32 if (auto *CI = dyn_cast<ConstantInt>(V)) {
33@@ -162,7 +164,8 @@ struct apint_match {
34 }
35 if (V->getType()->isVectorTy())
36 if (const auto *C = dyn_cast<Constant>(V))
37- if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
38+ if (auto *CI = dyn_cast_or_null<ConstantInt>(
39+ C->getSplatValue(AllowUndef))) {
40 Res = &CI->getValue();
41 return true;
42 }
43@@ -192,7 +195,20 @@ struct apfloat_match {
44
45 /// Match a ConstantInt or splatted ConstantVector, binding the
46 /// specified pointer to the contained APInt.
47-inline apint_match m_APInt(const APInt *&Res) { return Res; }
48+inline apint_match m_APInt(const APInt *&Res) {
49+ // Forbid undefs by default to maintain previous behavior.
50+ return apint_match(Res, /* AllowUndef */ false);
51+}
52+
53+/// Match APInt while allowing undefs in splat vector constants.
54+inline apint_match m_APIntAllowUndef(const APInt *&Res) {
55+ return apint_match(Res, /* AllowUndef */ true);
56+}
57+
58+/// Match APInt while forbidding undefs in splat vector constants.
59+inline apint_match m_APIntForbidUndef(const APInt *&Res) {
60+ return apint_match(Res, /* AllowUndef */ false);
61+}
62
63 /// Match a ConstantFP or splatted ConstantVector, binding the
64 /// specified pointer to the contained APFloat.
65diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
66index bf32996d96e2..40a246b9d7a7 100644
67--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
68+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
69@@ -925,8 +925,31 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
70 if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
71 if (CI->hasOneUse()) {
72 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
73- if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
74- (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
75+
76+ // FIXME: This is a hack to avoid infinite looping with min/max patterns.
77+ // We have to ensure that vector constants that only differ with
78+ // undef elements are treated as equivalent.
79+ auto areLooselyEqual = [](Value *A, Value *B) {
80+ if (A == B)
81+ return true;
82+
83+ // Test for vector constants.
84+ Constant *ConstA, *ConstB;
85+ if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
86+ return false;
87+
88+ // TODO: Deal with FP constants?
89+ if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
90+ return false;
91+
92+ // Compare for equality including undefs as equal.
93+ auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
94+ const APInt *C;
95+ return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
96+ };
97+
98+ if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
99+ (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
100 return nullptr;
101 }
102 }
103--
1042.17.1
105
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch
deleted file mode 100644
index fc6935a1..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0010-ispc-10_0_fix_for_1793.patch
+++ /dev/null
@@ -1,43 +0,0 @@
1From 8f83e2b7618da7a98a30839a8f41a6dd82dec468 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 12:00:23 +0800
4Subject: [PATCH 1/2] This patch is required to fix stability problem #1793
5
6It's a backport of the following llvm 11.0 commit: 120c5f1057dc50229f73bc75bbabf4df6ee50fef
7
8Upstream-Status: Backport
9
10Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
11---
12 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++++--
13 1 file changed, 4 insertions(+), 2 deletions(-)
14
15diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
16index 2476fd26f250..2743acc89bca 100644
17--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
18+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
19@@ -10702,8 +10702,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
20 SDValue N0 = N->getOperand(0);
21 EVT VT = N->getValueType(0);
22
23+ // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
24 if (N0.isUndef())
25- return DAG.getUNDEF(VT);
26+ return DAG.getConstant(0, SDLoc(N), VT);
27
28 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
29 return Res;
30@@ -10718,8 +10719,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
31 SDValue N0 = N->getOperand(0);
32 EVT VT = N->getValueType(0);
33
34+ // zext_vector_inreg(undef) = 0 because the top bits will be zero.
35 if (N0.isUndef())
36- return DAG.getUNDEF(VT);
37+ return DAG.getConstant(0, SDLoc(N), VT);
38
39 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
40 return Res;
41--
422.17.1
43
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch
deleted file mode 100644
index e3e70107..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0011-ispc-10_0_fix_for_1844.patch
+++ /dev/null
@@ -1,34 +0,0 @@
1From 62b05a69b4a185cd0b7535f19742686e19fcaf22 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 12:02:37 +0800
4Subject: [PATCH 2/2] Fix for #1844, affects avx512skx-i8x64 and
5 avx512skx-i16x32.
6
7It's a port of 11.0 commit edcfb47ff6d5562e22207f364c65f84302aa346b
8https://reviews.llvm.org/D76312
9
10Upstream-Status: Backport
11
12Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
13---
14 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++-
15 1 file changed, 3 insertions(+), 1 deletion(-)
16
17diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
18index 2743acc89bca..439a8367dabe 100644
19--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
20+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
21@@ -10841,7 +10841,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
22
23 // Attempt to pre-truncate BUILD_VECTOR sources.
24 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
25- TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
26+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
27+ // Avoid creating illegal types if running after type legalizer.
28+ (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
29 SDLoc DL(N);
30 EVT SVT = VT.getScalarType();
31 SmallVector<SDValue, 8> TruncOps;
32--
332.17.1
34
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch
deleted file mode 100644
index 8aca5fbf..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch
+++ /dev/null
@@ -1,40 +0,0 @@
1From cc4301f82ca1bde1d438c3708de285b0ab8c72d3 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 12:07:25 +0800
4Subject: [PATCH 1/2] [X86] createVariablePermute - handle case where recursive
5 createVariablePermute call fails
6
7Account for the case where a recursive createVariablePermute call with a wider vector type fails.
8
9Original test case from @craig.topper (Craig Topper)
10
11Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/6bdd63dc28208a597542b0c6bc41093f32417804]
12
13Signed-off-by: Simon Pilgrim <llvm-dev@redking.me.uk>
14Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
15---
16 llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++---
17 1 file changed, 5 insertions(+), 3 deletions(-)
18
19diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
20index c8720d9ae3a6..63eb050e9b3a 100644
21--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
22+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
23@@ -9571,9 +9571,11 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
24 IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
25 IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
26 Subtarget, DAG, SDLoc(IndicesVec));
27- return extractSubVector(
28- createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget), 0,
29- DAG, DL, SizeInBits);
30+ SDValue NewSrcVec =
31+ createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
32+ if (NewSrcVec)
33+ return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
34+ return SDValue();
35 } else if (SrcVec.getValueSizeInBits() < SizeInBits) {
36 // Widen smaller SrcVec to match VT.
37 SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
38--
392.17.1
40
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch
deleted file mode 100644
index e03c279f..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch
+++ /dev/null
@@ -1,61 +0,0 @@
1From 9cdff0785d5cf9effc8e922d3330311c4d3dda78 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 12:09:42 +0800
4Subject: [PATCH 2/2] This patch is needed for avx512skx-i8x64 and
5 avx512skx-i16x32 targets.
6
7This is a combination of two commits:
8- 0cd6712a7af0fa2702b5d4cc733500eb5e62e7d0 - stability fix.
9- d8ad7cc0885f32104a7cd83c77191aec15fd684f - performance follow up.
10
11Upstream-Status: Backport
12
13Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
14---
15 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 +++++++++++++++++--
16 1 file changed, 21 insertions(+), 2 deletions(-)
17
18diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
19index 439a8367dabe..b1639c7f275d 100644
20--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
21+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
22@@ -18471,6 +18471,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
23
24 // Allow targets to opt-out.
25 EVT VT = Extract->getValueType(0);
26+
27+ // We can only create byte sized loads.
28+ if (!VT.isByteSized())
29+ return SDValue();
30+
31+ unsigned Index = ExtIdx->getZExtValue();
32+ unsigned NumElts = VT.getVectorNumElements();
33+
34+ // If the index is a multiple of the extract element count, we can offset the
35+ // address by the store size multiplied by the subvector index. Otherwise if
36+ // the scalar type is byte sized, we can just use the index multiplied by
37+ // the element size in bytes as the offset.
38+ unsigned Offset;
39+ if (Index % NumElts == 0)
40+ Offset = (Index / NumElts) * VT.getStoreSize();
41+ else if (VT.getScalarType().isByteSized())
42+ Offset = Index * VT.getScalarType().getStoreSize();
43+ else
44+ return SDValue();
45+
46 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
47 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
48 return SDValue();
49@@ -18478,8 +18498,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
50 // The narrow load will be offset from the base address of the old load if
51 // we are extracting from something besides index 0 (little-endian).
52 SDLoc DL(Extract);
53- SDValue BaseAddr = Ld->getOperand(1);
54- unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
55+ SDValue BaseAddr = Ld->getBasePtr();
56
57 // TODO: Use "BaseIndexOffset" to make this more effective.
58 SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
59--
602.17.1
61
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch
deleted file mode 100644
index d1768216..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch
+++ /dev/null
@@ -1,97 +0,0 @@
1From c2ebd328979c081dd2c9fd0e359ed99473731d0e Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 12:13:00 +0800
4Subject: [PATCH 1/2] [X86] When storing v1i1/v2i1/v4i1 to memory, make sure we
5 store zeros in the rest of the byte
6
7We can't store garbage in the unused bits. It is possible that something like zextload from i1/i2/i4 is created to read the memory. Those zextloads would be legalized assuming the extra bits are 0.
8
9I'm not sure that the code in lowerStore is executed for the v1i1/v2i1/v4i1 case. It looks like the DAG combine in combineStore may have converted them to v8i1 first. And I think we're missing some cases to avoid going to the stack in the first place. But I don't have time to investigate those things at the moment so I wanted to focus on the correctness issue.
10
11Should fix PR48147.
12
13Reviewed By: RKSimon
14
15Differential Revision: https://reviews.llvm.org/D9129
16
17Upstream-Status: Backport
18
19Signed-off-by: Craig Topper <craig.topper@sifive.com>
20Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
21---
22 llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++++------
23 llvm/lib/Target/X86/X86InstrAVX512.td | 2 --
24 2 files changed, 14 insertions(+), 8 deletions(-)
25
26diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
27index 63eb050e9b3a..96b5e2cfbd82 100644
28--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
29+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
30@@ -22688,17 +22688,22 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
31 // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
32 if (StoredVal.getValueType().isVector() &&
33 StoredVal.getValueType().getVectorElementType() == MVT::i1) {
34- assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
35- "Unexpected VT");
36+ unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
37+ assert(NumElts <= 8 && "Unexpected VT");
38 assert(!St->isTruncatingStore() && "Expected non-truncating store");
39 assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
40 "Expected AVX512F without AVX512DQI");
41
42+ // We must pad with zeros to ensure we store zeroes to any unused bits.
43 StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
44 DAG.getUNDEF(MVT::v16i1), StoredVal,
45 DAG.getIntPtrConstant(0, dl));
46 StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
47 StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);
48+ // Make sure we store zeros in the extra bits.
49+ if (NumElts < 8)
50+ StoredVal = DAG.getZeroExtendInReg(StoredVal, dl,
51+ MVT::getIntegerVT(NumElts));
52
53 return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
54 St->getPointerInfo(), St->getAlignment(),
55@@ -41585,8 +41590,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
56
57 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
58 StoredVal = DAG.getBitcast(NewVT, StoredVal);
59-
60- return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
61+ SDValue Val = StoredVal.getOperand(0);
62+ // We must store zeros to the unused bits.
63+ Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
64+ return DAG.getStore(St->getChain(), dl, Val, St->getBasePtr(),
65 St->getPointerInfo(), St->getAlignment(),
66 St->getMemOperand()->getFlags());
67 }
68@@ -41602,10 +41609,11 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
69 }
70
71 // Widen v2i1/v4i1 stores to v8i1.
72- if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
73+ if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
74 Subtarget.hasAVX512()) {
75 unsigned NumConcats = 8 / VT.getVectorNumElements();
76- SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
77+ // We must store zeros to the unused bits.
78+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
79 Ops[0] = StoredVal;
80 StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
81 return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
82diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
83index 32f012033fb0..d3b92183f87b 100644
84--- a/llvm/lib/Target/X86/X86InstrAVX512.td
85+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
86@@ -2888,8 +2888,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
87
88 // Load/store kreg
89 let Predicates = [HasDQI] in {
90- def : Pat<(store VK1:$src, addr:$dst),
91- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
92
93 def : Pat<(v1i1 (load addr:$src)),
94 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
95--
962.17.1
97
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
deleted file mode 100644
index 03b40e9b..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch
+++ /dev/null
@@ -1,173 +0,0 @@
1From c1565af764adceca118daad0f592e5f14c2bdd4a Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Fri, 27 Aug 2021 12:15:09 +0800
4Subject: [PATCH 2/2] [X86] Convert vXi1 vectors to xmm/ymm/zmm types via
5 getRegisterTypeForCallingConv rather than using CCPromoteToType in the td
6 file
7
8 Previously we tried to promote these to xmm/ymm/zmm by promoting
9 in the X86CallingConv.td file. But this breaks when we run out
10 of xmm/ymm/zmm registers and need to fall back to memory. We end
10 up trying to create a nonsensical scalar to vector. This led
12 to an assertion. The new tests in avx512-calling-conv.ll all
13 trigger this assertion.
14
15 Since we really want to treat these types like we do on avx2,
16 it seems better to promote them before the calling convention
17 code gets involved. Except when the calling convention is one
18 that passes the vXi1 type in a k register.
19
20 The changes in avx512-regcall-Mask.ll are because we indicated
21 that xmm/ymm/zmm types should be passed indirectly for the
22 Win64 ABI before we go to the common lines that promoted the
23 vXi1 types. This caused the promoted types to be picked up by
24 the default calling convention code. Now we promote them earlier
25 so they get passed indirectly as though they were xmm/ymm/zmm.
26
27 Differential Revision: https://reviews.llvm.org/D75154
28
29Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/eadea7868f5b7542ee6bdcd9a975697a0c919ffc]
30
31Signed-off-by: Craig Topper <craig.topper@intel.com>
32Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
33---
34 llvm/lib/Target/X86/X86ISelLowering.cpp | 90 +++++++++++++++++--------
35 1 file changed, 61 insertions(+), 29 deletions(-)
36
37diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
38index 96b5e2cfbd82..d5de94aeb8a2 100644
39--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
40+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
41@@ -2085,51 +2085,83 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const {
42 return TargetLoweringBase::getPreferredVectorAction(VT);
43 }
44
45+static std::pair<MVT, unsigned>
46+handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
47+ const X86Subtarget &Subtarget) {
48+ // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
49+ // convention is one that uses k registers.
50+ if (NumElts == 2)
51+ return {MVT::v2i64, 1};
52+ if (NumElts == 4)
53+ return {MVT::v4i32, 1};
54+ if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
55+ CC != CallingConv::Intel_OCL_BI)
56+ return {MVT::v8i16, 1};
57+ if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
58+ CC != CallingConv::Intel_OCL_BI)
59+ return {MVT::v16i8, 1};
60+ // v32i1 passes in ymm unless we have BWI and the calling convention is
61+ // regcall.
62+ if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
63+ return {MVT::v32i8, 1};
64+ // Split v64i1 vectors if we don't have v64i8 available.
65+ if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
66+ if (Subtarget.useAVX512Regs())
67+ return {MVT::v64i8, 1};
68+ return {MVT::v32i8, 2};
69+ }
70+
71+ // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
72+ if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
73+ NumElts > 64)
74+ return {MVT::i8, NumElts};
75+
76+ return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
77+}
78+
79 MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
80 CallingConv::ID CC,
81 EVT VT) const {
82- // v32i1 vectors should be promoted to v32i8 to match avx2.
83- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
84- return MVT::v32i8;
85- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
86 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
87- Subtarget.hasAVX512() &&
88- (!isPowerOf2_32(VT.getVectorNumElements()) ||
89- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
90- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
91- return MVT::i8;
92- // Split v64i1 vectors if we don't have v64i8 available.
93- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
94- CC != CallingConv::X86_RegCall)
95- return MVT::v32i1;
96+ Subtarget.hasAVX512()) {
97+ unsigned NumElts = VT.getVectorNumElements();
98+
99+ MVT RegisterVT;
100+ unsigned NumRegisters;
101+ std::tie(RegisterVT, NumRegisters) =
102+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
103+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
104+ return RegisterVT;
105+ }
106+
107 // FIXME: Should we just make these types legal and custom split operations?
108 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
109 Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
110 return MVT::v16i32;
111+
112 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
113 }
114
115 unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
116 CallingConv::ID CC,
117 EVT VT) const {
118- // v32i1 vectors should be promoted to v32i8 to match avx2.
119- if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI())
120- return 1;
121- // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
122 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
123- Subtarget.hasAVX512() &&
124- (!isPowerOf2_32(VT.getVectorNumElements()) ||
125- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
126- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
127- return VT.getVectorNumElements();
128- // Split v64i1 vectors if we don't have v64i8 available.
129- if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
130- CC != CallingConv::X86_RegCall)
131- return 2;
132+ Subtarget.hasAVX512()) {
133+ unsigned NumElts = VT.getVectorNumElements();
134+
135+ MVT RegisterVT;
136+ unsigned NumRegisters;
137+ std::tie(RegisterVT, NumRegisters) =
138+ handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
139+ if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
140+ return NumRegisters;
141+ }
142+
143 // FIXME: Should we just make these types legal and custom split operations?
144 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
145 Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
146 return 1;
147+
148 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
149 }
150
151@@ -2140,8 +2172,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
152 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
153 Subtarget.hasAVX512() &&
154 (!isPowerOf2_32(VT.getVectorNumElements()) ||
155- (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
156- (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
157+ (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
158+ VT.getVectorNumElements() > 64)) {
159 RegisterVT = MVT::i8;
160 IntermediateVT = MVT::i1;
161 NumIntermediates = VT.getVectorNumElements();
162@@ -2151,7 +2183,7 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
163 // Split v64i1 vectors if we don't have v64i8 available.
164 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
165 CC != CallingConv::X86_RegCall) {
166- RegisterVT = MVT::v32i1;
167+ RegisterVT = MVT::v32i8;
168 IntermediateVT = MVT::v32i1;
169 NumIntermediates = 2;
170 return 2;
171--
1722.17.1
173
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch
deleted file mode 100644
index 2e3872dc..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch
+++ /dev/null
@@ -1,550 +0,0 @@
1From 447cb2e1b2f0d8bdcfd8a0b39f47d28de50b5d82 Mon Sep 17 00:00:00 2001
2From: Djordje Todorovic <djordje.todorovic@syrmia.com>
3Date: Mon, 9 Mar 2020 11:02:35 +0100
4Subject: [PATCH] Enable the call site info only for -g + optimizations
5
6Emit call site info only in the case of '-g' + 'O>0' level.
7
8Differential Revision: https://reviews.llvm.org/D75175
9
10Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/c15c68abdc6f1afece637bdedba808676191a8e6]
11Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
12---
13 clang/include/clang/Basic/CodeGenOptions.def | 2 ++
14 clang/lib/CodeGen/BackendUtil.cpp | 1 +
15 clang/lib/Frontend/CompilerInvocation.cpp | 4 +++-
16 llvm/include/llvm/CodeGen/CommandFlags.inc | 7 +++++++
17 llvm/include/llvm/Target/TargetOptions.h | 7 ++++++-
18 llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 4 ++--
19 llvm/lib/CodeGen/MachineFunction.cpp | 2 +-
20 llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 2 +-
21 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +-
22 llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +-
23 llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
24 llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir | 2 +-
25 llvm/test/CodeGen/X86/call-site-info-output.ll | 4 ++--
26 llvm/test/DebugInfo/AArch64/call-site-info-output.ll | 2 +-
27 llvm/test/DebugInfo/ARM/call-site-info-output.ll | 2 +-
28 .../MIR/AArch64/dbgcall-site-interpret-movzxi.mir | 2 +-
29 .../DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir | 2 +-
30 llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir | 2 +-
31 .../test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir | 2 +-
32 .../test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir | 2 +-
33 .../DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir | 2 +-
34 llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir | 2 +-
35 .../MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir | 2 +-
36 .../MIR/Hexagon/live-debug-values-bundled-entry-values.mir | 2 +-
37 llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir | 2 +-
38 llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir | 2 +-
39 llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir | 2 +-
40 .../test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir | 2 +-
41 .../test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir | 2 +-
42 .../DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir | 2 +-
43 llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir | 2 +-
44 .../DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir | 2 +-
45 llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir | 2 +-
46 llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir | 4 ++--
47 .../DebugInfo/MIR/X86/entry-value-of-modified-param.mir | 2 +-
48 llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir | 2 +-
49 .../DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir | 2 +-
50 .../test/DebugInfo/MIR/X86/unreachable-block-call-site.mir | 2 +-
51 llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll | 2 +-
52 llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll | 2 +-
53 .../tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll | 2 +-
54 41 files changed, 58 insertions(+), 41 deletions(-)
55
56diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
57index 1ecae98b13b1..6a6a9465273f 100644
58--- a/clang/include/clang/Basic/CodeGenOptions.def
59+++ b/clang/include/clang/Basic/CodeGenOptions.def
60@@ -64,6 +64,8 @@ CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new
61 ///< pass manager.
62 CODEGENOPT(DisableRedZone , 1, 0) ///< Set when -mno-red-zone is enabled.
63 CODEGENOPT(EnableDebugEntryValues, 1, 0) ///< Emit call site parameter dbg info
64+CODEGENOPT(EmitCallSiteInfo, 1, 0) ///< Emit call site info only in the case of
65+ ///< '-g' + 'O>0' level.
66 CODEGENOPT(IndirectTlsSegRefs, 1, 0) ///< Set when -mno-tls-direct-seg-refs
67 ///< is specified.
68 CODEGENOPT(DisableTailCalls , 1, 0) ///< Do not emit tail calls.
69diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
70index db8fd4166d7a..db09f9b641fe 100644
71--- a/clang/lib/CodeGen/BackendUtil.cpp
72+++ b/clang/lib/CodeGen/BackendUtil.cpp
73@@ -482,6 +482,7 @@ static void initTargetOptions(llvm::TargetOptions &Options,
74 Options.EmitAddrsig = CodeGenOpts.Addrsig;
75 Options.EnableDebugEntryValues = CodeGenOpts.EnableDebugEntryValues;
76 Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection;
77+ Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo;
78
79 Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
80 Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
81diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
82index 18fa06bf3c6d..2e73dcbdebe4 100644
83--- a/clang/lib/Frontend/CompilerInvocation.cpp
84+++ b/clang/lib/Frontend/CompilerInvocation.cpp
85@@ -789,8 +789,10 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
86
87 llvm::Triple T(TargetOpts.Triple);
88 if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() &&
89- llvm::is_contained(DebugEntryValueArchs, T.getArch()))
90+ llvm::is_contained(DebugEntryValueArchs, T.getArch())) {
91 Opts.EnableDebugEntryValues = Args.hasArg(OPT_femit_debug_entry_values);
92+ Opts.EmitCallSiteInfo = true;
93+ }
94
95 Opts.DisableO0ImplyOptNone = Args.hasArg(OPT_disable_O0_optnone);
96 Opts.DisableRedZone = Args.hasArg(OPT_disable_red_zone);
97diff --git a/llvm/include/llvm/CodeGen/CommandFlags.inc b/llvm/include/llvm/CodeGen/CommandFlags.inc
98index 6475a5b19edb..36073fe9cc98 100644
99--- a/llvm/include/llvm/CodeGen/CommandFlags.inc
100+++ b/llvm/include/llvm/CodeGen/CommandFlags.inc
101@@ -286,6 +286,12 @@ static cl::opt<bool>
102 EnableAddrsig("addrsig", cl::desc("Emit an address-significance table"),
103 cl::init(false));
104
105+static cl::opt<bool> EmitCallSiteInfo(
106+ "emit-call-site-info",
107+ cl::desc(
108+ "Emit call site debug information, if debug information is enabled."),
109+ cl::init(false));
110+
111 static cl::opt<bool>
112 EnableDebugEntryValues("debug-entry-values",
113 cl::desc("Emit debug info about parameter's entry values"),
114@@ -349,6 +355,7 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() {
115 Options.ExceptionModel = ExceptionModel;
116 Options.EmitStackSizeSection = EnableStackSizeSection;
117 Options.EmitAddrsig = EnableAddrsig;
118+ Options.EmitCallSiteInfo = EmitCallSiteInfo;
119 Options.EnableDebugEntryValues = EnableDebugEntryValues;
120 Options.ForceDwarfFrameSection = ForceDwarfFrameSection;
121
122diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
123index d27c7b0178f0..9378e290bed1 100644
124--- a/llvm/include/llvm/Target/TargetOptions.h
125+++ b/llvm/include/llvm/Target/TargetOptions.h
126@@ -134,7 +134,8 @@ namespace llvm {
127 EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false),
128 EmitStackSizeSection(false), EnableMachineOutliner(false),
129 SupportsDefaultOutlining(false), EmitAddrsig(false),
130- EnableDebugEntryValues(false), ForceDwarfFrameSection(false) {}
131+ EmitCallSiteInfo(false), EnableDebugEntryValues(false),
132+ ForceDwarfFrameSection(false) {}
133
134 /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
135 /// option is specified on the command line, and should enable debugging
136@@ -281,6 +282,10 @@ namespace llvm {
137 /// to selectively generate basic block sections.
138 std::shared_ptr<MemoryBuffer> BBSectionsFuncListBuf;
139
140+ /// The flag enables call site info production. It is used only for debug
141+ /// info, and it is restricted only to optimized code. This can be used for
142+ /// something else, so that should be controlled in the frontend.
143+ unsigned EmitCallSiteInfo : 1;
144 /// Emit debug info about parameter's entry values.
145 unsigned EnableDebugEntryValues : 1;
146
147diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
148index 10157c746b46..f955bdc6186a 100644
149--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
150+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
151@@ -381,11 +381,11 @@ bool MIRParserImpl::initializeCallSiteInfo(
152 CSInfo.emplace_back(Reg, ArgRegPair.ArgNo);
153 }
154
155- if (TM.Options.EnableDebugEntryValues)
156+ if (TM.Options.EmitCallSiteInfo)
157 MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo));
158 }
159
160- if (YamlMF.CallSitesInfo.size() && !TM.Options.EnableDebugEntryValues)
161+ if (YamlMF.CallSitesInfo.size() && !TM.Options.EmitCallSiteInfo)
162 return error(Twine("Call site info provided but not used"));
163 return false;
164 }
165diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
166index 4612690644fe..c3795b7ed314 100644
167--- a/llvm/lib/CodeGen/MachineFunction.cpp
168+++ b/llvm/lib/CodeGen/MachineFunction.cpp
169@@ -855,7 +855,7 @@ MachineFunction::CallSiteInfoMap::iterator
170 MachineFunction::getCallSiteInfo(const MachineInstr *MI) {
171 assert(MI->isCall() && "Call site info refers only to call instructions!");
172
173- if (!Target.Options.EnableDebugEntryValues)
174+ if (!Target.Options.EmitCallSiteInfo)
175 return CallSitesInfo.end();
176 return CallSitesInfo.find(MI);
177 }
178diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
179index 0e4d783e3505..52099f24aca5 100644
180--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
181+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
182@@ -863,7 +863,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
183 MI = &*std::next(Before);
184 }
185
186- if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues)
187+ if (MI->isCall() && DAG->getTarget().Options.EmitCallSiteInfo)
188 MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
189
190 return MI;
191diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
192index 23f05eaad944..63ff3031a5e8 100644
193--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
194+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
195@@ -4132,7 +4132,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
196 RegsToPass.emplace_back(VA.getLocReg(), Arg);
197 RegsUsed.insert(VA.getLocReg());
198 const TargetOptions &Options = DAG.getTarget().Options;
199- if (Options.EnableDebugEntryValues)
200+ if (Options.EmitCallSiteInfo)
201 CSInfo.emplace_back(VA.getLocReg(), i);
202 }
203 } else {
204diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
205index 9f504b1eaa42..5589ba34a2ac 100644
206--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
207+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
208@@ -2222,7 +2222,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
209 isThisReturn = true;
210 }
211 const TargetOptions &Options = DAG.getTarget().Options;
212- if (Options.EnableDebugEntryValues)
213+ if (Options.EmitCallSiteInfo)
214 CSInfo.emplace_back(VA.getLocReg(), i);
215 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
216 } else if (isByVal) {
217diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
218index d5de94aeb8a2..4808bdf6ddc2 100644
219--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
220+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
221@@ -4030,7 +4030,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
222 } else if (VA.isRegLoc()) {
223 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
224 const TargetOptions &Options = DAG.getTarget().Options;
225- if (Options.EnableDebugEntryValues)
226+ if (Options.EmitCallSiteInfo)
227 CSInfo.emplace_back(VA.getLocReg(), I);
228 if (isVarArg && IsWin64) {
229 // Win64 ABI requires argument XMM reg to be copied to the corresponding
230diff --git a/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir b/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir
231index 5ffa0293a2e1..fec542223fc9 100644
232--- a/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir
233+++ b/llvm/test/CodeGen/MIR/Hexagon/bundled-call-site-info.mir
234@@ -1,4 +1,4 @@
235-# RUN: llc -debug-entry-values -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s
236+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s
237
238 # Verify that it is possible to read and write MIR where a callSites entry
239 # points to a call residing in a bundle. The offset should point to the call
240diff --git a/llvm/test/CodeGen/X86/call-site-info-output.ll b/llvm/test/CodeGen/X86/call-site-info-output.ll
241index 4b1e236aadfe..a0438f0c2b98 100644
242--- a/llvm/test/CodeGen/X86/call-site-info-output.ll
243+++ b/llvm/test/CodeGen/X86/call-site-info-output.ll
244@@ -1,6 +1,6 @@
245 ; Test call site info MIR printer and parser.Parser assertions and machine
246 ; verifier will check the rest;
247-; RUN: llc -debug-entry-values %s -stop-before=finalize-isel -o %t.mir
248+; RUN: llc -emit-call-site-info -debug-entry-values %s -stop-before=finalize-isel -o %t.mir
249 ; RUN: cat %t.mir | FileCheck %s
250 ; CHECK: name: fn2
251 ; CHECK: callSites:
252@@ -10,7 +10,7 @@
253 ; CHECK-NEXT: arg: 0, reg: '$edi'
254 ; CHECK-NEXT: arg: 1, reg: '$esi'
255 ; CHECK-NEXT: arg: 2, reg: '$edx'
256-; RUN: llc -debug-entry-values %t.mir -run-pass=finalize-isel -o -| FileCheck %s --check-prefix=PARSER
257+; RUN: llc -emit-call-site-info -debug-entry-values %t.mir -run-pass=finalize-isel -o -| FileCheck %s --check-prefix=PARSER
258 ; Verify that we are able to parse output mir and that we are getting the same result.
259 ; PARSER: name: fn2
260 ; PARSER: callSites:
261diff --git a/llvm/test/DebugInfo/AArch64/call-site-info-output.ll b/llvm/test/DebugInfo/AArch64/call-site-info-output.ll
262index d52d6962f3c4..17d9f7f18762 100644
263--- a/llvm/test/DebugInfo/AArch64/call-site-info-output.ll
264+++ b/llvm/test/DebugInfo/AArch64/call-site-info-output.ll
265@@ -1,4 +1,4 @@
266-; RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s
267+; RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s
268 ; Verify that Selection DAG knows how to recognize simple function parameter forwarding registers.
269 ; Produced from:
270 ; extern int fn1(int,int,int);
271diff --git a/llvm/test/DebugInfo/ARM/call-site-info-output.ll b/llvm/test/DebugInfo/ARM/call-site-info-output.ll
272index 9255a7d57dde..ed726dfe753f 100644
273--- a/llvm/test/DebugInfo/ARM/call-site-info-output.ll
274+++ b/llvm/test/DebugInfo/ARM/call-site-info-output.ll
275@@ -1,4 +1,4 @@
276-; RUN: llc -mtriple arm-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s
277+; RUN: llc -emit-call-site-info -mtriple arm-linux-gnu -debug-entry-values %s -o - -stop-before=finalize-isel | FileCheck %s
278 ; Verify that Selection DAG knows how to recognize simple function parameter forwarding registers.
279 ; Produced from:
280 ; extern int fn1(int,int,int);
281diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir
282index dc7561ca6400..057779a90721 100644
283--- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir
284+++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpret-movzxi.mir
285@@ -1,4 +1,4 @@
286-# RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
287+# RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
288 #
289 # Based on the following C reproducer:
290 #
291diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir
292index 0371ccef603e..d925bc395878 100644
293--- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir
294+++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-interpretation.mir
295@@ -1,4 +1,4 @@
296-# RUN: llc -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
297+# RUN: llc -emit-call-site-info -mtriple aarch64-linux-gnu -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
298 # Following code is used for producing this test case. Note that
299 # some of argument loading instruction are modified in order to
300 # cover certain cases.
301diff --git a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir
302index 916a14022ba5..4a87dad3b9b5 100644
303--- a/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir
304+++ b/llvm/test/DebugInfo/MIR/AArch64/dbgcall-site-orr-moves.mir
305@@ -1,4 +1,4 @@
306-# RUN: llc -debug-entry-values -start-after=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
307+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
308
309 # Based on the following C reproducer:
310 #
311diff --git a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir
312index fbf9b3454689..d85f2d25391d 100644
313--- a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir
314+++ b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir
315@@ -1,4 +1,4 @@
316-# RUN: llc -start-after=livedebugvalues -filetype=obj -o - %s \
317+# RUN: llc -emit-call-site-info -start-after=livedebugvalues -filetype=obj -o - %s \
318 # RUN: | llvm-dwarfdump -v - | FileCheck %s
319
320 # This tests for a crash in DwarfDebug's singular DBG_VALUE range promotion when
321diff --git a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir
322index ce8dc97f0e72..0ae4e6ec485c 100644
323--- a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir
324+++ b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-interpretation.mir
325@@ -1,4 +1,4 @@
326-# RUN: llc -mtriple=arm-linux-gnueabi -debug-entry-values -filetype=obj -start-after=machineverifier %s -o -| llvm-dwarfdump -| FileCheck %s
327+# RUN: llc -emit-call-site-info -mtriple=arm-linux-gnueabi -debug-entry-values -filetype=obj -start-after=machineverifier %s -o -| llvm-dwarfdump -| FileCheck %s
328 # Following code is used for producing this test case. Note that
329 # some of argument loading instruction are modified in order to
330 # cover certain cases.
331diff --git a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir
332index 9001c8ba8eea..5b84d9e9627f 100644
333--- a/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir
334+++ b/llvm/test/DebugInfo/MIR/ARM/dbgcall-site-propagated-value.mir
335@@ -1,4 +1,4 @@
336-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s
337+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s
338
339 # Based on the following C reproducer:
340 #
341diff --git a/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir b/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir
342index aa7b54c1e5bb..11e9c4c90836 100644
343--- a/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir
344+++ b/llvm/test/DebugInfo/MIR/ARM/if-coverter-call-site-info.mir
345@@ -1,4 +1,4 @@
346-# RUN: llc -mtriple=arm-linux-gnu -debug-entry-values -run-pass if-converter %s -o -| FileCheck %s
347+# RUN: llc -emit-call-site-info -mtriple=arm-linux-gnu -debug-entry-values -run-pass if-converter %s -o -| FileCheck %s
348
349 # Vefify that the call site info will be updated after the optimization.
350 # This test case would previously trigger an assertion when
351diff --git a/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir b/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir
352index 8ae628af2c09..3ae23d4189bf 100644
353--- a/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir
354+++ b/llvm/test/DebugInfo/MIR/Hexagon/dbgcall-site-instr-before-bundled-call.mir
355@@ -1,4 +1,4 @@
356-# RUN: llc -mtriple hexagon -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
357+# RUN: llc -mtriple hexagon -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
358
359 # Based on the following C reproducer:
360 #
361diff --git a/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir b/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir
362index ff0a539dd15d..8bb0b3202acd 100644
363--- a/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir
364+++ b/llvm/test/DebugInfo/MIR/Hexagon/live-debug-values-bundled-entry-values.mir
365@@ -1,4 +1,4 @@
366-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s
367+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -o - %s | FileCheck %s
368
369 # Verify that the entry values for the input parameters are inserted after the
370 # bundles which contains the registers' clobbering instructions (the calls to
371diff --git a/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir b/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir
372index 8a4e8b5632c2..3cf41467f7f9 100644
373--- a/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir
374+++ b/llvm/test/DebugInfo/MIR/SystemZ/call-site-lzer.mir
375@@ -1,4 +1,4 @@
376-# RUN: llc -debug-entry-values -start-after=livedebugvalues -o - %s | FileCheck %s
377+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -o - %s | FileCheck %s
378
379 # This test would previously trigger an assertion when trying to describe the
380 # call site value for callee()'s float parameter.
381diff --git a/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir b/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir
382index e6fe5d2de878..4e5a07321d42 100644
383--- a/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir
384+++ b/llvm/test/DebugInfo/MIR/X86/DW_OP_entry_value.mir
385@@ -1,4 +1,4 @@
386-# RUN: llc -debug-entry-values -start-before=livedebugvalues -mtriple=x86_64-apple-darwin -o %t %s -filetype=obj
387+# RUN: llc -emit-call-site-info -debug-entry-values -start-before=livedebugvalues -mtriple=x86_64-apple-darwin -o %t %s -filetype=obj
388 # RUN: llvm-dwarfdump %t | FileCheck %s
389 #
390 # int global;
391diff --git a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir
392index c32a1155d038..edeef2c7aed4 100644
393--- a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir
394+++ b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir
395@@ -1,6 +1,6 @@
396 # Check that llvm can describe a call site parameter which resides in a spill slot.
397 #
398-# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
399+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
400 #
401 # Command:
402 # $ ~/src/builds/llvm-project-master-RA/bin/clang -g -Xclang -femit-debug-entry-values -O2 -c -o spill.o spill.cc -mllvm -stop-before=machineverifier -o spill.mir
403diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir
404index a2d51a203512..01a2b887a60b 100644
405--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir
406+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir
407@@ -1,4 +1,4 @@
408-# RUN: llc -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
409+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
410
411 # Based on the following reproducer:
412 #
413diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir
414index f9e9459f1abd..104bc0146798 100644
415--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir
416+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir
417@@ -1,4 +1,4 @@
418-# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
419+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
420 #
421 # CHECK: DW_TAG_GNU_call_site
422 # CHECK-NEXT: DW_AT_abstract_origin {{.*}} "foo"
423diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir
424index 1bb70f6d4530..4d88fa9aab74 100644
425--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir
426+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir
427@@ -1,4 +1,4 @@
428-# RUN: llc -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
429+# RUN: llc -emit-call-site-info -debug-entry-values -start-after=machineverifier -filetype=obj %s -o -| llvm-dwarfdump -| FileCheck %s
430 # CHECK: DW_TAG_GNU_call_site
431 # CHECK-NEXT: DW_AT_abstract_origin {{.*}} "foo")
432 # CHECK-NEXT: DW_AT_low_pc {{.*}}
433diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir
434index 235787573f51..81af598ba194 100644
435--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir
436+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir
437@@ -1,4 +1,4 @@
438-# RUN: llc -debug-entry-values -start-before=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
439+# RUN: llc -emit-call-site-info -debug-entry-values -start-before=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
440
441 # Based on the following C++ code:
442 # struct A { A(A &) {} };
443diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir
444index db0934c595c3..46adedd1be44 100644
445--- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir
446+++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir
447@@ -1,4 +1,4 @@
448-# RUN: llc -O1 -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
449+# RUN: llc -O1 -emit-call-site-info -debug-entry-values -start-after=livedebugvalues -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
450
451 # Based on the following C reproducer:
452 #
453diff --git a/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir b/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir
454index 5d203029936e..1d7b64f169d1 100644
455--- a/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir
456+++ b/llvm/test/DebugInfo/MIR/X86/dbginfo-entryvals.mir
457@@ -1,4 +1,4 @@
458-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
459+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
460 #
461 #extern void fn2(int);
462 #
463diff --git a/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir b/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir
464index e79be66cd4e3..c39bc4db50be 100644
465--- a/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir
466+++ b/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir
467@@ -2,8 +2,8 @@
468 # When the debugger tuning is set to gdb, use GNU opcodes.
469 # For lldb, use the standard DWARF5 opcodes.
470
471-# RUN: llc -debug-entry-values -debugger-tune=gdb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-GNU
472-# RUN: llc -debug-entry-values -debugger-tune=lldb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-DWARF5
473+# RUN: llc -emit-call-site-info -debug-entry-values -debugger-tune=gdb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-GNU
474+# RUN: llc -emit-call-site-info -debug-entry-values -debugger-tune=lldb -filetype=obj -mtriple=x86_64-unknown-unknown -start-after=machineverifier -o - %s | llvm-dwarfdump - | FileCheck %s -check-prefixes=CHECK-DWARF5
475 #
476 # extern void foo(int *a, int b, int c, int d, int e, int f);
477 # extern int getVal();
478diff --git a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir
479index 8d121c3a30b9..c7f15aaaa562 100644
480--- a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir
481+++ b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir
482@@ -1,4 +1,4 @@
483-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
484+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
485 #
486 #extern void fn1 (int, int, int);
487 #
488diff --git a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
489index 2396daada876..aa8fdd7afd47 100644
490--- a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
491+++ b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
492@@ -1,4 +1,4 @@
493-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
494+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
495 #
496 # The test case was artificially adjusted, in order to make proper diamond basic
497 # block structure relevant to the debug entry values propagation.
498diff --git a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir
499index 86b1cddaa462..c5af863954bf 100644
500--- a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir
501+++ b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir
502@@ -1,4 +1,4 @@
503-# RUN: llc -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
504+# RUN: llc -emit-call-site-info -debug-entry-values -run-pass=livedebugvalues -march=x86-64 -o - %s | FileCheck %s
505 #
506 #extern void fn1 (int, int, int);
507 #__attribute__((noinline))
508diff --git a/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir b/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir
509index d282d796f6d7..ea9c12b5a192 100644
510--- a/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir
511+++ b/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir
512@@ -1,4 +1,4 @@
513-# RUN: llc -mtriple=x86_64-pc-linux -debug-entry-values -run-pass=unreachable-mbb-elimination -o - %s | FileCheck %s
514+# RUN: llc -mtriple=x86_64-pc-linux -emit-call-site-info -debug-entry-values -run-pass=unreachable-mbb-elimination -o - %s | FileCheck %s
515
516 # Verify that the call site information for the call residing in the eliminated
517 # block is removed. This test case would previously trigger an assertion when
518diff --git a/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll b/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll
519index b698f1cdbfe8..b8cd9574cc63 100644
520--- a/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll
521+++ b/llvm/test/DebugInfo/X86/dbgcall-site-64-bit-imms.ll
522@@ -1,4 +1,4 @@
523-; RUN: llc -O1 -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
524+; RUN: llc -O1 -emit-call-site-info -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
525
526 ; Verify that the 64-bit call site immediates are not truncated.
527 ;
528diff --git a/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll b/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll
529index 9fe67f82a2b4..5d37774f55d6 100644
530--- a/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll
531+++ b/llvm/test/DebugInfo/X86/dbgcall-site-zero-valued-imms.ll
532@@ -1,4 +1,4 @@
533-; RUN: llc -O3 -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
534+; RUN: llc -O3 -emit-call-site-info -debug-entry-values -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
535
536 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
537 target triple = "x86_64-unknown-linux-gnu"
538diff --git a/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll b/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll
539index c304e9d768a5..d126757398ff 100644
540--- a/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll
541+++ b/llvm/test/tools/llvm-dwarfdump/X86/stats-dbg-callsite-info.ll
542@@ -1,4 +1,4 @@
543-; RUN: llc -debug-entry-values %s -o - -filetype=obj \
544+; RUN: llc -emit-call-site-info -debug-entry-values %s -o - -filetype=obj \
545 ; RUN: | llvm-dwarfdump -statistics - | FileCheck %s
546 ;
547 ; The LLVM IR file was generated on this source code by using
548--
5492.33.1
550
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch
deleted file mode 100644
index 6b2db931..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch
+++ /dev/null
@@ -1,2243 +0,0 @@
1From fd7146658e3b4ce045dfb332c2edf216f76c1e1f Mon Sep 17 00:00:00 2001
2From: serge-sans-paille <sguelton@redhat.com>
3Date: Wed, 4 Mar 2020 00:47:43 +0100
4Subject: [PATCH] Replace MCTargetOptionsCommandFlags.inc and CommandFlags.inc
5 by runtime registration
6
7MCTargetOptionsCommandFlags.inc and CommandFlags.inc are headers which contain
8cl::opt with static storage.
9These headers are meant to be included by tools to make it easier to parametrize
10codegen/mc.
11
12However, these headers are also included in at least two libraries: lldCommon
13and handle-llvm. As a result, when creating DYLIB, clang-cpp holds a reference
14to the options, and lldCommon holds another reference. Linking the two in a
15single executable, as zig does[0], results in a double registration.
16
17This patch explores another approach: the .inc files are moved to regular
18files, and the registration happens on-demand through static declaration of
19options in the constructor of a static object.
20
21[0] https://bugzilla.redhat.com/show_bug.cgi?id=1756977#c5
22
23Differential Revision: https://reviews.llvm.org/D75579
24
25Upstream-Status: Backport
26Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
27---
28 .../clang-fuzzer/handle-llvm/handle_llvm.cpp | 32 +-
29 lld/Common/TargetOptionsCommandFlags.cpp | 25 +-
30 llvm/include/llvm/CodeGen/CommandFlags.h | 149 +++++
31 .../llvm/MC/MCTargetOptionsCommandFlags.h | 54 ++
32 .../llvm/MC/MCTargetOptionsCommandFlags.inc | 65 --
33 llvm/include/llvm/module.modulemap | 3 -
34 llvm/lib/CodeGen/CMakeLists.txt | 1 +
35 llvm/lib/CodeGen/CommandFlags.cpp | 588 ++++++++++++++++++
36 llvm/lib/MC/CMakeLists.txt | 1 +
37 llvm/lib/MC/MCTargetOptionsCommandFlags.cpp | 105 ++++
38 llvm/tools/dsymutil/DwarfStreamer.cpp | 7 +-
39 llvm/tools/gold/gold-plugin.cpp | 16 +-
40 llvm/tools/llc/CMakeLists.txt | 1 +
41 llvm/tools/llc/llc.cpp | 43 +-
42 llvm/tools/lli/CMakeLists.txt | 1 +
43 llvm/tools/lli/lli.cpp | 41 +-
44 llvm/tools/llvm-dwp/llvm-dwp.cpp | 6 +-
45 .../llvm-isel-fuzzer/llvm-isel-fuzzer.cpp | 17 +-
46 llvm/tools/llvm-lto/CMakeLists.txt | 5 +-
47 llvm/tools/llvm-lto/llvm-lto.cpp | 28 +-
48 llvm/tools/llvm-lto2/CMakeLists.txt | 1 +
49 llvm/tools/llvm-lto2/llvm-lto2.cpp | 20 +-
50 .../llvm-mc-assemble-fuzzer/CMakeLists.txt | 1 +
51 .../llvm-mc-assemble-fuzzer.cpp | 12 +-
52 llvm/tools/llvm-mc/CMakeLists.txt | 1 +
53 llvm/tools/llvm-mc/llvm-mc.cpp | 6 +-
54 llvm/tools/llvm-mca/llvm-mca.cpp | 8 +-
55 .../tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp | 16 +-
56 llvm/tools/lto/CMakeLists.txt | 4 +-
57 llvm/tools/lto/lto.cpp | 35 +-
58 llvm/tools/opt/opt.cpp | 23 +-
59 .../DebugInfo/DWARF/DwarfGenerator.cpp | 6 +-
60 32 files changed, 1088 insertions(+), 233 deletions(-)
61 create mode 100644 llvm/include/llvm/CodeGen/CommandFlags.h
62 create mode 100644 llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
63 delete mode 100644 llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc
64 create mode 100644 llvm/lib/CodeGen/CommandFlags.cpp
65 create mode 100644 llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
66
67diff --git a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp
68index d8ab14594185..aefb761cd70c 100644
69--- a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp
70+++ b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp
71@@ -19,7 +19,7 @@
72 #include "llvm/ADT/Triple.h"
73 #include "llvm/Analysis/TargetLibraryInfo.h"
74 #include "llvm/Analysis/TargetTransformInfo.h"
75-#include "llvm/CodeGen/CommandFlags.inc"
76+#include "llvm/CodeGen/CommandFlags.h"
77 #include "llvm/CodeGen/MachineModuleInfo.h"
78 #include "llvm/CodeGen/TargetPassConfig.h"
79 #include "llvm/ExecutionEngine/JITEventListener.h"
80@@ -29,9 +29,9 @@
81 #include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
82 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
83 #include "llvm/IR/IRPrintingPasses.h"
84+#include "llvm/IR/LLVMContext.h"
85 #include "llvm/IR/LegacyPassManager.h"
86 #include "llvm/IR/LegacyPassNameParser.h"
87-#include "llvm/IR/LLVMContext.h"
88 #include "llvm/IR/Module.h"
89 #include "llvm/IR/Verifier.h"
90 #include "llvm/IRReader/IRReader.h"
91@@ -42,12 +42,14 @@
92 #include "llvm/Support/TargetRegistry.h"
93 #include "llvm/Support/TargetSelect.h"
94 #include "llvm/Target/TargetMachine.h"
95-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
96 #include "llvm/Transforms/IPO.h"
97+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
98 #include "llvm/Transforms/Vectorize.h"
99
100 using namespace llvm;
101
102+static codegen::RegisterCodeGenFlags CGF;
103+
104 // Define a type for the functions that are compiled and executed
105 typedef void (*LLVMFunc)(int*, int*, int*, int);
106
107@@ -100,15 +102,17 @@ static std::string OptLLVM(const std::string &IR, CodeGenOpt::Level OLvl) {
108 ErrorAndExit("Could not parse IR");
109
110 Triple ModuleTriple(M->getTargetTriple());
111- const TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
112+ const TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
113 std::string E;
114- const Target *TheTarget = TargetRegistry::lookupTarget(MArch, ModuleTriple, E);
115- TargetMachine *Machine =
116- TheTarget->createTargetMachine(M->getTargetTriple(), getCPUStr(),
117- getFeaturesStr(), Options, getRelocModel(),
118- getCodeModel(), OLvl);
119+ const Target *TheTarget =
120+ TargetRegistry::lookupTarget(codegen::getMArch(), ModuleTriple, E);
121+ TargetMachine *Machine = TheTarget->createTargetMachine(
122+ M->getTargetTriple(), codegen::getCPUStr(), codegen::getFeaturesStr(),
123+ Options, codegen::getExplicitRelocModel(),
124+ codegen::getExplicitCodeModel(), OLvl);
125 std::unique_ptr<TargetMachine> TM(Machine);
126- setFunctionAttributes(getCPUStr(), getFeaturesStr(), *M);
127+ codegen::setFunctionAttributes(codegen::getCPUStr(),
128+ codegen::getFeaturesStr(), *M);
129
130 legacy::PassManager Passes;
131
132@@ -154,14 +158,14 @@ static void CreateAndRunJITFunc(const std::string &IR, CodeGenOpt::Level OLvl) {
133
134 std::string ErrorMsg;
135 EngineBuilder builder(std::move(M));
136- builder.setMArch(MArch);
137- builder.setMCPU(getCPUStr());
138- builder.setMAttrs(getFeatureList());
139+ builder.setMArch(codegen::getMArch());
140+ builder.setMCPU(codegen::getCPUStr());
141+ builder.setMAttrs(codegen::getFeatureList());
142 builder.setErrorStr(&ErrorMsg);
143 builder.setEngineKind(EngineKind::JIT);
144 builder.setMCJITMemoryManager(std::make_unique<SectionMemoryManager>());
145 builder.setOptLevel(OLvl);
146- builder.setTargetOptions(InitTargetOptionsFromCodeGenFlags());
147+ builder.setTargetOptions(codegen::InitTargetOptionsFromCodeGenFlags());
148
149 std::unique_ptr<ExecutionEngine> EE(builder.create());
150 if (!EE)
151diff --git a/lld/Common/TargetOptionsCommandFlags.cpp b/lld/Common/TargetOptionsCommandFlags.cpp
152index 0137feb63f37..9b166a3e130a 100644
153--- a/lld/Common/TargetOptionsCommandFlags.cpp
154+++ b/lld/Common/TargetOptionsCommandFlags.cpp
155@@ -5,35 +5,26 @@
156 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
157 //
158 //===----------------------------------------------------------------------===//
159-//
160-// This file exists as a place for global variables defined in LLVM's
161-// CodeGen/CommandFlags.inc. By putting the resulting object file in
162-// an archive and linking with it, the definitions will automatically be
163-// included when needed and skipped when already present.
164-//
165-//===----------------------------------------------------------------------===//
166
167 #include "lld/Common/TargetOptionsCommandFlags.h"
168
169-#include "llvm/CodeGen/CommandFlags.inc"
170+#include "llvm/CodeGen/CommandFlags.h"
171 #include "llvm/Target/TargetOptions.h"
172
173-// Define an externally visible version of
174-// initTargetOptionsFromCodeGenFlags, so that its functionality can be
175-// used without having to include llvm/CodeGen/CommandFlags.inc, which
176-// would lead to multiple definitions of the command line flags.
177+static llvm::codegen::RegisterCodeGenFlags CGF;
178+
179 llvm::TargetOptions lld::initTargetOptionsFromCodeGenFlags() {
180- return ::InitTargetOptionsFromCodeGenFlags();
181+ return llvm::codegen::InitTargetOptionsFromCodeGenFlags();
182 }
183
184 llvm::Optional<llvm::Reloc::Model> lld::getRelocModelFromCMModel() {
185- return getRelocModel();
186+ return llvm::codegen::getExplicitRelocModel();
187 }
188
189 llvm::Optional<llvm::CodeModel::Model> lld::getCodeModelFromCMModel() {
190- return getCodeModel();
191+ return llvm::codegen::getExplicitCodeModel();
192 }
193
194-std::string lld::getCPUStr() { return ::getCPUStr(); }
195+std::string lld::getCPUStr() { return llvm::codegen::getCPUStr(); }
196
197-std::vector<std::string> lld::getMAttrs() { return ::MAttrs; }
198+std::vector<std::string> lld::getMAttrs() { return llvm::codegen::getMAttrs(); }
199diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h
200new file mode 100644
201index 000000000000..cdec26879f73
202--- /dev/null
203+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
204@@ -0,0 +1,149 @@
205+//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===//
206+//
207+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
208+// See https://llvm.org/LICENSE.txt for license information.
209+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
210+//
211+//===----------------------------------------------------------------------===//
212+//
213+// This file contains codegen-specific flags that are shared between different
214+// command line tools. The tools "llc" and "opt" both use this file to prevent
215+// flag duplication.
216+//
217+//===----------------------------------------------------------------------===//
218+
219+#include "llvm/ADT/StringExtras.h"
220+#include "llvm/IR/Instructions.h"
221+#include "llvm/IR/Intrinsics.h"
222+#include "llvm/IR/Module.h"
223+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
224+#include "llvm/MC/SubtargetFeature.h"
225+#include "llvm/Support/CodeGen.h"
226+#include "llvm/Support/CommandLine.h"
227+#include "llvm/Support/Host.h"
228+#include "llvm/Target/TargetMachine.h"
229+#include "llvm/Target/TargetOptions.h"
230+#include <string>
231+
232+namespace llvm {
233+
234+namespace codegen {
235+
236+std::string getMArch();
237+
238+std::string getMCPU();
239+
240+std::vector<std::string> getMAttrs();
241+
242+Reloc::Model getRelocModel();
243+Optional<Reloc::Model> getExplicitRelocModel();
244+
245+ThreadModel::Model getThreadModel();
246+
247+CodeModel::Model getCodeModel();
248+Optional<CodeModel::Model> getExplicitCodeModel();
249+
250+llvm::ExceptionHandling getExceptionModel();
251+
252+CodeGenFileType getFileType();
253+Optional<CodeGenFileType> getExplicitFileType();
254+
255+CodeGenFileType getFileType();
256+
257+llvm::FramePointer::FP getFramePointerUsage();
258+
259+bool getEnableUnsafeFPMath();
260+
261+bool getEnableNoInfsFPMath();
262+
263+bool getEnableNoNaNsFPMath();
264+
265+bool getEnableNoSignedZerosFPMath();
266+
267+bool getEnableNoTrappingFPMath();
268+
269+llvm::FPDenormal::DenormalMode getDenormalFPMath();
270+
271+bool getEnableHonorSignDependentRoundingFPMath();
272+
273+llvm::FloatABI::ABIType getFloatABIForCalls();
274+
275+llvm::FPOpFusion::FPOpFusionMode getFuseFPOps();
276+
277+bool getDontPlaceZerosInBSS();
278+
279+bool getEnableGuaranteedTailCallOpt();
280+
281+bool getDisableTailCalls();
282+
283+bool getStackSymbolOrdering();
284+
285+unsigned getOverrideStackAlignment();
286+
287+bool getStackRealign();
288+
289+std::string getTrapFuncName();
290+
291+bool getUseCtors();
292+
293+bool getRelaxELFRelocations();
294+
295+bool getDataSections();
296+Optional<bool> getExplicitDataSections();
297+
298+bool getFunctionSections();
299+Optional<bool> getExplicitFunctionSections();
300+
301+std::string getBBSections();
302+
303+unsigned getTLSSize();
304+
305+bool getEmulatedTLS();
306+
307+bool getUniqueSectionNames();
308+
309+bool getUniqueBBSectionNames();
310+
311+llvm::EABI getEABIVersion();
312+
313+llvm::DebuggerKind getDebuggerTuningOpt();
314+
315+bool getEnableStackSizeSection();
316+
317+bool getEnableAddrsig();
318+
319+bool getEmitCallSiteInfo();
320+
321+bool getEnableDebugEntryValues();
322+
323+bool getForceDwarfFrameSection();
324+
325+/// Create this object with static storage to register codegen-related command
326+/// line options.
327+struct RegisterCodeGenFlags {
328+ RegisterCodeGenFlags();
329+};
330+
331+llvm::BasicBlockSection getBBSectionsMode(llvm::TargetOptions &Options);
332+
333+// Common utility function tightly tied to the options listed here. Initializes
334+// a TargetOptions object with CodeGen flags and returns it.
335+TargetOptions InitTargetOptionsFromCodeGenFlags();
336+
337+std::string getCPUStr();
338+
339+std::string getFeaturesStr();
340+
341+std::vector<std::string> getFeatureList();
342+
343+void renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val);
344+
345+/// Set function attributes of function \p F based on CPU, Features, and command
346+/// line flags.
347+void setFunctionAttributes(StringRef CPU, StringRef Features, Function &F);
348+
349+/// Set function attributes of functions in Module M based on CPU,
350+/// Features, and command line flags.
351+void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M);
352+} // namespace codegen
353+} // namespace llvm
354diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
355new file mode 100644
356index 000000000000..7a5edf78fdcc
357--- /dev/null
358+++ b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
359@@ -0,0 +1,54 @@
360+//===-- MCTargetOptionsCommandFlags.h --------------------------*- C++ -*-===//
361+//
362+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
363+// See https://llvm.org/LICENSE.txt for license information.
364+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
365+//
366+//===----------------------------------------------------------------------===//
367+//
368+// This file contains machine code-specific flags that are shared between
369+// different command line tools.
370+//
371+//===----------------------------------------------------------------------===//
372+
373+#ifndef LLVM_MC_MCTARGETOPTIONSCOMMANDFLAGS_H
374+#define LLVM_MC_MCTARGETOPTIONSCOMMANDFLAGS_H
375+
376+#include "llvm/ADT/Optional.h"
377+#include "llvm/MC/MCTargetOptions.h"
378+#include "llvm/Support/CommandLine.h"
379+
380+namespace llvm {
381+
382+namespace mc {
383+
384+bool getRelaxAll();
385+Optional<bool> getExplicitRelaxAll();
386+
387+bool getIncrementalLinkerCompatible();
388+
389+int getDwarfVersion();
390+
391+bool getShowMCInst();
392+
393+bool getFatalWarnings();
394+
395+bool getNoWarn();
396+
397+bool getNoDeprecatedWarn();
398+
399+std::string getABIName();
400+
401+/// Create this object with static storage to register mc-related command
402+/// line options.
403+struct RegisterMCTargetOptionsFlags {
404+ RegisterMCTargetOptionsFlags();
405+};
406+
407+MCTargetOptions InitMCTargetOptionsFromFlags();
408+
409+} // namespace mc
410+
411+} // namespace llvm
412+
413+#endif
414diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc
415deleted file mode 100644
416index 93e21b626eac..000000000000
417--- a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.inc
418+++ /dev/null
419@@ -1,65 +0,0 @@
420-//===-- MCTargetOptionsCommandFlags.h --------------------------*- C++ -*-===//
421-//
422-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
423-// See https://llvm.org/LICENSE.txt for license information.
424-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
425-//
426-//===----------------------------------------------------------------------===//
427-//
428-// This file contains machine code-specific flags that are shared between
429-// different command line tools.
430-//
431-//===----------------------------------------------------------------------===//
432-
433-#ifndef LLVM_MC_MCTARGETOPTIONSCOMMANDFLAGS_H
434-#define LLVM_MC_MCTARGETOPTIONSCOMMANDFLAGS_H
435-
436-#include "llvm/MC/MCTargetOptions.h"
437-#include "llvm/Support/CommandLine.h"
438-using namespace llvm;
439-
440-static cl::opt<bool> RelaxAll("mc-relax-all",
441- cl::desc("When used with filetype=obj, "
442- "relax all fixups in the emitted object file"));
443-
444-static cl::opt<bool> IncrementalLinkerCompatible(
445- "incremental-linker-compatible",
446- cl::desc(
447- "When used with filetype=obj, "
448- "emit an object file which can be used with an incremental linker"));
449-
450-static cl::opt<int> DwarfVersion("dwarf-version", cl::desc("Dwarf version"),
451- cl::init(0));
452-
453-static cl::opt<bool> ShowMCInst("asm-show-inst",
454- cl::desc("Emit internal instruction representation to "
455- "assembly file"));
456-
457-static cl::opt<bool> FatalWarnings("fatal-warnings",
458- cl::desc("Treat warnings as errors"));
459-
460-static cl::opt<bool> NoWarn("no-warn", cl::desc("Suppress all warnings"));
461-static cl::alias NoWarnW("W", cl::desc("Alias for --no-warn"), cl::aliasopt(NoWarn));
462-
463-static cl::opt<bool> NoDeprecatedWarn("no-deprecated-warn",
464- cl::desc("Suppress all deprecated warnings"));
465-
466-static cl::opt<std::string>
467-ABIName("target-abi", cl::Hidden,
468- cl::desc("The name of the ABI to be targeted from the backend."),
469- cl::init(""));
470-
471-static MCTargetOptions InitMCTargetOptionsFromFlags() {
472- MCTargetOptions Options;
473- Options.MCRelaxAll = RelaxAll;
474- Options.MCIncrementalLinkerCompatible = IncrementalLinkerCompatible;
475- Options.DwarfVersion = DwarfVersion;
476- Options.ShowMCInst = ShowMCInst;
477- Options.ABIName = ABIName;
478- Options.MCFatalWarnings = FatalWarnings;
479- Options.MCNoWarn = NoWarn;
480- Options.MCNoDeprecatedWarn = NoDeprecatedWarn;
481- return Options;
482-}
483-
484-#endif
485diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap
486index d281682ae003..d176b3dfd4be 100644
487--- a/llvm/include/llvm/module.modulemap
488+++ b/llvm/include/llvm/module.modulemap
489@@ -29,7 +29,6 @@ module LLVM_Backend {
490 exclude header "CodeGen/LinkAllCodegenComponents.h"
491
492 // These are intended for (repeated) textual inclusion.
493- textual header "CodeGen/CommandFlags.inc"
494 textual header "CodeGen/DIEValue.def"
495 }
496 }
497@@ -308,8 +307,6 @@ module LLVM_MC {
498
499 umbrella "MC"
500 module * { export * }
501-
502- textual header "MC/MCTargetOptionsCommandFlags.inc"
503 }
504
505 // Used by llvm-tblgen
506diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
507index a3916b7c6242..c6be91247017 100644
508--- a/llvm/lib/CodeGen/CMakeLists.txt
509+++ b/llvm/lib/CodeGen/CMakeLists.txt
510@@ -14,6 +14,7 @@ add_llvm_component_library(LLVMCodeGen
511 CFIInstrInserter.cpp
512 CodeGen.cpp
513 CodeGenPrepare.cpp
514+ CommandFlags.cpp
515 CriticalAntiDepBreaker.cpp
516 DeadMachineInstructionElim.cpp
517 DetectDeadLanes.cpp
518diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
519new file mode 100644
520index 000000000000..7acb84df582f
521--- /dev/null
522+++ b/llvm/lib/CodeGen/CommandFlags.cpp
523@@ -0,0 +1,588 @@
524+//===-- CommandFlags.cpp - Command Line Flags Interface ---------*- C++ -*-===//
525+//
526+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
527+// See https://llvm.org/LICENSE.txt for license information.
528+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
529+//
530+//===----------------------------------------------------------------------===//
531+//
532+// This file contains codegen-specific flags that are shared between different
533+// command line tools. The tools "llc" and "opt" both use this file to prevent
534+// flag duplication.
535+//
536+//===----------------------------------------------------------------------===//
537+
538+#include "llvm/CodeGen/CommandFlags.h"
539+
540+using namespace llvm;
541+
542+#define CGOPT(TY, NAME) \
543+ static cl::opt<TY> *NAME##View; \
544+ TY codegen::get##NAME() { \
545+ assert(NAME##View && "RegisterCodeGenFlags not created."); \
546+ return *NAME##View; \
547+ }
548+
549+#define CGLIST(TY, NAME) \
550+ static cl::list<TY> *NAME##View; \
551+ std::vector<TY> codegen::get##NAME() { \
552+ assert(NAME##View && "RegisterCodeGenFlags not created."); \
553+ return *NAME##View; \
554+ }
555+
556+#define CGOPT_EXP(TY, NAME) \
557+ CGOPT(TY, NAME) \
558+ Optional<TY> codegen::getExplicit##NAME() { \
559+ if (NAME##View->getNumOccurrences()) { \
560+ TY res = *NAME##View; \
561+ return res; \
562+ } \
563+ return None; \
564+ }
565+
566+CGOPT(std::string, MArch)
567+CGOPT(std::string, MCPU)
568+CGLIST(std::string, MAttrs)
569+CGOPT_EXP(Reloc::Model, RelocModel)
570+CGOPT(ThreadModel::Model, ThreadModel)
571+CGOPT_EXP(CodeModel::Model, CodeModel)
572+CGOPT(ExceptionHandling, ExceptionModel)
573+CGOPT_EXP(CodeGenFileType, FileType)
574+CGOPT(FramePointer::FP, FramePointerUsage)
575+CGOPT(bool, EnableUnsafeFPMath)
576+CGOPT(bool, EnableNoInfsFPMath)
577+CGOPT(bool, EnableNoNaNsFPMath)
578+CGOPT(bool, EnableNoSignedZerosFPMath)
579+CGOPT(bool, EnableNoTrappingFPMath)
580+CGOPT(FPDenormal::DenormalMode, DenormalFPMath)
581+CGOPT(bool, EnableHonorSignDependentRoundingFPMath)
582+CGOPT(FloatABI::ABIType, FloatABIForCalls)
583+CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps)
584+CGOPT(bool, DontPlaceZerosInBSS)
585+CGOPT(bool, EnableGuaranteedTailCallOpt)
586+CGOPT(bool, DisableTailCalls)
587+CGOPT(bool, StackSymbolOrdering)
588+CGOPT(unsigned, OverrideStackAlignment)
589+CGOPT(bool, StackRealign)
590+CGOPT(std::string, TrapFuncName)
591+CGOPT(bool, UseCtors)
592+CGOPT(bool, RelaxELFRelocations)
593+CGOPT_EXP(bool, DataSections)
594+CGOPT_EXP(bool, FunctionSections)
595+CGOPT(std::string, BBSections)
596+CGOPT(unsigned, TLSSize)
597+CGOPT(bool, EmulatedTLS)
598+CGOPT(bool, UniqueSectionNames)
599+CGOPT(bool, UniqueBBSectionNames)
600+CGOPT(EABI, EABIVersion)
601+CGOPT(DebuggerKind, DebuggerTuningOpt)
602+CGOPT(bool, EnableStackSizeSection)
603+CGOPT(bool, EnableAddrsig)
604+CGOPT(bool, EmitCallSiteInfo)
605+CGOPT(bool, EnableDebugEntryValues)
606+CGOPT(bool, ForceDwarfFrameSection)
607+
608+codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
609+#define CGBINDOPT(NAME) \
610+ do { \
611+ NAME##View = std::addressof(NAME); \
612+ } while (0)
613+
614+ static cl::opt<std::string> MArch(
615+ "march", cl::desc("Architecture to generate code for (see --version)"));
616+ CGBINDOPT(MArch);
617+
618+ static cl::opt<std::string> MCPU(
619+ "mcpu", cl::desc("Target a specific cpu type (-mcpu=help for details)"),
620+ cl::value_desc("cpu-name"), cl::init(""));
621+ CGBINDOPT(MCPU);
622+
623+ static cl::list<std::string> MAttrs(
624+ "mattr", cl::CommaSeparated,
625+ cl::desc("Target specific attributes (-mattr=help for details)"),
626+ cl::value_desc("a1,+a2,-a3,..."));
627+ CGBINDOPT(MAttrs);
628+
629+ static cl::opt<Reloc::Model> RelocModel(
630+ "relocation-model", cl::desc("Choose relocation model"),
631+ cl::values(
632+ clEnumValN(Reloc::Static, "static", "Non-relocatable code"),
633+ clEnumValN(Reloc::PIC_, "pic",
634+ "Fully relocatable, position independent code"),
635+ clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
636+ "Relocatable external references, non-relocatable code"),
637+ clEnumValN(
638+ Reloc::ROPI, "ropi",
639+ "Code and read-only data relocatable, accessed PC-relative"),
640+ clEnumValN(
641+ Reloc::RWPI, "rwpi",
642+ "Read-write data relocatable, accessed relative to static base"),
643+ clEnumValN(Reloc::ROPI_RWPI, "ropi-rwpi",
644+ "Combination of ropi and rwpi")));
645+ CGBINDOPT(RelocModel);
646+
647+ static cl::opt<ThreadModel::Model> ThreadModel(
648+ "thread-model", cl::desc("Choose threading model"),
649+ cl::init(ThreadModel::POSIX),
650+ cl::values(
651+ clEnumValN(ThreadModel::POSIX, "posix", "POSIX thread model"),
652+ clEnumValN(ThreadModel::Single, "single", "Single thread model")));
653+ CGBINDOPT(ThreadModel);
654+
655+ static cl::opt<CodeModel::Model> CodeModel(
656+ "code-model", cl::desc("Choose code model"),
657+ cl::values(clEnumValN(CodeModel::Tiny, "tiny", "Tiny code model"),
658+ clEnumValN(CodeModel::Small, "small", "Small code model"),
659+ clEnumValN(CodeModel::Kernel, "kernel", "Kernel code model"),
660+ clEnumValN(CodeModel::Medium, "medium", "Medium code model"),
661+ clEnumValN(CodeModel::Large, "large", "Large code model")));
662+ CGBINDOPT(CodeModel);
663+
664+ static cl::opt<ExceptionHandling> ExceptionModel(
665+ "exception-model", cl::desc("exception model"),
666+ cl::init(ExceptionHandling::None),
667+ cl::values(
668+ clEnumValN(ExceptionHandling::None, "default",
669+ "default exception handling model"),
670+ clEnumValN(ExceptionHandling::DwarfCFI, "dwarf",
671+ "DWARF-like CFI based exception handling"),
672+ clEnumValN(ExceptionHandling::SjLj, "sjlj",
673+ "SjLj exception handling"),
674+ clEnumValN(ExceptionHandling::ARM, "arm", "ARM EHABI exceptions"),
675+ clEnumValN(ExceptionHandling::WinEH, "wineh",
676+ "Windows exception model"),
677+ clEnumValN(ExceptionHandling::Wasm, "wasm",
678+ "WebAssembly exception handling")));
679+ CGBINDOPT(ExceptionModel);
680+
681+ static cl::opt<CodeGenFileType> FileType(
682+ "filetype", cl::init(CGFT_AssemblyFile),
683+ cl::desc(
684+ "Choose a file type (not all types are supported by all targets):"),
685+ cl::values(
686+ clEnumValN(CGFT_AssemblyFile, "asm", "Emit an assembly ('.s') file"),
687+ clEnumValN(CGFT_ObjectFile, "obj",
688+ "Emit a native object ('.o') file"),
689+ clEnumValN(CGFT_Null, "null",
690+ "Emit nothing, for performance testing")));
691+ CGBINDOPT(FileType);
692+
693+ static cl::opt<FramePointer::FP> FramePointerUsage(
694+ "frame-pointer",
695+ cl::desc("Specify frame pointer elimination optimization"),
696+ cl::init(FramePointer::None),
697+ cl::values(
698+ clEnumValN(FramePointer::All, "all",
699+ "Disable frame pointer elimination"),
700+ clEnumValN(FramePointer::NonLeaf, "non-leaf",
701+ "Disable frame pointer elimination for non-leaf frame"),
702+ clEnumValN(FramePointer::None, "none",
703+ "Enable frame pointer elimination")));
704+ CGBINDOPT(FramePointerUsage);
705+
706+ static cl::opt<bool> EnableUnsafeFPMath(
707+ "enable-unsafe-fp-math",
708+ cl::desc("Enable optimizations that may decrease FP precision"),
709+ cl::init(false));
710+ CGBINDOPT(EnableUnsafeFPMath);
711+
712+ static cl::opt<bool> EnableNoInfsFPMath(
713+ "enable-no-infs-fp-math",
714+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
715+ cl::init(false));
716+ CGBINDOPT(EnableNoInfsFPMath);
717+
718+ static cl::opt<bool> EnableNoNaNsFPMath(
719+ "enable-no-nans-fp-math",
720+ cl::desc("Enable FP math optimizations that assume no NaNs"),
721+ cl::init(false));
722+ CGBINDOPT(EnableNoNaNsFPMath);
723+
724+ static cl::opt<bool> EnableNoSignedZerosFPMath(
725+ "enable-no-signed-zeros-fp-math",
726+ cl::desc("Enable FP math optimizations that assume "
727+ "the sign of 0 is insignificant"),
728+ cl::init(false));
729+ CGBINDOPT(EnableNoSignedZerosFPMath);
730+
731+ static cl::opt<bool> EnableNoTrappingFPMath(
732+ "enable-no-trapping-fp-math",
733+ cl::desc("Enable setting the FP exceptions build "
734+ "attribute not to use exceptions"),
735+ cl::init(false));
736+ CGBINDOPT(EnableNoTrappingFPMath);
737+
738+ static cl::opt<FPDenormal::DenormalMode> DenormalFPMath(
739+ "denormal-fp-math",
740+ cl::desc(
741+ "Select which denormal numbers the code is permitted to require"),
742+ cl::init(FPDenormal::IEEE),
743+ cl::values(
744+ clEnumValN(FPDenormal::IEEE, "ieee", "IEEE 754 denormal numbers"),
745+ clEnumValN(FPDenormal::PreserveSign, "preserve-sign",
746+ "the sign of a flushed-to-zero number is preserved "
747+ "in the sign of 0"),
748+ clEnumValN(FPDenormal::PositiveZero, "positive-zero",
749+ "denormals are flushed to positive zero")));
750+ CGBINDOPT(DenormalFPMath);
751+
752+ static cl::opt<bool> EnableHonorSignDependentRoundingFPMath(
753+ "enable-sign-dependent-rounding-fp-math", cl::Hidden,
754+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
755+ cl::init(false));
756+ CGBINDOPT(EnableHonorSignDependentRoundingFPMath);
757+
758+ static cl::opt<FloatABI::ABIType> FloatABIForCalls(
759+ "float-abi", cl::desc("Choose float ABI type"),
760+ cl::init(FloatABI::Default),
761+ cl::values(clEnumValN(FloatABI::Default, "default",
762+ "Target default float ABI type"),
763+ clEnumValN(FloatABI::Soft, "soft",
764+ "Soft float ABI (implied by -soft-float)"),
765+ clEnumValN(FloatABI::Hard, "hard",
766+ "Hard float ABI (uses FP registers)")));
767+ CGBINDOPT(FloatABIForCalls);
768+
769+ static cl::opt<FPOpFusion::FPOpFusionMode> FuseFPOps(
770+ "fp-contract", cl::desc("Enable aggressive formation of fused FP ops"),
771+ cl::init(FPOpFusion::Standard),
772+ cl::values(
773+ clEnumValN(FPOpFusion::Fast, "fast",
774+ "Fuse FP ops whenever profitable"),
775+ clEnumValN(FPOpFusion::Standard, "on", "Only fuse 'blessed' FP ops."),
776+ clEnumValN(FPOpFusion::Strict, "off",
777+ "Only fuse FP ops when the result won't be affected.")));
778+ CGBINDOPT(FuseFPOps);
779+
780+ static cl::opt<bool> DontPlaceZerosInBSS(
781+ "nozero-initialized-in-bss",
782+ cl::desc("Don't place zero-initialized symbols into bss section"),
783+ cl::init(false));
784+ CGBINDOPT(DontPlaceZerosInBSS);
785+
786+ static cl::opt<bool> EnableGuaranteedTailCallOpt(
787+ "tailcallopt",
788+ cl::desc(
789+ "Turn fastcc calls into tail calls by (potentially) changing ABI."),
790+ cl::init(false));
791+ CGBINDOPT(EnableGuaranteedTailCallOpt);
792+
793+ static cl::opt<bool> DisableTailCalls(
794+ "disable-tail-calls", cl::desc("Never emit tail calls"), cl::init(false));
795+ CGBINDOPT(DisableTailCalls);
796+
797+ static cl::opt<bool> StackSymbolOrdering(
798+ "stack-symbol-ordering", cl::desc("Order local stack symbols."),
799+ cl::init(true));
800+ CGBINDOPT(StackSymbolOrdering);
801+
802+ static cl::opt<unsigned> OverrideStackAlignment(
803+ "stack-alignment", cl::desc("Override default stack alignment"),
804+ cl::init(0));
805+ CGBINDOPT(OverrideStackAlignment);
806+
807+ static cl::opt<bool> StackRealign(
808+ "stackrealign",
809+ cl::desc("Force align the stack to the minimum alignment"),
810+ cl::init(false));
811+ CGBINDOPT(StackRealign);
812+
813+ static cl::opt<std::string> TrapFuncName(
814+ "trap-func", cl::Hidden,
815+ cl::desc("Emit a call to trap function rather than a trap instruction"),
816+ cl::init(""));
817+ CGBINDOPT(TrapFuncName);
818+
819+ static cl::opt<bool> UseCtors("use-ctors",
820+ cl::desc("Use .ctors instead of .init_array."),
821+ cl::init(false));
822+ CGBINDOPT(UseCtors);
823+
824+ static cl::opt<bool> RelaxELFRelocations(
825+ "relax-elf-relocations",
826+ cl::desc(
827+ "Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"),
828+ cl::init(false));
829+ CGBINDOPT(RelaxELFRelocations);
830+
831+ static cl::opt<bool> DataSections(
832+ "data-sections", cl::desc("Emit data into separate sections"),
833+ cl::init(false));
834+ CGBINDOPT(DataSections);
835+
836+ static cl::opt<bool> FunctionSections(
837+ "function-sections", cl::desc("Emit functions into separate sections"),
838+ cl::init(false));
839+ CGBINDOPT(FunctionSections);
840+
841+ static cl::opt<std::string> BBSections(
842+ "basicblock-sections",
843+ cl::desc("Emit basic blocks into separate sections"),
844+ cl::value_desc("all | <function list (file)> | labels | none"),
845+ cl::init("none"));
846+ CGBINDOPT(BBSections);
847+
848+ static cl::opt<unsigned> TLSSize(
849+ "tls-size", cl::desc("Bit size of immediate TLS offsets"), cl::init(0));
850+ CGBINDOPT(TLSSize);
851+
852+ static cl::opt<bool> EmulatedTLS(
853+ "emulated-tls", cl::desc("Use emulated TLS model"), cl::init(false));
854+ CGBINDOPT(EmulatedTLS);
855+
856+ static cl::opt<bool> UniqueSectionNames(
857+ "unique-section-names", cl::desc("Give unique names to every section"),
858+ cl::init(true));
859+ CGBINDOPT(UniqueSectionNames);
860+
861+ static cl::opt<bool> UniqueBBSectionNames(
862+ "unique-bb-section-names",
863+ cl::desc("Give unique names to every basic block section"),
864+ cl::init(false));
865+ CGBINDOPT(UniqueBBSectionNames);
866+
867+ static cl::opt<EABI> EABIVersion(
868+ "meabi", cl::desc("Set EABI type (default depends on triple):"),
869+ cl::init(EABI::Default),
870+ cl::values(
871+ clEnumValN(EABI::Default, "default", "Triple default EABI version"),
872+ clEnumValN(EABI::EABI4, "4", "EABI version 4"),
873+ clEnumValN(EABI::EABI5, "5", "EABI version 5"),
874+ clEnumValN(EABI::GNU, "gnu", "EABI GNU")));
875+ CGBINDOPT(EABIVersion);
876+
877+ static cl::opt<DebuggerKind> DebuggerTuningOpt(
878+ "debugger-tune", cl::desc("Tune debug info for a particular debugger"),
879+ cl::init(DebuggerKind::Default),
880+ cl::values(
881+ clEnumValN(DebuggerKind::GDB, "gdb", "gdb"),
882+ clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"),
883+ clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. PS4)")));
884+ CGBINDOPT(DebuggerTuningOpt);
885+
886+ static cl::opt<bool> EnableStackSizeSection(
887+ "stack-size-section",
888+ cl::desc("Emit a section containing stack size metadata"),
889+ cl::init(false));
890+ CGBINDOPT(EnableStackSizeSection);
891+
892+ static cl::opt<bool> EnableAddrsig(
893+ "addrsig", cl::desc("Emit an address-significance table"),
894+ cl::init(false));
895+ CGBINDOPT(EnableAddrsig);
896+
897+ static cl::opt<bool> EmitCallSiteInfo(
898+ "emit-call-site-info",
899+ cl::desc(
900+ "Emit call site debug information, if debug information is enabled."),
901+ cl::init(false));
902+ CGBINDOPT(EmitCallSiteInfo);
903+
904+ static cl::opt<bool> EnableDebugEntryValues(
905+ "debug-entry-values",
906+ cl::desc("Emit debug info about parameter's entry values"),
907+ cl::init(false));
908+ CGBINDOPT(EnableDebugEntryValues);
909+
910+ static cl::opt<bool> ForceDwarfFrameSection(
911+ "force-dwarf-frame-section",
912+ cl::desc("Always emit a debug frame section."), cl::init(false));
913+ CGBINDOPT(ForceDwarfFrameSection);
914+
915+#undef CGBINDOPT
916+
917+ mc::RegisterMCTargetOptionsFlags();
918+}
919+
920+llvm::BasicBlockSection
921+codegen::getBBSectionsMode(llvm::TargetOptions &Options) {
922+ if (getBBSections() == "all")
923+ return BasicBlockSection::All;
924+ else if (getBBSections() == "labels")
925+ return BasicBlockSection::Labels;
926+ else if (getBBSections() == "none")
927+ return BasicBlockSection::None;
928+ else {
929+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
930+ MemoryBuffer::getFile(getBBSections());
931+ if (!MBOrErr) {
932+ errs() << "Error loading basic block sections function list file: "
933+ << MBOrErr.getError().message() << "\n";
934+ } else {
935+ Options.BBSectionsFuncListBuf = std::move(*MBOrErr);
936+ }
937+ return BasicBlockSection::List;
938+ }
939+}
940+
941+// Common utility function tightly tied to the options listed here. Initializes
942+// a TargetOptions object with CodeGen flags and returns it.
943+TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() {
944+ TargetOptions Options;
945+ Options.AllowFPOpFusion = getFuseFPOps();
946+ Options.UnsafeFPMath = getEnableUnsafeFPMath();
947+ Options.NoInfsFPMath = getEnableNoInfsFPMath();
948+ Options.NoNaNsFPMath = getEnableNoNaNsFPMath();
949+ Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath();
950+ Options.NoTrappingFPMath = getEnableNoTrappingFPMath();
951+ Options.FPDenormalMode = getDenormalFPMath();
952+ Options.HonorSignDependentRoundingFPMathOption =
953+ getEnableHonorSignDependentRoundingFPMath();
954+ if (getFloatABIForCalls() != FloatABI::Default)
955+ Options.FloatABIType = getFloatABIForCalls();
956+ Options.NoZerosInBSS = getDontPlaceZerosInBSS();
957+ Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt();
958+ Options.StackAlignmentOverride = getOverrideStackAlignment();
959+ Options.StackSymbolOrdering = getStackSymbolOrdering();
960+ Options.UseInitArray = !getUseCtors();
961+ Options.RelaxELFRelocations = getRelaxELFRelocations();
962+ Options.DataSections = getDataSections();
963+ Options.FunctionSections = getFunctionSections();
964+ Options.BBSections = getBBSectionsMode(Options);
965+ Options.UniqueSectionNames = getUniqueSectionNames();
966+ Options.UniqueBBSectionNames = getUniqueBBSectionNames();
967+ Options.TLSSize = getTLSSize();
968+ Options.EmulatedTLS = getEmulatedTLS();
969+ Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0;
970+ Options.ExceptionModel = getExceptionModel();
971+ Options.EmitStackSizeSection = getEnableStackSizeSection();
972+ Options.EmitAddrsig = getEnableAddrsig();
973+ Options.EmitCallSiteInfo = getEmitCallSiteInfo();
974+ Options.EnableDebugEntryValues = getEnableDebugEntryValues();
975+ Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
976+
977+ Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
978+
979+ Options.ThreadModel = getThreadModel();
980+ Options.EABIVersion = getEABIVersion();
981+ Options.DebuggerTuning = getDebuggerTuningOpt();
982+
983+ return Options;
984+}
985+
986+std::string codegen::getCPUStr() {
987+ // If user asked for the 'native' CPU, autodetect here. If autodection fails,
988+ // this will set the CPU to an empty string which tells the target to
989+ // pick a basic default.
990+ if (getMCPU() == "native")
991+ return std::string(sys::getHostCPUName());
992+
993+ return getMCPU();
994+}
995+
996+std::string codegen::getFeaturesStr() {
997+ SubtargetFeatures Features;
998+
999+ // If user asked for the 'native' CPU, we need to autodetect features.
1000+ // This is necessary for x86 where the CPU might not support all the
1001+ // features the autodetected CPU name lists in the target. For example,
1002+ // not all Sandybridge processors support AVX.
1003+ if (getMCPU() == "native") {
1004+ StringMap<bool> HostFeatures;
1005+ if (sys::getHostCPUFeatures(HostFeatures))
1006+ for (auto &F : HostFeatures)
1007+ Features.AddFeature(F.first(), F.second);
1008+ }
1009+
1010+ for (auto const &MAttr : getMAttrs())
1011+ Features.AddFeature(MAttr);
1012+
1013+ return Features.getString();
1014+}
1015+
1016+std::vector<std::string> codegen::getFeatureList() {
1017+ SubtargetFeatures Features;
1018+
1019+ // If user asked for the 'native' CPU, we need to autodetect features.
1020+ // This is necessary for x86 where the CPU might not support all the
1021+ // features the autodetected CPU name lists in the target. For example,
1022+ // not all Sandybridge processors support AVX.
1023+ if (getMCPU() == "native") {
1024+ StringMap<bool> HostFeatures;
1025+ if (sys::getHostCPUFeatures(HostFeatures))
1026+ for (auto &F : HostFeatures)
1027+ Features.AddFeature(F.first(), F.second);
1028+ }
1029+
1030+ for (auto const &MAttr : getMAttrs())
1031+ Features.AddFeature(MAttr);
1032+
1033+ return Features.getFeatures();
1034+}
1035+
1036+void codegen::renderBoolStringAttr(AttrBuilder &B, StringRef Name, bool Val) {
1037+ B.addAttribute(Name, Val ? "true" : "false");
1038+}
1039+
1040+#define HANDLE_BOOL_ATTR(CL, AttrName) \
1041+ do { \
1042+ if (CL->getNumOccurrences() > 0 && !F.hasFnAttribute(AttrName)) \
1043+ renderBoolStringAttr(NewAttrs, AttrName, *CL); \
1044+ } while (0)
1045+
1046+/// Set function attributes of function \p F based on CPU, Features, and command
1047+/// line flags.
1048+void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
1049+ Function &F) {
1050+ auto &Ctx = F.getContext();
1051+ AttributeList Attrs = F.getAttributes();
1052+ AttrBuilder NewAttrs;
1053+
1054+ if (!CPU.empty() && !F.hasFnAttribute("target-cpu"))
1055+ NewAttrs.addAttribute("target-cpu", CPU);
1056+ if (!Features.empty()) {
1057+ // Append the command line features to any that are already on the function.
1058+ StringRef OldFeatures =
1059+ F.getFnAttribute("target-features").getValueAsString();
1060+ if (OldFeatures.empty())
1061+ NewAttrs.addAttribute("target-features", Features);
1062+ else {
1063+ SmallString<256> Appended(OldFeatures);
1064+ Appended.push_back(',');
1065+ Appended.append(Features);
1066+ NewAttrs.addAttribute("target-features", Appended);
1067+ }
1068+ }
1069+ if (FramePointerUsageView->getNumOccurrences() > 0 &&
1070+ !F.hasFnAttribute("frame-pointer")) {
1071+ if (getFramePointerUsage() == FramePointer::All)
1072+ NewAttrs.addAttribute("frame-pointer", "all");
1073+ else if (getFramePointerUsage() == FramePointer::NonLeaf)
1074+ NewAttrs.addAttribute("frame-pointer", "non-leaf");
1075+ else if (getFramePointerUsage() == FramePointer::None)
1076+ NewAttrs.addAttribute("frame-pointer", "none");
1077+ }
1078+ if (DisableTailCallsView->getNumOccurrences() > 0)
1079+ NewAttrs.addAttribute("disable-tail-calls",
1080+ toStringRef(getDisableTailCalls()));
1081+ if (getStackRealign())
1082+ NewAttrs.addAttribute("stackrealign");
1083+
1084+ HANDLE_BOOL_ATTR(EnableUnsafeFPMathView, "unsafe-fp-math");
1085+ HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math");
1086+ HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math");
1087+ HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math");
1088+
1089+ if (TrapFuncNameView->getNumOccurrences() > 0)
1090+ for (auto &B : F)
1091+ for (auto &I : B)
1092+ if (auto *Call = dyn_cast<CallInst>(&I))
1093+ if (const auto *F = Call->getCalledFunction())
1094+ if (F->getIntrinsicID() == Intrinsic::debugtrap ||
1095+ F->getIntrinsicID() == Intrinsic::trap)
1096+ Call->addAttribute(
1097+ AttributeList::FunctionIndex,
1098+ Attribute::get(Ctx, "trap-func-name", getTrapFuncName()));
1099+
1100+ // Let NewAttrs override Attrs.
1101+ F.setAttributes(
1102+ Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs));
1103+}
1104+
1105+/// Set function attributes of functions in Module M based on CPU,
1106+/// Features, and command line flags.
1107+void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
1108+ Module &M) {
1109+ for (Function &F : M)
1110+ setFunctionAttributes(CPU, Features, F);
1111+}
1112diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt
1113index de2e47d8d9b2..ab809daf5273 100644
1114--- a/llvm/lib/MC/CMakeLists.txt
1115+++ b/llvm/lib/MC/CMakeLists.txt
1116@@ -44,6 +44,7 @@ add_llvm_component_library(LLVMMC
1117 MCSymbol.cpp
1118 MCSymbolELF.cpp
1119 MCTargetOptions.cpp
1120+ MCTargetOptionsCommandFlags.cpp
1121 MCValue.cpp
1122 MCWasmObjectTargetWriter.cpp
1123 MCWasmStreamer.cpp
1124diff --git a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
1125new file mode 100644
1126index 000000000000..3ca34061241a
1127--- /dev/null
1128+++ b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
1129@@ -0,0 +1,105 @@
1130+//===-- MCTargetOptionsCommandFlags.cpp --------------------------*- C++
1131+//-*-===//
1132+//
1133+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
1134+// See https://llvm.org/LICENSE.txt for license information.
1135+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
1136+//
1137+//===----------------------------------------------------------------------===//
1138+//
1139+// This file contains machine code-specific flags that are shared between
1140+// different command line tools.
1141+//
1142+//===----------------------------------------------------------------------===//
1143+
1144+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
1145+
1146+using namespace llvm;
1147+
1148+#define MCOPT(TY, NAME) \
1149+ static cl::opt<TY> *NAME##View; \
1150+ TY llvm::mc::get##NAME() { \
1151+ assert(NAME##View && "RegisterMCTargetOptionsFlags not created."); \
1152+ return *NAME##View; \
1153+ }
1154+
1155+#define MCOPT_EXP(TY, NAME) \
1156+ MCOPT(TY, NAME) \
1157+ Optional<TY> llvm::mc::getExplicit##NAME() { \
1158+ if (NAME##View->getNumOccurrences()) { \
1159+ TY res = *NAME##View; \
1160+ return res; \
1161+ } \
1162+ return None; \
1163+ }
1164+
1165+MCOPT_EXP(bool, RelaxAll)
1166+MCOPT(bool, IncrementalLinkerCompatible)
1167+MCOPT(int, DwarfVersion)
1168+MCOPT(bool, ShowMCInst)
1169+MCOPT(bool, FatalWarnings)
1170+MCOPT(bool, NoWarn)
1171+MCOPT(bool, NoDeprecatedWarn)
1172+MCOPT(std::string, ABIName)
1173+
1174+llvm::mc::RegisterMCTargetOptionsFlags::RegisterMCTargetOptionsFlags() {
1175+#define MCBINDOPT(NAME) \
1176+ do { \
1177+ NAME##View = std::addressof(NAME); \
1178+ } while (0)
1179+
1180+ static cl::opt<bool> RelaxAll(
1181+ "mc-relax-all", cl::desc("When used with filetype=obj, relax all fixups "
1182+ "in the emitted object file"));
1183+ MCBINDOPT(RelaxAll);
1184+
1185+ static cl::opt<bool> IncrementalLinkerCompatible(
1186+ "incremental-linker-compatible",
1187+ cl::desc(
1188+ "When used with filetype=obj, "
1189+ "emit an object file which can be used with an incremental linker"));
1190+ MCBINDOPT(IncrementalLinkerCompatible);
1191+
1192+ static cl::opt<int> DwarfVersion("dwarf-version", cl::desc("Dwarf version"),
1193+ cl::init(0));
1194+ MCBINDOPT(DwarfVersion);
1195+
1196+ static cl::opt<bool> ShowMCInst(
1197+ "asm-show-inst",
1198+ cl::desc("Emit internal instruction representation to assembly file"));
1199+ MCBINDOPT(ShowMCInst);
1200+
1201+ static cl::opt<bool> FatalWarnings("fatal-warnings",
1202+ cl::desc("Treat warnings as errors"));
1203+ MCBINDOPT(FatalWarnings);
1204+
1205+ static cl::opt<bool> NoWarn("no-warn", cl::desc("Suppress all warnings"));
1206+ static cl::alias NoWarnW("W", cl::desc("Alias for --no-warn"),
1207+ cl::aliasopt(NoWarn));
1208+ MCBINDOPT(NoWarn);
1209+
1210+ static cl::opt<bool> NoDeprecatedWarn(
1211+ "no-deprecated-warn", cl::desc("Suppress all deprecated warnings"));
1212+ MCBINDOPT(NoDeprecatedWarn);
1213+
1214+ static cl::opt<std::string> ABIName(
1215+ "target-abi", cl::Hidden,
1216+ cl::desc("The name of the ABI to be targeted from the backend."),
1217+ cl::init(""));
1218+ MCBINDOPT(ABIName);
1219+
1220+#undef MCBINDOPT
1221+}
1222+
1223+MCTargetOptions llvm::mc::InitMCTargetOptionsFromFlags() {
1224+ MCTargetOptions Options;
1225+ Options.MCRelaxAll = getRelaxAll();
1226+ Options.MCIncrementalLinkerCompatible = getIncrementalLinkerCompatible();
1227+ Options.DwarfVersion = getDwarfVersion();
1228+ Options.ShowMCInst = getShowMCInst();
1229+ Options.ABIName = getABIName();
1230+ Options.MCFatalWarnings = getFatalWarnings();
1231+ Options.MCNoWarn = getNoWarn();
1232+ Options.MCNoDeprecatedWarn = getNoDeprecatedWarn();
1233+ return Options;
1234+}
1235diff --git a/llvm/tools/dsymutil/DwarfStreamer.cpp b/llvm/tools/dsymutil/DwarfStreamer.cpp
1236index 3e132c29eada..eb068effbc71 100644
1237--- a/llvm/tools/dsymutil/DwarfStreamer.cpp
1238+++ b/llvm/tools/dsymutil/DwarfStreamer.cpp
1239@@ -13,13 +13,16 @@
1240 #include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
1241 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
1242 #include "llvm/MC/MCTargetOptions.h"
1243-#include "llvm/MC/MCTargetOptionsCommandFlags.inc"
1244+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
1245 #include "llvm/Support/LEB128.h"
1246 #include "llvm/Support/TargetRegistry.h"
1247 #include "llvm/Target/TargetMachine.h"
1248 #include "llvm/Target/TargetOptions.h"
1249
1250 namespace llvm {
1251+
1252+static mc::RegisterMCTargetOptionsFlags MOF;
1253+
1254 namespace dsymutil {
1255
1256 /// Retrieve the section named \a SecName in \a Obj.
1257@@ -61,7 +64,7 @@ bool DwarfStreamer::init(Triple TheTriple) {
1258 if (!MRI)
1259 return error(Twine("no register info for target ") + TripleName, Context);
1260
1261- MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
1262+ MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
1263 MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
1264 if (!MAI)
1265 return error("no asm info for target " + TripleName, Context);
1266diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp
1267index 406079dad307..95e72b861197 100644
1268--- a/llvm/tools/gold/gold-plugin.cpp
1269+++ b/llvm/tools/gold/gold-plugin.cpp
1270@@ -14,7 +14,7 @@
1271 #include "llvm/ADT/Statistic.h"
1272 #include "llvm/Bitcode/BitcodeReader.h"
1273 #include "llvm/Bitcode/BitcodeWriter.h"
1274-#include "llvm/CodeGen/CommandFlags.inc"
1275+#include "llvm/CodeGen/CommandFlags.h"
1276 #include "llvm/Config/config.h" // plugin-api.h requires HAVE_STDINT_H
1277 #include "llvm/IR/Constants.h"
1278 #include "llvm/IR/DiagnosticPrinter.h"
1279@@ -50,6 +50,8 @@
1280 using namespace llvm;
1281 using namespace lto;
1282
1283+static codegen::RegisterCodeGenFlags CodeGenFlags;
1284+
1285 // FIXME: Remove when binutils 2.31 (containing gold 1.16) is the minimum
1286 // required version.
1287 typedef enum ld_plugin_status (*ld_plugin_get_wrap_symbols)(
1288@@ -842,21 +844,21 @@ static std::unique_ptr<LTO> createLTO(IndexWriteCallback OnIndexWrite,
1289 ThinBackend Backend;
1290
1291 Conf.CPU = options::mcpu;
1292- Conf.Options = InitTargetOptionsFromCodeGenFlags();
1293+ Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags();
1294
1295 // Disable the new X86 relax relocations since gold might not support them.
1296 // FIXME: Check the gold version or add a new option to enable them.
1297 Conf.Options.RelaxELFRelocations = false;
1298
1299 // Toggle function/data sections.
1300- if (FunctionSections.getNumOccurrences() == 0)
1301+ if (!codegen::getExplicitFunctionSections())
1302 Conf.Options.FunctionSections = SplitSections;
1303- if (DataSections.getNumOccurrences() == 0)
1304+ if (!codegen::getExplicitDataSections())
1305 Conf.Options.DataSections = SplitSections;
1306
1307- Conf.MAttrs = MAttrs;
1308- Conf.RelocModel = RelocationModel;
1309- Conf.CodeModel = getCodeModel();
1310+ Conf.MAttrs = codegen::getMAttrs();
1311+ Conf.RelocModel = codegen::getExplicitRelocModel();
1312+ Conf.CodeModel = codegen::getExplicitCodeModel();
1313 Conf.CGOptLevel = getCGOptLevel();
1314 Conf.DisableVerify = options::DisableVerify;
1315 Conf.OptLevel = options::OptLevel;
1316diff --git a/llvm/tools/llc/CMakeLists.txt b/llvm/tools/llc/CMakeLists.txt
1317index 880deefa539c..479bc6b55b27 100644
1318--- a/llvm/tools/llc/CMakeLists.txt
1319+++ b/llvm/tools/llc/CMakeLists.txt
1320@@ -26,4 +26,5 @@ add_llvm_tool(llc
1321 intrinsics_gen
1322 SUPPORT_PLUGINS
1323 )
1324+
1325 export_executable_symbols(llc)
1326diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
1327index b35f8e853c30..4c41ed292fdc 100644
1328--- a/llvm/tools/llc/llc.cpp
1329+++ b/llvm/tools/llc/llc.cpp
1330@@ -15,7 +15,7 @@
1331 #include "llvm/ADT/STLExtras.h"
1332 #include "llvm/ADT/Triple.h"
1333 #include "llvm/Analysis/TargetLibraryInfo.h"
1334-#include "llvm/CodeGen/CommandFlags.inc"
1335+#include "llvm/CodeGen/CommandFlags.h"
1336 #include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
1337 #include "llvm/CodeGen/LinkAllCodegenComponents.h"
1338 #include "llvm/CodeGen/MIRParser/MIRParser.h"
1339@@ -55,6 +55,8 @@
1340 #include <memory>
1341 using namespace llvm;
1342
1343+static codegen::RegisterCodeGenFlags CGF;
1344+
1345 // General options for llc. Other pass-specific options are specified
1346 // within the corresponding llc passes, and target-specific options
1347 // and back-end code generation options are specified with the target machine.
1348@@ -202,7 +204,7 @@ static std::unique_ptr<ToolOutputFile> GetOutputStream(const char *TargetName,
1349 else
1350 OutputFilename = IFN;
1351
1352- switch (FileType) {
1353+ switch (codegen::getFileType()) {
1354 case CGFT_AssemblyFile:
1355 if (TargetName[0] == 'c') {
1356 if (TargetName[1] == 0)
1357@@ -229,7 +231,7 @@ static std::unique_ptr<ToolOutputFile> GetOutputStream(const char *TargetName,
1358
1359 // Decide if we need "binary" output.
1360 bool Binary = false;
1361- switch (FileType) {
1362+ switch (codegen::getFileType()) {
1363 case CGFT_AssemblyFile:
1364 break;
1365 case CGFT_ObjectFile:
1366@@ -395,14 +397,16 @@ static int compileModule(char **argv, LLVMContext &Context) {
1367 std::unique_ptr<Module> M;
1368 std::unique_ptr<MIRParser> MIR;
1369 Triple TheTriple;
1370- std::string CPUStr = getCPUStr(), FeaturesStr = getFeaturesStr();
1371+ std::string CPUStr = codegen::getCPUStr(),
1372+ FeaturesStr = codegen::getFeaturesStr();
1373
1374 // Set attributes on functions as loaded from MIR from command line arguments.
1375 auto setMIRFunctionAttributes = [&CPUStr, &FeaturesStr](Function &F) {
1376- setFunctionAttributes(CPUStr, FeaturesStr, F);
1377+ codegen::setFunctionAttributes(CPUStr, FeaturesStr, F);
1378 };
1379
1380- bool SkipModule = MCPU == "help" ||
1381+ auto MAttrs = codegen::getMAttrs();
1382+ bool SkipModule = codegen::getMCPU() == "help" ||
1383 (!MAttrs.empty() && MAttrs.front() == "help");
1384
1385 // If user just wants to list available options, skip module loading
1386@@ -433,8 +437,8 @@ static int compileModule(char **argv, LLVMContext &Context) {
1387
1388 // Get the target specific parser.
1389 std::string Error;
1390- const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple,
1391- Error);
1392+ const Target *TheTarget =
1393+ TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error);
1394 if (!TheTarget) {
1395 WithColor::error(errs(), argv[0]) << Error;
1396 return 1;
1397@@ -452,7 +456,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
1398 case '3': OLvl = CodeGenOpt::Aggressive; break;
1399 }
1400
1401- TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
1402+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
1403 Options.DisableIntegratedAS = NoIntegratedAssembler;
1404 Options.MCOptions.ShowMCEncoding = ShowMCEncoding;
1405 Options.MCOptions.MCUseDwarfDirectory = EnableDwarfDirectory;
1406@@ -462,8 +466,8 @@ static int compileModule(char **argv, LLVMContext &Context) {
1407 Options.MCOptions.SplitDwarfFile = SplitDwarfFile;
1408
1409 std::unique_ptr<TargetMachine> Target(TheTarget->createTargetMachine(
1410- TheTriple.getTriple(), CPUStr, FeaturesStr, Options, getRelocModel(),
1411- getCodeModel(), OLvl));
1412+ TheTriple.getTriple(), CPUStr, FeaturesStr, Options, codegen::getExplicitRelocModel(),
1413+ codegen::getExplicitCodeModel(), OLvl));
1414
1415 assert(Target && "Could not allocate target machine!");
1416
1417@@ -474,8 +478,8 @@ static int compileModule(char **argv, LLVMContext &Context) {
1418 return 0;
1419
1420 assert(M && "Should have exited if we didn't have a module!");
1421- if (FloatABIForCalls != FloatABI::Default)
1422- Options.FloatABIType = FloatABIForCalls;
1423+ if (codegen::getFloatABIForCalls() != FloatABI::Default)
1424+ Options.FloatABIType = codegen::getFloatABIForCalls();
1425
1426 // Figure out where we are going to send the output.
1427 std::unique_ptr<ToolOutputFile> Out =
1428@@ -522,10 +526,9 @@ static int compileModule(char **argv, LLVMContext &Context) {
1429
1430 // Override function attributes based on CPUStr, FeaturesStr, and command line
1431 // flags.
1432- setFunctionAttributes(CPUStr, FeaturesStr, *M);
1433+ codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M);
1434
1435- if (RelaxAll.getNumOccurrences() > 0 &&
1436- FileType != CGFT_ObjectFile)
1437+ if (mc::getExplicitRelaxAll() && codegen::getFileType() != CGFT_ObjectFile)
1438 WithColor::warning(errs(), argv[0])
1439 << ": warning: ignoring -mc-relax-all because filetype != obj";
1440
1441@@ -536,7 +539,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
1442 // so we can memcmp the contents in CompileTwice mode
1443 SmallVector<char, 0> Buffer;
1444 std::unique_ptr<raw_svector_ostream> BOS;
1445- if ((FileType != CGFT_AssemblyFile &&
1446+ if ((codegen::getFileType() != CGFT_AssemblyFile &&
1447 !Out->os().supportsSeeking()) ||
1448 CompileTwice) {
1449 BOS = std::make_unique<raw_svector_ostream>(Buffer);
1450@@ -575,9 +578,9 @@ static int compileModule(char **argv, LLVMContext &Context) {
1451 TPC.setInitialized();
1452 PM.add(createPrintMIRPass(*OS));
1453 PM.add(createFreeMachineFunctionPass());
1454- } else if (Target->addPassesToEmitFile(PM, *OS,
1455- DwoOut ? &DwoOut->os() : nullptr,
1456- FileType, NoVerify, MMIWP)) {
1457+ } else if (Target->addPassesToEmitFile(
1458+ PM, *OS, DwoOut ? &DwoOut->os() : nullptr,
1459+ codegen::getFileType(), NoVerify, MMIWP)) {
1460 WithColor::warning(errs(), argv[0])
1461 << "target does not support generation of this"
1462 << " file type!\n";
1463diff --git a/llvm/tools/lli/CMakeLists.txt b/llvm/tools/lli/CMakeLists.txt
1464index db163ad131e8..bc6ef213b8fd 100644
1465--- a/llvm/tools/lli/CMakeLists.txt
1466+++ b/llvm/tools/lli/CMakeLists.txt
1467@@ -53,4 +53,5 @@ add_llvm_tool(lli
1468 DEPENDS
1469 intrinsics_gen
1470 )
1471+
1472 export_executable_symbols(lli)
1473diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp
1474index 0efd0df2c12b..b6a4e3f2833c 100644
1475--- a/llvm/tools/lli/lli.cpp
1476+++ b/llvm/tools/lli/lli.cpp
1477@@ -16,7 +16,7 @@
1478 #include "llvm/ADT/StringExtras.h"
1479 #include "llvm/ADT/Triple.h"
1480 #include "llvm/Bitcode/BitcodeReader.h"
1481-#include "llvm/CodeGen/CommandFlags.inc"
1482+#include "llvm/CodeGen/CommandFlags.h"
1483 #include "llvm/CodeGen/LinkAllCodegenComponents.h"
1484 #include "llvm/Config/llvm-config.h"
1485 #include "llvm/ExecutionEngine/GenericValue.h"
1486@@ -67,6 +67,8 @@
1487
1488 using namespace llvm;
1489
1490+static codegen::RegisterCodeGenFlags CGF;
1491+
1492 #define DEBUG_TYPE "lli"
1493
1494 namespace {
1495@@ -410,13 +412,13 @@ int main(int argc, char **argv, char * const *envp) {
1496
1497 std::string ErrorMsg;
1498 EngineBuilder builder(std::move(Owner));
1499- builder.setMArch(MArch);
1500- builder.setMCPU(getCPUStr());
1501- builder.setMAttrs(getFeatureList());
1502- if (RelocModel.getNumOccurrences())
1503- builder.setRelocationModel(RelocModel);
1504- if (CMModel.getNumOccurrences())
1505- builder.setCodeModel(CMModel);
1506+ builder.setMArch(codegen::getMArch());
1507+ builder.setMCPU(codegen::getCPUStr());
1508+ builder.setMAttrs(codegen::getFeatureList());
1509+ if (auto RM = codegen::getExplicitRelocModel())
1510+ builder.setRelocationModel(RM.getValue());
1511+ if (auto CM = codegen::getExplicitCodeModel())
1512+ builder.setCodeModel(CM.getValue());
1513 builder.setErrorStr(&ErrorMsg);
1514 builder.setEngineKind(ForceInterpreter
1515 ? EngineKind::Interpreter
1516@@ -448,9 +450,9 @@ int main(int argc, char **argv, char * const *envp) {
1517
1518 builder.setOptLevel(getOptLevel());
1519
1520- TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
1521- if (FloatABIForCalls != FloatABI::Default)
1522- Options.FloatABIType = FloatABIForCalls;
1523+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
1524+ if (codegen::getFloatABIForCalls() != FloatABI::Default)
1525+ Options.FloatABIType = codegen::getFloatABIForCalls();
1526
1527 builder.setTargetOptions(Options);
1528
1529@@ -762,18 +764,15 @@ int runOrcLazyJIT(const char *ProgName) {
1530 TT.empty() ? ExitOnErr(orc::JITTargetMachineBuilder::detectHost())
1531 : orc::JITTargetMachineBuilder(Triple(TT)));
1532
1533- if (!MArch.empty())
1534- Builder.getJITTargetMachineBuilder()->getTargetTriple().setArchName(MArch);
1535+ if (!codegen::getMArch().empty())
1536+ Builder.getJITTargetMachineBuilder()->getTargetTriple().setArchName(
1537+ codegen::getMArch());
1538
1539 Builder.getJITTargetMachineBuilder()
1540- ->setCPU(getCPUStr())
1541- .addFeatures(getFeatureList())
1542- .setRelocationModel(RelocModel.getNumOccurrences()
1543- ? Optional<Reloc::Model>(RelocModel)
1544- : None)
1545- .setCodeModel(CMModel.getNumOccurrences()
1546- ? Optional<CodeModel::Model>(CMModel)
1547- : None);
1548+ ->setCPU(codegen::getCPUStr())
1549+ .addFeatures(codegen::getFeatureList())
1550+ .setRelocationModel(codegen::getExplicitRelocModel())
1551+ .setCodeModel(codegen::getExplicitCodeModel());
1552
1553 Builder.setLazyCompileFailureAddr(
1554 pointerToJITTargetAddress(exitOnLazyCallThroughFailure));
1555diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
1556index 23513ef8fb4e..8cfd433d5da3 100644
1557--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
1558+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
1559@@ -27,7 +27,7 @@
1560 #include "llvm/MC/MCObjectWriter.h"
1561 #include "llvm/MC/MCRegisterInfo.h"
1562 #include "llvm/MC/MCStreamer.h"
1563-#include "llvm/MC/MCTargetOptionsCommandFlags.inc"
1564+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
1565 #include "llvm/Object/Decompressor.h"
1566 #include "llvm/Object/ObjectFile.h"
1567 #include "llvm/Support/DataExtractor.h"
1568@@ -46,6 +46,8 @@
1569 using namespace llvm;
1570 using namespace llvm::object;
1571
1572+static mc::RegisterMCTargetOptionsFlags MCTargetOptionsFlags;
1573+
1574 cl::OptionCategory DwpCategory("Specific Options");
1575 static cl::list<std::string> InputFiles(cl::Positional, cl::ZeroOrMore,
1576 cl::desc("<input files>"),
1577@@ -676,7 +678,7 @@ int main(int argc, char **argv) {
1578 if (!MRI)
1579 return error(Twine("no register info for target ") + TripleName, Context);
1580
1581- MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
1582+ MCTargetOptions MCOptions = llvm::mc::InitMCTargetOptionsFromFlags();
1583 std::unique_ptr<MCAsmInfo> MAI(
1584 TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
1585 if (!MAI)
1586diff --git a/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp b/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp
1587index b71ed4a70566..627e9ab4c03f 100644
1588--- a/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp
1589+++ b/llvm/tools/llvm-isel-fuzzer/llvm-isel-fuzzer.cpp
1590@@ -14,7 +14,7 @@
1591 #include "llvm/Analysis/TargetLibraryInfo.h"
1592 #include "llvm/Bitcode/BitcodeReader.h"
1593 #include "llvm/Bitcode/BitcodeWriter.h"
1594-#include "llvm/CodeGen/CommandFlags.inc"
1595+#include "llvm/CodeGen/CommandFlags.h"
1596 #include "llvm/FuzzMutate/FuzzerCLI.h"
1597 #include "llvm/FuzzMutate/IRMutator.h"
1598 #include "llvm/FuzzMutate/Operations.h"
1599@@ -35,6 +35,8 @@
1600
1601 using namespace llvm;
1602
1603+static codegen::RegisterCodeGenFlags CGF;
1604+
1605 static cl::opt<char>
1606 OptLevel("O",
1607 cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
1608@@ -133,14 +135,15 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
1609 // Get the target specific parser.
1610 std::string Error;
1611 const Target *TheTarget =
1612- TargetRegistry::lookupTarget(MArch, TheTriple, Error);
1613+ TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error);
1614 if (!TheTarget) {
1615 errs() << argv[0] << ": " << Error;
1616 return 1;
1617 }
1618
1619 // Set up the pipeline like llc does.
1620- std::string CPUStr = getCPUStr(), FeaturesStr = getFeaturesStr();
1621+ std::string CPUStr = codegen::getCPUStr(),
1622+ FeaturesStr = codegen::getFeaturesStr();
1623
1624 CodeGenOpt::Level OLvl = CodeGenOpt::Default;
1625 switch (OptLevel) {
1626@@ -154,10 +157,10 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
1627 case '3': OLvl = CodeGenOpt::Aggressive; break;
1628 }
1629
1630- TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
1631- TM.reset(TheTarget->createTargetMachine(TheTriple.getTriple(), CPUStr,
1632- FeaturesStr, Options, getRelocModel(),
1633- getCodeModel(), OLvl));
1634+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
1635+ TM.reset(TheTarget->createTargetMachine(
1636+ TheTriple.getTriple(), CPUStr, FeaturesStr, Options,
1637+ codegen::getExplicitRelocModel(), codegen::getExplicitCodeModel(), OLvl));
1638 assert(TM && "Could not allocate target machine!");
1639
1640 // Make sure we print the summary and the current unit when LLVM errors out.
1641diff --git a/llvm/tools/llvm-lto/CMakeLists.txt b/llvm/tools/llvm-lto/CMakeLists.txt
1642index 69868fb870c0..5128e713eecf 100644
1643--- a/llvm/tools/llvm-lto/CMakeLists.txt
1644+++ b/llvm/tools/llvm-lto/CMakeLists.txt
1645@@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS
1646 AllTargetsInfos
1647 BitReader
1648 BitWriter
1649+ CodeGen
1650 Core
1651 IRReader
1652 LTO
1653@@ -17,7 +18,5 @@ set(LLVM_LINK_COMPONENTS
1654 add_llvm_tool(llvm-lto
1655 llvm-lto.cpp
1656
1657- DEPENDS
1658- intrinsics_gen
1659+ DEPENDS intrinsics_gen
1660 )
1661-
1662diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp
1663index b47e68e82850..7886426e8945 100644
1664--- a/llvm/tools/llvm-lto/llvm-lto.cpp
1665+++ b/llvm/tools/llvm-lto/llvm-lto.cpp
1666@@ -21,7 +21,7 @@
1667 #include "llvm/ADT/Twine.h"
1668 #include "llvm/Bitcode/BitcodeReader.h"
1669 #include "llvm/Bitcode/BitcodeWriter.h"
1670-#include "llvm/CodeGen/CommandFlags.inc"
1671+#include "llvm/CodeGen/CommandFlags.h"
1672 #include "llvm/IR/DiagnosticInfo.h"
1673 #include "llvm/IR/DiagnosticPrinter.h"
1674 #include "llvm/IR/LLVMContext.h"
1675@@ -62,6 +62,8 @@
1676
1677 using namespace llvm;
1678
1679+static codegen::RegisterCodeGenFlags CGF;
1680+
1681 static cl::opt<char>
1682 OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
1683 "(default = '-O2')"),
1684@@ -521,7 +523,7 @@ public:
1685 ThinLTOCodeGenerator ThinGenerator;
1686
1687 ThinLTOProcessing(const TargetOptions &Options) {
1688- ThinGenerator.setCodePICModel(getRelocModel());
1689+ ThinGenerator.setCodePICModel(codegen::getExplicitRelocModel());
1690 ThinGenerator.setTargetOptions(Options);
1691 ThinGenerator.setCacheDir(ThinLTOCacheDir);
1692 ThinGenerator.setCachePruningInterval(ThinLTOCachePruningInterval);
1693@@ -873,7 +875,7 @@ int main(int argc, char **argv) {
1694 InitializeAllAsmParsers();
1695
1696 // set up the TargetOptions for the machine
1697- TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
1698+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
1699
1700 if (ListSymbolsOnly) {
1701 listSymbols(Options);
1702@@ -929,7 +931,7 @@ int main(int argc, char **argv) {
1703 if (UseDiagnosticHandler)
1704 CodeGen.setDiagnosticHandler(handleDiagnostics, nullptr);
1705
1706- CodeGen.setCodePICModel(getRelocModel());
1707+ CodeGen.setCodePICModel(codegen::getExplicitRelocModel());
1708 CodeGen.setFreestanding(EnableFreestanding);
1709
1710 CodeGen.setDebugInfo(LTO_DEBUG_MODEL_DWARF);
1711@@ -980,22 +982,18 @@ int main(int argc, char **argv) {
1712 CodeGen.addMustPreserveSymbol(KeptDSOSyms[i]);
1713
1714 // Set cpu and attrs strings for the default target/subtarget.
1715- CodeGen.setCpu(MCPU.c_str());
1716+ CodeGen.setCpu(codegen::getMCPU().c_str());
1717
1718 CodeGen.setOptLevel(OptLevel - '0');
1719
1720- std::string attrs;
1721- for (unsigned i = 0; i < MAttrs.size(); ++i) {
1722- if (i > 0)
1723- attrs.append(",");
1724- attrs.append(MAttrs[i]);
1725- }
1726-
1727- if (!attrs.empty())
1728+ auto MAttrs = codegen::getMAttrs();
1729+ if (!MAttrs.empty()) {
1730+ std::string attrs = join(MAttrs, ",");
1731 CodeGen.setAttr(attrs);
1732+ }
1733
1734- if (FileType.getNumOccurrences())
1735- CodeGen.setFileType(FileType);
1736+ if (auto FT = codegen::getExplicitFileType())
1737+ CodeGen.setFileType(FT.getValue());
1738
1739 if (!OutputFilename.empty()) {
1740 if (!CodeGen.optimize(DisableVerify, DisableInline, DisableGVNLoadPRE,
1741diff --git a/llvm/tools/llvm-lto2/CMakeLists.txt b/llvm/tools/llvm-lto2/CMakeLists.txt
1742index fa2d8624fd94..4d3364175b04 100644
1743--- a/llvm/tools/llvm-lto2/CMakeLists.txt
1744+++ b/llvm/tools/llvm-lto2/CMakeLists.txt
1745@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
1746 AllTargetsDescs
1747 AllTargetsInfos
1748 BitReader
1749+ CodeGen
1750 Core
1751 Linker
1752 LTO
1753diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
1754index 67a677dd45fb..142ba605aa2a 100644
1755--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
1756+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
1757@@ -16,7 +16,7 @@
1758 //===----------------------------------------------------------------------===//
1759
1760 #include "llvm/Bitcode/BitcodeReader.h"
1761-#include "llvm/CodeGen/CommandFlags.inc"
1762+#include "llvm/CodeGen/CommandFlags.h"
1763 #include "llvm/IR/DiagnosticPrinter.h"
1764 #include "llvm/LTO/Caching.h"
1765 #include "llvm/LTO/LTO.h"
1766@@ -29,6 +29,8 @@
1767 using namespace llvm;
1768 using namespace lto;
1769
1770+static codegen::RegisterCodeGenFlags CGF;
1771+
1772 static cl::opt<char>
1773 OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
1774 "(default = '-O2')"),
1775@@ -217,12 +219,12 @@ static int run(int argc, char **argv) {
1776 exit(1);
1777 };
1778
1779- Conf.CPU = MCPU;
1780- Conf.Options = InitTargetOptionsFromCodeGenFlags();
1781- Conf.MAttrs = MAttrs;
1782- if (auto RM = getRelocModel())
1783- Conf.RelocModel = *RM;
1784- Conf.CodeModel = getCodeModel();
1785+ Conf.CPU = codegen::getMCPU();
1786+ Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags();
1787+ Conf.MAttrs = codegen::getMAttrs();
1788+ if (auto RM = codegen::getExplicitRelocModel())
1789+ Conf.RelocModel = RM.getValue();
1790+ Conf.CodeModel = codegen::getExplicitCodeModel();
1791
1792 Conf.DebugPassManager = DebugPassManager;
1793
1794@@ -264,8 +266,8 @@ static int run(int argc, char **argv) {
1795 return 1;
1796 }
1797
1798- if (FileType.getNumOccurrences())
1799- Conf.CGFileType = FileType;
1800+ if (auto FT = codegen::getExplicitFileType())
1801+ Conf.CGFileType = FT.getValue();
1802
1803 Conf.OverrideTriple = OverrideTriple;
1804 Conf.DefaultTriple = DefaultTriple;
1805diff --git a/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt b/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt
1806index fb6befd3c54a..6bbc502e2eee 100644
1807--- a/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt
1808+++ b/llvm/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt
1809@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
1810 MCParser
1811 Support
1812 )
1813+
1814 add_llvm_fuzzer(llvm-mc-assemble-fuzzer
1815 llvm-mc-assemble-fuzzer.cpp
1816 )
1817diff --git a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
1818index 6c5961f7027c..29699c634bfa 100644
1819--- a/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
1820+++ b/llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
1821@@ -9,7 +9,6 @@
1822 //===----------------------------------------------------------------------===//
1823
1824 #include "llvm-c/Target.h"
1825-#include "llvm/MC/SubtargetFeature.h"
1826 #include "llvm/MC/MCAsmBackend.h"
1827 #include "llvm/MC/MCAsmInfo.h"
1828 #include "llvm/MC/MCCodeEmitter.h"
1829@@ -24,15 +23,16 @@
1830 #include "llvm/MC/MCSectionMachO.h"
1831 #include "llvm/MC/MCStreamer.h"
1832 #include "llvm/MC/MCSubtargetInfo.h"
1833-#include "llvm/MC/MCTargetOptionsCommandFlags.inc"
1834-#include "llvm/Support/MemoryBuffer.h"
1835+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
1836+#include "llvm/MC/SubtargetFeature.h"
1837 #include "llvm/Support/CommandLine.h"
1838 #include "llvm/Support/FileUtilities.h"
1839-#include "llvm/Support/raw_ostream.h"
1840+#include "llvm/Support/MemoryBuffer.h"
1841 #include "llvm/Support/SourceMgr.h"
1842-#include "llvm/Support/TargetSelect.h"
1843 #include "llvm/Support/TargetRegistry.h"
1844+#include "llvm/Support/TargetSelect.h"
1845 #include "llvm/Support/ToolOutputFile.h"
1846+#include "llvm/Support/raw_ostream.h"
1847
1848 using namespace llvm;
1849
1850@@ -161,7 +161,7 @@ int AssembleOneInput(const uint8_t *Data, size_t Size) {
1851 abort();
1852 }
1853
1854- MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
1855+ MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
1856 std::unique_ptr<MCAsmInfo> MAI(
1857 TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
1858 if (!MAI) {
1859diff --git a/llvm/tools/llvm-mc/CMakeLists.txt b/llvm/tools/llvm-mc/CMakeLists.txt
1860index 15c6dda1b258..94add70b6943 100644
1861--- a/llvm/tools/llvm-mc/CMakeLists.txt
1862+++ b/llvm/tools/llvm-mc/CMakeLists.txt
1863@@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
1864 AllTargetsDescs
1865 AllTargetsDisassemblers
1866 AllTargetsInfos
1867+ CodeGen
1868 MC
1869 MCParser
1870 Support
1871diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp
1872index 6aa347d98be2..8c1b3cf2cab0 100644
1873--- a/llvm/tools/llvm-mc/llvm-mc.cpp
1874+++ b/llvm/tools/llvm-mc/llvm-mc.cpp
1875@@ -25,7 +25,7 @@
1876 #include "llvm/MC/MCRegisterInfo.h"
1877 #include "llvm/MC/MCStreamer.h"
1878 #include "llvm/MC/MCSubtargetInfo.h"
1879-#include "llvm/MC/MCTargetOptionsCommandFlags.inc"
1880+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
1881 #include "llvm/Support/CommandLine.h"
1882 #include "llvm/Support/Compression.h"
1883 #include "llvm/Support/FileUtilities.h"
1884@@ -41,6 +41,8 @@
1885
1886 using namespace llvm;
1887
1888+static mc::RegisterMCTargetOptionsFlags MOF;
1889+
1890 static cl::opt<std::string>
1891 InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
1892
1893@@ -317,7 +319,7 @@ int main(int argc, char **argv) {
1894 cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
1895
1896 cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
1897- const MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
1898+ const MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
1899 setDwarfDebugFlags(argc, argv);
1900
1901 setDwarfDebugProducer();
1902diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
1903index fff5906bb59b..eca86768aa5c 100644
1904--- a/llvm/tools/llvm-mca/llvm-mca.cpp
1905+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
1906@@ -39,7 +39,7 @@
1907 #include "llvm/MC/MCObjectFileInfo.h"
1908 #include "llvm/MC/MCRegisterInfo.h"
1909 #include "llvm/MC/MCSubtargetInfo.h"
1910-#include "llvm/MC/MCTargetOptionsCommandFlags.inc"
1911+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
1912 #include "llvm/MCA/CodeEmitter.h"
1913 #include "llvm/MCA/Context.h"
1914 #include "llvm/MCA/InstrBuilder.h"
1915@@ -62,6 +62,8 @@
1916
1917 using namespace llvm;
1918
1919+static mc::RegisterMCTargetOptionsFlags MOF;
1920+
1921 static cl::OptionCategory ToolOptions("Tool Options");
1922 static cl::OptionCategory ViewOptions("View Options");
1923
1924@@ -353,7 +355,7 @@ int main(int argc, char **argv) {
1925 std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
1926 assert(MRI && "Unable to create target register info!");
1927
1928- MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
1929+ MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
1930 std::unique_ptr<MCAsmInfo> MAI(
1931 TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
1932 assert(MAI && "Unable to create target asm info!");
1933@@ -443,7 +445,7 @@ int main(int argc, char **argv) {
1934 TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
1935
1936 std::unique_ptr<MCAsmBackend> MAB(TheTarget->createMCAsmBackend(
1937- *STI, *MRI, InitMCTargetOptionsFromFlags()));
1938+ *STI, *MRI, mc::InitMCTargetOptionsFromFlags()));
1939
1940 for (const std::unique_ptr<mca::CodeRegion> &Region : Regions) {
1941 // Skip empty code regions.
1942diff --git a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
1943index cd6c7d380cc6..e353e333f580 100644
1944--- a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
1945+++ b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp
1946@@ -12,7 +12,7 @@
1947
1948 #include "llvm/Bitcode/BitcodeReader.h"
1949 #include "llvm/Bitcode/BitcodeWriter.h"
1950-#include "llvm/CodeGen/CommandFlags.inc"
1951+#include "llvm/CodeGen/CommandFlags.h"
1952 #include "llvm/FuzzMutate/FuzzerCLI.h"
1953 #include "llvm/FuzzMutate/IRMutator.h"
1954 #include "llvm/IR/Verifier.h"
1955@@ -24,6 +24,8 @@
1956
1957 using namespace llvm;
1958
1959+static codegen::RegisterCodeGenFlags CGF;
1960+
1961 static cl::opt<std::string>
1962 TargetTripleStr("mtriple", cl::desc("Override target triple for module"));
1963
1964@@ -124,7 +126,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
1965
1966 M->setTargetTriple(TM->getTargetTriple().normalize());
1967 M->setDataLayout(TM->createDataLayout());
1968- setFunctionAttributes(TM->getTargetCPU(), TM->getTargetFeatureString(), *M);
1969+ codegen::setFunctionAttributes(TM->getTargetCPU(),
1970+ TM->getTargetFeatureString(), *M);
1971
1972 // Create pass pipeline
1973 //
1974@@ -214,16 +217,17 @@ extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(
1975
1976 std::string Error;
1977 const Target *TheTarget =
1978- TargetRegistry::lookupTarget(MArch, TargetTriple, Error);
1979+ TargetRegistry::lookupTarget(codegen::getMArch(), TargetTriple, Error);
1980 if (!TheTarget) {
1981 errs() << *argv[0] << ": " << Error;
1982 exit(1);
1983 }
1984
1985- TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
1986+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
1987 TM.reset(TheTarget->createTargetMachine(
1988- TargetTriple.getTriple(), getCPUStr(), getFeaturesStr(),
1989- Options, getRelocModel(), getCodeModel(), CodeGenOpt::Default));
1990+ TargetTriple.getTriple(), codegen::getCPUStr(), codegen::getFeaturesStr(),
1991+ Options, codegen::getExplicitRelocModel(),
1992+ codegen::getExplicitCodeModel(), CodeGenOpt::Default));
1993 assert(TM && "Could not allocate target machine!");
1994
1995 // Check that pass pipeline is specified and correct
1996diff --git a/llvm/tools/lto/CMakeLists.txt b/llvm/tools/lto/CMakeLists.txt
1997index b86e4abd01a7..2963f97cad88 100644
1998--- a/llvm/tools/lto/CMakeLists.txt
1999+++ b/llvm/tools/lto/CMakeLists.txt
2000@@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
2001 AllTargetsInfos
2002 BitReader
2003 Core
2004+ CodeGen
2005 LTO
2006 MC
2007 MCDisassembler
2008@@ -20,7 +21,8 @@ set(SOURCES
2009
2010 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/lto.exports)
2011
2012-add_llvm_library(LTO SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS intrinsics_gen)
2013+add_llvm_library(LTO SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES} DEPENDS
2014+ intrinsics_gen)
2015
2016 install(FILES ${LLVM_MAIN_INCLUDE_DIR}/llvm-c/lto.h
2017 DESTINATION include/llvm-c
2018diff --git a/llvm/tools/lto/lto.cpp b/llvm/tools/lto/lto.cpp
2019index 9933af94de1e..6d207b76685f 100644
2020--- a/llvm/tools/lto/lto.cpp
2021+++ b/llvm/tools/lto/lto.cpp
2022@@ -15,7 +15,7 @@
2023 #include "llvm/ADT/STLExtras.h"
2024 #include "llvm/ADT/StringExtras.h"
2025 #include "llvm/Bitcode/BitcodeReader.h"
2026-#include "llvm/CodeGen/CommandFlags.inc"
2027+#include "llvm/CodeGen/CommandFlags.h"
2028 #include "llvm/IR/DiagnosticInfo.h"
2029 #include "llvm/IR/DiagnosticPrinter.h"
2030 #include "llvm/IR/LLVMContext.h"
2031@@ -28,6 +28,10 @@
2032 #include "llvm/Support/TargetSelect.h"
2033 #include "llvm/Support/raw_ostream.h"
2034
2035+using namespace llvm;
2036+
2037+static codegen::RegisterCodeGenFlags CGF;
2038+
2039 // extra command-line flags needed for LTOCodeGenerator
2040 static cl::opt<char>
2041 OptLevel("O",
2042@@ -154,14 +158,9 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LTOModule, lto_module_t)
2043 // Convert the subtarget features into a string to pass to LTOCodeGenerator.
2044 static void lto_add_attrs(lto_code_gen_t cg) {
2045 LTOCodeGenerator *CG = unwrap(cg);
2046- if (MAttrs.size()) {
2047- std::string attrs;
2048- for (unsigned i = 0; i < MAttrs.size(); ++i) {
2049- if (i > 0)
2050- attrs.append(",");
2051- attrs.append(MAttrs[i]);
2052- }
2053-
2054+ auto MAttrs = codegen::getMAttrs();
2055+ if (!MAttrs.empty()) {
2056+ std::string attrs = join(MAttrs, ",");
2057 CG->setAttr(attrs);
2058 }
2059
2060@@ -219,7 +218,7 @@ lto_module_is_object_file_in_memory_for_target(const void* mem,
2061
2062 lto_module_t lto_module_create(const char* path) {
2063 lto_initialize();
2064- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
2065+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
2066 ErrorOr<std::unique_ptr<LTOModule>> M =
2067 LTOModule::createFromFile(*LTOContext, StringRef(path), Options);
2068 if (!M)
2069@@ -229,7 +228,7 @@ lto_module_t lto_module_create(const char* path) {
2070
2071 lto_module_t lto_module_create_from_fd(int fd, const char *path, size_t size) {
2072 lto_initialize();
2073- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
2074+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
2075 ErrorOr<std::unique_ptr<LTOModule>> M = LTOModule::createFromOpenFile(
2076 *LTOContext, fd, StringRef(path), size, Options);
2077 if (!M)
2078@@ -242,7 +241,7 @@ lto_module_t lto_module_create_from_fd_at_offset(int fd, const char *path,
2079 size_t map_size,
2080 off_t offset) {
2081 lto_initialize();
2082- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
2083+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
2084 ErrorOr<std::unique_ptr<LTOModule>> M = LTOModule::createFromOpenFileSlice(
2085 *LTOContext, fd, StringRef(path), map_size, offset, Options);
2086 if (!M)
2087@@ -252,7 +251,7 @@ lto_module_t lto_module_create_from_fd_at_offset(int fd, const char *path,
2088
2089 lto_module_t lto_module_create_from_memory(const void* mem, size_t length) {
2090 lto_initialize();
2091- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
2092+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
2093 ErrorOr<std::unique_ptr<LTOModule>> M =
2094 LTOModule::createFromBuffer(*LTOContext, mem, length, Options);
2095 if (!M)
2096@@ -264,7 +263,7 @@ lto_module_t lto_module_create_from_memory_with_path(const void* mem,
2097 size_t length,
2098 const char *path) {
2099 lto_initialize();
2100- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
2101+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
2102 ErrorOr<std::unique_ptr<LTOModule>> M = LTOModule::createFromBuffer(
2103 *LTOContext, mem, length, Options, StringRef(path));
2104 if (!M)
2105@@ -275,7 +274,7 @@ lto_module_t lto_module_create_from_memory_with_path(const void* mem,
2106 lto_module_t lto_module_create_in_local_context(const void *mem, size_t length,
2107 const char *path) {
2108 lto_initialize();
2109- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
2110+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
2111
2112 // Create a local context. Ownership will be transferred to LTOModule.
2113 std::unique_ptr<LLVMContext> Context = std::make_unique<LLVMContext>();
2114@@ -294,7 +293,7 @@ lto_module_t lto_module_create_in_codegen_context(const void *mem,
2115 const char *path,
2116 lto_code_gen_t cg) {
2117 lto_initialize();
2118- llvm::TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
2119+ llvm::TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
2120 ErrorOr<std::unique_ptr<LTOModule>> M = LTOModule::createFromBuffer(
2121 unwrap(cg)->getContext(), mem, length, Options, StringRef(path));
2122 return wrap(M->release());
2123@@ -336,7 +335,7 @@ void lto_codegen_set_diagnostic_handler(lto_code_gen_t cg,
2124 static lto_code_gen_t createCodeGen(bool InLocalContext) {
2125 lto_initialize();
2126
2127- TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
2128+ TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
2129
2130 LibLTOCodeGenerator *CodeGen =
2131 InLocalContext ? new LibLTOCodeGenerator(std::make_unique<LLVMContext>())
2132@@ -484,7 +483,7 @@ void lto_codegen_set_should_embed_uselists(lto_code_gen_t cg,
2133 thinlto_code_gen_t thinlto_create_codegen(void) {
2134 lto_initialize();
2135 ThinLTOCodeGenerator *CodeGen = new ThinLTOCodeGenerator();
2136- CodeGen->setTargetOptions(InitTargetOptionsFromCodeGenFlags());
2137+ CodeGen->setTargetOptions(codegen::InitTargetOptionsFromCodeGenFlags());
2138 CodeGen->setFreestanding(EnableFreestanding);
2139
2140 if (OptLevel.getNumOccurrences()) {
2141diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
2142index 75a6cdc3892b..8d619ef76b7a 100644
2143--- a/llvm/tools/opt/opt.cpp
2144+++ b/llvm/tools/opt/opt.cpp
2145@@ -22,7 +22,7 @@
2146 #include "llvm/Analysis/TargetLibraryInfo.h"
2147 #include "llvm/Analysis/TargetTransformInfo.h"
2148 #include "llvm/Bitcode/BitcodeWriterPass.h"
2149-#include "llvm/CodeGen/CommandFlags.inc"
2150+#include "llvm/CodeGen/CommandFlags.h"
2151 #include "llvm/CodeGen/TargetPassConfig.h"
2152 #include "llvm/Config/llvm-config.h"
2153 #include "llvm/IR/DataLayout.h"
2154@@ -61,6 +61,8 @@
2155 using namespace llvm;
2156 using namespace opt_tool;
2157
2158+static codegen::RegisterCodeGenFlags CFG;
2159+
2160 // The OptimizationList is automatically populated with registered Passes by the
2161 // PassNameParser.
2162 //
2163@@ -470,16 +472,17 @@ static TargetMachine* GetTargetMachine(Triple TheTriple, StringRef CPUStr,
2164 StringRef FeaturesStr,
2165 const TargetOptions &Options) {
2166 std::string Error;
2167- const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple,
2168- Error);
2169+ const Target *TheTarget =
2170+ TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error);
2171 // Some modules don't specify a triple, and this is okay.
2172 if (!TheTarget) {
2173 return nullptr;
2174 }
2175
2176- return TheTarget->createTargetMachine(TheTriple.getTriple(), CPUStr,
2177- FeaturesStr, Options, getRelocModel(),
2178- getCodeModel(), GetCodeGenOptLevel());
2179+ return TheTarget->createTargetMachine(
2180+ TheTriple.getTriple(), codegen::getCPUStr(), codegen::getFeaturesStr(),
2181+ Options, codegen::getExplicitRelocModel(),
2182+ codegen::getExplicitCodeModel(), GetCodeGenOptLevel());
2183 }
2184
2185 #ifdef BUILD_EXAMPLES
2186@@ -659,11 +662,11 @@ int main(int argc, char **argv) {
2187 Triple ModuleTriple(M->getTargetTriple());
2188 std::string CPUStr, FeaturesStr;
2189 TargetMachine *Machine = nullptr;
2190- const TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
2191+ const TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags();
2192
2193 if (ModuleTriple.getArch()) {
2194- CPUStr = getCPUStr();
2195- FeaturesStr = getFeaturesStr();
2196+ CPUStr = codegen::getCPUStr();
2197+ FeaturesStr = codegen::getFeaturesStr();
2198 Machine = GetTargetMachine(ModuleTriple, CPUStr, FeaturesStr, Options);
2199 } else if (ModuleTriple.getArchName() != "unknown" &&
2200 ModuleTriple.getArchName() != "") {
2201@@ -676,7 +679,7 @@ int main(int argc, char **argv) {
2202
2203 // Override function attributes based on CPUStr, FeaturesStr, and command line
2204 // flags.
2205- setFunctionAttributes(CPUStr, FeaturesStr, *M);
2206+ codegen::setFunctionAttributes(CPUStr, FeaturesStr, *M);
2207
2208 // If the output is set to be emitted to standard out, and standard out is a
2209 // console, print out a warning message and refuse to do it. We don't
2210diff --git a/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp b/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp
2211index 472d4dd6ad1e..32d1d3c91ff2 100644
2212--- a/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp
2213+++ b/llvm/unittests/DebugInfo/DWARF/DwarfGenerator.cpp
2214@@ -25,7 +25,7 @@
2215 #include "llvm/MC/MCRegisterInfo.h"
2216 #include "llvm/MC/MCStreamer.h"
2217 #include "llvm/MC/MCSubtargetInfo.h"
2218-#include "llvm/MC/MCTargetOptionsCommandFlags.inc"
2219+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
2220 #include "llvm/PassAnalysisSupport.h"
2221 #include "llvm/Support/TargetRegistry.h"
2222 #include "llvm/Support/raw_ostream.h"
2223@@ -36,6 +36,8 @@
2224 using namespace llvm;
2225 using namespace dwarf;
2226
2227+mc::RegisterMCTargetOptionsFlags MOF;
2228+
2229 namespace {} // end anonymous namespace
2230
2231 //===----------------------------------------------------------------------===//
2232@@ -410,7 +412,7 @@ llvm::Error dwarfgen::Generator::init(Triple TheTriple, uint16_t V) {
2233 TripleName,
2234 inconvertibleErrorCode());
2235
2236- MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
2237+ MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
2238 MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
2239 if (!MAI)
2240 return make_error<StringError>("no asm info for target " + TripleName,
2241--
22422.33.1
2243
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-basic-block-sections-support.patch b/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-basic-block-sections-support.patch
deleted file mode 100644
index f90a79ae..00000000
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/files/llvm10-basic-block-sections-support.patch
+++ /dev/null
@@ -1,237 +0,0 @@
1From d51fdb9f2986747a56c593fa057d531720b39deb Mon Sep 17 00:00:00 2001
2From: Sriraman Tallam <tmsriram@google.com>
3Date: Fri, 13 Mar 2020 15:58:57 -0700
4Subject: [PATCH] Basic Block Sections Support.
5
6This is the first in a series of patches to enable Basic Block Sections
7in LLVM.
8
9We introduce a new compiler option, -fbasicblock-sections=, which places every
10basic block in a unique ELF text section in the object file along with a
11symbol labeling the basic block. The linker can then order the basic block
12sections in any arbitrary sequence which when done correctly can encapsulate
13block layout, function layout and function splitting optimizations. However,
14there are a couple of challenges to be addressed for this to be feasible:
15
161) The compiler must not allow any implicit fall-through between any two
17 adjacent basic blocks as they could be reordered at link time to be
18 non-adjacent. In other words, the compiler must make a fall-through
19 between adjacent basic blocks explicit by retaining the direct jump
20 instruction that jumps to the next basic block. These branches can only
21 be removed later by the linker after the blocks have been reordered.
222) All inter-basic block branch targets would now need to be resolved by
23 the linker as they cannot be calculated during compile time. This is
24 done using static relocations which bloats the size of the object files.
25 Further, the compiler tries to use short branch instructions on some ISAs
26 for branch offsets that can be accommodated in one byte. This is not
27 possible with basic block sections as the offset is not determined at
28 compile time, and long branch instructions have to be used everywhere.
293) Each additional section bloats object file sizes by tens of bytes. The
30 number of basic blocks can be potentially very large compared to the
31 size of functions and can bloat object sizes significantly. Option
32 fbasicblock-sections= also takes a file path which can be used to
33 specify a subset of basic blocks that needs unique sections to keep
34 the bloats small.
354) Debug Info and CFI need special handling and will be presented as
36 separate patches.
37
38Basic Block Labels
39
40With -fbasicblock-sections=labels, or when a basic block is placed in a
41unique section, it is labelled with a symbol. This allows easy mapping of
42virtual addresses from PMU profiles back to the corresponding basic blocks.
43Since the number of basic blocks is large, the labeling bloats the symbol
44table sizes and the string table sizes significantly. While the binary size
45does increase, it does not affect performance as the symbol table is not
46loaded in memory during run-time. The string table size bloat is kept very
47minimal using a unary naming scheme that uses string suffix compression.
48The basic blocks for function foo are named "a.BB.foo", "aa.BB.foo", ...
49This turns out to be very good for string table sizes and the bloat in the
50string table size for a very large binary is ~8 %. The naming also allows
51using the --symbol-ordering-file option in LLD to arbitrarily reorder the
52sections.
53
54Differential Revision: https://reviews.llvm.org/D68063
55
56Upstream-Status: Backport [https://github.com/llvm/llvm-project/commit/4dfe92e46542be46d634a7ec24da2f2f889623d0]
57Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
58---
59 llvm/include/llvm/CodeGen/CommandFlags.inc | 34 ++++++++++++++++++++++
60 llvm/include/llvm/Target/TargetMachine.h | 14 +++++++++
61 llvm/include/llvm/Target/TargetOptions.h | 31 ++++++++++++++++++--
62 3 files changed, 76 insertions(+), 3 deletions(-)
63
64diff --git a/llvm/include/llvm/CodeGen/CommandFlags.inc b/llvm/include/llvm/CodeGen/CommandFlags.inc
65index 8739b644873d..6475a5b19edb 100644
66--- a/llvm/include/llvm/CodeGen/CommandFlags.inc
67+++ b/llvm/include/llvm/CodeGen/CommandFlags.inc
68@@ -238,6 +238,12 @@ static cl::opt<bool>
69 cl::desc("Emit functions into separate sections"),
70 cl::init(false));
71
72+static cl::opt<std::string>
73+ BBSections("basicblock-sections",
74+ cl::desc("Emit basic blocks into separate sections"),
75+ cl::value_desc("all | <function list (file)> | labels | none"),
76+ cl::init("none"));
77+
78 static cl::opt<unsigned> TLSSize("tls-size",
79 cl::desc("Bit size of immediate TLS offsets"),
80 cl::init(0));
81@@ -251,6 +257,11 @@ static cl::opt<bool>
82 cl::desc("Give unique names to every section"),
83 cl::init(true));
84
85+static cl::opt<bool> UniqueBBSectionNames(
86+ "unique-bb-section-names",
87+ cl::desc("Give unique names to every basic block section"),
88+ cl::init(false));
89+
90 static cl::opt<llvm::EABI>
91 EABIVersion("meabi", cl::desc("Set EABI type (default depends on triple):"),
92 cl::init(EABI::Default),
93@@ -285,6 +296,27 @@ static cl::opt<bool>
94 cl::desc("Always emit a debug frame section."),
95 cl::init(false));
96
97+static llvm::BasicBlockSection
98+getBBSectionsMode(llvm::TargetOptions &Options) {
99+ if (BBSections == "all")
100+ return BasicBlockSection::All;
101+ else if (BBSections == "labels")
102+ return BasicBlockSection::Labels;
103+ else if (BBSections == "none")
104+ return BasicBlockSection::None;
105+ else {
106+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
107+ MemoryBuffer::getFile(BBSections);
108+ if (!MBOrErr) {
109+ errs() << "Error loading basic block sections function list file: "
110+ << MBOrErr.getError().message() << "\n";
111+ } else {
112+ Options.BBSectionsFuncListBuf = std::move(*MBOrErr);
113+ }
114+ return BasicBlockSection::List;
115+ }
116+}
117+
118 // Common utility function tightly tied to the options listed here. Initializes
119 // a TargetOptions object with CodeGen flags and returns it.
120 static TargetOptions InitTargetOptionsFromCodeGenFlags() {
121@@ -308,7 +340,9 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() {
122 Options.RelaxELFRelocations = RelaxELFRelocations;
123 Options.DataSections = DataSections;
124 Options.FunctionSections = FunctionSections;
125+ Options.BBSections = getBBSectionsMode(Options);
126 Options.UniqueSectionNames = UniqueSectionNames;
127+ Options.UniqueBBSectionNames = UniqueBBSectionNames;
128 Options.TLSSize = TLSSize;
129 Options.EmulatedTLS = EmulatedTLS;
130 Options.ExplicitEmulatedTLS = EmulatedTLS.getNumOccurrences() > 0;
131diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
132index 176ae39b17a7..4a1f3377f31d 100644
133--- a/llvm/include/llvm/Target/TargetMachine.h
134+++ b/llvm/include/llvm/Target/TargetMachine.h
135@@ -242,6 +242,9 @@ public:
136
137 bool getUniqueSectionNames() const { return Options.UniqueSectionNames; }
138
139+ /// Return true if unique basic block section names must be generated.
140+ bool getUniqueBBSectionNames() const { return Options.UniqueBBSectionNames; }
141+
142 /// Return true if data objects should be emitted into their own section,
143 /// corresponds to -fdata-sections.
144 bool getDataSections() const {
145@@ -254,6 +257,17 @@ public:
146 return Options.FunctionSections;
147 }
148
149+ /// If basic blocks should be emitted into their own section,
150+ /// corresponding to -fbasicblock-sections.
151+ llvm::BasicBlockSection getBBSectionsType() const {
152+ return Options.BBSections;
153+ }
154+
155+ /// Get the list of functions and basic block ids that need unique sections.
156+ const MemoryBuffer *getBBSectionsFuncListBuf() const {
157+ return Options.BBSectionsFuncListBuf.get();
158+ }
159+
160 /// Get a \c TargetIRAnalysis appropriate for the target.
161 ///
162 /// This is used to construct the new pass manager's target IR analysis pass,
163diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
164index 84c6ee2a6387..d27c7b0178f0 100644
165--- a/llvm/include/llvm/Target/TargetOptions.h
166+++ b/llvm/include/llvm/Target/TargetOptions.h
167@@ -16,8 +16,11 @@
168
169 #include "llvm/MC/MCTargetOptions.h"
170
171+#include <memory>
172+
173 namespace llvm {
174 class MachineFunction;
175+ class MemoryBuffer;
176 class Module;
177
178 namespace FloatABI {
179@@ -63,6 +66,18 @@ namespace llvm {
180 };
181 }
182
183+ enum class BasicBlockSection {
184+ All, // Use Basic Block Sections for all basic blocks. A section
185+ // for every basic block can significantly bloat object file sizes.
186+ List, // Get list of functions & BBs from a file. Selectively enables
187+ // basic block sections for a subset of basic blocks which can be
188+ // used to control object size bloats from creating sections.
189+ Labels, // Do not use Basic Block Sections but label basic blocks. This
190+ // is useful when associating profile counts from virtual addresses
191+ // to basic blocks.
192+ None // Do not use Basic Block Sections.
193+ };
194+
195 enum class EABI {
196 Unknown,
197 Default, // Default means not specified
198@@ -114,9 +129,9 @@ namespace llvm {
199 EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false),
200 DisableIntegratedAS(false), RelaxELFRelocations(false),
201 FunctionSections(false), DataSections(false),
202- UniqueSectionNames(true), TrapUnreachable(false),
203- NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false),
204- ExplicitEmulatedTLS(false), EnableIPRA(false),
205+ UniqueSectionNames(true), UniqueBBSectionNames(false),
206+ TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0),
207+ EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false),
208 EmitStackSizeSection(false), EnableMachineOutliner(false),
209 SupportsDefaultOutlining(false), EmitAddrsig(false),
210 EnableDebugEntryValues(false), ForceDwarfFrameSection(false) {}
211@@ -224,6 +239,9 @@ namespace llvm {
212
213 unsigned UniqueSectionNames : 1;
214
215+ /// Use unique names for basic block sections.
216+ unsigned UniqueBBSectionNames : 1;
217+
218 /// Emit target-specific trap instruction for 'unreachable' IR instructions.
219 unsigned TrapUnreachable : 1;
220
221@@ -256,6 +274,13 @@ namespace llvm {
222 /// Emit address-significance table.
223 unsigned EmitAddrsig : 1;
224
225+ /// Emit basic blocks into separate sections.
226+ BasicBlockSection BBSections = BasicBlockSection::None;
227+
228+ /// Memory Buffer that contains information on sampled basic blocks and used
229+ /// to selectively generate basic block sections.
230+ std::shared_ptr<MemoryBuffer> BBSectionsFuncListBuf;
231+
232 /// Emit debug info about parameter's entry values.
233 unsigned EnableDebugEntryValues : 1;
234
235--
2362.33.1
237
diff --git a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
index 9701aca6..42ccb1dd 100644
--- a/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
+++ b/dynamic-layers/clang-layer/recipes-devtools/clang/llvm-project-source.bbappend
@@ -1,33 +1,5 @@
1FILESEXTRAPATHS:prepend:intel-x86-common := "${THISDIR}/files:" 1FILESEXTRAPATHS:prepend:intel-x86-common := "${THISDIR}/files:"
2 2
3SPIRV_SRCREV = "fe4d6b767363a1995ccbfca27f79efb10dcfe110"
4
5SRC_URI_LLVM10_PATCHES = " \
6 file://llvm10-0001-llvm-spirv-skip-building-tests.patch;patchdir=llvm/projects/llvm-spirv \
7 file://llvm10-0002-Fix-building-in-tree-with-cmake-DLLVM_LINK_LLVM_DYLI.patch;patchdir=llvm/projects/llvm-spirv \
8 file://llvm10-0003-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch;patchdir=llvm/projects/llvm-spirv \
9 file://BasicBlockUtils-Add-metadata-fixing-in-SplitBlockPre.patch;patchdir=llvm \
10 file://IndVarSimplify-Do-not-use-SCEV-expander-for-IVCount-.patch;patchdir=llvm \
11 file://llvm10-0001-OpenCL-3.0-support.patch \
12 file://llvm10-0002-Add-cl_khr_extended_subgroup-extensions.patch \
13 file://llvm10-0003-Memory-leak-fix-for-Managed-Static-Mutex.patch \
14 file://llvm10-0004-Remove-repo-name-in-LLVM-IR.patch \
15 file://llvm10-0005-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \
16 file://llvm10-0006-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \
17 file://llvm10-0007-support-cl_ext_float_atomics.patch \
18 file://llvm10-0008-ispc-10_0_9_0_fix_for_1767.patch \
19 file://llvm10-0009-ispc-10_0_fix_for_1788.patch \
20 file://llvm10-0010-ispc-10_0_fix_for_1793.patch \
21 file://llvm10-0011-ispc-10_0_fix_for_1844.patch \
22 file://llvm10-0012-ispc-10_0_i8_shuffle_avx512_i8_i16.patch \
23 file://llvm10-0013-ispc-10_0_k_reg_mov_avx512_i8_i16.patch \
24 file://llvm10-0014-ispc-10_0_packed_load_store_avx512skx.patch \
25 file://llvm10-0015-ispc-10_0_vXi1calling_avx512_i8_i16.patch \
26 file://llvm10-basic-block-sections-support.patch \
27 file://llvm10-Enable-the-call-site-info-only-for-g-optimizations.patch \
28 file://llvm10-Replace-MCTargetOptionsCommandFlags.inc-and-CommandF.patch \
29 "
30
31SRC_URI_LLVM12_PATCHES = " \ 3SRC_URI_LLVM12_PATCHES = " \
32 file://llvm12-0001-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \ 4 file://llvm12-0001-Remove-__IMAGE_SUPPORT__-macro-for-SPIR-since-SPIR-d.patch \
33 file://llvm12-0002-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \ 5 file://llvm12-0002-Avoid-calling-ParseCommandLineOptions-in-BackendUtil.patch \
@@ -36,10 +8,4 @@ SRC_URI_LLVM12_PATCHES = " \
36 file://llvm12-0005-ispc-12_0_fix_for_2111.patch \ 8 file://llvm12-0005-ispc-12_0_fix_for_2111.patch \
37 " 9 "
38 10
39
40SPIRV_LLVM10_SRC_URI = "git://github.com/KhronosGroup/SPIRV-LLVM-Translator.git;protocol=https;branch=llvm_release_100;destsuffix=git/llvm/projects/llvm-spirv;name=spirv"
41
42SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '10.0.1', ' ${SPIRV_LLVM10_SRC_URI} ${SRC_URI_LLVM10_PATCHES} ', '', d)}"
43SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '12.0.0', ' ${SRC_URI_LLVM12_PATCHES} ', '', d)}" 11SRC_URI:append:intel-x86-common = "${@bb.utils.contains('LLVMVERSION', '12.0.0', ' ${SRC_URI_LLVM12_PATCHES} ', '', d)}"
44
45SRCREV_spirv = "${@bb.utils.contains_any('LLVMVERSION', [ '13.0.0', '12.0.0' ], '', '${SPIRV_SRCREV}', d)}"
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch
deleted file mode 100644
index 8ffa853b..00000000
--- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch
+++ /dev/null
@@ -1,35 +0,0 @@
1From 7fc05c52dd91902fa324a7aac9b90715cfca4717 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Wed, 15 Apr 2020 17:55:32 +0800
4Subject: [PATCH] Building in-tree with LLVM 10.0 with the LLVM_LINK_LLVM_DYLIB
5
6Failed to link with the LLVMSPIRVLib library.
7
8Add an explicit dependency to force the correct build order and linking.
9
10Reference:
11https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/a6d4ccf082858e63e139ca06c02a071c343d2657
12
13Upstream-Status: Submitted [https://github.com/intel/opencl-clang/pull/118]
14
15Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
16---
17 CMakeLists.txt | 2 +-
18 1 file changed, 1 insertion(+), 1 deletion(-)
19
20diff --git a/CMakeLists.txt b/CMakeLists.txt
21index 51c140d..b8b514e 100644
22--- a/CMakeLists.txt
23+++ b/CMakeLists.txt
24@@ -208,7 +208,7 @@ link_directories(
25
26 set(OPENCL_CLANG_LINK_LIBS ${CMAKE_DL_LIBS})
27
28-if(NOT LLVMSPIRVLib IN_LIST LLVM_AVAILABLE_LIBS)
29+if(NOT LLVMSPIRVLib IN_LIST LLVM_AVAILABLE_LIBS OR (USE_PREBUILT_LLVM AND LLVM_LINK_LLVM_DYLIB))
30 # SPIRV-LLVM-Translator is not included into LLVM as a component.
31 # So, we need to list it here explicitly as an external library
32 list(APPEND OPENCL_CLANG_LINK_LIBS LLVMSPIRVLib)
33--
342.17.1
35
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-make-sure-only-static-libraries-linked-for-native-bu.patch b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-make-sure-only-static-libraries-linked-for-native-bu.patch
deleted file mode 100644
index 473f4d24..00000000
--- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/files/0002-make-sure-only-static-libraries-linked-for-native-bu.patch
+++ /dev/null
@@ -1,42 +0,0 @@
1From b29e00e6fe428a031cf577dfb703cf13eff837f6 Mon Sep 17 00:00:00 2001
2From: Naveen Saini <naveen.kumar.saini@intel.com>
3Date: Wed, 15 Apr 2020 18:05:14 +0800
4Subject: [PATCH 2/2] make sure only static libraries linked for native build
5
6LINK_COMPONENTS=all isn't working for static libs for out of tree builds. Use
7LLVM_AVAILABLE_LIBS instead. Reported:
8
9https://github.com/intel/opencl-clang/issues/114
10
11Upstream-Status: Pending
12
13Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
14Signed-off-by: Naveen Saini <naveen.kumar.saini@intel.com>
15---
16 CMakeLists.txt | 3 ++-
17 1 file changed, 2 insertions(+), 1 deletion(-)
18
19diff --git a/CMakeLists.txt b/CMakeLists.txt
20index 8707487..ad2dbda 100644
21--- a/CMakeLists.txt
22+++ b/CMakeLists.txt
23@@ -218,7 +218,7 @@ add_subdirectory(cl_headers)
24
25 set(LLVM_REQUIRES_EH ON)
26
27-if(USE_PREBUILT_LLVM OR CLANG_LINK_CLANG_DYLIB)
28+if(false)
29 list(APPEND OPENCL_CLANG_LINK_LIBS clang-cpp)
30 else()
31 list(APPEND OPENCL_CLANG_LINK_LIBS
32@@ -266,6 +266,7 @@ add_llvm_library(${TARGET_NAME} SHARED
33 all
34 LINK_LIBS
35 ${OPENCL_CLANG_LINK_LIBS}
36+ ${LLVM_AVAILABLE_LIBS}
37 )
38
39 # Configure resource file on Windows
40--
412.17.1
42
diff --git a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_10.0.0.bb b/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_10.0.0.bb
deleted file mode 100644
index e08f2278..00000000
--- a/dynamic-layers/clang-layer/recipes-opencl/opencl-clang/opencl-clang_10.0.0.bb
+++ /dev/null
@@ -1,15 +0,0 @@
1require opencl-clang.inc
2
3SRC_URI:append = " file://0001-don-t-redefine-LLVM_TABLEGEN_EXE.patch \
4 file://0001-Building-in-tree-with-LLVM-10.0-with-the-LLVM_LINK_L.patch \
5 "
6SRC_URI:append:class-native = " file://0002-make-sure-only-static-libraries-linked-for-native-bu.patch"
7
8BRANCH = "ocl-open-100"
9
10SRCREV = "c8cd72e32b6abc18ce6da71c357ea45ba78b52f0"
11
12EXTRA_OECMAKE += "\
13 -DLLVM_TABLEGEN_EXE=${STAGING_BINDIR_NATIVE}/llvm-tblgen \
14 -DCMAKE_SKIP_RPATH=TRUE \
15 "